diff --git a/Cargo.lock b/Cargo.lock index 71464cb0..306a939c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,6 +67,24 @@ dependencies = [ "backtrace", ] +[[package]] +name = "arrow2" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "467e88c67d518f9992bb1f6c8faa202eaf93b0ce244889d241c9c44d0df0ab46" +dependencies = [ + "ahash", + "bytemuck", + "chrono", + "dyn-clone", + "either", + "ethnum", + "foreign_vec", + "hash_hasher", + "num-traits", + "simdutf8", +] + [[package]] name = "async-trait" version = "0.1.56" @@ -253,6 +271,26 @@ version = "3.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" +[[package]] +name = "bytemuck" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5377c8865e74a160d21f29c2d40669f53286db6eab59b88540cbb12ffc8b835" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfd2f4180c5721da6335cc9e9061cce522b87a35e51cc57636d28d22a9863c80" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "byteorder" version = "1.4.3" @@ -671,6 +709,12 @@ dependencies = [ "shared_child", ] +[[package]] +name = "dyn-clone" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f94fa09c2aeea5b8839e414b7b841bf429fd25b9c522116ac97ee87856d88b2" + [[package]] name = "either" version = "1.7.0" @@ -741,6 +785,12 @@ dependencies = [ "erl_nif_macro", ] +[[package]] +name = "ethnum" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eac3c0b9fa6eb75255ebb42c0ba3e2210d102a66d2795afef6fed668f373311" + [[package]] name = "event-listener" version = "2.5.2" @@ -802,6 +852,12 @@ version = "1.0.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee1b05cbd864bcaecbd3455d6d967862d446e4ebfc3c2e5e5b9841e53cba6673" + [[package]] name = "form_urlencoded" version = "1.0.1" @@ -1035,6 +1091,12 @@ dependencies = [ "tracing", ] +[[package]] +name = "hash_hasher" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74721d007512d0cb3338cd20f0654ac913920061a4c4d0d8708edb3f2a698c0c" + [[package]] name = "hashbrown" version = "0.11.2" @@ -3060,6 +3122,7 @@ name = "modelfox_core" version = "0.8.0" dependencies = [ "anyhow", + "arrow2", "bitvec", "buffalo", "chrono", @@ -3270,9 +3333,12 @@ name = "modelfox_python" version = "0.8.0" dependencies = [ "anyhow", + "arrow2", "chrono", "memmap", "modelfox_core", + "modelfox_id", + "modelfox_kill_chip", "modelfox_model", "pyo3", "reqwest", @@ -3299,6 +3365,7 @@ name = "modelfox_table" version = "0.8.0" dependencies = [ "anyhow", + "arrow2", "csv", "fast-float", "fnv", @@ -4928,6 +4995,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simdutf8" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" + [[package]] name = "similar" version = "2.1.0" diff --git a/Cargo.toml b/Cargo.toml index 9412e23c..591f1aed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,7 @@ version = "0.8.0" [workspace.dependencies] anyhow = { version = "1.0", features = ["backtrace"] } +arrow2 = { version = "0.14" } backtrace = "0.3" base64 = "0.13" bitvec = "1.0" diff --git a/crates/cli/train.rs b/crates/cli/train.rs index 9b94323f..23b45edc 100644 --- a/crates/cli/train.rs +++ b/crates/cli/train.rs @@ -53,12 +53,14 @@ pub fn train(args: TrainArgs) -> Result<()> { let input = match 
(&args.file, &args.file_train, &args.file_test, args.stdin) { (None, None, None, true) => modelfox_core::train::TrainingDataSource::Stdin, (Some(file_path), None, None, false) => { - modelfox_core::train::TrainingDataSource::File(file_path.to_owned()) + modelfox_core::train::TrainingDataSource::Train( + modelfox_core::train::FileOrArrow::File(file_path.to_owned()), + ) } (None, Some(file_path_train), Some(file_path_test), false) => { modelfox_core::train::TrainingDataSource::TrainAndTest { - train: file_path_train.to_owned(), - test: file_path_test.to_owned(), + train: modelfox_core::train::FileOrArrow::File(file_path_train.to_owned()), + test: modelfox_core::train::FileOrArrow::File(file_path_test.to_owned()), } } _ => bail!("Must use the stdin flag or provide training data files."), diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index 48deecdc..bd15f6b6 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -18,6 +18,7 @@ path = "lib.rs" [dependencies] anyhow = { workspace = true } +arrow2 = { workspace = true } bitvec = { workspace = true } buffalo = { workspace = true } chrono = { workspace = true } diff --git a/crates/core/train.rs b/crates/core/train.rs index a503b54b..4b938442 100644 --- a/crates/core/train.rs +++ b/crates/core/train.rs @@ -18,6 +18,7 @@ use crate::{ test, }; use anyhow::{anyhow, bail, Result}; +use arrow2::ffi::ArrowArrayStream; use modelfox_id::Id; use modelfox_kill_chip::KillChip; use modelfox_progress_counter::ProgressCounter; @@ -35,12 +36,17 @@ use std::{ unreachable, }; +pub enum FileOrArrow { + File(std::path::PathBuf), + Arrow(*const ArrowArrayStream), +} + pub enum TrainingDataSource { Stdin, - File(std::path::PathBuf), + Train(FileOrArrow), TrainAndTest { - train: std::path::PathBuf, - test: std::path::PathBuf, + train: FileOrArrow, + test: FileOrArrow, }, } @@ -82,7 +88,7 @@ impl Trainer { target_column_name, handle_progress_event, )?), - TrainingDataSource::File(file_path) => 
Dataset::Train(load_and_shuffle_dataset_train( + TrainingDataSource::Train(file_path) => Dataset::Train(load_and_shuffle_dataset_train( &file_path, &config, target_column_name, @@ -729,25 +735,31 @@ fn load_and_shuffle_dataset_stdin( } fn load_and_shuffle_dataset_train( - file_path: &Path, + file_path: &FileOrArrow, config: &Config, target_column_name: &str, handle_progress_event: &mut dyn FnMut(ProgressEvent), ) -> Result { + let mut handle_progress_event_inner = |progress_event| { + handle_progress_event(ProgressEvent::Load(LoadProgressEvent::Train( + progress_event, + ))) + }; // Get the column types from the config, if set. - let mut table = Table::from_path( - file_path, - modelfox_table::FromCsvOptions { - column_types: column_types_from_config(config), - infer_options: Default::default(), - ..Default::default() - }, - &mut |progress_event| { - handle_progress_event(ProgressEvent::Load(LoadProgressEvent::Train( - progress_event, - ))) - }, - )?; + let mut table = match file_path { + FileOrArrow::File(file_path) => Table::from_path( + file_path, + modelfox_table::FromCsvOptions { + column_types: column_types_from_config(config), + infer_options: Default::default(), + ..Default::default() + }, + &mut handle_progress_event_inner, + )?, + FileOrArrow::Arrow(stream_ptr) => { + Table::from_arrow(*stream_ptr, &mut handle_progress_event_inner)? + } + }; // Drop any rows with invalid data in the target column drop_invalid_target_rows(&mut table, target_column_name, handle_progress_event); // Shuffle the table if enabled. 
@@ -761,27 +773,33 @@ fn load_and_shuffle_dataset_train( } fn load_and_shuffle_dataset_train_and_test( - file_path_train: &Path, - file_path_test: &Path, + file_path_train: &FileOrArrow, + file_path_test: &FileOrArrow, config: &Config, target_column_name: &str, handle_progress_event: &mut dyn FnMut(ProgressEvent), ) -> Result { + let mut handle_progress_event_inner = |progress_event| { + handle_progress_event(ProgressEvent::Load(LoadProgressEvent::Train( + progress_event, + ))) + }; // Get the column types from the config, if set. let column_types = column_types_from_config(config); - let mut table_train = Table::from_path( - file_path_train, - modelfox_table::FromCsvOptions { - column_types, - infer_options: Default::default(), - ..Default::default() - }, - &mut |progress_event| { - handle_progress_event(ProgressEvent::Load(LoadProgressEvent::Train( - progress_event, - ))) - }, - )?; + let mut table_train = match file_path_train { + FileOrArrow::File(file_path_train) => Table::from_path( + file_path_train, + modelfox_table::FromCsvOptions { + column_types, + infer_options: Default::default(), + ..Default::default() + }, + &mut handle_progress_event_inner, + )?, + FileOrArrow::Arrow(stream_ptr_train) => { + Table::from_arrow(*stream_ptr_train, &mut handle_progress_event_inner)? + } + }; // Force the column types for table_test to be the same as table_train. 
let column_types = table_train .columns() @@ -802,17 +820,20 @@ fn load_and_shuffle_dataset_train_and_test( TableColumn::Text(column) => (column.name().to_owned().unwrap(), TableColumnType::Text), }) .collect(); - let mut table_test = Table::from_path( - file_path_test, - modelfox_table::FromCsvOptions { - column_types: Some(column_types), - infer_options: Default::default(), - ..Default::default() - }, - &mut |progress_event| { - handle_progress_event(ProgressEvent::Load(LoadProgressEvent::Test(progress_event))) - }, - )?; + let mut table_test = match file_path_test { + FileOrArrow::File(file_path_test) => Table::from_path( + file_path_test, + modelfox_table::FromCsvOptions { + column_types: Some(column_types), + infer_options: Default::default(), + ..Default::default() + }, + &mut handle_progress_event_inner, + )?, + FileOrArrow::Arrow(stream_ptr_test) => { + Table::from_arrow(*stream_ptr_test, &mut handle_progress_event_inner)? + } + }; if table_train.columns().len() != table_test.columns().len() { bail!("Training data and test data must contain the same number of columns.") } diff --git a/crates/table/Cargo.toml b/crates/table/Cargo.toml index aeee218e..796bfa7b 100644 --- a/crates/table/Cargo.toml +++ b/crates/table/Cargo.toml @@ -21,6 +21,7 @@ insta = { workspace = true } [dependencies] anyhow = { workspace = true } +arrow2 = { workspace = true } csv = { workspace = true } fast-float = { workspace = true } fnv = { workspace = true } diff --git a/crates/table/load.rs b/crates/table/load.rs index efc5f896..ebe60e8c 100644 --- a/crates/table/load.rs +++ b/crates/table/load.rs @@ -1,5 +1,10 @@ use super::{Table, TableColumn, TableColumnType}; use anyhow::Result; +use arrow2::{ + array::{BooleanArray, PrimitiveArray, StructArray}, + datatypes::DataType, + ffi, +}; use modelfox_progress_counter::ProgressCounter; use modelfox_zip::zip; // NOTE - this import is actually used, false positive with the lint. 
@@ -8,6 +13,7 @@ use num::ToPrimitive; use std::{ collections::{BTreeMap, BTreeSet}, path::Path, + vec, }; #[derive(Clone)] @@ -244,6 +250,130 @@ impl Table { handle_progress_event(ProgressEvent::LoadDone); Ok(table) } + + pub fn from_arrow( + stream_ptr: *const ffi::ArrowArrayStream, + handle_progress_event: &mut impl FnMut(ProgressEvent), + ) -> Result { + let stream = unsafe { Box::from_raw(stream_ptr as *mut ffi::ArrowArrayStream) }; + + // copy fields out from stream reader + let mut iter = unsafe { ffi::ArrowArrayStreamReader::try_new(stream) }?; + let mut all_values = vec![]; + let mut column_names = vec![]; + let mut column_types = vec![]; + + while let Some(array) = unsafe { iter.next() } { + let array = array.unwrap(); + let array = array.as_any().downcast_ref::().unwrap(); + let (fields, values, _) = array.clone().into_data(); + + for (field, value) in zip!(fields, values) { + let column_name = field.name.clone(); + let column_type = match field.data_type { + DataType::Int8 + | DataType::Int16 + | DataType::Int32 + | DataType::Int64 + | DataType::UInt8 + | DataType::UInt16 + | DataType::UInt32 + | DataType::UInt64 + | DataType::Float16 + | DataType::Float32 + | DataType::Float64 + | DataType::Boolean => TableColumnType::Number, + DataType::Utf8 => { + let mut uniques = BTreeSet::new(); + if let Some(value) = value + .as_any() + .downcast_ref::>() + { + uniques + .extend(value.values_iter().map(std::string::ToString::to_string)); + } else if let Some(value) = value + .as_any() + .downcast_ref::>() + { + uniques + .extend(value.values_iter().map(std::string::ToString::to_string)); + } else { + unreachable!(); + } + let variants = uniques.into_iter().collect(); + TableColumnType::Enum { variants } + } + _ => TableColumnType::Unknown, + }; + + column_names.push(Some(column_name)); + column_types.push(column_type); + all_values.push(value); + } + } + std::mem::forget(iter); + handle_progress_event(ProgressEvent::InferDone); + + // write table data + let mut 
table = Table::new(column_names, column_types); + + for (column, value) in zip!(&mut table.columns, all_values) { + match column { + TableColumn::Unknown(_) => { + unreachable!(); + } + TableColumn::Number(column) => { + if let Some(value) = value.as_any().downcast_ref::>() { + column.data.extend(value.values_iter()); + } else if let Some(value) = value.as_any().downcast_ref::>() + { + column.data.extend(value.values_iter().map(|&x| x as f32)); + } else if let Some(value) = value.as_any().downcast_ref::>() + { + column.data.extend(value.values_iter().map(|&x| x as f32)); + } else if let Some(value) = value.as_any().downcast_ref::>() + { + column.data.extend(value.values_iter().map(|&x| x as f32)); + } else if let Some(value) = value.as_any().downcast_ref::() { + column + .data + .extend(value.values_iter().map(|x| i32::from(x) as f32)); + } else { + unreachable!(); + } + } + TableColumn::Enum(column) => { + if let Some(value) = value + .as_any() + .downcast_ref::>() + { + let mut v: Vec> = Vec::new(); + for s in value.values_iter() { + v.push(column.value_for_variant(s)); + } + column.data.extend(v); + } else if let Some(value) = value + .as_any() + .downcast_ref::>() + { + let mut v: Vec> = Vec::new(); + for s in value.values_iter() { + v.push(column.value_for_variant(s)); + } + column.data.extend(v); + } else { + unreachable!(); + } + } + TableColumn::Text(_column) => { + unreachable!(); + } + } + } + + handle_progress_event(ProgressEvent::LoadDone); + Ok(table) + } } #[derive(Clone, Debug)] diff --git a/crates/www/content/Cargo.toml b/crates/www/content/Cargo.toml index 13270231..ddef8a74 100644 --- a/crates/www/content/Cargo.toml +++ b/crates/www/content/Cargo.toml @@ -15,6 +15,7 @@ version = { workspace = true } [lib] path = "lib.rs" +doctest = false [dependencies] anyhow = { workspace = true } diff --git a/languages/python/Cargo.toml b/languages/python/Cargo.toml index 4d13de66..4db11b44 100644 --- a/languages/python/Cargo.toml +++ 
b/languages/python/Cargo.toml @@ -17,6 +17,7 @@ path = "lib.rs" [dependencies] anyhow = { workspace = true } +arrow2 = { workspace = true } chrono = { workspace = true } memmap = { workspace = true } pyo3 = { workspace = true } @@ -26,4 +27,6 @@ serde_json = { workspace = true } url = { workspace = true } modelfox_core = { workspace = true } -modelfox_model = { workspace = true } \ No newline at end of file +modelfox_model = { workspace = true } +modelfox_kill_chip = { workspace = true } +modelfox_id = { workspace = true } diff --git a/languages/python/Makefile b/languages/python/Makefile new file mode 100644 index 00000000..03873f27 --- /dev/null +++ b/languages/python/Makefile @@ -0,0 +1,12 @@ +.PHONY: test dev + +test: dev + .venv/bin/python examples/basic/train.py + +dev: .venv + cargo build -p modelfox_python + cp ../../target/debug/libmodelfox_python.so modelfox/modelfox_python.so + .venv/bin/pip install -e . + +.venv: + virtualenv .venv diff --git a/languages/python/examples/basic/heart_disease.csv b/languages/python/examples/basic/heart_disease.csv new file mode 100644 index 00000000..f3b713d2 --- /dev/null +++ b/languages/python/examples/basic/heart_disease.csv @@ -0,0 +1,304 @@ +age,gender,chest_pain,resting_blood_pressure,cholesterol,fasting_blood_sugar_greater_than_120,resting_ecg_result,exercise_max_heart_rate,exercise_induced_angina,exercise_st_depression,exercise_st_slope,fluoroscopy_vessels_colored,thallium_stress_test,diagnosis +63,male,typical angina,145,233,true,probable or definite left ventricular hypertrophy,150,no,2.3,downsloping,0,fixed defect,Negative +67,male,asymptomatic,160,286,false,probable or definite left ventricular hypertrophy,108,yes,1.5,flat,3,normal,Positive +67,male,asymptomatic,120,229,false,probable or definite left ventricular hypertrophy,129,yes,2.6,flat,2,reversible defect,Positive +37,male,non-angina pain,130,250,false,normal,187,no,3.5,downsloping,0,normal,Negative +41,female,atypical angina,130,204,false,probable or 
definite left ventricular hypertrophy,172,no,1.4,upsloping,0,normal,Negative +56,male,atypical angina,120,236,false,normal,178,no,0.8,upsloping,0,normal,Negative +62,female,asymptomatic,140,268,false,probable or definite left ventricular hypertrophy,160,no,3.6,downsloping,2,normal,Positive +57,female,asymptomatic,120,354,false,normal,163,yes,0.6,upsloping,0,normal,Negative +63,male,asymptomatic,130,254,false,probable or definite left ventricular hypertrophy,147,no,1.4,flat,1,reversible defect,Positive +53,male,asymptomatic,140,203,true,probable or definite left ventricular hypertrophy,155,yes,3.1,downsloping,0,reversible defect,Positive +57,male,asymptomatic,140,192,false,normal,148,no,0.4,flat,0,fixed defect,Negative +56,female,atypical angina,140,294,false,probable or definite left ventricular hypertrophy,153,no,1.3,flat,0,normal,Negative +56,male,non-angina pain,130,256,true,probable or definite left ventricular hypertrophy,142,yes,0.6,flat,1,fixed defect,Positive +44,male,atypical angina,120,263,false,normal,173,no,0,upsloping,0,reversible defect,Negative +52,male,non-angina pain,172,199,true,normal,162,no,0.5,upsloping,0,reversible defect,Negative +57,male,non-angina pain,150,168,false,normal,174,no,1.6,upsloping,0,normal,Negative +48,male,atypical angina,110,229,false,normal,168,no,1,downsloping,0,reversible defect,Positive +54,male,asymptomatic,140,239,false,normal,160,no,1.2,upsloping,0,normal,Negative +48,female,non-angina pain,130,275,false,normal,139,no,0.2,upsloping,0,normal,Negative +49,male,atypical angina,130,266,false,normal,171,no,0.6,upsloping,0,normal,Negative +64,male,typical angina,110,211,false,probable or definite left ventricular hypertrophy,144,yes,1.8,flat,0,normal,Negative +58,female,typical angina,150,283,true,probable or definite left ventricular hypertrophy,162,no,1,upsloping,0,normal,Negative +58,male,atypical angina,120,284,false,probable or definite left ventricular hypertrophy,160,no,1.8,flat,0,normal,Positive +58,male,non-angina 
pain,132,224,false,probable or definite left ventricular hypertrophy,173,no,3.2,upsloping,2,reversible defect,Positive +60,male,asymptomatic,130,206,false,probable or definite left ventricular hypertrophy,132,yes,2.4,flat,2,reversible defect,Positive +50,female,non-angina pain,120,219,false,normal,158,no,1.6,flat,0,normal,Negative +58,female,non-angina pain,120,340,false,normal,172,no,0,upsloping,0,normal,Negative +66,female,typical angina,150,226,false,normal,114,no,2.6,downsloping,0,normal,Negative +43,male,asymptomatic,150,247,false,normal,171,no,1.5,upsloping,0,normal,Negative +40,male,asymptomatic,110,167,false,probable or definite left ventricular hypertrophy,114,yes,2,flat,0,reversible defect,Positive +69,female,typical angina,140,239,false,normal,151,no,1.8,upsloping,2,normal,Negative +60,male,asymptomatic,117,230,true,normal,160,yes,1.4,upsloping,2,reversible defect,Positive +64,male,non-angina pain,140,335,false,normal,158,no,0,upsloping,0,normal,Positive +59,male,asymptomatic,135,234,false,normal,161,no,0.5,flat,0,reversible defect,Negative +44,male,non-angina pain,130,233,false,normal,179,yes,0.4,upsloping,0,normal,Negative +42,male,asymptomatic,140,226,false,normal,178,no,0,upsloping,0,normal,Negative +43,male,asymptomatic,120,177,false,probable or definite left ventricular hypertrophy,120,yes,2.5,flat,0,reversible defect,Positive +57,male,asymptomatic,150,276,false,probable or definite left ventricular hypertrophy,112,yes,0.6,flat,1,fixed defect,Positive +55,male,asymptomatic,132,353,false,normal,132,yes,1.2,flat,1,reversible defect,Positive +61,male,non-angina pain,150,243,true,normal,137,yes,1,flat,0,normal,Negative +65,female,asymptomatic,150,225,false,probable or definite left ventricular hypertrophy,114,no,1,flat,3,reversible defect,Positive +40,male,typical angina,140,199,false,normal,178,yes,1.4,upsloping,0,reversible defect,Negative +71,female,atypical angina,160,302,false,normal,162,no,0.4,upsloping,2,normal,Negative +59,male,non-angina 
pain,150,212,true,normal,157,no,1.6,upsloping,0,normal,Negative +61,female,asymptomatic,130,330,false,probable or definite left ventricular hypertrophy,169,no,0,upsloping,0,normal,Positive +58,male,non-angina pain,112,230,false,probable or definite left ventricular hypertrophy,165,no,2.5,flat,1,reversible defect,Positive +51,male,non-angina pain,110,175,false,normal,123,no,0.6,upsloping,0,normal,Negative +50,male,asymptomatic,150,243,false,probable or definite left ventricular hypertrophy,128,no,2.6,flat,0,reversible defect,Positive +65,female,non-angina pain,140,417,true,probable or definite left ventricular hypertrophy,157,no,0.8,upsloping,1,normal,Negative +53,male,non-angina pain,130,197,true,probable or definite left ventricular hypertrophy,152,no,1.2,downsloping,0,normal,Negative +41,female,atypical angina,105,198,false,normal,168,no,0,upsloping,1,normal,Negative +65,male,asymptomatic,120,177,false,normal,140,no,0.4,upsloping,0,reversible defect,Negative +44,male,asymptomatic,112,290,false,probable or definite left ventricular hypertrophy,153,no,0,upsloping,1,normal,Positive +44,male,atypical angina,130,219,false,probable or definite left ventricular hypertrophy,188,no,0,upsloping,0,normal,Negative +60,male,asymptomatic,130,253,false,normal,144,yes,1.4,upsloping,1,reversible defect,Positive +54,male,asymptomatic,124,266,false,probable or definite left ventricular hypertrophy,109,yes,2.2,flat,1,reversible defect,Positive +50,male,non-angina pain,140,233,false,normal,163,no,0.6,flat,1,reversible defect,Positive +41,male,asymptomatic,110,172,false,probable or definite left ventricular hypertrophy,158,no,0,upsloping,0,reversible defect,Positive +54,male,non-angina pain,125,273,false,probable or definite left ventricular hypertrophy,152,no,0.5,downsloping,1,normal,Negative +51,male,typical angina,125,213,false,probable or definite left ventricular hypertrophy,125,yes,1.4,upsloping,1,normal,Negative 
+51,female,asymptomatic,130,305,false,normal,142,yes,1.2,flat,0,reversible defect,Positive +46,female,non-angina pain,142,177,false,probable or definite left ventricular hypertrophy,160,yes,1.4,downsloping,0,normal,Negative +58,male,asymptomatic,128,216,false,probable or definite left ventricular hypertrophy,131,yes,2.2,flat,3,reversible defect,Positive +54,female,non-angina pain,135,304,true,normal,170,no,0,upsloping,0,normal,Negative +54,male,asymptomatic,120,188,false,normal,113,no,1.4,flat,1,reversible defect,Positive +60,male,asymptomatic,145,282,false,probable or definite left ventricular hypertrophy,142,yes,2.8,flat,2,reversible defect,Positive +60,male,non-angina pain,140,185,false,probable or definite left ventricular hypertrophy,155,no,3,flat,0,normal,Positive +54,male,non-angina pain,150,232,false,probable or definite left ventricular hypertrophy,165,no,1.6,upsloping,0,reversible defect,Negative +59,male,asymptomatic,170,326,false,probable or definite left ventricular hypertrophy,140,yes,3.4,downsloping,0,reversible defect,Positive +46,male,non-angina pain,150,231,false,normal,147,no,3.6,flat,0,normal,Positive +65,female,non-angina pain,155,269,false,normal,148,no,0.8,upsloping,0,normal,Negative +67,male,asymptomatic,125,254,true,normal,163,no,0.2,flat,2,reversible defect,Positive +62,male,asymptomatic,120,267,false,normal,99,yes,1.8,flat,2,reversible defect,Positive +65,male,asymptomatic,110,248,false,probable or definite left ventricular hypertrophy,158,no,0.6,upsloping,2,fixed defect,Positive +44,male,asymptomatic,110,197,false,probable or definite left ventricular hypertrophy,177,no,0,upsloping,1,normal,Positive +65,female,non-angina pain,160,360,false,probable or definite left ventricular hypertrophy,151,no,0.8,upsloping,0,normal,Negative +60,male,asymptomatic,125,258,false,probable or definite left ventricular hypertrophy,141,yes,2.8,flat,1,reversible defect,Positive +51,female,non-angina pain,140,308,false,probable or definite left ventricular 
hypertrophy,142,no,1.5,upsloping,1,normal,Negative +48,male,atypical angina,130,245,false,probable or definite left ventricular hypertrophy,180,no,0.2,flat,0,normal,Negative +58,male,asymptomatic,150,270,false,probable or definite left ventricular hypertrophy,111,yes,0.8,upsloping,0,reversible defect,Positive +45,male,asymptomatic,104,208,false,probable or definite left ventricular hypertrophy,148,yes,3,flat,0,normal,Negative +53,female,asymptomatic,130,264,false,probable or definite left ventricular hypertrophy,143,no,0.4,flat,0,normal,Negative +39,male,non-angina pain,140,321,false,probable or definite left ventricular hypertrophy,182,no,0,upsloping,0,normal,Negative +68,male,non-angina pain,180,274,true,probable or definite left ventricular hypertrophy,150,yes,1.6,flat,0,reversible defect,Positive +52,male,atypical angina,120,325,false,normal,172,no,0.2,upsloping,0,normal,Negative +44,male,non-angina pain,140,235,false,probable or definite left ventricular hypertrophy,180,no,0,upsloping,0,normal,Negative +47,male,non-angina pain,138,257,false,probable or definite left ventricular hypertrophy,156,no,0,upsloping,0,normal,Negative +53,female,non-angina pain,128,216,false,probable or definite left ventricular hypertrophy,115,no,0,upsloping,0,,Negative +53,female,asymptomatic,138,234,false,probable or definite left ventricular hypertrophy,160,no,0,upsloping,0,normal,Negative +51,female,non-angina pain,130,256,false,probable or definite left ventricular hypertrophy,149,no,0.5,upsloping,0,normal,Negative +66,male,asymptomatic,120,302,false,probable or definite left ventricular hypertrophy,151,no,0.4,flat,0,normal,Negative +62,female,asymptomatic,160,164,false,probable or definite left ventricular hypertrophy,145,no,6.2,downsloping,3,reversible defect,Positive +62,male,non-angina pain,130,231,false,normal,146,no,1.8,flat,3,reversible defect,Negative +44,female,non-angina pain,108,141,false,normal,175,no,0.6,flat,0,normal,Negative +63,female,non-angina 
pain,135,252,false,probable or definite left ventricular hypertrophy,172,no,0,upsloping,0,normal,Negative +52,male,asymptomatic,128,255,false,normal,161,yes,0,upsloping,1,reversible defect,Positive +59,male,asymptomatic,110,239,false,probable or definite left ventricular hypertrophy,142,yes,1.2,flat,1,reversible defect,Positive +60,female,asymptomatic,150,258,false,probable or definite left ventricular hypertrophy,157,no,2.6,flat,2,reversible defect,Positive +52,male,atypical angina,134,201,false,normal,158,no,0.8,upsloping,1,normal,Negative +48,male,asymptomatic,122,222,false,probable or definite left ventricular hypertrophy,186,no,0,upsloping,0,normal,Negative +45,male,asymptomatic,115,260,false,probable or definite left ventricular hypertrophy,185,no,0,upsloping,0,normal,Negative +34,male,typical angina,118,182,false,probable or definite left ventricular hypertrophy,174,no,0,upsloping,0,normal,Negative +57,female,asymptomatic,128,303,false,probable or definite left ventricular hypertrophy,159,no,0,upsloping,1,normal,Negative +71,female,non-angina pain,110,265,true,probable or definite left ventricular hypertrophy,130,no,0,upsloping,1,normal,Negative +49,male,non-angina pain,120,188,false,normal,139,no,2,flat,3,reversible defect,Positive +54,male,atypical angina,108,309,false,normal,156,no,0,upsloping,0,reversible defect,Negative +59,male,asymptomatic,140,177,false,normal,162,yes,0,upsloping,1,reversible defect,Positive +57,male,non-angina pain,128,229,false,probable or definite left ventricular hypertrophy,150,no,0.4,flat,1,reversible defect,Positive +61,male,asymptomatic,120,260,false,normal,140,yes,3.6,flat,1,reversible defect,Positive +39,male,asymptomatic,118,219,false,normal,140,no,1.2,flat,0,reversible defect,Positive +61,female,asymptomatic,145,307,false,probable or definite left ventricular hypertrophy,146,yes,1,flat,0,reversible defect,Positive +56,male,asymptomatic,125,249,true,probable or definite left ventricular 
hypertrophy,144,yes,1.2,flat,1,normal,Positive +52,male,typical angina,118,186,false,probable or definite left ventricular hypertrophy,190,no,0,flat,0,fixed defect,Negative +43,female,asymptomatic,132,341,true,probable or definite left ventricular hypertrophy,136,yes,3,flat,0,reversible defect,Positive +62,female,non-angina pain,130,263,false,normal,97,no,1.2,flat,1,reversible defect,Positive +41,male,atypical angina,135,203,false,normal,132,no,0,flat,0,fixed defect,Negative +58,male,non-angina pain,140,211,true,probable or definite left ventricular hypertrophy,165,no,0,upsloping,0,normal,Negative +35,female,asymptomatic,138,183,false,normal,182,no,1.4,upsloping,0,normal,Negative +63,male,asymptomatic,130,330,true,probable or definite left ventricular hypertrophy,132,yes,1.8,upsloping,3,reversible defect,Positive +65,male,asymptomatic,135,254,false,probable or definite left ventricular hypertrophy,127,no,2.8,flat,1,reversible defect,Positive +48,male,asymptomatic,130,256,true,probable or definite left ventricular hypertrophy,150,yes,0,upsloping,2,reversible defect,Positive +63,female,asymptomatic,150,407,false,probable or definite left ventricular hypertrophy,154,no,4,flat,3,reversible defect,Positive +51,male,non-angina pain,100,222,false,normal,143,yes,1.2,flat,0,normal,Negative +55,male,asymptomatic,140,217,false,normal,111,yes,5.6,downsloping,0,reversible defect,Positive +65,male,typical angina,138,282,true,probable or definite left ventricular hypertrophy,174,no,1.4,flat,1,normal,Positive +45,female,atypical angina,130,234,false,probable or definite left ventricular hypertrophy,175,no,0.6,flat,0,normal,Negative +56,female,asymptomatic,200,288,true,probable or definite left ventricular hypertrophy,133,yes,4,downsloping,2,reversible defect,Positive +54,male,asymptomatic,110,239,false,normal,126,yes,2.8,flat,1,reversible defect,Positive +44,male,atypical angina,120,220,false,normal,170,no,0,upsloping,0,normal,Negative 
+62,female,asymptomatic,124,209,false,normal,163,no,0,upsloping,0,normal,Negative +54,male,non-angina pain,120,258,false,probable or definite left ventricular hypertrophy,147,no,0.4,flat,0,reversible defect,Negative +51,male,non-angina pain,94,227,false,normal,154,yes,0,upsloping,1,reversible defect,Negative +29,male,atypical angina,130,204,false,probable or definite left ventricular hypertrophy,202,no,0,upsloping,0,normal,Negative +51,male,asymptomatic,140,261,false,probable or definite left ventricular hypertrophy,186,yes,0,upsloping,0,normal,Negative +43,female,non-angina pain,122,213,false,normal,165,no,0.2,flat,0,normal,Negative +55,female,atypical angina,135,250,false,probable or definite left ventricular hypertrophy,161,no,1.4,flat,0,normal,Negative +70,male,asymptomatic,145,174,false,normal,125,yes,2.6,downsloping,0,reversible defect,Positive +62,male,atypical angina,120,281,false,probable or definite left ventricular hypertrophy,103,no,1.4,flat,1,reversible defect,Positive +35,male,asymptomatic,120,198,false,normal,130,yes,1.6,flat,0,reversible defect,Positive +51,male,non-angina pain,125,245,true,probable or definite left ventricular hypertrophy,166,no,2.4,flat,0,normal,Negative +59,male,atypical angina,140,221,false,normal,164,yes,0,upsloping,0,normal,Negative +59,male,typical angina,170,288,false,probable or definite left ventricular hypertrophy,159,no,0.2,flat,0,reversible defect,Positive +52,male,atypical angina,128,205,true,normal,184,no,0,upsloping,0,normal,Negative +64,male,non-angina pain,125,309,false,normal,131,yes,1.8,flat,0,reversible defect,Positive +58,male,non-angina pain,105,240,false,probable or definite left ventricular hypertrophy,154,yes,0.6,flat,0,reversible defect,Negative +47,male,non-angina pain,108,243,false,normal,152,no,0,upsloping,0,normal,Positive +57,male,asymptomatic,165,289,true,probable or definite left ventricular hypertrophy,124,no,1,flat,3,reversible defect,Positive +41,male,non-angina 
pain,112,250,false,normal,179,no,0,upsloping,0,normal,Negative +45,male,atypical angina,128,308,false,probable or definite left ventricular hypertrophy,170,no,0,upsloping,0,normal,Negative +60,female,non-angina pain,102,318,false,normal,160,no,0,upsloping,1,normal,Negative +52,male,typical angina,152,298,true,normal,178,no,1.2,flat,0,reversible defect,Negative +42,female,asymptomatic,102,265,false,probable or definite left ventricular hypertrophy,122,no,0.6,flat,0,normal,Negative +67,female,non-angina pain,115,564,false,probable or definite left ventricular hypertrophy,160,no,1.6,flat,0,reversible defect,Negative +55,male,asymptomatic,160,289,false,probable or definite left ventricular hypertrophy,145,yes,0.8,flat,1,reversible defect,Positive +64,male,asymptomatic,120,246,false,probable or definite left ventricular hypertrophy,96,yes,2.2,downsloping,1,normal,Positive +70,male,asymptomatic,130,322,false,probable or definite left ventricular hypertrophy,109,no,2.4,flat,3,normal,Positive +51,male,asymptomatic,140,299,false,normal,173,yes,1.6,upsloping,0,reversible defect,Positive +58,male,asymptomatic,125,300,false,probable or definite left ventricular hypertrophy,171,no,0,upsloping,2,reversible defect,Positive +60,male,asymptomatic,140,293,false,probable or definite left ventricular hypertrophy,170,no,1.2,flat,2,reversible defect,Positive +68,male,non-angina pain,118,277,false,normal,151,no,1,upsloping,1,reversible defect,Negative +46,male,atypical angina,101,197,true,normal,156,no,0,upsloping,0,reversible defect,Negative +77,male,asymptomatic,125,304,false,probable or definite left ventricular hypertrophy,162,yes,0,upsloping,3,normal,Positive +54,female,non-angina pain,110,214,false,normal,158,no,1.6,flat,0,normal,Negative +58,female,asymptomatic,100,248,false,probable or definite left ventricular hypertrophy,122,no,1,flat,0,normal,Negative +48,male,non-angina pain,124,255,true,normal,175,no,0,upsloping,2,normal,Negative 
+57,male,asymptomatic,132,207,false,normal,168,yes,0,upsloping,0,reversible defect,Negative +52,male,non-angina pain,138,223,false,normal,169,no,0,upsloping,,normal,Negative +54,female,atypical angina,132,288,true,probable or definite left ventricular hypertrophy,159,yes,0,upsloping,1,normal,Negative +35,male,asymptomatic,126,282,false,probable or definite left ventricular hypertrophy,156,yes,0,upsloping,0,reversible defect,Positive +45,female,atypical angina,112,160,false,normal,138,no,0,flat,0,normal,Negative +70,male,non-angina pain,160,269,false,normal,112,yes,2.9,flat,1,reversible defect,Positive +53,male,asymptomatic,142,226,false,probable or definite left ventricular hypertrophy,111,yes,0,upsloping,0,reversible defect,Negative +59,female,asymptomatic,174,249,false,normal,143,yes,0,flat,0,normal,Positive +62,female,asymptomatic,140,394,false,probable or definite left ventricular hypertrophy,157,no,1.2,flat,0,normal,Negative +64,male,asymptomatic,145,212,false,probable or definite left ventricular hypertrophy,132,no,2,flat,2,fixed defect,Positive +57,male,asymptomatic,152,274,false,normal,88,yes,1.2,flat,1,reversible defect,Positive +52,male,asymptomatic,108,233,true,normal,147,no,0.1,upsloping,3,reversible defect,Negative +56,male,asymptomatic,132,184,false,probable or definite left ventricular hypertrophy,105,yes,2.1,flat,1,fixed defect,Positive +43,male,non-angina pain,130,315,false,normal,162,no,1.9,upsloping,1,normal,Negative +53,male,non-angina pain,130,246,true,probable or definite left ventricular hypertrophy,173,no,0,upsloping,3,normal,Negative +48,male,asymptomatic,124,274,false,probable or definite left ventricular hypertrophy,166,no,0.5,flat,0,reversible defect,Positive +56,female,asymptomatic,134,409,false,probable or definite left ventricular hypertrophy,150,yes,1.9,flat,2,reversible defect,Positive +42,male,typical angina,148,244,false,probable or definite left ventricular hypertrophy,178,no,0.8,upsloping,2,normal,Negative +59,male,typical 
angina,178,270,false,probable or definite left ventricular hypertrophy,145,no,4.2,downsloping,0,reversible defect,Negative +60,female,asymptomatic,158,305,false,probable or definite left ventricular hypertrophy,161,no,0,upsloping,0,normal,Positive +63,female,atypical angina,140,195,false,normal,179,no,0,upsloping,2,normal,Negative +42,male,non-angina pain,120,240,true,normal,194,no,0.8,downsloping,0,reversible defect,Negative +66,male,atypical angina,160,246,false,normal,120,yes,0,flat,3,fixed defect,Positive +54,male,atypical angina,192,283,false,probable or definite left ventricular hypertrophy,195,no,0,upsloping,1,reversible defect,Positive +69,male,non-angina pain,140,254,false,probable or definite left ventricular hypertrophy,146,no,2,flat,3,reversible defect,Positive +50,male,non-angina pain,129,196,false,normal,163,no,0,upsloping,0,normal,Negative +51,male,asymptomatic,140,298,false,normal,122,yes,4.2,flat,3,reversible defect,Positive +43,male,asymptomatic,132,247,true,probable or definite left ventricular hypertrophy,143,yes,0.1,flat,,reversible defect,Positive +62,female,asymptomatic,138,294,true,normal,106,no,1.9,flat,3,normal,Positive +68,female,non-angina pain,120,211,false,probable or definite left ventricular hypertrophy,115,no,1.5,flat,0,normal,Negative +67,male,asymptomatic,100,299,false,probable or definite left ventricular hypertrophy,125,yes,0.9,flat,2,normal,Positive +69,male,typical angina,160,234,true,probable or definite left ventricular hypertrophy,131,no,0.1,flat,1,normal,Negative +45,female,asymptomatic,138,236,false,probable or definite left ventricular hypertrophy,152,yes,0.2,flat,0,normal,Negative +50,female,atypical angina,120,244,false,normal,162,no,1.1,upsloping,0,normal,Negative +59,male,typical angina,160,273,false,probable or definite left ventricular hypertrophy,125,no,0,upsloping,0,normal,Positive +50,female,asymptomatic,110,254,false,probable or definite left ventricular hypertrophy,159,no,0,upsloping,0,normal,Negative 
+64,female,asymptomatic,180,325,false,normal,154,yes,0,upsloping,0,normal,Negative +57,male,non-angina pain,150,126,true,normal,173,no,0.2,upsloping,1,reversible defect,Negative +64,female,non-angina pain,140,313,false,normal,133,no,0.2,upsloping,0,reversible defect,Negative +43,male,asymptomatic,110,211,false,normal,161,no,0,upsloping,0,reversible defect,Negative +45,male,asymptomatic,142,309,false,probable or definite left ventricular hypertrophy,147,yes,0,flat,3,reversible defect,Positive +58,male,asymptomatic,128,259,false,probable or definite left ventricular hypertrophy,130,yes,3,flat,2,reversible defect,Positive +50,male,asymptomatic,144,200,false,probable or definite left ventricular hypertrophy,126,yes,0.9,flat,0,reversible defect,Positive +55,male,atypical angina,130,262,false,normal,155,no,0,upsloping,0,normal,Negative +62,female,asymptomatic,150,244,false,normal,154,yes,1.4,flat,0,normal,Positive +37,female,non-angina pain,120,215,false,normal,170,no,0,upsloping,0,normal,Negative +38,male,typical angina,120,231,false,normal,182,yes,3.8,flat,0,reversible defect,Positive +41,male,non-angina pain,130,214,false,probable or definite left ventricular hypertrophy,168,no,2,flat,0,normal,Negative +66,female,asymptomatic,178,228,true,normal,165,yes,1,flat,2,reversible defect,Positive +52,male,asymptomatic,112,230,false,normal,160,no,0,upsloping,1,normal,Positive +56,male,typical angina,120,193,false,probable or definite left ventricular hypertrophy,162,no,1.9,flat,0,reversible defect,Negative +46,female,atypical angina,105,204,false,normal,172,no,0,upsloping,0,normal,Negative +46,female,asymptomatic,138,243,false,probable or definite left ventricular hypertrophy,152,yes,0,flat,0,normal,Negative +64,female,asymptomatic,130,303,false,normal,122,no,2,flat,2,normal,Negative +59,male,asymptomatic,138,271,false,probable or definite left ventricular hypertrophy,182,no,0,upsloping,0,normal,Negative +41,female,non-angina pain,112,268,false,probable or definite left 
ventricular hypertrophy,172,yes,0,upsloping,0,normal,Negative +54,female,non-angina pain,108,267,false,probable or definite left ventricular hypertrophy,167,no,0,upsloping,0,normal,Negative +39,female,non-angina pain,94,199,false,normal,179,no,0,upsloping,0,normal,Negative +53,male,asymptomatic,123,282,false,normal,95,yes,2,flat,2,reversible defect,Positive +63,female,asymptomatic,108,269,false,normal,169,yes,1.8,flat,2,normal,Positive +34,female,atypical angina,118,210,false,normal,192,no,0.7,upsloping,0,normal,Negative +47,male,asymptomatic,112,204,false,normal,143,no,0.1,upsloping,0,normal,Negative +67,female,non-angina pain,152,277,false,normal,172,no,0,upsloping,1,normal,Negative +54,male,asymptomatic,110,206,false,probable or definite left ventricular hypertrophy,108,yes,0,flat,1,normal,Positive +66,male,asymptomatic,112,212,false,probable or definite left ventricular hypertrophy,132,yes,0.1,upsloping,1,normal,Positive +52,female,non-angina pain,136,196,false,probable or definite left ventricular hypertrophy,169,no,0.1,flat,0,normal,Negative +55,female,asymptomatic,180,327,false,ST-T wave abnormality,117,yes,3.4,flat,0,normal,Positive +49,male,non-angina pain,118,149,false,probable or definite left ventricular hypertrophy,126,no,0.8,upsloping,3,normal,Positive +74,female,atypical angina,120,269,false,probable or definite left ventricular hypertrophy,121,yes,0.2,upsloping,1,normal,Negative +54,female,non-angina pain,160,201,false,normal,163,no,0,upsloping,1,normal,Negative +54,male,asymptomatic,122,286,false,probable or definite left ventricular hypertrophy,116,yes,3.2,flat,2,normal,Positive +56,male,asymptomatic,130,283,true,probable or definite left ventricular hypertrophy,103,yes,1.6,downsloping,0,reversible defect,Positive +46,male,asymptomatic,120,249,false,probable or definite left ventricular hypertrophy,144,no,0.8,upsloping,0,reversible defect,Positive +49,female,atypical angina,134,271,false,normal,162,no,0,flat,0,normal,Negative +42,male,atypical 
angina,120,295,false,normal,162,no,0,upsloping,0,normal,Negative +41,male,atypical angina,110,235,false,normal,153,no,0,upsloping,0,normal,Negative +41,female,atypical angina,126,306,false,normal,163,no,0,upsloping,0,normal,Negative +49,female,asymptomatic,130,269,false,normal,163,no,0,upsloping,0,normal,Negative +61,male,typical angina,134,234,false,normal,145,no,2.6,flat,2,normal,Positive +60,female,non-angina pain,120,178,true,normal,96,no,0,upsloping,0,normal,Negative +67,male,asymptomatic,120,237,false,normal,71,no,1,flat,0,normal,Positive +58,male,asymptomatic,100,234,false,normal,156,no,0.1,upsloping,1,reversible defect,Positive +47,male,asymptomatic,110,275,false,probable or definite left ventricular hypertrophy,118,yes,1,flat,1,normal,Positive +52,male,asymptomatic,125,212,false,normal,168,no,1,upsloping,2,reversible defect,Positive +62,male,atypical angina,128,208,true,probable or definite left ventricular hypertrophy,140,no,0,upsloping,0,normal,Negative +57,male,asymptomatic,110,201,false,normal,126,yes,1.5,flat,0,fixed defect,Negative +58,male,asymptomatic,146,218,false,normal,105,no,2,flat,1,reversible defect,Positive +64,male,asymptomatic,128,263,false,normal,105,yes,0.2,flat,1,reversible defect,Negative +51,female,non-angina pain,120,295,false,probable or definite left ventricular hypertrophy,157,no,0.6,upsloping,0,normal,Negative +43,male,asymptomatic,115,303,false,normal,181,no,1.2,flat,0,normal,Negative +42,female,non-angina pain,120,209,false,normal,173,no,0,flat,0,normal,Negative +67,female,asymptomatic,106,223,false,normal,142,no,0.3,upsloping,2,normal,Negative +76,female,non-angina pain,140,197,false,ST-T wave abnormality,116,no,1.1,flat,0,normal,Negative +70,male,atypical angina,156,245,false,probable or definite left ventricular hypertrophy,143,no,0,upsloping,0,normal,Negative +57,male,atypical angina,124,261,false,normal,141,no,0.3,upsloping,0,reversible defect,Positive +44,female,non-angina 
pain,118,242,false,normal,149,no,0.3,flat,1,normal,Negative +58,female,atypical angina,136,319,true,probable or definite left ventricular hypertrophy,152,no,0,upsloping,2,normal,Positive +60,female,typical angina,150,240,false,normal,171,no,0.9,upsloping,0,normal,Negative +44,male,non-angina pain,120,226,false,normal,169,no,0,upsloping,0,normal,Negative +61,male,asymptomatic,138,166,false,probable or definite left ventricular hypertrophy,125,yes,3.6,flat,1,normal,Positive +42,male,asymptomatic,136,315,false,normal,125,yes,1.8,flat,0,fixed defect,Positive +52,male,asymptomatic,128,204,true,normal,156,yes,1,flat,0,,Positive +59,male,non-angina pain,126,218,true,normal,134,no,2.2,flat,1,fixed defect,Positive +40,male,asymptomatic,152,223,false,normal,181,no,0,upsloping,0,reversible defect,Positive +42,male,non-angina pain,130,180,false,normal,150,no,0,upsloping,0,normal,Negative +61,male,asymptomatic,140,207,false,probable or definite left ventricular hypertrophy,138,yes,1.9,upsloping,1,reversible defect,Positive +66,male,asymptomatic,160,228,false,probable or definite left ventricular hypertrophy,138,no,2.3,upsloping,0,fixed defect,Negative +46,male,asymptomatic,140,311,false,normal,120,yes,1.8,flat,2,reversible defect,Positive +71,female,asymptomatic,112,149,false,normal,125,no,1.6,flat,0,normal,Negative +59,male,typical angina,134,204,false,normal,162,no,0.8,upsloping,2,normal,Positive +64,male,typical angina,170,227,false,probable or definite left ventricular hypertrophy,155,no,0.6,flat,0,reversible defect,Negative +66,female,non-angina pain,146,278,false,probable or definite left ventricular hypertrophy,152,no,0,flat,1,normal,Negative +39,female,non-angina pain,138,220,false,normal,152,no,0,flat,0,normal,Negative +57,male,atypical angina,154,232,false,probable or definite left ventricular hypertrophy,164,no,0,upsloping,1,normal,Positive +58,female,asymptomatic,130,197,false,normal,131,no,0.6,flat,0,normal,Negative 
+57,male,asymptomatic,110,335,false,normal,143,yes,3,flat,1,reversible defect,Positive +47,male,non-angina pain,130,253,false,normal,179,no,0,upsloping,0,normal,Negative +55,female,asymptomatic,128,205,false,ST-T wave abnormality,130,yes,2,flat,1,reversible defect,Positive +35,male,atypical angina,122,192,false,normal,174,no,0,upsloping,0,normal,Negative +61,male,asymptomatic,148,203,false,normal,161,no,0,upsloping,1,reversible defect,Positive +58,male,asymptomatic,114,318,false,ST-T wave abnormality,140,no,4.4,downsloping,3,fixed defect,Positive +58,female,asymptomatic,170,225,true,probable or definite left ventricular hypertrophy,146,yes,2.8,flat,2,fixed defect,Positive +58,male,atypical angina,125,220,false,normal,144,no,0.4,flat,,reversible defect,Negative +56,male,atypical angina,130,221,false,probable or definite left ventricular hypertrophy,163,no,0,upsloping,0,reversible defect,Negative +56,male,atypical angina,120,240,false,normal,169,no,0,downsloping,0,normal,Negative +67,male,non-angina pain,152,212,false,probable or definite left ventricular hypertrophy,150,no,0.8,flat,0,reversible defect,Positive +55,female,atypical angina,132,342,false,normal,166,no,1.2,upsloping,0,normal,Negative +44,male,asymptomatic,120,169,false,normal,144,yes,2.8,downsloping,0,fixed defect,Positive +63,male,asymptomatic,140,187,false,probable or definite left ventricular hypertrophy,144,yes,4,upsloping,2,reversible defect,Positive +63,female,asymptomatic,124,197,false,normal,136,yes,0,flat,0,normal,Positive +41,male,atypical angina,120,157,false,normal,182,no,0,upsloping,0,normal,Negative +59,male,asymptomatic,164,176,true,probable or definite left ventricular hypertrophy,90,no,1,flat,2,fixed defect,Positive +57,female,asymptomatic,140,241,false,normal,123,yes,0.2,flat,0,reversible defect,Positive +45,male,typical angina,110,264,false,normal,132,no,1.2,flat,0,reversible defect,Positive +68,male,asymptomatic,144,193,true,normal,141,no,3.4,flat,2,reversible defect,Positive 
+57,male,asymptomatic,130,131,false,normal,115,yes,1.2,flat,1,reversible defect,Positive +57,female,atypical angina,130,236,false,probable or definite left ventricular hypertrophy,174,no,0,flat,1,normal,Positive +38,male,non-angina pain,138,175,false,normal,173,no,0,upsloping,,normal,Negative diff --git a/languages/python/examples/basic/heart_disease.modelfox b/languages/python/examples/basic/heart_disease.modelfox new file mode 100644 index 00000000..f00975e6 Binary files /dev/null and b/languages/python/examples/basic/heart_disease.modelfox differ diff --git a/languages/python/examples/basic/main.py b/languages/python/examples/basic/main.py index 77bfd760..4707177e 100644 --- a/languages/python/examples/basic/main.py +++ b/languages/python/examples/basic/main.py @@ -1,7 +1,5 @@ import os -import json import modelfox -from typing import cast # Get the path to the .modelfox file. model_path = os.path.join(os.path.dirname(__file__), "heart_disease.modelfox") @@ -9,7 +7,7 @@ model = modelfox.Model.from_path(model_path) # Create an example input matching the schema of the CSV file the model was trained on. Here the data is just hard-coded, but in your application you will probably get this from a database or user input. -input = { +specimen = { "age": 63, "gender": "male", "chest_pain": "typical angina", @@ -26,7 +24,8 @@ } # Make the prediction! -output = model.predict(5) +output = model.predict(specimen) # Print the output. 
-print("Output:", output) +print("Output.class_name:", output.class_name) +print("Output.probability:", output.probability) diff --git a/languages/python/examples/basic/pyrightconfig.json b/languages/python/examples/basic/pyrightconfig.json new file mode 100644 index 00000000..d268e16a --- /dev/null +++ b/languages/python/examples/basic/pyrightconfig.json @@ -0,0 +1,5 @@ +{ + "venvPath": "../..", + "venv": ".venv" +} + diff --git a/languages/python/examples/basic/train.py b/languages/python/examples/basic/train.py new file mode 100644 index 00000000..4fbf4e79 --- /dev/null +++ b/languages/python/examples/basic/train.py @@ -0,0 +1,47 @@ +import os +import pyarrow as pa +from pyarrow.cffi import ffi as arrow_c +import pandas as pd +import modelfox + +# Get the path to the CSV file. +csv_path = os.path.join(os.path.dirname(__file__), "heart_disease.csv") +# Get the path to the .modelfox file. +model_path = os.path.join(os.path.dirname(__file__), "heart_disease.modelfox") + +# Read the CSV file into a pandas DataFrame, then wrap it in a PyArrow record batch stream. +df = pd.read_csv(csv_path) + +batch = pa.RecordBatch.from_pandas(df) +reader = pa.ipc.RecordBatchStreamReader.from_batches(batch.schema, [batch]) + +with arrow_c.new("struct ArrowArrayStream*") as c_stream: + c_stream_ptr = int(arrow_c.cast("uintptr_t", c_stream)) + reader._export_to_c(c_stream_ptr) + + # Train a model. + model = modelfox.Model.train(c_stream_ptr, "diagnosis", model_path) + +# Create an example input matching the schema of the CSV file the model was trained on. Here the data is just hard-coded, but in your application you will probably get this from a database or user input.
+specimen = { + "age": 63, + "gender": "male", + "chest_pain": "typical angina", + "resting_blood_pressure": 145, + "cholesterol": 233, + "fasting_blood_sugar_greater_than_120": "true", + "resting_ecg_result": "probable or definite left ventricular hypertrophy", + "exercise_max_heart_rate": 150, + "exercise_induced_angina": "no", + "exercise_st_depression": 2.3, + "exercise_st_slope": "downsloping", + "fluoroscopy_vessels_colored": "0", + "thallium_stress_test": "fixed defect", +} + +# Make the prediction! +output = model.predict(specimen) + +# Print the output. +print("Output.class_name:", output.class_name) +print("Output.probability:", output.probability) diff --git a/languages/python/lib.rs b/languages/python/lib.rs index 014c2970..2b055214 100644 --- a/languages/python/lib.rs +++ b/languages/python/lib.rs @@ -1,7 +1,8 @@ use anyhow::anyhow; +use arrow2::ffi::ArrowArrayStream; use memmap::Mmap; use pyo3::{prelude::*, type_object::PyTypeObject, types::PyType}; -use std::collections::BTreeMap; +use std::{collections::BTreeMap, path::PathBuf}; use url::Url; #[pymodule] @@ -118,6 +119,70 @@ impl Model { self.model.id.clone() } + /** + Train a model! + + Args: + input (Union[str, int, Tuple[Union[str, int], Union[str, int]]]): The training data. Each source is either a file path (str) or the address of an Arrow C stream (`struct ArrowArrayStream*`) passed as an int. Pass a single source, or a `(train, test)` tuple of two sources. + target (str): The name of the column to predict. + output (str): The path the trained model is written to. + config (Optional[str]): An optional path to a training config file. + Returns: `Model`. The trained model, loaded back from the file written to `output`.
+ */ + #[classmethod] + #[args(input, target, output, config = "None")] + #[pyo3(text_signature = "(input, target, output, config=None)")] + pub fn train( + cls: &PyType, + input: Input, + target: String, + output: String, + config: Option<String>, + ) -> PyResult<Model> { + let mut handle_progress_event = |_progress_event| {}; + let input = match input { + Input::Train(file) => modelfox_core::train::TrainingDataSource::Train(file.into()), + Input::TrainAndTest((file_train, file_test)) => { + modelfox_core::train::TrainingDataSource::TrainAndTest { + train: file_train.into(), + test: file_test.into(), + } + } + }; + // Load the dataset, compute stats, and prepare for training. + let mut trainer = modelfox_core::train::Trainer::prepare( + modelfox_id::Id::generate(), + input, + &target, + config.map(PathBuf::from).as_deref(), + &mut handle_progress_event, + ) + .map_err(ModelFoxError)?; + let kill_chip = modelfox_kill_chip::KillChip::new(); + let train_grid_item_outputs = trainer + .train_grid(&kill_chip, &mut handle_progress_event) + .map_err(ModelFoxError)?; + let model = trainer + .test_and_assemble_model(train_grid_item_outputs, &mut handle_progress_event) + .map_err(ModelFoxError)?; + + // Write the model to the output path. + let output_path = PathBuf::from(output.clone()); + model.to_path(&output_path).map_err(ModelFoxError)?; + + // Announce that everything worked! + eprintln!("Your model was written to {}.", output_path.display()); + eprintln!( + "For help making predictions in your code, read the docs at https://www.modelfox.dev/docs." + ); + eprintln!( + "To learn more about how your model works and set up production monitoring, run `modelfox app`." + ); + + // TODO: load the model more efficiently + Model::from_path(cls, output, None) + } + /** Make a prediction!
@@ -306,6 +371,29 @@ impl LoadModelOptions { } } +#[derive(FromPyObject)] +enum FileOrArrow { + File(String), + Arrow(usize), +} + +#[derive(FromPyObject)] +enum Input { + Train(FileOrArrow), + TrainAndTest((FileOrArrow, FileOrArrow)), +} + +impl From<FileOrArrow> for modelfox_core::train::FileOrArrow { + fn from(value: FileOrArrow) -> modelfox_core::train::FileOrArrow { + match value { + FileOrArrow::File(file) => modelfox_core::train::FileOrArrow::File(file.into()), + FileOrArrow::Arrow(stream_ptr) => { + modelfox_core::train::FileOrArrow::Arrow(stream_ptr as *const ArrowArrayStream) + } + } + } +} + #[derive(FromPyObject)] enum PredictInputSingleOrMultiple { Single(PredictInput), diff --git a/languages/python/modelfox/tangram_python.pyi b/languages/python/modelfox/tangram_python.pyi index b945044f..731ff469 100644 --- a/languages/python/modelfox/tangram_python.pyi +++ b/languages/python/modelfox/tangram_python.pyi @@ -1,6 +1,5 @@ from typing import ( Any, - cast, Dict, List, Literal, @@ -26,6 +25,14 @@ class Model: ) -> "Model": ... @property def id(self) -> str: ... + @classmethod + def train( + cls, + input: Union[str, Tuple[str, str]], + target: str, + output: str, + config: Optional[str] = None, + ) -> "Model": ... @overload def predict( self,