You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I would suggest adding dataset parameters to allow for handling categorical data.
See the suggested code below. Also note that the polars code has been updated to be compatible with newer versions of polars.
/// Creates a dataset from a dense row-major feature matrix and one label per row.
///
/// `data` holds one inner vector per row; every row must contain the same number of
/// features. When `dataset_parameters` is given it must be a JSON object: each entry is
/// rendered as `key=value`, the entries are joined with single spaces, and the resulting
/// string is passed to `LGBM_DatasetCreateFromMat` as the dataset parameters.
///
/// # Errors
///
/// Propagates the error produced by `lgbm_call!` when either LightGBM call fails.
/// The real length of `label` is reported to LightGBM, which is expected to reject a
/// label vector whose length differs from the number of rows (verify against the
/// LightGBM C API documentation).
///
/// # Panics
///
/// Panics if `data` is empty, if the rows of `data` do not all have the same length,
/// if `dataset_parameters` is not a JSON object, or if the rendered parameter string
/// contains an interior NUL byte.
pub fn from_mat(data: Vec<Vec<f64>>, label: Vec<f32>, dataset_parameters: Option<&Value>) -> Result<Self> {
    let dataset_params = dataset_parameters
        .map(|params| {
            params
                .as_object()
                .expect("dataset_parameters must be a JSON object")
                .iter()
                .map(|(k, v)| format!("{}={}", k, v))
                .collect::<Vec<_>>()
                .join(" ")
        })
        .unwrap_or_default();
    let dataset_params_cstring = CString::new(dataset_params)
        .expect("dataset parameters must not contain interior NUL bytes");

    let data_length = data.len();
    let feature_length = data
        .first()
        .expect("cannot create a dataset from an empty matrix")
        .len();
    // The flattened buffer is read as `data_length * feature_length` values on the C
    // side; a ragged matrix would make that read run past the end of the buffer.
    assert!(
        data.iter().all(|row| row.len() == feature_length),
        "cannot create a dataset from a ragged matrix: every row must have {} features",
        feature_length
    );

    let label_str = CString::new("label").unwrap();
    let reference = std::ptr::null_mut(); // no reference dataset is used
    let mut handle = std::ptr::null_mut();
    let flat_data = data.into_iter().flatten().collect::<Vec<_>>();

    lgbm_call!(lightgbm_sys::LGBM_DatasetCreateFromMat(
        flat_data.as_ptr() as *const c_void,
        lightgbm_sys::C_API_DTYPE_FLOAT64 as i32,
        data_length as i32,
        feature_length as i32,
        1_i32, // row-major layout
        dataset_params_cstring.as_ptr() as *const c_char,
        reference,
        &mut handle
    ))?;

    // Wrap the handle before the next fallible call so that, if `Self` releases the
    // handle when dropped (NOTE(review): confirm), an error below does not leak it.
    let dataset = Self::new(handle);

    // Pass the true number of labels: reporting `data_length` here would let the C
    // side read past the end of `label` whenever fewer labels than rows were supplied.
    lgbm_call!(lightgbm_sys::LGBM_DatasetSetField(
        handle,
        label_str.as_ptr() as *const c_char,
        label.as_ptr() as *const c_void,
        label.len() as i32,
        lightgbm_sys::C_API_DTYPE_FLOAT32 as i32
    ))?;

    Ok(dataset)
}
/// Creates a dataset from a polars `DataFrame`.
///
/// The column named `label_column` is cast to `f32` and used as the label; it is then
/// dropped from the frame. Every remaining column is cast to `f64` and used as a
/// feature, in the order returned by `get_columns()`. The values are gathered row by
/// row and forwarded, together with `dataset_parameters`, to `from_mat`.
///
/// # Errors
///
/// Returns whatever error `from_mat` returns.
///
/// # Panics
///
/// Panics if `label_column` is missing, if a cast fails, or if the label or any
/// feature column contains nulls once cast. Anything that makes `from_mat` panic
/// also panics here.
pub fn from_dataframe(mut dataframe: DataFrame, label_column: String, dataset_parameters: Option<&Value>) -> Result<Self> {
    let label_col_name = label_column.as_str();
    let (m, n) = dataframe.shape();

    let label_series = dataframe.column(label_col_name).unwrap().cast(&Float32Type::get_dtype()).unwrap();
    if label_series.null_count() != 0 {
        panic!("Cannot create a dataset with null values, encountered nulls when creating the label array")
    }
    dataframe.drop_in_place(label_col_name).unwrap();

    let label_values_ca = label_series.unpack::<Float32Type>().unwrap();
    let label_values: Vec<f32> = label_values_ca.into_no_null_iter().collect();

    // One (initially empty) row buffer per data row; `n` still counts the label
    // column, so each buffer has room for every feature.
    let mut feature_values: Vec<Vec<f64>> = (0..m).map(|_| Vec::with_capacity(n)).collect();

    for series in dataframe.get_columns().iter() {
        let series = series.cast(&Float64Type::get_dtype()).unwrap();
        // Check for nulls after the cast, exactly as is done for the label: this
        // catches nulls already present in the column as well as any the cast
        // introduces, none of which `into_no_null_iter` would account for.
        if series.null_count() != 0 {
            panic!("Cannot create a dataset with null values, encountered nulls when creating the features array")
        }
        let ca = series.unpack::<Float64Type>().unwrap();
        ca.into_no_null_iter()
            .enumerate()
            .for_each(|(row_idx, val)| feature_values[row_idx].push(val));
    }

    Self::from_mat(feature_values, label_values, dataset_parameters)
}
}
The text was updated successfully, but these errors were encountered:
Hi!
I would suggest adding dataset parameters to allow for handling categorical data.
See the suggested code below. Also note that the polars code has been updated to be compatible with newer versions of polars.
The text was updated successfully, but these errors were encountered: