From 024726873e0c1ec34bbe201de83364ef2730e2c3 Mon Sep 17 00:00:00 2001 From: Adebowale Daniel Date: Sun, 26 Nov 2023 16:21:51 -0500 Subject: [PATCH 1/7] Created new dataset --- data/raw.dvc | 6 +++--- datasets.py | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/data/raw.dvc b/data/raw.dvc index a6beb00d..f27b2414 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: eb021dc8fddb549a048a03e29e81bfbd.dir - size: 443655024 - nfiles: 386 +- md5: c033b32e0901a197d9f743c8329b24ae.dir + size: 443888649 + nfiles: 387 path: raw hash: md5 diff --git a/datasets.py b/datasets.py index 789c510a..36b5e828 100644 --- a/datasets.py +++ b/datasets.py @@ -1204,6 +1204,15 @@ def load_labels(self) -> pd.DataFrame: longitude_col="lon", filter_df=clean_ceo_data, ), + RawLabels( + filename="ceo-Senegal-March-2022---March-2023-Stratified-sample-(Set-3)-sample-data-2023-11-26.csv", # noqa: E501 + class_prob=lambda df: (df["Does this pixel contain active cropland?"] == "Crop"), + start_year=2022, + train_val_test=(0.2, 0.4, 0.4), + latitude_col="lat", + longitude_col="lon", + filter_df=clean_ceo_data, + ), ), ), HawaiiAgriculturalLandUse2020(), From 256d43c7217cd9e5e7d1f18f91327f613373a872 Mon Sep 17 00:00:00 2001 From: ivanzvonkov Date: Mon, 27 Nov 2023 10:45:55 -0600 Subject: [PATCH 2/7] Remove old Senegal dataset to retrigger pipeline --- data/datasets.dvc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index bf297766..536d481c 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 293db6eddf1a985f540e6bbc989baa81.dir - size: 669670393 - nfiles: 48 +- md5: ce2c97650cc503bd283c62b2b6db09f3.dir + size: 664348616 + nfiles: 47 path: datasets hash: md5 From 7be49753f7f6b1ed9671de5650576b4094d29c8b Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Mon, 27 Nov 2023 17:08:34 +0000 Subject: [PATCH 3/7] Automated dataset updates --- data/datasets.dvc | 6 +++--- data/report.txt | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 536d481c..75884736 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: ce2c97650cc503bd283c62b2b6db09f3.dir - size: 664348616 - nfiles: 47 +- md5: 7625924cb6e656a62fa17224fa4218b4.dir + size: 664926956 + nfiles: 48 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index 550c38d2..7e63eae0 100644 --- a/data/report.txt +++ b/data/report.txt @@ -287,14 +287,14 @@ eo_data_skipped 82 -Senegal_CEO_2022 (Timesteps: 16) ----------------------------------------------------------------------------- -disagreement: 10.5% -eo_data_complete 1342 -eo_data_skipped 158 -✔ training amount: 276, positive class: 4.7% -✔ validation amount: 516, positive class: 6.6% -✔ testing amount: 550, positive class: 10.7% +Senegal_CEO_2022 (Timesteps: 19) +---------------------------------------------------------------------------- +disagreement: 0.1% +eo_data_exporting 1499 +eo_data_skipped 1 +✖ training: 310 labels, but 0 features +✖ validation: 579 labels, but 0 features +✖ testing: 610 labels, but 0 features From 659ae780f9d2cd295a95cc10dd5239f05662bbf9 Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Mon, 27 Nov 2023 18:45:20 +0000 Subject: [PATCH 4/7] Automated dataset updates --- data/datasets.dvc | 4 ++-- data/report.txt | 9 +++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 75884736..2bde01ed 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 7625924cb6e656a62fa17224fa4218b4.dir - size: 664926956 +- md5: 58c3eac0d3e6daa93afd3921cd4495a4.dir + size: 665681894 nfiles: 48 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index 7e63eae0..e909c53b 100644 --- a/data/report.txt +++ b/data/report.txt @@ -290,11 +290,12 @@ eo_data_skipped 82 Senegal_CEO_2022 (Timesteps: 19) ---------------------------------------------------------------------------- disagreement: 0.1% -eo_data_exporting 1499 +eo_data_exporting 1323 +eo_data_complete 176 eo_data_skipped 1 -✖ training: 310 labels, but 0 features -✖ validation: 579 labels, but 0 features -✖ testing: 610 labels, but 0 features +✖ training: 310 labels, but 34 features +✖ validation: 579 labels, but 62 features +✖ testing: 610 labels, but 80 features From afe46e71121a0db0caee330b7356c2bb847a9564 Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Mon, 27 Nov 2023 18:54:27 +0000 Subject: [PATCH 5/7] Automated dataset updates --- data/datasets.dvc | 4 ++-- data/report.txt | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 2bde01ed..7e2cafbc 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 58c3eac0d3e6daa93afd3921cd4495a4.dir - size: 665681894 +- md5: 3b5cf521d7d61b3351e62106640f5f9b.dir + size: 665793336 nfiles: 48 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index e909c53b..b745782c 100644 --- a/data/report.txt +++ b/data/report.txt @@ -290,12 +290,12 @@ eo_data_skipped 82 Senegal_CEO_2022 (Timesteps: 19) ---------------------------------------------------------------------------- disagreement: 0.1% -eo_data_exporting 1323 -eo_data_complete 176 +eo_data_exporting 1297 +eo_data_complete 202 eo_data_skipped 1 -✖ training: 310 labels, but 34 features -✖ validation: 579 labels, but 62 features -✖ testing: 610 labels, but 80 features +✖ training: 310 labels, but 42 features +✖ validation: 579 labels, but 69 features +✖ testing: 610 labels, but 91 features From 8c4fd5c1d9c51804d579bf4581d0e058bca63158 Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Tue, 28 Nov 2023 01:48:19 +0000 Subject: [PATCH 6/7] Automated dataset updates --- data/datasets.dvc | 4 ++-- data/report.txt | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 7e2cafbc..da555ded 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 3b5cf521d7d61b3351e62106640f5f9b.dir - size: 665793336 +- md5: b1a768daea6adcedf3f57bd357759231.dir + size: 670988390 nfiles: 48 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index b745782c..4b603db1 100644 --- a/data/report.txt +++ b/data/report.txt @@ -290,12 +290,12 @@ eo_data_skipped 82 Senegal_CEO_2022 (Timesteps: 19) ---------------------------------------------------------------------------- disagreement: 0.1% -eo_data_exporting 1297 -eo_data_complete 202 +eo_data_complete 1414 +eo_data_exporting 85 eo_data_skipped 1 -✖ training: 310 labels, but 42 features -✖ validation: 579 labels, but 69 features -✖ testing: 610 labels, but 91 features +✖ training: 310 labels, but 293 features +✖ validation: 579 labels, but 541 features +✖ testing: 610 labels, but 580 features From 241895133fcd9395bfee2494783270e77ef7eeca Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Tue, 28 Nov 2023 16:12:21 +0000 Subject: [PATCH 7/7] Automated dataset updates --- data/datasets.dvc | 4 ++-- data/report.txt | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index da555ded..95f44e6e 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: b1a768daea6adcedf3f57bd357759231.dir - size: 670988390 +- md5: 63c6f22459e60941aa459671fbff891b.dir + size: 671350235 nfiles: 48 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index 4b603db1..35af2f70 100644 --- a/data/report.txt +++ b/data/report.txt @@ -290,12 +290,11 @@ eo_data_skipped 82 Senegal_CEO_2022 (Timesteps: 19) ---------------------------------------------------------------------------- disagreement: 0.1% -eo_data_complete 1414 -eo_data_exporting 85 -eo_data_skipped 1 -✖ training: 310 labels, but 293 features -✖ validation: 579 labels, but 541 features -✖ testing: 610 labels, but 580 features +eo_data_complete 1499 +eo_data_skipped 1 +✔ training amount: 310, positive class: 11.0% +✔ validation amount: 579, positive class: 11.7% +✔ testing amount: 610, positive class: 14.8%