Skip to content

Commit

Permalink
Merge pull request #244 from nasaharvest/fix-sudan-data
Browse files Browse the repository at this point in the history
Fix Sudan data
  • Loading branch information
ivanzvonkov authored Nov 28, 2022
2 parents e1885fb + d8df8aa commit 7f5f809
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 22 deletions.
4 changes: 2 additions & 2 deletions data/datasets.dvc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
outs:
- - md5: 3624f5ceff22d1d6b00977d4b270dd67.dir
- size: 490308540
+ - md5: a196ad419b883f66c347c2861a840595.dir
+ size: 485726438
nfiles: 28
path: datasets
6 changes: 3 additions & 3 deletions data/models.dvc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
outs:
- - md5: b59480c1c3a66b1ba52ff65c51f471a1.dir
- size: 32192495
- nfiles: 18
+ - md5: 6486062b20b5fa548fab44385aef7401.dir
+ size: 33730375
+ nfiles: 20
path: models
22 changes: 11 additions & 11 deletions data/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,20 +68,20 @@
}
},
"Sudan_Blue_Nile_2019": {
- "params": "https://wandb.ai/nasa-harvest/crop-mask/runs/1yjwv3dl",
+ "params": "https://wandb.ai/nasa-harvest/crop-mask/runs/z0c5j1yg",
"test_metrics": {
- "accuracy": 0.9408,
- "f1_score": 0.9227,
- "precision_score": 0.9034,
- "recall_score": 0.943,
- "roc_auc_score": 0.9723
+ "accuracy": 0.9468,
+ "f1_score": 0.9213,
+ "precision_score": 0.8962,
+ "recall_score": 0.948,
+ "roc_auc_score": 0.9819
},
"val_metrics": {
- "accuracy": 0.9105,
- "f1_score": 0.8865,
- "precision_score": 0.8529,
- "recall_score": 0.9227,
- "roc_auc_score": 0.9607
+ "accuracy": 0.9427,
+ "f1_score": 0.9201,
+ "precision_score": 0.8836,
+ "recall_score": 0.9598,
+ "roc_auc_score": 0.9762
}
},
"Tanzania-2019": {
Expand Down
11 changes: 6 additions & 5 deletions data/report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,10 @@ eo_data_complete 184

Sudan_Blue_Nile_CEO_2019 (Timesteps: 24)
----------------------------------------------------------------------------
- disagreement: 0.0%
- eo_data_complete 1500
- ✔ training amount: 311, positive class: 31.5%
- ✔ validation amount: 581, positive class: 37.9%
- ✔ testing amount: 608, positive class: 37.5%
+ disagreement: 13.2%
+ eo_data_complete 1302
+ eo_data_skipped 198
+ ✔ training amount: 270, positive class: 29.3%
+ ✔ validation amount: 506, positive class: 34.4%
+ ✔ testing amount: 526, positive class: 32.9%

2 changes: 1 addition & 1 deletion datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ def clean_ceo_data(df: pd.DataFrame) -> pd.DataFrame:
),
RawLabels(
filename=(
- "ceo-Sudan-(Blue-Nile)-Feb-2019---Feb-2020-(Set-1)-sample-data-2022-10-31.csv"
+ "ceo-Sudan-(Blue-Nile)-Feb-2019---Feb-2020-(Set-2)-sample-data-2022-10-31.csv"
),
class_prob=lambda df: (df["Does this pixel contain active cropland?"] == "Crop"),
start_year=2019,
Expand Down
3 changes: 3 additions & 0 deletions src/labeled_dataset_custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ def load_labels(self):
already_processed = df[SOURCE].unique()

new_labels: List[pd.DataFrame] = []
+ raw_year_files = [(p.filename, p.start_year) for p in self.raw_labels]
+ if len(raw_year_files) != len(set(raw_year_files)):
+ raise ValueError(f"Duplicate raw files found in: {raw_year_files}")
for p in self.raw_labels:
if p.filename not in str(already_processed):
new_labels.append(p.process(self.raw_dir))
Expand Down

0 comments on commit 7f5f809

Please sign in to comment.