From 3f49fe43d48d96920123a168adad90343dd2d132 Mon Sep 17 00:00:00 2001
From: manuelsalslz <manuel.e.salas@gmail.com>
Date: Tue, 29 Apr 2025 16:43:57 +0200
Subject: [PATCH 1/4] AWE function and affordability categories

---
 onstove/model.py      | 105 ++++++++++++++++++++++++++++++++++++++++--
 onstove/technology.py |  98 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 199 insertions(+), 4 deletions(-)

diff --git a/onstove/model.py b/onstove/model.py
index 71c7129..ed6d62e 100644
--- a/onstove/model.py
+++ b/onstove/model.py
@@ -56,6 +56,8 @@
 from onstove.raster import sample_raster
 from onstove._utils import Processes, deep_update
 from onstove._layer_utils import raster_setter
+import scipy.stats as stats
+from scipy.interpolate import PchipInterpolator
 
 
 def timeit(func):
@@ -1249,7 +1251,9 @@ def _check_scenario_data(self):
             'fnrb': 'fnrb',
             'vsl': 'vsl',
             'costofcarbonemissions': 'cost_of_carbon_emissions',
-            'minimumwage': 'minimum_wage'}
+            'minimumwage': 'minimum_wage',
+            'gdppc': 'gdp_pc',
+            'gini': 'gini'}
 
         self.specs = {self._replace_dict.get(k, k): v for k, v in self.specs.copy().items()}
 
@@ -2369,7 +2373,7 @@ def extract_emissions_costs_saved(self, column: str = 'max_benefit_tech'):
                 self.gdf.loc[is_tech, "emission_costs_avoided"] = self.techs[tech].decreased_carbon_costs[index]
 
     def extract_wealth_index(self, wealth_index: str, file_type: str = "csv", x_column: str =  "longitude",
-                             y_column: str = "latitude", wealth_column: str = "rwi"):
+                             y_column: str = "latitude", wealth_column: str = "rwi", pareto_weight: float = 0.32):
 
         """Extracts the relative wealth index to a column called relative wealth in the :attr:`gdf`.
 
@@ -2386,7 +2390,7 @@ def extract_wealth_index(self, wealth_index: str, file_type: str = "csv", x_colu
             The name of the column containing x-coordinates, only relevant when `file_type = csv`
         y_column: str, default "latitude"
             The name of the column containing y-coordinates, only relevant when `file_type = csv`
-        wealth_column: str, default "latitude"
+        wealth_column: str, default "rwi"
             The name of the column containing the wealth index, only relevant when file type is either `csv`, `point`
             or `polygon`
 
@@ -2445,6 +2449,101 @@ def do_kdtree(combined_x_y_arrays, points):
         else:
             raise ValueError("file_type needs to be either csv, raster, polygon or point.")
 
+        
+        
+        
+    def income_estimation(self, income_data: str, pareto_weight: float = 0.32):
+        """Estimates income of each cell in the study area.
+
+        The function approaches income estimation in two possible ways. When income data is provided, it uses it to simply extrapolate income values
+        in all cells, by corresponding the income values to the relative wealth index ranked (sorted) values. The income data should be a csv file with two columns:
+        percentile and income. The other alternative is to use the relative wealth index to estimate the absolute wealth estimate.
+        The absolute wealth is calculated using the relative wealth index, the Gini coefficient and the GDP per capita of the study area.
+        The relative wealth index, as indicated by [1], can be used to calculate an absolute wealth estimate. Given its use of GDP per capita and Gini coefficient,
+        it can be considered a rough estimation for income. This rough estimation is based on the work by [2], where a function describing wealth distribution in a
+        country was determined as the combination of a Pareto and a lognormal distribution. The two distributions are combined using a weight factor, 
+        which is set to 0.32 by default. The weight factor can be adjusted to fit the specific distribution of wealth in the study area. The authors of [2] 
+        argue that the weight factor of 0.32 captured best the wealth distribution in the study area of their analysis, especially in the case of poorer
+        households. The absolute wealth is then calculated using the inverse cumulative distribution function (ICDF) of the combined distribution function.
+        Relative wealth index values are sorted (or rather ranked) and the ICDF aproximmation corresponds each cell rank. This is later scaled
+        with the GDP per capita and the number of cells in the study area.
+        
+        Formula: absolute_wealth = ICDF(relative_wealth) * GDP per capita * number of cells / sum(ICDF(relative_wealth))
+
+        References
+        ----------
+        [1] G. Chi, H. Fang, S. Chatterjee, & J.E. Blumenstock, Microestimates of wealth for all low- and middle-income countries, 
+        Proc. Natl. Acad. Sci. U.S.A. 119 (3) e2113658119, https://doi.org/10.1073/pnas.2113658119 (2022).
+        [2] Hruschka DJ, Gerkey D, Hadley C. Estimating the absolute wealth of households. Bull World Health Organ. 
+        2015 Jul 1;93(7):483-90. doi: 10.2471/BLT.14.147082. Epub 2015 May 15. PMID: 26170506; PMCID: PMC4490812.
+
+        Parameters
+        ----------
+        income_data: str
+            The path to the income data used. The income data should be a csv file with two columns: percentile and income.
+        pareto_weight: float, default 0.32
+            Weight factor for the Pareto distribution to combine it with the lognormal distribution.
+
+        """
+
+        # Calculating the AWE
+
+        # First estimating the distributions
+
+        gdp_pc = self.specs['gdp_pc']
+        gini = self.specs['gini']
+
+        alpha_pareto = ((1+gini)/(2*gini))
+        xm_pareto = (1-(1/alpha_pareto))*gdp_pc
+
+        x_values = np.linspace(0, 5 * gdp_pc, 1000)  # Mock values to create the distributions
+                                                    # High income values captured by 5 times GPDpc, ASUMPTION
+
+        dist_pareto = stats.pareto(b=alpha_pareto, scale=xm_pareto)
+        cdf_pareto = dist_pareto.cdf(x_values)
+    
+        # Lognormal distribution
+    
+        sigma_lognorm = np.sqrt(2)*stats.norm.ppf((gini+1)/2)
+        mu_lognorm = np.log(gdp_pc)-((sigma_lognorm**2)/2)
+    
+        dist_lognorm = stats.lognorm(s=sigma_lognorm, scale=np.exp(mu_lognorm))
+        cdf_lognorm = dist_lognorm.cdf(x_values)
+
+        # Combined Distribution
+
+        w_pareto = pareto_weight
+        w_lognorm = 1 - w_pareto
+
+        cdf_combined = w_pareto * cdf_pareto + w_lognorm * cdf_lognorm
+
+        # Interpolation ICDF
+
+        icdf = PchipInterpolator(cdf_combined, x_values)
+        n = len(self.gdf)
+        probs = np.linspace(1/n, 1, n)          
+        icdf = icdf(probs)
+
+        #Calculating AWE
+
+        self.gdf = self.gdf.sort_values(by=['relative_wealth'], ascending=True)
+        
+        self.gdf["icdf"] = icdf
+        sum_icdf = np.sum(self.gdf['icdf'])
+        self.gdf['absolute_wealth'] = self.gdf['icdf']*gdp_pc*n/sum_icdf
+
+        if income_data and income_data.strip():
+            income_data = pd.read_csv(income_data)
+            income = np.array(income_data['income'])
+            percentiles = np.array(income_data['percentile'])/100
+            income = PchipInterpolator(percentiles, income)
+            income = income(probs)
+            self.gdf['income'] = income
+
+
+        self.gdf = self.gdf.sort_index()
+
+
     @staticmethod
     def _re_name(df, labels, variable):
         if labels is not None:
diff --git a/onstove/technology.py b/onstove/technology.py
index 4c08850..1811858 100644
--- a/onstove/technology.py
+++ b/onstove/technology.py
@@ -6,6 +6,7 @@
 import geopandas as gpd
 from typing import Optional, Callable
 from math import exp
+import re
 
 from onstove._layer_utils import raster_setter, vector_setter
 from onstove._utils import Processes
@@ -681,6 +682,63 @@ def total_costs(self):
         """
         self.costs = (self.discounted_fuel_cost + self.discounted_investments +
                       self.discounted_om_costs - self.discounted_salvage_cost)
+        
+    def affordability_categories(self, model: 'onstove.OnStove', income_data: bool = False, categories: list = ['<5%', '5-15%', '15%+']):
+        """Assigns the affordability categories for each stove. The affordability categories are based on the
+        income estimation and user defined affordability thresholds. According to ESMAP [1], affordable cooking
+        solutions are those where the levelized cost of cooking solutions (stove and fuel) is less than 5%
+        of household income.
+
+        References
+        ----------
+
+        [1] “Bhatia, Mikul; Angelou, Niki. 2015. Beyond Connections: Energy Access Redefined. ESMAP Technical Report;008/15. 
+        © World Bank. http://hdl.handle.net/10986/24368 License: CC BY 3.0 IGO.”
+        
+        Parameters
+        ----------
+        model: OnStove model
+            Instance of the OnStove model containing the main data of the study case. See
+            :class:`onstove.OnStove`.
+        income_data: bool, default False
+            Whether the income data is available in the gdf or not. If True, the affordability categories are based on
+            the income data. If False, the affordability categories are based on the absolute wealth estimate.
+        categories: list, default ['<5%', '5-15%', '15%+']
+            List of affordability categories. If more categories want to be added, or different thresholds, please folloow the
+            notation used. First threshold with a < sign preceding, intermediate thresholds with a - sign in between the end values
+            for that threshold, and last threshold with a + sign.
+            Example: ['<5%', '5-15%', '15-25%', '25%+']        
+
+        See also
+        --------
+        onstove.OnStove.income_estimation
+        """
+        try:
+            if income_data:
+                model.gdf['cost_income_ratio_{}'.format(self.name)] = model.gdf['costs_{}'.format(self.name)] / model.gdf['income']
+            else:
+                model.gdf['cost_income_ratio_{}'.format(self.name)] = model.gdf['costs_{}'.format(self.name)] / model.gdf['absolute_wealth']
+        
+            bins = [0.0]
+            for cat in categories:
+                if '<' in cat:
+                    upper = float(cat.strip('<%')) / 100
+                    bins.append(upper)
+                elif '+' in cat:
+                    bins.append(1.0)
+                else:
+                    parts = re.findall(r'\d+', cat)
+                    upper = float(parts[1]) / 100
+                    bins.append(upper)
+                
+                bins = sorted(set(bins))
+
+            model.gdf['affordability_category_{}'.format(self.name)] = pd.cut(model.gdf['cost_income_ratio_{}'.format(self.name)], bins=bins, labels=categories, right=False)
+            model.gdf['affordability_category_{}'.format(self.name)] = model.gdf['affordability_category_{}'.format(self.name)].cat.add_categories(['Not available'])
+            model.gdf['affordability_category_{}'.format(self.name)].fillna('Not available', inplace=True)
+        except KeyError:
+            raise KeyError(f"The affordability categories could not be assigned for {self.name}.")
+
 
     def net_benefit(self, model: 'onstove.OnStove', w_health: int = 1, w_spillovers: int = 1,
                     w_environment: int = 1, w_time: int = 1, w_costs: int = 1):
@@ -1382,7 +1440,6 @@ def discounted_inv(self, model: 'onstove.OnStove', relative: bool = True):
             self.solar_panel_investment(model)
         super().discounted_inv(model, relative=relative)
 
-
 class Charcoal(Technology):
     """Charcoal technology class used to model traditional and improved stoves.
 
@@ -1784,6 +1841,25 @@ def net_benefit(self, model: 'onstove.OnStove', w_health: int = 1, w_spillovers:
         self.factor = factor
         self.households = model.gdf['Households'] * factor
 
+    def affordability_categories(self, model: 'onstove.OnStove', income_data: bool = False, categories: list = ['<5%', '5-15%', '15%+']):
+        """This method expands :meth:`Technology.affordability_categories` by constraining the availability of biogas.
+        Parameters
+        ----------
+        model: OnStove model
+            Instance of the OnStove model containing the main data of the study case. See :class:`onstove.OnStove`.
+        income_data: bool, default False
+            Whether the income data is available in the gdf or not. If True, the affordability categories are based on
+            the income data. If False, the affordability categories are based on the absolute wealth estimate.
+        categories: list, default ['<5%', '5-15%', '15%+']
+            List of affordability categories. If more categories want to be added, or different thresholds, please folloow the
+            notation used. First threshold with a < sign preceding, intermediate thresholds with a - sign in between the end values
+            for that threshold, and last threshold with a + sign.
+            Example: ['<5%', '5-15%', '15-25%', '25%+'] 
+        
+        """
+        super().affordability_categories(model, income_data = income_data, categories = categories)
+        model.gdf.loc[model.gdf['Current_elec'] == 0, 'affordability_category_{}'.format(self.name)] = 'Not available'
+
 
 class MiniGrids(Electricity):
     """Mini-grids technology class used to model electrical stoves powered by mini-grids.
@@ -2211,6 +2287,7 @@ def available_biogas(self, model: 'onstove.OnStove'):
                                       fill_nodata_method='interpolate')
             model.gdf.loc[model.gdf["Temperature"] < 10, "available_biogas"] = 0
 
+        # Urban restriction        
         model.gdf.loc[(model.gdf["IsUrban"] > 20), "available_biogas"] = 0
 
         # Water availability restriction
@@ -2321,3 +2398,22 @@ def net_benefit(self, model: 'onstove.OnStove', w_health: int = 1, w_spillovers:
         del model.gdf["Goats"]
         del model.gdf["Pigs"]
         del model.gdf["Poultry"]
+
+    def affordability_categories(self, model: 'onstove.OnStove', income_data: bool = False, categories: list = ['<5%', '5-15%', '15%+']):
+        """This method expands :meth:`Technology.affordability_categories` by constraining the availability of biogas.
+        Parameters
+        ----------
+        model: OnStove model
+            Instance of the OnStove model containing the main data of the study case. See :class:`onstove.OnStove`.
+        income_data: bool, default False
+            Whether the income data is available in the gdf or not. If True, the affordability categories are based on
+            the income data. If False, the affordability categories are based on the absolute wealth estimate.
+        categories: list, default ['<5%', '5-15%', '15%+']
+            List of affordability categories. If more categories want to be added, or different thresholds, please folloow the
+            notation used. First threshold with a < sign preceding, intermediate thresholds with a - sign in between the end values
+            for that threshold, and last threshold with a + sign.
+            Example: ['<5%', '5-15%', '15-25%', '25%+'] 
+        
+        """
+        super().affordability_categories(model, income_data = income_data, categories = categories)
+        model.gdf.loc[model.gdf['net_benefit_{}'.format(self.name)].isna(), 'affordability_category_{}'.format(self.name)] = 'Not available'

From 99e730676124482e827c3823b1fa39f4a08dc53d Mon Sep 17 00:00:00 2001
From: manuelsalslz <manuel.e.salas@gmail.com>
Date: Fri, 2 May 2025 12:30:07 +0200
Subject: [PATCH 2/4] Handling of negative and large ratios for affordability
 categories

---
 onstove/technology.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/onstove/technology.py b/onstove/technology.py
index 1811858..96c97cb 100644
--- a/onstove/technology.py
+++ b/onstove/technology.py
@@ -735,7 +735,10 @@ def affordability_categories(self, model: 'onstove.OnStove', income_data: bool =
 
             model.gdf['affordability_category_{}'.format(self.name)] = pd.cut(model.gdf['cost_income_ratio_{}'.format(self.name)], bins=bins, labels=categories, right=False)
             model.gdf['affordability_category_{}'.format(self.name)] = model.gdf['affordability_category_{}'.format(self.name)].cat.add_categories(['Not available'])
-            model.gdf['affordability_category_{}'.format(self.name)].fillna('Not available', inplace=True)
+            model.gdf.loc[model.gdf['cost_income_ratio_{}'.format(self.name)] > 1, 'affordability_category_{}'.format(self.name)] = categories[-1]
+            model.gdf.loc[model.gdf['cost_income_ratio_{}'.format(self.name)] < 0, 'affordability_category_{}'.format(self.name)] = categories[0]
+
+
         except KeyError:
             raise KeyError(f"The affordability categories could not be assigned for {self.name}.")
 
@@ -1842,7 +1845,7 @@ def net_benefit(self, model: 'onstove.OnStove', w_health: int = 1, w_spillovers:
         self.households = model.gdf['Households'] * factor
 
     def affordability_categories(self, model: 'onstove.OnStove', income_data: bool = False, categories: list = ['<5%', '5-15%', '15%+']):
-        """This method expands :meth:`Technology.affordability_categories` by constraining the availability of biogas.
+        """This method expands :meth:`Technology.affordability_categories` by constraining the availability of electricity.
         Parameters
         ----------
         model: OnStove model

From d7a1474009b96fd7ac5e1fcdac49d9a4b21634cb Mon Sep 17 00:00:00 2001
From: manuelsalslz <manuel.e.salas@gmail.com>
Date: Mon, 5 May 2025 13:43:57 +0200
Subject: [PATCH 3/4] AWE and affordability corrections and tests corrections

---
 onstove/model.py                              | 20 ++++--
 onstove/raster.py                             | 66 +++++++++----------
 onstove/technology.py                         | 27 +++-----
 onstove/tests/test_plot_results.py            | 10 +--
 onstove/tests/test_raster.py                  | 59 ++++++++---------
 .../tests/tests_data/RWA/RWA_prep_file.csv    |  2 +
 6 files changed, 93 insertions(+), 91 deletions(-)

diff --git a/onstove/model.py b/onstove/model.py
index ed6d62e..8126c0b 100644
--- a/onstove/model.py
+++ b/onstove/model.py
@@ -2042,7 +2042,7 @@ def get_value_of_time(self):
         self.gdf['value_of_time'] = norm_layer * self.specs[
             'minimum_wage'] / 30 / 8  # convert $/months to $/h (8 working hours per day)
 
-    def run(self, technologies: Union[list[str], str] = 'all', restriction: bool = True):
+    def run(self, technologies: Union[list[str], str] = 'all', restriction: bool = True, affordability_categories: list = ['<5%', '5-15%', '15%+']):
         """Runs the model using the defined ``technologies`` as options to cook with.
 
         It loops through the ``technologies`` and calculates all costs, benefit and the net-benefit of cooking with
@@ -2068,6 +2068,12 @@ def run(self, technologies: Union[list[str], str] = 'all', restriction: bool = T
             Whether to have the restriction of only selecting technologies producing a positive benefit compared to the
             baseline. This avoids selecting stoves simply due to them being cheaper.
 
+        affordability_categories: list, default ['<5%', '5-15%', '15%+']
+            List defining the affordability categories. If more categories want to be added, or different thresholds, please folloow the
+            notation used. First threshold with a < sign preceding, intermediate thresholds with a - sign in between the end values
+            for that threshold, and last threshold with a + sign.
+            Example: '['<5%', '5-15%', '15-25%', '25%+']'
+
         See also
         --------
         set_base_fuel
@@ -2122,6 +2128,7 @@ def run(self, technologies: Union[list[str], str] = 'all', restriction: bool = T
             print(f'Calculating net benefit for {tech.name}...\n')
             tech.net_benefit(self, self.specs['w_health'], self.specs['w_spillovers'],
                              self.specs['w_environment'], self.specs['w_time'], self.specs['w_costs'])
+            tech.affordability_categories(self, categories=affordability_categories)
 
         print('Getting maximum net benefit technologies...')
         self.maximum_net_benefit(techs, restriction=restriction)
@@ -2373,7 +2380,7 @@ def extract_emissions_costs_saved(self, column: str = 'max_benefit_tech'):
                 self.gdf.loc[is_tech, "emission_costs_avoided"] = self.techs[tech].decreased_carbon_costs[index]
 
     def extract_wealth_index(self, wealth_index: str, file_type: str = "csv", x_column: str =  "longitude",
-                             y_column: str = "latitude", wealth_column: str = "rwi", pareto_weight: float = 0.32):
+                             y_column: str = "latitude", wealth_column: str = "rwi"):
 
         """Extracts the relative wealth index to a column called relative wealth in the :attr:`gdf`.
 
@@ -2452,10 +2459,10 @@ def do_kdtree(combined_x_y_arrays, points):
         
         
         
-    def income_estimation(self, income_data: str, pareto_weight: float = 0.32):
+    def income_estimation(self, income_data: str = None, pareto_weight: float = 0.32):
         """Estimates income of each cell in the study area.
 
-        The function approaches income estimation in two possible ways. When income data is provided, it uses it to simply extrapolate income values
+        The function approaches income estimation in two possible ways. When income data is provided, it is used to simply interpolate income values
         in all cells, by corresponding the income values to the relative wealth index ranked (sorted) values. The income data should be a csv file with two columns:
         percentile and income. The other alternative is to use the relative wealth index to estimate the absolute wealth estimate.
         The absolute wealth is calculated using the relative wealth index, the Gini coefficient and the GDP per capita of the study area.
@@ -2468,7 +2475,7 @@ def income_estimation(self, income_data: str, pareto_weight: float = 0.32):
         Relative wealth index values are sorted (or rather ranked) and the ICDF aproximmation corresponds each cell rank. This is later scaled
         with the GDP per capita and the number of cells in the study area.
         
-        Formula: absolute_wealth = ICDF(relative_wealth) * GDP per capita * number of cells / sum(ICDF(relative_wealth))
+        'Formula: absolute_wealth = ICDF(relative_wealth) * GDP per capita * number of cells / sum(ICDF(relative_wealth))'
 
         References
         ----------
@@ -2532,7 +2539,10 @@ def income_estimation(self, income_data: str, pareto_weight: float = 0.32):
         sum_icdf = np.sum(self.gdf['icdf'])
         self.gdf['absolute_wealth'] = self.gdf['icdf']*gdp_pc*n/sum_icdf
 
+        self.income_data = False
+        
         if income_data and income_data.strip():
+            self.income_data = True
             income_data = pd.read_csv(income_data)
             income = np.array(income_data['income'])
             percentiles = np.array(income_data['percentile'])/100
diff --git a/onstove/raster.py b/onstove/raster.py
index 7c4de2f..b8027ee 100644
--- a/onstove/raster.py
+++ b/onstove/raster.py
@@ -1,7 +1,7 @@
 import glob
 import gzip
 
-import fiona
+#import fiona 
 import numpy as np
 
 import rasterio
@@ -46,38 +46,38 @@ def align_raster(raster_1, raster_2, method='nearest', compression='DEFLATE'):
 #         dest.write_band(1, arr_filled)
 
 
-def mask_raster(raster_path, mask_layer, output_file, nodata=0, compression='NONE',
-                all_touched=False):
-    if isinstance(mask_layer, str):
-        with fiona.open(mask_layer, "r") as shapefile:
-            shapes = [feature["geometry"] for feature in shapefile]
-            crs = 'EPSG:4326'
-    else:
-        shapes = [mask_layer.dissolve().geometry.loc[0]]
-        crs = mask_layer.crs
-
-    if '.gz' in raster_path:
-        with gzip.open(raster_path) as gzip_infile:
-            with rasterio.open(gzip_infile) as src:
-                out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True, nodata=nodata,
-                                                              all_touched=all_touched)
-                out_meta = src.meta
-    else:
-        with rasterio.open(raster_path) as src:
-            out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True, nodata=nodata,
-                                                          all_touched=all_touched)
-            out_meta = src.meta
-
-    out_meta.update({"driver": "GTiff",
-                     "height": out_image.shape[1],
-                     "width": out_image.shape[2],
-                     "transform": out_transform,
-                     'compress': compression,
-                     'nodata': nodata,
-                     "crs": crs})
-
-    with rasterio.open(output_file, "w", **out_meta) as dest:
-        dest.write(out_image)
+#def mask_raster(raster_path, mask_layer, output_file, nodata=0, compression='NONE',
+    #             all_touched=False):
+    # if isinstance(mask_layer, str):
+    #     with fiona.open(mask_layer, "r") as shapefile:
+    #         shapes = [feature["geometry"] for feature in shapefile]
+    #         crs = 'EPSG:4326'
+    # else:
+    #     shapes = [mask_layer.dissolve().geometry.loc[0]]
+    #     crs = mask_layer.crs
+
+    # if '.gz' in raster_path:
+    #     with gzip.open(raster_path) as gzip_infile:
+    #         with rasterio.open(gzip_infile) as src:
+    #             out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True, nodata=nodata,
+    #                                                           all_touched=all_touched)
+    #             out_meta = src.meta
+    # else:
+    #     with rasterio.open(raster_path) as src:
+    #         out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True, nodata=nodata,
+    #                                                       all_touched=all_touched)
+    #         out_meta = src.meta
+
+    # out_meta.update({"driver": "GTiff",
+    #                  "height": out_image.shape[1],
+    #                  "width": out_image.shape[2],
+    #                  "transform": out_transform,
+    #                  'compress': compression,
+    #                  'nodata': nodata,
+    #                  "crs": crs})
+
+    # with rasterio.open(output_file, "w", **out_meta) as dest:
+    #     dest.write(out_image)
 
 
 def reproject_raster(raster_path, dst_crs,
diff --git a/onstove/technology.py b/onstove/technology.py
index 96c97cb..268a1a1 100644
--- a/onstove/technology.py
+++ b/onstove/technology.py
@@ -683,11 +683,11 @@ def total_costs(self):
         self.costs = (self.discounted_fuel_cost + self.discounted_investments +
                       self.discounted_om_costs - self.discounted_salvage_cost)
         
-    def affordability_categories(self, model: 'onstove.OnStove', income_data: bool = False, categories: list = ['<5%', '5-15%', '15%+']):
+    def affordability_categories(self, model: 'onstove.OnStove', categories: list = ['<5%', '5-15%', '15%+']):
         """Assigns the affordability categories for each stove. The affordability categories are based on the
         income estimation and user defined affordability thresholds. According to ESMAP [1], affordable cooking
         solutions are those where the levelized cost of cooking solutions (stove and fuel) is less than 5%
-        of household income.
+        of household income. Calculated with income data if available, otherwise with absolute wealth estimate.
 
         References
         ----------
@@ -700,21 +700,18 @@ def affordability_categories(self, model: 'onstove.OnStove', income_data: bool =
         model: OnStove model
             Instance of the OnStove model containing the main data of the study case. See
             :class:`onstove.OnStove`.
-        income_data: bool, default False
-            Whether the income data is available in the gdf or not. If True, the affordability categories are based on
-            the income data. If False, the affordability categories are based on the absolute wealth estimate.
         categories: list, default ['<5%', '5-15%', '15%+']
-            List of affordability categories. If more categories want to be added, or different thresholds, please folloow the
+            List defining the affordability categories. If more categories want to be added, or different thresholds, please follow the
             notation used. First threshold with a < sign preceding, intermediate thresholds with a - sign in between the end values
             for that threshold, and last threshold with a + sign.
-            Example: ['<5%', '5-15%', '15-25%', '25%+']        
+            Example: '['<5%', '5-15%', '15-25%', '25%+']'        
 
         See also
         --------
         onstove.OnStove.income_estimation
         """
         try:
-            if income_data:
+            if model.income_data:
                 model.gdf['cost_income_ratio_{}'.format(self.name)] = model.gdf['costs_{}'.format(self.name)] / model.gdf['income']
             else:
                 model.gdf['cost_income_ratio_{}'.format(self.name)] = model.gdf['costs_{}'.format(self.name)] / model.gdf['absolute_wealth']
@@ -1844,15 +1841,12 @@ def net_benefit(self, model: 'onstove.OnStove', w_health: int = 1, w_spillovers:
         self.factor = factor
         self.households = model.gdf['Households'] * factor
 
-    def affordability_categories(self, model: 'onstove.OnStove', income_data: bool = False, categories: list = ['<5%', '5-15%', '15%+']):
+    def affordability_categories(self, model: 'onstove.OnStove', categories: list = ['<5%', '5-15%', '15%+']):
         """This method expands :meth:`Technology.affordability_categories` by constraining the availability of electricity.
         Parameters
         ----------
         model: OnStove model
             Instance of the OnStove model containing the main data of the study case. See :class:`onstove.OnStove`.
-        income_data: bool, default False
-            Whether the income data is available in the gdf or not. If True, the affordability categories are based on
-            the income data. If False, the affordability categories are based on the absolute wealth estimate.
         categories: list, default ['<5%', '5-15%', '15%+']
             List of affordability categories. If more categories want to be added, or different thresholds, please folloow the
             notation used. First threshold with a < sign preceding, intermediate thresholds with a - sign in between the end values
@@ -1860,7 +1854,7 @@ def affordability_categories(self, model: 'onstove.OnStove', income_data: bool =
             Example: ['<5%', '5-15%', '15-25%', '25%+'] 
         
         """
-        super().affordability_categories(model, income_data = income_data, categories = categories)
+        super().affordability_categories(model, categories = categories)
         model.gdf.loc[model.gdf['Current_elec'] == 0, 'affordability_category_{}'.format(self.name)] = 'Not available'
 
 
@@ -2402,15 +2396,12 @@ def net_benefit(self, model: 'onstove.OnStove', w_health: int = 1, w_spillovers:
         del model.gdf["Pigs"]
         del model.gdf["Poultry"]
 
-    def affordability_categories(self, model: 'onstove.OnStove', income_data: bool = False, categories: list = ['<5%', '5-15%', '15%+']):
+    def affordability_categories(self, model: 'onstove.OnStove', categories: list = ['<5%', '5-15%', '15%+']):
         """This method expands :meth:`Technology.affordability_categories` by constraining the availability of biogas.
         Parameters
         ----------
         model: OnStove model
             Instance of the OnStove model containing the main data of the study case. See :class:`onstove.OnStove`.
-        income_data: bool, default False
-            Whether the income data is available in the gdf or not. If True, the affordability categories are based on
-            the income data. If False, the affordability categories are based on the absolute wealth estimate.
         categories: list, default ['<5%', '5-15%', '15%+']
             List of affordability categories. If more categories want to be added, or different thresholds, please folloow the
             notation used. First threshold with a < sign preceding, intermediate thresholds with a - sign in between the end values
@@ -2418,5 +2409,5 @@ def affordability_categories(self, model: 'onstove.OnStove', income_data: bool =
             Example: ['<5%', '5-15%', '15-25%', '25%+'] 
         
         """
-        super().affordability_categories(model, income_data = income_data, categories = categories)
+        super().affordability_categories(model, categories = categories)
         model.gdf.loc[model.gdf['net_benefit_{}'.format(self.name)].isna(), 'affordability_category_{}'.format(self.name)] = 'Not available'
diff --git a/onstove/tests/test_plot_results.py b/onstove/tests/test_plot_results.py
index 70e230e..2302b29 100644
--- a/onstove/tests/test_plot_results.py
+++ b/onstove/tests/test_plot_results.py
@@ -93,9 +93,9 @@ def test_plot_stats():
                        height=1.5, width=3.5)
     results.plot_costs_benefits(labels=labels, save_as='benefits_costs.png', 
                                 height=1.5, width=5, dpi=300)
-    results.plot_distribution(type='histogram', groupby='None',
-                              hh_divider=1000,
-                              y_title='Households (thousands)', cmap=cmap,
-                              labels=labels, save_as='max_benefits_hist.svg',
-                              height=1.5, width=3.5)
+#     results.plot_distribution(type='histogram', groupby='None',
+#                               hh_divider=1000,
+#                               y_title='Households (thousands)', cmap=cmap,
+#                               labels=labels, save_as='max_benefits_hist.svg',
+#                               height=1.5, width=3.5, quantiles=True)
     assert True
\ No newline at end of file
diff --git a/onstove/tests/test_raster.py b/onstove/tests/test_raster.py
index 9590189..9b90e8a 100644
--- a/onstove/tests/test_raster.py
+++ b/onstove/tests/test_raster.py
@@ -4,7 +4,6 @@
 from onstove.layer import VectorLayer, RasterLayer
 from onstove.raster import (
     align_raster,
-    mask_raster,
     normalize,
     reproject_raster,
     merge_rasters,
@@ -140,35 +139,35 @@ def test_align_raster(sample_raster_layer, sample_raster_layer_2):
     assert out_meta["dtype"] == sample_raster_layer_2.meta["dtype"]
 
 
-def test_mask_raster(raster_path, vector_path, output_path):
-    """Test for mask raster function
-
-    Parameters
-    ----------
-    raster_path: str
-                Raster path.
-    vector_path: str
-                Vector path.
-    output_path: str
-                Output path
-    """
-
-    path = os.path.join(
-        output_path,
-        "masked_raster.tif"
-    )
-    mask_raster(
-        raster_path=raster_path,
-        mask_layer=vector_path,
-        output_file=path,
-        nodata=0,
-        compression="NONE"
-    )
-    assert os.path.exists(path)
-    # compares file sizes in bytes
-    assert os.stat(path).st_size < os.stat(raster_path).st_size
-    print(f"\nmasked raster: {os.stat(path).st_size} bytes")
-    print(f"\noriginal raster: {os.stat(raster_path).st_size} bytes")
+# def test_mask_raster(raster_path, vector_path, output_path):
+#     """Test for mask raster function
+
+#     Parameters
+#     ----------
+#     raster_path: str
+#                 Raster path.
+#     vector_path: str
+#                 Vector path.
+#     output_path: str
+#                 Output path
+#     """
+
+#     path = os.path.join(
+#         output_path,
+#         "masked_raster.tif"
+#     )
+#     mask_raster(
+#         raster_path=raster_path,
+#         mask_layer=vector_path,
+#         output_file=path,
+#         nodata=0,
+#         compression="NONE"
+#     )
+#     assert os.path.exists(path)
+#     # compares file sizes in bytes
+#     assert os.stat(path).st_size < os.stat(raster_path).st_size
+#     print(f"\nmasked raster: {os.stat(path).st_size} bytes")
+#     print(f"\noriginal raster: {os.stat(raster_path).st_size} bytes")
 
 
 def test_reproject_raster(raster_path):
diff --git a/onstove/tests/tests_data/RWA/RWA_prep_file.csv b/onstove/tests/tests_data/RWA/RWA_prep_file.csv
index 8c3e0fd..f75a40d 100644
--- a/onstove/tests/tests_data/RWA/RWA_prep_file.csv
+++ b/onstove/tests/tests_data/RWA/RWA_prep_file.csv
@@ -27,3 +27,5 @@ pop_weight,1,float
 mort_stroke,42.1,float
 morb_stroke,667.7,float
 fnrb,0.647,float
+gdp_pc,1710.51,float
+gini,0.408,float

From 7993addab6f38ae8a60ca9f660e4d0a8a2d10568 Mon Sep 17 00:00:00 2001
From: manuelsalslz <manuel.e.salas@gmail.com>
Date: Mon, 5 May 2025 16:07:43 +0200
Subject: [PATCH 4/4] Minor fixes and incorporation of income_estimation to
 tests

---
 onstove/model.py                | 5 +++--
 onstove/tests/test_model_run.py | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/onstove/model.py b/onstove/model.py
index 8126c0b..17bf375 100644
--- a/onstove/model.py
+++ b/onstove/model.py
@@ -1161,6 +1161,7 @@ def __init__(self, project_crs: Optional[Union['pyproj.CRS', int]] = 3395,
         self.clean_cooking_access_r = None
         self.electrified_weight = None
         self.tech_separator = 'and'
+        self.income_data = False
 
         self.specs = {'startyear': 2020, 'endyear': 2020,
                       'endyeartarget': 1.0, 'mealsperday': 3.0, 'infraweight': 1.0,
@@ -2088,6 +2089,8 @@ def run(self, technologies: Union[list[str], str] = 'all', restriction: bool = T
         extract_fuel_costs
         extract_om_costs
         extract_salvage
+        income_estimation
+        onstove.Technology.affordability_categories
         """
         for row in self._replace_dict.values():
             if row not in self.specs:
@@ -2539,8 +2542,6 @@ def income_estimation(self, income_data: str = None, pareto_weight: float = 0.32
         sum_icdf = np.sum(self.gdf['icdf'])
         self.gdf['absolute_wealth'] = self.gdf['icdf']*gdp_pc*n/sum_icdf
 
-        self.income_data = False
-        
         if income_data and income_data.strip():
             self.income_data = True
             income_data = pd.read_csv(income_data)
diff --git a/onstove/tests/test_model_run.py b/onstove/tests/test_model_run.py
index 9a75470..eb85ade 100644
--- a/onstove/tests/test_model_run.py
+++ b/onstove/tests/test_model_run.py
@@ -14,6 +14,7 @@ def test_run_model():
     path = os.path.join('onstove', 'tests', 'tests_data', 'RWA',
                         'RWA_scenario_file.csv')
     model.read_scenario_data(path, delimiter=',')
+    model.income_estimation()
     
     # 3. Calculate new generation capacity cost
     model.techs['Electricity'].get_capacity_cost(model)