From 9a0effbfb1c1752ef81655a6abda9f564c51d0fe Mon Sep 17 00:00:00 2001 From: FernandoVN98 <38290736+FernandoVN98@users.noreply.github.com> Date: Fri, 10 Nov 2023 12:40:23 +0100 Subject: [PATCH] Random Forest Nested, tests and docs (#449) * Added nested version of Random Forest, tests and corrected documentation --- Dockerfile | 2 +- Jenkinsfile | 2 +- dislib/decomposition/tsqr/base.py | 2 +- dislib/trees/decision_tree.py | 151 +- dislib/trees/distributed/decision_tree.py | 9 + dislib/trees/forest.py | 307 ++-- dislib/trees/nested/__init__.py | 13 + dislib/trees/nested/decision_tree.py | 1505 ++++++++++++++++++++ dislib/trees/nested/forest.py | 755 ++++++++++ dislib/trees/nested/tasks.py | 78 + dislib/trees/nested/terasort.py | 95 ++ docs/source/dislib.trees.distributed.rst | 22 + docs/source/dislib.trees.mmap.rst | 22 + docs/source/dislib.trees.nested.rst | 22 + run_ci_checks.sh | 4 + run_coverage.sh | 3 + run_test_nesting.sh | 146 ++ tests/test_array.py | 3 +- tests/test_tsqr.py | 15 +- tests_nesting/__init__.py | 14 + tests_nesting/__main__.py | 9 + tests_nesting/test_decision_tree_nested.py | 908 ++++++++++++ tests_nesting/test_rf_classifier_nested.py | 757 ++++++++++ tests_nesting/test_rf_regressor_nested.py | 434 ++++++ 24 files changed, 5155 insertions(+), 123 deletions(-) create mode 100644 dislib/trees/nested/__init__.py create mode 100644 dislib/trees/nested/decision_tree.py create mode 100644 dislib/trees/nested/forest.py create mode 100644 dislib/trees/nested/tasks.py create mode 100644 dislib/trees/nested/terasort.py create mode 100644 docs/source/dislib.trees.distributed.rst create mode 100644 docs/source/dislib.trees.mmap.rst create mode 100644 docs/source/dislib.trees.nested.rst create mode 100755 run_test_nesting.sh create mode 100644 tests_nesting/__init__.py create mode 100644 tests_nesting/__main__.py create mode 100644 tests_nesting/test_decision_tree_nested.py create mode 100644 tests_nesting/test_rf_classifier_nested.py create mode 100644 tests_nesting/test_rf_regressor_nested.py diff --git a/Dockerfile b/Dockerfile index 127efbc0..a5200370 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM compss/compss-tutorial:3.1 +FROM compss/compss-tutorial:3.3 MAINTAINER COMPSs Support COPY . 
dislib/
diff --git a/Jenkinsfile b/Jenkinsfile
index f1cd7161..c2592fa0 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -11,7 +11,7 @@ def setGithubCommitStatus(state, description) {
 pipeline {
     options {
-        timeout(time: 5, unit: 'HOURS')
+        timeout(time: 6, unit: 'HOURS')
     }
     agent {
         node {
diff --git a/dislib/decomposition/tsqr/base.py b/dislib/decomposition/tsqr/base.py
index bd7c5e67..ed48f02a 100644
--- a/dislib/decomposition/tsqr/base.py
+++ b/dislib/decomposition/tsqr/base.py
@@ -231,7 +231,7 @@ def tsqr(a: Array, mode="complete", indexes=None):
         shape_to_use = a._reg_shape[0]
     q_blocks = [[object() for _ in range(number_blocks)]
                 for _ in range(auxiliar_rs)]
-    q_blocks_2 = [[]]
+    q_blocks_2 = [[object()]]
     r_blocks = [[object() for _ in range(len(block))]
                 for _ in range(number_blocks)]
     if (irregular_shape_to_use != 0
diff --git a/dislib/trees/decision_tree.py b/dislib/trees/decision_tree.py
index 84a35e0c..dd26ec40 100644
--- a/dislib/trees/decision_tree.py
+++ b/dislib/trees/decision_tree.py
@@ -6,10 +6,15 @@
                                      DecisionTreeClassifierDistributed)
 from dislib.trees.distributed import (DecisionTreeRegressor as
                                       DecisionTreeRegressorDistributed)
+from dislib.trees.nested import (DecisionTreeClassifier as
+                                 DecisionTreeClassifierNested)
+from dislib.trees.nested import (DecisionTreeRegressor as
+                                 DecisionTreeRegressorNested)
 from sklearn.tree import DecisionTreeClassifier as SklearnDTClassifier
 from sklearn.tree import DecisionTreeRegressor as SklearnDTRegressor
 from dislib.trees.distributed.decision_tree import (_RegressionNode,
                                                     _ClassificationNode)
+from pycompss.api.api import compss_wait_on


 class BaseDecisionTree:
@@ -36,6 +41,7 @@ def __init__(
         split_computation="raw",
         sync_after_fit=True,
         mmap=True,
+        nested=False,
     ):
         self.try_features = try_features
         self.max_depth = max_depth
@@ -60,13 +66,17 @@ def __init__(
         self.sync_after_fit = sync_after_fit
         self.mmap = mmap
+        self.nested = nested

     def fit(self, dataset):
         """Fits the DecisionTree.

         Parameters
         ----------
-        dataset : dislib.classification.rf._data.RfDataset
+        dataset : dislib.trees.mmap.RfDataset / ds-array
+            It must be a dislib.trees.mmap.RfDataset when the mmap decision
+            tree is used. When using the distributed or nested decision
+            tree, the input to this function should be a ds-array.
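+
+        Examples
+        --------
+        A minimal sketch of driving the nested backend directly, with
+        synthetic data; the argument values are illustrative only, and
+        the positional arguments follow the nested class constructor
+        introduced in this patch:
+
+        >>> import numpy as np
+        >>> import dislib as ds
+        >>> from dislib.trees.nested import DecisionTreeClassifier
+        >>> x = ds.array(np.random.rand(100, 4), block_size=(50, 4))
+        >>> y = ds.array(np.random.randint(0, 2, (100, 1)),
+        ...              block_size=(50, 1))
+        >>> tree = DecisionTreeClassifier(
+        ...     2, 2, np.inf, 1, 1e8, True, np.random.RandomState(0))
+        >>> tree.fit(x, y)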
""" if self.mmap: if SklearnDTRegressor == self.base_tree: @@ -82,26 +92,46 @@ def fit(self, dataset): self.bootstrap, self.random_state ) else: - if SklearnDTRegressor == self.base_tree: - self.tree = DecisionTreeRegressorDistributed( - self.try_features, self.max_depth, - self.distr_depth, self.sklearn_max, - self.bootstrap, self.random_state, - self.range_max, self.range_min, - self.n_split_points, self.split_computation, - self.sync_after_fit - ) + if self.nested: + if SklearnDTRegressor == self.base_tree: + self.tree = DecisionTreeRegressorNested( + self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.bootstrap, self.random_state, + self.range_max, self.range_min, + self.n_split_points, self.split_computation, + self.sync_after_fit) + else: + self.tree = DecisionTreeClassifierNested( + self.n_classes, + self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.bootstrap, self.random_state, + self.range_max, self.range_min, + self.n_split_points, self.split_computation, + self.sync_after_fit) else: - self.tree = DecisionTreeClassifierDistributed( - self.try_features, self.max_depth, - self.distr_depth, self.sklearn_max, - self.bootstrap, self.random_state, - self.n_classes, self.range_max, self.range_min, - self.n_split_points, self.split_computation, - self.sync_after_fit - ) - - def predict(self, x_row): + if SklearnDTRegressor == self.base_tree: + self.tree = DecisionTreeRegressorDistributed( + self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.bootstrap, self.random_state, + self.range_max, self.range_min, + self.n_split_points, self.split_computation, + self.sync_after_fit + ) + else: + self.tree = DecisionTreeClassifierDistributed( + self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.bootstrap, self.random_state, + self.n_classes, self.range_max, self.range_min, + self.n_split_points, self.split_computation, + self.sync_after_fit + ) + self.tree.fit(dataset) + + def predict(self, x_row, collect=False): """Predicts target values or classes for the given samples using a fitted tree. @@ -110,6 +140,12 @@ def predict(self, x_row): x_row : ds-array A row block of samples. + collect : boolean + Only affects nested and distributed versions of the algorithm. + When True, the results are synchronized before the returning, + when False, no synchronization is done, but the user should do it + manually when he/she wants the results. + Returns ------- predicted : ndarray @@ -123,7 +159,13 @@ def predict(self, x_row): if self.mmap: return self.tree.predict(x_row) else: - return self.tree.predict(x_row, collect=False) + if self.nested: + prediction = self.tree.predict(x_row) + if collect: + prediction = compss_wait_on(prediction) + return prediction + else: + return self.tree.predict(x_row, collect=collect) class DecisionTreeClassifier(BaseDecisionTree): @@ -148,6 +190,30 @@ class DecisionTreeClassifier(BaseDecisionTree): forests). random_state : RandomState instance The random number generator. + n_classes : int + Number of classes that appear on the dataset. Only needed on + distributed random forest. 
+    range_min : ds-array or np.array
+        Contains the minimum values of the different attributes of the dataset.
+        Only used on distributed random forest (it is an optional parameter).
+    range_max : ds-array or np.array
+        Contains the maximum values of the different attributes of the dataset.
+        Only used on distributed random forest (it is an optional parameter).
+    n_split_points : str or int
+        Number of split points to evaluate:
+        "auto", "sqrt" or an integer value.
+        Used on distributed random forest (non memory map version).
+    split_computation : str
+        "raw", "gaussian_approximation" or "uniform_approximation":
+        the distribution assumed for the attribute values when selecting
+        the split points. Used on distributed random forest (non memory map version).
+    sync_after_fit : bool
+        Whether to synchronize after the training.
+        Used on distributed random forest (non memory map version).
+    mmap : bool
+        Whether to use the memory map version.
+    nested : bool
+        Whether to use the nested version.

     Attributes
     ----------
@@ -175,7 +241,7 @@ class DecisionTreeClassifier(BaseDecisionTree):
     predict(x_row)
         Predicts classes for the given samples using a fitted tree.
     predict_proba(x_row)
-        Predicts class probabilities for the given smaples using a fitted tree.
+        Predicts class probabilities for the given samples using a fitted tree.

     """

@@ -194,6 +260,7 @@ def __init__(
         split_computation="raw",
         sync_after_fit=True,
         mmap=True,
+        nested=False,
     ):
         super().__init__(
             try_features,
@@ -211,9 +278,10 @@ def __init__(
             split_computation=split_computation,
             sync_after_fit=sync_after_fit,
             mmap=mmap,
+            nested=nested,
         )

-    def predict_proba(self, x_row):
+    def predict_proba(self, x_row, collect=False):
         """Predicts class probabilities for a row block using a fitted tree.

         Parameters
@@ -221,6 +289,12 @@ def predict_proba(self, x_row):
         x_row : ds-array
             A row block of samples.

+        collect : boolean
+            Only affects the nested and distributed versions of the
+            algorithm. When True, the results are synchronized before
+            returning; when False, no synchronization is done and the
+            user must synchronize manually when the results are needed.
+
         Returns
         -------
         predicted_proba : ndarray
@@ -235,7 +309,13 @@ def predict_proba(self, x_row):
         if self.mmap:
             return self.tree.predict_proba(x_row)
         else:
-            return self.tree.predict_proba(x_row, collect=False)
+            if self.nested:
+                prediction = self.tree.predict_proba(x_row)
+                if collect:
+                    prediction = compss_wait_on(prediction)
+                return prediction
+            else:
+                return self.tree.predict_proba(x_row, collect=collect)


 class DecisionTreeRegressor(BaseDecisionTree):
@@ -260,6 +340,27 @@ class DecisionTreeRegressor(BaseDecisionTree):
         forests).
     random_state : RandomState instance
         The random number generator.
+    range_min : ds-array or np.array
+        Contains the minimum values of the different attributes of the dataset.
+        Only used on distributed random forest (it is an optional parameter).
+    range_max : ds-array or np.array
+        Contains the maximum values of the different attributes of the dataset.
+        Only used on distributed random forest (it is an optional parameter).
+    n_split_points : str or int
+        Number of split points to evaluate:
+        "auto", "sqrt" or an integer value.
+        Used on distributed random forest (non memory map version).
+    split_computation : str
+        "raw", "gaussian_approximation" or "uniform_approximation":
+        the distribution assumed for the attribute values when selecting
+        the split points. Used on distributed random forest (non memory map version).
+    sync_after_fit : bool
+        Whether to synchronize after the training.
+ Used on distributed random forest (non memory map version) + mmap : bool + Use the memory map version or not + nested : bool + Use the nested version or not Attributes ---------- @@ -299,6 +400,7 @@ def __init__( split_computation="raw", sync_after_fit=True, mmap=True, + nested=False, ): super().__init__( try_features, @@ -315,4 +417,5 @@ def __init__( split_computation=split_computation, sync_after_fit=sync_after_fit, mmap=mmap, + nested=nested, ) diff --git a/dislib/trees/distributed/decision_tree.py b/dislib/trees/distributed/decision_tree.py index 28478496..911da813 100644 --- a/dislib/trees/distributed/decision_tree.py +++ b/dislib/trees/distributed/decision_tree.py @@ -380,6 +380,15 @@ def __init__( ) def fit(self, x, y): + """Fits the DecisionTreeRegressor. + + Parameters + ---------- + x : ds-array + Samples of the dataset. + y: ds-array + Labels of the dataset. + """ if self.range_max is None: self.range_max = x.max() if self.range_min is None: diff --git a/dislib/trees/forest.py b/dislib/trees/forest.py index 80fbe50e..00d62981 100644 --- a/dislib/trees/forest.py +++ b/dislib/trees/forest.py @@ -5,19 +5,27 @@ DecisionTreeClassifierMMap, DecisionTreeRegressor as DecisionTreeRegressorMMap) +from dislib.trees.mmap import (RandomForestClassifier as + RandomForestClassifierMMap, + RfClassifierDataset, RfRegressorDataset, + RandomForestRegressor as + RandomForestRegressorMMap) from dislib.trees.distributed import (DecisionTreeClassifier as DecisionTreeClassifierDistributed, DecisionTreeRegressor as DecisionTreeRegressorDistributed) -from dislib.trees.mmap import (RandomForestClassifier as - RandomForestClassifierMMap, - RfClassifierDataset, RfRegressorDataset) -from dislib.trees.mmap import (RandomForestRegressor as - RandomForestRegressorMMap) from dislib.trees.distributed import (RandomForestClassifier as RandomForestClassifierDistributed, RandomForestRegressor as RandomForestRegressorDistributed) +from dislib.trees.nested import (DecisionTreeClassifier as + DecisionTreeClassifierNested, + DecisionTreeRegressor as + DecisionTreeRegressorNested) +from dislib.trees.nested import (RandomForestClassifier as + RandomForestClassifierNested, + RandomForestRegressor as + RandomForestRegressorNested) class BaseRandomForest(BaseEstimator): @@ -46,6 +54,7 @@ def __init__( split_computation="raw", sync_after_fit=True, mmap=True, + nested=False, ): self.n_estimators = n_estimators self.try_features = try_features @@ -64,6 +73,7 @@ def __init__( self.split_computation = split_computation self.sync_after_fit = sync_after_fit self.mmap = mmap + self.nested = nested self.rf = None def fit(self, x, y): @@ -93,27 +103,51 @@ def fit(self, x, y): self.distr_depth, self.sklearn_max, self.hard_vote, self.random_state) else: - if DecisionTreeRegressorDistributed == self.base_tree: - self.rf = RandomForestRegressorDistributed( - self.n_estimators, self.try_features, self.max_depth, - self.distr_depth, self.sklearn_max, - self.random_state, - range_max=self.range_max, range_min=self.range_min, - bootstrap=self.bootstrap, - n_split_points=self.n_split_points, - split_computation=self.split_computation, - sync_after_fit=self.sync_after_fit) + if self.nested: + if DecisionTreeRegressorNested == self.base_tree: + self.rf = RandomForestRegressorNested( + self.n_estimators, self.try_features, + self.max_depth, self.distr_depth, + self.sklearn_max, self.random_state, + range_max=self.range_max, + range_min=self.range_min, + bootstrap=self.bootstrap, + n_split_points=self.n_split_points, + 
split_computation=self.split_computation, + sync_after_fit=self.sync_after_fit) + else: + self.rf = RandomForestClassifierNested( + self.n_classes, self.n_estimators, + self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.hard_vote, self.random_state, + range_max=self.range_max, range_min=self.range_min, + bootstrap=self.bootstrap, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=self.sync_after_fit) else: - self.rf = RandomForestClassifierDistributed( - self.n_classes, self.n_estimators, - self.try_features, self.max_depth, - self.distr_depth, self.sklearn_max, - self.hard_vote, self.random_state, - range_max=self.range_max, range_min=self.range_min, - bootstrap=self.bootstrap, - n_split_points=self.n_split_points, - split_computation=self.split_computation, - sync_after_fit=self.sync_after_fit) + if DecisionTreeRegressorDistributed == self.base_tree: + self.rf = RandomForestRegressorDistributed( + self.n_estimators, self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.random_state, + range_max=self.range_max, range_min=self.range_min, + bootstrap=self.bootstrap, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=self.sync_after_fit) + else: + self.rf = RandomForestClassifierDistributed( + self.n_classes, self.n_estimators, + self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.hard_vote, self.random_state, + range_max=self.range_max, range_min=self.range_min, + bootstrap=self.bootstrap, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=self.sync_after_fit) self.rf.fit(x, y) if self.mmap and DecisionTreeClassifierMMap == self.base_tree: @@ -206,27 +240,51 @@ def load_model(self, filepath, load_format="json"): self.distr_depth, self.sklearn_max, self.hard_vote, self.random_state) else: - if DecisionTreeRegressorDistributed == self.base_tree: - self.rf = RandomForestRegressorDistributed( - self.n_estimators, self.try_features, self.max_depth, - self.distr_depth, self.sklearn_max, - self.random_state, - range_max=self.range_max, range_min=self.range_min, - bootstrap=self.bootstrap, - n_split_points=self.n_split_points, - split_computation=self.split_computation, - sync_after_fit=self.sync_after_fit) + if self.nested: + if DecisionTreeRegressorNested == self.base_tree: + self.rf = RandomForestRegressorNested( + self.n_estimators, self.try_features, + self.max_depth, self.distr_depth, + self.sklearn_max, self.random_state, + range_max=self.range_max, + range_min=self.range_min, + bootstrap=self.bootstrap, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=self.sync_after_fit) + else: + self.rf = RandomForestClassifierNested( + self.n_classes, self.n_estimators, + self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.hard_vote, self.random_state, + range_max=self.range_max, range_min=self.range_min, + bootstrap=self.bootstrap, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=self.sync_after_fit) else: - self.rf = RandomForestClassifierDistributed( - self.n_classes, self.n_estimators, - self.try_features, self.max_depth, - self.distr_depth, self.sklearn_max, - self.hard_vote, self.random_state, - range_max=self.range_max, range_min=self.range_min, - bootstrap=self.bootstrap, - n_split_points=self.n_split_points, - 
split_computation=self.split_computation, - sync_after_fit=self.sync_after_fit) + if DecisionTreeRegressorDistributed == self.base_tree: + self.rf = RandomForestRegressorDistributed( + self.n_estimators, self.try_features, + self.max_depth, self.distr_depth, + self.sklearn_max, self.random_state, + range_max=self.range_max, range_min=self.range_min, + bootstrap=self.bootstrap, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=self.sync_after_fit) + else: + self.rf = RandomForestClassifierDistributed( + self.n_classes, self.n_estimators, + self.try_features, self.max_depth, + self.distr_depth, self.sklearn_max, + self.hard_vote, self.random_state, + range_max=self.range_max, range_min=self.range_min, + bootstrap=self.bootstrap, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=self.sync_after_fit) self.rf.load_model(filepath, load_format=load_format) @@ -291,13 +349,18 @@ class RandomForestClassifier(BaseRandomForest): sync_after_fit : bool Synchronize or not after the training. Used on distributed random forest (non memory map version) + mmap : bool + Use the memory map version or not. + nested : bool + Use the nested version or not. Attributes ---------- classes : None or ndarray Array of distinct classes, set at fit(). - trees : list of DecisionTreeClassifier - List of the tree classifiers of this forest, populated at fit(). + rf : RandomForestClassifier selected + Instance of mmap, distributed or nested + RandomForestClassifier selected. """ def __init__( @@ -317,6 +380,7 @@ def __init__( split_computation="raw", sync_after_fit=True, mmap=True, + nested=False, ): if mmap: super().__init__( @@ -331,25 +395,48 @@ def __init__( base_dataset=RfClassifierDataset, ) else: - super().__init__( - n_estimators, - try_features, - max_depth, - distr_depth, - sklearn_max, - hard_vote, - random_state, - base_tree=DecisionTreeClassifierDistributed, - base_dataset=None, - n_classes=n_classes, - range_max=range_max, - range_min=range_min, - bootstrap=bootstrap, - n_split_points=n_split_points, - split_computation=split_computation, - sync_after_fit=sync_after_fit, - mmap=mmap, - ) + if nested: + super().__init__( + n_estimators, + try_features, + max_depth, + distr_depth, + sklearn_max, + hard_vote, + random_state, + base_tree=DecisionTreeClassifierNested, + base_dataset=None, + n_classes=n_classes, + range_max=range_max, + range_min=range_min, + bootstrap=bootstrap, + n_split_points=n_split_points, + split_computation=split_computation, + sync_after_fit=sync_after_fit, + mmap=mmap, + nested=nested, + ) + else: + super().__init__( + n_estimators, + try_features, + max_depth, + distr_depth, + sklearn_max, + hard_vote, + random_state, + base_tree=DecisionTreeClassifierDistributed, + base_dataset=None, + n_classes=n_classes, + range_max=range_max, + range_min=range_min, + bootstrap=bootstrap, + n_split_points=n_split_points, + split_computation=split_computation, + sync_after_fit=sync_after_fit, + mmap=mmap, + nested=nested + ) def predict(self, x): """Predicts target classes using a fitted forest. @@ -520,11 +607,36 @@ class RandomForestRegressor(BaseRandomForest): If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. + n_classes : int + Number of classes that appear on the dataset. Only needed on + distributed random forest. 
+ range_min : ds-array or np.array + Contains the minimum values of the different attributes of the dataset + Only used on distributed random forest (it is an optional parameter) + range_max : ds-array or np.array + Contains the maximum values of the different attributes of the dataset + Only used on distributed random forest (it is an optional parameter) + n_split_points : String or int + Number of split points to evaluate. + "auto", "sqrt" or integer value. + Used on distributed random forest (non memory map version) + split_computation : String + "raw", "gaussian_approximation" or "uniform_approximation" + distribution of the values followed by the split points selected. + Used on distributed random forest (non memory map version) + sync_after_fit : bool + Synchronize or not after the training. + Used on distributed random forest (non memory map version) + mmap : bool + Use the memory map version of the algorithm or not + nested : bool + Use the nested version of the algorithm or not Attributes ---------- - trees : list of DecisionTreeRegressor - List of the tree regressors of this forest, populated at fit(). + rf : RandomForestRegressor selected + Instance of mmap, distributed or nested + RandomForestRegressor selected. """ def __init__( @@ -542,6 +654,7 @@ def __init__( split_computation="raw", sync_after_fit=True, mmap=True, + nested=False, ): hard_vote = None if mmap: @@ -557,24 +670,46 @@ def __init__( base_dataset=RfRegressorDataset, ) else: - super().__init__( - n_estimators, - try_features, - max_depth, - distr_depth, - sklearn_max, - hard_vote, - random_state, - base_tree=DecisionTreeRegressorDistributed, - base_dataset=None, - range_max=range_max, - range_min=range_min, - bootstrap=bootstrap, - n_split_points=n_split_points, - split_computation=split_computation, - sync_after_fit=sync_after_fit, - mmap=mmap, - ) + if nested: + super().__init__( + n_estimators, + try_features, + max_depth, + distr_depth, + sklearn_max, + hard_vote, + random_state, + base_tree=DecisionTreeRegressorNested, + base_dataset=None, + range_max=range_max, + range_min=range_min, + bootstrap=bootstrap, + n_split_points=n_split_points, + split_computation=split_computation, + sync_after_fit=sync_after_fit, + mmap=mmap, + nested=nested, + ) + else: + super().__init__( + n_estimators, + try_features, + max_depth, + distr_depth, + sklearn_max, + hard_vote, + random_state, + base_tree=DecisionTreeRegressorDistributed, + base_dataset=None, + range_max=range_max, + range_min=range_min, + bootstrap=bootstrap, + n_split_points=n_split_points, + split_computation=split_computation, + sync_after_fit=sync_after_fit, + mmap=mmap, + nested=nested, + ) def predict(self, x): """Predicts target values using a fitted forest. 
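Note (reviewer sketch): end-to-end usage of the new nested backend through
the public forest API. A minimal sketch under assumed keyword names taken
from the docstrings above (``n_classes``, ``mmap``, ``nested``); ``x_train``
and ``y_train`` are illustrative synthetic ds-arrays, not part of the patch.

    import numpy as np
    import dislib as ds
    from dislib.trees.forest import RandomForestClassifier

    x_train = ds.array(np.random.rand(1000, 8), block_size=(250, 8))
    y_train = ds.array(np.random.randint(0, 3, (1000, 1)),
                       block_size=(250, 1))

    # mmap=False selects the non memory-map backends; nested=True then
    # chooses the nested implementation instead of the distributed one.
    forest = RandomForestClassifier(n_classes=3, mmap=False, nested=True)
    forest.fit(x_train, y_train)
    # Synchronization behavior depends on the backend (see sync_after_fit
    # and the collect flag documented above).
    y_pred = forest.predict(x_train)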
diff --git a/dislib/trees/nested/__init__.py b/dislib/trees/nested/__init__.py new file mode 100644 index 00000000..30b0bdb9 --- /dev/null +++ b/dislib/trees/nested/__init__.py @@ -0,0 +1,13 @@ +from dislib.trees.nested.forest import (RandomForestClassifier, + RandomForestRegressor) +from dislib.trees.nested.decision_tree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, +) + +__all__ = [ + "RandomForestClassifier", + "RandomForestRegressor", + "DecisionTreeClassifier", + "DecisionTreeRegressor", +] diff --git a/dislib/trees/nested/decision_tree.py b/dislib/trees/nested/decision_tree.py new file mode 100644 index 00000000..73c89bef --- /dev/null +++ b/dislib/trees/nested/decision_tree.py @@ -0,0 +1,1505 @@ +import math +import numpy as np +from sklearn.tree import DecisionTreeClassifier as SklearnDTClassifier +from sklearn.tree import DecisionTreeRegressor as SklearnDTRegressor +from pycompss.api.parameter import COLLECTION_IN, IN +from sklearn.utils import check_random_state +from pycompss.api.api import compss_delete_object, compss_wait_on +from dislib.data.array import Array +from pycompss.api.task import task +from pycompss.api.constraint import constraint +import scipy +from dislib.trees.nested.terasort import terasort + + +class BaseDecisionTree: + """Base class for distributed decision trees. + + Warning: This class should not be used directly. + Use derived classes instead. + """ + + def __init__( + self, + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + base_node, + base_tree, + n_classes=None, + range_min=None, + range_max=None, + n_split_points="auto", + split_computation="raw", + sync_after_fit=True, + ): + self.n_classes = n_classes + self.try_features = try_features + self.max_depth = max_depth + self.sklearn_max = sklearn_max + self.distr_depth = distr_depth + self.bootstrap = bootstrap + self.random_state = random_state + self.base_node = base_node + self.base_tree = base_tree + + self.n_features = None + + self.tree = None + self.nodes_info = None + self.range_min = range_min + self.range_max = range_max + self.n_split_points = n_split_points + self.split_computation = split_computation + self.sync_after_fit = sync_after_fit + + @constraint(computing_units="${ComputingUnits}") + @task() + def fit(self, x, y): + """Fits the DecisionTree. + + Parameters + ---------- + x : ds-array + Samples of the dataset. + y: ds-array + Labels of the dataset. 
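+
+        Notes
+        -----
+        ``n_split_points`` is resolved here from the number of samples
+        ``n``: ``"auto"`` gives ``int(log(n))`` split points, ``"sqrt"``
+        gives ``int(sqrt(n))``, and a float in (0, 1) gives
+        ``int(fraction * n)``. As a worked example (values derived from
+        this code, not measured): with n = 10000 samples, "auto" yields
+        int(9.21) = 9 split points, "sqrt" yields 100, and 0.01 yields
+        100.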
+ """ + if self.range_max is None: + self.range_max = x.max() + if self.range_min is None: + self.range_min = x.min() + self.range_max._blocks = compss_wait_on(self.range_max._blocks) + self.range_min._blocks = compss_wait_on(self.range_min._blocks) + if self.n_split_points == "auto": + self.n_split_points = int(math.log(x.shape[0])) + elif self.n_split_points == "sqrt": + self.n_split_points = int(math.sqrt(x.shape[0])) + elif self.n_split_points < 1 and self.n_split_points > 0: + self.n_split_points = int(self.n_split_points * x.shape[0]) + elif isinstance(self.n_split_points, int): + pass + self.total_length = x.shape[0] + self.number_attributes = x.shape[1] + self.tree = self.base_node() + branches = [[x, y, self.tree]] + nodes_info = [] + selection = _sample_selection(x, + random_state=self.random_state, + bootstrap=self.bootstrap) + num_buckets = x._n_blocks[0] * x._n_blocks[1] + for i in range(self.distr_depth): + branches_pair = [] + for idx, branch_data in enumerate(branches): + x, y, actual_node = branch_data + node_info, results_l, results_l_2, results_r, results_r_2 = ( + _compute_split( + x, y, n_classes=self.n_classes, + range_min=self.range_min, + range_max=self.range_max, + num_buckets=int(num_buckets/(i+1)), + m_try=self.try_features, + number_attributes=self.number_attributes, + indexes_selected=selection, + number_split_points=int(self.n_split_points*(i+1)), + split_computation=self.split_computation, + random_state=self.random_state)) + actual_node.content = int(math.pow(2, int(i)) - 1 + idx) + actual_node.left = self.base_node() + actual_node.right = self.base_node() + splits_computed = [] + splits_computed.append(results_l) + splits_computed.append(results_l_2) + splits_computed.append(actual_node.left) + branches_pair.append(splits_computed) + splits_computed = [] + splits_computed.append(results_r) + splits_computed.append(results_r_2) + splits_computed.append(actual_node.right) + branches_pair.append(splits_computed) + nodes_info.append(node_info) + branches = branches_pair + for branch in branches: + x, y, actual_node = branch + actual_node = construct_subtree(x, y, actual_node, + self.try_features, + self.distr_depth, + max_depth=self.max_depth, + random_state=self.random_state) + nodes_info.append(actual_node) + nodes_info = compss_wait_on(nodes_info) + self.nodes_info = nodes_info + + @constraint(computing_units="${ComputingUnits}") + @task(returns=list) + def predict(self, x): + """Predicts target values or classes for the given samples using + a fitted tree. + + Parameters + ---------- + x_row : ds-array + A row block of samples. + + Returns + ------- + predicted : ndarray + An array with the predicted classes or values for the given + samples. For classification, the values are codes of the fitted + dislib.classification.rf.data.RfDataset. The returned object can + be a pycompss.runtime.Future object. + """ + assert self.tree is not None, "The decision tree is not fitted." + + block_predictions = [] + for x_block in x._blocks: + block_predictions.append(_predict_tree_class(x_block, + self.nodes_info, + 0, self.n_classes)) + return block_predictions + + +class DecisionTreeClassifier(BaseDecisionTree): + """A distributed decision tree classifier. + + Parameters + ---------- + try_features : int + The number of features to consider when looking for the best split. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires + to effectively inspect more than ``try_features`` features. 
max_depth : int
+        The maximum depth of the tree. If np.inf, then nodes are expanded
+        until all leaves are pure.
+    distr_depth : int
+        Number of levels of the tree in which the nodes are split in a
+        distributed way.
+    bootstrap : bool
+        Randomly select n_instances samples with repetition (used in random
+        forests).
+    random_state : RandomState instance
+        The random number generator.
+
+    Attributes
+    ----------
+    n_features : int
+        The number of features of the dataset. It can be a
+        pycompss.runtime.Future object.
+    n_classes : int
+        The number of classes of the dataset. It can be a
+        pycompss.runtime.Future object.
+    tree : None or _Node
+        The root node of the tree after the tree is fitted.
+    nodes_info : None or list of _InnerNodeInfo and _LeafInfo
+        List of the node information for the nodes of the tree in the same
+        order as obtained in the fit() method, up to ``distr_depth`` depth.
+        After fit(), it is a pycompss.runtime.Future object.
+    subtrees : None or list of _Node
+        List of subtrees of the tree at ``distr_depth`` depth obtained in the
+        fit() method. After fit(), it is a list of pycompss.runtime.Future
+        objects.
+
+    Methods
+    -------
+    fit(x, y)
+        Fits the DecisionTreeClassifier.
+    predict(x_row)
+        Predicts classes for the given samples using a fitted tree.
+    predict_proba(x_row)
+        Predicts class probabilities for the given samples using a fitted
+        tree.
+
+    """
+
+    def __init__(
+        self,
+        n_classes,
+        try_features,
+        max_depth,
+        distr_depth,
+        sklearn_max,
+        bootstrap,
+        random_state,
+        range_min=None,
+        range_max=None,
+        n_split_points="auto",
+        split_computation="raw",
+        sync_after_fit=True,
+    ):
+        super().__init__(
+            try_features,
+            max_depth,
+            distr_depth,
+            sklearn_max,
+            bootstrap,
+            random_state,
+            _ClassificationNode,
+            SklearnDTClassifier,
+            n_classes=n_classes,
+            range_min=range_min,
+            range_max=range_max,
+            n_split_points=n_split_points,
+            split_computation=split_computation,
+            sync_after_fit=sync_after_fit,
+        )
+
+    @constraint(computing_units="${ComputingUnits}")
+    @task(returns=1)
+    def predict_proba(self, x):
+        """Predicts class probabilities for a row block using a fitted tree.
+
+        Parameters
+        ----------
+        x : ds-array
+            A row block of samples.
+
+        Returns
+        -------
+        predicted_proba : list
+            A list with the predicted probabilities for the given samples,
+            one entry per block of the input ds-array, so its length equals
+            the number of blocks the ds-array contains. Each entry has
+            shape (x._reg_shape[0], self.n_classes). The returned object
+            can be a pycompss.runtime.Future object.
+        """
+
+        assert self.tree is not None, "The decision tree is not fitted."
+
+        block_predictions = []
+        for x_block in x._blocks:
+            block_predictions.append(_predict_proba_tree(x_block,
+                                                         self.nodes_info,
+                                                         0, self.n_classes))
+        block_predictions = compss_wait_on(block_predictions)
+        return block_predictions
+
+
+class DecisionTreeRegressor(BaseDecisionTree):
+    """A distributed decision tree regressor.
+
+    Parameters
+    ----------
+    try_features : int
+        The number of features to consider when looking for the best split.
+
+        Note: the search for a split does not stop until at least one
+        valid partition of the node samples is found, even if it requires
+        to effectively inspect more than ``try_features`` features.
+    max_depth : int
+        The maximum depth of the tree. If np.inf, then nodes are expanded
+        until all leaves are pure.
+ distr_depth : int + Number of levels of the tree in which the nodes are split in a + distributed way. + bootstrap : bool + Randomly select n_instances samples with repetition (used in random + forests). + random_state : RandomState instance + The random number generator. + + Attributes + ---------- + n_features : int + The number of features of the dataset. It can be a + pycompss.runtime.Future object. + tree : None or _Node + The root node of the tree after the tree is fitted. + nodes_info : None or list of _InnerNodeInfo and _LeafInfo + List of the node information for the nodes of the tree in the same + order as obtained in the fit() method, up to ``distr_depth`` depth. + After fit(), it is a pycompss.runtime.Future object. + subtrees : None or list of _Node + List of subtrees of the tree at ``distr_depth`` depth obtained in the + fit() method. After fit(), it is a list of pycompss.runtime.Future + objects. + + Methods + ------- + fit(dataset) + Fits the DecisionTreeRegressor. + predict(x_row) + Predicts target values for the given samples using a fitted tree. + """ + + def __init__( + self, + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + range_min=None, + range_max=None, + n_split_points="auto", + split_computation="raw", + sync_after_fit=True + ): + super().__init__( + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + _RegressionNode, + SklearnDTRegressor, + n_classes=None, + range_min=range_min, + range_max=range_max, + n_split_points=n_split_points, + split_computation=split_computation, + sync_after_fit=sync_after_fit, + ) + + @constraint(computing_units="${ComputingUnits}") + @task() + def fit(self, x, y): + """Fits the DecisionTreeRegressor. + + Parameters + ---------- + x : ds-array + Samples of the dataset. + y: ds-array + Labels of the dataset. 
+ """ + if self.range_max is None: + self.range_max = x.max() + if self.range_min is None: + self.range_min = x.min() + self.range_max._blocks = compss_wait_on(self.range_max._blocks) + self.range_min._blocks = compss_wait_on(self.range_min._blocks) + if self.n_split_points == "auto": + self.n_split_points = int(math.log(x.shape[0])) + elif self.n_split_points == "sqrt": + self.n_split_points = int(math.sqrt(x.shape[0])) + elif self.n_split_points < 1 and self.n_split_points > 0: + self.n_split_points = int(self.n_split_points*x.shape[0]) + elif isinstance(self.n_split_points, int): + pass + self.total_length = x.shape[0] + self.number_attributes = x.shape[1] + self.tree = self.base_node() + branches = [[x, y, self.tree]] + nodes_info = [] + selection = _sample_selection(x, random_state=self.random_state, + bootstrap=self.bootstrap) + num_buckets = x._n_blocks[0] * x._n_blocks[1] + for i in range(self.distr_depth): + branches_pair = [] + for idx, branch_data in enumerate(branches): + x, y, actual_node = branch_data + node_info, results_l, results_l_2, results_r, results_r_2 = ( + _compute_split_regressor( + x, y, range_min=self.range_min, + range_max=self.range_max, + num_buckets=int( + num_buckets/(i+1)), + m_try=self.try_features, + number_attributes=self.number_attributes, + indexes_selected=selection, + number_split_points=int(self.n_split_points*(i+1)), + split_computation=self.split_computation, + random_state=self.random_state)) + actual_node.content = int(math.pow(2, int(i)) - 1 + idx) + actual_node.left = self.base_node() + actual_node.right = self.base_node() + splits_computed = [results_l, results_l_2, actual_node.left] + branches_pair.append(splits_computed) + splits_computed = [results_r, results_r_2, actual_node.right] + branches_pair.append(splits_computed) + nodes_info.append(node_info) + branches = branches_pair + for branch in branches: + x, y, actual_node = branch + actual_node = construct_subtree(x, y, actual_node, + self.try_features, + self.distr_depth, + max_depth=self.max_depth, + random_state=self.random_state) + nodes_info.append(actual_node) + nodes_info = compss_wait_on(nodes_info) + self.nodes_info = nodes_info + + +@constraint(computing_units="${ComputingUnits}") +@task(returns=5, priority=True) +def _compute_split_regressor(x, y, num_buckets=4, + range_min=0, range_max=1, indexes_selected=None, + number_attributes=2, m_try=2, + number_split_points=100, split_computation="raw", + random_state=1): + if x[0] is None: + return None, [None], [None], [None], [None] + indexes_to_try = [] + random_state = check_random_state(random_state) + untried_indices = np.setdiff1d(np.arange(number_attributes), + indexes_to_try) + index_selection = _feature_selection( + untried_indices, m_try, random_state + ) + indexes_to_try.append(index_selection) + node_info = _NodeInfo() + final_rights_x = [object()] + final_rights_y = [object()] + final_lefts_x = [object()] + final_lefts_y = [object()] + if num_buckets < 1: + num_buckets = 1 + tried_indices = [] + for _ in range(number_attributes): + untried_indices = np.setdiff1d(np.arange(number_attributes), + tried_indices) + index_selection = _feature_selection( + untried_indices, m_try, random_state + ) + results = terasort(x, index_selection, range_min=range_min, + range_max=range_max, + indexes_selected=indexes_selected, + num_buckets=num_buckets) + split_points_per_attribute = [] + for i in range(len(results[0])): + split_points_per_attribute.append( + get_split_point_various_attributes_bucket( + results[:, i], 
number_split_points=number_split_points, + split_computation=split_computation)) + [compss_delete_object(b) for results_2 in results for b in results_2] + del results + split_points_per_attribute = compss_wait_on(split_points_per_attribute) + partial_results_left = [] + partial_results_right = [] + for idx, split_values in enumerate(split_points_per_attribute): + partial_results_left.append([]) + partial_results_right.append([]) + if isinstance(x, Array): + for index_blocks, block_s in enumerate(zip( + x._blocks, y._blocks)): + idx_selected = indexes_selected[ + indexes_selected < (index_blocks + 1) * + x._reg_shape[0]] + block_x, block_y = block_s + left_class, right_class = classes_per_split( + block_x, block_y, split_values, index_selection, + idx_selected[idx_selected >= (index_blocks) * + x._reg_shape[0]] % x._reg_shape[0], + regression=True) + partial_results_left[idx].append(left_class) + partial_results_right[idx].append(right_class) + del idx_selected + else: + for block_x, block_y in zip(x, y): + left_class, right_class = classes_per_split( + block_x, block_y, split_values, index_selection, + np.array([0]), regression=True) + partial_results_left[idx].append(left_class) + partial_results_right[idx].append(right_class) + partial_results_right_array = np.array(compss_wait_on( + partial_results_right)) + partial_results_left_array = np.array(compss_wait_on( + partial_results_left)) + store_mse_values = [] + evaluation_of_splits = [] + for idx in range(partial_results_right_array.shape[0]): + for j in range(partial_results_right_array.shape[2]): + global_gini_values, produces_split = ( + merge_partial_results_compute_mse_both_sides( + partial_results_left_array[idx, :, j], + partial_results_right_array[idx, :, j])) + store_mse_values.append(global_gini_values) + evaluation_of_splits.append(produces_split) + + store_mse_values = compss_wait_on(store_mse_values) + evaluation_of_splits = compss_wait_on(evaluation_of_splits) + del partial_results_right_array + del partial_results_left_array + [compss_delete_object(result) for results in + partial_results_right for result in results] + [compss_delete_object(result) for results in + partial_results_left for result in results] + best_attribute, position_m_g, bucket_minimum_gini, minimum_mse = ( + get_minimum_measure(store_mse_values, m_try, gini=False)) + optimal_split_point = select_optimal_split_point( + best_attribute, position_m_g, split_points_per_attribute, + bucket_minimum_gini) + compss_delete_object(position_m_g) + compss_delete_object(bucket_minimum_gini) + compss_delete_object(*evaluation_of_splits) + compss_delete_object(*store_mse_values) + compss_delete_object(*split_points_per_attribute) + rights_x = [] + rights_y = [] + lefts_x = [] + lefts_y = [] + right_sums = [] + right_lengths = [] + left_sums = [] + left_lengths = [] + if isinstance(x, Array): + for block_x, block_y in zip(x._blocks, y._blocks): + (right_x, right_y, left_x, left_y, compress_r, + len_compress_r, compress_l, len_compress_l) = ( + apply_split_points_to_blocks_regression( + block_x, block_y, best_attribute, + optimal_split_point, index_selection)) + rights_x.append([right_x]) + rights_y.append([right_y]) + lefts_x.append([left_x]) + lefts_y.append([left_y]) + right_sums.append(compress_r) + right_lengths.append(len_compress_r) + left_sums.append(compress_l) + left_lengths.append(len_compress_l) + else: + for block_x, block_y in zip(x, y): + (right_x, right_y, left_x, left_y, compress_r, + len_compress_r, compress_l, len_compress_l) = ( + 
apply_split_points_to_blocks_regression( + block_x, block_y, best_attribute, + optimal_split_point, index_selection)) + rights_x.append([right_x]) + rights_y.append([right_y]) + lefts_x.append([left_x]) + lefts_y.append([left_y]) + right_sums.append(compress_r) + right_lengths.append(len_compress_r) + left_sums.append(compress_l) + left_lengths.append(len_compress_l) + [compss_delete_object(x_data[0]) for x_data in x] + [compss_delete_object(y_data[0]) for y_data in y] + final_rights_x[0] = rights_x + final_rights_y[0] = rights_y + final_lefts_x[0] = lefts_x + final_lefts_y[0] = lefts_y + if (np.sum(left_lengths) + np.sum(right_lengths)) <= 4: + node_info.set(_compute_leaf_info((np.sum(left_sums) + + np.sum(right_sums)) / + (np.sum(left_lengths) + + np.sum(right_lengths)), None, + occurrences=np.sum(left_lengths) + + + np.sum(right_lengths) + )) + elif np.sum(right_lengths) == 0: + node_info.set(_compute_leaf_info( + (np.sum(left_sums) + np.sum(right_sums)) / + (np.sum(left_lengths) + np.sum(right_lengths)), None, + occurrences=np.sum(left_lengths) + np.sum(right_lengths))) + elif np.sum(left_lengths) == 0: + node_info.set(_compute_leaf_info( + (np.sum(left_sums) + np.sum(right_sums)) / + (np.sum(left_lengths) + np.sum(right_lengths)), None, + occurrences=np.sum(left_lengths) + np.sum(right_lengths))) + elif best_attribute is None: + node_info.set(_compute_leaf_info( + (np.sum(left_sums) + np.sum(right_sums)) / + (np.sum(left_lengths) + np.sum(right_lengths)), None, + occurrences=np.sum(left_lengths) + np.sum(right_lengths))) + else: + node_info.set(_InnerNodeInfo(index_selection[ + best_attribute], + optimal_split_point)) + del right_sums + del right_lengths + del left_lengths + del left_sums + del minimum_mse + del optimal_split_point + del best_attribute + return (node_info, final_lefts_x[0], final_lefts_y[0], + final_rights_x[0], final_rights_y[0]) + del right_sums + del right_lengths + del left_lengths + del left_sums + del minimum_mse + del optimal_split_point + del best_attribute + tried_indices.extend(index_selection) + if len(tried_indices) == number_attributes: + break + return node_info, [None], [None], [None], [None] + + +@constraint(computing_units="${ComputingUnits}") +@task(returns=5, priority=True) +def _compute_split(x, y, n_classes=None, num_buckets=4, + range_min=0, range_max=1, + indexes_selected=None, number_attributes=2, m_try=2, + number_split_points=100, + split_computation="raw", random_state=None): + if x[0] is None: + return None, [None], [None], [None], [None] + indexes_to_try = [] + random_state = check_random_state(random_state) + untried_indices = np.setdiff1d(np.arange(number_attributes), + indexes_to_try) + index_selection = _feature_selection( + untried_indices, m_try, random_state + ) + indexes_to_try.append(index_selection) + node_info = _NodeInfo() + final_rights_x = [object()] + final_rights_y = [object()] + final_lefts_x = [object()] + final_lefts_y = [object()] + tried_indices = [] + if num_buckets < 1: + num_buckets = 2 + for _ in range(number_attributes): + untried_indices = np.setdiff1d(np.arange( + number_attributes), tried_indices) + index_selection = _feature_selection( + untried_indices, m_try, random_state + ) + results = terasort(x, index_selection, range_min=range_min, + range_max=range_max, + indexes_selected=indexes_selected, + num_buckets=num_buckets) + split_points_per_attribute = [] + for i in range(len( + results[0])): + split_points_per_attribute.append( + get_split_point_various_attributes_bucket( + results[:, i], 
number_split_points=number_split_points, + split_computation=split_computation)) + [compss_delete_object(b) for results_2 in results for b in results_2] + del results + split_points_per_attribute = compss_wait_on( + split_points_per_attribute) + partial_results_left = [] + partial_results_right = [] + for idx, split_values in enumerate(split_points_per_attribute): + partial_results_left.append([]) + partial_results_right.append([]) + if isinstance(x, Array): + for index_blocks, block_s in enumerate( + zip(x._blocks, y._blocks)): + idx_selected = indexes_selected[ + indexes_selected < (index_blocks + 1) * + x._reg_shape[0]] + block_x, block_y = block_s + left_class, right_class = classes_per_split( + block_x, block_y, split_values, index_selection, + idx_selected[idx_selected >= (index_blocks) * + x._reg_shape[0]] % x._reg_shape[0]) + partial_results_left[idx].append(left_class) + partial_results_right[idx].append(right_class) + else: + for block_x, block_y in zip(x, y): + left_class, right_class = classes_per_split( + block_x, block_y, split_values, + index_selection, np.array([0])) + partial_results_left[idx].append(left_class) + partial_results_right[idx].append(right_class) + partial_results_right_array = np.array(compss_wait_on( + partial_results_right)) + partial_results_left_array = np.array(compss_wait_on( + partial_results_left)) + store_gini_values = [] + evaluation_of_splits = [] + for idx in range(partial_results_right_array.shape[0]): + for j in range(partial_results_right_array.shape[2]): + global_gini_values, produces_split = ( + merge_partial_results_compute_gini_both_sides( + partial_results_left_array[idx, :, j], + partial_results_right_array[idx, :, j], + n_classes)) + store_gini_values.append(global_gini_values) + evaluation_of_splits.append(produces_split) + store_gini_values = compss_wait_on(store_gini_values) + evaluation_of_splits = compss_wait_on(evaluation_of_splits) + del partial_results_right_array + del partial_results_left_array + [compss_delete_object(result) for results in + partial_results_right for result in results] + [compss_delete_object(result) for results in + partial_results_left for result in results] + best_attribute, position_m_g, bucket_minimum_gini, minimum_ginis = ( + get_minimum_measure(store_gini_values, + len(index_selection), + gini=True)) + optimal_split_point = select_optimal_split_point( + best_attribute, position_m_g, split_points_per_attribute, + bucket_minimum_gini) + compss_delete_object(position_m_g) + compss_delete_object(bucket_minimum_gini) + compss_delete_object(minimum_ginis) + compss_delete_object(*evaluation_of_splits) + compss_delete_object(*store_gini_values) + compss_delete_object(*split_points_per_attribute) + rights_x = [] + rights_y = [] + lefts_x = [] + lefts_y = [] + aggregate = np.zeros(n_classes, dtype=np.int64) + aggregate_r = np.zeros(n_classes, dtype=np.int64) + if isinstance(x, Array): + for block_x, block_y in zip(x._blocks, y._blocks): + right_x, right_y, left_x, left_y, aggregate_r, aggregate = ( + apply_split_points_to_blocks( + block_x, block_y, best_attribute, + optimal_split_point, index_selection, n_classes, + aggregate, aggregate_r)) + rights_x.append([right_x]) + rights_y.append([right_y]) + lefts_x.append([left_x]) + lefts_y.append([left_y]) + else: + for block_x, block_y in zip(x, y): + right_x, right_y, left_x, left_y, aggregate_r, aggregate = ( + apply_split_points_to_blocks( + block_x, block_y, best_attribute, optimal_split_point, + index_selection, n_classes, aggregate, aggregate_r)) + 
rights_x.append([right_x])
+            rights_y.append([right_y])
+            lefts_x.append([left_x])
+            lefts_y.append([left_y])
+    [compss_delete_object(x_data[0]) for x_data in x]
+    [compss_delete_object(y_data[0]) for y_data in y]
+    final_rights_x[0] = rights_x
+    final_rights_y[0] = rights_y
+    final_lefts_x[0] = lefts_x
+    final_lefts_y[0] = lefts_y
+
+    if (np.sum(aggregate) + np.sum(aggregate_r)) <= 4:
+        node_info.set(_compute_leaf_info(aggregate +
+                                         aggregate_r, n_classes))
+    elif np.sum(aggregate_r) == 0:
+        node_info.set(_compute_leaf_info(aggregate + aggregate_r,
+                                         n_classes))
+    elif np.sum(aggregate) == 0:
+        node_info.set(_compute_leaf_info(aggregate + aggregate_r,
+                                         n_classes))
+    elif best_attribute is None:
+        node_info.set(_compute_leaf_info(aggregate + aggregate_r,
+                                         n_classes))
+    else:
+        node_info.set(_InnerNodeInfo(index_selection[best_attribute],
+                                     optimal_split_point))
+    del best_attribute
+    del evaluation_of_splits
+    del optimal_split_point
+    del aggregate
+    del aggregate_r
+    del minimum_ginis
+    return (node_info, final_lefts_x[0], final_lefts_y[0],
+            final_rights_x[0], final_rights_y[0])
+    del best_attribute
+    del evaluation_of_splits
+    del optimal_split_point
+    del aggregate
+    del aggregate_r
+    del minimum_ginis
+    tried_indices.extend(index_selection)
+    if len(tried_indices) == number_attributes:
+        break
+    return node_info, [None], [None], [None], [None]
+
+
+def _feature_selection(untried_indices, m_try, random_state):
+    selection_len = min(m_try, len(untried_indices))
+    return random_state.choice(
+        untried_indices, size=selection_len, replace=False
+    )
+
+
+def _compute_leaf_info(y_s, n_classes, occurrences=None):
+    if n_classes is not None:
+        y_s = y_s.squeeze()
+        mode = np.argmax(y_s)
+        return _LeafInfo(np.sum(y_s), y_s, mode)
+    else:
+        return _LeafInfo(occurrences, None, y_s)
+
+
+def _predict_tree_class(x, node, node_content_num, n_classes=None,
+                        rights=0, depth=0):
+    if node_content_num == 0:
+        node_content_num = node_content_num + 1
+    else:
+        node_content_num = node_content_num * 2 + rights
+    x = np.block(x)
+    node_content = node[node_content_num - 1]
+    if len(x) == 0:
+        if n_classes is not None:
+            return np.empty((0, n_classes), dtype=np.float64)
+        else:
+            return np.empty((0,), dtype=np.float64)
+    if isinstance(node_content, _NodeInfo):
+        if isinstance(node_content.get(), _LeafInfo):
+            if n_classes is not None:
+                return np.full((len(x), n_classes), node_content.get().target)
+            return np.full((len(x),), node_content.get().target)
+        elif isinstance(node_content.get(), _InnerNodeInfo):
+            if n_classes is not None:
+                pred = np.empty((x.shape[0], n_classes), dtype=np.float64)
+                l_msk = (x[:, node_content.get().index:
+                           (node_content.get().index + 1)] <=
+                         node_content.get().value)
+                pred[l_msk.flatten(), :] = _predict_tree_class(
+                    x[l_msk.flatten(), :], node, node_content_num,
+                    n_classes=n_classes,
+                    rights=0, depth=depth + 1)
+                pred[~l_msk.flatten(), :] = _predict_tree_class(
+                    x[~l_msk.flatten(), :], node, node_content_num,
+                    n_classes=n_classes,
+                    rights=1, depth=depth + 1)
+                return pred
+            else:
+                pred = np.empty((x.shape[0],), dtype=np.float64)
+                l_msk = (x[:, node_content.get().index:
+                           (node_content.get().index + 1)] <=
+                         node_content.get().value)
+                pred[l_msk.flatten()] = _predict_tree_class(
+                    x[l_msk.flatten()], node, node_content_num,
+                    n_classes=n_classes,
+                    rights=0, depth=depth + 1)
+                pred[~l_msk.flatten()] = _predict_tree_class(
+                    x[~l_msk.flatten()], node, node_content_num,
+                    n_classes=n_classes,
+                    rights=1, depth=depth + 1)
+                return pred
+    elif isinstance(node_content,
_ClassificationNode): + if len(x) > 0: + sk_tree_pred = node_content.content.sk_tree.predict(x) + b = np.zeros((sk_tree_pred.size, n_classes)) + b[np.arange(sk_tree_pred.size), sk_tree_pred] = 1 + sk_tree_pred = b + pred = np.zeros((len(x), n_classes), dtype=np.float64) + pred[:, np.arange(n_classes)] = sk_tree_pred + return pred + elif isinstance(node_content, _RegressionNode): + if len(x) > 0: + sk_tree_pred = node_content.content.sk_tree.predict(x) + return sk_tree_pred + + +def _predict_proba_tree(x, node, node_content_num, + n_classes=None, rights=0, depth=0): + if node_content_num == 0: + node_content_num = node_content_num + 1 + else: + node_content_num = node_content_num * 2 + rights + x = np.block(x) + node_content = node[node_content_num - 1] + if len(x) == 0: + return np.empty((0, n_classes), dtype=np.float64) + if isinstance(node_content, _NodeInfo): + if isinstance(node_content.get(), _LeafInfo): + single_pred = (node_content.get().frequencies / + node_content.get().size) + return np.tile(single_pred, (len(x), 1)) + elif isinstance(node_content.get(), _InnerNodeInfo): + pred = np.empty((x.shape[0], n_classes), dtype=np.float64) + l_msk = (x[:, node_content.get().index: + (node_content.get().index + 1)] <= + node_content.get().value) + pred[l_msk.flatten(), :] = compss_wait_on( + _predict_proba_tree(x[l_msk.flatten(), :], + node, node_content_num, + n_classes=n_classes, + rights=0, depth=depth + 1)) + pred[~l_msk.flatten(), :] = compss_wait_on( + _predict_proba_tree(x[~l_msk.flatten(), :], + node, node_content_num, + n_classes=n_classes, + rights=1, depth=depth + 1)) + return pred + elif isinstance(node_content, _ClassificationNode): + if len(x) > 0: + sk_tree_pred = node_content.content.sk_tree.predict_proba(x) + pred = np.zeros((len(x), n_classes), dtype=np.float64) + pred[:, node_content.content.sk_tree.classes_] = sk_tree_pred + return pred + + +def apply_split_points_to_blocks_regression(x_block, y_block, + best_attribute, + optimal_value, indexes_to_try): + if optimal_value is None: + data_to_compress = np.block(y_block) + len_compress_l = np.array([0]) + compress_l = np.array([0]) + if len(data_to_compress) > 0: + compress_l = np.sum(data_to_compress) + len_compress_l = len(data_to_compress) + return (None, None, np.block(x_block), np.block(y_block), + np.array([0]), np.array([0]), compress_l, len_compress_l) + if x_block is None: + return (None, None, None, None, np.array([np.nan]), + np.array([np.nan]), np.array([np.nan]), np.array([np.nan])) + else: + x_block = np.block(x_block) + y_block = np.block(y_block) + left_x = x_block[x_block[:, indexes_to_try[best_attribute]] < + optimal_value] + right_x = x_block[x_block[:, indexes_to_try[best_attribute]] >= + optimal_value] + right_y = y_block[x_block[:, indexes_to_try[best_attribute]] >= + optimal_value] + left_y = y_block[x_block[:, indexes_to_try[best_attribute]] < + optimal_value] + data_to_compress = np.block(right_y) + data_to_compress_2 = np.block(left_y) + if len(data_to_compress) > 0: + compress_r = np.sum(data_to_compress) + len_compress_r = len(data_to_compress) + else: + compress_r = np.array([0]) + len_compress_r = np.array([0]) + if len(data_to_compress_2) > 0: + compress_l = np.sum(data_to_compress_2) + len_compress_l = len(data_to_compress_2) + else: + compress_l = np.array([0]) + len_compress_l = np.array([0]) + del x_block + del y_block + return (right_x, right_y, left_x, left_y, compress_r, + len_compress_r, compress_l, len_compress_l) + + +def apply_split_points_to_blocks(x_block, y_block, 
best_attribute, + optimal_value, indexes_to_try, + n_classes, aggregate_r, aggregate): + if optimal_value is None: + y_block = np.block(y_block) + if y_block is not None: + if len(y_block) > 0: + data_bincount = np.bincount(y_block.astype(int).flatten()) + if len(data_bincount) < n_classes: + aggregate[:len(data_bincount)] += data_bincount + else: + aggregate += data_bincount + return (None, None, np.block(x_block), np.block(y_block), + aggregate_r, aggregate) + if x_block is None: + return None, None, None, None, aggregate_r, aggregate + else: + x_block = np.block(x_block) + y_block = np.block(y_block) + left_x = x_block[x_block[:, indexes_to_try[best_attribute]] < + optimal_value] + right_x = x_block[x_block[:, indexes_to_try[best_attribute]] >= + optimal_value] + right_y = y_block[x_block[:, indexes_to_try[best_attribute]] >= + optimal_value] + left_y = y_block[x_block[:, indexes_to_try[best_attribute]] < + optimal_value] + del x_block + del y_block + if right_y is not None: + if len(right_y) > 0: + data_bincount = np.bincount(right_y.astype(int).flatten()) + if len(data_bincount) < n_classes: + aggregate_r[:len(data_bincount)] += data_bincount + else: + aggregate_r += data_bincount + if left_y is not None: + if len(left_y) > 0: + data_bincount = np.bincount(left_y.astype(int).flatten()) + if len(data_bincount) < n_classes: + aggregate[:len(data_bincount)] += data_bincount + else: + aggregate += data_bincount + return right_x, right_y, left_x, left_y, aggregate_r, aggregate + + +def select_optimal_split_point(best_attribute, position_m_g, + split_points, bucket_minimum_gini): + if best_attribute is None: + return None + return split_points[bucket_minimum_gini][best_attribute][position_m_g] + + +def get_minimum_measure(ginis_list, number_attributes, gini=True): + if gini: + minimum_measure = 1 + else: + minimum_measure = np.inf + for idx, ginis in enumerate(ginis_list): + if ginis[np.argmin(ginis)] < minimum_measure: + position_m_g = np.argmin(ginis) + minimum_measure = ginis[position_m_g] + best_attribute = idx % number_attributes + actual_bucket = int(math.floor(idx / number_attributes)) + if minimum_measure == 1: + return None, None, None, 1 + if minimum_measure == np.inf: + return None, None, None, np.inf + return best_attribute, position_m_g, actual_bucket, minimum_measure + + +@constraint(computing_units="${ComputingUnits}") +@task(returns=2) +def merge_partial_results_compute_mse_both_sides(partial_results_l, + partial_results_r): + if partial_results_l[0] is None or len(partial_results_l[0]) < 1: + return np.array([np.inf]), False + if partial_results_l[0][0] is None: + return np.array([np.inf]), False + concatted_values_l = [] + value_to_compute_mse = [] + for k in range(len(partial_results_l[0])): + value_to_concat = [] + value_to_mse = [] + for j in range(len(partial_results_l)): + value_to_concat.append(partial_results_l[j][k][1:]) + if not np.isnan(partial_results_l[j][k][0]): + value_to_mse.extend([partial_results_l[j][k][0]]) + else: + value_to_mse.extend([0]) + concatted_values_l.append(np.sum(value_to_concat, axis=0)) + value_to_compute_mse.append(value_to_mse) + number_occurrences = [occurrences[1] for + occurrences in concatted_values_l] + mse_values = [] + for individual_values, value in zip(value_to_compute_mse, + concatted_values_l): + mse_values.append(np.sum(np.square(np.subtract(individual_values, + value[0] / value[1])))) + del value + del concatted_values_l + if partial_results_r[0] is None or len(partial_results_r[0]) < 1: + return np.array([np.inf]), False + 
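The merge above pools the per-block (sum, count) pairs of the left side into a global mean and scores the split by the summed squared deviation of each block mean from that pooled mean; the same pass is repeated for the right-hand partials just below. A minimal numpy sketch of the rule for a single split point, using hypothetical per-block (mean, sum, count) triples:

    import numpy as np

    # One (mean, sum, count) triple per data block for one candidate
    # split point; the values are illustrative only.
    partials = [(2.0, 4.0, 2), (5.0, 15.0, 3)]

    total_sum = sum(p[1] for p in partials)    # pooled like np.sum(..., axis=0)
    total_count = sum(p[2] for p in partials)
    pooled_mean = total_sum / total_count      # 3.8

    # Squared deviation of each block mean from the pooled mean.
    score = np.sum(np.square([p[0] - pooled_mean for p in partials]))
    print(score)                               # ~4.68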
if partial_results_r[0][0] is None: + return np.array([np.inf]), False + concatted_values_r = [] + value_to_compute_mse = [] + for k in range(len(partial_results_r[0])): + value_to_concat = [] + value_to_mse = [] + for j in range(len(partial_results_r)): + value_to_concat.append(partial_results_r[j][k][1:]) + if not np.isnan(partial_results_r[j][k][0]): + value_to_mse.extend([partial_results_r[j][k][0]]) + else: + value_to_mse.extend([0]) + concatted_values_r.append(np.sum(value_to_concat, axis=0)) + value_to_compute_mse.append(value_to_mse) + number_occurrences_r = [occurrences[1] for + occurrences in concatted_values_r] + mse_values_r = [] + for individual_values, value in zip(value_to_compute_mse, + concatted_values_r): + mse_values_r.append(np.sum(np.square(np.subtract( + individual_values, value[0] / value[1])))) + del value + del concatted_values_r + if mse_values is None: + return np.array([np.inf]), False + return np.add(mse_values, mse_values_r), np.array( + [number_occurrences_r[i] != 0 and number_occurrences[i] != 0 + for i in range(len(mse_values))]) + + +def gini_function_compressed(y, classes): + if not len(y) != 0: + return 0 + probs = [] + total_y = np.sum(y) + for idx in range(len(classes)): + if len(y) > idx: + probs.append(y[idx]/total_y) + p = np.array(probs) + return 1 - ((p * p).sum()) + + +@constraint(computing_units="${ComputingUnits}") +@task(returns=2) +def merge_partial_results_compute_gini_both_sides(partial_results_l, + partial_results_r, + n_classes): + if partial_results_l[0] is None or len(partial_results_l[0]) < 1: + return np.array([5]), False + if partial_results_l[0][0] is None: + return np.array([5]), False + concatted_values_l = [] + for k in range(len(partial_results_l[0])): + value_to_concat = np.zeros(n_classes) + for j in range(len(partial_results_l)): + if len(partial_results_l[j][k]) > 0: + value_to_concat[:len(partial_results_l[j][k])] = ( + value_to_concat[:len(partial_results_l[j][k])] + + partial_results_l[j][k]) + concatted_values_l.append(value_to_concat) + number_occurrences = [np.sum(occurrences).astype(int) for + occurrences in concatted_values_l] + gini_values = [] + for value in concatted_values_l: + gini_values.append(gini_function_compressed(value, + np.arange(n_classes))) + if partial_results_r[0] is None or len(partial_results_r[0]) < 1: + return np.array([5]), False + if partial_results_r[0][0] is None: + return np.array([5]), False + concatted_values_r = [] + for k in range(len(partial_results_r[0])): + value_to_concat = np.zeros(n_classes) + for j in range(len(partial_results_r)): + value_to_concat[:len(partial_results_r[j][k])] = ( + value_to_concat[:len(partial_results_r[j][k])] + + partial_results_r[j][k]) + concatted_values_r.append(value_to_concat) + gini_values_r = [] + for value in concatted_values_r: + gini_values_r.append(gini_function_compressed( + value, np.arange(n_classes))) + number_occurrences_r = [np.sum(occurrences).astype(int) for + occurrences in concatted_values_r] + del concatted_values_r + return np.array( + [(number_occurrences_r[i] / (number_occurrences_r[i] + + number_occurrences[i]) * + gini_values_r[i]) + (number_occurrences[i] / ( + number_occurrences_r[i] + number_occurrences[i]) * + gini_values[i]) + if number_occurrences[i] >= 4 and number_occurrences_r[i] >= 4 else + 5 for i in range(len(gini_values))]), \ + np.array([number_occurrences_r[i] != 0 and number_occurrences[i] != 0 + for i in range(len(gini_values))]) + + +@constraint(computing_units="${ComputingUnits}") +@task(x_block=COLLECTION_IN, 
y_block=COLLECTION_IN, returns=2) +def classes_per_split(x_block, y_block, split_points, indexes_to_compare, + indexes_to_select=np.array([0]), regression=False): + number_classes_l = [np.array([]) for _ in range(len(indexes_to_compare))] + number_classes_r = [np.array([]) for _ in range(len(indexes_to_compare))] + number_none_split_points = 0 + for inner_split in split_points: + if inner_split is None: + number_none_split_points = number_none_split_points + 1 + if x_block is None or len(x_block) == 0 or \ + number_none_split_points == len(split_points): + for idx in range(len(indexes_to_compare)): + number_classes_l[idx] = np.array([]) + number_classes_r[idx] = np.array([]) + return number_classes_l, number_classes_r + x_block = np.block(x_block) + y_block = np.block(y_block) + if indexes_to_select is not None: + if len(indexes_to_select) == 1: + if indexes_to_select[0] == 0: + x_block = x_block[:, indexes_to_compare] + else: + y_block = y_block[indexes_to_select] + x_block = x_block[indexes_to_select] + x_block = x_block[:, indexes_to_compare] + else: + y_block = y_block[indexes_to_select] + x_block = x_block[indexes_to_select] + x_block = x_block[:, indexes_to_compare] + else: + x_block = x_block[:, indexes_to_compare] + if regression: + for idx, attribute_split_points in enumerate(split_points): + attribute_splittings_l = [] + attribute_splittings_r = [] + for value in attribute_split_points: + attribute_splittings_l.append(np.array( + [np.mean(y_block[x_block[:, idx] < value, 0]), + np.sum(y_block[x_block[:, idx] < value, 0]), + len(y_block[x_block[:, idx] < value, 0])])) + attribute_splittings_r.append(np.array( + [np.mean(y_block[x_block[:, idx] >= value, 0]), + np.sum(y_block[x_block[:, idx] >= value, 0]), + len(y_block[x_block[:, idx] >= value, 0])])) + if len(attribute_splittings_r) == 0: + attribute_splittings_r = np.array([]) + if len(attribute_splittings_l) == 0: + attribute_splittings_l = np.array([]) + number_classes_l[idx] = attribute_splittings_l + number_classes_r[idx] = attribute_splittings_r + else: + for idx, attribute_split_points in enumerate(split_points): + attribute_splittings_l = [] + attribute_splittings_r = [] + for value in attribute_split_points: + attribute_splittings_l.append(np.bincount( + y_block[x_block[:, idx] < value, 0].astype(int))) + attribute_splittings_r.append(np.bincount( + y_block[x_block[:, idx] >= value, 0].astype(int))) + if len(attribute_splittings_r) == 0: + attribute_splittings_r = np.array([]) + if len(attribute_splittings_l) == 0: + attribute_splittings_l = np.array([]) + number_classes_l[idx] = attribute_splittings_l + number_classes_r[idx] = attribute_splittings_r + del x_block + del y_block + return number_classes_l, number_classes_r + + +@constraint(computing_units="${ComputingUnits}") +@task(returns=1) +def get_split_point_various_attributes_bucket(unique_values, + number_split_points=100, + split_computation="raw"): + sample_blocks_list = [] + for idx, bucket in enumerate(unique_values): + if bucket is None: + sample_blocks_list.append([]) + return sample_blocks_list + sample_blocks = np.copy(bucket) + if len(sample_blocks) == 0: + sample_blocks_list.append([]) + return sample_blocks_list + number_split_points_actual = number_split_points + if split_computation == "raw": + sample_blocks[:-1] += sample_blocks[1:] + sample_blocks[-1] = sample_blocks[-1] * 2 + sample_blocks = sample_blocks / 2 + if number_split_points_actual == 0: + number_split_points_actual = 1 + distance_between_split_points = int(len( + sample_blocks) /
number_split_points_actual) + if distance_between_split_points == 0: + sample_blocks_list.append(sample_blocks) + else: + sample_blocks_list.append( + sample_blocks[0::distance_between_split_points]) + elif split_computation == "gaussian_approximation": + std = np.std(sample_blocks) + mean = np.mean(sample_blocks) + sample_blocks = np.array([mean + std * scipy.stats.norm.ppf( + (i + 1) / (number_split_points_actual + 1)) for i in + range(number_split_points_actual - 1)]) + sample_blocks_list.append(sample_blocks) + elif split_computation == "uniform_approximation": + maximum = np.max(sample_blocks) + minimum = np.min(sample_blocks) + sample_blocks = np.array([minimum + i * ((maximum - minimum) / ( + number_split_points_actual + 1)) for i in + range(number_split_points_actual)]) + sample_blocks_list.append(sample_blocks) + return sample_blocks_list + + +@constraint(computing_units="${ComputingUnits}") +@task(x=COLLECTION_IN, y=COLLECTION_IN, actual_node=IN, returns=1) +def construct_subtree(x, y, actual_node, m_try, depth, max_depth=25, + random_state=0): + if x is None or x[0] is None: + actual_node.content = None + return actual_node + else: + if max_depth == np.inf: + sklearn_max_depth = None + else: + sklearn_max_depth = max_depth - depth + if isinstance(actual_node, _ClassificationNode): + dt = SklearnDTClassifier( + max_features=m_try, + max_depth=sklearn_max_depth, + random_state=random_state, + ) + elif isinstance(actual_node, _RegressionNode): + dt = SklearnDTRegressor( + max_features=m_try, + max_depth=sklearn_max_depth, + random_state=random_state, + ) + x = np.block(x) + y = np.block(y) + if len(y) == 0 or np.any(y) is None: + actual_node.content = None + else: + dt.fit(x, y.astype(int), check_input=False) + actual_node.content = _SkTreeWrapper(dt) + return actual_node + + +def _sample_selection(x, random_state, bootstrap=True): + if bootstrap: # bootstrap: + selection = random_state.choice( + x.shape[0], size=x.shape[0], replace=True + ) + selection.sort() + else: + selection = np.arange(x.shape[0]) + return selection + + +class _SkTreeWrapper: + def __init__(self, tree): + self.sk_tree = tree + + def toJson(self): + return { + "class_name": self.__class__.__name__, + "module_name": self.__module__, + "items": self.__dict__, + } + + +class _LeafInfo: + def __init__(self, size=None, frequencies=None, target=None): + self.size = size + self.frequencies = frequencies + self.target = target + + def toJson(self): + return { + "class_name": self.__class__.__name__, + "module_name": self.__module__, + "items": self.__dict__, + } + + +class _InnerNodeInfo: + def __init__(self, index=None, value=None): + self.index = index + self.value = value + + def toJson(self): + return { + "class_name": self.__class__.__name__, + "module_name": self.__module__, + "items": self.__dict__, + } + + +class _Node: + """Base class for tree nodes""" + + def __init__(self, is_classifier): + self.content = None + self.left = None + self.right = None + self.is_classifier = is_classifier + self.predict_dtype = np.int64 if is_classifier else np.float64 + + '''def predict(self, sample): + node_content = self.content + if isinstance(node_content, _LeafInfo): + return np.full((len(sample),), node_content.target) + if isinstance(node_content, _SkTreeWrapper): + if len(sample) > 0: + return node_content.sk_tree.predict(sample) + if isinstance(node_content, _InnerNodeInfo): + pred = np.empty((len(sample),), dtype=self.predict_dtype) + left_mask = sample[:, node_content.index] <= node_content.value + pred[left_mask] = 
self.left.predict(sample[left_mask]) + pred[~left_mask] = self.right.predict(sample[~left_mask]) + return pred + assert len(sample) == 0, "Type not supported" + return np.empty((0,), dtype=self.predict_dtype)''' + + +class _ClassificationNode(_Node): + def __init__(self): + super().__init__(is_classifier=True) + + '''def predict_proba(self, sample, n_classes): + node_content = self.content + if isinstance(node_content, _LeafInfo): + single_pred = node_content.frequencies / node_content.size + return np.tile(single_pred, (len(sample), 1)) + if isinstance(node_content, _SkTreeWrapper): + if len(sample) > 0: + sk_tree_pred = node_content.sk_tree.predict_proba(sample) + pred = np.zeros((len(sample), n_classes), dtype=np.float64) + pred[:, node_content.sk_tree.classes_] = sk_tree_pred + return pred + if isinstance(node_content, _InnerNodeInfo): + pred = np.empty((len(sample), n_classes), dtype=np.float64) + l_msk = sample[:, node_content.index] <= node_content.value + pred[l_msk] = self.left.predict_proba(sample[l_msk], n_classes) + pred[~l_msk] = self.right.predict_proba(sample[~l_msk], n_classes) + return pred + assert len(sample) == 0, "Type not supported" + return np.empty((0, n_classes), dtype=np.float64)''' + + def toJson(self): + return { + "class_name": self.__class__.__name__, + "module_name": self.__module__, + "items": self.__dict__, + } + + +class _RegressionNode(_Node): + def __init__(self): + super().__init__(is_classifier=False) + + def toJson(self): + return { + "class_name": self.__class__.__name__, + "module_name": self.__module__, + "items": self.__dict__, + } + + +class _NodeInfo: + def __init__(self): + self.node_info = None + + def set(self, node_info): + self.node_info = node_info + + def get(self): + return self.node_info + + def toJson(self): + return { + "class_name": self.__class__.__name__, + "module_name": self.__module__, + "items": self.__dict__, + } + + +def encode_forest_helper(obj): + if isinstance(obj, (DecisionTreeClassifier, DecisionTreeRegressor, _Node, + _NodeInfo, + _ClassificationNode, _RegressionNode, _InnerNodeInfo, + _LeafInfo, _SkTreeWrapper)): + return obj.toJson() + + +def decode_forest_helper(class_name, obj): + if class_name == 'DecisionTreeClassifier': + model = eval(class_name)( + n_classes=obj.pop("n_classes"), + try_features=obj.pop("try_features"), + max_depth=obj.pop("max_depth"), + distr_depth=obj.pop("distr_depth"), + sklearn_max=obj.pop("sklearn_max"), + bootstrap=obj.pop("bootstrap"), + random_state=obj.pop("random_state"), + range_min=obj.pop("range_min"), + range_max=obj.pop("range_max"), + n_split_points=obj.pop("n_split_points"), + sync_after_fit=obj.pop("sync_after_fit"), + ) + elif class_name == 'DecisionTreeRegressor': + model = eval(class_name)( + try_features=obj.pop("try_features"), + max_depth=obj.pop("max_depth"), + distr_depth=obj.pop("distr_depth"), + sklearn_max=obj.pop("sklearn_max"), + bootstrap=obj.pop("bootstrap"), + random_state=obj.pop("random_state"), + range_min=obj.pop("range_min"), + range_max=obj.pop("range_max"), + n_split_points=obj.pop("n_split_points"), + sync_after_fit=obj.pop("sync_after_fit"), + ) + elif class_name == '_SkTreeWrapper': + sk_tree = obj.pop("sk_tree") + model = _SkTreeWrapper(sk_tree) + else: + model = eval(class_name)() + model.__dict__.update(obj) + return model diff --git a/dislib/trees/nested/forest.py b/dislib/trees/nested/forest.py new file mode 100644 index 00000000..2305beec --- /dev/null +++ b/dislib/trees/nested/forest.py @@ -0,0 +1,755 @@ +from sklearn.base import BaseEstimator 
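All the helper classes above expose the same toJson() convention (class_name, module_name and the instance __dict__), and decode_forest_helper() rebuilds objects by updating a fresh instance's __dict__. A minimal round-trip sketch with a hypothetical stand-in class rather than the real node types:

    import json

    class _Demo:  # stand-in for the node/info classes above
        def __init__(self, index=None, value=None):
            self.index = index
            self.value = value

        def toJson(self):
            return {"class_name": self.__class__.__name__,
                    "module_name": self.__module__,
                    "items": self.__dict__}

    def encode(obj):  # plays the role of encode_forest_helper
        return obj.toJson()

    payload = json.dumps(_Demo(index=3, value=0.5), default=encode)
    meta = json.loads(payload)
    restored = _Demo()
    restored.__dict__.update(meta["items"])  # as decode_forest_helper does
    assert restored.index == 3 and restored.value == 0.5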
+from sklearn.utils import check_random_state +import math +import numpy as np +from pycompss.api.parameter import COLLECTION_IN, Type, Depth + +from dislib.trees.nested.decision_tree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, encode_forest_helper, decode_forest_helper, +) +from pycompss.api.api import compss_wait_on +from pycompss.api.constraint import constraint +from pycompss.api.task import task +from dislib.data.array import Array +from dislib.utils.base import _paired_partition +from dislib.data.util import decoder_helper, encoder_helper, sync_obj +import json +import numbers +import os +import pickle +import dislib.data.util.model as utilmodel +from sklearn.svm import SVC as SklearnSVC +from sklearn.tree import DecisionTreeClassifier as SklearnDTClassifier +from sklearn.tree import DecisionTreeRegressor as SklearnDTRegressor +from sklearn.tree._tree import Tree as SklearnTree +SKLEARN_CLASSES = { + "SVC": SklearnSVC, + "DecisionTreeClassifier": SklearnDTClassifier, + "DecisionTreeRegressor": SklearnDTRegressor, +} + + +class BaseRandomForest(BaseEstimator): + """Base class for distributed random forests. + + Warning: This class should not be used directly. + Use derived classes instead. + """ + + def __init__( + self, + n_estimators, + try_features, + max_depth, + distr_depth, + sklearn_max, + hard_vote, + random_state, + base_tree, + n_classes=None, + range_max=None, + range_min=None, + bootstrap=True, + n_split_points="auto", + split_computation="raw", + sync_after_fit=True, + ): + self.n_classes = n_classes + self.n_estimators = n_estimators + self.try_features = try_features + self.max_depth = max_depth + self.distr_depth = distr_depth + self.sklearn_max = sklearn_max + self.hard_vote = hard_vote + self.random_state = random_state + self.base_tree = base_tree + self.range_max = range_max + self.range_min = range_min + self.bootstrap = bootstrap + self.n_split_points = n_split_points + self.split_computation = split_computation + self.sync_after_fit = sync_after_fit + + def fit(self, x, y): + """Fits a RandomForest. + + Parameters + ---------- + x : ds-array, shape=(n_samples, n_features) + The training input samples. Internally, its dtype will be converted + to ``dtype=np.float32``. + y : ds-array, shape=(n_samples, 1) + The target values. 
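When distr_depth='auto', the fit() body below derives the distributed depth from the sample count and clamps it to the range [1, max_depth]. A worked sketch of that arithmetic (the helper name is illustrative):

    import math

    def auto_distr_depth(n_samples, max_depth):
        # max(0, floor(log10(n_samples)) - 4), clamped to [1, max_depth],
        # mirroring the 'auto' branch of BaseRandomForest.fit().
        depth = min(max(0, int(math.log10(n_samples)) - 4), max_depth)
        return 1 if depth < 1 else depth

    print(auto_distr_depth(10 ** 4, 25))  # 1 (small data: one distributed level)
    print(auto_distr_depth(10 ** 7, 25))  # 3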
+ + Returns + ------- + self : RandomForest + """ + + try_features = _resolve_try_features(self.try_features, x.shape[1]) + + if self.range_max is None: + self.range_max = x.max() + if self.range_min is None: + self.range_min = x.min() + self.range_max._blocks = compss_wait_on(self.range_max._blocks) + self.range_min._blocks = compss_wait_on(self.range_min._blocks) + + if self.distr_depth == "auto": + distr_depth = max(0, int(math.log10(x.shape[0])) - 4) + distr_depth = min(distr_depth, self.max_depth) + if distr_depth < 1: + self.distr_depth = 1 + else: + self.distr_depth = distr_depth + + self.trees = [] + + for _ in range(self.n_estimators): + random_state = check_random_state(self.random_state) + if isinstance(self.random_state, numbers.Integral): + self.random_state = self.random_state+np.random.randint(100) + if self.n_classes is not None: + tree = self.base_tree( + try_features=try_features, + max_depth=self.max_depth, + distr_depth=self.distr_depth, + sklearn_max=self.sklearn_max, + bootstrap=self.bootstrap, + random_state=random_state, + n_classes=self.n_classes, + range_min=self.range_min, + range_max=self.range_max, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=False, + ) + else: + tree = self.base_tree( + try_features=try_features, + max_depth=self.max_depth, + distr_depth=self.distr_depth, + sklearn_max=self.sklearn_max, + bootstrap=self.bootstrap, + random_state=random_state, + range_min=self.range_min, + range_max=self.range_max, + n_split_points=self.n_split_points, + split_computation=self.split_computation, + sync_after_fit=False, + ) + self.trees.append(tree) + + for tree in self.trees: + tree.fit(x, y) + self.trees = compss_wait_on(self.trees) + + return self + + def save_model(self, filepath, overwrite=True, save_format="json"): + """Saves a model to a file. + The model is synchronized before saving and can be reinstantiated in + the exact same state, without any of the code used for model + definition or fitting. + Parameters + ---------- + filepath : str + Path where to save the model + overwrite : bool, optional (default=True) + Whether any existing model at the target + location should be overwritten. + save_format : str, optional (default='json) + Format used to save the models. 
+ Examples + -------- + >>> from dislib.trees.nested import RandomForestClassifier + >>> import numpy as np + >>> import dislib as ds + >>> x = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]]) + >>> y = np.array([0, 0, 0, 1, 1, 1]) + >>> x_train = ds.array(x, (2, 2)) + >>> y_train = ds.array(y[:, np.newaxis], (2, 1)) + >>> model = RandomForestClassifier(n_classes=2, random_state=0) + >>> model.fit(x_train, y_train) + >>> model.save_model('/tmp/model') + >>> loaded_model = RandomForestClassifier(n_classes=2) + >>> loaded_model.load_model('/tmp/model') + >>> x_test = ds.array(np.array([[0, 0], [4, 4]]), (2, 2)) + >>> model_pred = model.predict(x_test) + >>> loaded_model_pred = loaded_model.predict(x_test) + >>> assert np.allclose(model_pred.collect(), + loaded_model_pred.collect()) + """ + + # Check overwrite + if not overwrite and os.path.isfile(filepath): + return + + _sync_rf(self) + + sync_obj(self.__dict__) + + model_metadata = self.__dict__ + model_metadata["model_name"] = self.__class__.__name__ + + # Save model + if save_format == "json": + with open(filepath, "w") as f: + json.dump(model_metadata, f, default=_encode_helper) + elif save_format == "cbor": + if utilmodel.cbor2 is None: + raise ModuleNotFoundError("No module named 'cbor2'") + with open(filepath, "wb") as f: + utilmodel.cbor2.dump(model_metadata, f, + default=_encode_helper_cbor) + elif save_format == "pickle": + with open(filepath, "wb") as f: + pickle.dump(model_metadata, f) + else: + raise ValueError("Wrong save format.") + + def load_model(self, filepath, load_format="json"): + """Loads a model from a file. + The model is reinstantiated in the exact same state in which it + was saved, without any of the code used for model definition or + fitting. + Parameters + ---------- + filepath : str + Path of the saved model + load_format : str, optional (default='json') + Format used to load the model. + Examples + -------- + >>> from dislib.trees.nested import RandomForestClassifier + >>> import numpy as np + >>> import dislib as ds + >>> x = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]]) + >>> y = np.array([0, 0, 0, 1, 1, 1]) + >>> x_train = ds.array(x, (2, 2)) + >>> y_train = ds.array(y[:, np.newaxis], (2, 1)) + >>> model = RandomForestClassifier(n_classes=2, random_state=0) + >>> model.fit(x_train, y_train) + >>> model.save_model('/tmp/model') + >>> loaded_model = RandomForestClassifier(n_classes=2) + >>> loaded_model.load_model('/tmp/model') + >>> x_test = ds.array(np.array([[0, 0], [4, 4]]), (2, 2)) + >>> model_pred = model.predict(x_test) + >>> loaded_model_pred = loaded_model.predict(x_test) + >>> assert np.allclose(model_pred.collect(), + loaded_model_pred.collect()) + """ + # Load model + if load_format == "json": + with open(filepath, "r") as f: + model_metadata = json.load(f, object_hook=_decode_helper) + elif load_format == "cbor": + if utilmodel.cbor2 is None: + raise ModuleNotFoundError("No module named 'cbor2'") + with open(filepath, "rb") as f: + model_metadata = utilmodel.cbor2.\ + load(f, object_hook=_decode_helper_cbor) + elif load_format == "pickle": + with open(filepath, "rb") as f: + model_metadata = pickle.load(f) + else: + raise ValueError("Wrong load format.") + + for key, val in model_metadata.items(): + setattr(self, key, val) + + +class RandomForestClassifier(BaseRandomForest): + """A distributed random forest classifier. + + Parameters + ---------- + n_estimators : int, optional (default=10) + Number of trees to fit. + try_features : int, str or None, optional (default='sqrt') + The number of features to consider when looking for the best split: + + - If "sqrt", then `try_features=sqrt(n_features)`. + - If "third", then `try_features=n_features // 3`. + - If None, then `try_features=n_features`.
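These options are resolved into concrete feature counts by _resolve_try_features(), defined later in this module; a short self-contained sketch of the same rules (the function name here is a local stand-in):

    import math

    def resolve_try_features(try_features, n_features):
        if try_features is None:
            return n_features
        if try_features == "sqrt":
            return int(math.sqrt(n_features))
        if try_features == "third":
            return max(1, n_features // 3)
        if try_features >= 1:
            return int(try_features)
        return int(try_features * n_features)  # a float < 1 acts as a fraction

    assert resolve_try_features("sqrt", 100) == 10
    assert resolve_try_features("third", 100) == 33
    assert resolve_try_features(0.25, 100) == 25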
+ + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires + to effectively inspect more than ``try_features`` features. + max_depth : int or np.inf, optional (default=np.inf) + The maximum depth of the tree. If np.inf, then nodes are expanded + until all leaves are pure. + distr_depth : int or str, optional (default='auto') + Number of levels of the tree in which the nodes are split in a + distributed way. + sklearn_max: int or float, optional (default=1e8) + Maximum size (len(subsample)*n_features) of the arrays passed to + sklearn's DecisionTreeClassifier.fit(), which is called to fit subtrees + (subsamples) of our DecisionTreeClassifier. sklearn fit() is used + because it's faster, but requires loading the data to memory, which can + cause memory problems for large datasets. This parameter can be + adjusted to fit the hardware capabilities. + hard_vote : bool, optional (default=False) + If True, it uses majority voting over the predict() result of the + decision tree predictions. If False, it takes the class with the higher + probability given by predict_proba(), which is an average of the + probabilities given by the decision trees. + random_state : int, RandomState instance or None, optional (default=None) + If int, random_state is the seed used by the random number generator; + If RandomState instance, random_state is the random number generator; + If None, the random number generator is the RandomState instance used + by `np.random`. + + Attributes + ---------- + classes : None or ndarray + Array of distinct classes, set at fit(). + trees : list of DecisionTreeClassifier + List of the tree classifiers of this forest, populated at fit(). + """ + + def __init__( + self, + n_classes, + n_estimators=10, + try_features="sqrt", + max_depth=np.inf, + distr_depth="auto", + sklearn_max=1e8, + hard_vote=False, + random_state=None, + range_max=None, + range_min=None, + bootstrap=True, + n_split_points="auto", + split_computation="raw", + sync_after_fit=True, + ): + super().__init__( + n_estimators, + try_features, + max_depth, + distr_depth, + sklearn_max, + hard_vote, + random_state, + base_tree=DecisionTreeClassifier, + n_classes=n_classes, + range_max=range_max, + range_min=range_min, + bootstrap=bootstrap, + n_split_points=n_split_points, + split_computation=split_computation, + sync_after_fit=sync_after_fit, + ) + + def predict(self, x): + """Predicts target values using a fitted forest. + + Parameters + ---------- + x : ds-array, shape=(n_samples, n_features) + The input samples. + + Returns + ------- + y_pred : ds-array, shape=(n_samples, 1) + Predicted values for x. + """ + assert self.trees is not None, "The random forest is not fitted." 
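Both branches of predict() below reduce to summing per-tree arrays and taking an argmax over classes: with hard_vote the per-tree outputs are one-hot blocks, so the sum is a vote count; otherwise they are class probabilities, so the sum is proportional to their average. A toy numpy sketch (two classes, three samples, illustrative values):

    import numpy as np

    classes = np.array([0, 1])
    # Per-tree one-hot votes (hard) or probabilities (soft), per sample.
    tree_a = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]])
    tree_b = np.array([[1.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
    tree_c = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]])

    aggregate = tree_a + tree_b + tree_c  # as _base_hard_vote/_base_soft_vote do
    print(classes[np.argmax(aggregate, axis=1)])  # [0 1 1]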
+ + pred_blocks = [] + + if self.hard_vote: + for x_row in x._iterator(axis=0): + tree_predictions = [] + for tree in self.trees: + tree_predictions.append(tree.predict(x_row)) + pred_blocks.append([_hard_vote(np.arange(self.n_classes), + compss_wait_on( + tree_predictions))]) + else: + for x_row in x._iterator(axis=0): + tree_predictions = [] + for tree in self.trees: + tree_predictions.append(tree.predict_proba(x_row)) + pred_blocks.append([_soft_vote(np.arange(self.n_classes), + compss_wait_on( + tree_predictions))]) + pred_blocks = compss_wait_on(pred_blocks) + y_pred = Array( + blocks=pred_blocks, + top_left_shape=(x._top_left_shape[0], 1), + reg_shape=(x._reg_shape[0], 1), + shape=(x.shape[0], 1), + sparse=False, + ) + + return y_pred + + def predict_proba(self, x): + """Predicts class probabilities using a fitted forest. + + The probabilities are obtained as an average of the probabilities of + each decision tree. + + + Parameters + ---------- + x : ds-array, shape=(n_samples, n_features) + The input samples. + + Returns + ------- + probabilities : ds-array, shape=(n_samples, n_classes) + Predicted probabilities for the samples to belong to each class. + The columns of the array correspond to the classes given at + self.classes. + """ + assert self.trees is not None, "The random forest is not fitted." + + prob_blocks = [] + for x_row in x._iterator(axis=0): + tree_predictions = [] + for tree in self.trees: + tree_predictions.append(tree.predict_proba(x_row)) + prob_blocks.append([_join_predictions(tree_predictions)]) + + probabilities = Array( + blocks=prob_blocks, + top_left_shape=(x._top_left_shape[0], self.n_classes), + reg_shape=(x._reg_shape[0], self.n_classes), + shape=(x.shape[0], self.n_classes), + sparse=False, + ) + return probabilities + + def score(self, x, y, collect=False): + assert self.trees is not None, "The random forest is not fitted." + partial_scores = [] + if self.hard_vote: + for x_row, y_row in _paired_partition(x, y): + tree_predictions = [] + for tree in self.trees: + tree_predictions.append(tree.predict(x_row)) + subset_score = _hard_vote_score( + y_row._blocks, np.arange(self.n_classes), tree_predictions + ) + partial_scores.append(subset_score) + else: + for x_row, y_row in _paired_partition(x, y): + tree_predictions = [] + for tree in self.trees: + tree_predictions.append(tree.predict_proba(x_row)) + subset_score = _soft_vote_score( + y_row._blocks, np.arange(self.n_classes), tree_predictions + ) + partial_scores.append(subset_score) + score = _merge_classification_scores(partial_scores) + + return compss_wait_on(score) if collect else score + + +class RandomForestRegressor(BaseRandomForest): + """A distributed random forest regressor. + + Parameters + ---------- + n_estimators : int, optional (default=10) + Number of trees to fit. + try_features : int, str or None, optional (default='sqrt') + The number of features to consider when looking for the best split: + + - If "sqrt", then `try_features=sqrt(n_features)`. + - If "third", then `try_features=n_features // 3`. + - If None, then `try_features=n_features`. + + Note: the search for a split does not stop until at least one + valid partition of the node samples is found, even if it requires + to effectively inspect more than ``try_features`` features. + max_depth : int or np.inf, optional (default=np.inf) + The maximum depth of the tree. If np.inf, then nodes are expanded + until all leaves are pure. 
+ distr_depth : int or str, optional (default='auto') + Number of levels of the tree in which the nodes are split in a + distributed way. + sklearn_max: int or float, optional (default=1e8) + Maximum size (len(subsample)*n_features) of the arrays passed to + sklearn's DecisionTreeRegressor.fit(), which is + called to fit subtrees (subsamples) of our DecisionTreeRegressor. + sklearn fit() is used because it's faster, but requires loading + the data to memory, which can cause memory problems + for large datasets. + This parameter can be adjusted to fit the hardware capabilities. + random_state : int, RandomState instance or None, optional + (default=None) + If int, random_state is the seed used by the random number + generator; + If RandomState instance, random_state is the random number + generator; + If None, the random number generator is the RandomState + instance used + by `np.random`. + + Attributes + ---------- + trees : list of DecisionTreeRegressor + List of the tree regressors of this forest, populated at fit(). + """ + + def __init__( + self, + n_estimators=10, + try_features="sqrt", + max_depth=np.inf, + distr_depth="auto", + sklearn_max=1e8, + random_state=None, + range_max=None, + range_min=None, + bootstrap=True, + n_split_points="auto", + split_computation="raw", + sync_after_fit=True, + ): + hard_vote = None + super().__init__( + n_estimators, + try_features, + max_depth, + distr_depth, + sklearn_max, + hard_vote, + random_state, + base_tree=DecisionTreeRegressor, + n_classes=None, + range_max=range_max, + range_min=range_min, + bootstrap=bootstrap, + n_split_points=n_split_points, + split_computation=split_computation, + sync_after_fit=sync_after_fit, + ) + + def predict(self, x): + pred_blocks = [] + for x_row in x._iterator(axis=0): + tree_predictions = [] + for tree in self.trees: + tree_predictions.append(tree.predict(x_row)) + pred_blocks.append(tree_predictions) + final_blocks = [] + for tree_predictions in pred_blocks: + final_blocks.append([_join_predictions( + compss_wait_on(tree_predictions))]) + + y_pred = Array( + blocks=final_blocks, + top_left_shape=(x._top_left_shape[0], 1), + reg_shape=(x._reg_shape[0], 1), + shape=(x.shape[0], 1), + sparse=False, + ) + + return y_pred + + def score(self, x, y, collect=False): + assert self.trees is not None, "The random forest is not fitted." 
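score() below emits one (u, v, mean, n) tuple per partition, and _merge_regression_scores() pools them into a single R² = 1 - u / v without revisiting the data, using a pairwise variance merge. A small self-contained check that the pooled v matches a direct computation on toy data:

    import numpy as np

    y1 = np.array([1.0, 2.0, 3.0])
    y2 = np.array([10.0, 11.0])

    # Per-partition pieces: v_p = sum((y - mean)^2), mean, n.
    parts = [(np.sum((y - y.mean()) ** 2), y.mean(), len(y)) for y in (y1, y2)]

    v = avg = n = 0.0
    for v_p, avg_p, n_p in parts:  # same update as _merge_regression_scores
        delta = avg_p - avg
        avg += delta * n_p / (n + n_p)
        v += v_p + delta ** 2 * n * n_p / (n + n_p)
        n += n_p

    y_all = np.concatenate([y1, y2])
    assert np.isclose(v, np.sum((y_all - y_all.mean()) ** 2))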
+ + partial_scores = [] + for x_row, y_row in _paired_partition(x, y): + tree_predictions = [] + for tree in self.trees: + tree_predictions.append(tree.predict(x_row)) + subset_score = _regression_score(y_row._blocks, tree_predictions) + partial_scores.append(subset_score) + + score = _merge_regression_scores(partial_scores) + + return compss_wait_on(score) if collect else score + + +def _base_soft_vote(classes, predictions): + aggregate = predictions[0][0] + for p in predictions[1:]: + aggregate += p[0] + predicted_labels = classes[np.argmax(aggregate, axis=1)] + return np.expand_dims(predicted_labels, axis=1) + + +def _base_hard_vote(classes, predictions): + mode = predictions[0][0] + for p in predictions[1:]: + mode += p[0] + predicted_labels = classes[np.argmax(mode, axis=1)] + return np.expand_dims(predicted_labels, axis=1) + + +def _soft_vote(classes, predictions): + predicted_labels = _base_soft_vote(classes, predictions) + return predicted_labels + + +@constraint(computing_units="${ComputingUnits}") +@task(y_blocks={Type: COLLECTION_IN, Depth: 2}, + predictions=COLLECTION_IN, returns=1) +def _soft_vote_score(y_blocks, classes, predictions): + predicted_labels = _base_soft_vote(classes, predictions) + real_labels = Array._merge_blocks(y_blocks).flatten() + correct = np.count_nonzero(predicted_labels.squeeze() == real_labels) + return correct, len(real_labels) + + +def _hard_vote(classes, predictions): + predicted_labels = _base_hard_vote(classes, predictions) + return predicted_labels + + +@constraint(computing_units="${ComputingUnits}") +@task(y_blocks={Type: COLLECTION_IN, Depth: 2}, + predictions=COLLECTION_IN, returns=1) +def _hard_vote_score(y_blocks, classes, predictions): + predicted_labels = _base_hard_vote(classes, predictions) + real_labels = Array._merge_blocks(y_blocks).flatten() + correct = np.count_nonzero(predicted_labels.squeeze() == real_labels) + return correct, len(real_labels) + + +def _resolve_try_features(try_features, n_features): + if try_features is None: + return n_features + elif try_features == "sqrt": + return int(math.sqrt(n_features)) + elif try_features == "third": + return max(1, n_features // 3) + elif try_features >= 1: + return int(try_features) + else: + return int(try_features*n_features) + + +@constraint(computing_units="${ComputingUnits}") +@task(predictions=COLLECTION_IN, returns=1) +def _join_predictions(predictions): + aggregate = np.block(predictions[0]) + for p in predictions[1:]: + aggregate += np.block(p) + labels = aggregate / len(predictions) + if len(labels.shape) == 1: + labels = labels.reshape(-1, 1) + return labels + + +@constraint(computing_units="${ComputingUnits}") +@task(y_blocks={Type: COLLECTION_IN, Depth: 2}, + predictions=COLLECTION_IN, returns=1) +def _regression_score(y_blocks, predictions): + y_true = Array._merge_blocks(y_blocks).flatten() + y_pred = np.mean(np.squeeze(predictions), axis=0) + n_samples = y_true.shape[0] + y_avg = np.mean(y_true) + u_partial = np.sum(np.square(y_true - y_pred), axis=0) + v_partial = np.sum(np.square(y_true - y_avg), axis=0) + return u_partial, v_partial, y_avg, n_samples + + +@constraint(computing_units="${ComputingUnits}") +@task(partial_scores=COLLECTION_IN, returns=1) +def _merge_classification_scores(partial_scores): + correct = sum(subset_score[0] for subset_score in partial_scores) + total = sum(subset_score[1] for subset_score in partial_scores) + return correct / total + + +@constraint(computing_units="${ComputingUnits}") +@task(partial_scores=COLLECTION_IN, returns=1) +def 
_merge_regression_scores(partial_scores): + u = v = avg = n = 0 + for u_p, v_p, avg_p, n_p in partial_scores: + u += u_p + + delta = avg_p - avg + avg += delta * n_p / (n + n_p) + v += v_p + delta ** 2 * n * n_p / (n + n_p) + n += n_p + + return 1 - u / v + + +def _encode_helper_cbor(encoder, obj): + encoder.encode(_encode_helper(obj)) + + +def _encode_helper(obj): + encoded = encoder_helper(obj) + if encoded is not None: + return encoded + elif callable(obj): + return { + "class_name": "callable", + "module": obj.__module__, + "name": obj.__name__, + } + elif isinstance(obj, SklearnTree): + return { + "class_name": obj.__class__.__name__, + "n_features": obj.n_features, + "n_classes": obj.n_classes, + "n_outputs": obj.n_outputs, + "items": obj.__getstate__(), + } + elif isinstance(obj, (RandomForestClassifier, RandomForestRegressor, + DecisionTreeClassifier, DecisionTreeRegressor, + SklearnDTClassifier, SklearnDTRegressor)): + return { + "class_name": obj.__class__.__name__, + "module_name": obj.__module__, + "items": obj.__dict__, + } + else: + return encode_forest_helper(obj) + + +def _decode_helper_cbor(decoder, obj): + """Special decoder wrapper for dislib using cbor2.""" + return _decode_helper(obj) + + +def _decode_helper(obj): + if isinstance(obj, dict) and "class_name" in obj: + class_name = obj["class_name"] + decoded = decoder_helper(class_name, obj) + if decoded is not None: + return decoded + elif class_name == "RandomState": + random_state = np.random.RandomState() + random_state.set_state(_decode_helper(obj["items"])) + return random_state + elif class_name == "Tree": + dict_ = _decode_helper(obj["items"]) + model = SklearnTree( + obj["n_features"], obj["n_classes"], obj["n_outputs"] + ) + model.__setstate__(dict_) + return model + elif class_name == "callable": + if obj["module"] == "numpy": + return getattr(np, obj["name"]) + return None + elif ( + class_name in SKLEARN_CLASSES.keys() + and "sklearn" in obj["module_name"] + ): + dict_ = _decode_helper(obj["items"]) + model = SKLEARN_CLASSES[obj["class_name"]]() + model.__dict__.update(dict_) + return model + else: + dict_ = _decode_helper(obj["items"]) + return decode_forest_helper(class_name, dict_) + return obj + + +def _sync_rf(rf): + """Sync the `try_features` and `n_classes` attribute of the different trees + since they cannot be synced recursively. + """ + try_features = compss_wait_on(rf.trees[0].try_features) + n_classes = compss_wait_on(rf.trees[0].n_classes) + for tree in rf.trees: + tree.try_features = try_features + tree.n_classes = n_classes diff --git a/dislib/trees/nested/tasks.py b/dislib/trees/nested/tasks.py new file mode 100644 index 00000000..5489ae80 --- /dev/null +++ b/dislib/trees/nested/tasks.py @@ -0,0 +1,78 @@ +from pycompss.api.task import task +from pycompss.api.parameter import COLLECTION_IN +from pycompss.api.parameter import COLLECTION_OUT +from pycompss.api.constraint import constraint +import numpy as np + + +@constraint(computing_units="${ComputingUnits}") +@task(fragment=COLLECTION_IN, fragment_buckets=COLLECTION_OUT, + range_min=COLLECTION_IN, range_max=COLLECTION_IN) +def filter_fragment(fragment, fragment_buckets, indexes_to_try, + num_buckets, range_min=0, range_max=1, + indexes_selected=np.array([0])): + """ + Task that filters a fragment entries for the given ranges. + * Ranges is a list of tuples where each tuple corresponds to + a range. + * Each tuple (range) is composed by two elements, the minimum + and maximum of each range. 
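The bucketing described in this docstring builds num_buckets + 1 edges with np.linspace over [range_min, range_max + 1] and assigns values to half-open intervals; a self-contained sketch with illustrative numbers:

    import numpy as np

    values = np.array([1.0, 4.0, 6.0, 9.0])  # one selected feature column
    range_min, range_max, num_buckets = 0.0, 9.0, 2

    edges = np.linspace(range_min, range_max + 1, num_buckets + 1)  # [0, 5, 10]
    buckets = [values[(values >= lo) & (values < hi)]
               for lo, hi in zip(edges[:-1], edges[1:])]
    print(buckets)  # [array([1., 4.]), array([6., 9.])]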
+ * The filtering is performed by checking which fragment entries' + keys belong to each range. + The entries that belong to each range are considered a bucket. + * The variable buckets is a list of lists, where the inner lists + correspond to the bucket of each range. + + :param fragment: The fragment to be sorted and filtered. + :param ranges: The ranges to apply when filtering. + :return: Multireturn of the buckets. + """ + if len(fragment) == 0: + for idx in range(len(fragment_buckets)): + for idx_2 in range(len(fragment_buckets[idx])): + fragment_buckets[idx][idx_2] = [] + return + fragment = np.block(fragment) + range_min = np.block(range_min) + range_max = np.block(range_max) + for index, value in enumerate(indexes_to_try): + if len(indexes_selected) > 1: + if indexes_selected[0] != 0: + actual_fragment = fragment[indexes_selected, value] + else: + actual_fragment = fragment[:, value] + else: + actual_fragment = fragment[:, value] + split_indexes = np.linspace(range_min[0, value], + range_max[0, value] + 1, num_buckets + 1) + ranges = [] + for ind in range(split_indexes.size - 1): + ranges.append((split_indexes[ind], split_indexes[ind + 1])) + i = 0 + for _range in ranges: + if actual_fragment is not None: + fragment_buckets[index][i] = [k_s_v for k_s_v in + actual_fragment if + _range[0] <= k_s_v < _range[1]] + else: + fragment_buckets[index][i] = [] + i += 1 + + +def combine_and_sort_bucket_elements(args): + """ + Task that combines the buckets received as args parameter and final + sorting. + + args structure = ([],[], ..., []) + + :param args: args that contains the buckets of a single range + :return: A list of tuples with the same format as provided initially + sorted by key. + """ + combined = [] + for e in args: + for kv in e: + combined.append(kv) + sorted_by_key = np.sort(combined) + return np.unique(sorted_by_key) diff --git a/dislib/trees/nested/terasort.py b/dislib/trees/nested/terasort.py new file mode 100644 index 00000000..82340626 --- /dev/null +++ b/dislib/trees/nested/terasort.py @@ -0,0 +1,95 @@ +import numpy as np +from dislib.trees.nested.tasks import filter_fragment, \ + combine_and_sort_bucket_elements +from dislib.data.array import Array +from pycompss.api.api import compss_delete_object, compss_wait_on + + +def terasorting(dataset, indexes_to_try, num_buckets, + range_min=0, range_max=1, + indexes_selected=None, reg_shape=None): + # Init buckets dictionary + list_of_buckets = [] + total_fragments = [] + if indexes_selected is not None: + for idx, d in enumerate(dataset): + fragment_buckets = [[object() for _ in range(num_buckets)] + for _ in range(len(indexes_to_try))] + idx_selected = indexes_selected[indexes_selected < + (idx + 1) * reg_shape] + filter_fragment(d, fragment_buckets, indexes_to_try, num_buckets, + range_min=range_min._blocks, + range_max=range_max._blocks, + indexes_selected=idx_selected[ + idx_selected >= + (idx) * reg_shape] % + reg_shape) + total_fragments.append(fragment_buckets) + total_fragments = np.array(compss_wait_on(total_fragments)) + for index in range(len(indexes_to_try)): + buckets = {} + for i in range(num_buckets): + buckets[i] = [] + for i in range(num_buckets): + buckets[i].append(total_fragments[:, index, i]) + list_of_buckets.append(buckets) + else: + buckets = {} + for d in dataset: + fragment_buckets = [[object() for _ in range(num_buckets)] + for _ in range(len(indexes_to_try))] + filter_fragment(d, fragment_buckets, indexes_to_try, num_buckets, + range_min=range_min._blocks, + range_max=range_max._blocks, + 
indexes_selected=np.array([0])) + total_fragments.append(fragment_buckets) + total_fragments = np.array(compss_wait_on(total_fragments)) + for index in range(len(indexes_to_try)): + buckets = {} + for i in range(num_buckets): + buckets[i] = [] + for i in range(num_buckets): + buckets[i].append(total_fragments[:, index, i]) + list_of_buckets.append(buckets) + result = dict() + real_key = 0 + for index in range(len(indexes_to_try)): + for key, value in list(list_of_buckets[index].items()): + result[real_key] = combine_and_sort_bucket_elements(value[0]) + real_key += 1 + [compss_delete_object(future_objects) for + value in buckets.items() for future_objects in value[1]] + return_list = [] + for idx, value in enumerate(result.values()): + if idx % num_buckets == 0: + return_list.append([]) + return_list[-1].append(value) + return return_list + + +def terasort(dataset, indexes_to_try, range_min=0, range_max=1, + indexes_selected=None, num_buckets=4): + """ + ---------------------- + Terasort main program + ---------------------- + This application generates a set of fragments that contain randomly + generated key, value tuples and sorts them all considering the key of + each tuple. + + :param num_fragments: Number of fragments to generate + :param num_entries: Number of entries (k,v tuples) within each fragment + :param num_buckets: Number of buckets to consider. + :param seed: Initial seed for the random number generator. + """ + if isinstance(dataset, Array): + result = terasorting(dataset._blocks, indexes_to_try, num_buckets, + range_min=range_min, + range_max=range_max, + indexes_selected=indexes_selected, + reg_shape=dataset._reg_shape[0]) + return np.array(result) + else: + result = terasorting(dataset, indexes_to_try, num_buckets, + range_min=range_min, range_max=range_max) + return np.array(result) diff --git a/docs/source/dislib.trees.distributed.rst b/docs/source/dislib.trees.distributed.rst new file mode 100644 index 00000000..5b958e14 --- /dev/null +++ b/docs/source/dislib.trees.distributed.rst @@ -0,0 +1,22 @@ +dislib.trees.distributed +============================================ + +.. autoclass:: dislib.trees.distributed.DecisionTreeClassifier + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: dislib.trees.distributed.DecisionTreeRegressor + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: dislib.trees.distributed.RandomForestClassifier + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: dislib.trees.distributed.RandomForestRegressor + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dislib.trees.mmap.rst b/docs/source/dislib.trees.mmap.rst new file mode 100644 index 00000000..c3b759dc --- /dev/null +++ b/docs/source/dislib.trees.mmap.rst @@ -0,0 +1,22 @@ +dislib.trees.mmap +============================================ + +.. autoclass:: dislib.trees.mmap.DecisionTreeClassifier + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: dislib.trees.mmap.DecisionTreeRegressor + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: dislib.trees.mmap.RandomForestClassifier + :members: + :undoc-members: + :show-inheritance: + +.. 
autoclass:: dislib.trees.mmap.RandomForestRegressor + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/dislib.trees.nested.rst b/docs/source/dislib.trees.nested.rst new file mode 100644 index 00000000..7a63ba42 --- /dev/null +++ b/docs/source/dislib.trees.nested.rst @@ -0,0 +1,22 @@ +dislib.trees.nested +============================================ + +.. autoclass:: dislib.trees.nested.DecisionTreeClassifier + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: dislib.trees.nested.DecisionTreeRegressor + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: dislib.trees.nested.RandomForestClassifier + :members: + :undoc-members: + :show-inheritance: + +.. autoclass:: dislib.trees.nested.RandomForestRegressor + :members: + :undoc-members: + :show-inheritance: diff --git a/run_ci_checks.sh b/run_ci_checks.sh index 48680b1b..7e068d51 100755 --- a/run_ci_checks.sh +++ b/run_ci_checks.sh @@ -14,5 +14,9 @@ echo "Running tests" # Run the tests in ./tests with PyCOMPSs ./run_tests.sh +echo "Running nesting tests" +# Run the tests in ./tests with PyCOMPSs +./run_test_nesting.sh + echo "Running code coverage" ./run_coverage.sh diff --git a/run_coverage.sh b/run_coverage.sh index f95a029f..62432b7b 100755 --- a/run_coverage.sh +++ b/run_coverage.sh @@ -2,6 +2,9 @@ # Run the coverage of the dislib using the tests in ./tests (sequential) coverage3 run --source dislib tests +coverage3 run -a --source dislib tests_nesting +# Create the report +coverage3 report # Report coverage results to the CLI. coverage3 report -m # Upload coverage report to codecov.io diff --git a/run_test_nesting.sh b/run_test_nesting.sh new file mode 100755 index 00000000..979e9d33 --- /dev/null +++ b/run_test_nesting.sh @@ -0,0 +1,146 @@ +#!/bin/bash + +base_app_dir="$(pwd)/tests_nesting/" +COMPSs_log_folder="/tmp/COMPSsWorker01" +target_log_folder="$(pwd)" +retry_num=1 + +echo $base_app_dir +echo $COMPSs_log_folder +echo $target_log_folder + +AGENT_PIDS="" +exit_value=0 +expected_time="60" +NUM_RETRIES="50" +app_name="Nesting_Tests" + + # Traps and Handlers +function kill_agents() { + for pid in ${AGENT_PIDS}; do + kill -SIGINT ${pid} 2>/dev/null + done +} +trap kill_agents EXIT + +#sed -i '//c'"${COMPSS_HOME}"'<\/InstallDir>' "${base_app_dir}"/project.xml + +mkdir -p /tmp/COMPSsWorker01/ + +echo "" +echo "*** RUNNING AGENTS TESTS ON DISLIB ***" +log_dir="${COMPSs_log_folder}/${app_name}_0${retry_num}/" +mkdir -p "${log_dir}" +output_log="${log_dir}test.outputlog" +error_log="${log_dir}test.errorlog" +touch "${output_log}" +touch "${error_log}" + +port_offset=100 + +for file in "${base_app_dir}"test_*; do + + corresponding_file=$(echo "${file}" | cut -d '/' -f4) + corresponding_file=$(echo "${corresponding_file}" | cut -d '.' 
-f1) + + log_dir="${COMPSs_log_folder}/${app_name}_0${retry_num}/" + mkdir -p "${log_dir}" + output_log="${log_dir}test.outputlog" + error_log="${log_dir}test.errorlog" + touch "${output_log}" + touch "${error_log}" + + # Starting agent + agent1_log_dir="${log_dir}/agent1/" + mkdir -p "${agent1_log_dir}" + agent1_output_log="${log_dir}agent1.outputlog" + agent1_error_log="${log_dir}agent1.errorlog" + + rest_port=$(( 46000 + port_offset + 1)) + comm_port=$(( 46000 + port_offset + 2)) + which compss_agent_start + compss_agent_start \ + --hostname="COMPSsWorker01" \ + --classpath="${base_app_dir}" \ + --log_dir="${agent1_log_dir}" \ + --rest_port="${rest_port}" \ + --comm_port="${comm_port}" \ + --pythonpath="${base_app_dir}" \ + --python_interpreter="python3"\ + 1>"${agent1_output_log}" 2>"${agent1_error_log}" & + + agent_pid="$!" + + AGENT_PIDS="${AGENT_PIDS} ${agent_pid}" + retries="${NUM_RETRIES}" + echo "testing first agent" + curl -XGET http://127.0.0.1:${rest_port}/COMPSs/test 1>/dev/null 2>/dev/null + ev=$? + + while [ "$ev" != "0" ] && [ "${retries}" -gt "0" ]; do + echo "testing agent on port ${rest_port}" + sleep 2s + retries=$((retries - 1 )) + curl -XGET http://127.0.0.1:${rest_port}/COMPSs/test 1>/dev/null 2>/dev/null + ev=$? + done + echo "TEST invoked" + RESULT=$(grep "test invoked" "${agent1_output_log}") + if [ -z "${RESULT}" ]; then + echo "Agent failed to start" > >(tee -a "${error_log}") + exit 1 + fi + echo "Agent started" > >(tee -a "${output_log}") + sleep 2s + + # Invoking DemoFunction method + "${COMPSS_HOME}/Runtime/scripts/user/compss_agent_call_operation" \ + --lang="PYTHON" \ + --master_node="127.0.0.1" \ + --master_port="${rest_port}" \ + --method_name="main" \ + --stop \ + "${corresponding_file}" > >(tee -a "${output_log}") 2> >(tee -a "${error_log}") + ev=$? + if [ "$ev" != "0" ]; then + echo "Could not invoke main method." > >(tee -a "${error_log}") + exit $ev + fi + echo "main function invoked" > >(tee -a "${output_log}") + + retries="3" + while [ ! -f "${agent1_log_dir}/jobs/job1_NEW.out" ] && [ "${retries}" -gt "0" ]; do + sleep 2s + retries=$((retries - 1 )) + done + if [ ! -f "${agent1_log_dir}/jobs/job1_NEW.out" ]; then + echo "Could not invoke main method." > >(tee -a "${error_log}") + exit 1 + fi + + wait ${AGENT_PIDS} + + if [ ! -f "${agent1_log_dir}/jobs/job2_NEW.out" ]; then + echo "Could not invoke nested method." > >(tee -a "${error_log}") + exit 1 + fi + + job1_end=$(grep "Result tests" "${agent1_log_dir}/jobs/job1_NEW.out") + job1_end_value=$(echo "${job1_end}" | cut -d ' ' -f3) + + if [ ! "${job1_end_value}" == "Passed" ]; then + echo "Unexpected integer value obtained from the test. Expecting Passed and ${job1_end_value} observed!" 
> >(tee -a "${error_log}") + exit 1 + fi + + + + kill_agents + rm -rf /tmp/COMPSsWorker01/* + AGENT_PIDS="" + + # Copy LOG files + # cp -rf "${log_dir}" "${target_log_folder}" + port_offset=$((port_offset + 100 )); +done +exit 0 diff --git a/tests/test_array.py b/tests/test_array.py index 566cf2ab..6947529b 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -1351,8 +1351,7 @@ def test_median(self): class MathTest(BaseTimedTestCase): - @parameterized.expand([((21, 33), (10, 15), False), - ((5, 10), (8, 1), False), + @parameterized.expand([((5, 10), (8, 1), False), ((17, 13), (1, 9), False), ((6, 1), (12, 23), False), ((1, 22), (25, 16), False), diff --git a/tests/test_tsqr.py b/tests/test_tsqr.py index 947b9706..63ccb2be 100644 --- a/tests/test_tsqr.py +++ b/tests/test_tsqr.py @@ -11,7 +11,7 @@ class QRTest(BaseTimedTestCase): @parameterized.expand([ - (2, 1, 64, 36), (3, 1, 64, 36), (4, 1, 32, 36), (16, 1, 20, 10), + (2, 1, 64, 36), (3, 1, 64, 36), (4, 1, 32, 36), ]) def test_tsqr(self, m_size, n_size, b_size_r, b_size_c): """Tests tsqr""" @@ -60,7 +60,7 @@ def test_tsqr_irregular(self, m_size, n_size, b_size_r, b_size_c): self.assertTrue(np.allclose(q.dot(r), m2b)) @parameterized.expand([ - (2, 1, 64, 36), (4, 1, 32, 36), (16, 1, 20, 10), + (2, 1, 64, 36), (4, 1, 32, 36), ]) def test_tsqr_inverse(self, m_size, n_size, b_size_r, b_size_c): """Tests tsqr""" @@ -109,7 +109,7 @@ def test_tsqr_complete_inverse_irregular(self, m_size, n_size, self.assertTrue(np.allclose(q.dot(r), m2b)) @parameterized.expand([ - (2, 1, 64, 36), (4, 1, 32, 36), (16, 1, 20, 10), + (2, 1, 64, 36), (4, 1, 32, 36), ]) def test_tsqr_inverse_indexes(self, m_size, n_size, b_size_r, b_size_c): """Tests tsqr""" @@ -130,7 +130,6 @@ def test_tsqr_inverse_indexes(self, m_size, n_size, b_size_r, b_size_c): @parameterized.expand([ (2, 1, 64, 36), (3, 1, 64, 36), (4, 1, 36, 32), - (16, 1, 20, 10), (16, 2, 20, 10), ]) def test_tsqr_reduced(self, m_size, n_size, b_size_r, b_size_c): """Tests tsqr""" @@ -175,7 +174,7 @@ def test_tsqr_reduced_irregular(self, m_size, n_size, b_size_r, b_size_c): self.assertTrue(np.allclose(q.dot(r), m2b)) @parameterized.expand([ - (2, 1, 64, 36), (4, 1, 36, 32), (16, 1, 20, 10), + (2, 1, 64, 36), (4, 1, 36, 32), ]) def test_tsqr_reduced_inverse(self, m_size, n_size, b_size_r, b_size_c): """Tests tsqr""" @@ -219,7 +218,7 @@ def test_tsqr_reduced_inverse_irregular(self, m_size, n_size, self.assertTrue(np.allclose(q.dot(r), m2b)) @parameterized.expand([ - (2, 1, 64, 36), (4, 1, 36, 32), (16, 1, 20, 10), + (2, 1, 64, 36), (4, 1, 36, 32), ]) def test_tsqr_reduced_inverse_indexes(self, m_size, n_size, b_size_r, b_size_c): @@ -242,7 +241,7 @@ def test_tsqr_reduced_inverse_indexes(self, m_size, n_size, self.assertTrue(q.shape == (q.shape[0], 3)) @parameterized.expand([ - (2, 1, 64, 36), (3, 1, 64, 36), (4, 1, 36, 32), (16, 1, 20, 10), + (2, 1, 64, 36), (3, 1, 64, 36), (4, 1, 36, 32), ]) def test_tsqr_compute_r(self, m_size, n_size, b_size_r, b_size_c): """Tests tsqr""" @@ -258,7 +257,7 @@ def test_tsqr_compute_r(self, m_size, n_size, b_size_r, b_size_c): self.assertTrue(np.allclose(np.triu(r), r)) @parameterized.expand([ - (2, 1, 64, 36), (3, 1, 64, 36), (4, 1, 36, 32), (16, 1, 20, 10), + (2, 1, 64, 36), (3, 1, 64, 36), (4, 1, 36, 32), ]) def test_tsqr_compute_r_reduced(self, m_size, n_size, b_size_r, b_size_c): """Tests tsqr""" diff --git a/tests_nesting/__init__.py b/tests_nesting/__init__.py new file mode 100644 index 00000000..8a9b2dab --- /dev/null +++ b/tests_nesting/__init__.py @@ -0,0 
+1,14 @@ +from time import time +import unittest +import numpy as np + + +class BaseTimedTestCase(unittest.TestCase): + def setUp(self): + np.random.seed() + self.start_time = time() + + def tearDown(self): + self.end_time = time() + print("Test %s took: %.3f seconds" % + (self.id(), self.end_time - self.start_time)) diff --git a/tests_nesting/__main__.py b/tests_nesting/__main__.py new file mode 100644 index 00000000..b28e8c19 --- /dev/null +++ b/tests_nesting/__main__.py @@ -0,0 +1,9 @@ +import unittest + + +def load_tests(loader, tests, pattern): + return loader.discover('./tests_nesting/') + + +if __name__ == '__main__': + unittest.main(verbosity=2) diff --git a/tests_nesting/test_decision_tree_nested.py b/tests_nesting/test_decision_tree_nested.py new file mode 100644 index 00000000..1e4243ed --- /dev/null +++ b/tests_nesting/test_decision_tree_nested.py @@ -0,0 +1,908 @@ +from pycompss.api.task import task +from tests import BaseTimedTestCase +import numpy as np +import dislib as ds +import dislib.trees.nested.decision_tree as dt_nested +from dislib.trees.nested.tasks import filter_fragment +from pycompss.api.api import compss_wait_on +from sklearn.metrics import r2_score, accuracy_score +from sklearn.datasets import make_classification, make_regression + + +def test_decision_tree_classifier(): + x1 = np.array( + [ + [0.3, -0.3], + [0.4, -0.5], + [0.5, -0.4], + [0.3, 0.3], + [0.4, 0.5], + [0.5, 0.4], + [-0.3, -0.3], + [-0.4, -0.5], + [-0.5, -0.4], + ] + ) + x2 = np.array([[0.4, -0.3], [0.4, 0.3], [-0.4, -0.3]]) + y1 = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) + y2 = np.array([0, 1, 2]) + + x1_ds = ds.array(x1, (3, 2)) + x2_ds = ds.array(x2, (3, 2)) + y1_ds = ds.array(y1[:, np.newaxis], (3, 1)) + + # Model + try_features = 2 + max_depth = np.inf + distr_depth = 1 + sklearn_max = 1e8 + bootstrap = True + seed = 0 + random_state = np.random.RandomState(seed) + n_classes = np.bincount(y1).shape[0] + # Test bootstrap + sample1 = dt_nested._sample_selection(x1, random_state, + bootstrap=True) + sample2 = dt_nested._sample_selection(x1, random_state, + bootstrap=False) + condition = np.array_equal(sample1, np.array([0, 2, 3, 3, 3, 4, 5, 5, 7])) + condition = condition and np.array_equal(sample2, np.array([0, 1, 2, 3, + 4, 5, 6, 7, + 8])) + + # Assert split wrapper + sample = sample2 + rang_min = x1_ds.min() + rang_max = x1_ds.max() + rang_max._blocks = compss_wait_on(rang_max._blocks) + rang_min._blocks = compss_wait_on(rang_min._blocks) + + split = dt_nested._compute_split( + x1_ds, + y1_ds, + n_classes, + indexes_selected=sample, + num_buckets=1, + range_min=rang_min, + range_max=rang_max, + number_split_points=2, + random_state=0, + ) + node_info, results_l, results_l_2, results_r, results_r_2 = split + node_info = compss_wait_on(node_info) + left_group = compss_wait_on(results_l) + y_l = compss_wait_on(results_l_2) + right_group = compss_wait_on(results_r) + y_r = compss_wait_on(results_r_2) + left_group_compare = np.block(left_group) + y_l_compare = np.block(y_l) + right_group_compare = np.block(right_group) + y_r_compare = np.block(y_r) + + condition = condition and node_info.node_info.index in (0, 1) + + condition = condition and np.array_equal(left_group_compare, + np.array([[0.3, -0.3], + [0.3, 0.3], + [-0.3, -0.3], + [-0.4, -0.5], + [-0.5, -0.4]] + )) + + condition = condition and np.array_equal(y_l_compare, + np.array([[0], [1], [2], + [2], [2]])) + + condition = condition and np.array_equal(right_group_compare, + np.array([[0.4, -0.5], + [0.5, -0.4], + [0.4, 0.5], + [0.5, 0.4]])) + 
+ condition = condition and np.array_equal(y_r_compare, + np.array([[0], [0], + [1], [1]])) + + condition = condition and np.isclose(node_info.node_info.value, 0.35) + + rang_min = x1_ds.min() + rang_max = x1_ds.max() + rang_max._blocks = compss_wait_on(rang_max._blocks) + rang_min._blocks = compss_wait_on(rang_min._blocks) + # Test tree + tree = dt_nested.DecisionTreeClassifier( + 3, + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + range_max=rang_max, + range_min=rang_min, + n_split_points=2, + split_computation="raw", + sync_after_fit=True, + ) + tree.fit(x1_ds, y1_ds) + + y_pred = compss_wait_on(tree.predict(x2_ds)) + condition = condition and np.array_equal(np.argmax(y_pred, axis=1)[0], y2) + y_pred_proba = compss_wait_on(tree.predict_proba(x2_ds)) + condition = condition and np.array_equal( + np.argmax(y_pred_proba, axis=1)[0], y2) + + random_state = np.random.RandomState(seed) + + tree = dt_nested.DecisionTreeClassifier( + 3, + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + n_split_points="auto", + split_computation="raw", + sync_after_fit=True, + ) + + tree.fit(x1_ds, y1_ds) + + y_pred = compss_wait_on(tree.predict(x2_ds)) + condition = condition and np.array_equal(np.argmax(y_pred, axis=1)[0], y2) + y_pred_proba = compss_wait_on(tree.predict_proba(x2_ds)) + condition = condition and np.array_equal( + np.argmax(y_pred_proba, axis=1)[0], y2) + + random_state = np.random.RandomState(seed) + + x, y = make_classification( + n_samples=300, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (50, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (50, 1)) + + tree = dt_nested.DecisionTreeClassifier( + 3, + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + n_split_points="sqrt", + split_computation="uniform_approximation", + sync_after_fit=True, + ) + + tree.fit(x1_ds, y1_ds) + + y_pred = compss_wait_on(tree.predict(x2_ds)) + condition = condition and np.array_equal(np.argmax(y_pred, axis=1)[0], y2) + y_pred_proba = compss_wait_on(tree.predict_proba(x2_ds)) + condition = condition and np.array_equal( + np.argmax(y_pred_proba, axis=1)[0], y2) + + random_state = np.random.RandomState(seed) + + tree = dt_nested.DecisionTreeClassifier( + 3, + try_features, + max_depth, + 2, + sklearn_max, + bootstrap, + random_state, + n_split_points=0.444, + split_computation="gaussian_approximation", + sync_after_fit=True, + ) + tree.fit(x_train, y_train) + y_pred = compss_wait_on(tree.predict(x_train)) + y_pred = np.argmax(np.vstack(y_pred), axis=1) + y_train = y_train.collect() + condition = condition and accuracy_score(y_train, y_pred) > 0.6 + y_pred_proba = compss_wait_on(tree.predict_proba(x_train)) + y_pred_proba = np.argmax(np.vstack(y_pred_proba), axis=1) + condition = condition and accuracy_score(y_train, + y_pred_proba) > 0.6 + return condition + + +def test_decision_tree_regressor(): + x1 = np.array( + [ + [0.3, -0.3], + [0.4, -0.5], + [0.5, -0.4], + [0.3, 0.3], + [0.4, 0.5], + [0.5, 0.4], + [-0.3, -0.3], + [-0.4, -0.5], + [-0.5, -0.4], + ] + ) + + # Model + try_features = 2 + max_depth = np.inf + distr_depth = 1 + sklearn_max = 1e8 + bootstrap = True + seed = 0 + random_state = np.random.RandomState(seed) + # Test bootstrap + sample1 = dt_nested._sample_selection(x1, random_state, + bootstrap=True) + sample2 = dt_nested._sample_selection(x1, random_state, + 
bootstrap=False) + condition = np.array_equal(sample1, np.array([0, 2, 3, 3, 3, 4, 5, 5, 7])) + condition = condition and np.array_equal(sample2, + np.array([0, 1, 2, 3, 4, + 5, 6, 7, 8])) + + x1, y1 = make_regression( + n_samples=1000, + n_features=10, + n_informative=4, + shuffle=True, + random_state=0, + ) + + x2 = x1[800:] + x1 = x1[:800] + y2 = y1[800:] + y1 = y1[:800] + + x1_ds = ds.array(x1, (400, 10)) + x2_ds = ds.array(x2, (100, 10)) + + y1_ds = ds.array(y1, (400, 1)) + rang_min = x1_ds.min() + rang_max = x1_ds.max() + rang_max._blocks = compss_wait_on(rang_max._blocks) + rang_min._blocks = compss_wait_on(rang_min._blocks) + + # Test tree + tree = dt_nested.DecisionTreeRegressor( + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + range_max=rang_max, + range_min=rang_min, + n_split_points=2, + split_computation="raw", + sync_after_fit=True, + ) + tree.fit(x1_ds, y1_ds) + y_pred = compss_wait_on(tree.predict(x2_ds)) + y_pred = np.block(y_pred) + condition = condition and r2_score(y_pred.flatten(), y2) > 0.1 + + tree = dt_nested.DecisionTreeRegressor( + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + n_split_points="auto", + split_computation="uniform_approximation", + sync_after_fit=True, + ) + tree.fit(x1_ds, y1_ds) + y_pred = compss_wait_on(tree.predict(x2_ds)) + y_pred = np.block(y_pred) + condition = condition and r2_score(y_pred.flatten(), y2) > 0.15 + + tree = dt_nested.DecisionTreeRegressor( + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + n_split_points="sqrt", + split_computation="gaussian_approximation", + sync_after_fit=True, + ) + tree.fit(x1_ds, y1_ds) + y_pred = compss_wait_on(tree.predict(x2_ds)) + y_pred = np.block(y_pred) + condition = condition and r2_score(y_pred.flatten(), y2) > 0.15 + + tree = dt_nested.DecisionTreeRegressor( + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + n_split_points=0.1, + split_computation="gaussian_approximation", + sync_after_fit=True, + ) + tree.fit(x1_ds, y1_ds) + y_pred = compss_wait_on(tree.predict(x2_ds)) + y_pred = np.block(y_pred) + condition = condition and r2_score(y_pred.flatten(), y2) > 0.15 + return condition + + +def test_auxiliar_functions(): + x1 = np.array( + [ + [0.3, -0.3], + [0.4, -0.5], + [0.5, -0.4], + [0.3, 0.3], + [0.4, 0.5], + [0.5, 0.4], + [-0.3, -0.3], + [-0.4, -0.5], + [-0.5, -0.4], + ] + ) + y1 = np.array([0, 0, 0, 1, 1, 0, 1, 0, 1]) + right_x, right_y, x, y, aggregate_r, aggregate = \ + dt_nested.apply_split_points_to_blocks(x1, y1, 1, + None, [2], + 2, np.array([]), + np.array([0, 0])) + + condition = right_x is None + condition = condition and right_y is None + condition = condition and np.all(x == x1) + condition = condition and np.all(y == y1) + condition = condition and np.all(aggregate_r == np.array([])) + condition = condition and np.all(aggregate == np.array([5, 4])) + + x1 = np.array( + [ + [0.3, -0.3], + [0.4, -0.5], + [0.5, -0.4], + ] + ) + y1 = np.array([0, 0, 0]) + right_x, right_y, x, y, aggregate_r, aggregate = \ + dt_nested.apply_split_points_to_blocks(x1, y1, 1, + None, [2], + 2, np.array([]), + np.array([0, 0])) + condition = condition and right_x is None + condition = condition and right_y is None + condition = condition and np.all(x == x1) + condition = condition and np.all(y == y1) + condition = condition and np.all(aggregate_r == np.array([])) + condition = condition and np.all(aggregate == np.array([3, 0])) + + right_x, 
right_y, x, y, aggregate_r, aggregate = \ + dt_nested.apply_split_points_to_blocks(None, None, 1, + 1, [2], + 2, np.array([]), + np.array([0, 0])) + condition = condition and right_x is None + condition = condition and right_y is None + condition = condition and x is None + condition = condition and y is None + condition = condition and np.all(aggregate_r == np.array([])) + condition = condition and np.all(aggregate == np.array([0, 0])) + + right_x, right_y, x, y, aggregate_r, \ + len_aggregate_r, aggregate_l, len_aggregate_l = \ + dt_nested.apply_split_points_to_blocks_regression(x1, y1, 1, + None, [2]) + condition = condition and right_x is None + condition = condition and right_y is None + condition = condition and np.all(x == x1) + condition = condition and np.all(y == y1) + condition = condition and np.all(aggregate_r == np.array([0])) + condition = condition and np.all(len_aggregate_r == np.array([0])) + condition = condition and np.all(aggregate_l == np.array([0])) + condition = condition and np.all(len_aggregate_l == np.array([3])) + + optimal_split_point = dt_nested.select_optimal_split_point(None, 3, + 4, 5) + condition = condition and optimal_split_point is None + + gini_value_when_empty_list = dt_nested.get_minimum_measure([], 3) + condition = condition and gini_value_when_empty_list[-1] == 1 + + mse_value_when_empty_list = dt_nested.get_minimum_measure([], + 3, + gini=False) + condition = condition and mse_value_when_empty_list[-1] == np.inf + + mse_value, produces_split = dt_nested. \ + merge_partial_results_compute_mse_both_sides([[None], [None]], + np.array([])) + mse_value = compss_wait_on(mse_value) + produces_split = compss_wait_on(produces_split) + condition = condition and np.all(mse_value == np.array([np.inf])) + condition = condition and produces_split is False + l_par_results = \ + [[[-4.93362945e+01, -2.91577501e+04, 5.91000000e+02], + [-4.64000975e+01, -3.03920638e+04, 6.55000000e+02], + [-3.81689727e+01, -2.71381396e+04, 7.11000000e+02]], + [[-4.90482439e+01, -1.46654249e+04, 2.99000000e+02], + [-4.67085998e+01, -1.50868777e+04, 3.23000000e+02], + [-3.98015317e+01, -1.38111315e+04, 3.47000000e+02]]] + mse_value, produces_split = dt_nested. \ + merge_partial_results_compute_mse_both_sides(l_par_results, + [[None], [None]]) + mse_value = compss_wait_on(mse_value) + produces_split = compss_wait_on(produces_split) + condition = condition and np.all(mse_value == np.array([np.inf])) + condition = condition and produces_split is False + + mse_value, produces_split = dt_nested. 
\ + merge_partial_results_compute_mse_both_sides(l_par_results, + [None]) + mse_value = compss_wait_on(mse_value) + produces_split = compss_wait_on(produces_split) + condition = condition and np.all(mse_value == np.array([np.inf])) + condition = condition and produces_split is False + + return condition + + +class RandomForestRegressorTest(BaseTimedTestCase): + def test_decision_tree_classifier(self): + x1 = np.array( + [ + [0.3, -0.3], + [0.4, -0.5], + [0.5, -0.4], + [0.3, 0.3], + [0.4, 0.5], + [0.5, 0.4], + [-0.3, -0.3], + [-0.4, -0.5], + [-0.5, -0.4], + ] + ) + x2 = np.array([[0.4, -0.3], [0.4, 0.3], [-0.4, -0.3]]) + y1 = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]) + y2 = np.array([0, 1, 2]) + + x1_ds = ds.array(x1, (3, 2)) + x2_ds = ds.array(x2, (3, 2)) + y1_ds = ds.array(y1[:, np.newaxis], (3, 1)) + + # Model + try_features = 2 + max_depth = np.inf + distr_depth = 1 + sklearn_max = 1e8 + bootstrap = True + seed = 0 + random_state = np.random.RandomState(seed) + n_classes = np.bincount(y1).shape[0] + # Test bootstrap + sample1 = dt_nested._sample_selection(x1, random_state, + bootstrap=True) + sample2 = dt_nested._sample_selection(x1, random_state, + bootstrap=False) + self.assertTrue( + np.array_equal(sample1, np.array([0, 2, 3, 3, 3, 4, 5, 5, 7])) + ) + self.assertTrue( + np.array_equal(sample2, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])) + ) + + # Assert split wrapper + sample = sample2 + rang_min = x1_ds.min() + rang_max = x1_ds.max() + + split = dt_nested._compute_split( + x1_ds, + y1_ds, + n_classes, + indexes_selected=sample, + num_buckets=1, + range_min=rang_min, + range_max=rang_max, + number_split_points=2, + random_state=0, + ) + node_info, results_l, results_l_2, results_r, results_r_2 = split + node_info = compss_wait_on(node_info) + left_group = results_l + y_l = results_l_2 + right_group = results_r + y_r = results_r_2 + left_group_compare = np.block(left_group) + y_l_compare = np.block(y_l) + right_group_compare = np.block(right_group) + y_r_compare = np.block(y_r) + + self.assertTrue(node_info.node_info.index in (0, 1)) + + self.assertTrue(np.array_equal(left_group_compare, + np.array([[0.3, -0.3], + [0.3, 0.3], + [-0.3, -0.3], + [-0.4, -0.5], + [-0.5, -0.4]] + ))) + self.assertTrue(np.array_equal(y_l_compare, + np.array([[0], [1], [2], + [2], [2]]))) + self.assertTrue( + np.array_equal(right_group_compare, np.array([[0.4, -0.5], + [0.5, -0.4], + [0.4, 0.5], + [0.5, 0.4]])) + ) + self.assertTrue(np.array_equal(y_r_compare, np.array([[0], [0], + [1], [1]]))) + self.assertAlmostEqual(node_info.node_info.value, 0.35) + + # Test tree + tree = dt_nested.DecisionTreeClassifier( + 3, + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + range_max=rang_max, + range_min=rang_min, + n_split_points=2, + split_computation="raw", + sync_after_fit=True, + ) + tree.fit(x1_ds, y1_ds) + y_pred = compss_wait_on(tree.predict(x2_ds)) + self.assertTrue(np.array_equal(np.argmax(y_pred, axis=1)[0], y2)) + y_pred_proba = tree.predict_proba(x2_ds) + self.assertTrue(np.array_equal(np.argmax(y_pred_proba, axis=1)[0], y2)) + + random_state = np.random.RandomState(seed) + + tree = dt_nested.DecisionTreeClassifier( + 3, + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + n_split_points="auto", + split_computation="raw", + sync_after_fit=True, + ) + + tree.fit(x1_ds, y1_ds) + y_pred = compss_wait_on(tree.predict(x2_ds)) + self.assertTrue(np.array_equal(np.argmax(y_pred, axis=1)[0], y2)) + y_pred_proba = tree.predict_proba(x2_ds) + 
self.assertTrue(np.array_equal(np.argmax(y_pred_proba, axis=1)[0], y2))
+
+        random_state = np.random.RandomState(seed)
+
+        x, y = make_classification(
+            n_samples=3000,
+            n_features=10,
+            n_classes=3,
+            n_informative=4,
+            n_redundant=2,
+            n_repeated=1,
+            n_clusters_per_class=2,
+            shuffle=True,
+            random_state=0,
+        )
+        x_train = ds.array(x[::2], (500, 10))
+        y_train = ds.array(y[::2][:, np.newaxis], (500, 1))
+
+        tree = dt_nested.DecisionTreeClassifier(
+            3,
+            try_features,
+            max_depth,
+            distr_depth,
+            sklearn_max,
+            bootstrap,
+            random_state,
+            n_split_points="sqrt",
+            split_computation="uniform_approximation",
+            sync_after_fit=True,
+        )
+
+        tree.fit(x1_ds, y1_ds)
+        y_pred = compss_wait_on(tree.predict(x2_ds))
+        self.assertTrue(np.array_equal(np.argmax(y_pred, axis=1)[0], y2))
+        y_pred_proba = tree.predict_proba(x2_ds)
+        self.assertTrue(np.array_equal(np.argmax(y_pred_proba, axis=1)[0], y2))
+
+        random_state = np.random.RandomState(seed)
+
+        tree = dt_nested.DecisionTreeClassifier(
+            3,
+            try_features,
+            max_depth,
+            2,
+            sklearn_max,
+            bootstrap,
+            random_state,
+            n_split_points=0.444,
+            split_computation="gaussian_approximation",
+            sync_after_fit=True,
+        )
+        tree.fit(x_train[:100], y_train[:100])
+        y_pred = tree.predict(x_train)
+        y_pred = np.argmax(np.vstack(y_pred), axis=1)
+        y_train = y_train.collect()
+        self.assertGreater(accuracy_score(y_train,
+                                          y_pred), 0.6)
+        y_pred_proba = tree.predict_proba(x_train)
+        y_pred_proba = np.argmax(np.vstack(y_pred_proba), axis=1)
+        self.assertGreater(accuracy_score(y_train,
+                                          y_pred_proba), 0.6)
+
+    def test_decision_tree_regressor(self):
+        x1 = np.array(
+            [
+                [0.3, -0.3],
+                [0.4, -0.5],
+                [0.5, -0.4],
+                [0.3, 0.3],
+                [0.4, 0.5],
+                [0.5, 0.4],
+                [-0.3, -0.3],
+                [-0.4, -0.5],
+                [-0.5, -0.4],
+            ]
+        )
+
+        # Model
+        try_features = 2
+        max_depth = np.inf
+        distr_depth = 1
+        sklearn_max = 1e8
+        bootstrap = True
+        seed = 0
+        random_state = np.random.RandomState(seed)
+        # Test bootstrap
+        sample1 = dt_nested._sample_selection(x1, random_state,
+                                              bootstrap=True)
+        sample2 = dt_nested._sample_selection(x1, random_state,
+                                              bootstrap=False)
+        self.assertTrue(
+            np.array_equal(sample1, np.array([0, 2, 3, 3, 3, 4, 5, 5, 7]))
+        )
+        self.assertTrue(
+            np.array_equal(sample2, np.array([0, 1, 2, 3, 4, 5, 6, 7, 8]))
+        )
+
+        x1, y1 = make_regression(
+            n_samples=1000,
+            n_features=10,
+            n_informative=4,
+            shuffle=True,
+            random_state=0,
+        )
+
+        x2 = x1[800:]
+        x1 = x1[:800]
+        y2 = y1[800:]
+        y1 = y1[:800]
+
+        x1_ds = ds.array(x1, (400, 10))
+        x2_ds = ds.array(x2, (100, 10))
+
+        y1_ds = ds.array(y1, (400, 1))
+        rang_min = x1_ds.min()
+        rang_max = x1_ds.max()
+
+        # Test tree
+        tree = dt_nested.DecisionTreeRegressor(
+            try_features,
+            max_depth,
+            distr_depth,
+            sklearn_max,
+            bootstrap,
+            random_state,
+            range_max=rang_max,
+            range_min=rang_min,
+            n_split_points=2,
+            split_computation="raw",
+            sync_after_fit=True,
+        )
+        tree.fit(x1_ds, y1_ds)
+        y_pred = compss_wait_on(tree.predict(x2_ds))
+        y_pred = np.block(y_pred)
+        self.assertGreater(r2_score(y_pred.flatten(), y2), 0.15)
+
+        tree = dt_nested.DecisionTreeRegressor(
+            try_features,
+            max_depth,
+            distr_depth,
+            sklearn_max,
+            bootstrap,
+            random_state,
+            n_split_points="auto",
+            split_computation="uniform_approximation",
+            sync_after_fit=True,
+        )
+        tree.fit(x1_ds, y1_ds)
+        y_pred = compss_wait_on(tree.predict(x2_ds))
+        y_pred = np.block(y_pred)
+        self.assertGreater(r2_score(y_pred.flatten(), y2), 0.15)
+
+        tree = dt_nested.DecisionTreeRegressor(
+            try_features,
+            max_depth,
+            distr_depth,
+            sklearn_max,
+            bootstrap,
+            
random_state, + n_split_points="sqrt", + split_computation="gaussian_approximation", + sync_after_fit=True, + ) + tree.fit(x1_ds, y1_ds) + y_pred = compss_wait_on(tree.predict(x2_ds)) + y_pred = np.block(y_pred) + self.assertGreater(r2_score(y_pred.flatten(), y2), 0.15) + + tree = dt_nested.DecisionTreeRegressor( + try_features, + max_depth, + distr_depth, + sklearn_max, + bootstrap, + random_state, + n_split_points=0.1, + split_computation="gaussian_approximation", + sync_after_fit=True, + ) + tree.fit(x1_ds, y1_ds) + y_pred = compss_wait_on(tree.predict(x2_ds)) + y_pred = np.block(y_pred) + self.assertGreater(r2_score(y_pred.flatten(), y2), 0.15) + + def test_auxiliar_functions(self): + x1 = np.array( + [ + [0.3, -0.3], + [0.4, -0.5], + [0.5, -0.4], + [0.3, 0.3], + [0.4, 0.5], + [0.5, 0.4], + [-0.3, -0.3], + [-0.4, -0.5], + [-0.5, -0.4], + ] + ) + y1 = np.array([0, 0, 0, 1, 1, 0, 1, 0, 1]) + right_x, right_y, x, y, aggregate_r, aggregate = \ + dt_nested.apply_split_points_to_blocks(x1, y1, 1, + None, [2], + 2, np.array([]), + np.array([0, 0])) + self.assertTrue(right_x is None) + self.assertTrue(right_y is None) + self.assertTrue(np.all(x == x1)) + self.assertTrue(np.all(y == y1)) + self.assertTrue(np.all(aggregate_r == np.array([]))) + self.assertTrue(np.all(aggregate == np.array([5, 4]))) + + x1 = np.array( + [ + [0.3, -0.3], + [0.4, -0.5], + [0.5, -0.4], + ] + ) + y1 = np.array([0, 0, 0]) + right_x, right_y, x, y, aggregate_r, aggregate = \ + dt_nested.apply_split_points_to_blocks(x1, y1, 1, + None, [2], + 2, np.array([]), + np.array([0, 0])) + self.assertTrue(right_x is None) + self.assertTrue(right_y is None) + self.assertTrue(np.all(x == x1)) + self.assertTrue(np.all(y == y1)) + self.assertTrue(np.all(aggregate_r == np.array([]))) + self.assertTrue(np.all(aggregate == np.array([3, 0]))) + + right_x, right_y, x, y, aggregate_r, aggregate = \ + dt_nested.apply_split_points_to_blocks(None, None, 1, + 1, [2], + 2, np.array([]), + np.array([0, 0])) + self.assertTrue(right_x is None) + self.assertTrue(right_y is None) + self.assertTrue(x is None) + self.assertTrue(y is None) + self.assertTrue(np.all(aggregate_r == np.array([]))) + self.assertTrue(np.all(aggregate == np.array([0, 0]))) + + right_x, right_y, x, y, aggregate_r, \ + len_aggregate_r, aggregate_l, len_aggregate_l = \ + dt_nested.apply_split_points_to_blocks_regression(x1, y1, 1, + None, [2]) + self.assertTrue(right_x is None) + self.assertTrue(right_y is None) + self.assertTrue(np.all(x == x1)) + self.assertTrue(np.all(y == y1)) + self.assertTrue(np.all(aggregate_r == np.array([0]))) + self.assertTrue(np.all(len_aggregate_r == np.array([0]))) + self.assertTrue(np.all(aggregate_l == np.array([0]))) + self.assertTrue(np.all(len_aggregate_l == np.array([3]))) + + optimal_split_point = dt_nested.select_optimal_split_point(None, 3, + 4, 5) + self.assertTrue(optimal_split_point is None) + + gini_value_when_empty_list = dt_nested.get_minimum_measure([], 3) + self.assertTrue(gini_value_when_empty_list[-1] == 1) + + mse_value_when_empty_list = dt_nested.get_minimum_measure([], + 3, + gini=False) + self.assertTrue(mse_value_when_empty_list[-1] == np.inf) + + mse_value, produces_split = dt_nested.\ + merge_partial_results_compute_mse_both_sides([[None], [None]], + np.array([])) + self.assertTrue(np.all(mse_value == np.array([np.inf]))) + self.assertTrue(produces_split is False) + l_par_results = \ + [[[-4.93362945e+01, -2.91577501e+04, 5.91000000e+02], + [-4.64000975e+01, -3.03920638e+04, 6.55000000e+02], + [-3.81689727e+01, 
-2.71381396e+04, 7.11000000e+02]], + [[-4.90482439e+01, -1.46654249e+04, 2.99000000e+02], + [-4.67085998e+01, -1.50868777e+04, 3.23000000e+02], + [-3.98015317e+01, -1.38111315e+04, 3.47000000e+02]]] + mse_value, produces_split = dt_nested. \ + merge_partial_results_compute_mse_both_sides(l_par_results, + [[None], [None]]) + self.assertTrue(np.all(mse_value == np.array([np.inf]))) + self.assertTrue(produces_split is False) + + mse_value, produces_split = dt_nested.\ + merge_partial_results_compute_mse_both_sides(l_par_results, + [None]) + self.assertTrue(np.all(mse_value == np.array([np.inf]))) + self.assertTrue(produces_split is False) + + fragment_buckets = [[object()]] + filter_fragment([], fragment_buckets, np.array([2, 3]), + 3, range_min=[0], range_max=[1], + indexes_selected=np.array([0])) + self.assertTrue(fragment_buckets == [[[]]]) + + +@task() +def main(): + test = test_decision_tree_classifier() + test2 = test_decision_tree_regressor() + test3 = test_auxiliar_functions() + test = test and test2 and test3 + if test: + print("Result tests: Passed", flush=True) + else: + print("Result tests: Failed", flush=True) + + +if __name__ == "__main__": + main() diff --git a/tests_nesting/test_rf_classifier_nested.py b/tests_nesting/test_rf_classifier_nested.py new file mode 100644 index 00000000..60552976 --- /dev/null +++ b/tests_nesting/test_rf_classifier_nested.py @@ -0,0 +1,757 @@ +import numpy as np +from parameterized import parameterized +from pycompss.api.api import compss_wait_on +from sklearn import datasets +from sklearn.datasets import make_classification + +import dislib as ds +from dislib.classification import RandomForestClassifier +import dislib.data.util.model as utilmodel +from dislib.trees.nested.forest import _resolve_try_features +from tests import BaseTimedTestCase +from pycompss.api.task import task + + +def test_make_classification_score(): + """Tests RandomForestClassifier fit and score with default params.""" + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestClassifier(n_classes=3, + random_state=0, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + accuracy = rf.score(x_test, y_test, collect=True) + return accuracy > 0.7 + + +def test_make_classification_predict_and_distr_depth(): + """Tests RandomForestClassifier fit and predict with a distr_depth.""" + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = y[1::2] + + rf = RandomForestClassifier(n_estimators=4, n_classes=3, + distr_depth=2, + n_split_points="auto", + random_state=0, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + y_pred = rf.predict(x_test).collect() + accuracy1 = np.count_nonzero(y_pred == y_test) / len(y_test) + + rf = RandomForestClassifier(n_estimators=4, n_classes=3, + distr_depth=2, + n_split_points="sqrt", + random_state=0, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + y_pred = rf.predict(x_test).collect() + accuracy2 = 
np.count_nonzero(y_pred == y_test) / len(y_test)
+
+    rf = RandomForestClassifier(n_estimators=4, n_classes=3,
+                                distr_depth=2,
+                                n_split_points=0.2,
+                                random_state=0, mmap=False,
+                                nested=True)
+
+    rf.fit(x_train, y_train)
+    y_pred = rf.predict(x_test).collect()
+    accuracy3 = np.count_nonzero(y_pred == y_test) / len(y_test)
+
+    rf = RandomForestClassifier(n_estimators=4, n_classes=3,
+                                distr_depth=2,
+                                n_split_points=0.2,
+                                random_state=0, mmap=False,
+                                nested=True, hard_vote=True)
+
+    rf.fit(x_train, y_train)
+    y_pred = rf.predict(x_test).collect()
+    accuracy4 = np.count_nonzero(y_pred == y_test) / len(y_test)
+
+    return accuracy1 > 0.7 and accuracy2 > 0.7 and accuracy3 > 0.7 \
+        and accuracy4 > 0.7
+
+
+def test_make_classification_fit_predict():
+    """Tests RandomForestClassifier fit_predict with default params."""
+    x, y = make_classification(
+        n_samples=3000,
+        n_features=10,
+        n_classes=3,
+        n_informative=4,
+        n_redundant=2,
+        n_repeated=1,
+        n_clusters_per_class=2,
+        shuffle=True,
+        random_state=0,
+    )
+    x_train = ds.array(x[::2], (1000, 10))
+    y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+
+    rf = RandomForestClassifier(n_classes=3, distr_depth=1,
+                                random_state=0, mmap=False,
+                                nested=True)
+
+    y_pred = rf.fit(x_train, y_train).predict(x_train).collect()
+    y_train = y_train.collect()
+    accuracy = np.count_nonzero(y_pred == y_train) / len(y_train)
+    return accuracy > 0.7
+
+
+def test_make_classification_sklearn_max_predict():
+    """Tests RandomForestClassifier predict with sklearn_max."""
+    x, y = make_classification(
+        n_samples=3000,
+        n_features=10,
+        n_classes=3,
+        n_informative=4,
+        n_redundant=2,
+        n_repeated=1,
+        n_clusters_per_class=2,
+        shuffle=True,
+        random_state=0,
+    )
+    x_train = ds.array(x[::2], (1000, 10))
+    y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+    x_test = ds.array(x[1::2], (1000, 10))
+    y_test = y[1::2]
+
+    rf = RandomForestClassifier(n_classes=3, distr_depth=1,
+                                random_state=0, sklearn_max=10,
+                                mmap=False,
+                                nested=True)
+
+    rf.fit(x_train, y_train)
+    y_pred = rf.predict(x_test).collect()
+    accuracy = np.count_nonzero(y_pred == y_test) / len(y_test)
+    return accuracy > 0.7
+
+
+def test_make_classification_sklearn_max_predict_proba():
+    """Tests RandomForestClassifier predict_proba with sklearn_max."""
+    x, y = make_classification(
+        n_samples=3000,
+        n_features=10,
+        n_classes=3,
+        n_informative=4,
+        n_redundant=2,
+        n_repeated=1,
+        n_clusters_per_class=2,
+        shuffle=True,
+        random_state=0,
+    )
+    x_train = ds.array(x[::2], (1000, 10))
+    y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+    x_test = ds.array(x[1::2], (1000, 10))
+    y_test = y[1::2]
+
+    rf = RandomForestClassifier(n_classes=3, distr_depth=1, random_state=0,
+                                sklearn_max=10, mmap=False,
+                                nested=True)
+
+    rf.fit(x_train, y_train)
+    probabilities = rf.predict_proba(x_test).collect()
+    rf.classes = np.arange(rf.n_classes)
+    y_pred = rf.classes[np.argmax(probabilities, axis=1)]
+    accuracy = np.count_nonzero(y_pred == y_test) / len(y_test)
+    return accuracy > 0.7
+
+
+def test_make_classification_hard_vote_predict():
+    """Tests RandomForestClassifier predict with hard_vote."""
+    x, y = make_classification(
+        n_samples=3000,
+        n_features=10,
+        n_classes=3,
+        n_informative=4,
+        n_redundant=2,
+        n_repeated=1,
+        n_clusters_per_class=2,
+        shuffle=True,
+        random_state=0,
+    )
+    x_train = ds.array(x[::2], (1000, 10))
+    y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+    x_test = ds.array(x[1::2], (1000, 10))
+    y_test = y[1::2]
+
+    rf = RandomForestClassifier(
+        n_classes=3, distr_depth=1, random_state=0,
+        sklearn_max=10, hard_vote=True, mmap=False,
+        nested=True
+    )
+
+    rf.fit(x_train, y_train)
+    y_pred = rf.predict(x_test).collect()
+    accuracy = np.count_nonzero(y_pred == y_test) / len(y_test)
+    return accuracy > 0.7
+
+
+def test_make_classification_hard_vote_score_mix():
+    """Tests RandomForestClassifier score with hard_vote, sklearn_max,
+    distr_depth and max_depth."""
+    x, y = make_classification(
+        n_samples=3000,
+        n_features=10,
+        n_classes=3,
+        n_informative=4,
+        n_redundant=2,
+        n_repeated=1,
+        n_clusters_per_class=2,
+        shuffle=True,
+        random_state=0,
+    )
+    x_train = ds.array(x[::2], (1000, 10))
+    y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+    x_test = ds.array(x[1::2], (1000, 10))
+    y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1))
+
+    rf = RandomForestClassifier(
+        n_classes=3,
+        random_state=0,
+        sklearn_max=100,
+        distr_depth=1,
+        max_depth=12,
+        hard_vote=True,
+        mmap=False,
+        nested=True,
+    )
+
+    rf.fit(x_train, y_train)
+    accuracy = compss_wait_on(rf.score(x_test, y_test))
+    return accuracy > 0.7
+
+
+def test_score_on_iris():
+    """Tests RandomForestClassifier with a minimal example."""
+    x, y = datasets.load_iris(return_X_y=True)
+    ds_fit = ds.array(x[::2], block_size=(30, 2))
+    fit_y = ds.array(y[::2].reshape(-1, 1), block_size=(30, 1))
+    ds_validate = ds.array(x[1::2], block_size=(30, 2))
+    validate_y = ds.array(y[1::2].reshape(-1, 1), block_size=(30, 1))
+
+    rf = RandomForestClassifier(
+        n_classes=3, distr_depth=1,
+        n_estimators=1, max_depth=2, random_state=0,
+        mmap=False, nested=True
+    )
+    rf.fit(ds_fit, fit_y)
+    accuracy1 = rf.score(ds_validate, validate_y, True)
+    accuracy2 = rf.score(ds_validate, validate_y, False)
+    accuracy2 = compss_wait_on(accuracy2)
+
+    # Accuracy should be above 2/3 for any seed with this configuration.
+    return accuracy1 > (2 / 3) and accuracy2 > (2 / 3)
+
+
+def test_save_load():
+    """
+    Tests that the save and load methods work properly with the three
+    expected formats and that an exception is raised when a non-supported
+    format is provided.
+ """ + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestClassifier(n_classes=3, distr_depth=1, random_state=0, + n_estimators=5, mmap=False, nested=True) + rf.fit(x_train, y_train) + rf.save_model("./saved_model") + + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, nested=True) + rf2.load_model("./saved_model") + y_pred = rf2.predict(x_train).collect() + y_train = y_train.collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + condition = accuracy > 0.7 + + rf.save_model("./saved_model", save_format="cbor") + + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, nested=True) + rf2.load_model("./saved_model", load_format="cbor") + + y_pred = rf2.predict(x_train).collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + condition = condition and accuracy > 0.7 + + rf.save_model("./saved_model", save_format="pickle") + + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, nested=True) + rf2.load_model("./saved_model", load_format="pickle") + y_pred = rf2.predict(x_train).collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + condition = condition and accuracy > 0.7 + condition_error = False + try: + rf.save_model("./saved_model", save_format="txt") + except ValueError: + condition_error = True + condition = condition and condition_error + + condition_error = False + try: + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, + nested=True) + rf2.load_model("./saved_model", load_format="txt") + except ValueError: + condition_error = True + condition = condition and condition_error + + rf = RandomForestClassifier(n_classes=3, distr_depth=1, random_state=0, + n_estimators=1, mmap=False, nested=True) + x_train2 = ds.array(x[::2], (1000, 10)) + y_train2 = ds.array(y[::2][:, np.newaxis], (1000, 1)) + rf.fit(x_train2, y_train2) + rf.save_model("./saved_model", overwrite=False) + + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, nested=True) + rf2.load_model("./saved_model", load_format="pickle") + y_pred = rf2.predict(x_train).collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + condition = condition and accuracy > 0.7 + + cbor2_module = utilmodel.cbor2 + utilmodel.cbor2 = None + condition_error = False + try: + rf.save_model("./saved_model_error", save_format="cbor") + except ModuleNotFoundError: + condition_error = True + condition = condition and condition_error + condition_error = False + try: + rf2.load_model("./saved_model_error", load_format="cbor") + except ModuleNotFoundError: + condition_error = True + condition = condition and condition_error + utilmodel.cbor2 = cbor2_module + return condition + + +class RFTest(BaseTimedTestCase): + def test_make_classification_score(self): + """Tests RandomForestClassifier fit and score with default params.""" + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + 
y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestClassifier(n_classes=3, + random_state=0, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + accuracy = compss_wait_on(rf.score(x_test, y_test)) + self.assertGreater(accuracy, 0.7) + + def test_make_classification_predict_and_distr_depth(self): + """Tests RandomForestClassifier fit and predict with a distr_depth.""" + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = y[1::2] + + rf = RandomForestClassifier(n_estimators=2, n_classes=3, + distr_depth=2, + n_split_points="auto", + random_state=0, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + y_pred = rf.predict(x_test).collect() + accuracy = np.count_nonzero(y_pred == y_test) / len(y_test) + self.assertGreater(accuracy, 0.7) + + rf = RandomForestClassifier(n_estimators=2, n_classes=3, + distr_depth=2, + n_split_points="sqrt", + random_state=0, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + y_pred = rf.predict(x_test).collect() + accuracy = np.count_nonzero(y_pred == y_test) / len(y_test) + self.assertGreater(accuracy, 0.7) + + rf = RandomForestClassifier(n_estimators=2, n_classes=3, + distr_depth=2, + n_split_points=0.2, + random_state=0, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + y_pred = rf.predict(x_test).collect() + accuracy = np.count_nonzero(y_pred == y_test) / len(y_test) + self.assertGreater(accuracy, 0.7) + + def test_make_classification_fit_predict(self): + """Tests RandomForestClassifier fit_predict with default params.""" + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, mmap=False, + nested=True) + + y_pred = rf.fit(x_train, y_train).predict(x_train).collect() + y_train = y_train.collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + self.assertGreater(accuracy, 0.7) + + def test_make_classification_sklearn_max_predict(self): + """Tests RandomForestClassifier predict with sklearn_max.""" + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = y[1::2] + + rf = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, sklearn_max=10, + mmap=False, + nested=True) + + rf.fit(x_train, y_train) + y_pred = rf.predict(x_test).collect() + accuracy = np.count_nonzero(y_pred == y_test) / len(y_test) + self.assertGreater(accuracy, 0.7) + + def test_make_classification_sklearn_max_predict_proba(self): + """Tests RandomForestClassifier predict_proba with sklearn_max.""" + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + 
n_clusters_per_class=2,
+            shuffle=True,
+            random_state=0,
+        )
+        x_train = ds.array(x[::2], (1000, 10))
+        y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+        x_test = ds.array(x[1::2], (1000, 10))
+        y_test = y[1::2]
+
+        rf = RandomForestClassifier(n_classes=3, distr_depth=1, random_state=0,
+                                    sklearn_max=10, mmap=False,
+                                    nested=True)
+
+        rf.fit(x_train, y_train)
+        probabilities = rf.predict_proba(x_test).collect()
+        rf.classes = np.arange(rf.n_classes)
+        y_pred = rf.classes[np.argmax(probabilities, axis=1)]
+        accuracy = np.count_nonzero(y_pred == y_test) / len(y_test)
+        self.assertGreater(accuracy, 0.7)
+
+    def test_make_classification_hard_vote_predict(self):
+        """Tests RandomForestClassifier predict with hard_vote."""
+        x, y = make_classification(
+            n_samples=3000,
+            n_features=10,
+            n_classes=3,
+            n_informative=4,
+            n_redundant=2,
+            n_repeated=1,
+            n_clusters_per_class=2,
+            shuffle=True,
+            random_state=0,
+        )
+        x_train = ds.array(x[::2], (1000, 10))
+        y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+        x_test = ds.array(x[1::2], (1000, 10))
+        y_test = y[1::2]
+
+        rf = RandomForestClassifier(
+            n_classes=3, distr_depth=1, random_state=0,
+            sklearn_max=10, hard_vote=True, mmap=False,
+            nested=True
+        )
+
+        rf.fit(x_train, y_train)
+        y_pred = rf.predict(x_test).collect()
+        accuracy = np.count_nonzero(y_pred == y_test) / len(y_test)
+        self.assertGreater(accuracy, 0.7)
+
+    def test_make_classification_hard_vote_score_mix(self):
+        """Tests RandomForestClassifier score with hard_vote, sklearn_max,
+        distr_depth and max_depth."""
+        x, y = make_classification(
+            n_samples=3000,
+            n_features=10,
+            n_classes=3,
+            n_informative=4,
+            n_redundant=2,
+            n_repeated=1,
+            n_clusters_per_class=2,
+            shuffle=True,
+            random_state=0,
+        )
+        x_train = ds.array(x[::2], (1000, 10))
+        y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+        x_test = ds.array(x[1::2], (1000, 10))
+        y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1))
+
+        rf = RandomForestClassifier(
+            n_classes=3,
+            random_state=0,
+            sklearn_max=100,
+            distr_depth=1,
+            max_depth=12,
+            hard_vote=True,
+            mmap=False,
+            nested=True,
+        )
+
+        rf.fit(x_train, y_train)
+        accuracy = compss_wait_on(rf.score(x_test, y_test))
+        self.assertGreater(accuracy, 0.7)
+
+    @parameterized.expand([(True,), (False,)])
+    def test_score_on_iris(self, collect):
+        """Tests RandomForestClassifier with a minimal example."""
+        x, y = datasets.load_iris(return_X_y=True)
+        ds_fit = ds.array(x[::2], block_size=(30, 2))
+        fit_y = ds.array(y[::2].reshape(-1, 1), block_size=(30, 1))
+        ds_validate = ds.array(x[1::2], block_size=(30, 2))
+        validate_y = ds.array(y[1::2].reshape(-1, 1), block_size=(30, 1))
+
+        rf = RandomForestClassifier(
+            n_classes=3, distr_depth=1,
+            n_estimators=1, max_depth=2, random_state=0,
+            mmap=False, nested=True
+        )
+        rf.fit(ds_fit, fit_y)
+        accuracy = rf.score(ds_validate, validate_y, collect)
+        if not collect:
+            accuracy = compss_wait_on(accuracy)
+
+        # Accuracy should be above 2/3 for any seed with this configuration.
+        self.assertGreater(accuracy, 2 / 3)
+
+    def test_save_load(self):
+        """
+        Tests that the save and load methods work properly with the three
+        expected formats and that an exception is raised when a non-supported
+        format is provided.
+ """ + x, y = make_classification( + n_samples=3000, + n_features=10, + n_classes=3, + n_informative=4, + n_redundant=2, + n_repeated=1, + n_clusters_per_class=2, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestClassifier(n_classes=3, distr_depth=1, random_state=0, + n_estimators=5, mmap=False, nested=True) + rf.fit(x_train, y_train) + rf.save_model("./saved_model") + + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, nested=True) + rf2.load_model("./saved_model") + y_pred = rf2.predict(x_train).collect() + y_train = y_train.collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + self.assertGreater(accuracy, 0.7) + + rf.save_model("./saved_model", save_format="cbor") + + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, nested=True) + rf2.load_model("./saved_model", load_format="cbor") + + y_pred = rf2.predict(x_train).collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + self.assertGreater(accuracy, 0.7) + + rf.save_model("./saved_model", save_format="pickle") + + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, nested=True) + rf2.load_model("./saved_model", load_format="pickle") + y_pred = rf2.predict(x_train).collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + self.assertGreater(accuracy, 0.7) + + with self.assertRaises(ValueError): + rf.save_model("./saved_model", save_format="txt") + + with self.assertRaises(ValueError): + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, + nested=True) + rf2.load_model("./saved_model", load_format="txt") + + rf = RandomForestClassifier(n_classes=3, distr_depth=1, random_state=0, + n_estimators=1, mmap=False, nested=True) + x_train2 = ds.array(x[::2], (1000, 10)) + y_train2 = ds.array(y[::2][:, np.newaxis], (1000, 1)) + rf.fit(x_train2, y_train2) + rf.save_model("./saved_model", overwrite=False) + + rf2 = RandomForestClassifier(n_classes=3, distr_depth=1, + random_state=0, + n_estimators=5, mmap=False, nested=True) + rf2.load_model("./saved_model", load_format="pickle") + y_pred = rf2.predict(x_train).collect() + accuracy = np.count_nonzero(y_pred == y_train) / len(y_train) + self.assertGreater(accuracy, 0.7) + + cbor2_module = utilmodel.cbor2 + utilmodel.cbor2 = None + with self.assertRaises(ModuleNotFoundError): + rf.save_model("./saved_model_error", save_format="cbor") + with self.assertRaises(ModuleNotFoundError): + rf2.load_model("./saved_model_error", load_format="cbor") + utilmodel.cbor2 = cbor2_module + + def test_other_functions(self): + number_features = _resolve_try_features("sqrt", 9) + self.assertTrue(number_features == 3) + number_features = _resolve_try_features("third", 12) + self.assertTrue(number_features == 4) + number_features = _resolve_try_features(None, 12) + self.assertTrue(number_features == 12) + number_features = _resolve_try_features(2, 12) + self.assertTrue(number_features == 2) + number_features = _resolve_try_features(0.5, 12) + self.assertTrue(number_features == 6) + + +@task() +def main(): + test = test_make_classification_score() + test2 = test_make_classification_predict_and_distr_depth() + test3 = test_make_classification_fit_predict() + test4 = test_make_classification_sklearn_max_predict() + test5 = 
test_make_classification_sklearn_max_predict_proba() + test = test and test2 and test3 and test4 and test5 + test6 = test_make_classification_hard_vote_predict() + test7 = test_make_classification_hard_vote_score_mix() + test8 = test_score_on_iris() + test9 = test_save_load() + test = test and test6 and test7 and test8 and test9 + if test: + print("Result tests: Passed", flush=True) + else: + print("Result tests: Failed", flush=True) + + +if __name__ == "__main__": + main() diff --git a/tests_nesting/test_rf_regressor_nested.py b/tests_nesting/test_rf_regressor_nested.py new file mode 100644 index 00000000..c7eba5ff --- /dev/null +++ b/tests_nesting/test_rf_regressor_nested.py @@ -0,0 +1,434 @@ +import numpy as np +from pycompss.api.api import compss_wait_on +from sklearn.datasets import make_regression + +import dislib as ds +from dislib.regression import RandomForestRegressor +import dislib.data.util.model as utilmodel + +from tests import BaseTimedTestCase +from pycompss.api.task import task +from math import isclose + + +def _determination_coefficient(y_true, y_pred): + u = np.sum(np.square(y_true - y_pred)) + v = np.sum(np.square(y_true - np.mean(y_true))) + return 1 - u / v + + +def test_make_regression(): + """Tests RandomForestRegressor fit and score with default params.""" + x, y = make_regression( + n_samples=12000, + n_features=40, + n_informative=4, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (4000, 20)) + y_train = ds.array(y[::2][:, np.newaxis], (4000, 1)) + x_test = ds.array(x[1::2], (4000, 20)) + y_test = ds.array(y[1::2][:, np.newaxis], (4000, 1)) + + rf = RandomForestRegressor(distr_depth=1, random_state=0, + n_estimators=2, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + accuracy1 = compss_wait_on(rf.score(x_test, y_test)) + + y_pred = rf.predict(x_test).collect() + y_true = y[1::2] + accuracy2 = _determination_coefficient(y_true, y_pred) + + return accuracy1 > 0.5 and accuracy2 > 0.5 and \ + isclose(accuracy1, accuracy2) + + +def test_make_regression_predict_and_distr_depth(): + """Tests RandomForestRegressor fit and predict with a distr_depth.""" + x, y = make_regression( + n_samples=3000, + n_features=10, + n_informative=4, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestRegressor(distr_depth=1, random_state=0, + n_estimators=2, + mmap=False, nested=True) + + rf.fit(x_train, y_train) + accuracy1 = compss_wait_on(rf.score(x_test, y_test)) + + y_pred = rf.predict(x_test).collect() + y_true = y[1::2] + accuracy2 = _determination_coefficient(y_true, y_pred) + + return accuracy1 > 0.75 and accuracy2 > 0.75 and \ + isclose(accuracy1, accuracy2) + + +def test_make_regression_sklearn_max_predict(): + """Tests RandomForestRegressor predict with sklearn_max.""" + x, y = make_regression( + n_samples=3000, + n_features=10, + n_informative=4, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestRegressor(distr_depth=2, n_estimators=2, + random_state=0, sklearn_max=10, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + accuracy1 = compss_wait_on(rf.score(x_test, y_test)) + + y_pred = rf.predict(x_test).collect() + y_true = y[1::2] + accuracy2 = 
_determination_coefficient(y_true, y_pred)
+
+    return accuracy1 > 0.75 and accuracy2 > 0.75 and \
+        isclose(accuracy1, accuracy2)
+
+
+def test_save_load():
+    """Tests the save and the load methods of the RandomForestRegressor
+    class"""
+    x, y = make_regression(
+        n_samples=3000,
+        n_features=10,
+        n_informative=4,
+        shuffle=True,
+        random_state=0,
+    )
+    x_train = ds.array(x[::2], (1000, 10))
+    y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
+    x_test = ds.array(x[1::2], (1000, 10))
+    y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1))
+
+    rf = RandomForestRegressor(distr_depth=1,
+                               random_state=0, n_estimators=2, mmap=False,
+                               nested=True)
+    rf.fit(x_train, y_train)
+    rf.save_model("./rf_regressor")
+
+    rf2 = RandomForestRegressor(distr_depth=1,
+                                random_state=0, n_estimators=2, mmap=False,
+                                nested=True)
+    rf2.load_model("./rf_regressor")
+
+    accuracy1 = compss_wait_on(rf.score(x_test, y_test))
+    accuracy_loaded1 = compss_wait_on(rf2.score(x_test, y_test))
+
+    y_pred = rf.predict(x_test).collect()
+    y_pred_loaded = rf2.predict(x_test).collect()
+    y_true = y[1::2]
+    accuracy2 = _determination_coefficient(y_true, y_pred)
+    accuracy_loaded2 = _determination_coefficient(y_true, y_pred_loaded)
+
+    condition = accuracy1 == accuracy_loaded1
+    condition = condition and accuracy2 == accuracy_loaded2
+
+    rf.save_model("./rf_regressor", save_format="cbor")
+
+    rf2 = RandomForestRegressor(distr_depth=1,
+                                random_state=0, n_estimators=2, mmap=False,
+                                nested=True)
+    rf2.load_model("./rf_regressor", load_format="cbor")
+
+    accuracy_loaded1 = compss_wait_on(rf2.score(x_test, y_test))
+
+    y_pred_loaded = rf2.predict(x_test).collect()
+    accuracy_loaded2 = _determination_coefficient(y_true, y_pred_loaded)
+
+    condition = condition and accuracy1 == accuracy_loaded1
+    condition = condition and accuracy2 == accuracy_loaded2
+
+    rf.save_model("./rf_regressor", save_format="pickle")
+
+    rf2 = RandomForestRegressor(distr_depth=1,
+                                random_state=0, n_estimators=2, mmap=False,
+                                nested=True)
+    rf2.load_model("./rf_regressor", load_format="pickle")
+
+    accuracy_loaded1 = compss_wait_on(rf2.score(x_test, y_test))
+
+    y_pred_loaded = rf2.predict(x_test).collect()
+    accuracy_loaded2 = _determination_coefficient(y_true, y_pred_loaded)
+
+    condition = condition and accuracy1 == accuracy_loaded1
+    condition = condition and accuracy2 == accuracy_loaded2
+
+    condition_error = False
+    try:
+        rf.save_model("./rf_regressor", save_format="txt")
+    except ValueError:
+        condition_error = True
+    condition = condition and condition_error
+
+    condition_error = False
+    try:
+        rf2 = RandomForestRegressor(distr_depth=1,
+                                    random_state=0, n_estimators=2,
+                                    mmap=False, nested=True)
+        rf2.load_model("./rf_regressor", load_format="txt")
+    except ValueError:
+        condition_error = True
+    condition = condition and condition_error
+
+    rf1 = RandomForestRegressor(distr_depth=1,
+                                random_state=0, n_estimators=1,
+                                mmap=False, nested=True)
+    rf1.save_model("./rf_regressor", overwrite=False)
+
+    rf2 = RandomForestRegressor(distr_depth=1,
+                                random_state=0, n_estimators=2,
+                                mmap=False, nested=True)
+    rf2.load_model("./rf_regressor", load_format="pickle")
+
+    accuracy_loaded1 = compss_wait_on(rf2.score(x_test, y_test))
+
+    y_pred_loaded = rf2.predict(x_test).collect()
+    accuracy_loaded2 = _determination_coefficient(y_true, y_pred_loaded)
+
+    condition = condition and accuracy1 == accuracy_loaded1
+    condition = condition and accuracy2 == accuracy_loaded2
+
+    cbor2_module = utilmodel.cbor2
+    utilmodel.cbor2 = None
+    condition_error = False
+    try:
+        rf.save_model("./rf_regressor", save_format="cbor")
+    except 
ModuleNotFoundError: + condition_error = True + condition = condition and condition_error + try: + rf2.load_model("./rf_regressor", load_format="cbor") + except ModuleNotFoundError: + condition_error = True + condition = condition and condition_error + utilmodel.cbor2 = cbor2_module + return condition + + +class RandomForestRegressorTest(BaseTimedTestCase): + def test_make_regression(self): + """Tests RandomForestRegressor fit and score with default params.""" + x, y = make_regression( + n_samples=12000, + n_features=40, + n_informative=4, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (4000, 20)) + y_train = ds.array(y[::2][:, np.newaxis], (4000, 1)) + x_test = ds.array(x[1::2], (4000, 20)) + y_test = ds.array(y[1::2][:, np.newaxis], (4000, 1)) + + rf = RandomForestRegressor(distr_depth=2, random_state=0, + n_estimators=2, mmap=False, nested=True) + + rf.fit(x_train, y_train) + accuracy1 = compss_wait_on(rf.score(x_test, y_test)) + + y_pred = rf.predict(x_test).collect() + y_true = y[1::2] + accuracy2 = _determination_coefficient(y_true, y_pred) + + self.assertGreater(accuracy1, 0.50) + self.assertGreater(accuracy2, 0.50) + self.assertAlmostEqual(accuracy1, accuracy2) + + def test_make_regression_predict_and_distr_depth(self): + """Tests RandomForestRegressor fit and predict with a distr_depth.""" + x, y = make_regression( + n_samples=3000, + n_features=10, + n_informative=4, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestRegressor(distr_depth=1, random_state=0, + n_estimators=2, + mmap=False, nested=True) + + rf.fit(x_train, y_train) + accuracy1 = compss_wait_on(rf.score(x_test, y_test)) + + y_pred = rf.predict(x_test).collect() + y_true = y[1::2] + accuracy2 = _determination_coefficient(y_true, y_pred) + + self.assertGreater(accuracy1, 0.6) + self.assertGreater(accuracy2, 0.6) + self.assertAlmostEqual(accuracy1, accuracy2) + + def test_make_regression_sklearn_max_predict(self): + """Tests RandomForestRegressor predict with sklearn_max.""" + x, y = make_regression( + n_samples=3000, + n_features=10, + n_informative=4, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestRegressor(distr_depth=1, n_estimators=2, + random_state=0, sklearn_max=10, mmap=False, + nested=True) + + rf.fit(x_train, y_train) + accuracy1 = compss_wait_on(rf.score(x_test, y_test)) + + y_pred = rf.predict(x_test).collect() + y_true = y[1::2] + accuracy2 = _determination_coefficient(y_true, y_pred) + + self.assertGreater(accuracy1, 0.75) + self.assertGreater(accuracy2, 0.75) + self.assertAlmostEqual(accuracy1, accuracy2) + + def test_save_load(self): + """Tests the save and the load methods of the RandomForestRegressor + class""" + x, y = make_regression( + n_samples=3000, + n_features=10, + n_informative=4, + shuffle=True, + random_state=0, + ) + x_train = ds.array(x[::2], (1000, 10)) + y_train = ds.array(y[::2][:, np.newaxis], (1000, 1)) + x_test = ds.array(x[1::2], (1000, 10)) + y_test = ds.array(y[1::2][:, np.newaxis], (1000, 1)) + + rf = RandomForestRegressor(distr_depth=1, + random_state=0, n_estimators=2, mmap=False, + nested=True) + rf.fit(x_train, y_train) + rf.save_model("./rf_regressor") + + 
rf2 = RandomForestRegressor(distr_depth=1, + random_state=0, n_estimators=2, mmap=False, + nested=True) + rf2.load_model("./rf_regressor") + + accuracy1 = compss_wait_on(rf.score(x_test, y_test)) + accuracy_loaded1 = compss_wait_on(rf2.score(x_test, y_test)) + + y_pred = rf.predict(x_test).collect() + y_pred_loaded = rf2.predict(x_test).collect() + y_true = y[1::2] + accuracy2 = _determination_coefficient(y_true, y_pred) + accuracy_loaded2 = _determination_coefficient(y_true, y_pred_loaded) + + self.assertEqual(accuracy1, accuracy_loaded1) + self.assertEqual(accuracy2, accuracy_loaded2) + + rf.save_model("./rf_regressor", save_format="cbor") + + rf2 = RandomForestRegressor(distr_depth=1, + random_state=0, n_estimators=2, mmap=False, + nested=True) + rf2.load_model("./rf_regressor", load_format="cbor") + + accuracy_loaded1 = compss_wait_on(rf2.score(x_test, y_test)) + + y_pred_loaded = rf2.predict(x_test).collect() + accuracy_loaded2 = _determination_coefficient(y_true, y_pred_loaded) + + self.assertEqual(accuracy1, accuracy_loaded1) + self.assertEqual(accuracy2, accuracy_loaded2) + + rf.save_model("./rf_regressor", save_format="pickle") + + rf2 = RandomForestRegressor(distr_depth=1, + random_state=0, n_estimators=2, mmap=False, + nested=True) + rf2.load_model("./rf_regressor", load_format="pickle") + + accuracy_loaded1 = compss_wait_on(rf2.score(x_test, y_test)) + + y_pred_loaded = rf2.predict(x_test).collect() + accuracy_loaded2 = _determination_coefficient(y_true, y_pred_loaded) + + self.assertEqual(accuracy1, accuracy_loaded1) + self.assertEqual(accuracy2, accuracy_loaded2) + + with self.assertRaises(ValueError): + rf.save_model("./rf_regressor", save_format="txt") + + with self.assertRaises(ValueError): + rf2 = RandomForestRegressor(distr_depth=1, + random_state=0, n_estimators=2, + mmap=False, nested=True) + rf2.load_model("./rf_regressor", load_format="txt") + + rf1 = RandomForestRegressor(distr_depth=1, + random_state=0, n_estimators=1, + mmap=False, nested=True) + rf1.save_model("./rf_regressor", overwrite=False) + + rf2 = RandomForestRegressor(distr_depth=1, + random_state=0, n_estimators=2, + mmap=False, nested=True) + rf2.load_model("./rf_regressor", load_format="pickle") + + accuracy_loaded1 = compss_wait_on(rf2.score(x_test, y_test)) + + y_pred_loaded = rf2.predict(x_test).collect() + accuracy_loaded2 = _determination_coefficient(y_true, y_pred_loaded) + + self.assertEqual(accuracy1, accuracy_loaded1) + self.assertEqual(accuracy2, accuracy_loaded2) + + cbor2_module = utilmodel.cbor2 + utilmodel.cbor2 = None + with self.assertRaises(ModuleNotFoundError): + rf.save_model("./rf_regressor", save_format="cbor") + with self.assertRaises(ModuleNotFoundError): + rf2.load_model("./rf_regressor", load_format="cbor") + utilmodel.cbor2 = cbor2_module + + +@task() +def main(): + test = test_make_regression() + test2 = test_make_regression_predict_and_distr_depth() + test3 = test_make_regression_sklearn_max_predict() + test4 = test_save_load() + print("TEST", flush=True) + print(test) + print(test2) + print(test3) + print(test4, flush=True) + test = test and test2 and test3 and test4 + if test: + print("Result tests: Passed", flush=True) + else: + print("Result tests: Failed", flush=True) + + +if __name__ == "__main__": + main()
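
For reference, a minimal usage sketch of the nested mode exercised by the tests above. It relies only on the public API shown in this patch (RandomForestClassifier with mmap=False and nested=True, fit on dislib ds-arrays); the dataset and parameter values here are illustrative, not prescriptive:

    import numpy as np
    from sklearn.datasets import make_classification

    import dislib as ds
    from dislib.classification import RandomForestClassifier

    # The nested backend is fed blocked ds-arrays directly, as in the tests.
    x, y = make_classification(n_samples=3000, n_features=10, n_classes=3,
                               n_informative=4, random_state=0)
    x_train = ds.array(x[::2], (1000, 10))
    y_train = ds.array(y[::2][:, np.newaxis], (1000, 1))
    x_test = ds.array(x[1::2], (1000, 10))

    # nested=True selects the nested-task implementation added by this patch;
    # mmap=False matches how every nested test in this patch is configured.
    rf = RandomForestClassifier(n_classes=3, n_estimators=4, distr_depth=2,
                                random_state=0, mmap=False, nested=True)
    rf.fit(x_train, y_train)
    y_pred = rf.predict(x_test).collect()

Note that, as run_test_nesting.sh above shows, the nested tests are launched through COMPSs agents (compss_agent_start followed by compss_agent_call_operation) rather than a plain runcompss invocation.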