From 80b8b4d0ca79f3c4237e5dcdd4de21d3a74b4b25 Mon Sep 17 00:00:00 2001 From: Junbo Chen Date: Mon, 8 Jun 2015 15:52:22 +0800 Subject: [PATCH 1/4] add MinMaxScaling as a preprocessor --- pylearn2/datasets/preprocessing.py | 73 ++++++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/pylearn2/datasets/preprocessing.py b/pylearn2/datasets/preprocessing.py index 8676d7c842..8806b9a6c4 100644 --- a/pylearn2/datasets/preprocessing.py +++ b/pylearn2/datasets/preprocessing.py @@ -215,10 +215,10 @@ def apply(self, dataset, can_fit=False): X = dataset.get_topological_view() num_topological_dimensions = len(X.shape) - 2 if num_topological_dimensions != len(self.patch_shape): - raise ValueError("ExtractGridPatches with " - + str(len(self.patch_shape)) - + " topological dimensions called on" - + " dataset with " + + raise ValueError("ExtractGridPatches with " + + str(len(self.patch_shape)) + + " topological dimensions called on" + + " dataset with " + str(num_topological_dimensions) + ".") num_patches = X.shape[0] max_strides = [X.shape[0] - 1] @@ -414,11 +414,11 @@ def apply(self, dataset, can_fit=False): num_topological_dimensions = len(X.shape) - 2 if num_topological_dimensions != len(self.patch_shape): - raise ValueError("ExtractPatches with " - + str(len(self.patch_shape)) - + " topological dimensions called on " - + "dataset with " - + str(num_topological_dimensions) + ".") + raise ValueError("ExtractPatches with " + + str(len(self.patch_shape)) + + " topological dimensions called on " + + "dataset with " + + str(num_topological_dimensions) + ".") # batch size output_shape = [self.num_patches] @@ -1913,3 +1913,58 @@ def apply(self, dataset, can_fit=False): dataset.X = X[start:stop, :] if y is not None: dataset.y = y[start:stop, :] + + +class MinMaxScaling(ExamplewisePreprocessor): + """ + Subtracts the min and divides by the |max - min|. 
+ + Parameters + ---------- + global_min : bool, optional + If `True`, subtract the (scalar) min over every element + in the design matrix. If `False`, subtract the min from + each column (feature) separately. Default is `False`. + global_max : bool, optional + If `True`, after centering, divide by the (scalar) + |max - min| of every element in the design matrix. If `False`, + divide by the column-wise (per-feature) |max - min|. + Default is `False`. + mm_eps : float, optional + Stabilization factor added to the |max - min| before + dividing, to prevent |max - min| very close to zero + from causing the feature values to blow up too much. + Default is `1e-4`. + """ + + def __init__(self, global_min=False, global_max=False, mm_eps=1e-4): + self._global_min = global_min + self._global_max = global_max + self._mm_eps = mm_eps + self._min = None + self._max = None + + def apply(self, dataset, can_fit=False): + """ + :math:`WMAPE = \frac{\sum_i|R_i - P_i|}{\sum_i |R_i|}` + """ + X = dataset.get_design_matrix() + if can_fit: + self._min = X.min() if self._global_min else X.min(axis=0) + self._max = X.max() if self._global_max else X.max(axis=0) + else: + if self._min is None or self._max is None: + raise ValueError("can_fit is False, but Normalization object " + "has no stored min or max") + new = (X - self._min) / (self._mm_eps + + numpy.abs(self._max - self._min)) + dataset.set_design_matrix(new) + + def as_block(self): + if self._min is None or self._max is None: + raise ValueError("can't convert %s to block without fitting" + % self.__class__.__name__) + return ExamplewiseAddScaleTransform( + add=-self._min, + multiply=numpy.abs(self._max - self._min) ** -1 + ) From f6036d0de7fbb4e051bf85891b7dd7d7f6f763ba Mon Sep 17 00:00:00 2001 From: Junbo Chen Date: Mon, 8 Jun 2015 15:59:34 +0800 Subject: [PATCH 2/4] add math formula for apply() method --- pylearn2/datasets/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pylearn2/datasets/preprocessing.py b/pylearn2/datasets/preprocessing.py index 8806b9a6c4..5a2a6cdcc6 100644 --- a/pylearn2/datasets/preprocessing.py +++ b/pylearn2/datasets/preprocessing.py @@ -1946,7 +1946,7 @@ def __init__(self, global_min=False, global_max=False, mm_eps=1e-4): def apply(self, dataset, can_fit=False): """ - :math:`WMAPE = \frac{\sum_i|R_i - P_i|}{\sum_i |R_i|}` + :math:`\hat{x} = \frac{x - min(x)}{|max(x) - min(x)|}` """ X = dataset.get_design_matrix() if can_fit: From b391253f53d011ce1bb8c01827a3b031041ce35c Mon Sep 17 00:00:00 2001 From: Junbo Chen Date: Mon, 8 Jun 2015 17:34:27 +0800 Subject: [PATCH 3/4] add math formula for apply() method --- pylearn2/datasets/preprocessing.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pylearn2/datasets/preprocessing.py b/pylearn2/datasets/preprocessing.py index 5a2a6cdcc6..d02139de57 100644 --- a/pylearn2/datasets/preprocessing.py +++ b/pylearn2/datasets/preprocessing.py @@ -1946,7 +1946,7 @@ def __init__(self, global_min=False, global_max=False, mm_eps=1e-4): def apply(self, dataset, can_fit=False): """ - :math:`\hat{x} = \frac{x - min(x)}{|max(x) - min(x)|}` + :math:`\hat{x} = \frac{x - min(x)}{\mid max(x) - min(x) \mid}` """ X = dataset.get_design_matrix() if can_fit: @@ -1965,6 +1965,5 @@ def as_block(self): raise ValueError("can't convert %s to block without fitting" % self.__class__.__name__) return ExamplewiseAddScaleTransform( - add=-self._min, - multiply=numpy.abs(self._max - self._min) ** -1 - ) + add=-self._min, + multiply=numpy.abs(self._max - self._min) ** -1) From 70594ad7ee1c48e07d58e493bcd0f363b78ec5ef Mon Sep 17 00:00:00 2001 From: Junbo Chen Date: Mon, 8 Jun 2015 20:39:20 +0800 Subject: [PATCH 4/4] add math formula for apply() method --- pylearn2/datasets/preprocessing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pylearn2/datasets/preprocessing.py b/pylearn2/datasets/preprocessing.py index d02139de57..088c01e974 100644 --- 
a/pylearn2/datasets/preprocessing.py +++ b/pylearn2/datasets/preprocessing.py @@ -1917,7 +1917,7 @@ def apply(self, dataset, can_fit=False): class MinMaxScaling(ExamplewisePreprocessor): """ - Subtracts the min and divides by the |max - min|. + Subtracts the min and divides by the \|max - min\|. Parameters ---------- @@ -1927,12 +1927,12 @@ class MinMaxScaling(ExamplewisePreprocessor): each column (feature) separately. Default is `False`. global_max : bool, optional If `True`, after centering, divide by the (scalar) - |max - min| of every element in the design matrix. If `False`, - divide by the column-wise (per-feature) |max - min|. + \|max - min\| of every element in the design matrix. If `False`, + divide by the column-wise (per-feature) \|max - min\|. Default is `False`. mm_eps : float, optional - Stabilization factor added to the |max - min| before - dividing, to prevent |max - min| very close to zero + Stabilization factor added to the \|max - min\| before + dividing, to prevent \|max - min\| very close to zero from causing the feature values to blow up too much. Default is `1e-4`. """