From 80b8b4d0ca79f3c4237e5dcdd4de21d3a74b4b25 Mon Sep 17 00:00:00 2001
From: Junbo Chen <junbochen2000@gmail.com>
Date: Mon, 8 Jun 2015 15:52:22 +0800
Subject: [PATCH 1/4] add MinMaxScaling as a preprocessor

---
 pylearn2/datasets/preprocessing.py | 73 ++++++++++++++++++++++++++----
 1 file changed, 64 insertions(+), 9 deletions(-)

diff --git a/pylearn2/datasets/preprocessing.py b/pylearn2/datasets/preprocessing.py
index 8676d7c842..8806b9a6c4 100644
--- a/pylearn2/datasets/preprocessing.py
+++ b/pylearn2/datasets/preprocessing.py
@@ -215,10 +215,10 @@ def apply(self, dataset, can_fit=False):
         X = dataset.get_topological_view()
         num_topological_dimensions = len(X.shape) - 2
         if num_topological_dimensions != len(self.patch_shape):
-            raise ValueError("ExtractGridPatches with "
-                             + str(len(self.patch_shape))
-                             + " topological dimensions called on"
-                             + " dataset with " +
+            raise ValueError("ExtractGridPatches with " +
+                             str(len(self.patch_shape)) +
+                             " topological dimensions called on" +
+                             " dataset with " +
                              str(num_topological_dimensions) + ".")
         num_patches = X.shape[0]
         max_strides = [X.shape[0] - 1]
@@ -414,11 +414,11 @@ def apply(self, dataset, can_fit=False):
         num_topological_dimensions = len(X.shape) - 2
 
         if num_topological_dimensions != len(self.patch_shape):
-            raise ValueError("ExtractPatches with "
-                             + str(len(self.patch_shape))
-                             + " topological dimensions called on "
-                             + "dataset with "
-                             + str(num_topological_dimensions) + ".")
+            raise ValueError("ExtractPatches with " +
+                             str(len(self.patch_shape)) +
+                             " topological dimensions called on " +
+                             "dataset with " +
+                             str(num_topological_dimensions) + ".")
 
         # batch size
         output_shape = [self.num_patches]
@@ -1913,3 +1913,58 @@ def apply(self, dataset, can_fit=False):
         dataset.X = X[start:stop, :]
         if y is not None:
             dataset.y = y[start:stop, :]
+
+
+class MinMaxScaling(ExamplewisePreprocessor):
+    """
+    Subtracts the min and divides by the |max - min|.
+
+    Parameters
+    ----------
+    global_min : bool, optional
+        If `True`, subtract the (scalar) min over every element
+        in the design matrix. If `False`, subtract the min from
+        each column (feature) separately. Default is `False`.
+    global_std : bool, optional
+        If `True`, after centering, divide by the (scalar)
+        |max - min| of every element in the design matrix. If `False`,
+        divide by the column-wise (per-feature) |max - min|.
+        Default is `False`.
+    mm_eps : float, optional
+        Stabilization factor added to the |max - min| before
+        dividing, to prevent |max - min| very close to zero
+        from causing the feature values to blow up too much.
+        Default is `1e-4`.
+    """
+
+    def __init__(self, global_min=False, global_max=False, mm_eps=1e-4):
+        self._global_min = global_min
+        self._global_max = global_max
+        self._mm_eps = mm_eps
+        self._min = None
+        self._max = None
+
+    def apply(self, dataset, can_fit=False):
+        """
+        :math:`WMAPE = \frac{\sum_i|R_i - P_i|}{\sum_i |R_i|}`
+        """
+        X = dataset.get_design_matrix()
+        if can_fit:
+            self._min = X.min() if self._global_min else X.min(axis=0)
+            self._max = X.max() if self._global_max else X.max(axis=0)
+        else:
+            if self._min is None or self._max is None:
+                raise ValueError("can_fit is False, but Normalization object "
+                                 "has no stored min or max")
+        new = (X - self._min) / (self._mm_eps +
+                                 numpy.abs(self._max - self._min))
+        dataset.set_design_matrix(new)
+
+    def as_block(self):
+        if self._min is None or self._max is None:
+            raise ValueError("can't convert %s to block without fitting"
+                             % self.__class__.__name__)
+        return ExamplewiseAddScaleTransform(
+                    add=-self._min,
+                    multiply=numpy.abs(self._max - self._min) ** -1
+                )

From f6036d0de7fbb4e051bf85891b7dd7d7f6f763ba Mon Sep 17 00:00:00 2001
From: Junbo Chen <junbochen2000@gmail.com>
Date: Mon, 8 Jun 2015 15:59:34 +0800
Subject: [PATCH 2/4] add math formula for apply() method

---
 pylearn2/datasets/preprocessing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pylearn2/datasets/preprocessing.py b/pylearn2/datasets/preprocessing.py
index 8806b9a6c4..5a2a6cdcc6 100644
--- a/pylearn2/datasets/preprocessing.py
+++ b/pylearn2/datasets/preprocessing.py
@@ -1946,7 +1946,7 @@ def __init__(self, global_min=False, global_max=False, mm_eps=1e-4):
 
     def apply(self, dataset, can_fit=False):
         """
-        :math:`WMAPE = \frac{\sum_i|R_i - P_i|}{\sum_i |R_i|}`
+        :math:`\hat{x} = \frac{x - min(x)}{|max(x) - min(x)|}`
         """
         X = dataset.get_design_matrix()
         if can_fit:

From b391253f53d011ce1bb8c01827a3b031041ce35c Mon Sep 17 00:00:00 2001
From: Junbo Chen <junbochen2000@gmail.com>
Date: Mon, 8 Jun 2015 17:34:27 +0800
Subject: [PATCH 3/4] fix math markup in apply() docstring, reindent as_block()

---
 pylearn2/datasets/preprocessing.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/pylearn2/datasets/preprocessing.py b/pylearn2/datasets/preprocessing.py
index 5a2a6cdcc6..d02139de57 100644
--- a/pylearn2/datasets/preprocessing.py
+++ b/pylearn2/datasets/preprocessing.py
@@ -1946,7 +1946,7 @@ def __init__(self, global_min=False, global_max=False, mm_eps=1e-4):
 
     def apply(self, dataset, can_fit=False):
-        """
-        :math:`\hat{x} = \frac{x - min(x)}{|max(x) - min(x)|}`
+        r"""
+        :math:`\hat{x} = \frac{x - min(x)}{\mid max(x) - min(x) \mid}`
         """
         X = dataset.get_design_matrix()
         if can_fit:
@@ -1965,6 +1965,7 @@ def as_block(self):
             raise ValueError("can't convert %s to block without fitting"
                              % self.__class__.__name__)
+        # Scale by the same stabilized range that apply() divides by, so the
+        # block reproduces apply() and constant features do not divide by 0.
         return ExamplewiseAddScaleTransform(
-                    add=-self._min,
-                    multiply=numpy.abs(self._max - self._min) ** -1
-                )
+            add=-self._min,
+            multiply=(self._mm_eps + numpy.abs(self._max - self._min)) ** -1)

From 70594ad7ee1c48e07d58e493bcd0f363b78ec5ef Mon Sep 17 00:00:00 2001
From: Junbo Chen <junbochen2000@gmail.com>
Date: Mon, 8 Jun 2015 20:39:20 +0800
Subject: [PATCH 4/4] escape |max - min| bars in MinMaxScaling docstring

---
 pylearn2/datasets/preprocessing.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pylearn2/datasets/preprocessing.py b/pylearn2/datasets/preprocessing.py
index d02139de57..088c01e974 100644
--- a/pylearn2/datasets/preprocessing.py
+++ b/pylearn2/datasets/preprocessing.py
@@ -1917,7 +1917,7 @@ def apply(self, dataset, can_fit=False):
 
 class MinMaxScaling(ExamplewisePreprocessor):
-    """
-    Subtracts the min and divides by the |max - min|.
+    r"""
+    Subtracts the min and divides by the \|max - min\|.
 
     Parameters
     ----------
@@ -1927,12 +1927,12 @@ class MinMaxScaling(ExamplewisePreprocessor):
         each column (feature) separately. Default is `False`.
-    global_std : bool, optional
+    global_max : bool, optional
         If `True`, after centering, divide by the (scalar)
-        |max - min| of every element in the design matrix. If `False`,
-        divide by the column-wise (per-feature) |max - min|.
+        \|max - min\| of every element in the design matrix. If `False`,
+        divide by the column-wise (per-feature) \|max - min\|.
         Default is `False`.
     mm_eps : float, optional
-        Stabilization factor added to the |max - min| before
-        dividing, to prevent |max - min| very close to zero
+        Stabilization factor added to the \|max - min\| before
+        dividing, to prevent \|max - min\| very close to zero
         from causing the feature values to blow up too much.
         Default is `1e-4`.
     """