huggingface · yonigozlan · Feb 27, 2025 · Feb 27, 2025
diff --git a/src/transformers/image_processing_utils_fast.py b/src/transformers/image_processing_utils_fast.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import inspect
 from functools import lru_cache, partial
 from typing import Any, Dict, Iterable, List, Optional, Tuple, TypedDict, Union
 
@@ -149,8 +150,6 @@ class DefaultFastImageProcessorPreprocessKwargs(DefaultFastImageProcessorInitKwa
 
 
 BASE_IMAGE_PROCESSOR_FAST_DOCSTRING = r"""
-
-    Args:
         do_resize (`bool`, *optional*, defaults to `self.do_resize`):
             Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
             `do_resize` parameter in the `preprocess` method.
@@ -189,9 +188,6 @@ class DefaultFastImageProcessorPreprocessKwargs(DefaultFastImageProcessorInitKwa
             Whether to convert the image to RGB."""
 
 BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS = r"""
-    Preprocess an image or batch of images.
-
-    Args:
         images (`ImageInput`):
             Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
             passing in images with pixel values between 0 and 1, set `do_rescale=False`.
@@ -236,6 +232,134 @@ class DefaultFastImageProcessorPreprocessKwargs(DefaultFastImageProcessorInitKwa
             The device to process the images on. If unset, the device is inferred from the input images."""
 
 
+def get_kwargs_dict_from_docstring(docstring, obj):
+    """
+    Parse the argument entries of a docstring into a dict keyed by argument name.
+
+    Lines are classified by their indentation only: a non-blank line indented by at least 12 spaces is a description
+    line of the current argument, a non-blank line indented by at least 8 (but less than 12) spaces opens a new
+    argument and has the form `name (type, ...):`. Blank lines are ignored.
+
+    Args:
+        docstring (`str`):
+            The argument section to parse.
+        obj (`Any`):
+            Object used to resolve documented defaults. When a header ends with "defaults to `self.<name>`):" (the
+            `self.` prefix and the backticks are optional) and `obj` has an attribute `<name>`, the default is replaced
+            by the `repr` of that attribute in `"header_replaced"`.
+
+    Returns:
+        `dict`: Maps each argument name, in order of appearance, to a dict with the keys `"header"` (the header as
+        written), `"header_replaced"` (the header with its default resolved against `obj`) and `"desc"` (the list of
+        stripped description lines).
+
+    Raises:
+        ValueError: If a non-blank line is indented by less than 8 spaces, or if a description line appears before
+            any argument header.
+    """
+    indent_kwarg = " " * 8
+    indent_desc = " " * 12
+    default_marker = "defaults to "
+    current_kwarg = None
+    kwargs_dict = {}
+    for line in docstring.split("\n"):
+        stripped = line.strip()
+        if not stripped:
+            continue
+
+        if line.startswith(indent_desc):
+            if current_kwarg is None:
+                # Without this guard the lookup below fails with an unhelpful `KeyError: None`
+                raise ValueError(
+                    f"Description line found before any argument in docstring: {line}. "
+                    "Check if the indentation is correct."
+                )
+            kwargs_dict[current_kwarg]["desc"].append(stripped)
+        elif line.startswith(indent_kwarg):
+            current_kwarg, _, header = stripped.partition(" ")
+
+            header_replaced = header
+            # Only headers that actually document a default are candidates for substitution
+            before, marker, tail = header.rpartition(default_marker)
+            if marker:
+                default = tail[:-2]  # drop the closing "):" of the header
+                default_name = default.strip("`").split("self.")[-1]
+                if default and hasattr(obj, default_name):
+                    # Rebuild from the pieces so that only the trailing default is replaced, never an identical
+                    # substring appearing earlier in the header (e.g. in the type)
+                    resolved = f"`{repr(getattr(obj, default_name))}`"
+                    header_replaced = before + marker + resolved + tail[len(default) :]
+
+            kwargs_dict[current_kwarg] = {
+                "header_replaced": header_replaced,
+                "header": header,  # also keep the original header for the preprocess method
+                "desc": [],
+            }
+        else:
+            raise ValueError(f"Unexpected line in docstring: {line}. Check if the indentation is correct.")
+
+    return kwargs_dict
+
+
+def _merge_kwarg_docs(base_kwargs, custom_kwargs, leading_names=()):
+    """
+    Merge two parsed argument-doc dicts into a new ordered dict.
+
+    Order of the result: the `leading_names` that are documented in either dict, then the arguments only present in
+    `custom_kwargs`, then the arguments of `base_kwargs`. An entry of `custom_kwargs` always overrides the entry of
+    `base_kwargs` with the same name.
+    """
+    merged = {}
+    for name in leading_names:
+        if name in custom_kwargs:
+            merged[name] = custom_kwargs[name]
+        elif name in base_kwargs:
+            merged[name] = base_kwargs[name]
+    for name, values in custom_kwargs.items():
+        if name not in base_kwargs:
+            merged[name] = values
+    for name, values in base_kwargs.items():
+        # re-assigning a name already inserted above keeps its original position
+        merged[name] = custom_kwargs.get(name, values)
+    return merged
+
+
+def _build_args_docstring(summary, kwarg_docs, header_key):
+    """
+    Render `summary` followed by an `Args:` section listing `kwarg_docs`, using `values[header_key]` as the header
+    of each argument.
+    """
+    parts = [f"\n    {summary}\n\n    Args:\n"]
+    for name, values in kwarg_docs.items():
+        parts.append(f"        {name} {values[header_key]}\n")
+        parts.extend(f"            {desc}\n" for desc in values["desc"])
+    return "".join(parts)
+
+
+def customize_docstrings(
+    init_header: str, custom_docstring: str = "", custom_preprocess_docstring: Optional[str] = None
+):
+    """
+    Decorator to customize the docstrings of the `__init__` and `preprocess` methods of a class.
+
+    The class docstring is rebuilt from `BASE_IMAGE_PROCESSOR_FAST_DOCSTRING` and the `preprocess` docstring from
+    `BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS`. Arguments listed in the optional `unused_kwargs` attribute of
+    the decorated class are dropped from both. In the class docstring, defaults written as `self.<name>` are replaced
+    by the value of the corresponding class attribute.
+
+    Args:
+        init_header (`str`):
+            The header to use for the `__init__` method docstring.
+        custom_docstring (`str`, *optional*, defaults to `""`):
+            The custom docstring to use for the `__init__` method. One can provide only the docs for the arguments that
+            need to be customized. The rest of the docstring is generated from the default docstring.
+        custom_preprocess_docstring (`str`, *optional*):
+            The custom docstring to use for the `preprocess` method. If not provided, the `custom_docstring` is used
+            for the `preprocess` method as well.
+
+    Returns:
+        `Callable`: A class decorator that sets `__doc__` on the class and on its `preprocess` method, and returns
+        the class.
+    """
+    # Local import: only needed here, and keeps the module-level import block untouched
+    from functools import wraps
+
+    def docstring_decorator(obj):
+        # Parse the base docstrings. `header_replaced` holds the defaults resolved against `obj` (for the init method)
+        base_init_kwargs = get_kwargs_dict_from_docstring(BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, obj)
+        base_preprocess_kwargs = get_kwargs_dict_from_docstring(BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS, obj)
+
+        custom_kwargs = get_kwargs_dict_from_docstring(custom_docstring, obj)
+        # If a custom_preprocess_docstring is not provided, use the given custom_docstring for the preprocess method as well
+        if custom_preprocess_docstring:
+            custom_preprocess_kwargs = get_kwargs_dict_from_docstring(custom_preprocess_docstring, obj)
+        else:
+            custom_preprocess_kwargs = custom_kwargs
+
+        # Remove the doc for unused kwargs
+        for kwarg in getattr(obj, "unused_kwargs", []):
+            base_init_kwargs.pop(kwarg, None)
+            base_preprocess_kwargs.pop(kwarg, None)
+
+        # Class docstring: custom-only kwargs first, then the base ones (overridden by the custom docs if any)
+        all_init_kwargs = _merge_kwarg_docs(base_init_kwargs, custom_kwargs)
+        obj.__doc__ = _build_args_docstring(init_header.strip(), all_init_kwargs, "header_replaced")
+
+        # Preprocess docstring: first the explicit args of the preprocess method (usually just `images`),
+        # then the custom kwargs, then the base ones. The original (non-resolved) headers are used here.
+        preprocess_args = [
+            arg_name
+            for arg_name in inspect.signature(obj.preprocess).parameters
+            if arg_name not in ("self", "kwargs")
+        ]
+        all_preprocess_kwargs = _merge_kwarg_docs(base_preprocess_kwargs, custom_preprocess_kwargs, preprocess_args)
+        preprocess_docstring = _build_args_docstring(
+            "Preprocess an image or batch of images.", all_preprocess_kwargs, "header"
+        )
+
+        # `obj.preprocess` is the parent's function object when `obj` does not define `preprocess` itself. Writing
+        # `__doc__` on it would silently rewrite the docs of the parent class and of all its other subclasses, so
+        # give `obj` its own thin override first.
+        if "preprocess" not in vars(obj):
+
+            @wraps(obj.preprocess)
+            def preprocess(self, *args, **kwargs):
+                return super(obj, self).preprocess(*args, **kwargs)
+
+            obj.preprocess = preprocess
+
+        obj.preprocess.__doc__ = preprocess_docstring
+
+        return obj
+
+    return docstring_decorator
+
+
 @add_start_docstrings(
     "Constructs a fast base image processor.",
     BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,

diff --git a/src/transformers/models/blip/image_processing_blip_fast.py b/src/transformers/models/blip/image_processing_blip_fast.py
@@ -14,15 +14,11 @@
 # limitations under the License.
 """Fast Image processor class for BLIP."""
 
-from ...image_processing_utils_fast import BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, BaseImageProcessorFast
+from ...image_processing_utils_fast import BaseImageProcessorFast, customize_docstrings
 from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling
-from ...utils import add_start_docstrings
 
 
-@add_start_docstrings(
-    "Constructs a fast BLIP image processor.",
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
-)
+@customize_docstrings("Constructs a fast BLIP image processor.")
 class BlipImageProcessorFast(BaseImageProcessorFast):
     # To be checked against the slow image processor
     # None values left after checking can be removed

diff --git a/src/transformers/models/clip/image_processing_clip_fast.py b/src/transformers/models/clip/image_processing_clip_fast.py
@@ -14,15 +14,11 @@
 # limitations under the License.
 """Fast Image processor class for CLIP."""
 
-from ...image_processing_utils_fast import BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, BaseImageProcessorFast
+from ...image_processing_utils_fast import BaseImageProcessorFast, customize_docstrings
 from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling
-from ...utils import add_start_docstrings
 
 
-@add_start_docstrings(
-    "Constructs a fast CLIP image processor.",
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
-)
+@customize_docstrings("Constructs a fast CLIP image processor.")
 class CLIPImageProcessorFast(BaseImageProcessorFast):
     # To be checked against the slow image processor
     # None values left after checking can be removed

diff --git a/src/transformers/models/convnext/image_processing_convnext_fast.py b/src/transformers/models/convnext/image_processing_convnext_fast.py
@@ -18,11 +18,10 @@
 
 from ...image_processing_utils import BatchFeature
 from ...image_processing_utils_fast import (
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS,
     BaseImageProcessorFast,
     DefaultFastImageProcessorInitKwargs,
     DefaultFastImageProcessorPreprocessKwargs,
+    customize_docstrings,
     group_images_by_shape,
     reorder_images,
 )
@@ -37,7 +36,6 @@
 from ...processing_utils import Unpack
 from ...utils import (
     TensorType,
-    add_start_docstrings,
     is_torch_available,
     is_torchvision_available,
     is_torchvision_v2_available,
@@ -62,11 +60,16 @@ class ConvNextFastImageProcessorPreprocessKwargs(DefaultFastImageProcessorPrepro
     crop_pct: Optional[float]
 
 
-@add_start_docstrings(
+@customize_docstrings(
     r"Constructs a fast ConvNeXT image processor.",
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
     """
-        crop_pct (`float`, *optional*):
+        size (`Dict[str, int]`, *optional*, defaults to `{"shortest_edge": 384}`):
+                Resolution of the output image after `resize` is applied. If `size["shortest_edge"]` >= 384, the image is
+                resized to `(size["shortest_edge"], size["shortest_edge"])`. Otherwise, the smaller edge of the image will
+                be matched to `int(size["shortest_edge"]/crop_pct)`, after which the image is cropped to
+                `(size["shortest_edge"], size["shortest_edge"])`. Only has an effect if `do_resize` is set to `True`. Can
+                be overridden by `size` in the `preprocess` method.
+        crop_pct (`float`, *optional*, defaults to `224/256`):
             Percentage of the image to crop. Only has an effect if size < 384. Can be
             overridden by `crop_pct` in the`preprocess` method.
     """,
@@ -87,14 +90,6 @@ class ConvNextImageProcessorFast(BaseImageProcessorFast):
     def __init__(self, **kwargs: Unpack[ConvNextFastImageProcessorInitKwargs]):
         super().__init__(**kwargs)
 
-    @add_start_docstrings(
-        BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS,
-        """
-        crop_pct (`float`, *optional*):
-            Percentage of the image to crop. Only has an effect if size < 384. Can be
-            overridden by `crop_pct` in the`preprocess` method.
-        """,
-    )
     def preprocess(
         self, images: ImageInput, **kwargs: Unpack[ConvNextFastImageProcessorPreprocessKwargs]
     ) -> BatchFeature:

diff --git a/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py b/src/transformers/models/deformable_detr/image_processing_deformable_detr_fast.py
@@ -9,12 +9,11 @@
 
 from ...image_processing_utils import BatchFeature, get_size_dict
 from ...image_processing_utils_fast import (
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS,
     BaseImageProcessorFast,
     DefaultFastImageProcessorInitKwargs,
     DefaultFastImageProcessorPreprocessKwargs,
     SizeDict,
+    customize_docstrings,
     get_image_size_for_max_height_width,
     get_max_height_width,
     safe_squeeze,
@@ -34,7 +33,6 @@
 from ...processing_utils import Unpack
 from ...utils import (
     TensorType,
-    add_start_docstrings,
     is_torch_available,
     is_torchvision_available,
     is_torchvision_v2_available,
@@ -275,10 +273,9 @@ def prepare_coco_panoptic_annotation(
     return new_target
 
 
-@add_start_docstrings(
+@customize_docstrings(
     "Constructs a fast DeformableDetr image processor.",
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
-    """
+    custom_docstring="""
         format (`str`, *optional*, defaults to `AnnotationFormat.COCO_DETECTION`):
             Data format of the annotations. One of "coco_detection" or "coco_panoptic".
         do_convert_annotations (`bool`, *optional*, defaults to `True`):
@@ -295,6 +292,38 @@ def prepare_coco_panoptic_annotation(
             provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
             height and width in the batch.
     """,
+    custom_preprocess_docstring="""
+        annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
+            List of annotations associated with the image or batch of images. If annotation is for object
+            detection, the annotations should be a dictionary with the following keys:
+            - "image_id" (`int`): The image id.
+            - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
+                dictionary. An image can have no annotations, in which case the list should be empty.
+            If annotation is for segmentation, the annotations should be a dictionary with the following keys:
+            - "image_id" (`int`): The image id.
+            - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
+                An image can have no segments, in which case the list should be empty.
+            - "file_name" (`str`): The file name of the image.
+        format (`str`, *optional*, defaults to `AnnotationFormat.COCO_DETECTION`):
+            Data format of the annotations. One of "coco_detection" or "coco_panoptic".
+        do_convert_annotations (`bool`, *optional*, defaults to `True`):
+            Controls whether to convert the annotations to the format expected by the DEFORMABLE_DETR model. Converts the
+            bounding boxes to the format `(center_x, center_y, width, height)` and in the range `[0, 1]`.
+            Can be overridden by the `do_convert_annotations` parameter in the `preprocess` method.
+        do_pad (`bool`, *optional*, defaults to `True`):
+            Controls whether to pad the image. Can be overridden by the `do_pad` parameter in the `preprocess`
+            method. If `True`, padding will be applied to the bottom and right of the image with zeros.
+            If `pad_size` is provided, the image will be padded to the specified dimensions.
+            Otherwise, the image will be padded to the maximum height and width of the batch.
+        pad_size (`Dict[str, int]`, *optional*):
+            The size `{"height": int, "width": int}` to pad the images to. Must be larger than any image size
+            provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
+            height and width in the batch.
+        return_segmentation_masks (`bool`, *optional*, defaults to `False`):
+            Whether to return segmentation masks.
+        masks_path (`str` or `pathlib.Path`, *optional*):
+            Path to the directory containing the segmentation masks.
+        """,
 )
 class DeformableDetrImageProcessorFast(BaseImageProcessorFast):
     resample = PILImageResampling.BILINEAR
@@ -569,41 +598,6 @@ def pad(
 
         return image, pixel_mask, annotation
 
-    @add_start_docstrings(
-        BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS,
-        """
-        annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
-            List of annotations associated with the image or batch of images. If annotation is for object
-            detection, the annotations should be a dictionary with the following keys:
-            - "image_id" (`int`): The image id.
-            - "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
-                dictionary. An image can have no annotations, in which case the list should be empty.
-            If annotation is for segmentation, the annotations should be a dictionary with the following keys:
-            - "image_id" (`int`): The image id.
-            - "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
-                An image can have no segments, in which case the list should be empty.
-            - "file_name" (`str`): The file name of the image.
-        format (`str`, *optional*, defaults to `AnnotationFormat.COCO_DETECTION`):
-            Data format of the annotations. One of "coco_detection" or "coco_panoptic".
-        do_convert_annotations (`bool`, *optional*, defaults to `True`):
-            Controls whether to convert the annotations to the format expected by the DEFORMABLE_DETR model. Converts the
-            bounding boxes to the format `(center_x, center_y, width, height)` and in the range `[0, 1]`.
-            Can be overridden by the `do_convert_annotations` parameter in the `preprocess` method.
-        do_pad (`bool`, *optional*, defaults to `True`):
-            Controls whether to pad the image. Can be overridden by the `do_pad` parameter in the `preprocess`
-            method. If `True`, padding will be applied to the bottom and right of the image with zeros.
-            If `pad_size` is provided, the image will be padded to the specified dimensions.
-            Otherwise, the image will be padded to the maximum height and width of the batch.
-        pad_size (`Dict[str, int]`, *optional*):
-            The size `{"height": int, "width" int}` to pad the images to. Must be larger than any image size
-            provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
-            height and width in the batch.
-        return_segmentation_masks (`bool`, *optional*, defaults to `False`):
-            Whether to return segmentation masks.
-        masks_path (`str` or `pathlib.Path`, *optional*):
-            Path to the directory containing the segmentation masks.
-        """,
-    )
     def preprocess(
         self, images: ImageInput, **kwargs: Unpack[DeformableDetrFastImageProcessorPreprocessKwargs]
     ) -> BatchFeature:

diff --git a/src/transformers/models/deit/image_processing_deit_fast.py b/src/transformers/models/deit/image_processing_deit_fast.py
@@ -14,19 +14,15 @@
 # limitations under the License.
 """Fast Image processor class for DeiT."""
 
-from ...image_processing_utils_fast import BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, BaseImageProcessorFast
+from ...image_processing_utils_fast import BaseImageProcessorFast, customize_docstrings
 from ...image_utils import (
     IMAGENET_STANDARD_MEAN,
     IMAGENET_STANDARD_STD,
     PILImageResampling,
 )
-from ...utils import add_start_docstrings
 
 
-@add_start_docstrings(
-    "Constructs a fast DeiT image processor.",
-    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
-)
+@customize_docstrings("Constructs a fast DeiT image processor.")
 class DeiTImageProcessorFast(BaseImageProcessorFast):
     # To be checked against the slow image processor
     # None values left after checking can be removed