Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Customize docstrings fast image processor #36466

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 129 additions & 5 deletions src/transformers/image_processing_utils_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import inspect
from functools import lru_cache, partial
from typing import Any, Dict, Iterable, List, Optional, Tuple, TypedDict, Union

Expand Down Expand Up @@ -149,8 +150,6 @@ class DefaultFastImageProcessorPreprocessKwargs(DefaultFastImageProcessorInitKwa


BASE_IMAGE_PROCESSOR_FAST_DOCSTRING = r"""

Args:
do_resize (`bool`, *optional*, defaults to `self.do_resize`):
Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
`do_resize` parameter in the `preprocess` method.
Expand Down Expand Up @@ -189,9 +188,6 @@ class DefaultFastImageProcessorPreprocessKwargs(DefaultFastImageProcessorInitKwa
Whether to convert the image to RGB."""

BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS = r"""
Preprocess an image or batch of images.

Args:
images (`ImageInput`):
Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
passing in images with pixel values between 0 and 1, set `do_rescale=False`.
Expand Down Expand Up @@ -236,6 +232,134 @@ class DefaultFastImageProcessorPreprocessKwargs(DefaultFastImageProcessorInitKwa
The device to process the images on. If unset, the device is inferred from the input images."""


def get_kwargs_dict_from_docstring(docstring, obj):
    """
    Parse an `Args:`-style docstring body into a dict of per-argument documentation.

    Argument header lines are expected to be indented by 8 spaces and their description lines by 12 spaces.
    Blank lines are ignored.

    Args:
        docstring (`str`):
            The docstring body to parse.
        obj:
            Object whose attributes are looked up to resolve defaults. When a header contains
            "defaults to `self.<name>`" (or "defaults to `<name>`") and `obj` has an attribute `<name>`, the
            default is replaced by the `repr` of that attribute in `header_replaced`.

    Returns:
        `dict`: Maps each argument name (in order of appearance) to a dict with the keys `"header"` (the text
        following the argument name on its header line), `"header_replaced"` (the header with the default
        resolved against `obj` when possible) and `"desc"` (the list of stripped description lines).

    Raises:
        `ValueError`: If a non-blank line is indented by less than 8 spaces, or if a description line appears
        before any argument header.
    """
    indent_kwarg = " " * 8
    indent_desc = " " * 12
    default_marker = "defaults to "

    kwargs_dict = {}
    current_kwarg = None
    for line in docstring.split("\n"):
        stripped = line.strip()
        if not stripped:
            continue

        if line.startswith(indent_desc):
            if current_kwarg is None:
                # Without this guard the lookup below fails with an opaque `KeyError: None`.
                raise ValueError(
                    f"Unexpected description line before any argument in docstring: {line}. "
                    "Check if the indentation is correct."
                )
            kwargs_dict[current_kwarg]["desc"].append(stripped)
        elif line.startswith(indent_kwarg):
            current_kwarg, _, header = stripped.partition(" ")

            header_replaced = header
            if default_marker in header:
                # The header is expected to end with "):", which is dropped to isolate the default value.
                default = header.split(default_marker)[-1][:-2]
                default_name = default.strip("`").split("self.")[-1]
                if default_name and hasattr(obj, default_name):
                    header_replaced = header.replace(default, f"`{repr(getattr(obj, default_name))}`")

            kwargs_dict[current_kwarg] = {
                "header_replaced": header_replaced,
                "header": header,  # also keep the original header for the preprocess method
                "desc": [],
            }
        else:
            raise ValueError(f"Unexpected line in docstring: {line}. Check if the indentation is correct.")

    return kwargs_dict


def _merge_kwarg_docs(custom_kwargs, base_kwargs, leading=None):
    """Merge parsed argument docs: `leading` entries first, then custom-only ones, then base ones (custom overrides)."""
    merged = dict(leading) if leading else {}
    merged.update({name: doc for name, doc in custom_kwargs.items() if name not in base_kwargs})
    for name, doc in base_kwargs.items():
        # if custom_kwargs overrides the doc of a base kwarg, use the custom one
        merged[name] = custom_kwargs.get(name, doc)
    return merged


def _render_args_docstring(summary, kwargs_docs, header_key):
    """Render a summary line followed by an `Args:` section built from parsed argument docs."""
    lines = ["", "    " + summary, "", "    Args:"]
    for name, doc in kwargs_docs.items():
        # Same 8/12-space layout that `get_kwargs_dict_from_docstring` expects when parsing.
        lines.append(f"        {name} {doc[header_key]}")
        lines.extend("            " + desc for desc in doc["desc"])
    return "\n".join(lines) + "\n"


def customize_docstrings(
    init_header: str, custom_docstring: str = "", custom_preprocess_docstring: Optional[str] = None
):
    """
    Decorator to customize the docstrings of the `__init__` and `preprocess` methods of a class.

    The class docstring and the `preprocess` docstring are rebuilt from the base fast image processor docstrings,
    merged with the custom argument docs given here. Arguments listed in the class attribute `unused_kwargs`
    (if present) are removed from the base docs.

    Args:
        init_header (`str`):
            The header to use for the `__init__` method docstring.
        custom_docstring (`str`, *optional*, defaults to `""`):
            The custom docstring to use for the `__init__` method. One can provide only the docs for the arguments that
            need to be customized. The rest of the docstring is generated from the default docstring.
        custom_preprocess_docstring (`str`, *optional*):
            The custom docstring to use for the `preprocess` method. If not provided, the `custom_docstring` is used
            for the `preprocess` method as well.

    Returns:
        A class decorator that sets `__doc__` on the class and on its `preprocess` method and returns the class.
    """

    def docstring_decorator(obj):
        # Parse the base docstring, replace the defaults with the actual values from the object (for the init method)
        base_init_kwargs = get_kwargs_dict_from_docstring(BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, obj)
        base_preprocess_kwargs = get_kwargs_dict_from_docstring(BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS, obj)

        # Parse the given custom docstring the same way
        custom_kwargs = get_kwargs_dict_from_docstring(custom_docstring, obj)
        # If a custom_preprocess_docstring is not provided, use the given custom_docstring for the preprocess method as well
        if custom_preprocess_docstring:
            custom_preprocess_kwargs = get_kwargs_dict_from_docstring(custom_preprocess_docstring, obj)
        else:
            custom_preprocess_kwargs = custom_kwargs

        # Remove the doc for unused kwargs
        for kwarg in getattr(obj, "unused_kwargs", []):
            base_init_kwargs.pop(kwarg, None)
            base_preprocess_kwargs.pop(kwarg, None)

        # Class docstring: custom-only kwargs first, then the base ones (overridden by custom docs where given)
        all_init_kwargs = _merge_kwarg_docs(custom_kwargs, base_init_kwargs)
        obj.__doc__ = _render_args_docstring(init_header.strip(), all_init_kwargs, "header_replaced")

        # Preprocess docstring: first the explicit args of the preprocess method (usually just `images`),
        # then the custom ones, then the base ones
        explicit_args = {}
        for arg_name in inspect.signature(obj.preprocess).parameters:
            if arg_name in ("self", "kwargs"):
                continue
            # if custom_kwargs overrides the doc of an arg, use the custom one
            if arg_name in custom_preprocess_kwargs:
                explicit_args[arg_name] = custom_preprocess_kwargs[arg_name]
            elif arg_name in base_preprocess_kwargs:
                explicit_args[arg_name] = base_preprocess_kwargs[arg_name]
        all_preprocess_kwargs = _merge_kwarg_docs(
            custom_preprocess_kwargs, base_preprocess_kwargs, leading=explicit_args
        )

        if "preprocess" not in vars(obj):
            # `preprocess` is inherited: setting `__doc__` on it would rewrite the docstring of the parent class
            # (and of every other class inheriting it). Give this class its own thin wrapper instead.
            import functools

            inherited_preprocess = obj.preprocess

            @functools.wraps(inherited_preprocess)
            def preprocess(self, *args, **kwargs):
                return inherited_preprocess(self, *args, **kwargs)

            obj.preprocess = preprocess

        obj.preprocess.__doc__ = _render_args_docstring(
            "Preprocess an image or batch of images.", all_preprocess_kwargs, "header"
        )

        return obj

    return docstring_decorator


@add_start_docstrings(
"Constructs a fast base image processor.",
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
Expand Down
8 changes: 2 additions & 6 deletions src/transformers/models/blip/image_processing_blip_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,11 @@
# limitations under the License.
"""Fast Image processor class for BLIP."""

from ...image_processing_utils_fast import BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, BaseImageProcessorFast
from ...image_processing_utils_fast import BaseImageProcessorFast, customize_docstrings
from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling
from ...utils import add_start_docstrings


@add_start_docstrings(
"Constructs a fast BLIP image processor.",
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
)
@customize_docstrings("Constructs a fast BLIP image processor.")
class BlipImageProcessorFast(BaseImageProcessorFast):
# To be checked against the slow image processor
# None values left after checking can be removed
Expand Down
8 changes: 2 additions & 6 deletions src/transformers/models/clip/image_processing_clip_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,11 @@
# limitations under the License.
"""Fast Image processor class for CLIP."""

from ...image_processing_utils_fast import BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, BaseImageProcessorFast
from ...image_processing_utils_fast import BaseImageProcessorFast, customize_docstrings
from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling
from ...utils import add_start_docstrings


@add_start_docstrings(
"Constructs a fast CLIP image processor.",
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
)
@customize_docstrings("Constructs a fast CLIP image processor.")
class CLIPImageProcessorFast(BaseImageProcessorFast):
# To be checked against the slow image processor
# None values left after checking can be removed
Expand Down
23 changes: 9 additions & 14 deletions src/transformers/models/convnext/image_processing_convnext_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@

from ...image_processing_utils import BatchFeature
from ...image_processing_utils_fast import (
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS,
BaseImageProcessorFast,
DefaultFastImageProcessorInitKwargs,
DefaultFastImageProcessorPreprocessKwargs,
customize_docstrings,
group_images_by_shape,
reorder_images,
)
Expand All @@ -37,7 +36,6 @@
from ...processing_utils import Unpack
from ...utils import (
TensorType,
add_start_docstrings,
is_torch_available,
is_torchvision_available,
is_torchvision_v2_available,
Expand All @@ -62,11 +60,16 @@ class ConvNextFastImageProcessorPreprocessKwargs(DefaultFastImageProcessorPrepro
crop_pct: Optional[float]


@add_start_docstrings(
@customize_docstrings(
r"Constructs a fast ConvNeXT image processor.",
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
"""
crop_pct (`float`, *optional*):
size (`Dict[str, int]` *optional*, defaults to `{"shortest_edge": 384}`):
Resolution of the output image after `resize` is applied. If `size["shortest_edge"]` >= 384, the image is
resized to `(size["shortest_edge"], size["shortest_edge"])`. Otherwise, the smaller edge of the image will
be matched to `int(size["shortest_edge"]/crop_pct)`, after which the image is cropped to
`(size["shortest_edge"], size["shortest_edge"])`. Only has an effect if `do_resize` is set to `True`. Can
be overridden by `size` in the `preprocess` method.
crop_pct (`float`, *optional*, defaults to `224/256`):
Percentage of the image to crop. Only has an effect if size < 384. Can be
overridden by `crop_pct` in the `preprocess` method.
""",
Expand All @@ -87,14 +90,6 @@ class ConvNextImageProcessorFast(BaseImageProcessorFast):
def __init__(self, **kwargs: Unpack[ConvNextFastImageProcessorInitKwargs]):
super().__init__(**kwargs)

@add_start_docstrings(
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS,
"""
crop_pct (`float`, *optional*):
Percentage of the image to crop. Only has an effect if size < 384. Can be
overridden by `crop_pct` in the`preprocess` method.
""",
)
def preprocess(
self, images: ImageInput, **kwargs: Unpack[ConvNextFastImageProcessorPreprocessKwargs]
) -> BatchFeature:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@

from ...image_processing_utils import BatchFeature, get_size_dict
from ...image_processing_utils_fast import (
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS,
BaseImageProcessorFast,
DefaultFastImageProcessorInitKwargs,
DefaultFastImageProcessorPreprocessKwargs,
SizeDict,
customize_docstrings,
get_image_size_for_max_height_width,
get_max_height_width,
safe_squeeze,
Expand All @@ -34,7 +33,6 @@
from ...processing_utils import Unpack
from ...utils import (
TensorType,
add_start_docstrings,
is_torch_available,
is_torchvision_available,
is_torchvision_v2_available,
Expand Down Expand Up @@ -275,10 +273,9 @@ def prepare_coco_panoptic_annotation(
return new_target


@add_start_docstrings(
@customize_docstrings(
"Constructs a fast DeformableDetr image processor.",
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
"""
custom_docstring="""
format (`str`, *optional*, defaults to `AnnotationFormat.COCO_DETECTION`):
Data format of the annotations. One of "coco_detection" or "coco_panoptic".
do_convert_annotations (`bool`, *optional*, defaults to `True`):
Expand All @@ -295,6 +292,38 @@ def prepare_coco_panoptic_annotation(
provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
height and width in the batch.
""",
custom_preprocess_docstring="""
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id.
- "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
dictionary. An image can have no annotations, in which case the list should be empty.
If annotation is for segmentation, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id.
- "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
An image can have no segments, in which case the list should be empty.
- "file_name" (`str`): The file name of the image.
format (`str`, *optional*, defaults to `AnnotationFormat.COCO_DETECTION`):
Data format of the annotations. One of "coco_detection" or "coco_panoptic".
do_convert_annotations (`bool`, *optional*, defaults to `True`):
Controls whether to convert the annotations to the format expected by the DEFORMABLE_DETR model. Converts the
bounding boxes to the format `(center_x, center_y, width, height)` and in the range `[0, 1]`.
Can be overridden by the `do_convert_annotations` parameter in the `preprocess` method.
do_pad (`bool`, *optional*, defaults to `True`):
Controls whether to pad the image. Can be overridden by the `do_pad` parameter in the `preprocess`
method. If `True`, padding will be applied to the bottom and right of the image with zeros.
If `pad_size` is provided, the image will be padded to the specified dimensions.
Otherwise, the image will be padded to the maximum height and width of the batch.
pad_size (`Dict[str, int]`, *optional*):
The size `{"height": int, "width": int}` to pad the images to. Must be larger than any image size
provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
height and width in the batch.
return_segmentation_masks (`bool`, *optional*, defaults to `False`):
Whether to return segmentation masks.
masks_path (`str` or `pathlib.Path`, *optional*):
Path to the directory containing the segmentation masks.
""",
)
class DeformableDetrImageProcessorFast(BaseImageProcessorFast):
resample = PILImageResampling.BILINEAR
Expand Down Expand Up @@ -569,41 +598,6 @@ def pad(

return image, pixel_mask, annotation

@add_start_docstrings(
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING_PREPROCESS,
"""
annotations (`AnnotationType` or `List[AnnotationType]`, *optional*):
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id.
- "annotations" (`List[Dict]`): List of annotations for an image. Each annotation should be a
dictionary. An image can have no annotations, in which case the list should be empty.
If annotation is for segmentation, the annotations should be a dictionary with the following keys:
- "image_id" (`int`): The image id.
- "segments_info" (`List[Dict]`): List of segments for an image. Each segment should be a dictionary.
An image can have no segments, in which case the list should be empty.
- "file_name" (`str`): The file name of the image.
format (`str`, *optional*, defaults to `AnnotationFormat.COCO_DETECTION`):
Data format of the annotations. One of "coco_detection" or "coco_panoptic".
do_convert_annotations (`bool`, *optional*, defaults to `True`):
Controls whether to convert the annotations to the format expected by the DEFORMABLE_DETR model. Converts the
bounding boxes to the format `(center_x, center_y, width, height)` and in the range `[0, 1]`.
Can be overridden by the `do_convert_annotations` parameter in the `preprocess` method.
do_pad (`bool`, *optional*, defaults to `True`):
Controls whether to pad the image. Can be overridden by the `do_pad` parameter in the `preprocess`
method. If `True`, padding will be applied to the bottom and right of the image with zeros.
If `pad_size` is provided, the image will be padded to the specified dimensions.
Otherwise, the image will be padded to the maximum height and width of the batch.
pad_size (`Dict[str, int]`, *optional*):
The size `{"height": int, "width" int}` to pad the images to. Must be larger than any image size
provided for preprocessing. If `pad_size` is not provided, images will be padded to the largest
height and width in the batch.
return_segmentation_masks (`bool`, *optional*, defaults to `False`):
Whether to return segmentation masks.
masks_path (`str` or `pathlib.Path`, *optional*):
Path to the directory containing the segmentation masks.
""",
)
def preprocess(
self, images: ImageInput, **kwargs: Unpack[DeformableDetrFastImageProcessorPreprocessKwargs]
) -> BatchFeature:
Expand Down
8 changes: 2 additions & 6 deletions src/transformers/models/deit/image_processing_deit_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,15 @@
# limitations under the License.
"""Fast Image processor class for DeiT."""

from ...image_processing_utils_fast import BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, BaseImageProcessorFast
from ...image_processing_utils_fast import BaseImageProcessorFast, customize_docstrings
from ...image_utils import (
IMAGENET_STANDARD_MEAN,
IMAGENET_STANDARD_STD,
PILImageResampling,
)
from ...utils import add_start_docstrings


@add_start_docstrings(
"Constructs a fast DeiT image processor.",
BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
)
@customize_docstrings("Constructs a fast DeiT image processor.")
class DeiTImageProcessorFast(BaseImageProcessorFast):
# To be checked against the slow image processor
# None values left after checking can be removed
Expand Down
Loading