From 134dc5200294356f777e218f0df3e021384b3921 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20C=C3=ADfka?=
Date: Thu, 30 Oct 2025 10:51:11 +0100
Subject: [PATCH] Add audio_crop_mode parameter to speaker diarization

---
 src/pyannote/audio/tasks/segmentation/speaker_diarization.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/pyannote/audio/tasks/segmentation/speaker_diarization.py b/src/pyannote/audio/tasks/segmentation/speaker_diarization.py
index 3f8d3ccf9..2fb5782b1 100644
--- a/src/pyannote/audio/tasks/segmentation/speaker_diarization.py
+++ b/src/pyannote/audio/tasks/segmentation/speaker_diarization.py
@@ -123,6 +123,7 @@ def __init__(
         pin_memory: bool = False,
         augmentation: Optional[BaseWaveformTransform] = None,
         metric: Union[Metric, Sequence[Metric], Dict[str, Metric]] = None,
+        audio_crop_mode: str = "raise",
         max_num_speakers: Optional[
             int
         ] = None,  # deprecated in favor of `max_speakers_per_chunk``
@@ -163,6 +164,7 @@ def __init__(
         self.max_speakers_per_frame = max_speakers_per_frame
         self.balance = balance
         self.weight = weight
+        self._audio_crop_mode = audio_crop_mode
 
     def setup(self, stage=None):
         super().setup(stage)
@@ -302,7 +304,7 @@ def prepare_chunk(self, file_id: int, start_time: float, duration: float):
         chunk = Segment(start_time, start_time + duration)
 
         sample = dict()
-        sample["X"], _ = self.model.audio.crop(file, chunk)
+        sample["X"], _ = self.model.audio.crop(file, chunk, mode=self._audio_crop_mode)
 
         # gather all annotations of current file
         start_id, end_id = self.prepared_data["audio-segments-ids"][file_id]