@@ -83,6 +83,24 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
83
83
enum AVPixelFormat frameFormat =
84
84
static_cast <enum AVPixelFormat>(avFrame->format );
85
85
86
+ // This is an early-return optimization: if the format is already what we
87
+ // need, and the dimensions are also what we need, we don't need to call
88
+ // swscale or filtergraph. We can just convert the AVFrame to a tensor.
89
+ if (frameFormat == AV_PIX_FMT_RGB24 &&
90
+ avFrame->width == expectedOutputWidth &&
91
+ avFrame->height == expectedOutputHeight) {
92
+ outputTensor = toTensor (avFrame);
93
+ if (preAllocatedOutputTensor.has_value ()) {
94
+ // We have already validated that preAllocatedOutputTensor and
95
+ // outputTensor have the same shape.
96
+ preAllocatedOutputTensor.value ().copy_ (outputTensor);
97
+ frameOutput.data = preAllocatedOutputTensor.value ();
98
+ } else {
99
+ frameOutput.data = outputTensor;
100
+ }
101
+ return ;
102
+ }
103
+
86
104
// By default, we want to use swscale for color conversion because it is
87
105
// faster. However, it has width requirements, so we may need to fall back
88
106
// to filtergraph. We also need to respect what was requested from the
@@ -159,7 +177,7 @@ void CpuDeviceInterface::convertAVFrameToFrameOutput(
159
177
std::make_unique<FilterGraph>(filtersContext, videoStreamOptions);
160
178
prevFiltersContext_ = std::move (filtersContext);
161
179
}
162
- outputTensor = convertAVFrameToTensorUsingFilterGraph ( avFrame);
180
+ outputTensor = toTensor (filterGraphContext_-> convert ( avFrame) );
163
181
164
182
// Similarly to above, if this check fails it means the frame wasn't
165
183
// reshaped to its expected dimensions by filtergraph.
@@ -208,23 +226,20 @@ int CpuDeviceInterface::convertAVFrameToTensorUsingSwsScale(
208
226
return resultHeight;
209
227
}
210
228
211
- torch::Tensor CpuDeviceInterface::convertAVFrameToTensorUsingFilterGraph (
212
- const UniqueAVFrame& avFrame) {
213
- UniqueAVFrame filteredAVFrame = filterGraphContext_->convert (avFrame);
214
-
215
- TORCH_CHECK_EQ (filteredAVFrame->format , AV_PIX_FMT_RGB24);
229
+ torch::Tensor CpuDeviceInterface::toTensor (const UniqueAVFrame& avFrame) {
230
+ TORCH_CHECK_EQ (avFrame->format , AV_PIX_FMT_RGB24);
216
231
217
- auto frameDims = getHeightAndWidthFromResizedAVFrame (*filteredAVFrame .get ());
232
+ auto frameDims = getHeightAndWidthFromResizedAVFrame (*avFrame .get ());
218
233
int height = frameDims.height ;
219
234
int width = frameDims.width ;
220
235
std::vector<int64_t > shape = {height, width, 3 };
221
- std::vector<int64_t > strides = {filteredAVFrame ->linesize [0 ], 3 , 1 };
222
- AVFrame* filteredAVFramePtr = filteredAVFrame. release ( );
223
- auto deleter = [filteredAVFramePtr ](void *) {
224
- UniqueAVFrame avFrameToDelete (filteredAVFramePtr );
236
+ std::vector<int64_t > strides = {avFrame ->linesize [0 ], 3 , 1 };
237
+ AVFrame* avFrameClone = av_frame_clone (avFrame. get () );
238
+ auto deleter = [avFrameClone ](void *) {
239
+ UniqueAVFrame avFrameToDelete (avFrameClone );
225
240
};
226
241
return torch::from_blob (
227
- filteredAVFramePtr ->data [0 ], shape, strides, deleter, {torch::kUInt8 });
242
+ avFrameClone ->data [0 ], shape, strides, deleter, {torch::kUInt8 });
228
243
}
229
244
230
245
void CpuDeviceInterface::createSwsContext (
0 commit comments