diff --git a/depthai_sdk/examples/CameraComponent/camera_encode.py b/depthai_sdk/examples/CameraComponent/camera_encode.py
index 481a52aec..e73c2139c 100644
--- a/depthai_sdk/examples/CameraComponent/camera_encode.py
+++ b/depthai_sdk/examples/CameraComponent/camera_encode.py
@@ -1,9 +1,10 @@
 from depthai_sdk import OakCamera

 with OakCamera() as oak:
-    color = oak.create_camera('color', encode='h265')
+    color = oak.create_camera('color')
+    encoder = oak.create_encoder(color, codec='h265')

-    oak.visualize(color.out.encoded, fps=True, scale=2/3)
+    oak.visualize(encoder, fps=True, scale=2/3)
     # By default, it will stream non-encoded frames
     oak.visualize(color, fps=True, scale=2/3)
     oak.start(blocking=True)
diff --git a/depthai_sdk/examples/StereoComponent/stereo_encoded.py b/depthai_sdk/examples/StereoComponent/stereo_encoded.py
index 8ca1eb471..3a49960cb 100644
--- a/depthai_sdk/examples/StereoComponent/stereo_encoded.py
+++ b/depthai_sdk/examples/StereoComponent/stereo_encoded.py
@@ -3,10 +3,9 @@

 with OakCamera() as oak:
-    stereo = oak.create_stereo('800p', fps=30, encode='h264')
+    stereo = oak.create_stereo('800p', fps=30)
+    stereo.set_colormap(dai.Colormap.JET) # Must be set before creating the encoder
+    encoder = oak.create_encoder(stereo, codec='h264')

-    # Set on-device output colorization, works only for encoded output
-    stereo.set_colormap(dai.Colormap.JET)
-
-    oak.visualize(stereo.out.encoded, fps=True)
+    oak.visualize(encoder, fps=True)
     oak.start(blocking=True)
diff --git a/depthai_sdk/examples/mixed/car_tracking.py b/depthai_sdk/examples/mixed/car_tracking.py
index 7cbcc8fe9..e0e9da474 100644
--- a/depthai_sdk/examples/mixed/car_tracking.py
+++ b/depthai_sdk/examples/mixed/car_tracking.py
@@ -2,7 +2,7 @@

 # Download public depthai-recording
 with OakCamera(replay='cars-tracking-above-01') as oak:
-    # Create color camera, add video encoder
+    # Create color camera
     color = oak.create_camera('color')

     # Download & run pretrained vehicle detection model and track detections
diff --git a/depthai_sdk/examples/mixed/sync_multiple_outputs.py b/depthai_sdk/examples/mixed/sync_multiple_outputs.py
index 64197a018..a00c5138e 100644
--- a/depthai_sdk/examples/mixed/sync_multiple_outputs.py
+++ b/depthai_sdk/examples/mixed/sync_multiple_outputs.py
@@ -3,7 +3,8 @@
 from depthai_sdk import OakCamera

 with OakCamera() as oak:
-    color = oak.create_camera('color', encode='h264')
+    color = oak.create_camera('color')
+    encoder = oak.create_encoder(color, codec='h264')
     nn = oak.create_nn('mobilenet-ssd', color)
     nn2 = oak.create_nn('face-detection-retail-0004', color)
@@ -12,8 +13,7 @@
     def cb(msgs: Dict):
         for name, packet in msgs.items():
             print(f"Packet '{name}' with timestamp:", packet.get_timestamp(), 'Seq number:', packet.get_sequence_num(), 'Object', packet)

-    oak.callback([nn.out.passthrough, nn.out.encoded, nn2.out.encoded], cb) \
+    oak.callback([nn.out.passthrough, encoder], cb) \
         .configure_syncing(enable_sync=True, threshold_ms=30)
-    # oak.show_graph()
     oak.start(blocking=True)
diff --git a/depthai_sdk/examples/recording/encode.py b/depthai_sdk/examples/recording/encode.py
index 44b235d12..72703bfbc 100644
--- a/depthai_sdk/examples/recording/encode.py
+++ b/depthai_sdk/examples/recording/encode.py
@@ -1,17 +1,21 @@
 from depthai_sdk import OakCamera, RecordType

 with OakCamera() as oak:
-    color = oak.create_camera('color', resolution='1080P', fps=10, encode='H265')
-    left = oak.create_camera('left', resolution='800p', fps=10, encode='H265')
-    right = oak.create_camera('right', resolution='800p', fps=10, encode='H265')
+    color = oak.create_camera('color', resolution='1080P', fps=10)
+    left = oak.create_camera('left', resolution='800p', fps=10)
+    right = oak.create_camera('right', resolution='800p', fps=10)
+
+    color_encoder = oak.create_encoder(color, codec='h265')
+    left_encoder = oak.create_encoder(left, codec='h265')
+    right_encoder = oak.create_encoder(right, codec='h265')

     stereo = oak.create_stereo(left=left, right=right)
     nn = oak.create_nn('mobilenet-ssd', color, spatial=stereo)

     # Sync & save all (encoded) streams
-    oak.record([color.out.encoded, left.out.encoded, right.out.encoded], './record', RecordType.VIDEO) \
+    oak.record([color_encoder, left_encoder, right_encoder], './record', RecordType.VIDEO) \
         .configure_syncing(enable_sync=True, threshold_ms=50)

-    oak.visualize([color.out.encoded], fps=True)
+    oak.visualize([color_encoder], fps=True)
     oak.start(blocking=True)
diff --git a/depthai_sdk/examples/recording/encoder_preview.py b/depthai_sdk/examples/recording/encoder_preview.py
index f519b808f..c3e182dae 100644
--- a/depthai_sdk/examples/recording/encoder_preview.py
+++ b/depthai_sdk/examples/recording/encoder_preview.py
@@ -7,16 +7,17 @@
 rec = AvWriter(Path('./'), 'color', fourcc=fourcc)


-def save_raw_mjpeg(packet):
+def save_raw(packet):
     rec.write(packet.msg)


 with OakCamera() as oak:
-    color = oak.create_camera('color', encode=fourcc, fps=20)
+    color = oak.create_camera('color', fps=20)
+    encoder = oak.create_encoder(color, codec=fourcc)

     # Stream encoded video packets to host. For visualization, we decode them
     # on the host side, and for callback we write encoded frames directly to disk.
-    oak.visualize(color.out.encoded, scale=2 / 3, fps=True)
-    oak.callback(color.out.encoded, callback=save_raw_mjpeg)
+    oak.visualize(encoder, scale=2 / 3, fps=True)
+    oak.callback(encoder, callback=save_raw)

     oak.start(blocking=True)
diff --git a/depthai_sdk/examples/recording/recording_duration.py b/depthai_sdk/examples/recording/recording_duration.py
index 65fe7e21e..f0b165129 100644
--- a/depthai_sdk/examples/recording/recording_duration.py
+++ b/depthai_sdk/examples/recording/recording_duration.py
@@ -2,15 +2,18 @@
 import time

 with OakCamera() as oak:
-    color = oak.create_camera('color', resolution='1080P', fps=10, encode='H265')
-    left = oak.create_camera('left', resolution='800p', fps=10, encode='H265')
-    right = oak.create_camera('right', resolution='800p', fps=10, encode='H265')
+    color = oak.create_camera('color', resolution='1080P', fps=10)
+    left = oak.create_camera('left', resolution='800p', fps=10)
+    right = oak.create_camera('right', resolution='800p', fps=10)
+    color_encoder = oak.create_encoder(color, codec='h265')
+    left_encoder = oak.create_encoder(left, codec='h265')
+    right_encoder = oak.create_encoder(right, codec='h265')

     # Sync & save all (encoded) streams
-    oak.record([color.out.encoded, left.out.encoded, right.out.encoded], './record')
+    oak.record([color_encoder, left_encoder, right_encoder], './record')
     oak.start()
     start_time = time.monotonic()
     while oak.running():
         if time.monotonic() - start_time > 5:
             break
-        oak.poll()
\ No newline at end of file
+        oak.poll()
diff --git a/depthai_sdk/examples/recording/rosbag_record.py b/depthai_sdk/examples/recording/rosbag_record.py
index 4f462dd59..2c09b3e6b 100644
--- a/depthai_sdk/examples/recording/rosbag_record.py
+++ b/depthai_sdk/examples/recording/rosbag_record.py
@@ -1,18 +1,21 @@
 from depthai_sdk import OakCamera, RecordType

 with OakCamera() as oak:
-    color = oak.create_camera('color', encode='jpeg', fps=30)
-    left = oak.create_camera('left', resolution='800p', encode='jpeg', fps=30)
-    right = oak.create_camera('right', resolution='800p', encode='jpeg', fps=30)
+    color = oak.create_camera('color', fps=30)
+    left = oak.create_camera('left', resolution='800p', fps=30)
+    right = oak.create_camera('right', resolution='800p', fps=30)
+    color_encoder = oak.create_encoder(color, codec='mjpeg')
+    left_encoder = oak.create_encoder(left, codec='mjpeg')
+    right_encoder = oak.create_encoder(right, codec='mjpeg')

     stereo = oak.create_stereo(left=left, right=right)
     stereo.config_stereo(align=color)

     imu = oak.create_imu()
     imu.config_imu(report_rate=400, batch_report_threshold=5)
     # DB3 / ROSBAG. ROSBAG doesn't require having ROS installed, while DB3 does.
-    record_components = [left.out.encoded, color.out.encoded, right.out.encoded, stereo.out.depth, imu]
+    record_components = [left_encoder, color_encoder, right_encoder, stereo.out.depth, imu]
     oak.record(record_components, 'record', record_type=RecordType.ROSBAG)
     # Visualize only color stream
-    oak.visualize(color.out.encoded)
+    oak.visualize(color_encoder)
     oak.start(blocking=True)
diff --git a/depthai_sdk/examples/trigger_action/person_record.py b/depthai_sdk/examples/trigger_action/person_record.py
index 89819f07c..0245bda4b 100644
--- a/depthai_sdk/examples/trigger_action/person_record.py
+++ b/depthai_sdk/examples/trigger_action/person_record.py
@@ -3,13 +3,14 @@
 from depthai_sdk.trigger_action.triggers.detection_trigger import DetectionTrigger

 with OakCamera() as oak:
-    color = oak.create_camera('color', encode='jpeg')
+    color = oak.create_camera('color')
+    color_encoder = oak.create_encoder(color, codec='mjpeg')
     stereo = oak.create_stereo('400p')
     nn = oak.create_nn('mobilenet-ssd', color)

     trigger = DetectionTrigger(input=nn, min_detections={'person': 1}, cooldown=30)
-    action = RecordAction(inputs=[color, stereo.out.disparity], dir_path='./recordings/',
+    action = RecordAction(inputs=[color_encoder, stereo.out.disparity], dir_path='./record/',
                           duration_before_trigger=5, duration_after_trigger=10)

     oak.trigger_action(trigger=trigger, action=action)
diff --git a/depthai_sdk/setup.py b/depthai_sdk/setup.py
index 6891734b6..e4a6c7708 100644
--- a/depthai_sdk/setup.py
+++ b/depthai_sdk/setup.py
@@ -28,9 +28,10 @@
                       'matplotlib==3.5.3; python_version <= "3.7"',
                       'matplotlib==3.6.1; python_version > "3.7"'],
        "replay": ['mcap>=0.0.10',
+                  'mcap-protobuf-support==0.0.4',
                   'mcap-ros1-support==0.0.8',
                   'rosbags==0.9.11'],
-       "record": ['av'],
+       "record": ['av<9'],
        "test": ['pytest']
    },
    project_urls={
diff --git a/depthai_sdk/src/depthai_sdk/components/__init__.py b/depthai_sdk/src/depthai_sdk/components/__init__.py
index cd7df5ccf..3e9384a28 100644
--- a/depthai_sdk/src/depthai_sdk/components/__init__.py
+++ b/depthai_sdk/src/depthai_sdk/components/__init__.py
@@ -1,5 +1,6 @@
 from .component import Component
 from .camera_component import CameraComponent
+from .encoder_component import EncoderComponent
 from .nn_component import NNComponent
 from .stereo_component import StereoComponent
 from .imu_component import IMUComponent
diff --git a/depthai_sdk/src/depthai_sdk/components/camera_component.py b/depthai_sdk/src/depthai_sdk/components/camera_component.py
index 34ae1534a..23269b16f 100644
--- a/depthai_sdk/src/depthai_sdk/components/camera_component.py
+++ b/depthai_sdk/src/depthai_sdk/components/camera_component.py
@@ -1,11 +1,11 @@
 import logging
-from typing import Dict
+from typing import Dict, List, Optional, Union

 from depthai_sdk.classes.enum import ResizeMode
 from
depthai_sdk.components.camera_control import CameraControl from depthai_sdk.components.camera_helper import * from depthai_sdk.components.component import Component, ComponentOutput -from depthai_sdk.components.parser import parse_resolution, parse_encode, encoder_profile_to_fourcc +from depthai_sdk.components.parser import parse_resolution from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout, ReplayStream from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames from depthai_sdk.replay import Replay @@ -21,7 +21,6 @@ def __init__(self, str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution ]] = None, fps: Optional[float] = None, - encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, sensor_type: Optional[dai.CameraSensorType] = None, rotation: Optional[int] = None, replay: Optional[Replay] = None, @@ -38,7 +37,6 @@ def __init__(self, source (str or dai.CameraBoardSocket): Source of the camera. Either color/rgb/right/left resolution (optional): Camera resolution, eg. '800p' or '4k' fps (float, optional): Camera FPS - encode: Encode streams before sending them to the host. Either True (use default), or mjpeg/h264/h265 sensor_type: To force color/mono/tof camera rotation (int, optional): Rotate the camera by 90, 180, 270 degrees replay (Replay object): Replay object to use for mocking the camera @@ -54,7 +52,6 @@ def __init__(self, self._device = device self.node: Optional[Union[dai.node.ColorCamera, dai.node.MonoCamera, dai.node.XLinkIn]] = None - self.encoder: Optional[dai.node.VideoEncoder] = None self.stream: Optional[dai.Node.Output] = None # Node output to be used as eg. an input into NN self.stream_size: Optional[Tuple[int, int]] = None # Output size @@ -64,7 +61,6 @@ def __init__(self, self._source = self._source[len('CameraBoardSocket.'):] self._socket = source - self._replay: Optional[Replay] = replay self._args: Dict = args self.name = name @@ -151,12 +147,11 @@ def __init__(self, targetWidthIsp = targetWidthRes res = getClosesResolution(sensor, sensor_type, width=targetWidthRes) self.node.setResolution(res) - scale = getClosestIspScale(self.node.getIspSize(), width=targetWidthIsp, - videoEncoder=(encode is not None)) + scale = getClosestIspScale(self.node.getIspSize(), width=targetWidthIsp) self.node.setIspScale(*scale) curr_size = self.node.getVideoSize() - closest = getClosestVideoSize(*curr_size, videoEncoder=encode) + closest = getClosestVideoSize(*curr_size) self.node.setVideoSize(*closest) self.node.setVideoNumFramesPool(2) # We will increase it later if we are streaming to host @@ -177,26 +172,6 @@ def __init__(self, self.stream = rot_manip.out self.stream_size = self.stream_size[::-1] - if encode: - self.encoder = pipeline.createVideoEncoder() - self._encoder_profile = parse_encode(encode) # MJPEG by default - self.encoder.setDefaultProfilePreset(self.get_fps(), self._encoder_profile) - - if self.is_replay(): # TODO - this might be not needed, we check for replay above and return - # Create ImageManip to convert to NV12 - type_manip = pipeline.createImageManip() - type_manip.setFrameType(dai.ImgFrame.Type.NV12) - type_manip.setMaxOutputFrameSize(self.stream_size[0] * self.stream_size[1] * 3) - - self.stream.link(type_manip.inputImage) - type_manip.out.link(self.encoder.input) - elif self.is_mono(): - self.stream.link(self.encoder.input) - elif self.is_color(): - self.node.video.link(self.encoder.input) - else: - raise ValueError('CameraComponent is neither Color, Mono, nor Replay!') - if 
self._args: self._config_camera_args(self._args) @@ -210,6 +185,18 @@ def __init__(self, # CameraControl message doesn't use any additional data (only metadata) self._control_xlink_in.setMaxDataSize(1) + def ensure_encoder_compatible_size(self) -> None: + if self.is_color(): + self.node.setIspScale( + *getClosestIspScale( + self.node.getIspSize(), + width=self.node.getIspWidth(), + videoEncoder=True), + ) + self.node.setVideoSize( + *getClosestVideoSize(*self.node.getVideoSize(), videoEncoder=True) + ) + def on_pipeline_started(self, device: dai.Device): if self._control_xlink_in is not None: queue = device.getInputQueue(self._control_xlink_in.getStreamName()) @@ -402,46 +389,8 @@ def set_fps(self, fps: float): else: self.node.setFps(fps) - def config_encoder_h26x(self, - rate_control_mode: Optional[dai.VideoEncoderProperties.RateControlMode] = None, - keyframe_freq: Optional[int] = None, - bitrate_kbps: Optional[int] = None, - num_b_frames: Optional[int] = None, - ): - if self.encoder is None: - raise Exception('Video encoder was not enabled!') - if self._encoder_profile == dai.VideoEncoderProperties.Profile.MJPEG: - raise Exception('Video encoder was set to MJPEG while trying to configure H26X attributes!') - - if rate_control_mode is not None: - self.encoder.setRateControlMode(rate_control_mode) - if keyframe_freq is not None: - self.encoder.setKeyframeFrequency(keyframe_freq) - if bitrate_kbps is not None: - self.encoder.setBitrateKbps(bitrate_kbps) - if num_b_frames is not None: - self.encoder.setNumBFrames(num_b_frames) - - def config_encoder_mjpeg(self, - quality: Optional[int] = None, - lossless: bool = False - ): - if self.encoder is None: - raise Exception('Video encoder was not enabled!') - if self._encoder_profile != dai.VideoEncoderProperties.Profile.MJPEG: - raise Exception( - f'Video encoder was set to {self._encoder_profile} while trying to configure MJPEG attributes!' 
- ) - - if quality is not None: - self.encoder.setQuality(quality) - if lossless is not None: - self.encoder.setLossless(lossless) - - def get_stream_xout(self, fourcc: Optional[str] = None) -> StreamXout: - if self.encoder is not None and fourcc is not None: - return StreamXout(self.encoder.bitstream, name=self.name or self._source + '_bitstream') - elif self.is_replay(): + def get_stream_xout(self) -> StreamXout: + if self.is_replay(): return ReplayStream(self.name or self._source) elif self.is_mono(): return StreamXout(self.stream, name=self.name or self._source + '_mono') @@ -464,32 +413,22 @@ def set_num_frames_pool(self, num_frames: int, preview_num_frames: Optional[int] if preview_num_frames is not None: self._preview_num_frames_pool = preview_num_frames - def get_fourcc(self) -> Optional[str]: - if self.encoder is None: - return None - return encoder_profile_to_fourcc(self._encoder_profile) - """ Available outputs (to the host) of this component """ class Out: class CameraOut(ComponentOutput): - def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase: - return XoutFrames(self._comp.get_stream_xout(fourcc), fourcc).set_comp_out(self) + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames(self._comp.get_stream_xout()).set_comp_out(self) class ReplayOut(ComponentOutput): def __call__(self, device: dai.Device) -> XoutBase: return XoutFrames(ReplayStream(self._comp._source)).set_comp_out(self) - class EncodedOut(CameraOut): - def __call__(self, device: dai.Device) -> XoutBase: - return super().__call__(device, fourcc=self._comp.get_fourcc()) - def __init__(self, camera_component: 'CameraComponent'): self.replay = self.ReplayOut(camera_component) self.camera = self.CameraOut(camera_component) - self.encoded = self.EncodedOut(camera_component) self.main = self.replay if camera_component.is_replay() else self.camera diff --git a/depthai_sdk/src/depthai_sdk/components/encoder_component.py b/depthai_sdk/src/depthai_sdk/components/encoder_component.py new file mode 100644 index 000000000..546ec2f03 --- /dev/null +++ b/depthai_sdk/src/depthai_sdk/components/encoder_component.py @@ -0,0 +1,107 @@ +from typing import Optional, Union + +import depthai as dai +from depthai_sdk.components.component import Component, ComponentOutput +from depthai_sdk.components.camera_component import CameraComponent +from depthai_sdk.components.stereo_component import StereoComponent +from depthai_sdk.components.parser import encoder_profile_to_fourcc, parse_encode +from depthai_sdk.oak_outputs.xout.xout_base import StreamXout, XoutBase +from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames + + +class EncoderComponent(Component): + def __init__( + self, + pipeline: dai.Pipeline, + input: Union[CameraComponent, StereoComponent], + codec: Union[str, dai.VideoEncoderProperties.Profile], + name: Optional[str] = None, + ) -> None: + super().__init__() + self.out = _EncoderComponentOutputs(self) + + input.ensure_encoder_compatible_size() + + self.name = name + self.pipeline = pipeline + self.manip: Optional[dai.node.ImageManip] = None + self.node = pipeline.create(dai.node.VideoEncoder) + self.node.setDefaultProfilePreset(input.get_fps(), parse_encode(codec)) + + self._get_node_out(input).link(self.node.input) + + def config_encoder_h26x( + self, + rate_control_mode: Optional[dai.VideoEncoderProperties.RateControlMode] = None, + keyframe_freq: Optional[int] = None, + bitrate_kbps: Optional[int] = None, + num_b_frames: Optional[int] = None, + ): + if 
self.node.getProfile() not in [ + dai.VideoEncoderProperties.Profile.H264_BASELINE, + dai.VideoEncoderProperties.Profile.H264_HIGH, + dai.VideoEncoderProperties.Profile.H264_MAIN, + dai.VideoEncoderProperties.Profile.H265_MAIN, + ]: + raise ValueError(f"Encoder profile {self.node.getProfile()} is not H.26x") + if rate_control_mode is not None: + self.node.setRateControlMode(rate_control_mode) + if keyframe_freq is not None: + self.node.setKeyframeFrequency(keyframe_freq) + if bitrate_kbps is not None: + self.node.setBitrateKbps(bitrate_kbps) + if num_b_frames is not None: + self.node.setNumBFrames(num_b_frames) + + def config_encoder_mjpeg( + self, + quality: Optional[int] = None, + lossless: bool = False, + ): + if self.node.getProfile() != dai.VideoEncoderProperties.Profile.MJPEG: + raise ValueError(f"Encoder profile {self.node.getProfile()} is not MJPEG") + if quality is not None: + self.node.setQuality(quality) + if lossless is not None: + self.node.setLossless(lossless) + + def get_stream_xout(self) -> StreamXout: + return StreamXout(self.node.bitstream, self.name) + + def get_fourcc(self) -> str: + return encoder_profile_to_fourcc(self.node.getProfile()) + + def _get_node_out( + self, component: Union[CameraComponent, StereoComponent] + ) -> dai.Node.Output: + if isinstance(component, CameraComponent): + if isinstance(component.node, dai.node.ColorCamera): + return component.node.video + elif isinstance(component.node, dai.node.MonoCamera): + return component.node.out + elif isinstance(component.node, dai.node.XLinkIn): + # Ensure input is in NV12 format when using XLinkIn + self.manip = self.pipeline.create(dai.node.ImageManip) + self.manip.setFrameType(dai.ImgFrame.Type.NV12) + width, height = component.stream_size + self.manip.setMaxOutputFrameSize(width * height * 3 // 2) + component.node.out.link(self.manip.inputImage) + return self.manip.out + raise ValueError(f"Unknown camera node: {component.node}") + elif isinstance(component, StereoComponent): + if component.colormap_manip: + return component.colormap_manip.out + return component.node.disparity + raise ValueError(f"Unknown component: {component}") + + +class _EncoderComponentMainOutput(ComponentOutput): + def __call__(self, device: dai.Device) -> XoutBase: + return XoutFrames( + self._comp.get_stream_xout(), self._comp.get_fourcc() + ).set_comp_out(self) + + +class _EncoderComponentOutputs: + def __init__(self, component: EncoderComponent) -> None: + self.main = _EncoderComponentMainOutput(component) diff --git a/depthai_sdk/src/depthai_sdk/components/nn_component.py b/depthai_sdk/src/depthai_sdk/components/nn_component.py index 6449f9841..25e2acb8f 100644 --- a/depthai_sdk/src/depthai_sdk/components/nn_component.py +++ b/depthai_sdk/src/depthai_sdk/components/nn_component.py @@ -738,15 +738,6 @@ def __call__(self, device: dai.Device) -> XoutTracker: calculate_speed=self._comp.calculate_speed, ).set_comp_out(self) - class EncodedOut(MainOut): - def __call__(self, device: dai.Device) -> XoutNnResults: - """ - Streams NN results and encoded frames (frames used for inferencing) - Produces DetectionPacket or TwoStagePacket (if it's 2. stage NNComponent). 
- """ - # A bit hacky, maybe we can remove this alltogether - return super().__call__(device, fourcc=self._comp._get_camera_comp().get_fourcc()) - class NnDataOut(ComponentOutput): def __call__(self, device: dai.Device) -> XoutNnData: node_output = self._comp.node.out if \ @@ -763,7 +754,6 @@ def __init__(self, nn_component: 'NNComponent'): self.spatials = self.SpatialOut(nn_component) self.twostage_crops = self.TwoStageOut(nn_component) self.tracker = self.TrackerOut(nn_component) - self.encoded = self.EncodedOut(nn_component) self.nn_data = self.NnDataOut(nn_component) # Checks diff --git a/depthai_sdk/src/depthai_sdk/components/stereo_component.py b/depthai_sdk/src/depthai_sdk/components/stereo_component.py index 2b9d7de72..28477166e 100644 --- a/depthai_sdk/src/depthai_sdk/components/stereo_component.py +++ b/depthai_sdk/src/depthai_sdk/components/stereo_component.py @@ -9,7 +9,7 @@ from depthai_sdk.components.camera_component import CameraComponent, ComponentOutput from depthai_sdk.components.component import Component -from depthai_sdk.components.parser import parse_median_filter, parse_encode, encoder_profile_to_fourcc +from depthai_sdk.components.parser import parse_median_filter from depthai_sdk.components.stereo_control import StereoControl from depthai_sdk.components.undistort import _get_mesh from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout @@ -46,8 +46,7 @@ def __init__(self, left: Union[CameraComponent, dai.node.MonoCamera], # Left stereo camera right: Union[CameraComponent, dai.node.MonoCamera], # Right stereo camera replay: Optional[Replay] = None, - args: Any = None, - encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None): + args: Any = None): """ Args: device (dai.Device): DepthAI device. @@ -56,7 +55,6 @@ def __init__(self, right (dai.None.Output / CameraComponent): Right mono camera source. Will get handled by Camera object. replay (Replay object, optional): Replay object to use for playback. args (Any, optional): Use user defined arguments when constructing the pipeline. - encode (str/bool/Profile, optional): Encode the output stream. 
""" super().__init__() self.out = self.Out(self) @@ -78,6 +76,7 @@ def __init__(self, self.node: dai.node.StereoDepth = pipeline.createStereoDepth() self.node.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY) + self.colormap_manip: Optional[dai.node.ImageManip] = None self._align_component: Optional[CameraComponent] = None self.ir_settings = { @@ -85,13 +84,6 @@ def __init__(self, 'continuous_mode': False, } - # Encoder - self.encoder = None - if encode: - self.encoder = pipeline.createVideoEncoder() - # MJPEG by default - self._encoderProfile = parse_encode(encode) - # Postprocessing options self._colorize = None self._postprocess_colormap = None @@ -167,15 +159,6 @@ def __init__(self, self._left_stream.link(self.node.left) self._right_stream.link(self.node.right) - if self.encoder: - try: - fps = self.left.get_fps() # CameraComponent - except AttributeError: - fps = self.left.getFps() # MonoCamera - - self.encoder.setDefaultProfilePreset(fps, self._encoderProfile) - self.node.disparity.link(self.encoder.input) - self.node.setRectifyEdgeFillColor(0) if self._undistortion_offset is not None: @@ -198,6 +181,12 @@ def __init__(self, # CameraControl message doesn't use any additional data (only metadata) self._control_xlink_in.setMaxDataSize(1) + def ensure_encoder_compatible_size(self) -> None: + if isinstance(self.left, Component): + self.left.ensure_encoder_compatible_size() + if isinstance(self.right, Component): + self.right.ensure_encoder_compatible_size() + def on_pipeline_started(self, device: dai.Device): if self._control_xlink_in is not None: queue = device.getInputQueue(self._control_xlink_in.getStreamName()) @@ -323,32 +312,29 @@ def config_wls(self, def set_colormap(self, colormap: dai.Colormap): """ Sets the colormap to use for colorizing the disparity map. Used for on-device postprocessing. - Works only with `encoded` output. + Works only with encoded output. Note: This setting can affect the performance. Args: colormap: Colormap to use for colorizing the disparity map. 
""" - if self.colormap != colormap and self.encoder: - colormap_manip = self.node.getParentPipeline().create(dai.node.ImageManip) - colormap_manip.initialConfig.setColormap(colormap, self.node.initialConfig.getMaxDisparity()) - colormap_manip.initialConfig.setFrameType(dai.ImgFrame.Type.NV12) + if self.colormap != colormap: + if self.colormap_manip is not None: + self.node.disparity.unlink(self.colormap_manip.inputImage) + + self.colormap_manip = self.node.getParentPipeline().create(dai.node.ImageManip) + self.colormap_manip.initialConfig.setColormap(colormap, self.node.initialConfig.getMaxDisparity()) + self.colormap_manip.initialConfig.setFrameType(dai.ImgFrame.Type.NV12) if self._align_component: - h, w = self._align_component.node.getIspSize() \ - if isinstance(self._align_component.node, dai.node.ColorCamera) \ - else self._align_component.node.getResolutionSize() + if isinstance(self._align_component.node, dai.node.ColorCamera): + h, w = self._align_component.node.getIspSize() + else: + h, w = self._align_component.node.getResolutionSize() else: h, w = self.left.stream_size - colormap_manip.setMaxOutputFrameSize(h * w * 3) - self.node.disparity.link(colormap_manip.inputImage) - - if self.encoder: - self.node.disparity.unlink(self.encoder.input) - colormap_manip.out.link(self.encoder.input) - elif not self.encoder: - warnings.warn('At the moment, colormap can be used only if encoder is enabled.') - - self.colormap = colormap + self.colormap_manip.setMaxOutputFrameSize(h * w * 3) + self.node.disparity.link(self.colormap_manip.inputImage) + self.colormap = colormap def set_auto_ir(self, auto_mode: bool, continuous_mode: bool = False) -> None: """ @@ -420,10 +406,11 @@ def _get_maps(self, width: int, height: int, calib: dai.CalibrationHandler): mapX_r, mapY_r = cv2.initUndistortRectifyMap(M2, d2, R2, M2, image_size, cv2.CV_32FC1) return mapX_l, mapY_l, mapX_r, mapY_r - def get_fourcc(self) -> Optional[str]: - if self.encoder is None: - return None - return encoder_profile_to_fourcc(self._encoderProfile) + def get_fps(self) -> float: + try: + return self.left.get_fps() # CameraComponent + except AttributeError: + return self.left.getFps() # MonoCamera """ Available outputs (to the host) of this component @@ -452,11 +439,10 @@ def __call__(self, device: dai.Device) -> XoutBase: ).set_comp_out(self) class DisparityOut(ComponentOutput): - def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase: + def __call__(self, device: dai.Device) -> XoutBase: return XoutDisparity( device=device, - frames=StreamXout(self._comp.encoder.bitstream) if fourcc else - StreamXout(self._comp.disparity), + frames=StreamXout(self._comp.disparity), disp_factor=255.0 / self._comp.node.getMaxDisparity(), mono_frames=self._comp._mono_frames(), colorize=self._comp._colorize, @@ -473,15 +459,6 @@ class RectifiedRightOut(ComponentOutput): def __call__(self, device: dai.Device) -> XoutBase: return XoutFrames(StreamXout(self._comp.node.rectifiedRight, 'Rectified right')).set_comp_out(self) - class EncodedOut(DisparityOut): - def __call__(self, device: dai.Device) -> XoutBase: - if not self._comp.encoder: - raise RuntimeError('Encoder not enabled, cannot output encoded frames') - if self._comp.wls_config['enabled']: - warnings.warn('WLS filter is enabled, but cannot be applied to encoded frames.') - - return super().__call__(device, fourcc=self._comp.get_fourcc()) - def __init__(self, stereo_component: 'StereoComponent'): self._comp = stereo_component @@ -489,5 +466,4 @@ def __init__(self, 
stereo_component: 'StereoComponent'): self.rectified_left = self.RectifiedLeftOut(stereo_component) self.rectified_right = self.RectifiedRightOut(stereo_component) self.disparity = self.DisparityOut(stereo_component) - self.encoded = self.EncodedOut(stereo_component) self.main = self.depth diff --git a/depthai_sdk/src/depthai_sdk/oak_camera.py b/depthai_sdk/src/depthai_sdk/oak_camera.py index 9d15f1874..72c7671b0 100644 --- a/depthai_sdk/src/depthai_sdk/oak_camera.py +++ b/depthai_sdk/src/depthai_sdk/oak_camera.py @@ -29,6 +29,7 @@ # RecordConfig, OutputConfig, SyncConfig, RosStreamConfig, TriggerActionConfig from depthai_sdk.components.camera_component import CameraComponent from depthai_sdk.components.component import Component, ComponentOutput +from depthai_sdk.components.encoder_component import EncoderComponent from depthai_sdk.components.imu_component import IMUComponent from depthai_sdk.components.tof_component import ToFComponent from depthai_sdk.components.nn_component import NNComponent @@ -131,13 +132,39 @@ def __init__(self, logging.info(f'Available streams from recording: {self.replay.getStreams()}') self._calibration = self._init_calibration() + def _init_device(self, + config: dai.Device.Config, + device_str: Optional[str] = None, + ) -> None: + + """ + Connect to the OAK camera + """ + self.device = None + if device_str is not None: + device_info = dai.DeviceInfo(device_str) + else: + (found, device_info) = dai.Device.getFirstAvailableDevice() + if not found: + raise Exception("No OAK device found to connect to!") + + self.device = dai.Device( + config=config, + deviceInfo=device_info, + ) + + # TODO test with usb3 (SUPER speed) + if config.board.usb.maxSpeed != dai.UsbSpeed.HIGH and self.device.getUsbSpeed() == dai.UsbSpeed.HIGH: + warnings.warn("Device connected in USB2 mode! This might cause some issues. " + "In such case, please try using a (different) USB3 cable, " + "or force USB2 mode 'with OakCamera(usb_speed='usb2') as oak:'", UsbWarning) + def camera(self, source: Union[str, dai.CameraBoardSocket], resolution: Optional[Union[ str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution ]] = None, fps: Optional[float] = None, - encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, ) -> CameraComponent: """ Creates Camera component. This abstracts ColorCamera/MonoCamera nodes and supports mocking the camera when @@ -186,7 +213,6 @@ def camera(self, source=source, resolution=resolution, fps=fps, - encode=encode, sensor_type=sensor_type, rotation=self._rotation, replay=self.replay, @@ -194,38 +220,10 @@ def camera(self, self._components.append(comp) return comp - def _init_device(self, - config: dai.Device.Config, - device_str: Optional[str] = None, - ) -> None: - - """ - Connect to the OAK camera - """ - self.device = None - if device_str is not None: - device_info = dai.DeviceInfo(device_str) - else: - (found, device_info) = dai.Device.getFirstAvailableDevice() - if not found: - raise Exception("No OAK device found to connect to!") - - self.device = dai.Device( - config=config, - deviceInfo=device_info, - ) - - # TODO test with usb3 (SUPER speed) - if config.board.usb.maxSpeed != dai.UsbSpeed.HIGH and self.device.getUsbSpeed() == dai.UsbSpeed.HIGH: - warnings.warn("Device connected in USB2 mode! This might cause some issues. 
" - "In such case, please try using a (different) USB3 cable, " - "or force USB2 mode 'with OakCamera(usb_speed='usb2') as oak:'", UsbWarning) - def create_camera(self, source: Union[str, dai.CameraBoardSocket], resolution: Optional[Resolution] = None, fps: Optional[float] = None, - encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, ) -> CameraComponent: """ Deprecated, use camera() instead. @@ -240,14 +238,13 @@ def create_camera(self, fps (float): Sensor FPS encode (bool/str/Profile): Whether we want to enable video encoding (accessible via cameraComponent.out_encoded). If True, it will use MJPEG """ - return self.camera(source, resolution, fps, encode) + return self.camera(source, resolution, fps) def all_cameras(self, resolution: Optional[Union[ str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution ]] = None, fps: Optional[float] = None, - encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, ) -> List[CameraComponent]: """ Creates Camera component for each camera sensors on the OAK camera. @@ -255,27 +252,22 @@ def all_cameras(self, Args: resolution (str/SensorResolution): Sensor resolution of the camera. fps (float): Sensor FPS - encode (bool/str/Profile): Whether we want to enable video encoding (accessible via cameraComponent.out_encoded). If True, it will use MJPEG """ components: List[CameraComponent] = [] # Loop over all available camera sensors if self.replay: sources = self.replay.getStreams() # TODO handle in case the stream is not from a camera else: - sources = [cam_sensor.socket for cam_sensor in self.device.getConnectedCameraFeatures()] + sources = self.device.getConnectedCameras() for source in sources: - comp = CameraComponent(self.device, - self.pipeline, - source=source, - resolution=resolution, - fps=fps, - encode=encode, - rotation=self._rotation, - replay=self.replay, - args=self._args) - components.append(comp) - - self._components.extend(components) + components.append( + self.camera( + source=source, + resolution=resolution, + fps=fps, + ) + ) + return components def create_all_cameras(self, @@ -284,7 +276,6 @@ def create_all_cameras(self, dai.MonoCameraProperties.SensorResolution ]] = None, fps: Optional[float] = None, - encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None, ) -> List[CameraComponent]: """ Deprecated, use all_cameras() instead. @@ -296,7 +287,7 @@ def create_all_cameras(self, fps (float): Sensor FPS encode (bool/str/Profile): Whether we want to enable video encoding (accessible via cameraComponent.out_encoded). If True, it will use MJPEG """ - return self.all_cameras(resolution, fps, encode) + return self.all_cameras(resolution, fps) def create_nn(self, model: Union[str, Dict, Path], @@ -338,7 +329,6 @@ def stereo(self, fps: Optional[float] = None, left: Union[None, dai.Node.Output, CameraComponent] = None, # Left mono camera right: Union[None, dai.Node.Output, CameraComponent] = None, # Right mono camera - encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None ) -> StereoComponent: """ Create Stereo camera component. If left/right cameras/component aren't specified they will get created internally. @@ -348,7 +338,6 @@ def stereo(self, fps (float): If monochrome cameras aren't already passed, create them and set specified FPS left (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. 
right (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. - encode (bool/str/Profile): Whether we want to enable video encoding (accessible via StereoComponent.out.encoded). If True, it will use h264 codec. """ if left is None: left = self.camera(source="left", resolution=resolution, fps=fps) @@ -363,8 +352,7 @@ def stereo(self, left=left, right=right, replay=self.replay, - args=self._args, - encode=encode) + args=self._args) self._components.append(comp) return comp @@ -373,8 +361,7 @@ def create_stereo(self, fps: Optional[float] = None, left: Union[None, dai.Node.Output, CameraComponent] = None, # Left mono camera right: Union[None, dai.Node.Output, CameraComponent] = None, # Right mono camera - name: Optional[str] = None, - encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None + name: Optional[str] = None ) -> StereoComponent: """ Deprecated, use stereo() instead. @@ -386,9 +373,24 @@ def create_stereo(self, fps (float): If monochrome cameras aren't already passed, create them and set specified FPS left (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. right (CameraComponent/dai.node.MonoCamera): Pass the camera object (component/node) that will be used for stereo camera. - encode (bool/str/Profile): Whether we want to enable video encoding (accessible via StereoComponent.out.encoded). If True, it will use h264 codec. """ - return self.stereo(resolution, fps, left, right, encode) + return self.stereo(resolution, fps, left, right) + + def create_encoder( + self, + input: Union[CameraComponent, StereoComponent], + codec: Union[str, dai.VideoEncoderProperties.Profile], + ) -> EncoderComponent: + """ + Create Encoder component. + + Args: + input (CameraComponent/StereoComponent): Input to the encoder + codec (str/Profile): Codec to use for encoding + """ + comp = EncoderComponent(self.pipeline, input, codec) + self._components.append(comp) + return comp def create_tof(self, source: Union[str, dai.CameraBoardSocket, None] = None) -> ToFComponent: """
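
For reference, a minimal end-to-end sketch of the encoder workflow this diff introduces, assembled from the examples and the new EncoderComponent above; the resolution, FPS, bitrate, keyframe interval and output directory are illustrative values, not SDK defaults.

from depthai_sdk import OakCamera, RecordType

with OakCamera() as oak:
    # Cameras no longer take an `encode=` argument; encoding is a separate component.
    color = oak.create_camera('color', resolution='1080P', fps=30)
    encoder = oak.create_encoder(color, codec='h265')

    # Optional H.26x tuning exposed by EncoderComponent (raises if the codec is not H.264/H.265).
    encoder.config_encoder_h26x(bitrate_kbps=3000, keyframe_freq=30)

    # The encoder component is passed wherever `<component>.out.encoded` was used before.
    oak.visualize(encoder, fps=True)
    oak.record([encoder], './record', RecordType.VIDEO)
    oak.start(blocking=True)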