diff --git a/Dockerfile b/Dockerfile index 6db81e46b0..d1dbd07552 100644 --- a/Dockerfile +++ b/Dockerfile @@ -98,7 +98,9 @@ COPY labelmap.txt . COPY --from=ov-converter /models/public/ssdlite_mobilenet_v2/FP16 openvino-model RUN wget -q https://github.com/openvinotoolkit/open_model_zoo/raw/master/data/dataset_classes/coco_91cl_bkgr.txt -O openvino-model/coco_91cl_bkgr.txt && \ sed -i 's/truck/car/g' openvino-model/coco_91cl_bkgr.txt - +# Get Audio Model and labels +RUN wget -qO cpu_audio_model.tflite https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite +COPY audio-labelmap.txt . FROM wget AS s6-overlay diff --git a/audio-labelmap.txt b/audio-labelmap.txt new file mode 100644 index 0000000000..4a38b5f639 --- /dev/null +++ b/audio-labelmap.txt @@ -0,0 +1,521 @@ +speech +speech +speech +speech +babbling +speech +yell +bellow +whoop +yell +yell +yell +whispering +laughter +laughter +laughter +snicker +laughter +laughter +crying +crying +crying +yell +sigh +singing +choir +sodeling +chant +mantra +child_singing +synthetic_singing +rapping +humming +groan +grunt +whistling +breathing +wheeze +snoring +gasp +pant +snort +cough +throat_clearing +sneeze +sniff +run +shuffle +footsteps +chewing +biting +gargling +stomach_rumble +burping +hiccup +fart +hands +finger_snapping +clapping +heartbeat +heart_murmur +cheering +applause +chatter +crowd +speech +children_playing +animal +pets +dog +bark +yip +howl +bow-wow +growling +whimper_dog +cat +purr +meow +hiss +caterwaul +livestock +horse +clip-clop +neigh +cattle +moo +cowbell +pig +oink +goat +bleat +sheep +fowl +chicken +cluck +cock-a-doodle-doo +turkey +gobble +duck +quack +goose +honk +wild_animals +roaring_cats +roar +bird +chird +chirp +squawk +pigeon +coo +crow +caw +owl +hoot +flapping_wings +dogs +rats +mouse +patter +insect +cricket +mosquito +fly +buzz +buzz +frog +croak +snake +rattle +whale_vocalization +music +musical_instrument +plucked_string_instrument +guitar 
+electric_guitar +bass_guitar +acoustic_guitar +steel_guitar +tapping +strum +banjo +sitar +mandolin +zither +ukulele +keyboard +piano +electric_piano +organ +electronic_organ +hammond_organ +synthesizer +sampler +harpsichord +percussion +drum_kit +drum_machine +drum +snare_drum +rimshot +drum_roll +bass_drum +timpani +tabla +cymbal +hi-hat +wood_block +tambourine +rattle +maraca +gong +tubular_bells +mallet_percussion +marimba +glockenspiel +vibraphone +steelpan +orchestra +brass_instrument +french_horn +trumpet +trombone +bowed_string_instrument +string_section +violin +pizzicato +cello +double_bass +wind_instrument +flute +saxophone +clarinet +harp +bell +church_bell +jingle_bell +bicycle_bell +tuning_fork +chime +wind_chime +change_ringing +harmonica +accordion +bagpipes +didgeridoo +shofar +theremin +singing_bowl +scratching +pop_music +hip_hop_music +beatboxing +rock_music +heavy_metal +punk_rock +grunge +progressive_rock +rock_and_roll +psychedelic_rock +rhythm_and_blues +soul_music +reggae +country +swing_music +bluegrass +funk +folk_music +middle_eastern_music +jazz +disco +classical_music +opera +electronic_music +house_music +techno +dubstep +drum_and_bass +electronica +electronic_dance_music +ambient_music +trance_music +music_of_latin_america +salsa_music +flamenco +blues +music_for_children +new-age_music +vocal_music +a_capella +music_of_africa +afrobeat +christian_music +gospel_music +music_of_asia +carnatic_music +music_of_bollywood +ska +traditional_music +independent_music +song +background_music +theme_music +jingle +soundtrack_music +lullaby +video_game_music +christmas_music +dance_music +wedding_music +happy_music +sad_music +tender_music +exciting_music +angry_music +scary_music +wind +rustling_leaves +wind_noise +thunderstorm +thunder +water +rain +raindrop +rain_on_surface +stream +waterfall +ocean +waves +steam +gurgling +fire +crackle +vehicle +boat +sailboat +rowboat +motorboat +ship +motor_vehicle +car +honk +toot +car_alarm 
+power_windows +skidding +tire_squeal +car_passing_by +race_car +truck +air_brake +air_horn +reversing_beeps +ice_cream_truck +bus +emergency_vehicle +police_car +ambulance +fire_engine +motorcycle +traffic_noise +rail_transport +train +train_whistle +train_horn +railroad_car +train_wheels_squealing +subway +aircraft +aircraft_engine +jet_engine +propeller +helicopter +fixed-wing_aircraft +bicycle +skateboard +engine +light_engine +dental_drill's_drill +lawn_mower +chainsaw +medium_engine +heavy_engine +engine_knocking +engine_starting +idling +accelerating +door +doorbell +ding-dong +sliding_door +slam +knock +tap +squeak +cupboard_open_or_close +drawer_open_or_close +dishes +cutlery +chopping +frying +microwave_oven +blender +water_tap +sink +bathtub +hair_dryer +toilet_flush +toothbrush +electric_toothbrush +vacuum_cleaner +zipper +keys_jangling +coin +scissors +electric_shaver +shuffling_cards +typing +typewriter +computer_keyboard +writing +alarm +telephone +telephone_bell_ringing +ringtone +telephone_dialing +dial_tone +busy_signal +alarm_clock +siren +civil_defense_siren +buzzer +smoke_detector +fire_alarm +foghorn +whistle +steam_whistle +mechanisms +ratchet +clock +tick +tick-tock +gears +pulleys +sewing_machine +mechanical_fan +air_conditioning +cash_register +printer +camera +single-lens_reflex_camera +tools +hammer +jackhammer +sawing +filing +sanding +power_tool +drill +explosion +gunshot +machine_gun +fusillade +artillery_fire +cap_gun +fireworks +firecracker +burst +eruption +boom +wood +chop +splinter +crack +glass +chink +shatter +liquid +splash +slosh +squish +drip +pour +trickle +gush +fill +spray +pump +stir +boiling +sonar +arrow +whoosh +thump +thunk +electronic_tuner +effects_unit +chorus_effect +basketball_bounce +bang +slap +whack +smash +breaking +bouncing +whip +flap +scratch +scrape +rub +roll +crushing +crumpling +tearing +beep +ping +ding +clang +squeal +creak +rustle +whir +clatter +sizzle +clicking +clickety-clack +rumble +plop 
+jingle +hum +zing +boing +crunch +silence +sine_wave +harmonic +chirp_tone +sound_effect +pulse +inside +inside +inside +outside +outside +reverberation +echo +noise +environmental_noise +static +mains_hum +distortion +sidetone +cacophony +white_noise +pink_noise +throbbing +vibration +television +radio +field_recording diff --git a/docs/docs/configuration/audio_detectors.md b/docs/docs/configuration/audio_detectors.md new file mode 100644 index 0000000000..ef1d8227cf --- /dev/null +++ b/docs/docs/configuration/audio_detectors.md @@ -0,0 +1,63 @@ +--- +id: audio_detectors +title: Audio Detectors +--- + +Frigate provides a built-in audio detector which runs on the CPU. Compared to object detection in images, audio detection is a relatively lightweight operation so the only option is to run the detection on a CPU. + +## Configuration + +Audio events work by detecting a type of audio and creating an event; the event will end once the type of audio has not been heard for the configured amount of time. Audio events save a snapshot at the beginning of the event as well as recordings throughout the event. The recordings are retained using the configured recording retention. + +### Enabling Audio Events + +Audio events can be enabled for all cameras or only for specific cameras. + +```yaml + +audio: # <- enable audio events for all cameras + enabled: True + +cameras: + front_camera: + ffmpeg: + ... + audio: + enabled: True # <- enable audio events for the front_camera +``` + +If you are using multiple streams then you must set the `audio` role on the stream that is going to be used for audio detection. This can be any stream, but the stream must have audio included. + +:::note + +The ffmpeg process for capturing audio will be a separate connection to the camera along with the other roles assigned to the camera; for this reason it is recommended that the go2rtc restream is used for this purpose. See [the restream docs](/configuration/restream.md) for more information. 
+ +::: + +```yaml +cameras: + front_camera: + ffmpeg: + inputs: + - path: rtsp://.../main_stream + roles: + - record + - path: rtsp://.../sub_stream # <- this stream must have audio enabled + roles: + - audio + - detect +``` + +### Configuring Audio Events + +The included audio model has over 500 different types of audio that can be detected, many of which are not practical. By default `bark`, `speech`, `yell`, and `scream` are enabled but these can be customized. + +```yaml +audio: + enabled: True + listen: + - bark + - scream + - speech + - yell +``` diff --git a/docs/docs/configuration/index.md b/docs/docs/configuration/index.md index 7279a76520..8915db6b3a 100644 --- a/docs/docs/configuration/index.md +++ b/docs/docs/configuration/index.md @@ -138,6 +138,20 @@ model: labelmap: 2: vehicle +# Optional: Audio Events Configuration +# NOTE: Can be overridden at the camera level +audio: + # Optional: Enable audio events (default: shown below) + enabled: False + # Optional: Configure the amount of seconds without detected audio to end the event (default: shown below) + max_not_heard: 30 + # Optional: Types of audio to listen for (default: shown below) + listen: + - bark + - scream + - speech + - yell + # Optional: logger verbosity settings logger: # Optional: Default log verbosity (default: shown below) diff --git a/docs/docs/configuration/detectors.md b/docs/docs/configuration/object_detectors.md similarity index 99% rename from docs/docs/configuration/detectors.md rename to docs/docs/configuration/object_detectors.md index 84507321c3..3f48423bcf 100644 --- a/docs/docs/configuration/detectors.md +++ b/docs/docs/configuration/object_detectors.md @@ -1,6 +1,6 @@ --- -id: detectors -title: Detectors +id: object_detectors +title: Object Detectors --- Frigate provides the following builtin detector types: `cpu`, `edgetpu`, `openvino`, and `tensorrt`. By default, Frigate will use a single CPU detector. 
Other detectors may require additional configuration as described below. When using multiple detectors they will run in dedicated processes, but pull from a common queue of detection requests from across all cameras. @@ -275,6 +275,6 @@ detectors: api_timeout: 0.1 # seconds ``` -Replace `` and `` with the IP address and port of your CodeProject.AI server. +Replace `` and `` with the IP address and port of your CodeProject.AI server. To verify that the integration is working correctly, start Frigate and observe the logs for any error messages related to CodeProject.AI. Additionally, you can check the Frigate web interface to see if the objects detected by CodeProject.AI are being displayed and tracked properly. \ No newline at end of file diff --git a/docs/docs/configuration/restream.md b/docs/docs/configuration/restream.md index 2d5c565b2e..61393a91c5 100644 --- a/docs/docs/configuration/restream.md +++ b/docs/docs/configuration/restream.md @@ -67,6 +67,7 @@ cameras: roles: - record - detect + - audio # <- only necessary if audio detection is enabled http_cam: ffmpeg: output_args: @@ -77,6 +78,7 @@ cameras: roles: - record - detect + - audio # <- only necessary if audio detection is enabled ``` ### With Sub Stream @@ -112,6 +114,7 @@ cameras: - path: rtsp://127.0.0.1:8554/rtsp_cam_sub # <--- the name here must match the name of the camera_sub in restream input_args: preset-rtsp-restream roles: + - audio # <- only necessary if audio detection is enabled - detect http_cam: ffmpeg: @@ -125,6 +128,7 @@ cameras: - path: rtsp://127.0.0.1:8554/http_cam_sub # <--- the name here must match the name of the camera_sub in restream input_args: preset-rtsp-restream roles: + - audio # <- only necessary if audio detection is enabled - detect ``` diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md index e5233e218c..36233ea686 100644 --- a/docs/docs/frigate/hardware.md +++ b/docs/docs/frigate/hardware.md @@ -50,7 +50,7 @@ The OpenVINO detector type is able to 
run on: - 6th Gen Intel Platforms and newer that have an iGPU - x86 & Arm64 hosts with VPU Hardware (ex: Intel NCS2) -More information is available [in the detector docs](/configuration/detectors#openvino-detector) +More information is available [in the detector docs](/configuration/object_detectors#openvino-detector) Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known examples are below: @@ -72,7 +72,7 @@ Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known ### TensorRT -The TensortRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/detectors#nvidia-tensorrt-detector). +The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector). Inference speeds will vary greatly depending on the GPU and the model used. `tiny` variants are faster than the equivalent non-tiny model, some known examples are below: diff --git a/docs/docs/guides/getting_started.md b/docs/docs/guides/getting_started.md index adbddb9c2c..cb67c59b43 100644 --- a/docs/docs/guides/getting_started.md +++ b/docs/docs/guides/getting_started.md @@ -71,7 +71,7 @@ cameras: ... ``` -More details on available detectors can be found [here](../configuration/detectors.md). +More details on available detectors can be found [here](../configuration/object_detectors.md). 
Restart Frigate and you should start seeing detections for `person`. If you want to track other objects, they will need to be added according to the [configuration file reference](../configuration/index.md#full-configuration-reference). diff --git a/docs/sidebars.js b/docs/sidebars.js index 41628f2edc..35ce2bee3b 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -16,7 +16,8 @@ module.exports = { ], Configuration: [ "configuration/index", - "configuration/detectors", + "configuration/object_detectors", + "configuration/audio_detectors", "configuration/cameras", "configuration/masks", "configuration/record", diff --git a/frigate/app.py b/frigate/app.py index 0b476cd432..ccfbd46961 100644 --- a/frigate/app.py +++ b/frigate/app.py @@ -29,6 +29,7 @@ MODEL_CACHE_DIR, RECORD_DIR, ) +from frigate.events.audio import listen_to_audio from frigate.events.cleanup import EventCleanup from frigate.events.external import ExternalEventProcessor from frigate.events.maintainer import EventProcessor @@ -44,7 +45,7 @@ from frigate.stats import StatsEmitter, stats_init from frigate.storage import StorageMaintainer from frigate.timeline import TimelineProcessor -from frigate.types import CameraMetricsTypes, RecordMetricsTypes +from frigate.types import CameraMetricsTypes, FeatureMetricsTypes from frigate.version import VERSION from frigate.video import capture_camera, track_camera from frigate.watchdog import FrigateWatchdog @@ -62,7 +63,7 @@ def __init__(self) -> None: self.log_queue: Queue = mp.Queue() self.plus_api = PlusApi() self.camera_metrics: dict[str, CameraMetricsTypes] = {} - self.record_metrics: dict[str, RecordMetricsTypes] = {} + self.feature_metrics: dict[str, FeatureMetricsTypes] = {} self.processes: dict[str, int] = {} def set_environment_vars(self) -> None: @@ -104,7 +105,7 @@ def init_config(self) -> None: user_config = FrigateConfig.parse_file(config_file) self.config = user_config.runtime_config(self.plus_api) - for camera_name in self.config.cameras.keys(): + 
for camera_name, camera_config in self.config.cameras.items(): # create camera_metrics self.camera_metrics[camera_name] = { "camera_fps": mp.Value("d", 0.0), # type: ignore[typeddict-item] @@ -159,13 +160,19 @@ def init_config(self) -> None: "capture_process": None, "process": None, } - self.record_metrics[camera_name] = { + self.feature_metrics[camera_name] = { + "audio_enabled": mp.Value( # type: ignore[typeddict-item] + # issue https://github.com/python/typeshed/issues/8799 + # from mypy 0.981 onwards + "i", + self.config.cameras[camera_name].audio.enabled, + ), "record_enabled": mp.Value( # type: ignore[typeddict-item] # issue https://github.com/python/typeshed/issues/8799 # from mypy 0.981 onwards "i", self.config.cameras[camera_name].record.enabled, - ) + ), } def set_log_levels(self) -> None: @@ -253,7 +260,7 @@ def init_recording_manager(self) -> None: recording_process = mp.Process( target=manage_recordings, name="recording_manager", - args=(self.config, self.recordings_info_queue, self.record_metrics), + args=(self.config, self.recordings_info_queue, self.feature_metrics), ) recording_process.daemon = True self.recording_process = recording_process @@ -312,7 +319,7 @@ def init_dispatcher(self) -> None: self.config, self.onvif_controller, self.camera_metrics, - self.record_metrics, + self.feature_metrics, comms, ) @@ -421,6 +428,17 @@ def start_camera_capture_processes(self) -> None: capture_process.start() logger.info(f"Capture process started for {name}: {capture_process.pid}") + def start_audio_processors(self) -> None: + if len([c for c in self.config.cameras.values() if c.audio.enabled]) > 0: + audio_process = mp.Process( + target=listen_to_audio, + name="audio_capture", + args=(self.config, self.feature_metrics), + ) + audio_process.daemon = True + audio_process.start() + logger.info(f"Audio process started: {audio_process.pid}") + def start_timeline_processor(self) -> None: self.timeline_processor = TimelineProcessor( self.config, 
self.timeline_queue, self.stop_event @@ -517,6 +535,7 @@ def start(self) -> None: self.start_detected_frames_processor() self.start_camera_processors() self.start_camera_capture_processes() + self.start_audio_processors() self.start_storage_maintainer() self.init_stats() self.init_external_event_processor() diff --git a/frigate/comms/dispatcher.py b/frigate/comms/dispatcher.py index b7e9e88586..1c9105ce87 100644 --- a/frigate/comms/dispatcher.py +++ b/frigate/comms/dispatcher.py @@ -6,7 +6,7 @@ from frigate.config import FrigateConfig from frigate.ptz import OnvifCommandEnum, OnvifController -from frigate.types import CameraMetricsTypes, RecordMetricsTypes +from frigate.types import CameraMetricsTypes, FeatureMetricsTypes from frigate.util import restart_frigate logger = logging.getLogger(__name__) @@ -39,19 +39,20 @@ def __init__( config: FrigateConfig, onvif: OnvifController, camera_metrics: dict[str, CameraMetricsTypes], - record_metrics: dict[str, RecordMetricsTypes], + feature_metrics: dict[str, FeatureMetricsTypes], communicators: list[Communicator], ) -> None: self.config = config self.onvif = onvif self.camera_metrics = camera_metrics - self.record_metrics = record_metrics + self.feature_metrics = feature_metrics self.comms = communicators for comm in self.comms: comm.subscribe(self._receive) self._camera_settings_handlers: dict[str, Callable] = { + "audio": self._on_audio_command, "detect": self._on_detect_command, "improve_contrast": self._on_motion_improve_contrast_command, "motion": self._on_motion_command, @@ -186,6 +187,29 @@ def _on_motion_threshold_command(self, camera_name: str, payload: int) -> None: motion_settings.threshold = payload # type: ignore[union-attr] self.publish(f"{camera_name}/motion_threshold/state", payload, retain=True) + def _on_audio_command(self, camera_name: str, payload: str) -> None: + """Callback for audio topic.""" + audio_settings = self.config.cameras[camera_name].audio + + if payload == "ON": + if not 
self.config.cameras[camera_name].audio.enabled_in_config: + logger.error( + "Audio detection must be enabled in the config to be turned on via MQTT." + ) + return + + if not audio_settings.enabled: + logger.info(f"Turning on audio detection for {camera_name}") + audio_settings.enabled = True + self.feature_metrics[camera_name]["audio_enabled"].value = True + elif payload == "OFF": + if self.feature_metrics[camera_name]["audio_enabled"].value: + logger.info(f"Turning off audio detection for {camera_name}") + audio_settings.enabled = False + self.feature_metrics[camera_name]["audio_enabled"].value = False + + self.publish(f"{camera_name}/audio/state", payload, retain=True) + def _on_recordings_command(self, camera_name: str, payload: str) -> None: """Callback for recordings topic.""" record_settings = self.config.cameras[camera_name].record @@ -200,12 +224,12 @@ def _on_recordings_command(self, camera_name: str, payload: str) -> None: if not record_settings.enabled: logger.info(f"Turning on recordings for {camera_name}") record_settings.enabled = True - self.record_metrics[camera_name]["record_enabled"].value = True + self.feature_metrics[camera_name]["record_enabled"].value = True elif payload == "OFF": - if self.record_metrics[camera_name]["record_enabled"].value: + if self.feature_metrics[camera_name]["record_enabled"].value: logger.info(f"Turning off recordings for {camera_name}") record_settings.enabled = False - self.record_metrics[camera_name]["record_enabled"].value = False + self.feature_metrics[camera_name]["record_enabled"].value = False self.publish(f"{camera_name}/recordings/state", payload, retain=True) diff --git a/frigate/comms/mqtt.py b/frigate/comms/mqtt.py index 07799f9dab..4ddfbe7f13 100644 --- a/frigate/comms/mqtt.py +++ b/frigate/comms/mqtt.py @@ -41,7 +41,7 @@ def _set_initial_topics(self) -> None: for camera_name, camera in self.config.cameras.items(): self.publish( f"{camera_name}/recordings/state", - "ON" if camera.record.enabled else "OFF", 
+ "ON" if camera.record.enabled_in_config else "OFF", retain=True, ) self.publish( @@ -49,6 +49,11 @@ def _set_initial_topics(self) -> None: "ON" if camera.snapshots.enabled else "OFF", retain=True, ) + self.publish( + f"{camera_name}/audio/state", + "ON" if camera.audio.enabled_in_config else "OFF", + retain=True, + ) self.publish( f"{camera_name}/detect/state", "ON" if camera.detect.enabled else "OFF", diff --git a/frigate/config.py b/frigate/config.py index 662b7b8bd0..ea7ecdc49e 100644 --- a/frigate/config.py +++ b/frigate/config.py @@ -40,6 +40,7 @@ FRIGATE_ENV_VARS = {k: v for k, v in os.environ.items() if k.startswith("FRIGATE_")} DEFAULT_TRACKED_OBJECTS = ["person"] +DEFAULT_LISTEN_AUDIO = ["bark", "speech", "yell", "scream"] DEFAULT_DETECTORS = {"cpu": {"type": "cpu"}} @@ -387,6 +388,19 @@ class ObjectConfig(FrigateBaseModel): mask: Union[str, List[str]] = Field(default="", title="Object mask.") +class AudioConfig(FrigateBaseModel): + enabled: bool = Field(default=False, title="Enable audio events.") + max_not_heard: int = Field( + default=30, title="Seconds of not hearing the type of audio to end the event." + ) + listen: List[str] = Field( + default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for." + ) + enabled_in_config: Optional[bool] = Field( + title="Keep track of original state of audio detection." + ) + + class BirdseyeModeEnum(str, Enum): objects = "objects" motion = "motion" @@ -470,6 +484,7 @@ class FfmpegConfig(FrigateBaseModel): class CameraRoleEnum(str, Enum): + audio = "audio" record = "record" rtmp = "rtmp" detect = "detect" @@ -631,6 +646,9 @@ class CameraConfig(FrigateBaseModel): objects: ObjectConfig = Field( default_factory=ObjectConfig, title="Object configuration." ) + audio: AudioConfig = Field( + default_factory=AudioConfig, title="Audio events configuration." 
+ ) motion: Optional[MotionConfig] = Field(title="Motion detection configuration.") detect: DetectConfig = Field( default_factory=DetectConfig, title="Object detection configuration." @@ -661,12 +679,16 @@ def __init__(self, **config): # add roles to the input if there is only one if len(config["ffmpeg"]["inputs"]) == 1: has_rtmp = "rtmp" in config["ffmpeg"]["inputs"][0].get("roles", []) + has_audio = "audio" in config["ffmpeg"]["inputs"][0].get("roles", []) config["ffmpeg"]["inputs"][0]["roles"] = [ "record", "detect", ] + if has_audio: + config["ffmpeg"]["inputs"][0]["roles"].append("audio") + if has_rtmp: config["ffmpeg"]["inputs"][0]["roles"].append("rtmp") @@ -799,6 +821,11 @@ def verify_config_roles(camera_config: CameraConfig) -> None: f"Camera {camera_config.name} has rtmp enabled, but rtmp is not assigned to an input." ) + if camera_config.audio.enabled and "audio" not in assigned_roles: + raise ValueError( + f"Camera {camera_config.name} has audio events enabled, but audio is not assigned to an input." + ) + def verify_valid_live_stream_name( frigate_config: FrigateConfig, camera_config: CameraConfig @@ -911,6 +938,9 @@ class FrigateConfig(FrigateBaseModel): objects: ObjectConfig = Field( default_factory=ObjectConfig, title="Global object configuration." ) + audio: AudioConfig = Field( + default_factory=AudioConfig, title="Global Audio events configuration." + ) motion: Optional[MotionConfig] = Field( title="Global motion detection configuration." 
) @@ -935,6 +965,7 @@ def runtime_config(self, plus_api: PlusApi = None) -> FrigateConfig: # Global config to propagate down to camera level global_config = config.dict( include={ + "audio": ..., "birdseye": ..., "record": ..., "snapshots": ..., @@ -980,8 +1011,9 @@ def runtime_config(self, plus_api: PlusApi = None) -> FrigateConfig: camera_config.onvif.password = camera_config.onvif.password.format( **FRIGATE_ENV_VARS ) - # set config recording value + # set config pre-value camera_config.record.enabled_in_config = camera_config.record.enabled + camera_config.audio.enabled_in_config = camera_config.audio.enabled # Add default filters object_keys = camera_config.objects.track diff --git a/frigate/const.py b/frigate/const.py index c1524a6a8b..20e2b0daa8 100644 --- a/frigate/const.py +++ b/frigate/const.py @@ -8,6 +8,7 @@ BIRDSEYE_PIPE = "/tmp/cache/birdseye" CACHE_DIR = "/tmp/cache" YAML_EXT = (".yaml", ".yml") +FRIGATE_LOCALHOST = "http://127.0.0.1:5000" PLUS_ENV_VAR = "PLUS_API_KEY" PLUS_API_HOST = "https://api.frigate.video" BTBN_PATH = "/usr/lib/btbn-ffmpeg" @@ -22,6 +23,13 @@ item for sublist in ATTRIBUTE_LABEL_MAP.values() for item in sublist ] +# Audio Consts + +AUDIO_DURATION = 0.975 +AUDIO_FORMAT = "s16le" +AUDIO_MAX_BIT_RANGE = 32768.0 +AUDIO_SAMPLE_RATE = 16000 + # Regex Consts REGEX_CAMERA_NAME = r"^[a-zA-Z0-9_-]+$" diff --git a/frigate/events/audio.py b/frigate/events/audio.py new file mode 100644 index 0000000000..4f40334d42 --- /dev/null +++ b/frigate/events/audio.py @@ -0,0 +1,247 @@ +"""Handle creating audio events.""" + +import datetime +import logging +import multiprocessing as mp +import os +import signal +import threading +from types import FrameType +from typing import Optional + +import numpy as np +import requests +from setproctitle import setproctitle + +from frigate.config import CameraConfig, FrigateConfig +from frigate.const import ( + AUDIO_DURATION, + AUDIO_FORMAT, + AUDIO_MAX_BIT_RANGE, + AUDIO_SAMPLE_RATE, + CACHE_DIR, + 
FRIGATE_LOCALHOST, +) +from frigate.ffmpeg_presets import parse_preset_input +from frigate.log import LogPipe +from frigate.object_detection import load_labels +from frigate.types import FeatureMetricsTypes +from frigate.util import get_ffmpeg_arg_list, listen +from frigate.video import start_or_restart_ffmpeg, stop_ffmpeg + +try: + from tflite_runtime.interpreter import Interpreter +except ModuleNotFoundError: + from tensorflow.lite.python.interpreter import Interpreter + +logger = logging.getLogger(__name__) + + +def get_ffmpeg_command(input_args: list[str], input_path: str, pipe: str) -> list[str]: + return get_ffmpeg_arg_list( + f"ffmpeg {{}} -i {{}} -f {AUDIO_FORMAT} -ar {AUDIO_SAMPLE_RATE} -ac 1 -y {{}}".format( + " ".join(input_args), + input_path, + pipe, + ) + ) + + +def listen_to_audio( + config: FrigateConfig, + process_info: dict[str, FeatureMetricsTypes], +) -> None: + stop_event = mp.Event() + audio_threads: list[threading.Thread] = [] + + def exit_process() -> None: + for thread in audio_threads: + thread.join() + + logger.info("Exiting audio detector...") + + def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None: + stop_event.set() + exit_process() + + signal.signal(signal.SIGTERM, receiveSignal) + signal.signal(signal.SIGINT, receiveSignal) + + threading.current_thread().name = "process:audio_manager" + setproctitle("frigate.audio_manager") + listen() + + for camera in config.cameras.values(): + if camera.enabled and camera.audio.enabled_in_config: + audio = AudioEventMaintainer(camera, process_info, stop_event) + audio_threads.append(audio) + audio.start() + + +class AudioTfl: + def __init__(self, stop_event: mp.Event): + self.stop_event = stop_event + self.labels = load_labels("/audio-labelmap.txt") + self.interpreter = Interpreter( + model_path="/cpu_audio_model.tflite", + num_threads=2, + ) + + self.interpreter.allocate_tensors() + + self.tensor_input_details = self.interpreter.get_input_details() + self.tensor_output_details 
= self.interpreter.get_output_details() + + def _detect_raw(self, tensor_input): + self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input) + self.interpreter.invoke() + detections = np.zeros((20, 6), np.float32) + + res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0] + non_zero_indices = res > 0 + class_ids = np.argpartition(-res, 20)[:20] + class_ids = class_ids[np.argsort(-res[class_ids])] + class_ids = class_ids[non_zero_indices[class_ids]] + scores = res[class_ids] + boxes = np.full((scores.shape[0], 4), -1, np.float32) + count = len(scores) + + for i in range(count): + if scores[i] < 0.4 or i == 20: + break + detections[i] = [ + class_ids[i], + float(scores[i]), + boxes[i][0], + boxes[i][1], + boxes[i][2], + boxes[i][3], + ] + + return detections + + def detect(self, tensor_input, threshold=0.8): + detections = [] + + if self.stop_event.is_set(): + return detections + + raw_detections = self._detect_raw(tensor_input) + + for d in raw_detections: + if d[1] < threshold: + break + detections.append( + (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5])) + ) + return detections + + +class AudioEventMaintainer(threading.Thread): + def __init__( + self, + camera: CameraConfig, + feature_metrics: dict[str, FeatureMetricsTypes], + stop_event: mp.Event, + ) -> None: + threading.Thread.__init__(self) + self.name = f"{camera.name}_audio_event_processor" + self.config = camera + self.feature_metrics = feature_metrics + self.detections: dict[str, dict[str, any]] = {} + self.stop_event = stop_event + self.detector = AudioTfl(stop_event) + self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),) + self.chunk_size = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE * 2)) + self.pipe = f"{CACHE_DIR}/{self.config.name}-audio" + self.ffmpeg_cmd = get_ffmpeg_command( + get_ffmpeg_arg_list(self.config.ffmpeg.global_args) + + parse_preset_input("preset-rtsp-audio-only", 1), + [i.path for i in 
self.config.ffmpeg.inputs if "audio" in i.roles][0], + self.pipe, + ) + self.pipe_file = None + self.logpipe = LogPipe(f"ffmpeg.{self.config.name}.audio") + self.audio_listener = None + + def detect_audio(self, audio) -> None: + if not self.feature_metrics[self.config.name]["audio_enabled"].value: + return + + waveform = (audio / AUDIO_MAX_BIT_RANGE).astype(np.float32) + model_detections = self.detector.detect(waveform) + + for label, score, _ in model_detections: + if label not in self.config.audio.listen: + continue + + self.handle_detection(label, score) + + self.expire_detections() + + def handle_detection(self, label: str, score: float) -> None: + if self.detections.get(label): + self.detections[label][ + "last_detection" + ] = datetime.datetime.now().timestamp() + else: + resp = requests.post( + f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create", + json={"duration": None}, + ) + + if resp.status_code == 200: + event_id = resp.json()[0]["event_id"] + self.detections[label] = { + "id": event_id, + "label": label, + "last_detection": datetime.datetime.now().timestamp(), + } + + def expire_detections(self) -> None: + now = datetime.datetime.now().timestamp() + + for detection in self.detections.values(): + if detection is not None and ( + now - detection.get("last_detection", now) + > self.config.audio.max_not_heard + ): + self.detections[detection["label"]] = None + requests.put( + f"{FRIGATE_LOCALHOST}/api/events/{detection['id']}/end", + json={ + "end_time": detection["last_detection"] + + self.config.record.events.post_capture + }, + ) + + def restart_audio_pipe(self) -> None: + try: + os.mkfifo(self.pipe) + except FileExistsError: + pass + + self.audio_listener = start_or_restart_ffmpeg( + self.ffmpeg_cmd, logger, self.logpipe, None, self.audio_listener + ) + + def read_audio(self) -> None: + if self.pipe_file is None: + self.pipe_file = open(self.pipe, "rb") + + try: + audio = np.frombuffer(self.pipe_file.read(self.chunk_size), dtype=np.int16) + 
self.detect_audio(audio) + except BrokenPipeError: + self.logpipe.dump() + self.restart_audio_pipe() + + def run(self) -> None: + self.restart_audio_pipe() + + while not self.stop_event.is_set(): + self.read_audio() + + self.pipe_file.close() + stop_ffmpeg(self.audio_listener, logger) + self.logpipe.close() diff --git a/frigate/events/external.py b/frigate/events/external.py index 910aee35f4..25ba289f25 100644 --- a/frigate/events/external.py +++ b/frigate/events/external.py @@ -67,11 +67,10 @@ def create_manual_event( return event_id - def finish_manual_event(self, event_id: str) -> None: + def finish_manual_event(self, event_id: str, end_time: float) -> None: """Finish external event with indeterminate duration.""" - now = datetime.datetime.now().timestamp() self.queue.put( - (EventTypeEnum.api, "end", None, {"id": event_id, "end_time": now}) + (EventTypeEnum.api, "end", None, {"id": event_id, "end_time": end_time}) ) def _write_images( diff --git a/frigate/events/maintainer.py b/frigate/events/maintainer.py index 28fb4646b4..f024f0be6b 100644 --- a/frigate/events/maintainer.py +++ b/frigate/events/maintainer.py @@ -18,7 +18,6 @@ class EventTypeEnum(str, Enum): api = "api" - # audio = "audio" tracked_object = "tracked_object" @@ -73,19 +72,21 @@ def run(self) -> None: except queue.Empty: continue - logger.debug(f"Event received: {event_type} {camera} {event_data['id']}") - - self.timeline_queue.put( - ( - camera, - source_type, - event_type, - self.events_in_process.get(event_data["id"]), - event_data, - ) + logger.debug( + f"Event received: {source_type} {event_type} {camera} {event_data['id']}" ) if source_type == EventTypeEnum.tracked_object: + self.timeline_queue.put( + ( + camera, + source_type, + event_type, + self.events_in_process.get(event_data["id"]), + event_data, + ) + ) + if event_type == "start": self.events_in_process[event_data["id"]] = event_data continue @@ -215,7 +216,7 @@ def handle_object_detection( del 
self.events_in_process[event_data["id"]] self.event_processed_queue.put((event_data["id"], camera)) - def handle_external_detection(self, type: str, event_data: Event): + def handle_external_detection(self, type: str, event_data: Event) -> None: if type == "new": event = { Event.id: event_data["id"], @@ -230,20 +231,14 @@ def handle_external_detection(self, type: str, event_data: Event): Event.zones: [], Event.data: {}, } + Event.insert(event).execute() elif type == "end": event = { Event.id: event_data["id"], Event.end_time: event_data["end_time"], } - try: - ( - Event.insert(event) - .on_conflict( - conflict_target=[Event.id], - update=event, - ) - .execute() - ) - except Exception: - logger.warning(f"Failed to update manual event: {event_data['id']}") + try: + Event.update(event).execute() + except Exception: + logger.warning(f"Failed to update manual event: {event_data['id']}") diff --git a/frigate/ffmpeg_presets.py b/frigate/ffmpeg_presets.py index dde1589163..a2785813c1 100644 --- a/frigate/ffmpeg_presets.py +++ b/frigate/ffmpeg_presets.py @@ -282,6 +282,13 @@ def parse_preset_hardware_acceleration_encode( "-use_wallclock_as_timestamps", "1", ], + "preset-rtsp-audio-only": [ + "-rtsp_transport", + "tcp", + TIMEOUT_PARAM, + "5000000", + "-vn", + ], "preset-rtsp-restream": _user_agent_args + [ "-rtsp_transport", diff --git a/frigate/http.py b/frigate/http.py index e47e6efc2d..f3632a0cf1 100644 --- a/frigate/http.py +++ b/frigate/http.py @@ -908,8 +908,11 @@ def create_event(camera_name, label): @bp.route("/events//end", methods=["PUT"]) def end_event(event_id): + json: dict[str, any] = request.get_json(silent=True) or {} + try: - current_app.external_processor.finish_manual_event(event_id) + end_time = json.get("end_time", datetime.now().timestamp()) + current_app.external_processor.finish_manual_event(event_id, end_time) except Exception: return jsonify( {"success": False, "message": f"{event_id} must be set and valid."}, 404 diff --git a/frigate/output.py 
b/frigate/output.py index 942bed12ef..0388353133 100644 --- a/frigate/output.py +++ b/frigate/output.py @@ -156,7 +156,12 @@ def run(self): class BirdsEyeFrameManager: - def __init__(self, config: FrigateConfig, frame_manager: SharedMemoryFrameManager): + def __init__( + self, + config: FrigateConfig, + frame_manager: SharedMemoryFrameManager, + stop_event: mp.Event, + ): self.config = config self.mode = config.birdseye.mode self.frame_manager = frame_manager @@ -165,6 +170,7 @@ def __init__(self, config: FrigateConfig, frame_manager: SharedMemoryFrameManage self.frame_shape = (height, width) self.yuv_shape = (height * 3 // 2, width) self.frame = np.ndarray(self.yuv_shape, dtype=np.uint8) + self.stop_event = stop_event # initialize the frame as black and with the Frigate logo self.blank_frame = np.zeros(self.yuv_shape, np.uint8) @@ -458,6 +464,9 @@ def calculate_layout( # decrease scaling coefficient until height of all cameras can fit into the birdseye canvas while calculating: + if self.stop_event.is_set(): + return + layout_candidate = calculate_layout( (canvas_width, canvas_height), active_cameras_to_add, @@ -580,7 +589,7 @@ def receiveSignal(signalNumber, frame): for t in broadcasters.values(): t.start() - birdseye_manager = BirdsEyeFrameManager(config, frame_manager) + birdseye_manager = BirdsEyeFrameManager(config, frame_manager, stop_event) if config.birdseye.restream: birdseye_buffer = frame_manager.create( diff --git a/frigate/record/maintainer.py b/frigate/record/maintainer.py index 3ed6540d00..8e40fc6e7a 100644 --- a/frigate/record/maintainer.py +++ b/frigate/record/maintainer.py @@ -20,7 +20,7 @@ from frigate.config import FrigateConfig, RetainModeEnum from frigate.const import CACHE_DIR, MAX_SEGMENT_DURATION, RECORD_DIR from frigate.models import Event, Recordings -from frigate.types import RecordMetricsTypes +from frigate.types import FeatureMetricsTypes from frigate.util import area, get_video_properties logger = logging.getLogger(__name__) @@ -31,7 
+31,7 @@ def __init__( self, config: FrigateConfig, recordings_info_queue: mp.Queue, - process_info: dict[str, RecordMetricsTypes], + process_info: dict[str, FeatureMetricsTypes], stop_event: MpEvent, ): threading.Thread.__init__(self) diff --git a/frigate/record/record.py b/frigate/record/record.py index ab6cd3450a..530adc0319 100644 --- a/frigate/record/record.py +++ b/frigate/record/record.py @@ -14,7 +14,7 @@ from frigate.models import Event, Recordings, RecordingsToDelete, Timeline from frigate.record.cleanup import RecordingCleanup from frigate.record.maintainer import RecordingMaintainer -from frigate.types import RecordMetricsTypes +from frigate.types import FeatureMetricsTypes from frigate.util import listen logger = logging.getLogger(__name__) @@ -23,7 +23,7 @@ def manage_recordings( config: FrigateConfig, recordings_info_queue: mp.Queue, - process_info: dict[str, RecordMetricsTypes], + process_info: dict[str, FeatureMetricsTypes], ) -> None: stop_event = mp.Event() diff --git a/frigate/types.py b/frigate/types.py index 23751d499c..9083ade334 100644 --- a/frigate/types.py +++ b/frigate/types.py @@ -25,7 +25,8 @@ class CameraMetricsTypes(TypedDict): skipped_fps: Synchronized -class RecordMetricsTypes(TypedDict): +class FeatureMetricsTypes(TypedDict): + audio_enabled: Synchronized record_enabled: Synchronized diff --git a/web/__test__/handlers.js b/web/__test__/handlers.js index d7f2fb5a62..7ea1f90a5d 100644 --- a/web/__test__/handlers.js +++ b/web/__test__/handlers.js @@ -16,6 +16,7 @@ export const handlers = [ front: { name: 'front', objects: { track: ['taco', 'cat', 'dog'] }, + audio: { enabled: false, enabled_in_config: false }, record: { enabled: true, enabled_in_config: true }, detect: { width: 1280, height: 720 }, snapshots: {}, @@ -25,6 +26,7 @@ export const handlers = [ side: { name: 'side', objects: { track: ['taco', 'cat', 'dog'] }, + audio: { enabled: false, enabled_in_config: false }, record: { enabled: false, enabled_in_config: true }, detect: 
{ width: 1280, height: 720 }, snapshots: {}, diff --git a/web/src/api/__tests__/ws.test.jsx b/web/src/api/__tests__/ws.test.jsx index 3b0e3420ad..373f6abea6 100644 --- a/web/src/api/__tests__/ws.test.jsx +++ b/web/src/api/__tests__/ws.test.jsx @@ -113,8 +113,8 @@ describe('WsProvider', () => { vi.spyOn(Date, 'now').mockReturnValue(123456); const config = { cameras: { - front: { name: 'front', detect: { enabled: true }, record: { enabled: false }, snapshots: { enabled: true } }, - side: { name: 'side', detect: { enabled: false }, record: { enabled: false }, snapshots: { enabled: false } }, + front: { name: 'front', detect: { enabled: true }, record: { enabled: false }, snapshots: { enabled: true }, audio: { enabled: false } }, + side: { name: 'side', detect: { enabled: false }, record: { enabled: false }, snapshots: { enabled: false }, audio: { enabled: false } }, }, }; render( diff --git a/web/src/api/ws.jsx b/web/src/api/ws.jsx index 0867ed0a43..8324632bee 100644 --- a/web/src/api/ws.jsx +++ b/web/src/api/ws.jsx @@ -41,10 +41,11 @@ export function WsProvider({ useEffect(() => { Object.keys(config.cameras).forEach((camera) => { - const { name, record, detect, snapshots } = config.cameras[camera]; + const { name, record, detect, snapshots, audio } = config.cameras[camera]; dispatch({ topic: `${name}/recordings/state`, payload: record.enabled ? 'ON' : 'OFF', retain: false }); dispatch({ topic: `${name}/detect/state`, payload: detect.enabled ? 'ON' : 'OFF', retain: false }); dispatch({ topic: `${name}/snapshots/state`, payload: snapshots.enabled ? 'ON' : 'OFF', retain: false }); + dispatch({ topic: `${name}/audio/state`, payload: audio.enabled ? 
/**
 * Hook exposing the audio-detection toggle of a single camera.
 *
 * Calls `useWs` with the camera's `audio/state` topic as first argument and
 * its `audio/set` topic as second argument, then re-exposes the parts the UI
 * needs.
 *
 * @param {string} camera - Camera name used as the topic prefix.
 * @returns {{payload: *, send: *, connected: *}} The payload of the current
 *   value plus the `send` and `connected` members returned by `useWs`.
 */
export function useAudioState(camera) {
  const stateTopic = `${camera}/audio/state`;
  const setTopic = `${camera}/audio/set`;
  const ws = useWs(stateTopic, setTopic);
  const { payload } = ws.value;

  return { payload, send: ws.send, connected: ws.connected };
}
= `/cameras/${name}`; const buttons = useMemo(() => { return [ @@ -50,10 +52,9 @@ function Camera({ name, config }) { { name: 'Recordings', href: `/recording/${name}` }, ]; }, [name]); - const cleanName = useMemo( - () => { return `${name.replaceAll('_', ' ')}` }, - [name] - ); + const cleanName = useMemo(() => { + return `${name.replaceAll('_', ' ')}`; + }, [name]); const icons = useMemo( () => [ { @@ -65,7 +66,9 @@ function Camera({ name, config }) { }, }, { - name: config.record.enabled_in_config ? `Toggle recordings ${recordValue === 'ON' ? 'off' : 'on'}` : 'Recordings must be enabled in the config to be turned on in the UI.', + name: config.record.enabled_in_config + ? `Toggle recordings ${recordValue === 'ON' ? 'off' : 'on'}` + : 'Recordings must be enabled in the config to be turned on in the UI.', icon: ClipIcon, color: config.record.enabled_in_config ? (recordValue === 'ON' ? 'blue' : 'gray') : 'red', onClick: () => { @@ -82,11 +85,27 @@ function Camera({ name, config }) { sendSnapshots(snapshotValue === 'ON' ? 'OFF' : 'ON', true); }, }, - ], - [config, detectValue, sendDetect, recordValue, sendRecordings, snapshotValue, sendSnapshots] + config.audio.enabled_in_config + ? { + name: `Toggle audio detection ${audioValue === 'ON' ? 'off' : 'on'}`, + icon: AudioIcon, + color: audioValue === 'ON' ? 'blue' : 'gray', + onClick: () => { + sendAudio(audioValue === 'ON' ? 'OFF' : 'ON', true); + }, + } + : null, + ].filter((button) => button != null), + [config, audioValue, sendAudio, detectValue, sendDetect, recordValue, sendRecordings, snapshotValue, sendSnapshots] ); return ( - } /> + } + /> ); }