diff --git a/Dockerfile b/Dockerfile
index 6db81e46b0..d1dbd07552 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -98,7 +98,9 @@ COPY labelmap.txt .
 COPY --from=ov-converter /models/public/ssdlite_mobilenet_v2/FP16 openvino-model
 RUN wget -q https://github.com/openvinotoolkit/open_model_zoo/raw/master/data/dataset_classes/coco_91cl_bkgr.txt -O openvino-model/coco_91cl_bkgr.txt && \
     sed -i 's/truck/car/g' openvino-model/coco_91cl_bkgr.txt
-
+# Get Audio Model and labels
+RUN wget -qO cpu_audio_model.tflite https://tfhub.dev/google/lite-model/yamnet/classification/tflite/1?lite-format=tflite
+COPY audio-labelmap.txt .
 
 
 FROM wget AS s6-overlay
diff --git a/audio-labelmap.txt b/audio-labelmap.txt
new file mode 100644
index 0000000000..4a38b5f639
--- /dev/null
+++ b/audio-labelmap.txt
@@ -0,0 +1,521 @@
+speech
+speech
+speech
+speech
+babbling
+speech
+yell
+bellow
+whoop
+yell
+yell
+scream
+whispering
+laughter
+laughter
+laughter
+snicker
+laughter
+laughter
+crying
+crying
+crying
+yell
+sigh
+singing
+choir
+yodeling
+chant
+mantra
+child_singing
+synthetic_singing
+rapping
+humming
+groan
+grunt
+whistling
+breathing
+wheeze
+snoring
+gasp
+pant
+snort
+cough
+throat_clearing
+sneeze
+sniff
+run
+shuffle
+footsteps
+chewing
+biting
+gargling
+stomach_rumble
+burping
+hiccup
+fart
+hands
+finger_snapping
+clapping
+heartbeat
+heart_murmur
+cheering
+applause
+chatter
+crowd
+speech
+children_playing
+animal
+pets
+dog
+bark
+yip
+howl
+bow-wow
+growling
+whimper_dog
+cat
+purr
+meow
+hiss
+caterwaul
+livestock
+horse
+clip-clop
+neigh
+cattle
+moo
+cowbell
+pig
+oink
+goat
+bleat
+sheep
+fowl
+chicken
+cluck
+cock-a-doodle-doo
+turkey
+gobble
+duck
+quack
+goose
+honk
+wild_animals
+roaring_cats
+roar
+bird
+chirp
+chirp
+squawk
+pigeon
+coo
+crow
+caw
+owl
+hoot
+flapping_wings
+dogs
+rats
+mouse
+patter
+insect
+cricket
+mosquito
+fly
+buzz
+buzz
+frog
+croak
+snake
+rattle
+whale_vocalization
+music
+musical_instrument
+plucked_string_instrument
+guitar
+electric_guitar
+bass_guitar
+acoustic_guitar
+steel_guitar
+tapping
+strum
+banjo
+sitar
+mandolin
+zither
+ukulele
+keyboard
+piano
+electric_piano
+organ
+electronic_organ
+hammond_organ
+synthesizer
+sampler
+harpsichord
+percussion
+drum_kit
+drum_machine
+drum
+snare_drum
+rimshot
+drum_roll
+bass_drum
+timpani
+tabla
+cymbal
+hi-hat
+wood_block
+tambourine
+rattle
+maraca
+gong
+tubular_bells
+mallet_percussion
+marimba
+glockenspiel
+vibraphone
+steelpan
+orchestra
+brass_instrument
+french_horn
+trumpet
+trombone
+bowed_string_instrument
+string_section
+violin
+pizzicato
+cello
+double_bass
+wind_instrument
+flute
+saxophone
+clarinet
+harp
+bell
+church_bell
+jingle_bell
+bicycle_bell
+tuning_fork
+chime
+wind_chime
+change_ringing
+harmonica
+accordion
+bagpipes
+didgeridoo
+shofar
+theremin
+singing_bowl
+scratching
+pop_music
+hip_hop_music
+beatboxing
+rock_music
+heavy_metal
+punk_rock
+grunge
+progressive_rock
+rock_and_roll
+psychedelic_rock
+rhythm_and_blues
+soul_music
+reggae
+country
+swing_music
+bluegrass
+funk
+folk_music
+middle_eastern_music
+jazz
+disco
+classical_music
+opera
+electronic_music
+house_music
+techno
+dubstep
+drum_and_bass
+electronica
+electronic_dance_music
+ambient_music
+trance_music
+music_of_latin_america
+salsa_music
+flamenco
+blues
+music_for_children
+new-age_music
+vocal_music
+a_capella
+music_of_africa
+afrobeat
+christian_music
+gospel_music
+music_of_asia
+carnatic_music
+music_of_bollywood
+ska
+traditional_music
+independent_music
+song
+background_music
+theme_music
+jingle
+soundtrack_music
+lullaby
+video_game_music
+christmas_music
+dance_music
+wedding_music
+happy_music
+sad_music
+tender_music
+exciting_music
+angry_music
+scary_music
+wind
+rustling_leaves
+wind_noise
+thunderstorm
+thunder
+water
+rain
+raindrop
+rain_on_surface
+stream
+waterfall
+ocean
+waves
+steam
+gurgling
+fire
+crackle
+vehicle
+boat
+sailboat
+rowboat
+motorboat
+ship
+motor_vehicle
+car
+honk
+toot
+car_alarm
+power_windows
+skidding
+tire_squeal
+car_passing_by
+race_car
+truck
+air_brake
+air_horn
+reversing_beeps
+ice_cream_truck
+bus
+emergency_vehicle
+police_car
+ambulance
+fire_engine
+motorcycle
+traffic_noise
+rail_transport
+train
+train_whistle
+train_horn
+railroad_car
+train_wheels_squealing
+subway
+aircraft
+aircraft_engine
+jet_engine
+propeller
+helicopter
+fixed-wing_aircraft
+bicycle
+skateboard
+engine
+light_engine
+dental_drill
+lawn_mower
+chainsaw
+medium_engine
+heavy_engine
+engine_knocking
+engine_starting
+idling
+accelerating
+door
+doorbell
+ding-dong
+sliding_door
+slam
+knock
+tap
+squeak
+cupboard_open_or_close
+drawer_open_or_close
+dishes
+cutlery
+chopping
+frying
+microwave_oven
+blender
+water_tap
+sink
+bathtub
+hair_dryer
+toilet_flush
+toothbrush
+electric_toothbrush
+vacuum_cleaner
+zipper
+keys_jangling
+coin
+scissors
+electric_shaver
+shuffling_cards
+typing
+typewriter
+computer_keyboard
+writing
+alarm
+telephone
+telephone_bell_ringing
+ringtone
+telephone_dialing
+dial_tone
+busy_signal
+alarm_clock
+siren
+civil_defense_siren
+buzzer
+smoke_detector
+fire_alarm
+foghorn
+whistle
+steam_whistle
+mechanisms
+ratchet
+clock
+tick
+tick-tock
+gears
+pulleys
+sewing_machine
+mechanical_fan
+air_conditioning
+cash_register
+printer
+camera
+single-lens_reflex_camera
+tools
+hammer
+jackhammer
+sawing
+filing
+sanding
+power_tool
+drill
+explosion
+gunshot
+machine_gun
+fusillade
+artillery_fire
+cap_gun
+fireworks
+firecracker
+burst
+eruption
+boom
+wood
+chop
+splinter
+crack
+glass
+chink
+shatter
+liquid
+splash
+slosh
+squish
+drip
+pour
+trickle
+gush
+fill
+spray
+pump
+stir
+boiling
+sonar
+arrow
+whoosh
+thump
+thunk
+electronic_tuner
+effects_unit
+chorus_effect
+basketball_bounce
+bang
+slap
+whack
+smash
+breaking
+bouncing
+whip
+flap
+scratch
+scrape
+rub
+roll
+crushing
+crumpling
+tearing
+beep
+ping
+ding
+clang
+squeal
+creak
+rustle
+whir
+clatter
+sizzle
+clicking
+clickety-clack
+rumble
+plop
+jingle
+hum
+zing
+boing
+crunch
+silence
+sine_wave
+harmonic
+chirp_tone
+sound_effect
+pulse
+inside
+inside
+inside
+outside
+outside
+reverberation
+echo
+noise
+environmental_noise
+static
+mains_hum
+distortion
+sidetone
+cacophony
+white_noise
+pink_noise
+throbbing
+vibration
+television
+radio
+field_recording
diff --git a/docs/docs/configuration/audio_detectors.md b/docs/docs/configuration/audio_detectors.md
new file mode 100644
index 0000000000..ef1d8227cf
--- /dev/null
+++ b/docs/docs/configuration/audio_detectors.md
@@ -0,0 +1,63 @@
+---
+id: audio_detectors
+title: Audio Detectors
+---
+
+Frigate provides a builtin audio detector which runs on the CPU. Compared to object detection in images, audio detection is a relatively lightweight operation so the only option is to run the detection on a CPU.
+
+## Configuration
+
+Audio events work by detecting a type of audio and creating an event. The event ends once that type of audio has not been heard for the configured amount of time (`max_not_heard`). Audio events save a snapshot at the beginning of the event as well as recordings throughout the event. The recordings are retained using the configured recording retention.
+
+### Enabling Audio Events
+
+Audio events can be enabled for all cameras or only for specific cameras.
+
+```yaml
+
+audio: # <- enable audio events for all cameras
+  enabled: True
+
+cameras:
+  front_camera:
+    ffmpeg:
+    ...
+    audio:
+      enabled: True # <- enable audio events for the front_camera
+```
+
+If you are using multiple streams then you must set the `audio` role on the stream that is going to be used for audio detection. This can be any stream, but the stream must have audio included.
+
+:::note
+
+The ffmpeg process for capturing audio will be a separate connection to the camera along with the other roles assigned to the camera. For this reason it is recommended that the go2rtc restream is used for this purpose. See [the restream docs](/configuration/restream.md) for more information.
+
+:::
+
+```yaml
+cameras:
+  front_camera:
+    ffmpeg:
+      inputs:
+        - path: rtsp://.../main_stream
+          roles:
+            - record
+        - path: rtsp://.../sub_stream # <- this stream must have audio enabled
+          roles:
+            - audio
+            - detect
+```
+
+### Configuring Audio Events
+
+The included audio model has over 500 different types of audio that can be detected, many of which are not practical. By default `bark`, `speech`, `yell`, and `scream` are enabled but these can be customized.
+
+```yaml
+audio:
+  enabled: True
+  listen:
+    - bark
+    - scream
+    - speech
+    - yell
+```
diff --git a/docs/docs/configuration/index.md b/docs/docs/configuration/index.md
index 7279a76520..8915db6b3a 100644
--- a/docs/docs/configuration/index.md
+++ b/docs/docs/configuration/index.md
@@ -138,6 +138,20 @@ model:
   labelmap:
     2: vehicle
 
+# Optional: Audio Events Configuration
+# NOTE: Can be overridden at the camera level
+audio:
+  # Optional: Enable audio events (default: shown below)
+  enabled: False
+  # Optional: Configure the number of seconds without detected audio to end the event (default: shown below)
+  max_not_heard: 30
+  # Optional: Types of audio to listen for (default: shown below)
+  listen:
+    - bark
+    - scream
+    - speech
+    - yell
+
 # Optional: logger verbosity settings
 logger:
   # Optional: Default log verbosity (default: shown below)
diff --git a/docs/docs/configuration/detectors.md b/docs/docs/configuration/object_detectors.md
similarity index 99%
rename from docs/docs/configuration/detectors.md
rename to docs/docs/configuration/object_detectors.md
index 84507321c3..3f48423bcf 100644
--- a/docs/docs/configuration/detectors.md
+++ b/docs/docs/configuration/object_detectors.md
@@ -1,6 +1,6 @@
 ---
-id: detectors
-title: Detectors
+id: object_detectors
+title: Object Detectors
 ---
 
 Frigate provides the following builtin detector types: `cpu`, `edgetpu`, `openvino`, and `tensorrt`. By default, Frigate will use a single CPU detector. Other detectors may require additional configuration as described below. When using multiple detectors they will run in dedicated processes, but pull from a common queue of detection requests from across all cameras.
@@ -275,6 +275,6 @@ detectors:
     api_timeout: 0.1 # seconds
 ```
 
-Replace `<your_codeproject_ai_server_ip>` and `<port>` with the IP address and port of your CodeProject.AI server. 
+Replace `<your_codeproject_ai_server_ip>` and `<port>` with the IP address and port of your CodeProject.AI server.
 
 To verify that the integration is working correctly, start Frigate and observe the logs for any error messages related to CodeProject.AI. Additionally, you can check the Frigate web interface to see if the objects detected by CodeProject.AI are being displayed and tracked properly.
\ No newline at end of file
diff --git a/docs/docs/configuration/restream.md b/docs/docs/configuration/restream.md
index 2d5c565b2e..61393a91c5 100644
--- a/docs/docs/configuration/restream.md
+++ b/docs/docs/configuration/restream.md
@@ -67,6 +67,7 @@ cameras:
           roles:
             - record
             - detect
+            - audio # <- only necessary if audio detection is enabled
   http_cam:
     ffmpeg:
       output_args:
@@ -77,6 +78,7 @@ cameras:
           roles:
             - record
             - detect
+            - audio # <- only necessary if audio detection is enabled
 ```
 
 ### With Sub Stream
@@ -112,6 +114,7 @@ cameras:
         - path: rtsp://127.0.0.1:8554/rtsp_cam_sub # <--- the name here must match the name of the camera_sub in restream
           input_args: preset-rtsp-restream
           roles:
+            - audio # <- only necessary if audio detection is enabled
             - detect
   http_cam:
     ffmpeg:
@@ -125,6 +128,7 @@ cameras:
         - path: rtsp://127.0.0.1:8554/http_cam_sub # <--- the name here must match the name of the camera_sub in restream
           input_args: preset-rtsp-restream
           roles:
+            - audio # <- only necessary if audio detection is enabled
             - detect
 ```
 
diff --git a/docs/docs/frigate/hardware.md b/docs/docs/frigate/hardware.md
index e5233e218c..36233ea686 100644
--- a/docs/docs/frigate/hardware.md
+++ b/docs/docs/frigate/hardware.md
@@ -50,7 +50,7 @@ The OpenVINO detector type is able to run on:
 - 6th Gen Intel Platforms and newer that have an iGPU
 - x86 & Arm64 hosts with VPU Hardware (ex: Intel NCS2)
 
-More information is available [in the detector docs](/configuration/detectors#openvino-detector)
+More information is available [in the detector docs](/configuration/object_detectors#openvino-detector)
 
 Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known examples are below:
 
@@ -72,7 +72,7 @@ Inference speeds vary greatly depending on the CPU, GPU, or VPU used, some known
 
 ### TensorRT
 
-The TensortRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/detectors#nvidia-tensorrt-detector).
+The TensorRT detector is able to run on x86 hosts that have an Nvidia GPU which supports the 11.x series of CUDA libraries. The minimum driver version on the host system must be `>=450.80.02`. Also the GPU must support a Compute Capability of `5.0` or greater. This generally correlates to a Maxwell-era GPU or newer, check the [TensorRT docs for more info](/configuration/object_detectors#nvidia-tensorrt-detector).
 
 Inference speeds will vary greatly depending on the GPU and the model used.
 `tiny` variants are faster than the equivalent non-tiny model, some known examples are below:
diff --git a/docs/docs/guides/getting_started.md b/docs/docs/guides/getting_started.md
index adbddb9c2c..cb67c59b43 100644
--- a/docs/docs/guides/getting_started.md
+++ b/docs/docs/guides/getting_started.md
@@ -71,7 +71,7 @@ cameras:
       ...
 ```
 
-More details on available detectors can be found [here](../configuration/detectors.md).
+More details on available detectors can be found [here](../configuration/object_detectors.md).
 
 Restart Frigate and you should start seeing detections for `person`. If you want to track other objects, they will need to be added according to the [configuration file reference](../configuration/index.md#full-configuration-reference).
 
diff --git a/docs/sidebars.js b/docs/sidebars.js
index 41628f2edc..35ce2bee3b 100644
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -16,7 +16,8 @@ module.exports = {
     ],
     Configuration: [
       "configuration/index",
-      "configuration/detectors",
+      "configuration/object_detectors",
+      "configuration/audio_detectors",
       "configuration/cameras",
       "configuration/masks",
       "configuration/record",
diff --git a/frigate/app.py b/frigate/app.py
index 0b476cd432..ccfbd46961 100644
--- a/frigate/app.py
+++ b/frigate/app.py
@@ -29,6 +29,7 @@
     MODEL_CACHE_DIR,
     RECORD_DIR,
 )
+from frigate.events.audio import listen_to_audio
 from frigate.events.cleanup import EventCleanup
 from frigate.events.external import ExternalEventProcessor
 from frigate.events.maintainer import EventProcessor
@@ -44,7 +45,7 @@
 from frigate.stats import StatsEmitter, stats_init
 from frigate.storage import StorageMaintainer
 from frigate.timeline import TimelineProcessor
-from frigate.types import CameraMetricsTypes, RecordMetricsTypes
+from frigate.types import CameraMetricsTypes, FeatureMetricsTypes
 from frigate.version import VERSION
 from frigate.video import capture_camera, track_camera
 from frigate.watchdog import FrigateWatchdog
@@ -62,7 +63,7 @@ def __init__(self) -> None:
         self.log_queue: Queue = mp.Queue()
         self.plus_api = PlusApi()
         self.camera_metrics: dict[str, CameraMetricsTypes] = {}
-        self.record_metrics: dict[str, RecordMetricsTypes] = {}
+        self.feature_metrics: dict[str, FeatureMetricsTypes] = {}
         self.processes: dict[str, int] = {}
 
     def set_environment_vars(self) -> None:
@@ -104,7 +105,7 @@ def init_config(self) -> None:
         user_config = FrigateConfig.parse_file(config_file)
         self.config = user_config.runtime_config(self.plus_api)
 
-        for camera_name in self.config.cameras.keys():
+        for camera_name, camera_config in self.config.cameras.items():
             # create camera_metrics
             self.camera_metrics[camera_name] = {
                 "camera_fps": mp.Value("d", 0.0),  # type: ignore[typeddict-item]
@@ -159,13 +160,19 @@ def init_config(self) -> None:
                 "capture_process": None,
                 "process": None,
             }
-            self.record_metrics[camera_name] = {
+            self.feature_metrics[camera_name] = {
+                "audio_enabled": mp.Value(  # type: ignore[typeddict-item]
+                    # issue https://github.com/python/typeshed/issues/8799
+                    # from mypy 0.981 onwards
+                    "i",
+                    self.config.cameras[camera_name].audio.enabled,
+                ),
                 "record_enabled": mp.Value(  # type: ignore[typeddict-item]
                     # issue https://github.com/python/typeshed/issues/8799
                     # from mypy 0.981 onwards
                     "i",
                     self.config.cameras[camera_name].record.enabled,
-                )
+                ),
             }
 
     def set_log_levels(self) -> None:
@@ -253,7 +260,7 @@ def init_recording_manager(self) -> None:
         recording_process = mp.Process(
             target=manage_recordings,
             name="recording_manager",
-            args=(self.config, self.recordings_info_queue, self.record_metrics),
+            args=(self.config, self.recordings_info_queue, self.feature_metrics),
         )
         recording_process.daemon = True
         self.recording_process = recording_process
@@ -312,7 +319,7 @@ def init_dispatcher(self) -> None:
             self.config,
             self.onvif_controller,
             self.camera_metrics,
-            self.record_metrics,
+            self.feature_metrics,
             comms,
         )
 
@@ -421,6 +428,17 @@ def start_camera_capture_processes(self) -> None:
             capture_process.start()
             logger.info(f"Capture process started for {name}: {capture_process.pid}")
 
+    def start_audio_processors(self) -> None:
+        if len([c for c in self.config.cameras.values() if c.audio.enabled]) > 0:
+            audio_process = mp.Process(
+                target=listen_to_audio,
+                name="audio_capture",
+                args=(self.config, self.feature_metrics),
+            )
+            audio_process.daemon = True
+            audio_process.start()
+            logger.info(f"Audio process started: {audio_process.pid}")
+
     def start_timeline_processor(self) -> None:
         self.timeline_processor = TimelineProcessor(
             self.config, self.timeline_queue, self.stop_event
@@ -517,6 +535,7 @@ def start(self) -> None:
         self.start_detected_frames_processor()
         self.start_camera_processors()
         self.start_camera_capture_processes()
+        self.start_audio_processors()
         self.start_storage_maintainer()
         self.init_stats()
         self.init_external_event_processor()
diff --git a/frigate/comms/dispatcher.py b/frigate/comms/dispatcher.py
index b7e9e88586..1c9105ce87 100644
--- a/frigate/comms/dispatcher.py
+++ b/frigate/comms/dispatcher.py
@@ -6,7 +6,7 @@
 
 from frigate.config import FrigateConfig
 from frigate.ptz import OnvifCommandEnum, OnvifController
-from frigate.types import CameraMetricsTypes, RecordMetricsTypes
+from frigate.types import CameraMetricsTypes, FeatureMetricsTypes
 from frigate.util import restart_frigate
 
 logger = logging.getLogger(__name__)
@@ -39,19 +39,20 @@ def __init__(
         config: FrigateConfig,
         onvif: OnvifController,
         camera_metrics: dict[str, CameraMetricsTypes],
-        record_metrics: dict[str, RecordMetricsTypes],
+        feature_metrics: dict[str, FeatureMetricsTypes],
         communicators: list[Communicator],
     ) -> None:
         self.config = config
         self.onvif = onvif
         self.camera_metrics = camera_metrics
-        self.record_metrics = record_metrics
+        self.feature_metrics = feature_metrics
         self.comms = communicators
 
         for comm in self.comms:
             comm.subscribe(self._receive)
 
         self._camera_settings_handlers: dict[str, Callable] = {
+            "audio": self._on_audio_command,
             "detect": self._on_detect_command,
             "improve_contrast": self._on_motion_improve_contrast_command,
             "motion": self._on_motion_command,
@@ -186,6 +187,29 @@ def _on_motion_threshold_command(self, camera_name: str, payload: int) -> None:
         motion_settings.threshold = payload  # type: ignore[union-attr]
         self.publish(f"{camera_name}/motion_threshold/state", payload, retain=True)
 
+    def _on_audio_command(self, camera_name: str, payload: str) -> None:
+        """Callback for audio topic."""
+        audio_settings = self.config.cameras[camera_name].audio
+
+        if payload == "ON":  # refused unless audio was enabled in the config
+            if not self.config.cameras[camera_name].audio.enabled_in_config:
+                logger.error(
+                    "Audio detection must be enabled in the config to be turned on via MQTT."
+                )
+                return  # rejected request: no state is published
+
+            if not audio_settings.enabled:  # nothing to change when already on
+                logger.info(f"Turning on audio detection for {camera_name}")
+                audio_settings.enabled = True
+                self.feature_metrics[camera_name]["audio_enabled"].value = True
+        elif payload == "OFF":  # nothing to change when the shared flag is already off
+            if self.feature_metrics[camera_name]["audio_enabled"].value:
+                logger.info(f"Turning off audio detection for {camera_name}")
+                audio_settings.enabled = False
+                self.feature_metrics[camera_name]["audio_enabled"].value = False
+
+        self.publish(f"{camera_name}/audio/state", payload, retain=True)  # also reached for payloads other than ON/OFF
+
     def _on_recordings_command(self, camera_name: str, payload: str) -> None:
         """Callback for recordings topic."""
         record_settings = self.config.cameras[camera_name].record
@@ -200,12 +224,12 @@ def _on_recordings_command(self, camera_name: str, payload: str) -> None:
             if not record_settings.enabled:
                 logger.info(f"Turning on recordings for {camera_name}")
                 record_settings.enabled = True
-                self.record_metrics[camera_name]["record_enabled"].value = True
+                self.feature_metrics[camera_name]["record_enabled"].value = True
         elif payload == "OFF":
-            if self.record_metrics[camera_name]["record_enabled"].value:
+            if self.feature_metrics[camera_name]["record_enabled"].value:
                 logger.info(f"Turning off recordings for {camera_name}")
                 record_settings.enabled = False
-                self.record_metrics[camera_name]["record_enabled"].value = False
+                self.feature_metrics[camera_name]["record_enabled"].value = False
 
         self.publish(f"{camera_name}/recordings/state", payload, retain=True)
 
diff --git a/frigate/comms/mqtt.py b/frigate/comms/mqtt.py
index 07799f9dab..4ddfbe7f13 100644
--- a/frigate/comms/mqtt.py
+++ b/frigate/comms/mqtt.py
@@ -41,7 +41,7 @@ def _set_initial_topics(self) -> None:
         for camera_name, camera in self.config.cameras.items():
             self.publish(
                 f"{camera_name}/recordings/state",
-                "ON" if camera.record.enabled else "OFF",
+                "ON" if camera.record.enabled_in_config else "OFF",
                 retain=True,
             )
             self.publish(
@@ -49,6 +49,11 @@ def _set_initial_topics(self) -> None:
                 "ON" if camera.snapshots.enabled else "OFF",
                 retain=True,
             )
+            self.publish(
+                f"{camera_name}/audio/state",
+                "ON" if camera.audio.enabled_in_config else "OFF",
+                retain=True,
+            )
             self.publish(
                 f"{camera_name}/detect/state",
                 "ON" if camera.detect.enabled else "OFF",
diff --git a/frigate/config.py b/frigate/config.py
index 662b7b8bd0..ea7ecdc49e 100644
--- a/frigate/config.py
+++ b/frigate/config.py
@@ -40,6 +40,7 @@
 FRIGATE_ENV_VARS = {k: v for k, v in os.environ.items() if k.startswith("FRIGATE_")}
 
 DEFAULT_TRACKED_OBJECTS = ["person"]
+DEFAULT_LISTEN_AUDIO = ["bark", "speech", "yell", "scream"]
 DEFAULT_DETECTORS = {"cpu": {"type": "cpu"}}
 
 
@@ -387,6 +388,19 @@ class ObjectConfig(FrigateBaseModel):
     mask: Union[str, List[str]] = Field(default="", title="Object mask.")
 
 
+class AudioConfig(FrigateBaseModel):  # audio event settings, used both globally and per camera
+    enabled: bool = Field(default=False, title="Enable audio events.")
+    max_not_heard: int = Field(
+        default=30, title="Seconds of not hearing the type of audio to end the event."
+    )
+    listen: List[str] = Field(  # audio labels to listen for, e.g. "bark"
+        default=DEFAULT_LISTEN_AUDIO, title="Audio to listen for."
+    )
+    enabled_in_config: Optional[bool] = Field(  # copied from `enabled` in runtime_config
+        title="Keep track of original state of audio detection."
+    )
+
+
 class BirdseyeModeEnum(str, Enum):
     objects = "objects"
     motion = "motion"
@@ -470,6 +484,7 @@ class FfmpegConfig(FrigateBaseModel):
 
 
 class CameraRoleEnum(str, Enum):
+    audio = "audio"
     record = "record"
     rtmp = "rtmp"
     detect = "detect"
@@ -631,6 +646,9 @@ class CameraConfig(FrigateBaseModel):
     objects: ObjectConfig = Field(
         default_factory=ObjectConfig, title="Object configuration."
     )
+    audio: AudioConfig = Field(
+        default_factory=AudioConfig, title="Audio events configuration."
+    )
     motion: Optional[MotionConfig] = Field(title="Motion detection configuration.")
     detect: DetectConfig = Field(
         default_factory=DetectConfig, title="Object detection configuration."
@@ -661,12 +679,16 @@ def __init__(self, **config):
         # add roles to the input if there is only one
         if len(config["ffmpeg"]["inputs"]) == 1:
             has_rtmp = "rtmp" in config["ffmpeg"]["inputs"][0].get("roles", [])
+            has_audio = "audio" in config["ffmpeg"]["inputs"][0].get("roles", [])
 
             config["ffmpeg"]["inputs"][0]["roles"] = [
                 "record",
                 "detect",
             ]
 
+            if has_audio:
+                config["ffmpeg"]["inputs"][0]["roles"].append("audio")
+
             if has_rtmp:
                 config["ffmpeg"]["inputs"][0]["roles"].append("rtmp")
 
@@ -799,6 +821,11 @@ def verify_config_roles(camera_config: CameraConfig) -> None:
             f"Camera {camera_config.name} has rtmp enabled, but rtmp is not assigned to an input."
         )
 
+    if camera_config.audio.enabled and "audio" not in assigned_roles:
+        raise ValueError(
+            f"Camera {camera_config.name} has audio events enabled, but audio is not assigned to an input."
+        )
+
 
 def verify_valid_live_stream_name(
     frigate_config: FrigateConfig, camera_config: CameraConfig
@@ -911,6 +938,9 @@ class FrigateConfig(FrigateBaseModel):
     objects: ObjectConfig = Field(
         default_factory=ObjectConfig, title="Global object configuration."
     )
+    audio: AudioConfig = Field(
+        default_factory=AudioConfig, title="Global Audio events configuration."
+    )
     motion: Optional[MotionConfig] = Field(
         title="Global motion detection configuration."
     )
@@ -935,6 +965,7 @@ def runtime_config(self, plus_api: PlusApi = None) -> FrigateConfig:
         # Global config to propagate down to camera level
         global_config = config.dict(
             include={
+                "audio": ...,
                 "birdseye": ...,
                 "record": ...,
                 "snapshots": ...,
@@ -980,8 +1011,9 @@ def runtime_config(self, plus_api: PlusApi = None) -> FrigateConfig:
                 camera_config.onvif.password = camera_config.onvif.password.format(
                     **FRIGATE_ENV_VARS
                 )
-            # set config recording value
+            # set config pre-value
             camera_config.record.enabled_in_config = camera_config.record.enabled
+            camera_config.audio.enabled_in_config = camera_config.audio.enabled
 
             # Add default filters
             object_keys = camera_config.objects.track
diff --git a/frigate/const.py b/frigate/const.py
index c1524a6a8b..20e2b0daa8 100644
--- a/frigate/const.py
+++ b/frigate/const.py
@@ -8,6 +8,7 @@
 BIRDSEYE_PIPE = "/tmp/cache/birdseye"
 CACHE_DIR = "/tmp/cache"
 YAML_EXT = (".yaml", ".yml")
+FRIGATE_LOCALHOST = "http://127.0.0.1:5000"
 PLUS_ENV_VAR = "PLUS_API_KEY"
 PLUS_API_HOST = "https://api.frigate.video"
 BTBN_PATH = "/usr/lib/btbn-ffmpeg"
@@ -22,6 +23,13 @@
     item for sublist in ATTRIBUTE_LABEL_MAP.values() for item in sublist
 ]
 
+# Audio Consts
+
+AUDIO_DURATION = 0.975
+AUDIO_FORMAT = "s16le"
+AUDIO_MAX_BIT_RANGE = 32768.0
+AUDIO_SAMPLE_RATE = 16000
+
 # Regex Consts
 
 REGEX_CAMERA_NAME = r"^[a-zA-Z0-9_-]+$"
diff --git a/frigate/events/audio.py b/frigate/events/audio.py
new file mode 100644
index 0000000000..4f40334d42
--- /dev/null
+++ b/frigate/events/audio.py
@@ -0,0 +1,247 @@
+"""Handle creating audio events."""
+
+import datetime
+import logging
+import multiprocessing as mp
+import os
+import signal
+import threading
+from types import FrameType
+from typing import Optional
+
+import numpy as np
+import requests
+from setproctitle import setproctitle
+
+from frigate.config import CameraConfig, FrigateConfig
+from frigate.const import (
+    AUDIO_DURATION,
+    AUDIO_FORMAT,
+    AUDIO_MAX_BIT_RANGE,
+    AUDIO_SAMPLE_RATE,
+    CACHE_DIR,
+    FRIGATE_LOCALHOST,
+)
+from frigate.ffmpeg_presets import parse_preset_input
+from frigate.log import LogPipe
+from frigate.object_detection import load_labels
+from frigate.types import FeatureMetricsTypes
+from frigate.util import get_ffmpeg_arg_list, listen
+from frigate.video import start_or_restart_ffmpeg, stop_ffmpeg
+
+try:
+    from tflite_runtime.interpreter import Interpreter
+except ModuleNotFoundError:
+    from tensorflow.lite.python.interpreter import Interpreter
+
+logger = logging.getLogger(__name__)
+
+
+def get_ffmpeg_command(input_args: list[str], input_path: str, pipe: str) -> list[str]:
+    return get_ffmpeg_arg_list(
+        f"ffmpeg {{}} -i {{}} -f {AUDIO_FORMAT} -ar {AUDIO_SAMPLE_RATE} -ac 1 -y {{}}".format(
+            " ".join(input_args),
+            input_path,
+            pipe,
+        )
+    )
+
+
+def listen_to_audio(
+    config: FrigateConfig,
+    process_info: dict[str, FeatureMetricsTypes],
+) -> None:
+    stop_event = mp.Event()
+    audio_threads: list[threading.Thread] = []
+
+    def exit_process() -> None:
+        for thread in audio_threads:
+            thread.join()
+
+        logger.info("Exiting audio detector...")
+
+    def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
+        stop_event.set()
+        exit_process()
+
+    signal.signal(signal.SIGTERM, receiveSignal)
+    signal.signal(signal.SIGINT, receiveSignal)
+
+    threading.current_thread().name = "process:audio_manager"
+    setproctitle("frigate.audio_manager")
+    listen()
+
+    for camera in config.cameras.values():
+        if camera.enabled and camera.audio.enabled_in_config:
+            audio = AudioEventMaintainer(camera, process_info, stop_event)
+            audio_threads.append(audio)
+            audio.start()
+
+
+class AudioTfl:
+    """Audio classifier wrapping a TFLite interpreter and its label map."""
+
+    def __init__(self, stop_event: mp.Event):
+        self.stop_event = stop_event
+        self.labels = load_labels("/audio-labelmap.txt")
+        self.interpreter = Interpreter(
+            model_path="/cpu_audio_model.tflite",
+            num_threads=2,
+        )
+        self.interpreter.allocate_tensors()
+        self.tensor_input_details = self.interpreter.get_input_details()
+        self.tensor_output_details = self.interpreter.get_output_details()
+
+    def _detect_raw(self, tensor_input):
+        """Run the model; return a (20, 6) array of [class id, score, box x4] rows."""
+        self.interpreter.set_tensor(self.tensor_input_details[0]["index"], tensor_input)
+        self.interpreter.invoke()
+        detections = np.zeros((20, 6), np.float32)
+        res = self.interpreter.get_tensor(self.tensor_output_details[0]["index"])[0]
+        non_zero_indices = res > 0
+        # Top 20 scores, sorted descending, with non-positive scores dropped.
+        class_ids = np.argpartition(-res, 20)[:20]
+        class_ids = class_ids[np.argsort(-res[class_ids])]
+        class_ids = class_ids[non_zero_indices[class_ids]]
+        scores = res[class_ids]
+        boxes = np.full((scores.shape[0], 4), -1, np.float32)
+        count = len(scores)
+        for i in range(count):
+            if scores[i] < 0.4 or i == 20:  # NOTE(review): i == 20 looks unreachable
+                break
+            detections[i] = [
+                class_ids[i],
+                float(scores[i]),
+                boxes[i][0],
+                boxes[i][1],
+                boxes[i][2],
+                boxes[i][3],
+            ]
+
+        return detections
+
+    def detect(self, tensor_input, threshold=0.8):
+        """Return (label, score, box) tuples for rows scoring at least `threshold`."""
+        detections = []
+        if self.stop_event.is_set():
+            return detections
+
+        raw_detections = self._detect_raw(tensor_input)
+        # Rows are ordered by score, so the first one below threshold ends the scan.
+        for d in raw_detections:
+            if d[1] < threshold:
+                break
+            detections.append(
+                (self.labels[int(d[0])], float(d[1]), (d[2], d[3], d[4], d[5]))
+            )
+        return detections
+
+
+class AudioEventMaintainer(threading.Thread):
+    def __init__(
+        self,
+        camera: CameraConfig,
+        feature_metrics: dict[str, FeatureMetricsTypes],
+        stop_event: mp.Event,
+    ) -> None:
+        threading.Thread.__init__(self)
+        self.name = f"{camera.name}_audio_event_processor"
+        self.config = camera
+        self.feature_metrics = feature_metrics
+        self.detections: dict[dict[str, any]] = feature_metrics
+        self.stop_event = stop_event
+        self.detector = AudioTfl(stop_event)
+        self.shape = (int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE)),)
+        self.chunk_size = int(round(AUDIO_DURATION * AUDIO_SAMPLE_RATE * 2))
+        self.pipe = f"{CACHE_DIR}/{self.config.name}-audio"
+        self.ffmpeg_cmd = get_ffmpeg_command(
+            get_ffmpeg_arg_list(self.config.ffmpeg.global_args)
+            + parse_preset_input("preset-rtsp-audio-only", 1),
+            [i.path for i in self.config.ffmpeg.inputs if "audio" in i.roles][0],
+            self.pipe,
+        )
+        self.pipe_file = None
+        self.logpipe = LogPipe(f"ffmpeg.{self.config.name}.audio")
+        self.audio_listener = None
+
+    def detect_audio(self, audio) -> None:
+        if not self.feature_metrics[self.config.name]["audio_enabled"].value:
+            return
+
+        waveform = (audio / AUDIO_MAX_BIT_RANGE).astype(np.float32)
+        model_detections = self.detector.detect(waveform)
+
+        for label, score, _ in model_detections:
+            if label not in self.config.audio.listen:
+                continue
+
+            self.handle_detection(label, score)
+
+        self.expire_detections()
+
+    def handle_detection(self, label: str, score: float) -> None:
+        if self.detections.get(label):
+            self.detections[label][
+                "last_detection"
+            ] = datetime.datetime.now().timestamp()
+        else:
+            resp = requests.post(
+                f"{FRIGATE_LOCALHOST}/api/events/{self.config.name}/{label}/create",
+                json={"duration": None},
+            )
+
+            if resp.status_code == 200:
+                event_id = resp.json()[0]["event_id"]
+                self.detections[label] = {
+                    "id": event_id,
+                    "label": label,
+                    "last_detection": datetime.datetime.now().timestamp(),
+                }
+
+    def expire_detections(self) -> None:
+        now = datetime.datetime.now().timestamp()
+
+        for detection in self.detections.values():
+            if (
+                now - detection.get("last_detection", now)
+                > self.config.audio.max_not_heard
+            ):
+                self.detections[detection["label"]] = None
+                requests.put(
+                    f"{FRIGATE_LOCALHOST}/api/events/{detection['id']}/end",
+                    json={
+                        "end_time": detection["last_detection"]
+                        + self.config.record.events.post_capture
+                    },
+                )
+
+    def restart_audio_pipe(self) -> None:
+        try:
+            os.mkfifo(self.pipe)
+        except FileExistsError:
+            pass
+
+        self.audio_listener = start_or_restart_ffmpeg(
+            self.ffmpeg_cmd, logger, self.logpipe, None, self.audio_listener
+        )
+
+    def read_audio(self) -> None:
+        if self.pipe_file is None:
+            self.pipe_file = open(self.pipe, "rb")
+
+        try:
+            audio = np.frombuffer(self.pipe_file.read(self.chunk_size), dtype=np.int16)
+            self.detect_audio(audio)
+        except BrokenPipeError:
+            self.logpipe.dump()
+            self.restart_audio_pipe()
+
+    def run(self) -> None:
+        self.restart_audio_pipe()
+
+        while not self.stop_event.is_set():
+            self.read_audio()
+
+        self.pipe_file.close()
+        stop_ffmpeg(self.audio_listener, logger)
+        self.logpipe.close()
diff --git a/frigate/events/external.py b/frigate/events/external.py
index 910aee35f4..25ba289f25 100644
--- a/frigate/events/external.py
+++ b/frigate/events/external.py
@@ -67,11 +67,10 @@ def create_manual_event(
 
         return event_id
 
-    def finish_manual_event(self, event_id: str) -> None:
+    def finish_manual_event(self, event_id: str, end_time: float) -> None:
         """Finish external event with indeterminate duration."""
-        now = datetime.datetime.now().timestamp()
         self.queue.put(
-            (EventTypeEnum.api, "end", None, {"id": event_id, "end_time": now})
+            (EventTypeEnum.api, "end", None, {"id": event_id, "end_time": end_time})
         )
 
     def _write_images(
diff --git a/frigate/events/maintainer.py b/frigate/events/maintainer.py
index 28fb4646b4..f024f0be6b 100644
--- a/frigate/events/maintainer.py
+++ b/frigate/events/maintainer.py
@@ -18,7 +18,6 @@
 
 class EventTypeEnum(str, Enum):
     api = "api"
-    # audio = "audio"
     tracked_object = "tracked_object"
 
 
@@ -73,19 +72,21 @@ def run(self) -> None:
             except queue.Empty:
                 continue
 
-            logger.debug(f"Event received: {event_type} {camera} {event_data['id']}")
-
-            self.timeline_queue.put(
-                (
-                    camera,
-                    source_type,
-                    event_type,
-                    self.events_in_process.get(event_data["id"]),
-                    event_data,
-                )
+            logger.debug(
+                f"Event received: {source_type} {event_type} {camera} {event_data['id']}"
             )
 
             if source_type == EventTypeEnum.tracked_object:
+                self.timeline_queue.put(
+                    (
+                        camera,
+                        source_type,
+                        event_type,
+                        self.events_in_process.get(event_data["id"]),
+                        event_data,
+                    )
+                )
+
                 if event_type == "start":
                     self.events_in_process[event_data["id"]] = event_data
                     continue
@@ -215,7 +216,7 @@ def handle_object_detection(
             del self.events_in_process[event_data["id"]]
             self.event_processed_queue.put((event_data["id"], camera))
 
-    def handle_external_detection(self, type: str, event_data: Event):
+    def handle_external_detection(self, type: str, event_data: Event) -> None:
         if type == "new":
             event = {
                 Event.id: event_data["id"],
@@ -230,20 +231,14 @@ def handle_external_detection(self, type: str, event_data: Event):
                 Event.zones: [],
                 Event.data: {},
             }
+            Event.insert(event).execute()
         elif type == "end":
             event = {
                 Event.id: event_data["id"],
                 Event.end_time: event_data["end_time"],
             }
 
-        try:
-            (
-                Event.insert(event)
-                .on_conflict(
-                    conflict_target=[Event.id],
-                    update=event,
-                )
-                .execute()
-            )
-        except Exception:
-            logger.warning(f"Failed to update manual event: {event_data['id']}")
+            try:
+                Event.update(event).execute()
+            except Exception:
+                logger.warning(f"Failed to update manual event: {event_data['id']}")
diff --git a/frigate/ffmpeg_presets.py b/frigate/ffmpeg_presets.py
index dde1589163..a2785813c1 100644
--- a/frigate/ffmpeg_presets.py
+++ b/frigate/ffmpeg_presets.py
@@ -282,6 +282,13 @@ def parse_preset_hardware_acceleration_encode(
         "-use_wallclock_as_timestamps",
         "1",
     ],
+    "preset-rtsp-audio-only": [
+        "-rtsp_transport",
+        "tcp",
+        TIMEOUT_PARAM,
+        "5000000",
+        "-vn",
+    ],
     "preset-rtsp-restream": _user_agent_args
     + [
         "-rtsp_transport",
diff --git a/frigate/http.py b/frigate/http.py
index e47e6efc2d..f3632a0cf1 100644
--- a/frigate/http.py
+++ b/frigate/http.py
@@ -908,8 +908,11 @@ def create_event(camera_name, label):
 
 @bp.route("/events/<event_id>/end", methods=["PUT"])
 def end_event(event_id):
+    json: dict[str, any] = request.get_json(silent=True) or {}
+
     try:
-        current_app.external_processor.finish_manual_event(event_id)
+        end_time = json.get("end_time", datetime.now().timestamp())
+        current_app.external_processor.finish_manual_event(event_id, end_time)
     except Exception:
         return jsonify(
             {"success": False, "message": f"{event_id} must be set and valid."}, 404
diff --git a/frigate/output.py b/frigate/output.py
index 942bed12ef..0388353133 100644
--- a/frigate/output.py
+++ b/frigate/output.py
@@ -156,7 +156,12 @@ def run(self):
 
 
 class BirdsEyeFrameManager:
-    def __init__(self, config: FrigateConfig, frame_manager: SharedMemoryFrameManager):
+    def __init__(
+        self,
+        config: FrigateConfig,
+        frame_manager: SharedMemoryFrameManager,
+        stop_event: mp.Event,
+    ):
         self.config = config
         self.mode = config.birdseye.mode
         self.frame_manager = frame_manager
@@ -165,6 +170,7 @@ def __init__(self, config: FrigateConfig, frame_manager: SharedMemoryFrameManage
         self.frame_shape = (height, width)
         self.yuv_shape = (height * 3 // 2, width)
         self.frame = np.ndarray(self.yuv_shape, dtype=np.uint8)
+        self.stop_event = stop_event
 
         # initialize the frame as black and with the Frigate logo
         self.blank_frame = np.zeros(self.yuv_shape, np.uint8)
@@ -458,6 +464,9 @@ def calculate_layout(
 
                 # decrease scaling coefficient until height of all cameras can fit into the birdseye canvas
                 while calculating:
+                    if self.stop_event.is_set():
+                        return
+
                     layout_candidate = calculate_layout(
                         (canvas_width, canvas_height),
                         active_cameras_to_add,
@@ -580,7 +589,7 @@ def receiveSignal(signalNumber, frame):
     for t in broadcasters.values():
         t.start()
 
-    birdseye_manager = BirdsEyeFrameManager(config, frame_manager)
+    birdseye_manager = BirdsEyeFrameManager(config, frame_manager, stop_event)
 
     if config.birdseye.restream:
         birdseye_buffer = frame_manager.create(
diff --git a/frigate/record/maintainer.py b/frigate/record/maintainer.py
index 3ed6540d00..8e40fc6e7a 100644
--- a/frigate/record/maintainer.py
+++ b/frigate/record/maintainer.py
@@ -20,7 +20,7 @@
 from frigate.config import FrigateConfig, RetainModeEnum
 from frigate.const import CACHE_DIR, MAX_SEGMENT_DURATION, RECORD_DIR
 from frigate.models import Event, Recordings
-from frigate.types import RecordMetricsTypes
+from frigate.types import FeatureMetricsTypes
 from frigate.util import area, get_video_properties
 
 logger = logging.getLogger(__name__)
@@ -31,7 +31,7 @@ def __init__(
         self,
         config: FrigateConfig,
         recordings_info_queue: mp.Queue,
-        process_info: dict[str, RecordMetricsTypes],
+        process_info: dict[str, FeatureMetricsTypes],
         stop_event: MpEvent,
     ):
         threading.Thread.__init__(self)
diff --git a/frigate/record/record.py b/frigate/record/record.py
index ab6cd3450a..530adc0319 100644
--- a/frigate/record/record.py
+++ b/frigate/record/record.py
@@ -14,7 +14,7 @@
 from frigate.models import Event, Recordings, RecordingsToDelete, Timeline
 from frigate.record.cleanup import RecordingCleanup
 from frigate.record.maintainer import RecordingMaintainer
-from frigate.types import RecordMetricsTypes
+from frigate.types import FeatureMetricsTypes
 from frigate.util import listen
 
 logger = logging.getLogger(__name__)
@@ -23,7 +23,7 @@
 def manage_recordings(
     config: FrigateConfig,
     recordings_info_queue: mp.Queue,
-    process_info: dict[str, RecordMetricsTypes],
+    process_info: dict[str, FeatureMetricsTypes],
 ) -> None:
     stop_event = mp.Event()
 
diff --git a/frigate/types.py b/frigate/types.py
index 23751d499c..9083ade334 100644
--- a/frigate/types.py
+++ b/frigate/types.py
@@ -25,7 +25,8 @@ class CameraMetricsTypes(TypedDict):
     skipped_fps: Synchronized
 
 
-class RecordMetricsTypes(TypedDict):
+class FeatureMetricsTypes(TypedDict):
+    audio_enabled: Synchronized
     record_enabled: Synchronized
 
 
diff --git a/web/__test__/handlers.js b/web/__test__/handlers.js
index d7f2fb5a62..7ea1f90a5d 100644
--- a/web/__test__/handlers.js
+++ b/web/__test__/handlers.js
@@ -16,6 +16,7 @@ export const handlers = [
           front: {
             name: 'front',
             objects: { track: ['taco', 'cat', 'dog'] },
+            audio: { enabled: false, enabled_in_config: false },
             record: { enabled: true, enabled_in_config: true },
             detect: { width: 1280, height: 720 },
             snapshots: {},
@@ -25,6 +26,7 @@ export const handlers = [
           side: {
             name: 'side',
             objects: { track: ['taco', 'cat', 'dog'] },
+            audio: { enabled: false, enabled_in_config: false },
             record: { enabled: false, enabled_in_config: true },
             detect: { width: 1280, height: 720 },
             snapshots: {},
diff --git a/web/src/api/__tests__/ws.test.jsx b/web/src/api/__tests__/ws.test.jsx
index 3b0e3420ad..373f6abea6 100644
--- a/web/src/api/__tests__/ws.test.jsx
+++ b/web/src/api/__tests__/ws.test.jsx
@@ -113,8 +113,8 @@ describe('WsProvider', () => {
     vi.spyOn(Date, 'now').mockReturnValue(123456);
     const config = {
       cameras: {
-        front: { name: 'front', detect: { enabled: true }, record: { enabled: false }, snapshots: { enabled: true } },
-        side: { name: 'side', detect: { enabled: false }, record: { enabled: false }, snapshots: { enabled: false } },
+        front: { name: 'front', detect: { enabled: true }, record: { enabled: false }, snapshots: { enabled: true }, audio: { enabled: false } },
+        side: { name: 'side', detect: { enabled: false }, record: { enabled: false }, snapshots: { enabled: false }, audio: { enabled: false } },
       },
     };
     render(
diff --git a/web/src/api/ws.jsx b/web/src/api/ws.jsx
index 0867ed0a43..8324632bee 100644
--- a/web/src/api/ws.jsx
+++ b/web/src/api/ws.jsx
@@ -41,10 +41,11 @@ export function WsProvider({
 
   useEffect(() => {
     Object.keys(config.cameras).forEach((camera) => {
-      const { name, record, detect, snapshots } = config.cameras[camera];
+      const { name, record, detect, snapshots, audio } = config.cameras[camera];
       dispatch({ topic: `${name}/recordings/state`, payload: record.enabled ? 'ON' : 'OFF', retain: false });
       dispatch({ topic: `${name}/detect/state`, payload: detect.enabled ? 'ON' : 'OFF', retain: false });
       dispatch({ topic: `${name}/snapshots/state`, payload: snapshots.enabled ? 'ON' : 'OFF', retain: false });
+      dispatch({ topic: `${name}/audio/state`, payload: audio.enabled ? 'ON' : 'OFF', retain: false });
     });
   }, [config]);
 
@@ -120,6 +121,15 @@ export function useSnapshotsState(camera) {
   return { payload, send, connected };
 }
 
+export function useAudioState(camera) {
+  const {
+    value: { payload },
+    send,
+    connected,
+  } = useWs(`${camera}/audio/state`, `${camera}/audio/set`);
+  return { payload, send, connected };
+}
+
 export function usePtzCommand(camera) {
   const {
     value: { payload },
diff --git a/web/src/icons/Audio.jsx b/web/src/icons/Audio.jsx
new file mode 100644
index 0000000000..cec783854c
--- /dev/null
+++ b/web/src/icons/Audio.jsx
@@ -0,0 +1,36 @@
+import { h } from 'preact';
+import { memo } from 'preact/compat';
+// Audio icon. NOTE(review): named Snapshot (presumably copied from another icon); Cameras.jsx imports the default export.
+export function Snapshot({ className = 'h-6 w-6', stroke = 'currentColor', onClick = () => {} }) {
+  return (
+    <svg
+      xmlns="http://www.w3.org/2000/svg"
+      className={className}
+      fill="none"
+      viewBox="0 0 32 32"
+      stroke={stroke}
+      onClick={onClick}
+    >
+      <path
+        stroke-linecap="round"
+        stroke-linejoin="round"
+        stroke-width="2"
+        d="M18 30v-2a10.011 10.011 0 0010-10h2a12.013 12.013 0 01-12 12z"
+      />
+      <path
+        stroke-linecap="round"
+        stroke-linejoin="round"
+        stroke-width="2"
+        d="M18 22v-2a2.002 2.002 0 002-2h2a4.004 4.004 0 01-4 4zM10 2a9.01 9.01 0 00-9 9h2a7 7 0 0114 0 7.09 7.09 0 01-3.501 6.135l-.499.288v3.073a2.935 2.935 0 01-.9 2.151 4.182 4.182 0 01-4.633 1.03A4.092 4.092 0 015 20H3a6.116 6.116 0 003.67 5.512 5.782 5.782 0 002.314.486 6.585 6.585 0 004.478-1.888A4.94 4.94 0 0015 20.496v-1.942A9.108 9.108 0 0019 11a9.01 9.01 0 00-9-9z"
+      />
+      <path
+        stroke-linecap="round"
+        stroke-linejoin="round"
+        stroke-width="2"
+        d="M9.28 8.082A3.006 3.006 0 0113 11h2a4.979 4.979 0 00-1.884-3.911 5.041 5.041 0 00-4.281-.957 4.95 4.95 0 00-3.703 3.703 5.032 5.032 0 002.304 5.458A3.078 3.078 0 019 17.924V20h2v-2.077a5.06 5.06 0 00-2.537-4.346 3.002 3.002 0 01.817-5.494z"
+      />
+    </svg>
+  );
+}
+
+export default memo(Snapshot);
diff --git a/web/src/routes/Cameras.jsx b/web/src/routes/Cameras.jsx
index 1e2bbf9030..2298b992ed 100644
--- a/web/src/routes/Cameras.jsx
+++ b/web/src/routes/Cameras.jsx
@@ -2,10 +2,11 @@ import { h, Fragment } from 'preact';
 import ActivityIndicator from '../components/ActivityIndicator';
 import Card from '../components/Card';
 import CameraImage from '../components/CameraImage';
+import AudioIcon from '../icons/Audio';
 import ClipIcon from '../icons/Clip';
 import MotionIcon from '../icons/Motion';
 import SnapshotIcon from '../icons/Snapshot';
-import { useDetectState, useRecordingsState, useSnapshotsState } from '../api/ws';
+import { useAudioState, useDetectState, useRecordingsState, useSnapshotsState } from '../api/ws';
 import { useMemo } from 'preact/hooks';
 import useSWR from 'swr';
 
@@ -43,6 +44,7 @@ function Camera({ name, config }) {
   const { payload: detectValue, send: sendDetect } = useDetectState(name);
   const { payload: recordValue, send: sendRecordings } = useRecordingsState(name);
   const { payload: snapshotValue, send: sendSnapshots } = useSnapshotsState(name);
+  const { payload: audioValue, send: sendAudio } = useAudioState(name);
   const href = `/cameras/${name}`;
   const buttons = useMemo(() => {
     return [
@@ -50,10 +52,9 @@ function Camera({ name, config }) {
       { name: 'Recordings', href: `/recording/${name}` },
     ];
   }, [name]);
-  const cleanName = useMemo(
-    () => { return `${name.replaceAll('_', ' ')}` },
-    [name]
-  );
+  const cleanName = useMemo(() => {
+    return `${name.replaceAll('_', ' ')}`;
+  }, [name]);
   const icons = useMemo(
     () => [
       {
@@ -65,7 +66,9 @@ function Camera({ name, config }) {
         },
       },
       {
-        name: config.record.enabled_in_config ? `Toggle recordings ${recordValue === 'ON' ? 'off' : 'on'}` : 'Recordings must be enabled in the config to be turned on in the UI.',
+        name: config.record.enabled_in_config
+          ? `Toggle recordings ${recordValue === 'ON' ? 'off' : 'on'}`
+          : 'Recordings must be enabled in the config to be turned on in the UI.',
         icon: ClipIcon,
         color: config.record.enabled_in_config ? (recordValue === 'ON' ? 'blue' : 'gray') : 'red',
         onClick: () => {
@@ -82,11 +85,27 @@ function Camera({ name, config }) {
           sendSnapshots(snapshotValue === 'ON' ? 'OFF' : 'ON', true);
         },
       },
-    ],
-    [config, detectValue, sendDetect, recordValue, sendRecordings, snapshotValue, sendSnapshots]
+      config.audio.enabled_in_config
+        ? {
+          name: `Toggle audio detection ${audioValue === 'ON' ? 'off' : 'on'}`,
+          icon: AudioIcon,
+          color: audioValue === 'ON' ? 'blue' : 'gray',
+          onClick: () => {
+            sendAudio(audioValue === 'ON' ? 'OFF' : 'ON', true);
+          },
+        }
+        : null,
+    ].filter((button) => button != null),
+    [config, audioValue, sendAudio, detectValue, sendDetect, recordValue, sendRecordings, snapshotValue, sendSnapshots]
   );
 
   return (
-    <Card buttons={buttons} href={href} header={cleanName} icons={icons} media={<CameraImage camera={name} stretch />} />
+    <Card
+      buttons={buttons}
+      href={href}
+      header={cleanName}
+      icons={icons}
+      media={<CameraImage camera={name} stretch />}
+    />
   );
 }