Skip to content

Commit

Permalink
Optimization of Sync Records: Implementing Pagination and Temporary Table (blakeblackshear#6585)
Browse files Browse the repository at this point in the history

* Update pull_request.yml

* Add temporary table for deletion and use pagination to process recordings in chunks for deletion of recordings with missing files

* move RecordingsToDelete class to models.py

* recording cleanup: bugfixes

* Update cleanup.py

* improve log message in cleanup.py

Co-authored-by: Nicolas Mowen <[email protected]>

---------

Co-authored-by: Nicolas Mowen <[email protected]>
  • Loading branch information
skrashevich and NickM-27 committed Jun 11, 2023
1 parent 7459a1c commit 5c27cb7
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 24 deletions.
8 changes: 8 additions & 0 deletions frigate/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,11 @@ class Recordings(Model): # type: ignore[misc]
motion = IntegerField(null=True)
objects = IntegerField(null=True)
segment_size = FloatField(default=0) # this should be stored as MB


# Used for temporary table in record/cleanup.py
class RecordingsToDelete(Model): # type: ignore[misc]
    """Staging table holding the ids of recordings that should be removed.

    record/cleanup.py creates this table during recording sync, bulk-inserts
    the ids of Recordings rows whose files are missing from disk, and then
    deletes from Recordings using a sub-select of the ids stored here.
    """

    # Id of a Recordings row scheduled for deletion; explicitly not declared
    # as this table's primary key.
    # NOTE(review): max_length=30 presumably mirrors Recordings.id — confirm.
    id = CharField(null=False, primary_key=False, max_length=30)

    class Meta:
        # Mark the model's table as temporary; cleanup.py also passes
        # temporary=True when it calls create_table().
        temporary = True
72 changes: 50 additions & 22 deletions frigate/record/cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
from multiprocessing.synchronize import Event as MpEvent
from pathlib import Path

from peewee import DoesNotExist
from peewee import DatabaseError, DoesNotExist, chunked

from frigate.config import FrigateConfig, RetainModeEnum
from frigate.const import RECORD_DIR, SECONDS_IN_DAY
from frigate.models import Event, Recordings, Timeline
from frigate.models import Event, Recordings, RecordingsToDelete, Timeline
from frigate.record.util import remove_empty_directories

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -217,34 +217,62 @@ def sync_recordings(self) -> None:
logger.debug("Start sync recordings.")

# get all recordings in the db
recordings: Recordings = Recordings.select()

# get all recordings files on disk
files_on_disk = []
for root, _, files in os.walk(RECORD_DIR):
for file in files:
files_on_disk.append(os.path.join(root, file))

recordings_to_delete = []
for recording in recordings.objects().iterator():
if recording.path not in files_on_disk:
recordings_to_delete.append(recording.id)
recordings = Recordings.select(Recordings.id, Recordings.path)

# get all recordings files on disk and put them in a set
files_on_disk = {
os.path.join(root, file)
for root, _, files in os.walk(RECORD_DIR)
for file in files
}

# Use pagination to process records in chunks
page_size = 1000
num_pages = (recordings.count() + page_size - 1) // page_size
recordings_to_delete = set()

for page in range(num_pages):
for recording in recordings.paginate(page, page_size):
if recording.path not in files_on_disk:
recordings_to_delete.add(recording.id)

# convert back to list of dictionaries for insertion
recordings_to_delete = [
{"id": recording_id} for recording_id in recordings_to_delete
]

if len(recordings_to_delete) / recordings.count() > 0.5:
logger.debug(
f"Deleting {(len(recordings_to_delete) / recordings.count()):2f}% of recordings could be due to configuration error. Aborting..."
)
return

logger.debug(
f"Deleting {len(recordings_to_delete)} recordings with missing files"
)
# delete up to 100,000 at a time
max_deletes = 100000
for i in range(0, len(recordings_to_delete), max_deletes):
Recordings.delete().where(
Recordings.id << recordings_to_delete[i : i + max_deletes]
).execute()

# create a temporary table for deletion
RecordingsToDelete.create_table(temporary=True)

# insert ids to the temporary table
max_inserts = 1000
for batch in chunked(recordings_to_delete, max_inserts):
RecordingsToDelete.insert_many(batch).execute()

try:
# delete records in the main table that exist in the temporary table
query = Recordings.delete().where(
Recordings.id.in_(RecordingsToDelete.select(RecordingsToDelete.id))
)
query.execute()
except DatabaseError as e:
logger.error(f"Database error during delete: {e}")

logger.debug("End sync recordings.")

def run(self) -> None:
# on startup sync recordings with disk (disabled due to too much CPU usage)
# self.sync_recordings()
# on startup sync recordings with disk
self.sync_recordings()

# Expire tmp clips every minute, recordings and clean directories every hour.
for counter in itertools.cycle(range(self.config.record.expire_interval)):
Expand Down
4 changes: 2 additions & 2 deletions frigate/record/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from setproctitle import setproctitle

from frigate.config import FrigateConfig
from frigate.models import Event, Recordings, Timeline
from frigate.models import Event, Recordings, RecordingsToDelete, Timeline
from frigate.record.cleanup import RecordingCleanup
from frigate.record.maintainer import RecordingMaintainer
from frigate.types import RecordMetricsTypes
Expand Down Expand Up @@ -46,7 +46,7 @@ def receiveSignal(signalNumber: int, frame: Optional[FrameType]) -> None:
},
timeout=60,
)
models = [Event, Recordings, Timeline]
models = [Event, Recordings, Timeline, RecordingsToDelete]
db.bind(models)

maintainer = RecordingMaintainer(
Expand Down

0 comments on commit 5c27cb7

Please sign in to comment.