Skip to content

Commit

Permalink
Merge pull request #7955 from kozlovsky/fix/slow_queries
Browse files Browse the repository at this point in the history
Fix slow database queries by using the partial index `idx_torrentstate__last_check__partial`
  • Loading branch information
kozlovsky committed Apr 5, 2024
2 parents e13ee42 + 2445577 commit 023f44d
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 12 deletions.
9 changes: 5 additions & 4 deletions src/tribler/core/components/metadata_store/db/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,9 +690,9 @@ def get_entries_query(
t = time() - POPULAR_TORRENTS_FRESHNESS_PERIOD
health_list = list(
select(
health
for health in self.TorrentState
if health.last_check >= t and (health.seeders > 0 or health.leechers > 0)
health for health in self.TorrentState
if health.has_data == 1 # The condition had to be written this way for the partial index to work
and health.last_check >= t and (health.seeders > 0 or health.leechers > 0)
).order_by(
lambda health: (desc(health.seeders), desc(health.leechers), desc(health.last_check))
)[:POPULAR_TORRENTS_COUNT]
Expand Down Expand Up @@ -748,7 +748,8 @@ def get_entries_query(
)

if health_checked_after is not None:
pony_query = pony_query.where(lambda g: g.health.last_check >= health_checked_after)
pony_query = pony_query.where(lambda g: g.health.has_data == 1 # Has to be written this way for index
and g.health.last_check >= health_checked_after)

# Sort the query
pony_query = pony_query.sort_by("desc(g.rowid)" if sort_desc else "g.rowid")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ async def test_check_random_tracker_not_alive(torrent_checker):
async def test_task_select_tracker(torrent_checker):
with db_session:
tracker = torrent_checker.mds.TrackerState(url="http://localhost/tracker")
torrent_checker.mds.TorrentState(infohash=b'a' * 20, seeders=5, leechers=10, trackers={tracker})
torrent_checker.mds.TorrentState(infohash=b'a' * 20, seeders=5, leechers=10, trackers={tracker}, last_check=1)

controlled_session = HttpTrackerSession("127.0.0.1", ("localhost", 8475), "/announce", 5, None)
controlled_session.connect_to_tracker = lambda: succeed(None)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,9 @@ async def check_random_tracker(self):
url = tracker.url
with db_session:
dynamic_interval = TORRENT_CHECK_RETRY_INTERVAL * (2 ** tracker.failures)
torrents = select(ts for ts in tracker.torrents if ts.last_check + dynamic_interval < int(time.time()))
torrents = select(ts for ts in tracker.torrents
if ts.has_data == 1 # The condition had to be written this way for the index to work
and ts.last_check + dynamic_interval < int(time.time()))
infohashes = [t.infohash for t in torrents[:MAX_TORRENTS_CHECKED_PER_SESSION]]

if len(infohashes) == 0:
Expand Down Expand Up @@ -198,8 +200,8 @@ def load_torrents_checked_from_db(self) -> Dict[bytes, HealthInfo]:
now = int(time.time())
last_fresh_time = now - HEALTH_FRESHNESS_SECONDS
checked_torrents = list(self.mds.TorrentState
.select(lambda g: g.has_data and g.self_checked
and between(g.last_check, last_fresh_time, now))
.select(lambda g: g.has_data == 1 # Had to be written this way for index to work
and g.self_checked and between(g.last_check, last_fresh_time, now))
.order_by(lambda g: (desc(g.seeders), g.last_check))
.limit(TORRENTS_CHECKED_RETURN_SIZE))

Expand All @@ -223,11 +225,15 @@ def torrents_to_check(self):
By old torrents, we refer to those checked much further in the past, sorted by the last_check value.
"""
last_fresh_time = time.time() - HEALTH_FRESHNESS_SECONDS
popular_torrents = list(self.mds.TorrentState.select(lambda g: g.last_check < last_fresh_time).
order_by(lambda g: (desc(g.seeders), g.last_check)).limit(TORRENT_SELECTION_POOL_SIZE))
popular_torrents = list(self.mds.TorrentState.select(
lambda g: g.has_data == 1 # The condition had to be written this way for the partial index to work
and g.last_check < last_fresh_time
).order_by(lambda g: (desc(g.seeders), g.last_check)).limit(TORRENT_SELECTION_POOL_SIZE))

old_torrents = list(self.mds.TorrentState.select(lambda g: g.last_check < last_fresh_time).
order_by(lambda g: (g.last_check, desc(g.seeders))).limit(TORRENT_SELECTION_POOL_SIZE))
old_torrents = list(self.mds.TorrentState.select(
lambda g: g.has_data == 1 # The condition had to be written this way for the partial index to work
and g.last_check < last_fresh_time
).order_by(lambda g: (g.last_check, desc(g.seeders))).limit(TORRENT_SELECTION_POOL_SIZE))

selected_torrents = popular_torrents + old_torrents
selected_torrents = random.sample(selected_torrents, min(TORRENT_SELECTION_POOL_SIZE, len(selected_torrents)))
Expand All @@ -254,6 +260,7 @@ def torrents_to_check_in_user_channel(self):
channel_torrents = list(self.mds.TorrentMetadata.select(
lambda g: g.public_key == self.mds.my_public_key_bin
and g.metadata_type == REGULAR_TORRENT
and g.health.has_data == 1 # The condition had to be written this way for the index to work
and g.health.last_check < last_fresh_time)
.order_by(lambda g: g.health.last_check)
.limit(USER_CHANNEL_TORRENT_SELECTION_POOL_SIZE))
Expand Down

0 comments on commit 023f44d

Please sign in to comment.