Let --match-filter reject entries early

This makes the following options redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`
yt-dlp · Aug 16, 2021 · 8f18aca · 8f18aca
1 parent 3ad56b4
commit 8f18aca
Show file tree

Hide file tree

Showing 5 changed files with 40 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -1439,6 +1439,10 @@ While these options are redundant, they are still expected to be used due to the
  -e, --get-title --print title
  -g, --get-url --print urls
  -j, --dump-json --print "%()j"
+ --match-title REGEX --match-filter "title ~= (?i)REGEX"
+ --reject-title REGEX --match-filter "title !~= (?i)REGEX"
+ --min-views COUNT --match-filter "view_count >=? COUNT"
+ --max-views COUNT --match-filter "view_count <=? COUNT"
 
 
 #### Not recommended

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -1285,9 +1285,15 @@ def test_match_str(self):
  self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'}))
 
  # Example from docs
- self.assertTrue(
- r'!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'',
- {'description': 'Raining Cats & Dogs'})
+ self.assertTrue(match_str(
+ r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'",
+ {'description': 'Raining Cats & Dogs'}))
+
+ # Incomplete
+ self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True))
+ self.assertTrue(match_str('x', {'id': 'foo'}, True))
+ self.assertTrue(match_str('!x', {'id': 'foo'}, True))
+ self.assertFalse(match_str('x', {'id': 'foo'}, False))
 
  def test_parse_dfxp_time_expr(self):
  self.assertEqual(parse_dfxp_time_expr(None), None)

diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py
@@ -1117,12 +1117,15 @@ def check_filter():
  if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
  return 'Skipping "%s" because it is age restricted' % video_title
 
- if not incomplete:
- match_filter = self.params.get('match_filter')
- if match_filter is not None:
- ret = match_filter(info_dict)
- if ret is not None:
- return ret
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ try:
+ ret = match_filter(info_dict, incomplete=incomplete)
+ except TypeError:
+ # For backward compatibility
+ ret = None if incomplete else match_filter(info_dict)
+ if ret is not None:
+ return ret
  return None
 
  if self.in_download_archive(info_dict):
@@ -2873,13 +2876,13 @@ def download(self, url_list):
  except UnavailableVideoError:
  self.report_error('unable to download video')
  except MaxDownloadsReached:
- self.to_screen('[info] Maximum number of downloaded files reached')
+ self.to_screen('[info] Maximum number of downloads reached')
  raise
  except ExistingVideoReached:
- self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
+ self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing')
  raise
  except RejectedVideoReached:
- self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
+ self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject')
  raise
  else:
  if self.params.get('dump_single_json', False):

diff --git a/yt_dlp/options.py b/yt_dlp/options.py
@@ -356,11 +356,11 @@ def _dict_from_options_callback(
  selection.add_option(
  '--match-title',
  dest='matchtitle', metavar='REGEX',
- help='Download only matching titles (regex or caseless sub-string)')
+ help=optparse.SUPPRESS_HELP)
  selection.add_option(
  '--reject-title',
  dest='rejecttitle', metavar='REGEX',
- help='Skip download for matching titles (regex or caseless sub-string)')
+ help=optparse.SUPPRESS_HELP)
  selection.add_option(
  '--max-downloads',
  dest='max_downloads', metavar='NUMBER', type=int, default=None,
@@ -395,11 +395,11 @@ def _dict_from_options_callback(
  selection.add_option(
  '--min-views',
  metavar='COUNT', dest='min_views', default=None, type=int,
- help='Do not download any videos with less than COUNT views')
+ help=optparse.SUPPRESS_HELP)
  selection.add_option(
  '--max-views',
  metavar='COUNT', dest='max_views', default=None, type=int,
- help='Do not download any videos with more than COUNT views')
+ help=optparse.SUPPRESS_HELP)
  selection.add_option(
  '--match-filter',
  metavar='FILTER', dest='match_filter', default=None,

diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
@@ -4657,7 +4657,7 @@ def filter_using_list(row, filterArray):
  return '\n'.join(format_str % tuple(row) for row in table)
 
 
-def _match_one(filter_part, dct):
+def _match_one(filter_part, dct, incomplete):
  # TODO: Generalize code with YoutubeDL._build_format_filter
  STRING_OPERATORS = {
  '*=': operator.contains,
@@ -4718,7 +4718,7 @@ def _match_one(filter_part, dct):
  'Invalid integer value %r in filter part %r' % (
  m.group('intval'), filter_part))
  if actual_value is None:
- return m.group('none_inclusive')
+ return incomplete or m.group('none_inclusive')
  return op(actual_value, comparison_value)
 
  UNARY_OPERATORS = {
@@ -4733,22 +4733,25 @@ def _match_one(filter_part, dct):
  if m:
  op = UNARY_OPERATORS[m.group('op')]
  actual_value = dct.get(m.group('key'))
+ if incomplete and actual_value is None:
+ return True
  return op(actual_value)
 
  raise ValueError('Invalid filter part %r' % filter_part)
 
 
-def match_str(filter_str, dct):
- """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
-
+def match_str(filter_str, dct, incomplete=False):
+ """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
+ When incomplete, all conditions pass on missing fields
+ """
  return all(
- _match_one(filter_part.replace(r'\&', '&'), dct)
+ _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
  for filter_part in re.split(r'(?<!\\)&', filter_str))
 
 
 def match_filter_func(filter_str):
- def _match_func(info_dict):
- if match_str(filter_str, info_dict):
+ def _match_func(info_dict, *args, **kwargs):
+ if match_str(filter_str, info_dict, *args, **kwargs):
  return None
  else:
  video_title = info_dict.get('title', info_dict.get('id', 'video'))