From 1228a19be4a9d1fb75b96c6bea255c2764d43797 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 9 Oct 2019 19:24:40 +0000 Subject: [PATCH] remove duplicate in list_check_sha1s --- cufacesearch/cufacesearch/updater/extraction_checker.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cufacesearch/cufacesearch/updater/extraction_checker.py b/cufacesearch/cufacesearch/updater/extraction_checker.py index b442ee0..dba3808 100644 --- a/cufacesearch/cufacesearch/updater/extraction_checker.py +++ b/cufacesearch/cufacesearch/updater/extraction_checker.py @@ -303,6 +303,11 @@ def run(self, daemon=False): raise ValueError('Unknown keys in msg: {}'.format(msg.keys())) # This is dangerous, as it assumes the self.ingester.get_msg_json() generator # would restart from the next point... Is this the case for Kafka? + prev_len = len(list_check_sha1s) + list_check_sha1s = list(set(list_check_sha1s)) + if len(list_check_sha1s) < prev_len: + msg = "[{}: log] Removed {} duplicate from `list_check_sha1s`" + print(msg.format(self.pp, prev_len - len(list_check_sha1s))) if len(list_check_sha1s) >= self.indexer.batch_update_size: break except Exception as inst: