From cb91637ff60aa38f291a110935e726a323488260 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 Oct 2021 14:19:08 +0000 Subject: [PATCH] pre-commit automatic fixes --- scripts/build_web_domains_table.py | 8 +- scripts/legacy/bne_baseline_matcher.py | 19 +-- .../compute_mixnmatch_and_sqid_stats.py | 20 +-- scripts/legacy/dates.py | 7 +- scripts/legacy/identifiers.py | 3 +- scripts/legacy/query_on_values.py | 5 +- scripts/legacy/recordlinkage_first_trial.py | 8 +- scripts/legacy/sample_additional_info.py | 21 +-- scripts/legacy/sitelinks.py | 4 +- scripts/legacy/sparql_templates.py | 12 +- .../linker/analyze_classification_links.py | 12 +- scripts/linker/extract_performances.py | 16 +-- soweego/commons/constants.py | 4 +- soweego/commons/data_gathering.py | 46 ++---- soweego/commons/http_client.py | 4 +- soweego/commons/localizations.py | 4 +- soweego/commons/text_utils.py | 8 +- soweego/commons/url_utils.py | 20 +-- soweego/importer/base_dump_extractor.py | 4 +- soweego/importer/discogs_dump_extractor.py | 55 ++----- soweego/importer/imdb_dump_extractor.py | 28 +--- soweego/importer/importer.py | 20 +-- soweego/importer/models/base_entity.py | 13 +- soweego/importer/models/base_link_entity.py | 4 +- soweego/importer/models/base_nlp_entity.py | 4 +- soweego/importer/models/musicbrainz_entity.py | 4 +- .../importer/musicbrainz_dump_extractor.py | 68 +++------ soweego/ingester/mix_n_match_client.py | 20 +-- soweego/ingester/wikidata_bot.py | 55 ++----- soweego/linker/baseline.py | 40 ++---- soweego/linker/blocking.py | 12 +- soweego/linker/classifiers.py | 12 +- soweego/linker/evaluate.py | 50 ++----- soweego/linker/features.py | 45 ++---- soweego/linker/link.py | 28 +--- soweego/linker/train.py | 12 +- soweego/linker/workflow.py | 36 ++--- soweego/pipeline.py | 16 +-- soweego/validator/checks.py | 135 +++++------------- soweego/validator/enrichment.py | 24 +--- soweego/wikidata/api_requests.py | 55 ++----- soweego/wikidata/sparql_queries.py | 23 +-- 42 files changed, 249 insertions(+), 735 deletions(-) diff --git a/scripts/build_web_domains_table.py b/scripts/build_web_domains_table.py index 46f0f3df..77e14872 100644 --- a/scripts/build_web_domains_table.py +++ b/scripts/build_web_domains_table.py @@ -68,9 +68,7 @@ def main(args): catalog_and_entity = os.path.split(file_in)[1].partition('_urls')[0] file_out = f'{catalog_and_entity}_web_domains_table.mediawiki' json_out = f'{catalog_and_entity}.json' - header = HEADER.replace( - 'TARGET', catalog_and_entity.replace('_', ' ').title() - ) + header = HEADER.replace('TARGET', catalog_and_entity.replace('_', ' ').title()) prefix = CATALOG_URL_PREFIXES.get(catalog_and_entity) if prefix is None: @@ -123,9 +121,7 @@ def main(args): ) in enumerate(examples, 1): buffer.append(f'{i}. 
[{url} URL], [{prefix}{tid} record]; ') - fout.write( - ROW.format(domain=domain, freq=freq, examples=''.join(buffer)) - ) + fout.write(ROW.format(domain=domain, freq=freq, examples=''.join(buffer))) fout.write(FOOTER) return 0 diff --git a/scripts/legacy/bne_baseline_matcher.py b/scripts/legacy/bne_baseline_matcher.py index c74e794a..52913a3c 100644 --- a/scripts/legacy/bne_baseline_matcher.py +++ b/scripts/legacy/bne_baseline_matcher.py @@ -97,9 +97,7 @@ def temporary_wrapper(): bne_linked = csv.DictReader(open(HOME + 'bne/linked_people')) linked_bne = {} for row in bne_linked: - linked_bne[row['link']] = row['id'].replace( - 'http://datos.bne.es/resource/', '' - ) + linked_bne[row['link']] = row['id'].replace('http://datos.bne.es/resource/', '') ### Baseline matcher 2: cross-catalogs links matched = defaultdict(list) @@ -115,10 +113,7 @@ def temporary_wrapper(): ### Baseline matcher 3: Wikipedia links # BNE, DBpedia links bbdb = filter(lambda x: 'dbpedia.org' in x, linked_bne) - dbp = { - x.replace('http://dbpedia.org/resource/', ''): linked_bne[x] - for x in bbdb - } + dbp = {x.replace('http://dbpedia.org/resource/', ''): linked_bne[x] for x in bbdb} # Wikidata sample, site links site_qid = json.load(open(HOME + 'wikidata/site2qid_1_percent_sample.json')) @@ -136,9 +131,7 @@ def temporary_wrapper(): ### Baseline matcher 4: name AND dates # Wikidata sample, dates dates_wd = {} - wd_dates = csv.DictReader( - open('dates_1_percent_sample.tsv'), delimiter='\t' - ) + wd_dates = csv.DictReader(open('dates_1_percent_sample.tsv'), delimiter='\t') for row in wd_dates: qid = ( row['?person'] @@ -156,9 +149,9 @@ def temporary_wrapper(): dates_bne = {} bne_labels = defaultdict(list) for row in bne_names: - bne_labels[ - row['id'].replace('http://datos.bne.es/resource/', '') - ].append(row['name'].lower()) + bne_labels[row['id'].replace('http://datos.bne.es/resource/', '')].append( + row['name'].lower() + ) for row in bne_dates: ident = row['id'].replace('http://datos.bne.es/resource/', '') for name in bne_labels[ident]: diff --git a/scripts/legacy/compute_mixnmatch_and_sqid_stats.py b/scripts/legacy/compute_mixnmatch_and_sqid_stats.py index ab1eb7e8..35930790 100644 --- a/scripts/legacy/compute_mixnmatch_and_sqid_stats.py +++ b/scripts/legacy/compute_mixnmatch_and_sqid_stats.py @@ -21,9 +21,7 @@ 'total_entries': int(mnm[db]['total']), 'in_wikidata': float(int(mnm[db]['manual']) / int(mnm[db]['total'])), 'unable_to_match': float(int(mnm[db]['noq']) / int(mnm[db]['total'])), - 'matched_to_be_curated': float( - int(mnm[db]['autoq']) / int(mnm[db]['total']) - ), + 'matched_to_be_curated': float(int(mnm[db]['autoq']) / int(mnm[db]['total'])), 'url': mnm[db]['url'], } for db in mnm.keys() @@ -59,9 +57,7 @@ ) # All SQID Wikidata properties -sqid = requests.get( - 'https://tools.wmflabs.org/sqid/data/properties.json' -).json() +sqid = requests.get('https://tools.wmflabs.org/sqid/data/properties.json').json() # SQID properties having external IDs as values sqid_all = { pid: { @@ -78,12 +74,8 @@ mnm_people_with_pid = { mnm[db]['wd_prop']: { 'mnm_total_db_entries': int(mnm[db]['total']), - 'mnm_in_wikidata': float( - int(mnm[db]['manual']) / int(mnm[db]['total']) - ), - 'mnm_unable_to_match': float( - int(mnm[db]['noq']) / int(mnm[db]['total']) - ), + 'mnm_in_wikidata': float(int(mnm[db]['manual']) / int(mnm[db]['total'])), + 'mnm_unable_to_match': float(int(mnm[db]['noq']) / int(mnm[db]['total'])), 'mnm_matched_to_be_curated': float( int(mnm[db]['autoq']) / int(mnm[db]['total']) ), @@ -109,9 +101,7 @@ ) ) 
by_mnm_entries = OrderedDict( - sorted( - final.items(), key=lambda x: x[1]['mnm_total_db_entries'], reverse=True - ) + sorted(final.items(), key=lambda x: x[1]['mnm_total_db_entries'], reverse=True) ) json.dump( by_sqid_usage, diff --git a/scripts/legacy/dates.py b/scripts/legacy/dates.py index 8ff8fdad..9dfb5c52 100644 --- a/scripts/legacy/dates.py +++ b/scripts/legacy/dates.py @@ -5,12 +5,9 @@ WD = '/Users/focs/wikidata/' -entities = [ - l.rstrip() for l in open(WD + 'humans_1_percent_sample').readlines() -] +entities = [l.rstrip() for l in open(WD + 'humans_1_percent_sample').readlines()] buckets = [ - entities[i * 100 : (i + 1) * 100] - for i in range(0, int((len(entities) / 100 + 1))) + entities[i * 100 : (i + 1) * 100] for i in range(0, int((len(entities) / 100 + 1))) ] with open(WD + 'dates_1_percent_sample.tsv', 'w') as o: for b in buckets: diff --git a/scripts/legacy/identifiers.py b/scripts/legacy/identifiers.py index 4e98f827..c36d3f48 100644 --- a/scripts/legacy/identifiers.py +++ b/scripts/legacy/identifiers.py @@ -3,8 +3,7 @@ entities = [l.rstrip() for l in open('1_percent_sample').readlines()] buckets = [ - entities[i * 100 : (i + 1) * 100] - for i in range(0, int((len(entities) / 100 + 1))) + entities[i * 100 : (i + 1) * 100] for i in range(0, int((len(entities) / 100 + 1))) ] with open('linked_1_percent_sample.tsv', 'w') as o: for b in buckets: diff --git a/scripts/legacy/query_on_values.py b/scripts/legacy/query_on_values.py index 1123f0be..058df006 100644 --- a/scripts/legacy/query_on_values.py +++ b/scripts/legacy/query_on_values.py @@ -33,9 +33,6 @@ def main(items_path, sparql_condition, output_path): if __name__ == '__main__': if len(argv) != 4: - print( - 'Usage: python %s ITEMS_PATH SPARQL_CONSTRAINT OUTPUT_PATH' - % __file__ - ) + print('Usage: python %s ITEMS_PATH SPARQL_CONSTRAINT OUTPUT_PATH' % __file__) exit(1) exit(main(argv[1], argv[2], argv[3])) diff --git a/scripts/legacy/recordlinkage_first_trial.py b/scripts/legacy/recordlinkage_first_trial.py index 0aad315c..00431df9 100644 --- a/scripts/legacy/recordlinkage_first_trial.py +++ b/scripts/legacy/recordlinkage_first_trial.py @@ -63,9 +63,7 @@ features = compare.compute(candidate_pairs, discogs_df, wikidata_df) features compare = recordlinkage.Compare() -compare.string( - 'name', 'name', method='levenshtein', threshold=0.7, label='stocazzo' -) +compare.string('name', 'name', method='levenshtein', threshold=0.7, label='stocazzo') features = compare.compute(candidate_pairs, discogs_df, wikidata_df) features discogs_df[304] @@ -103,9 +101,7 @@ from recordlinkage.preprocessing import clean wikidata -etichette = json.load( - open('/Users/focs/wikidata/label2qid_1_percent_sample.json') -) +etichette = json.load(open('/Users/focs/wikidata/label2qid_1_percent_sample.json')) etichette get_ipython().run_line_magic('pinfo', 'pandas.Series') serie = pandas.Series(etichette) diff --git a/scripts/legacy/sample_additional_info.py b/scripts/legacy/sample_additional_info.py index 96d02c9b..4180960e 100644 --- a/scripts/legacy/sample_additional_info.py +++ b/scripts/legacy/sample_additional_info.py @@ -85,10 +85,7 @@ def get_links_for_sample(sample_path, url_formatters, output): formatters_dict[prop_id].replace('$1', id_row[col]) ] = entity_id else: - print( - '%s does not have an entry in the formatters file' - % col - ) + print('%s does not have an entry in the formatters file' % col) json.dump(url_id, open(filepath, 'w'), indent=2, ensure_ascii=False) @@ -134,22 +131,16 @@ def get_birth_death_dates_for_sample(sample_path, 
output): qid = get_wikidata_id_from_uri(date_row['?id']) # creates the combination of all birth dates strings and all death dates strings if date_row['?birth']: - for b in get_date_strings( - date_row['?birth'], date_row['?b_precision'] - ): + for b in get_date_strings(date_row['?birth'], date_row['?b_precision']): if date_row['?death']: for d in get_date_strings( date_row['?death'], date_row['?d_precision'] ): - labeldate_qid[ - '%s|%s-%s' % (qid_labels[qid], b, d) - ] = qid + labeldate_qid['%s|%s-%s' % (qid_labels[qid], b, d)] = qid else: labeldate_qid['%s|%s' % (qid_labels[qid], b)] = qid else: - for d in get_date_strings( - date_row['?death'], date_row['?d_precision'] - ): + for d in get_date_strings(date_row['?death'], date_row['?d_precision']): labeldate_qid['%s|-%s' % (qid_labels[qid], d)] = qid json.dump(labeldate_qid, open(filepath, 'w'), indent=2, ensure_ascii=False) @@ -166,9 +157,7 @@ def get_url_formatters_for_properties(property_mapping_path, output): formatters = {} for _, prop_id in properties.items(): - query = ( - """SELECT * WHERE { wd:%s wdt:P1630 ?formatterUrl . }""" % prop_id - ) + query = """SELECT * WHERE { wd:%s wdt:P1630 ?formatterUrl . }""" % prop_id for r in _make_request(query): formatters[prop_id] = r['?formatterUrl'] diff --git a/scripts/legacy/sitelinks.py b/scripts/legacy/sitelinks.py index 4bd29996..c3e42c02 100644 --- a/scripts/legacy/sitelinks.py +++ b/scripts/legacy/sitelinks.py @@ -35,9 +35,7 @@ for qid in r['entities']: entity = r['entities'][qid] if entity.get('sitelinks'): - site_qid[ - entity['sitelinks']['enwiki']['title'].replace(' ', '_') - ] = qid + site_qid[entity['sitelinks']['enwiki']['title'].replace(' ', '_')] = qid json.dump( site_qid, diff --git a/scripts/legacy/sparql_templates.py b/scripts/legacy/sparql_templates.py index d336d5dc..37b8ec35 100644 --- a/scripts/legacy/sparql_templates.py +++ b/scripts/legacy/sparql_templates.py @@ -1,18 +1,10 @@ from soweego.wikidata.sparql_queries import ITEM_BINDING, PROPERTY_BINDING VALUES_QUERY_TEMPLATE = ( - 'SELECT * WHERE { VALUES ' - + ITEM_BINDING - + ' { %s } . ' - + ITEM_BINDING - + ' %s }' + 'SELECT * WHERE { VALUES ' + ITEM_BINDING + ' { %s } . ' + ITEM_BINDING + ' %s }' ) CATALOG_QID_QUERY_TEMPLATE = ( - 'SELECT ' - + ITEM_BINDING - + ' WHERE { wd:%s wdt:P1629 ' - + ITEM_BINDING - + ' . }' + 'SELECT ' + ITEM_BINDING + ' WHERE { wd:%s wdt:P1629 ' + ITEM_BINDING + ' . 
}' ) PROPERTIES_WITH_URL_DATATYPE_QUERY = ( 'SELECT ' diff --git a/scripts/linker/analyze_classification_links.py b/scripts/linker/analyze_classification_links.py index 70e51057..c87205a0 100644 --- a/scripts/linker/analyze_classification_links.py +++ b/scripts/linker/analyze_classification_links.py @@ -112,9 +112,7 @@ } ) -summaries = pd.DataFrame(summaries).sort_values( - by="Average Mean", ascending=False -) +summaries = pd.DataFrame(summaries).sort_values(by="Average Mean", ascending=False) print(summaries.to_csv(index=False)) @@ -184,9 +182,7 @@ d["Prediction"].value_counts(normalize=True).reset_index() ) - dcounts = dcounts.rename( - columns={"index": "Value", "Prediction": "Counts"} - ) + dcounts = dcounts.rename(columns={"index": "Value", "Prediction": "Counts"}) dcounts["Model"] = m dcounts["Catalog/Entity"] = ce @@ -195,6 +191,4 @@ else: data = data.append(dcounts, ignore_index=True) - sns.barplot( - x="Value", y="Counts", data=data, hue="Model", ax=axes_binary[axi] - ) + sns.barplot(x="Value", y="Counts", data=data, hue="Model", ax=axes_binary[axi]) diff --git a/scripts/linker/extract_performances.py b/scripts/linker/extract_performances.py index a154ced6..39a6da0b 100644 --- a/scripts/linker/extract_performances.py +++ b/scripts/linker/extract_performances.py @@ -144,14 +144,11 @@ "Average Prec": "%.6f" % gg['Prec.Mean'].astype(float).mean(), "Average Prec.STD": "%.6f" % gg['Prec.STD'].astype(float).mean(), "Average Recall": "%.6f" % gg['Recall.Mean'].astype(float).mean(), - "Average Recall.STD": "%.6f" - % gg['Recall.STD'].astype(float).mean(), + "Average Recall.STD": "%.6f" % gg['Recall.STD'].astype(float).mean(), } ) -summaries = pd.DataFrame(summaries).sort_values( - by="Average F1", ascending=False -) +summaries = pd.DataFrame(summaries).sort_values(by="Average F1", ascending=False) print(summaries.to_csv(index=False)) @@ -168,12 +165,9 @@ "Average F1": "%.6f" % gg['F1.Mean'].astype(float).mean(), "Average F1.STD": "%.6f" % gg['F1.STD'].astype(float).mean(), "Average Prec": "%.6f" % gg['Prec.Mean'].astype(float).mean(), - "Average Prec.STD": "%.6f" - % gg['Prec.STD'].astype(float).mean(), - "Average Recall": "%.6f" - % gg['Recall.Mean'].astype(float).mean(), - "Average Recall.STD": "%.6f" - % gg['Recall.STD'].astype(float).mean(), + "Average Prec.STD": "%.6f" % gg['Prec.STD'].astype(float).mean(), + "Average Recall": "%.6f" % gg['Recall.Mean'].astype(float).mean(), + "Average Recall.STD": "%.6f" % gg['Recall.STD'].astype(float).mean(), } ) diff --git a/soweego/commons/constants.py b/soweego/commons/constants.py index 43144fae..b23c7666 100644 --- a/soweego/commons/constants.py +++ b/soweego/commons/constants.py @@ -109,9 +109,7 @@ SAMPLES = os.path.join(SAMPLES_DIR, SAMPLES_FILENAME) FEATURES = os.path.join(FEATURES_DIR, FEATURES_FILENAME) LINKER_MODEL = os.path.join(MODELS_DIR, MODEL_FILENAME) -LINKER_NESTED_CV_BEST_MODEL = os.path.join( - MODELS_DIR, NESTED_CV_BEST_MODEL_FILENAME -) +LINKER_NESTED_CV_BEST_MODEL = os.path.join(MODELS_DIR, NESTED_CV_BEST_MODEL_FILENAME) LINKER_RESULT = os.path.join(RESULTS_DIR, RESULT_FILENAME) LINKER_EVALUATION_PREDICTIONS = os.path.join( RESULTS_DIR, EVALUATION_PREDICTIONS_FILENAME diff --git a/soweego/commons/data_gathering.py b/soweego/commons/data_gathering.py index efe50bb0..31077515 100644 --- a/soweego/commons/data_gathering.py +++ b/soweego/commons/data_gathering.py @@ -21,13 +21,7 @@ from sqlalchemy import or_ from tqdm import tqdm -from soweego.commons import ( - constants, - keys, - target_database, - text_utils, - url_utils, 
-) +from soweego.commons import constants, keys, target_database, text_utils, url_utils from soweego.commons.db_manager import DBManager from soweego.importer import models from soweego.wikidata import api_requests, sparql_queries, vocabulary @@ -35,9 +29,7 @@ LOGGER = logging.getLogger(__name__) -def gather_target_biodata( - entity: str, catalog: str -) -> Optional[Iterator[tuple]]: +def gather_target_biodata(entity: str, catalog: str) -> Optional[Iterator[tuple]]: LOGGER.info( 'Gathering %s birth/death dates/places and gender metadata ...', catalog ) @@ -83,11 +75,7 @@ def tokens_fulltext_search( raise ValueError('Bad target entity class: %s' % target_entity) tokens = filter(None, tokens) - terms = ( - ' '.join(map('+{0}'.format, tokens)) - if boolean_mode - else ' '.join(tokens) - ) + terms = ' '.join(map('+{0}'.format, tokens)) if boolean_mode else ' '.join(tokens) ft_search = column.match(terms) session = DBManager.connect_to_db() @@ -144,9 +132,7 @@ def perfect_name_search( session = DBManager.connect_to_db() try: for r in ( - session.query(target_entity) - .filter(target_entity.name == to_search) - .all() + session.query(target_entity).filter(target_entity.name == to_search).all() ): yield r @@ -163,9 +149,7 @@ def perfect_name_search_bucket( session = DBManager.connect_to_db() try: for r in ( - session.query(target_entity) - .filter(target_entity.name.in_(to_search)) - .all() + session.query(target_entity).filter(target_entity.name.in_(to_search)).all() ): yield r @@ -246,9 +230,7 @@ def _run_query(query, catalog, entity_type, page=1000): "No data available for %s %s. Stopping here", catalog, entity_type ) return None - LOGGER.info( - 'Got %d internal IDs with data from %s %s', count, catalog, entity_type - ) + LOGGER.info('Got %d internal IDs with data from %s %s', count, catalog, entity_type) return query.yield_per(page).enable_eagerloads(False) @@ -269,15 +251,11 @@ def _build_biodata_query_fields(entity, entity_type, catalog): if hasattr(entity, 'birth_place'): query_fields.append(entity.birth_place) else: - LOGGER.info( - '%s %s has no birth place information', catalog, entity_type - ) + LOGGER.info('%s %s has no birth place information', catalog, entity_type) if hasattr(entity, 'death_place'): query_fields.append(entity.death_place) else: - LOGGER.info( - '%s %s has no death place information', catalog, entity_type - ) + LOGGER.info('%s %s has no death place information', catalog, entity_type) return query_fields @@ -410,9 +388,7 @@ def gather_wikidata_biodata(wikidata): timestamp, precision = parsed[0], parsed[1] # Get rid of time, useless timestamp = timestamp.split('T')[0] - wikidata[qid][keys.BIODATA].append( - (pid, f'{timestamp}/{precision}') - ) + wikidata[qid][keys.BIODATA].append((pid, f'{timestamp}/{precision}')) else: wikidata[qid][keys.BIODATA].append((pid, parsed)) total += 1 @@ -485,9 +461,7 @@ def _compile(regexp, id_or_url): def gather_target_ids(entity, catalog, catalog_pid, aggregated): - LOGGER.info( - 'Gathering Wikidata %s items with %s identifiers ...', entity, catalog - ) + LOGGER.info('Gathering Wikidata %s items with %s identifiers ...', entity, catalog) query_type = keys.IDENTIFIER, constants.SUPPORTED_ENTITIES.get(entity) diff --git a/soweego/commons/http_client.py b/soweego/commons/http_client.py index f13501fb..8ea4e873 100644 --- a/soweego/commons/http_client.py +++ b/soweego/commons/http_client.py @@ -37,9 +37,7 @@ def download_file(url, filePath): """Downloads a web content and saves it in a custom filePath""" try: file_size = 
int(requests.head(url).headers["Content-Length"]) - pbar = tqdm( - total=file_size, unit='B', unit_scale=True, desc=url.split('/')[-1] - ) + pbar = tqdm(total=file_size, unit='B', unit_scale=True, desc=url.split('/')[-1]) stream = requests.get(url, stream=True, verify=False) with open(filePath, 'wb') as f: diff --git a/soweego/commons/localizations.py b/soweego/commons/localizations.py index 8923ba80..8431b365 100644 --- a/soweego/commons/localizations.py +++ b/soweego/commons/localizations.py @@ -10,8 +10,6 @@ ) FAIL_DOWNLOAD = 'Fails on dump download' FAIL_HANDLER = 'Handler fails on dump scraping' -MALFORMED_ROW = ( - 'Malformed Row, brokes the structure ' -) +MALFORMED_ROW = 'Malformed Row, brokes the structure ' FIELD_NOT_MAPPED = 'Field: \t %s \t not mapped' WRONG_MAPPINGS = 'Errors at DB import, probably due to wrong mappings \n %s' diff --git a/soweego/commons/text_utils.py b/soweego/commons/text_utils.py index 70a29ed1..9711cfc5 100644 --- a/soweego/commons/text_utils.py +++ b/soweego/commons/text_utils.py @@ -17,9 +17,7 @@ # Adapted from http://snowball.tartarus.org/algorithms/english/stop.txt STOPWORDS_ENG = frozenset( - str( - get_data('soweego.commons.resources', 'stopwords_eng.txt'), 'utf8' - ).splitlines() + str(get_data('soweego.commons.resources', 'stopwords_eng.txt'), 'utf8').splitlines() ) COMMON_WORDS_ENG = frozenset( str( @@ -34,9 +32,7 @@ ) BAND_NAME_LOW_SCORE_WORDS = frozenset( - str( - get_data('soweego.commons.resources', 'band_low_score_words.txt') - ).splitlines() + str(get_data('soweego.commons.resources', 'band_low_score_words.txt')).splitlines() ) STOPWORDS_URL_TOKENS = frozenset( diff --git a/soweego/commons/url_utils.py b/soweego/commons/url_utils.py index f9269181..270434a3 100644 --- a/soweego/commons/url_utils.py +++ b/soweego/commons/url_utils.py @@ -61,7 +61,9 @@ def clean(url): def validate(url): ul = '\u00a1-\uffff' # Unicode letters range (must not be a raw string) # IP patterns - ipv4_re = r'(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}' + ipv4_re = ( + r'(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)(?:\.(?:25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}' + ) ipv6_re = r'\[[0-9a-f:\.]+\]' # Host patterns hostname_re = ( @@ -93,9 +95,7 @@ def validate(url): LOGGER.debug('Dropping invalid URL: <%s>', url) return None if not valid_url.group(1): - LOGGER.debug( - "Adding 'https' to potential URL with missing scheme: <%s>", url - ) + LOGGER.debug("Adding 'https' to potential URL with missing scheme: <%s>", url) return 'https://' + valid_url.group() return valid_url.group() @@ -116,9 +116,7 @@ def resolve(url: str) -> Optional[str]: } try: # Some Web sites do not accept the HEAD method: fire a GET, but don't download anything - response = get( - url, headers=browser_ua, stream=True, timeout=READ_TIMEOUT - ) + response = get(url, headers=browser_ua, stream=True, timeout=READ_TIMEOUT) except requests.exceptions.SSLError as ssl_error: LOGGER.debug( 'SSL certificate verification failed, will retry without verification. Original URL: <%s> - Reason: %s', @@ -189,9 +187,7 @@ def tokenize(url, domain_only=False) -> set: try: split = urlsplit(url) except ValueError as value_error: - LOGGER.warning( - 'Invalid URL: %s. Reason: %s', url, value_error, exc_info=1 - ) + LOGGER.warning('Invalid URL: %s. 
Reason: %s', url, value_error, exc_info=1) return None domain_tokens = set(re.split(r'\W+', split.netloc)) domain_tokens.difference_update(TOP_LEVEL_DOMAINS, DOMAIN_PREFIXES) @@ -335,7 +331,5 @@ def get_external_id_from_url(url, ext_id_pids_to_urls): def is_wiki_link(url): domain = urlsplit(url).netloc return ( - True - if any(wiki_project in domain for wiki_project in WIKI_PROJECTS) - else False + True if any(wiki_project in domain for wiki_project in WIKI_PROJECTS) else False ) diff --git a/soweego/importer/base_dump_extractor.py b/soweego/importer/base_dump_extractor.py index 5973f2d8..a891db46 100644 --- a/soweego/importer/base_dump_extractor.py +++ b/soweego/importer/base_dump_extractor.py @@ -26,9 +26,7 @@ class BaseDumpExtractor: populate a database instance. """ - def extract_and_populate( - self, dump_file_paths: List[str], resolve: bool - ) -> None: + def extract_and_populate(self, dump_file_paths: List[str], resolve: bool) -> None: """Extract relevant data and populate `SQLAlchemy `_ ORM entities accordingly. Entities will be then persisted to a database instance. diff --git a/soweego/importer/discogs_dump_extractor.py b/soweego/importer/discogs_dump_extractor.py index 3c95d6d4..81cc404e 100644 --- a/soweego/importer/discogs_dump_extractor.py +++ b/soweego/importer/discogs_dump_extractor.py @@ -83,9 +83,7 @@ def get_dump_download_urls(self) -> Optional[List[str]]: return None return urls - def extract_and_populate( - self, dump_file_paths: List[str], resolve: bool - ) -> None: + def extract_and_populate(self, dump_file_paths: List[str], resolve: bool) -> None: """Extract relevant data from the *artists* (people) and *masters* (works) Discogs dumps, preprocess them, populate `SQLAlchemy `_ ORM entities, and persist @@ -101,9 +99,7 @@ def extract_and_populate( self._process_masters_dump(dump_file_paths[1]) def _process_masters_dump(self, dump_file_path): - LOGGER.info( - "Starting import of masters from Discogs dump '%s'", dump_file_path - ) + LOGGER.info("Starting import of masters from Discogs dump '%s'", dump_file_path) start = datetime.now() tables = [DiscogsMasterEntity, DiscogsMasterArtistRelationship] db_manager = DBManager() @@ -124,9 +120,7 @@ def _process_masters_dump(self, dump_file_path): shutil.copyfileobj(f_in, f_out) # count number of entries - n_rows = sum( - 1 for _ in self._g_process_et_items(extracted_path, 'master') - ) + n_rows = sum(1 for _ in self._g_process_et_items(extracted_path, 'master')) session = db_manager.new_session() entity_array = [] # array to which we'll add the entities relationships_set = set() @@ -176,8 +170,7 @@ def _process_masters_dump(self, dump_file_path): end = datetime.now() LOGGER.info( - 'Import completed in %s. Total entities: %d. ' - 'Total relationships %s.', + 'Import completed in %s. Total entities: %d. 
' 'Total relationships %s.', end - start, self.total_entities, len(relationships_set), @@ -217,9 +210,7 @@ def _extract_from_master_node(node, relationships_set): ) elif child.tag == 'artists': for artist in child: - relationships_set.add( - (entity.catalog_id, artist.find('id').text) - ) + relationships_set.add((entity.catalog_id, artist.find('id').text)) entity.genres = ' '.join(genres) return entity @@ -257,9 +248,7 @@ def _extract_from_artist_node(self, node, resolve: bool) -> dict: infos['profile'] = node.findtext('profile') infos['namevariations'] = node.find('namevariations') - infos['living_links'] = self._extract_living_links( - identifier, node, resolve - ) + infos['living_links'] = self._extract_living_links(identifier, node, resolve) return infos @@ -295,9 +284,7 @@ def _process_artists_dump(self, dump_file_path, resolve): shutil.copyfileobj(f_in, f_out) # count number of entries - n_rows = sum( - 1 for _ in self._g_process_et_items(extracted_path, 'artist') - ) + n_rows = sum(1 for _ in self._g_process_et_items(extracted_path, 'artist')) session = db_manager.new_session() entity_array = [] # array to which we'll add the entities for _, node in tqdm( @@ -363,9 +350,7 @@ def _process_artists_dump(self, dump_file_path, resolve): # we can safely delete the extracted discogs dump os.remove(extracted_path) - def _populate_band( - self, entity_array, entity: DiscogsGroupEntity, infos: dict - ): + def _populate_band(self, entity_array, entity: DiscogsGroupEntity, infos: dict): # Main entity self._fill_entity(entity, infos) self.bands += 1 @@ -409,9 +394,7 @@ def _populate_links(self, entity_array, entity_class, infos: dict): self._fill_link_entity(link_entity, infos['identifier'], link) entity_array.append(link_entity) - def _populate_name_variations( - self, entity_array, infos: dict, current_entity - ): + def _populate_name_variations(self, entity_array, infos: dict, current_entity): identifier = infos['identifier'] if infos.get('namevariations') is not None: children = list(infos['namevariations']) @@ -421,9 +404,7 @@ def _populate_name_variations( ): entity_array.append(entity) else: - LOGGER.debug( - 'Artist %s has an empty tag', identifier - ) + LOGGER.debug('Artist %s has an empty tag', identifier) else: LOGGER.debug('Artist %s has no tag', identifier) @@ -442,9 +423,7 @@ def _populate_nlp_entity(self, entity_array, infos: dict, entity_class): else: self.band_nlp += 1 else: - LOGGER.debug( - 'Artist %s has an empty tag', infos['identifier'] - ) + LOGGER.debug('Artist %s has an empty tag', infos['identifier']) @staticmethod def _fill_entity(entity: DiscogsArtistEntity, infos): @@ -459,9 +438,7 @@ def _fill_entity(entity: DiscogsArtistEntity, infos): if real_name: entity.real_name = real_name else: - LOGGER.debug( - 'Artist %s has an empty tag', infos['identifier'] - ) + LOGGER.debug('Artist %s has an empty tag', infos['identifier']) # Data quality data_quality = infos['data_quality'] if data_quality: @@ -506,9 +483,7 @@ def _extract_living_links(self, identifier, node, resolve: bool): for url_element in urls.iterfind('url'): url = url_element.text if not url: - LOGGER.debug( - 'Artist %s: skipping empty tag', identifier - ) + LOGGER.debug('Artist %s: skipping empty tag', identifier) continue for alive_link in self._check_link(url, resolve): yield alive_link @@ -551,9 +526,7 @@ def _g_process_et_items(path, tag) -> Iterable[Tuple]: efficient way """ - context: etree.ElementTree = etree.iterparse( - path, events=('end',), tag=tag - ) + context: etree.ElementTree = 
etree.iterparse(path, events=('end',), tag=tag) for event, elem in context: yield event, elem diff --git a/soweego/importer/imdb_dump_extractor.py b/soweego/importer/imdb_dump_extractor.py index f573faea..be251ab1 100644 --- a/soweego/importer/imdb_dump_extractor.py +++ b/soweego/importer/imdb_dump_extractor.py @@ -67,9 +67,7 @@ def _normalize_null(entity: Dict) -> None: if value == '\\N': entity[key] = None - def extract_and_populate( - self, dump_file_paths: List[str], resolve: bool - ) -> None: + def extract_and_populate(self, dump_file_paths: List[str], resolve: bool) -> None: """Extract relevant data from the *name* (people) and *title* (works) IMDb dumps, preprocess them, populate `SQLAlchemy `_ ORM entities, and persist @@ -115,9 +113,7 @@ def extract_and_populate( LOGGER.info('Starting import of movies ...') # Here we open the movie dump file, and add everything to the DB - for movie_info, entity_array in self._loop_through_entities( - movies_file_path - ): + for movie_info, entity_array in self._loop_through_entities(movies_file_path): # create the movie SQLAlchemy entity and populate it movie_entity = imdb_entity.IMDbTitleEntity() @@ -128,9 +124,7 @@ def extract_and_populate( movie_entity.name_tokens = ' '.join( text_utils.tokenize(movie_info.get('primaryTitle')) ) - movie_entity.is_adult = ( - True if movie_info.get('isAdult') == '1' else False - ) + movie_entity.is_adult = True if movie_info.get('isAdult') == '1' else False try: movie_entity.born = datetime.date( year=int(movie_info.get('startYear')), month=1, day=1 @@ -177,9 +171,7 @@ def extract_and_populate( # reset timer for persons import start = datetime.datetime.now() - for person_info, entity_array in self._loop_through_entities( - person_file_path - ): + for person_info, entity_array in self._loop_through_entities(person_file_path): # IMDb saves the list of professions as a comma separated # string @@ -187,9 +179,7 @@ def extract_and_populate( # if person has no professions then ignore it if not professions: - LOGGER.debug( - 'Person %s has no professions', person_info.get('nconst') - ) + LOGGER.debug('Person %s has no professions', person_info.get('nconst')) continue professions = professions.split(',') @@ -359,9 +349,7 @@ def _populate_person( person_entity.catalog_id = person_info.get('nconst') person_entity.name = person_info.get('primaryName') - person_entity.name_tokens = ' '.join( - text_utils.tokenize(person_entity.name) - ) + person_entity.name_tokens = ' '.join(text_utils.tokenize(person_entity.name)) # If either `actor` or `actress` in primary profession # (which is a comma separated string of professions) @@ -371,9 +359,7 @@ def _populate_person( for prof in ['actor', 'actress'] ): person_entity.gender = ( - 'male' - if 'actor' in person_info.get('primaryProfession') - else 'female' + 'male' if 'actor' in person_info.get('primaryProfession') else 'female' ) # IMDb only provides us with the birth and death year of diff --git a/soweego/importer/importer.py b/soweego/importer/importer.py index df3f9c16..2cdb904e 100644 --- a/soweego/importer/importer.py +++ b/soweego/importer/importer.py @@ -39,9 +39,7 @@ @click.command() -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) @click.option( '--url-check', is_flag=True, @@ -65,9 +63,7 @@ def import_cli(catalog: str, url_check: bool, dir_io: str) -> None: @click.command() -@click.argument( - 'catalog', 
type=click.Choice(target_database.supported_targets()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) @click.option( '-d', '--drop', @@ -116,9 +112,7 @@ def check_urls_cli(catalog, drop, dir_io): try: # Resolve every URL for resolved, result in tqdm( - pool.imap_unordered( - _resolve, query_session.query(link_entity) - ), + pool.imap_unordered(_resolve, query_session.query(link_entity)), total=total, ): if not resolved: @@ -209,12 +203,8 @@ def refresh_dump( last_modified, '%a, %d %b %Y %H:%M:%S GMT' ).strftime('%Y%m%d_%H%M%S') except TypeError: - LOGGER.info( - "Last modified not available, using now as replacement" - ) - last_modified = datetime.datetime.now().strftime( - '%Y%m%d_%H%M%S' - ) + LOGGER.info("Last modified not available, using now as replacement") + last_modified = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') extensions = download_url.split('/')[-1].split('.')[1:] diff --git a/soweego/importer/models/base_entity.py b/soweego/importer/models/base_entity.py index 950233d1..a64e3dcf 100644 --- a/soweego/importer/models/base_entity.py +++ b/soweego/importer/models/base_entity.py @@ -41,9 +41,7 @@ class BaseEntity(AbstractConcreteBase, BASE): __tablename__ = None - internal_id = Column( - Integer, unique=True, primary_key=True, autoincrement=True - ) + internal_id = Column(Integer, unique=True, primary_key=True, autoincrement=True) # Catalog identifier, indexed catalog_id = Column(String(50), nullable=False, index=True) # Full name @@ -72,10 +70,7 @@ def __table_args__(cls): ) def __repr__(self) -> str: - return ( - f'' - ) + return f'' class BaseRelationship(AbstractConcreteBase, BASE): @@ -94,9 +89,7 @@ class BaseRelationship(AbstractConcreteBase, BASE): """ __tablename__ = None - internal_id = Column( - Integer, unique=True, primary_key=True, autoincrement=True - ) + internal_id = Column(Integer, unique=True, primary_key=True, autoincrement=True) from_catalog_id = Column(String(50), nullable=False, index=False) to_catalog_id = Column(String(50), nullable=False, index=False) diff --git a/soweego/importer/models/base_link_entity.py b/soweego/importer/models/base_link_entity.py index 8b990f57..0530ab02 100644 --- a/soweego/importer/models/base_link_entity.py +++ b/soweego/importer/models/base_link_entity.py @@ -35,9 +35,7 @@ class BaseLinkEntity(AbstractConcreteBase, BASE): """ __tablename__ = None - internal_id = Column( - Integer, unique=True, primary_key=True, autoincrement=True - ) + internal_id = Column(Integer, unique=True, primary_key=True, autoincrement=True) # Catalog identifier of the entity having the link, indexed catalog_id = Column(String(50), nullable=False, index=True) # Full URL diff --git a/soweego/importer/models/base_nlp_entity.py b/soweego/importer/models/base_nlp_entity.py index a9067d6c..bb6986dc 100644 --- a/soweego/importer/models/base_nlp_entity.py +++ b/soweego/importer/models/base_nlp_entity.py @@ -35,9 +35,7 @@ class BaseNlpEntity(AbstractConcreteBase, BASE): """ __tablename__ = None - internal_id = Column( - Integer, unique=True, primary_key=True, autoincrement=True - ) + internal_id = Column(Integer, unique=True, primary_key=True, autoincrement=True) # Catalog identifier of the entity with textual data, indexed catalog_id = Column(String(50), nullable=False, index=True) # Original text diff --git a/soweego/importer/models/musicbrainz_entity.py b/soweego/importer/models/musicbrainz_entity.py index db0c602d..a097077a 100644 --- a/soweego/importer/models/musicbrainz_entity.py +++ 
b/soweego/importer/models/musicbrainz_entity.py @@ -28,9 +28,7 @@ RELEASE_GROUP_LINK_TABLE = 'musicbrainz_release_group_link' ARTIST_BAND_RELATIONSHIP_TABLE = 'musicbrainz_artist_band_relationship' -RELEASE_ARTIST_RELATIONSHIP_TABLE = ( - 'musicbrainz_release_group_artist_relationship' -) +RELEASE_ARTIST_RELATIONSHIP_TABLE = 'musicbrainz_release_group_artist_relationship' class MusicBrainzArtistEntity(BaseEntity): diff --git a/soweego/importer/musicbrainz_dump_extractor.py b/soweego/importer/musicbrainz_dump_extractor.py index 8846460a..e7f73429 100644 --- a/soweego/importer/musicbrainz_dump_extractor.py +++ b/soweego/importer/musicbrainz_dump_extractor.py @@ -73,13 +73,9 @@ def extract_and_populate(self, dump_file_paths: List[str], resolve: bool): if not os.path.isdir(dump_path): with tarfile.open(dump_file_path, "r:bz2") as tar: - LOGGER.info( - "Extracting dump %s in %s", dump_file_path, dump_path - ) + LOGGER.info("Extracting dump %s in %s", dump_file_path, dump_path) tar.extractall(dump_path) - LOGGER.info( - "Extracted dump %s in %s", dump_file_path, dump_path - ) + LOGGER.info("Extracted dump %s in %s", dump_file_path, dump_path) db_manager = DBManager() @@ -190,9 +186,7 @@ def release_artist_relationships_uniqueness_filter(): def artist_band_relationships_uniqueness_filter(): yield from [ MusicBrainzArtistBandRelationship(item[0], item[1]) - for item in set( - self._artist_band_relationship_generator(dump_path) - ) + for item in set(self._artist_band_relationship_generator(dump_path)) ] relationships_count = self._add_entities_from_generator( @@ -270,9 +264,7 @@ def _add_entities_from_generator( return n_total_entities, n_added_entities @staticmethod - def _get_urls_for_entity_id( - dump_path: str, l_path: str, resolve: bool - ) -> dict: + def _get_urls_for_entity_id(dump_path: str, l_path: str, resolve: bool) -> dict: """given a l_{something}_url relationship file, return a dict of somethingid-[urls]""" @@ -296,9 +288,7 @@ def _get_urls_for_entity_id( relationship[3], ) else: - urlid_entityid_relationship[relationship[3]] = relationship[ - 2 - ] + urlid_entityid_relationship[relationship[3]] = relationship[2] url_path = os.path.join(dump_path, 'mbdump', 'url') url_entityid = {} @@ -312,9 +302,7 @@ def _get_urls_for_entity_id( tsvfile, delimiter='\t', fieldnames=[i for i in range(0, 5)] ) - for url_record in tqdm( - urls, total=count_num_lines_in_file(tsvfile) - ): + for url_record in tqdm(urls, total=count_num_lines_in_file(tsvfile)): urlid = url_record[0] if urlid in urlid_entityid_relationship: @@ -323,9 +311,7 @@ def _get_urls_for_entity_id( continue if resolve and not url_utils.resolve(candidate_url): continue - url_entityid[ - candidate_url - ] = urlid_entityid_relationship[urlid] + url_entityid[candidate_url] = urlid_entityid_relationship[urlid] del urlid_entityid_relationship[urlid] entityid_url = defaultdict(list) @@ -373,15 +359,11 @@ def _artist_link_generator(self, dump_path: str, resolve: bool): for link in artistid_url[artist['id']]: if self._check_person(artist['type_id']): current_entity = MusicBrainzArtistLinkEntity() - self._fill_link_entity( - current_entity, artist['gid'], link - ) + self._fill_link_entity(current_entity, artist['gid'], link) yield current_entity if self._check_band(artist['type_id']): current_entity = MusicBrainzBandLinkEntity() - self._fill_link_entity( - current_entity, artist['gid'], link - ) + self._fill_link_entity(current_entity, artist['gid'], link) yield current_entity def _release_group_link_generator(self, dump_path: str, resolve: 
bool): @@ -434,9 +416,7 @@ def _isni_link_generator(self, dump_path: str, resolve: bool): for candidate_url in url_utils.clean(link): if not url_utils.validate(candidate_url): continue - if resolve and not url_utils.resolve( - candidate_url - ): + if resolve and not url_utils.resolve(candidate_url): continue artist_link[artistid] = candidate_url done = True @@ -470,15 +450,11 @@ def _isni_link_generator(self, dump_path: str, resolve: bool): link = artist_link[artist['id']] if self._check_person(artist['type_id']): current_entity = MusicBrainzArtistLinkEntity() - self._fill_link_entity( - current_entity, artist['gid'], link - ) + self._fill_link_entity(current_entity, artist['gid'], link) yield current_entity if self._check_band(artist['type_id']): current_entity = MusicBrainzBandLinkEntity() - self._fill_link_entity( - current_entity, artist['gid'], link - ) + self._fill_link_entity(current_entity, artist['gid'], link) yield current_entity except KeyError: continue @@ -549,9 +525,7 @@ def _artist_generator(self, dump_path): try: self._fill_entity(current_entity, artist, areas) - current_entity.gender = self._artist_gender( - artist['gender'] - ) + current_entity.gender = self._artist_gender(artist['gender']) except KeyError: LOGGER.error('Wrong gender code: %s', artist) continue @@ -603,9 +577,7 @@ def _artist_band_relationship_generator(dump_path): if row['link_type'] in link_types: links.add(row['id']) - artists_relationship_file = os.path.join( - dump_path, 'mbdump', 'l_artist_artist' - ) + artists_relationship_file = os.path.join(dump_path, 'mbdump', 'l_artist_artist') ids_translator = {} relationships = [] @@ -670,9 +642,7 @@ def _release_group_generator(self, dump_path): fieldnames=['id', 'gid', 'label', 'artist_credit', 'type_id'], ) - for row in tqdm( - release_reader, total=count_num_lines_in_file(releasefile) - ): + for row in tqdm(release_reader, total=count_num_lines_in_file(releasefile)): entity = MusicBrainzReleaseGroupEntity() self._fill_entity(entity, row, None) if row['id'] in release_group_datesprec: @@ -712,9 +682,7 @@ def _release_group_artist_relationship_generator(dump_path): n_rows = count_num_lines_in_file(artistcreditfile) for row in tqdm(artist_credit_reader, total=n_rows): - artist_id_release[row['artist_id']] = artist_credit_release[ - row['id'] - ] + artist_id_release[row['artist_id']] = artist_credit_release[row['id']] # memory free up for performance del artist_credit_release[row['id']] @@ -861,9 +829,7 @@ def _artist_gender(gender_code): def _retrieve_release_group_dates(self, dump_path): release_dateprec = defaultdict(lambda: (date.today(), 0)) - release_country_path = os.path.join( - dump_path, 'mbdump', 'release_country' - ) + release_country_path = os.path.join(dump_path, 'mbdump', 'release_country') with open(release_country_path) as rfile: releases = DictReader( diff --git a/soweego/ingester/mix_n_match_client.py b/soweego/ingester/mix_n_match_client.py index 3ab7eedc..7de82db9 100644 --- a/soweego/ingester/mix_n_match_client.py +++ b/soweego/ingester/mix_n_match_client.py @@ -97,9 +97,7 @@ @click.command() @click.argument('catalog', type=click.Choice(SUPPORTED_TARGETS)) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.argument('confidence_range', type=(float, float)) @click.argument('matches', type=click.Path(exists=True, dir_okay=False)) def cli(catalog, entity, confidence_range, matches): @@ -145,9 +143,7 @@ def 
add_catalog(catalog: str, entity: str) -> int: session = DBManager(MNM_DB).new_session() try: existing = ( - session.query(mix_n_match.MnMCatalog) - .filter_by(name=name_field) - .first() + session.query(mix_n_match.MnMCatalog).filter_by(name=name_field).first() ) if existing is None: LOGGER.info( @@ -179,9 +175,7 @@ def add_catalog(catalog: str, entity: str) -> int: finally: session.close() - LOGGER.info( - 'Catalog addition/update went fine. Internal ID: %d', catalog_id - ) + LOGGER.info('Catalog addition/update went fine. Internal ID: %d', catalog_id) return catalog_id @@ -297,9 +291,7 @@ def _import_matches( url = '' if url_prefix is None else f'{url_prefix}{tid}' db_entity = mix_n_match.MnMEntry() - _set_entry_fields( - db_entity, catalog_id, qid, tid, url, class_qid, score - ) + _set_entry_fields(db_entity, catalog_id, qid, tid, url, class_qid, score) batch.append(db_entity) if len(batch) >= COMMIT_EVERY: @@ -452,9 +444,7 @@ def _set_catalog_fields(db_entity, name_field, catalog, entity): db_entity.active = 1 db_entity.note = NOTE_FIELD db_entity.type = CATALOG_TYPES.get(catalog, '') - db_entity.source_item = int( - target_database.get_catalog_qid(catalog).lstrip('Q') - ) + db_entity.source_item = int(target_database.get_catalog_qid(catalog).lstrip('Q')) wd_prop = target_database.get_catalog_pid(catalog, entity) db_entity.wd_prop = int(wd_prop.lstrip('P')) db_entity.search_wp = SEARCH_WP_FIELD diff --git a/soweego/ingester/wikidata_bot.py b/soweego/ingester/wikidata_bot.py index a96ad6b3..278f5aaf 100644 --- a/soweego/ingester/wikidata_bot.py +++ b/soweego/ingester/wikidata_bot.py @@ -93,9 +93,7 @@ @click.command() @click.argument('catalog', type=click.Choice(SUPPORTED_TARGETS)) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.argument('invalid_identifiers', type=click.File()) @click.option( '-s', @@ -110,9 +108,7 @@ def delete_cli(catalog, entity, invalid_identifiers, sandbox): Format: { catalog_identifier: [ list of QIDs ] } """ if sandbox: - LOGGER.info( - 'Running on the Wikidata sandbox item %s ...', vocabulary.SANDBOX_2 - ) + LOGGER.info('Running on the Wikidata sandbox item %s ...', vocabulary.SANDBOX_2) delete_or_deprecate_identifiers( 'delete', catalog, entity, json.load(invalid_identifiers), sandbox @@ -121,9 +117,7 @@ def delete_cli(catalog, entity, invalid_identifiers, sandbox): @click.command() @click.argument('catalog', type=click.Choice(SUPPORTED_TARGETS)) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.argument('invalid_identifiers', type=click.File()) @click.option( '-s', @@ -138,9 +132,7 @@ def deprecate_cli(catalog, entity, invalid_identifiers, sandbox): Format: { catalog_identifier: [ list of QIDs ] } """ if sandbox: - LOGGER.info( - 'Running on the Wikidata sandbox item %s ...', vocabulary.SANDBOX_2 - ) + LOGGER.info('Running on the Wikidata sandbox item %s ...', vocabulary.SANDBOX_2) delete_or_deprecate_identifiers( 'deprecate', catalog, entity, json.load(invalid_identifiers), sandbox @@ -149,9 +141,7 @@ def deprecate_cli(catalog, entity, invalid_identifiers, sandbox): @click.command() @click.argument('catalog', type=click.Choice(SUPPORTED_TARGETS)) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('entity', 
type=click.Choice(target_database.supported_entities())) @click.argument('identifiers', type=click.File()) @click.option( '-s', @@ -356,8 +346,7 @@ def add_people_statements( edit_summary = BIO_VALIDATION_SUMMARY else: raise ValueError( - f"Invalid criterion: '{criterion}'. " - "Please use either 'links' or 'bio'" + f"Invalid criterion: '{criterion}'. " "Please use either 'links' or 'bio'" ) sandbox_item = vocabulary.SANDBOX_2 @@ -387,9 +376,7 @@ def add_people_statements( ) -def add_works_statements( - statements: Iterable, catalog: str, sandbox: bool -) -> None: +def add_works_statements(statements: Iterable, catalog: str, sandbox: bool) -> None: """Add statements to existing Wikidata works. Statements typically come from @@ -459,9 +446,7 @@ def delete_or_deprecate_identifiers( for tid, qids in invalid.items(): for qid in qids: actual_qid = qid if not sandbox else sandbox_item - LOGGER.info( - 'Will %s %s identifier: %s -> %s', action, catalog, tid, qid - ) + LOGGER.info('Will %s %s identifier: %s -> %s', action, catalog, tid, qid) _delete_or_deprecate(action, actual_qid, tid, catalog, catalog_pid) @@ -630,9 +615,7 @@ def _handle_addition( # No given value -> add statement if value not in existing_values: - LOGGER.debug( - '%s has no %s claim with value %s', subject_qid, predicate, value - ) + LOGGER.debug('%s has no %s claim with value %s', subject_qid, predicate, value) _add( subject_item, predicate, @@ -646,9 +629,7 @@ def _handle_addition( return # Claim with the given predicate and value -> add reference - LOGGER.debug( - "%s has a %s claim with value '%s'", subject_qid, predicate, value - ) + LOGGER.debug("%s has a %s claim with value '%s'", subject_qid, predicate, value) if case_insensitive: for claim in given_predicate_claims: if claim.getTarget().lower() == value: @@ -822,9 +803,7 @@ def _add( catalog_id, edit_summary=edit_summary, ) - LOGGER.info( - 'Added (%s, %s, %s) statement', subject_item.getID(), predicate, value - ) + LOGGER.info('Added (%s, %s, %s) statement', subject_item.getID(), predicate, value) def _reference( @@ -862,18 +841,14 @@ def _reference( if catalog_pid is not None and catalog_id is not None: # (catalog property, catalog ID) reference claim - catalog_id_reference = pywikibot.Claim( - REPO, catalog_pid, is_reference=True - ) + catalog_id_reference = pywikibot.Claim(REPO, catalog_pid, is_reference=True) catalog_id_reference.setTarget(catalog_id) reference_node.append(catalog_id_reference) log_buffer.append(f'({catalog_pid}, {catalog_id})') # All tasks # (retrieved, TODAY) reference claim - retrieved_reference = pywikibot.Claim( - REPO, vocabulary.RETRIEVED, is_reference=True - ) + retrieved_reference = pywikibot.Claim(REPO, vocabulary.RETRIEVED, is_reference=True) retrieved_reference.setTarget(TIMESTAMP) reference_node.append(retrieved_reference) log_buffer.append(f'({retrieved_reference.getID()}, {TODAY})') @@ -929,9 +904,7 @@ def _delete_or_deprecate(action, qid, tid, catalog, catalog_pid) -> None: if action == 'delete': item.removeClaims([claim], summary='Invalid identifier') elif action == 'deprecate': - claim.changeRank( - 'deprecated', summary='Deprecate arguable claim' - ) + claim.changeRank('deprecated', summary='Deprecate arguable claim') LOGGER.debug('%s claim: %s', action.title() + 'd', claim.toJSON()) LOGGER.info( '%s %s identifier statement from %s', action.title() + 'd', catalog, qid diff --git a/soweego/linker/baseline.py b/soweego/linker/baseline.py index 0206be45..16893290 100644 --- a/soweego/linker/baseline.py +++ 
b/soweego/linker/baseline.py @@ -41,12 +41,8 @@ @click.command() -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.option( '-r', '--rule', @@ -86,18 +82,14 @@ def cli(catalog, entity, rule, upload, sandbox, dir_io, dates): Run all of them by default. """ - LOGGER.info( - "Running baseline '%s' rule over %s %s ...", rule, catalog, entity - ) + LOGGER.info("Running baseline '%s' rule over %s %s ...", rule, catalog, entity) # No need for the return value: only the output file will be consumed build_wikidata('classification', catalog, entity, dir_io) _run(catalog, entity, rule, dates, upload, sandbox, dir_io) - LOGGER.info( - "Baseline '%s' rule over %s %s completed", rule, catalog, entity - ) + LOGGER.info("Baseline '%s' rule over %s %s completed", rule, catalog, entity) def _run(catalog, entity, rule, check_dates, upload, sandbox, dir_io): @@ -180,12 +172,8 @@ def _run(catalog, entity, rule, check_dates, upload, sandbox, dir_io): @click.command() -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.option('-u', '--upload', is_flag=True, help='Upload links to Wikidata.') @click.option( '-s', @@ -309,12 +297,8 @@ def _perfect_names_linker( continue if wd_name.lower() == target.name.lower(): - if not compare_dates or _birth_death_date_match( - wd, target - ): - yield wd[ - keys.QID - ], catalog_pid, target.catalog_id + if not compare_dates or _birth_death_date_match(wd, target): + yield wd[keys.QID], catalog_pid, target.catalog_id bucket.clear() bucket_names.clear() @@ -354,9 +338,7 @@ def _similar_tokens_linker( for target in data_gathering.tokens_fulltext_search( target_db_entity, True, wd_tokens ): - if not compare_dates or _birth_death_date_match( - wd_item, target - ): + if not compare_dates or _birth_death_date_match(wd_item, target): yield qid, catalog_pid, target.catalog_id to_exclude.add(target.catalog_id) @@ -370,9 +352,7 @@ def _similar_tokens_linker( ): target_tokens = set(getattr(target, target_field).split()) - if len(target_tokens) > 1 and target_tokens.issubset( - wd_tokens - ): + if len(target_tokens) > 1 and target_tokens.issubset(wd_tokens): if not compare_dates or _birth_death_date_match( wd_item, target ): diff --git a/soweego/linker/blocking.py b/soweego/linker/blocking.py index e1a0e31e..6dabd077 100644 --- a/soweego/linker/blocking.py +++ b/soweego/linker/blocking.py @@ -102,9 +102,7 @@ def find_samples( wikidata_column.dropna(inplace=True) samples = _fire_queries(wikidata_column, target_db_entity) - samples_index = pd.MultiIndex.from_tuples( - samples, names=[keys.QID, keys.TID] - ) + samples_index = pd.MultiIndex.from_tuples(samples, names=[keys.QID, keys.TID]) LOGGER.debug( '%s %s samples index chunk %d random example:\n%s', @@ -151,16 +149,12 @@ def _full_text_search( ), ) ) - LOGGER.debug( - 'Target ID candidates: %s - Query terms: %s', tids, query_terms - ) + LOGGER.debug('Target ID candidates: %s - Query terms: %s', tids, query_terms) return [(qid, tid) for tid in tids] 
-def _fire_queries( - wikidata_column: pd.Series, target_db_entity: constants.DB_ENTITY -): +def _fire_queries(wikidata_column: pd.Series, target_db_entity: constants.DB_ENTITY): with Pool() as pool: for result in tqdm( pool.imap_unordered( diff --git a/soweego/linker/classifiers.py b/soweego/linker/classifiers.py index 77889305..8710ed14 100644 --- a/soweego/linker/classifiers.py +++ b/soweego/linker/classifiers.py @@ -103,9 +103,7 @@ def _fit( model_path = os.path.join( constants.WORK_DIR, - constants.NEURAL_NETWORK_CHECKPOINT_MODEL.format( - self.__class__.__name__ - ), + constants.NEURAL_NETWORK_CHECKPOINT_MODEL.format(self.__class__.__name__), ) os.makedirs(os.path.dirname(model_path), exist_ok=True) @@ -577,9 +575,7 @@ def __init__(self, num_features, **kwargs): estimators = [] for clf in constants.CLASSIFIERS_FOR_ENSEMBLE: - model = utils.init_model( - clf, num_features=self.num_features, **kwargs - ) + model = utils.init_model(clf, num_features=self.num_features, **kwargs) estimators.append((clf, model.kernel)) @@ -644,9 +640,7 @@ def __init__(self, num_features, **kwargs): def init_estimators(num_features): estimators = [] for clf in constants.CLASSIFIERS_FOR_ENSEMBLE: - model = utils.init_model( - clf, num_features=num_features, **kwargs - ) + model = utils.init_model(clf, num_features=num_features, **kwargs) estimators.append((clf, model.kernel)) return estimators diff --git a/soweego/linker/evaluate.py b/soweego/linker/evaluate.py index 97a518b6..bff6dbe8 100644 --- a/soweego/linker/evaluate.py +++ b/soweego/linker/evaluate.py @@ -34,19 +34,14 @@ context_settings={'ignore_unknown_options': True, 'allow_extra_args': True} ) @click.argument('classifier', type=click.Choice(constants.CLASSIFIERS)) -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.option('-k', '--k-folds', default=5, help="Number of folds, default: 5.") @click.option( '-s', '--single', is_flag=True, - help='Compute a single evaluation over all k folds, instead of k ' - 'evaluations.', + help='Compute a single evaluation over all k folds, instead of k ' 'evaluations.', ) @click.option( '-n', @@ -71,9 +66,7 @@ help=f'Input/output directory, default: {constants.WORK_DIR}.', ) @click.pass_context -def cli( - ctx, classifier, catalog, entity, k_folds, single, nested, metric, dir_io -): +def cli(ctx, classifier, catalog, entity, k_folds, single, nested, metric, dir_io): """Evaluate the performance of a supervised linker. By default, run 5-fold cross-validation and @@ -132,9 +125,7 @@ def cli( def _build_output_paths(catalog, entity, classifier, dir_io): classifier = constants.CLASSIFIERS.get(classifier) - performance = constants.LINKER_PERFORMANCE.format( - catalog, entity, classifier - ) + performance = constants.LINKER_PERFORMANCE.format(catalog, entity, classifier) predictions = constants.LINKER_EVALUATION_PREDICTIONS.format( catalog, entity, classifier ) @@ -259,8 +250,7 @@ def _run_nested( dir_io, ): LOGGER.warning( - 'You have opted for the slowest evaluation option, ' - 'please be patient ...' + 'You have opted for the slowest evaluation option, ' 'please be patient ...' 
) LOGGER.info( 'Starting nested %d-fold cross-validation with ' @@ -272,9 +262,7 @@ def _run_nested( param_grid = constants.PARAMETER_GRIDS.get(clf) if param_grid is None: - err_msg = ( - f'Hyperparameter tuning for classifier "{clf}" is not supported' - ) + err_msg = f'Hyperparameter tuning for classifier "{clf}" is not supported' LOGGER.critical(err_msg) raise NotImplementedError(err_msg) @@ -301,9 +289,7 @@ def _compute_performance(test_index, predictions, test_vectors_size): recall = rl.recall(test_index, predictions) f_score = rl.fscore(confusion_matrix) - LOGGER.info( - 'Precision: %f - Recall: %f - F-score: %f', precision, recall, f_score - ) + LOGGER.info('Precision: %f - Recall: %f - F-score: %f', precision, recall, f_score) LOGGER.info('Confusion matrix: %s', confusion_matrix) return precision, recall, f_score, confusion_matrix @@ -312,9 +298,7 @@ def _compute_performance(test_index, predictions, test_vectors_size): def _nested_k_fold_with_grid_search( classifier, param_grid, catalog, entity, k, scoring, dir_io, **kwargs ): - dataset, positive_samples_index = train.build_training_set( - catalog, entity, dir_io - ) + dataset, positive_samples_index = train.build_training_set(catalog, entity, dir_io) model = utils.init_model(classifier, dataset.shape[1], **kwargs).kernel inner_k_fold, target = utils.prepare_stratified_k_fold( @@ -372,16 +356,12 @@ def _nested_k_fold_with_grid_search( def _average_k_fold(classifier, catalog, entity, k, dir_io, **kwargs): predictions, precisions, recalls, f_scores = None, [], [], [] - dataset, positive_samples_index = train.build_training_set( - catalog, entity, dir_io - ) + dataset, positive_samples_index = train.build_training_set(catalog, entity, dir_io) k_fold, binary_target_variables = utils.prepare_stratified_k_fold( k, dataset, positive_samples_index ) - for train_index, test_index in k_fold.split( - dataset, binary_target_variables - ): + for train_index, test_index in k_fold.split(dataset, binary_target_variables): training, test = dataset.iloc[train_index], dataset.iloc[test_index] model = utils.init_model(classifier, dataset.shape[1], **kwargs) @@ -418,16 +398,12 @@ def _average_k_fold(classifier, catalog, entity, k, dir_io, **kwargs): def _single_k_fold(classifier, catalog, entity, k, dir_io, **kwargs): predictions, test_set = None, [] - dataset, positive_samples_index = train.build_training_set( - catalog, entity, dir_io - ) + dataset, positive_samples_index = train.build_training_set(catalog, entity, dir_io) k_fold, binary_target_variables = utils.prepare_stratified_k_fold( k, dataset, positive_samples_index ) - for train_index, test_index in k_fold.split( - dataset, binary_target_variables - ): + for train_index, test_index in k_fold.split(dataset, binary_target_variables): training, test = dataset.iloc[train_index], dataset.iloc[test_index] test_set.append(test) diff --git a/soweego/linker/features.py b/soweego/linker/features.py index f3623315..e6ae0ec3 100644 --- a/soweego/linker/features.py +++ b/soweego/linker/features.py @@ -77,9 +77,7 @@ class ExactMatch(BaseCompareFeature): """Compare pairs of lists through exact match on each pair of elements.""" name = 'exact_match' - description = ( - 'Compare pairs of lists through exact match on each pair of elements.' - ) + description = 'Compare pairs of lists through exact match on each pair of elements.' 
def __init__( self, @@ -112,9 +110,7 @@ def _compute_vectorized(self, source_column, target_column): def exact_apply(pair): if _pair_has_any_null(pair): - LOGGER.debug( - "Can't compare, the pair contains null values: %s", pair - ) + LOGGER.debug("Can't compare, the pair contains null values: %s", pair) return np.nan scores = [] @@ -367,12 +363,8 @@ def check_date_equality(pair: Tuple[List[pd.Period], List[pd.Period]]): for source, target in itertools.product(source_list, target_list): # Get precision number for both dates - s_precision = constants.PD_PERIOD_PRECISIONS.index( - source.freq.name - ) - t_precision = constants.PD_PERIOD_PRECISIONS.index( - target.freq.name - ) + s_precision = constants.PD_PERIOD_PRECISIONS.index(source.freq.name) + t_precision = constants.PD_PERIOD_PRECISIONS.index(target.freq.name) # Minimum pair precision = maximum shared precision lowest_prec = min(s_precision, t_precision) @@ -408,9 +400,7 @@ def check_date_equality(pair: Tuple[List[pd.Period], List[pd.Period]]): return best - return fillna( - concatenated.apply(check_date_equality), self.missing_value - ) + return fillna(concatenated.apply(check_date_equality), self.missing_value) class SharedTokens(BaseCompareFeature): @@ -420,8 +410,7 @@ class SharedTokens(BaseCompareFeature): name = 'shared_tokens' description = ( - 'Compare pairs of lists holding string tokens ' - 'through weighted intersection' + 'Compare pairs of lists holding string tokens ' 'through weighted intersection' ) def __init__( @@ -548,9 +537,7 @@ def _expand_occupations(self, occupation_qids: Set[str]) -> Set[str]: return expanded_set - def _compute_vectorized( - self, source_column: pd.Series, target_column: pd.Series - ): + def _compute_vectorized(self, source_column: pd.Series, target_column: pd.Series): # add the superclasses and subclasses of each occupation to # the target column @@ -563,8 +550,7 @@ def _compute_vectorized( def check_occupation_equality(pair: Tuple[Set[str], Set[str]]): if _pair_has_any_null(pair): LOGGER.debug( - "Can't compare occupations, " - "the pair contains null values: %s", + "Can't compare occupations, " "the pair contains null values: %s", pair, ) return np.nan @@ -576,9 +562,7 @@ def check_occupation_equality(pair: Tuple[Set[str], Set[str]]): return n_shared_items / min_length - return fillna( - concatenated.apply(check_occupation_equality), self.missing_value - ) + return fillna(concatenated.apply(check_occupation_equality), self.missing_value) class SharedTokensPlus(BaseCompareFeature): @@ -596,8 +580,7 @@ class SharedTokensPlus(BaseCompareFeature): name = 'shared_tokens_plus' description = ( - 'Compare pairs of lists holding string tokens ' - 'through weighted intersection' + 'Compare pairs of lists holding string tokens ' 'through weighted intersection' ) def __init__( @@ -652,17 +635,13 @@ def _compute_vectorized( # Compute shared tokens after filtering stop words def compare_apply(pair: Tuple[List[str], List[str]]) -> float: if _pair_has_any_null(pair): - LOGGER.debug( - "Can't compare, the pair contains null values: %s", pair - ) + LOGGER.debug("Can't compare, the pair contains null values: %s", pair) return np.nan # first we clean a bit the pair # make all lowercase and split on possible spaces # also reshape result into a list (flatten) - pair = [ - self._flatten([el.lower().split() for el in p]) for p in pair - ] + pair = [self._flatten([el.lower().split() for el in p]) for p in pair] s_item, t_item = pair diff --git a/soweego/linker/link.py b/soweego/linker/link.py index 4ac80fa2..08d570f1 
100644 --- a/soweego/linker/link.py +++ b/soweego/linker/link.py @@ -31,12 +31,8 @@ @click.command() @click.argument('classifier', type=click.Choice(constants.CLASSIFIERS)) -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.option( '-t', '--threshold', @@ -63,9 +59,7 @@ default=constants.WORK_DIR, help=f'Input/output directory, default: {constants.WORK_DIR}.', ) -def cli( - classifier, catalog, entity, threshold, name_rule, upload, sandbox, dir_io -): +def cli(classifier, catalog, entity, threshold, name_rule, upload, sandbox, dir_io): """Run a supervised linker. Build the classification set relevant to the given catalog and entity, @@ -82,9 +76,7 @@ def cli( """ actual_classifier = constants.CLASSIFIERS[classifier] - model_path, result_path = _handle_io( - actual_classifier, catalog, entity, dir_io - ) + model_path, result_path = _handle_io(actual_classifier, catalog, entity, dir_io) # Exit if the model file doesn't exist if model_path is None: sys.exit(1) @@ -223,9 +215,7 @@ def _apply_linking_rules(name_rule, predictions, target_chunk, wd_chunk): return predictions -def _get_unique_predictions_above_threshold( - predictions, threshold -) -> pd.DataFrame: +def _get_unique_predictions_above_threshold(predictions, threshold) -> pd.DataFrame: # Filter by threshold above_threshold = predictions[predictions >= threshold] @@ -256,9 +246,7 @@ def _handle_io(classifier, catalog, entity, dir_io): # Delete existing result file, # otherwise the current output would be appended to it if os.path.isfile(result_path): - LOGGER.warning( - "Will delete old output file found at '%s' ...", result_path - ) + LOGGER.warning("Will delete old output file found at '%s' ...", result_path) os.remove(result_path) os.makedirs(os.path.dirname(result_path), exist_ok=True) @@ -269,9 +257,7 @@ def _handle_io(classifier, catalog, entity, dir_io): def _upload(chunk, chunk_number, catalog, entity, sandbox): links = dict(chunk.to_dict().keys()) - LOGGER.info( - 'Starting upload of links to Wikidata, chunk %d ...', chunk_number - ) + LOGGER.info('Starting upload of links to Wikidata, chunk %d ...', chunk_number) wikidata_bot.add_identifiers(links, catalog, entity, sandbox) diff --git a/soweego/linker/train.py b/soweego/linker/train.py index e430802b..e88ed24b 100644 --- a/soweego/linker/train.py +++ b/soweego/linker/train.py @@ -33,12 +33,8 @@ context_settings={'ignore_unknown_options': True, 'allow_extra_args': True} ) @click.argument('classifier', type=click.Choice(constants.CLASSIFIERS)) -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.option( '-t', '--tune', @@ -72,9 +68,7 @@ def cli(ctx, classifier, catalog, entity, tune, k_folds, dir_io): actual_classifier = constants.CLASSIFIERS[classifier] - model = execute( - actual_classifier, catalog, entity, tune, k_folds, dir_io, **kwargs - ) + model = execute(actual_classifier, catalog, entity, tune, k_folds, dir_io, **kwargs) outfile = os.path.join( dir_io, diff --git 
a/soweego/linker/workflow.py b/soweego/linker/workflow.py
index 15ef8879..e39b3de1 100644
--- a/soweego/linker/workflow.py
+++ b/soweego/linker/workflow.py
@@ -49,9 +49,7 @@ LOGGER = logging.getLogger(__name__) -def build_wikidata( - goal: str, catalog: str, entity: str, dir_io: str -) -> JsonReader: +def build_wikidata(goal: str, catalog: str, entity: str, dir_io: str) -> JsonReader: """Build a Wikidata dataset for training or classification purposes: workflow step 1.
@@ -122,9 +120,7 @@ def build_wikidata( # Cached dataset, for development purposes else: - LOGGER.info( - "Will reuse existing Wikidata %s set: '%s'", goal, wd_io_path - ) + LOGGER.info("Will reuse existing Wikidata %s set: '%s'", goal, wd_io_path) if goal == 'training': _reconstruct_qids_and_tids(wd_io_path, qids_and_tids)
@@ -178,9 +174,7 @@ def build_target( for table in tables: query = query.outerjoin(table, base.catalog_id == table.catalog_id) # Condition - query = query.filter(base.catalog_id.in_(identifiers)).enable_eagerloads( - False - ) + query = query.filter(base.catalog_id.in_(identifiers)).enable_eagerloads(False) sql = query.statement LOGGER.debug('SQL query to be fired: %s', sql)
@@ -222,9 +216,7 @@ def preprocess_wikidata( for i, chunk in enumerate(wikidata_reader, 1): # 1. QID as index chunk.set_index(keys.QID, inplace=True) - log_dataframe_info( - LOGGER, chunk, f"Built index from '{keys.QID}' column" - ) + log_dataframe_info(LOGGER, chunk, f"Built index from '{keys.QID}' column") # 2. Drop columns with null values only _drop_null_columns(chunk)
@@ -267,9 +259,7 @@ def preprocess_wikidata( yield chunk -def preprocess_target( - goal: str, target_reader: Iterator[pd.DataFrame] -) -> pd.DataFrame: +def preprocess_target(goal: str, target_reader: Iterator[pd.DataFrame]) -> pd.DataFrame: """Preprocess a target catalog dataset: workflow step 2. This function consumes :class:`pandas.DataFrame` chunks and
@@ -386,9 +376,7 @@ def in_both_datasets(col: str) -> bool: name_column = keys.NAME if in_both_datasets(name_column): feature_extractor.add( - features.ExactMatch( - name_column, name_column, label=f'{name_column}_exact' - ) + features.ExactMatch(name_column, name_column, label=f'{name_column}_exact') ) # URL features
@@ -437,9 +425,7 @@ def in_both_datasets(col: str) -> bool: ) ) - feature_vectors = feature_extractor.compute( - candidate_pairs, wikidata, target - ) + feature_vectors = feature_extractor.compute(candidate_pairs, wikidata, target) feature_vectors = feature_vectors[ ~feature_vectors.index.duplicated() # Drop duplicates ]
@@ -569,9 +555,7 @@ def _rename_or_drop_tid_columns(target): # in this case, they must be identical, # so take the first one target[keys.TID] = ( - no_nulls.iloc[:, 0] - if isinstance(no_nulls, pd.DataFrame) - else no_nulls + no_nulls.iloc[:, 0] if isinstance(no_nulls, pd.DataFrame) else no_nulls ) target.drop(columns=keys.CATALOG_ID, inplace=True)
@@ -738,9 +722,7 @@ def _build_date_object(value, slice_index, to_dates_list): try: to_dates_list.append(pd.Period(value[:slice_index])) except ValueError as ve: - LOGGER.warning( - "Skipping date that can't be parsed: %s. Reason: %s", value, ve - ) + LOGGER.warning("Skipping date that can't be parsed: %s. 
Reason: %s", value, ve) def _occupations_to_set(df): diff --git a/soweego/pipeline.py b/soweego/pipeline.py index 03e5076c..2619e330 100644 --- a/soweego/pipeline.py +++ b/soweego/pipeline.py @@ -15,9 +15,7 @@ @click.command() -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) @click.option( '--validator/--no-validator', default=False, @@ -38,9 +36,7 @@ default=True, help='Upload results to Wikidata. Default: yes.', ) -def cli( - catalog: str, validator: bool, importer: bool, linker: bool, upload: bool -): +def cli(catalog: str, validator: bool, importer: bool, linker: bool, upload: bool): """Launch the whole pipeline.""" if importer: @@ -61,9 +57,7 @@ def cli( def _importer(target: str): """Contains all the command the importer has to do""" - LOGGER.info( - "Running importer for target: %s without resolving the URLs", target - ) + LOGGER.info("Running importer for target: %s without resolving the URLs", target) _invoke_no_exit(import_cli, [target]) @@ -75,9 +69,7 @@ def _linker(target: str, upload: bool): if not target_type: continue arguments = ( - [target, target_type, '--upload'] - if upload - else [target, target_type] + [target, target_type, '--upload'] if upload else [target, target_type] ) _invoke_no_exit(baseline.extract_cli, arguments) diff --git a/soweego/validator/checks.py b/soweego/validator/checks.py index 86c45b1b..fb3a8f59 100644 --- a/soweego/validator/checks.py +++ b/soweego/validator/checks.py @@ -21,13 +21,7 @@ import click from sqlalchemy.exc import SQLAlchemyError -from soweego.commons import ( - constants, - data_gathering, - keys, - target_database, - text_utils, -) +from soweego.commons import constants, data_gathering, keys, target_database, text_utils from soweego.commons.db_manager import DBManager from soweego.ingester import wikidata_bot from soweego.wikidata import api_requests, vocabulary @@ -39,9 +33,7 @@ # For all CLIs WD_CACHE_FNAME = '{catalog}_{entity}_{criterion}_wd_cache.pkl' # For `links_cli` and `bio_cli` -IDS_TO_BE_DEPRECATED_FNAME = ( - '{catalog}_{entity}_{criterion}_ids_to_be_deprecated.json' -) +IDS_TO_BE_DEPRECATED_FNAME = '{catalog}_{entity}_{criterion}_ids_to_be_deprecated.json' SHARED_STATEMENTS_FNAME = '{catalog}_{entity}_{criterion}_shared_statements.csv' WD_STATEMENTS_FNAME = 'wikidata_{criterion}_for_{catalog}_{entity}.csv' # For `dead_ids_cli` @@ -50,9 +42,7 @@ EXT_IDS_FNAME = '{catalog}_{entity}_external_ids_to_be_{task}.csv' URLS_FNAME = '{catalog}_{entity}_urls_to_be_{task}.csv' # For `bio_cli` -BIO_STATEMENTS_TO_BE_ADDED_FNAME = ( - '{catalog}_{entity}_bio_statements_to_be_added.csv' -) +BIO_STATEMENTS_TO_BE_ADDED_FNAME = '{catalog}_{entity}_bio_statements_to_be_added.csv' # URL prefixes for catalog providers QID_PREFIX = 'https://www.wikidata.org/wiki/' @@ -60,12 +50,8 @@ @click.command() -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.option( '-d', '--deprecate', @@ -102,9 +88,7 @@ def dead_ids_cli(catalog, entity, deprecate, sandbox, dump_wikidata, dir_io): ) wd_cache_path = os.path.join( dir_io, - WD_CACHE_FNAME.format( - catalog=catalog, entity=entity, criterion='dead_ids' - ), + 
WD_CACHE_FNAME.format(catalog=catalog, entity=entity, criterion='dead_ids'), ) # Handle Wikidata cache @@ -149,21 +133,15 @@ def dead_ids_cli(catalog, entity, deprecate, sandbox, dump_wikidata, dir_io): @click.command() -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) @click.option( '-b', '--blacklist', is_flag=True, help='Filter low-quality URLs through a blacklist.', ) -@click.option( - '-u', '--upload', is_flag=True, help='Upload the output to Wikidata.' -) +@click.option('-u', '--upload', is_flag=True, help='Upload the output to Wikidata.') @click.option( '-s', '--sandbox', @@ -181,9 +159,7 @@ def dead_ids_cli(catalog, entity, deprecate, sandbox, dump_wikidata, dir_io): default=constants.WORK_DIR, help=f'Input/output directory, default: {constants.WORK_DIR}.', ) -def links_cli( - catalog, entity, blacklist, upload, sandbox, dump_wikidata, dir_io -): +def links_cli(catalog, entity, blacklist, upload, sandbox, dump_wikidata, dir_io): """Validate identifiers against links. Dump 6 output files: @@ -235,15 +211,11 @@ def links_cli( ) wd_urls_path = os.path.join( dir_io, - WD_STATEMENTS_FNAME.format( - criterion=criterion, catalog=catalog, entity=entity - ), + WD_STATEMENTS_FNAME.format(criterion=criterion, catalog=catalog, entity=entity), ) wd_cache_path = os.path.join( dir_io, - WD_CACHE_FNAME.format( - catalog=catalog, entity=entity, criterion=criterion - ), + WD_CACHE_FNAME.format(catalog=catalog, entity=entity, criterion=criterion), ) # Wikidata cache @@ -272,17 +244,13 @@ def links_cli( ) = result # Dump output files _dump_deprecated(deprecate, deprecate_path) - _dump_csv_output( - add_ext_ids, add_ext_ids_path, 'third-party IDs to be added' - ) + _dump_csv_output(add_ext_ids, add_ext_ids_path, 'third-party IDs to be added') _dump_csv_output(add_urls, add_urls_path, 'URLs to be added') _dump_csv_output( ref_ext_ids, ref_ext_ids_path, 'shared third-party IDs to be referenced' ) _dump_csv_output(ref_urls, ref_urls_path, 'shared URLs to be referenced') - _dump_csv_output( - wd_urls, wd_urls_path, f'Wikidata URLs not in {catalog} {entity}' - ) + _dump_csv_output(wd_urls, wd_urls_path, f'Wikidata URLs not in {catalog} {entity}') # Dump Wikidata cache if dump_wikidata: @@ -291,9 +259,7 @@ def links_cli( # Using the highest protocol available for the current Python # version should be the most efficient solution pickle.dump(wd_cache, cout, protocol=pickle.HIGHEST_PROTOCOL) - LOGGER.info( - 'URLs gathered from Wikidata dumped to %s', wd_cache_path - ) + LOGGER.info('URLs gathered from Wikidata dumped to %s', wd_cache_path) except MemoryError: LOGGER.warning('Could not pickle the Wikidata cache: memory error') @@ -309,35 +275,19 @@ def links_cli( 'deprecate', catalog, entity, deprecate, sandbox ) LOGGER.info('Starting addition of external IDs to Wikidata ...') - wikidata_bot.add_people_statements( - catalog, add_ext_ids, criterion, sandbox - ) + wikidata_bot.add_people_statements(catalog, add_ext_ids, criterion, sandbox) LOGGER.info('Starting addition of URLs to Wikidata ...') - wikidata_bot.add_people_statements( - catalog, add_urls, criterion, sandbox - ) - LOGGER.info( - 'Starting referencing of shared external IDs in Wikidata ...' 
- ) - wikidata_bot.add_people_statements( - catalog, add_ext_ids, criterion, sandbox - ) + wikidata_bot.add_people_statements(catalog, add_urls, criterion, sandbox) + LOGGER.info('Starting referencing of shared external IDs in Wikidata ...') + wikidata_bot.add_people_statements(catalog, add_ext_ids, criterion, sandbox) LOGGER.info('Starting referencing of shared URLs in Wikidata ...') - wikidata_bot.add_people_statements( - catalog, add_urls, criterion, sandbox - ) + wikidata_bot.add_people_statements(catalog, add_urls, criterion, sandbox) @click.command() -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) -@click.option( - '-u', '--upload', is_flag=True, help='Upload the output to Wikidata.' -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) +@click.option('-u', '--upload', is_flag=True, help='Upload the output to Wikidata.') @click.option( '-s', '--sandbox', @@ -396,15 +346,11 @@ def bio_cli(catalog, entity, upload, sandbox, dump_wikidata, dir_io): ) wd_stmts_path = os.path.join( dir_io, - WD_STATEMENTS_FNAME.format( - criterion=criterion, catalog=catalog, entity=entity - ), + WD_STATEMENTS_FNAME.format(criterion=criterion, catalog=catalog, entity=entity), ) wd_cache_path = os.path.join( dir_io, - WD_CACHE_FNAME.format( - catalog=catalog, entity=entity, criterion=criterion - ), + WD_CACHE_FNAME.format(catalog=catalog, entity=entity, criterion=criterion), ) # Wikidata cache @@ -462,14 +408,10 @@ def bio_cli(catalog, entity, upload, sandbox, dump_wikidata, dir_io): LOGGER.info('Starting addition of extra statements to Wikidata ...') wikidata_bot.add_people_statements(catalog, add, criterion, sandbox) LOGGER.info('Starting referencing of shared statements in Wikidata ...') - wikidata_bot.add_people_statements( - catalog, reference, criterion, sandbox - ) + wikidata_bot.add_people_statements(catalog, reference, criterion, sandbox) -def dead_ids( - catalog: str, entity: str, wd_cache=None -) -> Tuple[DefaultDict, Dict]: +def dead_ids(catalog: str, entity: str, wd_cache=None) -> Tuple[DefaultDict, Dict]: """Look for dead identifiers in Wikidata. An identifier is dead if it does not exist in the given catalog when this function is executed. @@ -520,9 +462,7 @@ def dead_ids( .count() ) if existing == 0: - LOGGER.debug( - '%s %s identifier %s is dead', qid, catalog, tid - ) + LOGGER.debug('%s %s identifier %s is dead', qid, catalog, tid) dead[tid].add(qid) session.commit() except SQLAlchemyError as error: @@ -609,16 +549,12 @@ def links( target_database.get_catalog_pid(catalog, entity), wd_links, ) - data_gathering.gather_wikidata_links( - wd_links, url_pids, ext_id_pids_to_urls - ) + data_gathering.gather_wikidata_links(wd_links, url_pids, ext_id_pids_to_urls) else: wd_links = wd_cache # Validation - _validate( - keys.LINKS, wd_links, target_links, deprecate, add, reference, wd_only - ) + _validate(keys.LINKS, wd_links, target_links, deprecate, add, reference, wd_only) # URLs to be added: # 1. 
Separate external IDs from URLs @@ -737,9 +673,7 @@ def bio( wd_bio = wd_cache # Validation - _validate( - keys.BIODATA, wd_bio, target_bio, deprecate, add, reference, wd_only - ) + _validate(keys.BIODATA, wd_bio, target_bio, deprecate, add, reference, wd_only) return ( deprecate, @@ -889,8 +823,7 @@ def _compute_comparison_sets(criterion, wd_data, target_data): # In `target_data` we look for relevant date PIDs target_dates = set( filter( - lambda x: x[0] - in (vocabulary.DATE_OF_BIRTH, vocabulary.DATE_OF_DEATH), + lambda x: x[0] in (vocabulary.DATE_OF_BIRTH, vocabulary.DATE_OF_DEATH), target_data, ) ) @@ -1004,9 +937,7 @@ def _compare_dates(inputs): extra.add(extra_date) -def _match_dates_by_precision( - precision, wd_elem, wd_timestamp, t_elem, t_timestamp -): +def _match_dates_by_precision(precision, wd_elem, wd_timestamp, t_elem, t_timestamp): slice_indices = { vocabulary.YEAR: 4, vocabulary.MONTH: 7, diff --git a/soweego/validator/enrichment.py b/soweego/validator/enrichment.py index 5d33f73e..5c1b96f5 100644 --- a/soweego/validator/enrichment.py +++ b/soweego/validator/enrichment.py @@ -21,13 +21,7 @@ from sqlalchemy.exc import SQLAlchemyError from tqdm import tqdm -from soweego.commons import ( - constants, - data_gathering, - keys, - target_database, - utils, -) +from soweego.commons import constants, data_gathering, keys, target_database, utils from soweego.commons.db_manager import DBManager from soweego.ingester import wikidata_bot from soweego.wikidata import vocabulary @@ -36,15 +30,9 @@ @click.command() -@click.argument( - 'catalog', type=click.Choice(target_database.supported_targets()) -) -@click.argument( - 'entity', type=click.Choice(target_database.supported_entities()) -) -@click.option( - '-u', '--upload', is_flag=True, help='Upload statements to Wikidata.' -) +@click.argument('catalog', type=click.Choice(target_database.supported_targets())) +@click.argument('entity', type=click.Choice(target_database.supported_entities())) +@click.option('-u', '--upload', is_flag=True, help='Upload statements to Wikidata.') @click.option( '-s', '--sandbox', @@ -74,9 +62,7 @@ def works_people_cli(catalog, entity, upload, sandbox, dir_io): sys.exit(1) with open( - os.path.join( - dir_io, constants.WORKS_BY_PEOPLE_STATEMENTS % (catalog, entity) - ), + os.path.join(dir_io, constants.WORKS_BY_PEOPLE_STATEMENTS % (catalog, entity)), 'w', 1, ) as fout: diff --git a/soweego/wikidata/api_requests.py b/soweego/wikidata/api_requests.py index 9f9362a0..06214d08 100644 --- a/soweego/wikidata/api_requests.py +++ b/soweego/wikidata/api_requests.py @@ -63,15 +63,11 @@ def resolve_qid(term: str, language='en') -> Optional[str]: return response_body['search'][0]['id'] # Malformed JSON response except KeyError as e: - LOGGER.error( - "Missing '%s' key from JSON response: %s", e, response_body - ) + LOGGER.error("Missing '%s' key from JSON response: %s", e, response_body) return None # No search results except IndexError: - LOGGER.info( - "No QIDs found for search term '%s' (language: %s)", term, language - ) + LOGGER.info("No QIDs found for search term '%s' (language: %s)", term, language) return None @@ -97,9 +93,7 @@ def get_url_blacklist() -> Optional[set]: try: star = response_body['parse']['text']['*'] # Interesting nonsense key except KeyError as e: - LOGGER.error( - "Missing '%s' key from JSON response: %s", e, response_body - ) + LOGGER.error("Missing '%s' key from JSON response: %s", e, response_body) return None # The parsed page should be a
element
@@ -181,9 +175,7 @@ def get_links( claims = entity.get('claims') if claims: # Third-party links - yield _yield_expected_values( - qid, claims, url_pids, no_links_count - ) + yield _yield_expected_values(qid, claims, url_pids, no_links_count) # External ID links yield _yield_ext_id_links(
@@ -308,9 +300,7 @@ def build_session() -> requests.Session: :rtype: :py:class:`requests.Session` :return: the HTTP session to interact with the Wikidata API """ - session_dump_path = os.path.join( - constants.WORK_DIR, constants.WIKIDATA_API_SESSION - ) + session_dump_path = os.path.join(constants.WORK_DIR, constants.WIKIDATA_API_SESSION) try: return _load_cached_session(session_dump_path)
@@ -360,9 +350,7 @@ def build_session() -> requests.Session: return session -def parse_value( - value: Union[str, Dict] -) -> Union[str, Tuple[str, str], Set[str], None]: +def parse_value(value: Union[str, Dict]) -> Union[str, Tuple[str, str], Set[str], None]: """Parse a value returned by the Wikidata API into standard Python objects. The parser supports the following Wikidata
@@ -439,8 +427,7 @@ def _lookup_label(item_value): entity = entities.get(item_value) if entity is None: LOGGER.warning( - "Skipping unexpected JSON response with no %s " - "in the 'entities' key", + "Skipping unexpected JSON response with no %s " "in the 'entities' key", item_value, ) return None
@@ -544,9 +531,7 @@ def _process_bucket( processed[keys.URL] = list(processed[keys.URL]) # Expected claims - processed.update( - _return_claims_for_linker(qid, claims, needs, counters) - ) + processed.update(_return_claims_for_linker(qid, claims, needs, counters)) result.append(processed)
@@ -612,9 +597,7 @@ def _return_third_party_urls(qid, claims, url_pids, counters): available = url_pids.intersection(claims.keys()) if available: - LOGGER.debug( - 'Available third-party URL PIDs for %s: %s', qid, available - ) + LOGGER.debug('Available third-party URL PIDs for %s: %s', qid, available) for pid in available: for pid_claim in claims[pid]: value = _extract_value_from_claim(pid_claim, pid, qid)
@@ -747,9 +730,7 @@ def _yield_sitelinks(entity, qid, no_sitelinks_count): def _yield_ext_id_links(ext_id_pids_to_urls, claims, qid, no_ext_ids_count): - available_ext_id_pids = set(ext_id_pids_to_urls.keys()).intersection( - claims.keys() - ) + available_ext_id_pids = set(ext_id_pids_to_urls.keys()).intersection(claims.keys()) if not available_ext_id_pids: LOGGER.debug('No external identifier links for %s', qid)
@@ -771,9 +752,7 @@ def _yield_ext_id_links(ext_id_pids_to_urls, claims, qid, no_ext_ids_count): yield qid, formatter_url.replace('$1', ext_id) -def _yield_expected_values( - qid, claims, expected_pids, count, include_pid=False -): +def _yield_expected_values(qid, claims, expected_pids, count, include_pid=False): available = expected_pids.intersection(claims.keys()) if not available:
@@ -947,16 +926,12 @@ def _extract_value_from_claim(pid_claim, pid, qid): LOGGER.debug('Processing (%s, %s) claim: %s', qid, pid, pid_claim) main_snak = pid_claim.get('mainsnak') if not main_snak: - LOGGER.warning( - 'Skipping malformed (%s, %s) claim with no main snak', qid, pid - ) + LOGGER.warning('Skipping malformed (%s, %s) claim with no main snak', qid, pid) LOGGER.debug('Malformed claim: %s', pid_claim) return None snak_type = main_snak.get('snaktype') if not snak_type: - LOGGER.warning( - 'Skipping malformed (%s, %s) claim with no snak type', qid, pid - ) + LOGGER.warning('Skipping malformed (%s, %s) claim with no snak type', qid, pid) LOGGER.debug('Malformed 
claim: %s', pid_claim) return None if snak_type == 'novalue': @@ -976,9 +951,7 @@ def _extract_value_from_claim(pid_claim, pid, qid): return None value = data_value.get('value') if not value: - LOGGER.warning( - 'Skipping malformed (%s, %s) claim with no value', qid, pid - ) + LOGGER.warning('Skipping malformed (%s, %s) claim with no value', qid, pid) LOGGER.debug('Malformed claim: %s', pid_claim) return None LOGGER.debug('QID: %s - PID: %s - Value: %s', qid, pid, value) diff --git a/soweego/wikidata/sparql_queries.py b/soweego/wikidata/sparql_queries.py index 5a5382cd..c369de83 100755 --- a/soweego/wikidata/sparql_queries.py +++ b/soweego/wikidata/sparql_queries.py @@ -142,8 +142,7 @@ def external_id_pids_and_urls() -> Iterator[Dict]: formatter_url_dict = result.get(FORMATTER_URL_BINDING.lstrip('?')) if not formatter_url_dict: LOGGER.warning( - 'Skipping malformed query result: ' - 'no formatter URL binding in %s', + 'Skipping malformed query result: ' 'no formatter URL binding in %s', result, ) continue @@ -252,11 +251,9 @@ def run_query( # Items & identifiers if what == keys.IDENTIFIER: query = ( - IDENTIFIER_TEMPLATE - % (vocabulary.INSTANCE_OF, class_qid, catalog_pid) + IDENTIFIER_TEMPLATE % (vocabulary.INSTANCE_OF, class_qid, catalog_pid) if how == keys.CLASS_QUERY - else IDENTIFIER_TEMPLATE - % (vocabulary.OCCUPATION, class_qid, catalog_pid) + else IDENTIFIER_TEMPLATE % (vocabulary.OCCUPATION, class_qid, catalog_pid) ) return _parse_query_result( keys.IDENTIFIER, _run_paged_query(result_per_page, query) @@ -267,12 +264,9 @@ def run_query( query = ( LINKS_TEMPLATE % (vocabulary.INSTANCE_OF, class_qid, catalog_pid) if how == keys.CLASS_QUERY - else LINKS_TEMPLATE - % (vocabulary.OCCUPATION, class_qid, catalog_pid) - ) - return _parse_query_result( - keys.LINKS, _run_paged_query(result_per_page, query) + else LINKS_TEMPLATE % (vocabulary.OCCUPATION, class_qid, catalog_pid) ) + return _parse_query_result(keys.LINKS, _run_paged_query(result_per_page, query)) # Items without identifiers (for classification purposes) if what == keys.DATASET: @@ -437,8 +431,7 @@ def _make_request(query, response_format=DEFAULT_RESPONSE_FORMAT): # Random value between 0 and 1 wait_time = random() LOGGER.warning( - 'Exceeded concurrent queries limit, ' - 'will retry after %f seconds ...', + 'Exceeded concurrent queries limit, ' 'will retry after %f seconds ...', wait_time, ) @@ -521,9 +514,7 @@ def _run_paged_query(result_per_page, query): result_set = _make_request(' '.join(query_builder)) if not result_set: - LOGGER.error( - 'Skipping page %d because the query went wrong', pages - ) + LOGGER.error('Skipping page %d because the query went wrong', pages) pages += 1 continue