ci: Fix all ruff pyflakes errors except unused imports (#5820)
* ci: Fix all ruff pyflakes errors except unused imports

* Delete releasenotes/notes/fix-some-pyflakes-errors-69a1106efa5d0203.yaml
cclauss committed Sep 15, 2023
1 parent 8af0d81 commit 1bc03dd
Showing 23 changed files with 135 additions and 136 deletions.
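Most of the changes below are mechanical pyflakes fixes. As a rough orientation — a minimal, self-contained sketch with made-up names, not code from the commit — the two most common before/after idioms are F541 (an f-string with no placeholders) and F841 (a name that is bound but never used):

# Sketch only; not part of the diff.

# F541: a literal without placeholders should be a plain string, not an f-string.
title = "Doc with embedding"        # was: title = f"Doc with embedding"


# F841: do not bind an exception (or any value) that is never referenced.
def read_version(path):
    try:
        with open(path) as fh:
            return fh.read().strip()
    except OSError:                 # was: except OSError as e:
        return "unknown"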
haystack/modeling/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@

try:
import torch
-except (ModuleNotFoundError, ImportError) as iexc:
+except (ModuleNotFoundError, ImportError):
raise ImportError(
"torch not installed, haystack.modeling won't work. Run 'pip install transformers[torch]' to fix this problem."
)
pyproject.toml (7 changes: 6 additions & 1 deletion)
@@ -386,6 +386,7 @@ select = [
"DJ", # flake8-django
"E501", # Long lines
"EXE", # flake8-executable
"F", # Pyflakes
"FURB", # refurb
"INT", # flake8-gettext
"PL", # Pylint
@@ -395,7 +396,6 @@ select = [
"W", # pycodestyle
"YTT", # flake8-2020
# "E", # pycodestyle
# "F", # Pyflakes
# "NPY", # NumPy-specific rules
# "PD", # pandas-vet
# "PERF", # Perflint
@@ -405,6 +405,7 @@
line-length = 1486
target-version = "py38"
ignore = [
"F401", # unused-import
"PLR1714", # repeated-equality-comparison
"PLR5501", # collapsible-else-if
"PLW0603", # global-statement
@@ -415,6 +416,10 @@ ignore = [
[tool.ruff.mccabe]
max-complexity = 28

+[tool.ruff.per-file-ignores]
+"haystack/preview/testing/document_store.py" = ["F821"]
+"haystack/telemetry.py" = ["F821"]
+
[tool.ruff.pylint]
allow-magic-value-types = ["float", "int", "str"]
max-args = 38 # Default is 5
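For context on the two per-file-ignores entries added above: F821 reports a reference to a name that is never defined or imported in the module. A minimal sketch (not from the repository) of the kind of code it flags — here the check is suppressed for those two files rather than fixed:

def total_cost(quantities):
    # "unit_price" is never defined or imported in this module, so ruff's
    # F821 (undefined name) fires here; calling the function would raise
    # NameError at runtime.
    return sum(quantities) * unit_price

F401 (unused-import) stays in the global ignore list, which matches the commit title: unused imports are deliberately left for a later change.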
rest_api/rest_api/__about__.py (2 changes: 1 addition & 1 deletion)
@@ -8,6 +8,6 @@

try:
__version__ = open(Path(__file__).parent.parent / "VERSION.txt", "r").read()
-except Exception as e:
+except Exception:
logger = logging.getLogger(__name__)
logger.exception("No VERSION.txt found!")
test/benchmarks/datadog/metric_handler.py (2 changes: 1 addition & 1 deletion)
@@ -144,7 +144,7 @@ def send_custom_dd_metrics(self, metrics: List[CustomDatadogMetric]) -> List[Dic
try:
response = self.send_custom_dd_metric(metric)
responses.append(response)
-except ConnectionError as e:
+except ConnectionError:
LOGGER.error(
f"Could not send custom metric even after retrying. "
f"metric_name={metric.name}, metric_value={metric.value}"
test/document_stores/test_faiss.py (2 changes: 1 addition & 1 deletion)
@@ -233,7 +233,7 @@ def test_passing_index_from_outside(self, documents_with_embeddings, tmp_path):
faiss_index.set_direct_map_type(faiss.DirectMap.Hashtable)
faiss_index.nprobe = 2
document_store = FAISSDocumentStore(
sql_url=f"sqlite:https:///", faiss_index=faiss_index, index=index, isolation_level="AUTOCOMMIT"
sql_url="sqlite:https:///", faiss_index=faiss_index, index=index, isolation_level="AUTOCOMMIT"
)

document_store.delete_documents()
test/document_stores/test_pinecone.py (20 changes: 10 additions & 10 deletions)
@@ -195,12 +195,12 @@ def test_nin_filters(self, ds, documents):

@pytest.mark.skip
@pytest.mark.integration
-def test_ne_filters(self, ds, documents):
+def test_ne_filters(self, ds, documents): # noqa: F811
pass

@pytest.mark.skip
@pytest.mark.integration
-def test_nin_filters(self, ds, documents):
+def test_nin_filters(self, ds, documents): # noqa: F811
pass

@pytest.mark.skip
@@ -439,7 +439,7 @@ def test_multilayer_dict(self, doc_store_with_docs: PineconeDocumentStore):
"meta_field": "multilayer-test",
}
doc = Document(
content=f"Multilayered dict", meta=multilayer_meta, embedding=np.random.rand(768).astype(np.float32)
content="Multilayered dict", meta=multilayer_meta, embedding=np.random.rand(768).astype(np.float32)
)

doc_store_with_docs.write_documents([doc])
@@ -466,7 +466,7 @@ def test_get_embedding_count(self, doc_store_with_docs: PineconeDocumentStore):
We expect 1 doc with an embeddings because all documents in already written in doc_store_with_docs contain no
embeddings.
"""
doc = Document(content=f"Doc with embedding", embedding=np.random.rand(768).astype(np.float32))
doc = Document(content="Doc with embedding", embedding=np.random.rand(768).astype(np.float32))
doc_store_with_docs.write_documents([doc])
assert doc_store_with_docs.get_embedding_count() == 1

@@ -485,7 +485,7 @@ def test_get_document_count_after_write_doc_with_embedding(self, doc_store_with_
assert doc_store_with_docs.get_document_count() == initial_document_count

# document with embedding is written to doc_store_with_docs
doc = Document(content=f"Doc with embedding", embedding=np.random.rand(768).astype(np.float32))
doc = Document(content="Doc with embedding", embedding=np.random.rand(768).astype(np.float32))
doc_store_with_docs.write_documents([doc])

# so we expect initial_document_count + 1 documents in total
@@ -509,7 +509,7 @@ def test_get_document_count_after_write_doc_without_embedding(self, doc_store_wi
assert doc_store_with_docs.get_document_count() == initial_document_count

# document without embedding is written to doc_store_with_docs
doc = Document(content=f"Doc without embedding")
doc = Document(content="Doc without embedding")
doc_store_with_docs.write_documents([doc])

# we now expect initial_document_count + 1 documents in total
@@ -536,8 +536,8 @@ def test_get_document_count_after_delete_doc_with_embedding(self, doc_store_with
assert doc_store_with_docs.get_document_count() == initial_document_count

# two documents with embedding are written to doc_store_with_docs
-doc_1 = Document(content=f"Doc with embedding 1", embedding=np.random.rand(768).astype(np.float32))
-doc_2 = Document(content=f"Doc with embedding 2", embedding=np.random.rand(768).astype(np.float32))
+doc_1 = Document(content="Doc with embedding 1", embedding=np.random.rand(768).astype(np.float32))
+doc_2 = Document(content="Doc with embedding 2", embedding=np.random.rand(768).astype(np.float32))
doc_store_with_docs.write_documents([doc_1, doc_2])

# total number is initial_document_count + 2
@@ -569,8 +569,8 @@ def test_get_document_count_after_delete_doc_without_embedding(self, doc_store_w
assert doc_store_with_docs.get_document_count() == initial_document_count

# two documents without embedding are written to doc_store_with_docs
-doc_1 = Document(content=f"Doc with embedding 1", embedding=None)
-doc_2 = Document(content=f"Doc with embedding 2", embedding=None)
+doc_1 = Document(content="Doc with embedding 1", embedding=None)
+doc_2 = Document(content="Doc with embedding 2", embedding=None)
doc_store_with_docs.write_documents([doc_1, doc_2])

# total number is initial_document_count + 2
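The two "# noqa: F811" comments added above address pyflakes' redefinition check: F811 fires when the same name is bound more than once in a scope, which is exactly what happens when a test is re-declared as a skipped stub. A minimal sketch of the pattern (the class is made up, not from the repository):

import pytest


class TestExampleStore:
    def test_ne_filters(self):
        assert 1 != 2

    # The same name is deliberately bound again as a skipped stub; pyflakes
    # sees a redefinition of test_ne_filters (F811), so it is silenced inline.
    @pytest.mark.skip
    def test_ne_filters(self):  # noqa: F811
        pass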
test/document_stores/test_weaviate.py (4 changes: 2 additions & 2 deletions)
@@ -235,14 +235,14 @@ def test_deleting_by_id_or_by_filters(self, ds, documents):
def test_similarity_existing_index(self, similarity):
"""Testing non-matching similarity"""
# create the document_store
-document_store = WeaviateDocumentStore(
+WeaviateDocumentStore(
similarity=similarity, index=f"test_similarity_existing_index_{similarity}", recreate_index=True
)

# try to connect to the same document store but using the wrong similarity
non_matching_similarity = "l2" if similarity == "cosine" else "cosine"
with pytest.raises(ValueError, match=r"This index already exists in Weaviate with similarity .*"):
-document_store2 = WeaviateDocumentStore(
+WeaviateDocumentStore(
similarity=non_matching_similarity,
index=f"test_similarity_existing_index_{similarity}",
recreate_index=False,
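Several of the remaining test changes (here and in test_file_converter.py, test_query_classifier.py, and test_reader.py below) share one shape: the test only asserts that a constructor raises, yet its result was bound to a variable that was never read, which pyflakes reports as F841. A minimal sketch with a hypothetical class, not code from the repository:

import pytest


class InMemoryStore:
    """Hypothetical stand-in for the document stores used in the real tests."""

    def __init__(self, size):
        if size < 0:
            raise ValueError("size must be non-negative")
        self.size = size


def test_rejects_negative_size():
    with pytest.raises(ValueError):
        # was: store = InMemoryStore(size=-1) -- the binding was never read
        # (F841), so only the constructor call is kept.
        InMemoryStore(size=-1)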
test/nodes/test_file_converter.py (4 changes: 2 additions & 2 deletions)
@@ -182,7 +182,7 @@ def test_pdf_parallel_ocr(Converter, samples_path):
@fail_at_version(1, 18)
def test_deprecated_encoding():
with pytest.warns(DeprecationWarning):
-converter = PDFToTextConverter(encoding="utf-8")
+PDFToTextConverter(encoding="utf-8")


@fail_at_version(1, 18)
@@ -195,7 +195,7 @@ def test_deprecated_encoding_in_convert_method(samples_path):
@fail_at_version(1, 18)
def test_deprecated_keep_physical_layout():
with pytest.warns(DeprecationWarning):
-converter = PDFToTextConverter(keep_physical_layout=True)
+PDFToTextConverter(keep_physical_layout=True)


@fail_at_version(1, 18)
test/nodes/test_filetype_classifier.py (16 changes: 8 additions & 8 deletions)
@@ -45,24 +45,24 @@ def test_filetype_classifier_many_files_mixed_extensions(tmp_path):
@pytest.mark.unit
def test_filetype_classifier_unsupported_extension(tmp_path):
node = FileTypeClassifier()
test_file = tmp_path / f"test.really_weird_extension"
test_file = tmp_path / "test.really_weird_extension"
with pytest.raises(ValueError):
node.run(test_file)


@pytest.mark.unit
def test_filetype_classifier_custom_extensions(tmp_path):
node = FileTypeClassifier(supported_types=["my_extension"])
test_file = tmp_path / f"test.my_extension"
test_file = tmp_path / "test.my_extension"
output, edge = node.run(test_file)
assert edge == f"output_1"
assert edge == "output_1"
assert output == {"file_paths": [test_file]}


@pytest.mark.unit
def test_filetype_classifier_duplicate_custom_extensions():
with pytest.raises(ValueError):
-FileTypeClassifier(supported_types=[f"my_extension", "my_extension"])
+FileTypeClassifier(supported_types=["my_extension", "my_extension"])


@pytest.mark.unit
@@ -102,7 +102,7 @@ def test_filetype_classifier_text_files_without_extension_no_magic(monkeypatch,
node = FileTypeClassifier(supported_types=[""])

with caplog.at_level(logging.ERROR):
-node.run(samples_path / "extensionless_files" / f"pdf_file")
+node.run(samples_path / "extensionless_files" / "pdf_file")
assert "'python-magic' is not installed" in caplog.text


@@ -120,7 +120,7 @@ def test_filetype_classifier_media_extensions_positive(tmp_path):
def test_filetype_classifier_media_extensions_negative(tmp_path):
node = FileTypeClassifier(supported_types=DEFAULT_MEDIA_TYPES)

test_file = tmp_path / f"test.txt"
test_file = tmp_path / "test.txt"
with pytest.raises(ValueError, match="Files of type 'txt'"):
node.run(test_file)

@@ -137,7 +137,7 @@ def test_filetype_classifier_estimate_media_extensions(tmp_path):
shutil.copy(test_file, new_file_path)

output, edge = node.run(new_file_path)
assert edge == f"output_5"
assert edge == "output_5"
assert output == {"file_paths": [Path(new_file_path)]}


@@ -166,5 +166,5 @@ def test_filetype_classifier_batched_same_media_extensions(tmp_path):

# we should be able to pass a list of files with the same extension
output, edge = node.run_batch(test_files)
assert edge == f"output_1"
assert edge == "output_1"
assert output == {"file_paths": test_files}
test/nodes/test_preprocessor.py (4 changes: 2 additions & 2 deletions)
@@ -528,12 +528,12 @@ def test_preprocessor_very_long_document(caplog):
preproc = PreProcessor(
clean_empty_lines=False, clean_header_footer=False, clean_whitespace=False, split_by=None, max_chars_check=10
)
-documents = [Document(content=str(i) + (f"." * i)) for i in range(0, 30, 3)]
+documents = [Document(content=str(i) + ("." * i)) for i in range(0, 30, 3)]
results = preproc.process(documents)
assert len(results) == 19
assert any(d.content.startswith(".") for d in results)
assert any(not d.content.startswith(".") for d in results)
assert f"characters long after preprocessing, where the maximum length should be 10." in caplog.text
assert "characters long after preprocessing, where the maximum length should be 10." in caplog.text


@pytest.mark.unit
test/nodes/test_query_classifier.py (8 changes: 4 additions & 4 deletions)
@@ -6,7 +6,7 @@
@pytest.mark.unit
def test_query_classifier_initialized_with_token_instead_of_use_auth_token():
with patch("haystack.nodes.query_classifier.transformers.pipeline") as mock_transformers_pipeline:
-classifier = TransformersQueryClassifier(task="zero-shot-classification")
+TransformersQueryClassifier(task="zero-shot-classification")
assert "token" in mock_transformers_pipeline.call_args.kwargs
assert "use_auth_token" not in mock_transformers_pipeline.call_args.kwargs

@@ -75,7 +75,7 @@ def test_zero_shot_transformers_query_classifier_batch(zero_shot_transformers_qu

def test_transformers_query_classifier_wrong_labels():
with pytest.raises(ValueError, match="For text-classification, the provided labels must match the model labels"):
-query_classifier = TransformersQueryClassifier(
+TransformersQueryClassifier(
model_name_or_path="shahrukhx01/bert-mini-finetune-question-detection",
use_gpu=False,
task="text-classification",
@@ -85,7 +85,7 @@ def test_transformers_query_classifier_wrong_labels():

def test_transformers_query_classifier_no_labels():
with pytest.raises(ValueError, match="The labels must be provided"):
-query_classifier = TransformersQueryClassifier(
+TransformersQueryClassifier(
model_name_or_path="shahrukhx01/bert-mini-finetune-question-detection",
use_gpu=False,
task="text-classification",
@@ -95,7 +95,7 @@ def test_transformers_query_classifier_no_labels():

def test_transformers_query_classifier_unsupported_task():
with pytest.raises(ValueError, match="Task not supported"):
-query_classifier = TransformersQueryClassifier(
+TransformersQueryClassifier(
model_name_or_path="shahrukhx01/bert-mini-finetune-question-detection",
use_gpu=False,
task="summarization",
test/nodes/test_reader.py (10 changes: 4 additions & 6 deletions)
@@ -167,7 +167,7 @@ def test_deduplication_for_overlapping_documents(reader):
def test_model_download_options():
# download disabled and model is not cached locally
with pytest.raises(OSError):
-impossible_reader = FARMReader("mfeb/albert-xxlarge-v2-squad2", local_files_only=True, num_processes=0)
+FARMReader("mfeb/albert-xxlarge-v2-squad2", local_files_only=True, num_processes=0)


@pytest.mark.integration
@@ -226,17 +226,15 @@ def test_top_k(reader, docs, top_k):
def test_farm_reader_invalid_params():
# invalid max_seq_len (greater than model maximum seq length)
with pytest.raises(Exception):
-reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, max_seq_len=513)
+FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, max_seq_len=513)

# invalid max_seq_len (max_seq_len >= doc_stride)
with pytest.raises(Exception):
-reader = FARMReader(
-model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, max_seq_len=129, doc_stride=128
-)
+FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, max_seq_len=129, doc_stride=128)

# invalid doc_stride (doc_stride >= (max_seq_len - max_query_length))
with pytest.raises(Exception):
-reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, doc_stride=999)
+FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, doc_stride=999)


def test_farm_reader_update_params(docs):