ci: Fix all ruff pyflakes errors except unused imports (#5820)
* ci: Fix all ruff pyflakes errors except unused imports

* Delete releasenotes/notes/fix-some-pyflakes-errors-69a1106efa5d0203.yaml
cclauss committed Sep 15, 2023
1 parent 8af0d81 commit 1bc03dd
Showing 23 changed files with 135 additions and 136 deletions.
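Most of the changes below are mechanical pyflakes fixes. As a rough orientation — a minimal, self-contained sketch with made-up names, not code from the commit — the two most common before/after idioms are F541 (an f-string with no placeholders) and F841 (a name that is bound but never used):

# Sketch only; not part of the diff.

# F541: a literal without placeholders should be a plain string, not an f-string.
title = "Doc with embedding"        # was: title = f"Doc with embedding"


# F841: do not bind an exception (or any value) that is never referenced.
def read_version(path):
    try:
        with open(path) as fh:
            return fh.read().strip()
    except OSError:                 # was: except OSError as e:
        return "unknown"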
haystack/modeling/__init__.py (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@

try:
import torch
-except (ModuleNotFoundError, ImportError) as iexc:
+except (ModuleNotFoundError, ImportError):
raise ImportError(
"torch not installed, haystack.modeling won't work. Run 'pip install transformers[torch]' to fix this problem."
)
pyproject.toml (7 changes: 6 additions & 1 deletion)
@@ -386,6 +386,7 @@ select = [
"DJ", # flake8-django
"E501", # Long lines
"EXE", # flake8-executable
"F", # Pyflakes
"FURB", # refurb
"INT", # flake8-gettext
"PL", # Pylint
@@ -395,7 +396,6 @@ select = [
"W", # pycodestyle
"YTT", # flake8-2020
# "E", # pycodestyle
# "F", # Pyflakes
# "NPY", # NumPy-specific rules
# "PD", # pandas-vet
# "PERF", # Perflint
@@ -405,6 +405,7 @@
line-length = 1486
target-version = "py38"
ignore = [
"F401", # unused-import
"PLR1714", # repeated-equality-comparison
"PLR5501", # collapsible-else-if
"PLW0603", # global-statement
@@ -415,6 +416,10 @@ ignore = [
[tool.ruff.mccabe]
max-complexity = 28

+[tool.ruff.per-file-ignores]
+"haystack/preview/testing/document_store.py" = ["F821"]
+"haystack/telemetry.py" = ["F821"]
+
[tool.ruff.pylint]
allow-magic-value-types = ["float", "int", "str"]
max-args = 38 # Default is 5
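For context on the two per-file-ignores entries added above: F821 reports a reference to a name that is never defined or imported in the module. A minimal sketch (not from the repository) of the kind of code it flags — here the check is suppressed for those two files rather than fixed:

def total_cost(quantities):
    # "unit_price" is never defined or imported in this module, so ruff's
    # F821 (undefined name) fires here; calling the function would raise
    # NameError at runtime.
    return sum(quantities) * unit_price

F401 (unused-import) stays in the global ignore list, which matches the commit title: unused imports are deliberately left for a later change.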
rest_api/rest_api/__about__.py (2 changes: 1 addition & 1 deletion)
@@ -8,6 +8,6 @@

try:
__version__ = open(Path(__file__).parent.parent / "VERSION.txt", "r").read()
-except Exception as e:
+except Exception:
logger = logging.getLogger(__name__)
logger.exception("No VERSION.txt found!")
test/benchmarks/datadog/metric_handler.py (2 changes: 1 addition & 1 deletion)
@@ -144,7 +144,7 @@ def send_custom_dd_metrics(self, metrics: List[CustomDatadogMetric]) -> List[Dic
try:
response = self.send_custom_dd_metric(metric)
responses.append(response)
-except ConnectionError as e:
+except ConnectionError:
LOGGER.error(
f"Could not send custom metric even after retrying. "
f"metric_name={metric.name}, metric_value={metric.value}"
test/document_stores/test_faiss.py (2 changes: 1 addition & 1 deletion)
@@ -233,7 +233,7 @@ def test_passing_index_from_outside(self, documents_with_embeddings, tmp_path):
faiss_index.set_direct_map_type(faiss.DirectMap.Hashtable)
faiss_index.nprobe = 2
document_store = FAISSDocumentStore(
sql_url=f"sqlite:https:///", faiss_index=faiss_index, index=index, isolation_level="AUTOCOMMIT"
sql_url="sqlite:https:///", faiss_index=faiss_index, index=index, isolation_level="AUTOCOMMIT"
)

document_store.delete_documents()
test/document_stores/test_pinecone.py (20 changes: 10 additions & 10 deletions)
@@ -195,12 +195,12 @@ def test_nin_filters(self, ds, documents):

@pytest.mark.skip
@pytest.mark.integration
-def test_ne_filters(self, ds, documents):
+def test_ne_filters(self, ds, documents): # noqa: F811
pass

@pytest.mark.skip
@pytest.mark.integration
-def test_nin_filters(self, ds, documents):
+def test_nin_filters(self, ds, documents): # noqa: F811
pass

@pytest.mark.skip
@@ -439,7 +439,7 @@ def test_multilayer_dict(self, doc_store_with_docs: PineconeDocumentStore):
"meta_field": "multilayer-test",
}
doc = Document(
content=f"Multilayered dict", meta=multilayer_meta, embedding=np.random.rand(768).astype(np.float32)
content="Multilayered dict", meta=multilayer_meta, embedding=np.random.rand(768).astype(np.float32)
)

doc_store_with_docs.write_documents([doc])
@@ -466,7 +466,7 @@ def test_get_embedding_count(self, doc_store_with_docs: PineconeDocumentStore):
We expect 1 doc with an embeddings because all documents in already written in doc_store_with_docs contain no
embeddings.
"""
doc = Document(content=f"Doc with embedding", embedding=np.random.rand(768).astype(np.float32))
doc = Document(content="Doc with embedding", embedding=np.random.rand(768).astype(np.float32))
doc_store_with_docs.write_documents([doc])
assert doc_store_with_docs.get_embedding_count() == 1

@@ -485,7 +485,7 @@ def test_get_document_count_after_write_doc_with_embedding(self, doc_store_with_
assert doc_store_with_docs.get_document_count() == initial_document_count

# document with embedding is written to doc_store_with_docs
doc = Document(content=f"Doc with embedding", embedding=np.random.rand(768).astype(np.float32))
doc = Document(content="Doc with embedding", embedding=np.random.rand(768).astype(np.float32))
doc_store_with_docs.write_documents([doc])

# so we expect initial_document_count + 1 documents in total
@@ -509,7 +509,7 @@ def test_get_document_count_after_write_doc_without_embedding(self, doc_store_wi
assert doc_store_with_docs.get_document_count() == initial_document_count

# document without embedding is written to doc_store_with_docs
doc = Document(content=f"Doc without embedding")
doc = Document(content="Doc without embedding")
doc_store_with_docs.write_documents([doc])

# we now expect initial_document_count + 1 documents in total
@@ -536,8 +536,8 @@ def test_get_document_count_after_delete_doc_with_embedding(self, doc_store_with
assert doc_store_with_docs.get_document_count() == initial_document_count

# two documents with embedding are written to doc_store_with_docs
-doc_1 = Document(content=f"Doc with embedding 1", embedding=np.random.rand(768).astype(np.float32))
-doc_2 = Document(content=f"Doc with embedding 2", embedding=np.random.rand(768).astype(np.float32))
+doc_1 = Document(content="Doc with embedding 1", embedding=np.random.rand(768).astype(np.float32))
+doc_2 = Document(content="Doc with embedding 2", embedding=np.random.rand(768).astype(np.float32))
doc_store_with_docs.write_documents([doc_1, doc_2])

# total number is initial_document_count + 2
@@ -569,8 +569,8 @@ def test_get_document_count_after_delete_doc_without_embedding(self, doc_store_w
assert doc_store_with_docs.get_document_count() == initial_document_count

# two documents without embedding are written to doc_store_with_docs
-doc_1 = Document(content=f"Doc with embedding 1", embedding=None)
-doc_2 = Document(content=f"Doc with embedding 2", embedding=None)
+doc_1 = Document(content="Doc with embedding 1", embedding=None)
+doc_2 = Document(content="Doc with embedding 2", embedding=None)
doc_store_with_docs.write_documents([doc_1, doc_2])

# total number is initial_document_count + 2
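The two "# noqa: F811" comments added above address pyflakes' redefinition check: F811 fires when the same name is bound more than once in a scope, which is exactly what happens when a test is re-declared as a skipped stub. A minimal sketch of the pattern (the class is made up, not from the repository):

import pytest


class TestExampleStore:
    def test_ne_filters(self):
        assert 1 != 2

    # The same name is deliberately bound again as a skipped stub; pyflakes
    # sees a redefinition of test_ne_filters (F811), so it is silenced inline.
    @pytest.mark.skip
    def test_ne_filters(self):  # noqa: F811
        pass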
test/document_stores/test_weaviate.py (4 changes: 2 additions & 2 deletions)
@@ -235,14 +235,14 @@ def test_deleting_by_id_or_by_filters(self, ds, documents):
def test_similarity_existing_index(self, similarity):
"""Testing non-matching similarity"""
# create the document_store
-document_store = WeaviateDocumentStore(
+WeaviateDocumentStore(
similarity=similarity, index=f"test_similarity_existing_index_{similarity}", recreate_index=True
)

# try to connect to the same document store but using the wrong similarity
non_matching_similarity = "l2" if similarity == "cosine" else "cosine"
with pytest.raises(ValueError, match=r"This index already exists in Weaviate with similarity .*"):
-document_store2 = WeaviateDocumentStore(
+WeaviateDocumentStore(
similarity=non_matching_similarity,
index=f"test_similarity_existing_index_{similarity}",
recreate_index=False,
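Several of the remaining test changes (here and in test_file_converter.py, test_query_classifier.py, and test_reader.py below) share one shape: the test only asserts that a constructor raises, yet its result was bound to a variable that was never read, which pyflakes reports as F841. A minimal sketch with a hypothetical class, not code from the repository:

import pytest


class InMemoryStore:
    """Hypothetical stand-in for the document stores used in the real tests."""

    def __init__(self, size):
        if size < 0:
            raise ValueError("size must be non-negative")
        self.size = size


def test_rejects_negative_size():
    with pytest.raises(ValueError):
        # was: store = InMemoryStore(size=-1) -- the binding was never read
        # (F841), so only the constructor call is kept.
        InMemoryStore(size=-1)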
test/nodes/test_file_converter.py (4 changes: 2 additions & 2 deletions)
@@ -182,7 +182,7 @@ def test_pdf_parallel_ocr(Converter, samples_path):
@fail_at_version(1, 18)
def test_deprecated_encoding():
with pytest.warns(DeprecationWarning):
-converter = PDFToTextConverter(encoding="utf-8")
+PDFToTextConverter(encoding="utf-8")


@fail_at_version(1, 18)
@@ -195,7 +195,7 @@ def test_deprecated_encoding_in_convert_method(samples_path):
@fail_at_version(1, 18)
def test_deprecated_keep_physical_layout():
with pytest.warns(DeprecationWarning):
-converter = PDFToTextConverter(keep_physical_layout=True)
+PDFToTextConverter(keep_physical_layout=True)


@fail_at_version(1, 18)
test/nodes/test_filetype_classifier.py (16 changes: 8 additions & 8 deletions)
@@ -45,24 +45,24 @@ def test_filetype_classifier_many_files_mixed_extensions(tmp_path):
@pytest.mark.unit
def test_filetype_classifier_unsupported_extension(tmp_path):
node = FileTypeClassifier()
test_file = tmp_path / f"test.really_weird_extension"
test_file = tmp_path / "test.really_weird_extension"
with pytest.raises(ValueError):
node.run(test_file)


@pytest.mark.unit
def test_filetype_classifier_custom_extensions(tmp_path):
node = FileTypeClassifier(supported_types=["my_extension"])
test_file = tmp_path / f"test.my_extension"
test_file = tmp_path / "test.my_extension"
output, edge = node.run(test_file)
assert edge == f"output_1"
assert edge == "output_1"
assert output == {"file_paths": [test_file]}


@pytest.mark.unit
def test_filetype_classifier_duplicate_custom_extensions():
with pytest.raises(ValueError):
-FileTypeClassifier(supported_types=[f"my_extension", "my_extension"])
+FileTypeClassifier(supported_types=["my_extension", "my_extension"])


@pytest.mark.unit
@@ -102,7 +102,7 @@ def test_filetype_classifier_text_files_without_extension_no_magic(monkeypatch,
node = FileTypeClassifier(supported_types=[""])

with caplog.at_level(logging.ERROR):
-node.run(samples_path / "extensionless_files" / f"pdf_file")
+node.run(samples_path / "extensionless_files" / "pdf_file")
assert "'python-magic' is not installed" in caplog.text


@@ -120,7 +120,7 @@ def test_filetype_classifier_media_extensions_positive(tmp_path):
def test_filetype_classifier_media_extensions_negative(tmp_path):
node = FileTypeClassifier(supported_types=DEFAULT_MEDIA_TYPES)

test_file = tmp_path / f"test.txt"
test_file = tmp_path / "test.txt"
with pytest.raises(ValueError, match="Files of type 'txt'"):
node.run(test_file)

@@ -137,7 +137,7 @@ def test_filetype_classifier_estimate_media_extensions(tmp_path):
shutil.copy(test_file, new_file_path)

output, edge = node.run(new_file_path)
assert edge == f"output_5"
assert edge == "output_5"
assert output == {"file_paths": [Path(new_file_path)]}


@@ -166,5 +166,5 @@ def test_filetype_classifier_batched_same_media_extensions(tmp_path):

# we should be able to pass a list of files with the same extension
output, edge = node.run_batch(test_files)
assert edge == f"output_1"
assert edge == "output_1"
assert output == {"file_paths": test_files}
test/nodes/test_preprocessor.py (4 changes: 2 additions & 2 deletions)
@@ -528,12 +528,12 @@ def test_preprocessor_very_long_document(caplog):
preproc = PreProcessor(
clean_empty_lines=False, clean_header_footer=False, clean_whitespace=False, split_by=None, max_chars_check=10
)
-documents = [Document(content=str(i) + (f"." * i)) for i in range(0, 30, 3)]
+documents = [Document(content=str(i) + ("." * i)) for i in range(0, 30, 3)]
results = preproc.process(documents)
assert len(results) == 19
assert any(d.content.startswith(".") for d in results)
assert any(not d.content.startswith(".") for d in results)
assert f"characters long after preprocessing, where the maximum length should be 10." in caplog.text
assert "characters long after preprocessing, where the maximum length should be 10." in caplog.text


@pytest.mark.unit
test/nodes/test_query_classifier.py (8 changes: 4 additions & 4 deletions)
@@ -6,7 +6,7 @@
@pytest.mark.unit
def test_query_classifier_initialized_with_token_instead_of_use_auth_token():
with patch("haystack.nodes.query_classifier.transformers.pipeline") as mock_transformers_pipeline:
-classifier = TransformersQueryClassifier(task="zero-shot-classification")
+TransformersQueryClassifier(task="zero-shot-classification")
assert "token" in mock_transformers_pipeline.call_args.kwargs
assert "use_auth_token" not in mock_transformers_pipeline.call_args.kwargs

@@ -75,7 +75,7 @@ def test_zero_shot_transformers_query_classifier_batch(zero_shot_transformers_qu

def test_transformers_query_classifier_wrong_labels():
with pytest.raises(ValueError, match="For text-classification, the provided labels must match the model labels"):
-query_classifier = TransformersQueryClassifier(
+TransformersQueryClassifier(
model_name_or_path="shahrukhx01/bert-mini-finetune-question-detection",
use_gpu=False,
task="text-classification",
@@ -85,7 +85,7 @@ def test_transformers_query_classifier_wrong_labels():

def test_transformers_query_classifier_no_labels():
with pytest.raises(ValueError, match="The labels must be provided"):
-query_classifier = TransformersQueryClassifier(
+TransformersQueryClassifier(
model_name_or_path="shahrukhx01/bert-mini-finetune-question-detection",
use_gpu=False,
task="text-classification",
@@ -95,7 +95,7 @@ def test_transformers_query_classifier_no_labels():

def test_transformers_query_classifier_unsupported_task():
with pytest.raises(ValueError, match="Task not supported"):
-query_classifier = TransformersQueryClassifier(
+TransformersQueryClassifier(
model_name_or_path="shahrukhx01/bert-mini-finetune-question-detection",
use_gpu=False,
task="summarization",
test/nodes/test_reader.py (10 changes: 4 additions & 6 deletions)
@@ -167,7 +167,7 @@ def test_deduplication_for_overlapping_documents(reader):
def test_model_download_options():
# download disabled and model is not cached locally
with pytest.raises(OSError):
-impossible_reader = FARMReader("mfeb/albert-xxlarge-v2-squad2", local_files_only=True, num_processes=0)
+FARMReader("mfeb/albert-xxlarge-v2-squad2", local_files_only=True, num_processes=0)


@pytest.mark.integration
@@ -226,17 +226,15 @@ def test_top_k(reader, docs, top_k):
def test_farm_reader_invalid_params():
# invalid max_seq_len (greater than model maximum seq length)
with pytest.raises(Exception):
-reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, max_seq_len=513)
+FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, max_seq_len=513)

# invalid max_seq_len (max_seq_len >= doc_stride)
with pytest.raises(Exception):
-reader = FARMReader(
-model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, max_seq_len=129, doc_stride=128
-)
+FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, max_seq_len=129, doc_stride=128)

# invalid doc_stride (doc_stride >= (max_seq_len - max_query_length))
with pytest.raises(Exception):
-reader = FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, doc_stride=999)
+FARMReader(model_name_or_path="deepset/tinyroberta-squad2", use_gpu=False, doc_stride=999)


def test_farm_reader_update_params(docs):