Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Make bool values lowercase in solr query url - fixes #401 #435

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Make bool values lowercase in solr query url
  • Loading branch information
ch2ohch2oh committed Oct 14, 2023
commit 0f18d53bb97ae9582bfd9963962d88ca2aa1ea0d
31 changes: 25 additions & 6 deletions pysolr.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ def safe_urlencode(params, doseq=0):
which can't fail down to ascii.
"""
if IS_PY3:
for key, val in params.items():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In general, pysolr has avoided doing type coercion so that it doesn't need to know what type(s) Solr supports for a given field (e.g. a parameter like group is documented as accepting true, and will also accept on or yes, but not 1 or True). I think this change is relatively safe, though, because Solr doesn't accept that value natively. The only case where it could cause a problem is if someone was passing a Solr string which they, for some reason, wanted to have processed literally — for example, if I had a Python app which used a Solr StringField for something and expected the literal value True or False, because that field type isn't case-insensitive.

if isinstance(val, bool):
params[key] = str(val).lower()
return urlencode(params, doseq)

if hasattr(params, "items"):
Expand Down Expand Up @@ -300,6 +303,16 @@ def __iter__(self):
result = result._next_page_query and result._next_page_query()


def get_nested(obj, keys, default=None):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this is only used by a debug logging call, and it could be replaced by a standard dictionary get-with-default call there.

"""Nested key lookup for a dict-like object."""
try:
for k in keys:
obj = obj[k]
return obj
except KeyError:
return default


class Solr(object):
"""
The main object for working with Solr.
Expand Down Expand Up @@ -528,7 +541,7 @@ def _update(
path_handler = handler
if self.use_qt_param:
path_handler = "select"
query_vars.append("qt=%s" % safe_urlencode(handler, True))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This appears to be unrelated to the proposed change.

query_vars.append("qt=%s" % handler)

path = "%s/" % path_handler

Expand Down Expand Up @@ -830,11 +843,17 @@ def search(self, q, search_handler=None, **kwargs):
response = self._select(params, handler=search_handler)
decoded = self.decoder.decode(response)

self.log.debug(
"Found '%s' search results.",
# cover both cases: there is no response key or value is None
(decoded.get("response", {}) or {}).get("numFound", 0),
)
if decoded.get("grouped"):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would defer support for grouped responses to a larger task which would provide more help for them - just changing a log message doesn't seem to add much value.

group_key = next(iter(decoded["grouped"]))
self.log.debug(
"Found results grouped by '%s' with %d matches",
group_key,
decoded["grouped"][group_key]["matches"],
)
else:
self.log.debug(
"Found %d docs", get_nested(decoded, ["response", "numFound"], 0)
)

cursorMark = params.get("cursorMark", None)
if cursorMark != decoded.get("nextCursorMark", cursorMark):
Expand Down
11 changes: 11 additions & 0 deletions tests/test_client.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
# -*- coding: utf-8 -*-

from __future__ import absolute_import, unicode_literals

import datetime
import random
import time
import unittest
import json
from io import StringIO
from xml.etree import ElementTree

from pysolr import (
NESTED_DOC_KEY,
Results,
Solr,
SolrError,
clean_xml_string,
force_bytes,
force_unicode,
json,

Check failure on line 21 in tests/test_client.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F811)

tests/test_client.py:21:5: F811 Redefinition of unused `json` from line 9
safe_urlencode,
sanitize,
unescape_html,
get_nested,
)

try:

Check failure on line 28 in tests/test_client.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (I001)

tests/test_client.py:3:1: I001 Import block is un-sorted or un-formatted
Expand Down Expand Up @@ -72,6 +74,9 @@
"test=Hello \u2603!&test=Helllo world!",
)

# Boolean options for Solr should be in lowercase.
self.assertTrue("True" not in safe_urlencode(dict(group=True)))

Check failure on line 78 in tests/test_client.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (C408)

tests/test_client.py:78:54: C408 Unnecessary `dict` call (rewrite as a literal)

def test_sanitize(self):
self.assertEqual(
sanitize(
Expand Down Expand Up @@ -101,6 +106,12 @@
def test_clean_xml_string(self):
self.assertEqual(clean_xml_string("\x00\x0b\x0d\uffff"), "\x0d")

def test_get_nested(self):
    """Check get_nested on a missing key, a one-key path and a full-depth path."""
    tree = {"a": {"b": {"c": 2023}}}
    # "e" is absent under "a" and no default is passed, so None is expected.
    missing = get_nested(tree, ["a", "e"])
    self.assertEqual(missing, None)
    # A single-key path yields the sub-dict stored under that key.
    self.assertEqual(get_nested(tree, ["a"]), tree["a"])
    # A path down to the leaf yields the leaf value itself.
    self.assertEqual(get_nested(tree, ["a", "b", "c"]), tree["a"]["b"]["c"])


class ResultsTestCase(unittest.TestCase):
def test_init(self):
Expand Down
Loading