Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: build Mango indexes from dynamic expressions #3912

Open
wants to merge 6 commits into
base: 3.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Prev Previous commit
Next Next commit
Allow multiple jq expressions in index definitions
If we have a document like:

    {
      "foo": "a b",
      "bar": "x y"
    }

And an index definition like:

    [
      { "foo_words": { "$jq": '.foo | split(" ") | .[]' } },
      { "bar_words": { "$jq": '.bar | split(" ") | .[]' } }
    ]

Then this should produce four index keys for the document:

    ("a", "x")
    ("a", "y")
    ("b", "x")
    ("b", "y")

This lets us query on multiple virtual fields in a single query. The
implementation here allows jq expressions (that return multiple values)
to be mixed with normal field access that returns a single value;
`flatten_keys/1` returns the product of any multi-valued index fields.
For example, above `foo_words` produces values `["a", "b"]` and
`bar_words` produces `["x", "y"]`, and we multiply this out giving the
four keys above.
  • Loading branch information
jcoglan committed Jan 25, 2022
commit 1852c314606978b9c3eb098742b1de2056d98ddf
4 changes: 3 additions & 1 deletion src/mango/src/mango_cursor_view.erl
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ choose_best_index(_DbName, IndexRanges) ->
{SelectedIndex, SelectedIndexRanges, _} = hd(lists:sort(Cmp, IndexRanges)),
{SelectedIndex, SelectedIndexRanges}.

add_virtual_field({Props}, {[{<<"$and">>, [{[{K1, _}]}, {[{K2, _}]}]}]}, [V1, V2]) ->
{lists:append(Props, [{K1, V1}, {K2, V2}])};
add_virtual_field({Props}, {[{Key, _}]}, [Value]) ->
{lists:append(Props, [{Key, Value}])};
add_virtual_field(Props, _, _) ->
Expand Down Expand Up @@ -265,7 +267,7 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) ->
put(mango_docs_examined, get(mango_docs_examined) + 1),
Selector = couch_util:get_value(selector, Options),
couch_stats:increment_counter([mango, docs_examined]),
io:format("----[view_cb selector] ~p~n", [Selector]),
io:format("----[view_cb selector] doc = ~p :: selector = ~p :: key = ~p~n", [Doc, Selector, Key]),
Doc1 = add_virtual_field(Doc, Selector, Key),
case mango_selector:match(Selector, Doc1) of
true ->
Expand Down
22 changes: 14 additions & 8 deletions src/mango/src/mango_native_proc.erl
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,9 @@ get_index_entries({IdxProps}, Doc) ->
false ->
[];
true ->
Values = get_index_values(Fields, Doc),
case lists:member(not_found, Values) of
true -> [];
false -> case Values of
[{jq, Values1}] -> [[[V], null] || V <- Values1];
_Else -> [[Values, null]]
end
end
Keys = flatten_keys(get_index_values(Fields, Doc)),
io:format("----[get_index_entries] ~p :: ~p~n", [Fields, Keys]),
[[Values, null] || Values <- Keys]
end.

get_index_values(Fields, Doc) ->
Expand All @@ -142,6 +137,17 @@ get_index_field_value({Field, FieldDef}, Doc) ->
Value1 -> Value1
end.

%% Expand the per-field values of one document into the list of composite
%% index keys to emit for it.
%%
%% Each element of the input list describes one index field:
%%   not_found     - the field has no value; no keys are produced.
%%   {jq, Values}  - a multi-valued field; every element of Values is
%%                   paired with every key built from the remaining fields.
%%   anything else - a single value, prepended to every key built from the
%%                   remaining fields.
%%
%% Returns a list of keys, each key being a list holding one value per
%% field. An empty field list yields the single empty key [[]], which is
%% what terminates the recursion.
flatten_keys([]) ->
    [[]];
flatten_keys([Field | Others]) ->
    case Field of
        not_found ->
            %% Stop here without looking at the remaining fields.
            [];
        {jq, Candidates} ->
            Suffixes = flatten_keys(Others),
            [[Candidate | Suffix] || Candidate <- Candidates, Suffix <- Suffixes];
        Single ->
            %% An empty Suffixes (e.g. a later not_found) propagates as [].
            Suffixes = flatten_keys(Others),
            [[Single | Suffix] || Suffix <- Suffixes]
    end.

get_text_entries({IdxProps}, Doc) ->
Selector = get_index_partial_filter_selector(IdxProps),
case should_index(Selector, Doc) of
Expand Down
37 changes: 30 additions & 7 deletions src/mango/test/22-index-function-test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,35 @@ def setUpClass(klass):

create_index(
klass.db,
"_design/jq-split",
"jq-split-json-index",
{"bar_words": {"$jq": '.bar | split(" ") | .[]'}},
"_design/jq-split-1",
"jq-json-index",
{"f1_words": {"$jq": '.f1 | split(" ") | .[]'}},
)
klass.db.save_docs([{"_id": "example-doc", "bar": "a b c"}])

def test_index_by_length(self):
resp = self.db.find({"bar_words": "b"})
self.assertEqual([doc["_id"] for doc in resp], ["example-doc"])
create_index(
klass.db,
"_design/jq-split-2",
"jq-json-index",
{
"f2_words": {"$jq": '.f2 | split(" ") | .[]'},
"f3_words": {"$jq": '.f3 | split(" ") | .[]'},
},
)

klass.db.save_docs(
[
{"_id": "doc-1", "f1": "a b", "f2": "j k", "f3": "y z"},
{"_id": "doc-2", "f1": "b c", "f2": "k l", "f3": "x y"},
]
)

def test_search_on_one_field(self):
resp = self.db.find({"f1_words": "a"})
self.assertEqual([doc["_id"] for doc in resp], ["doc-1"])

resp = self.db.find({"f1_words": "b"})
self.assertEqual([doc["_id"] for doc in resp], ["doc-1", "doc-2"])

def test_search_on_two_fields(self):
resp = self.db.find({"f2_words": "k", "f3_words": "x"})
self.assertEqual([doc["_id"] for doc in resp], ["doc-2"])