Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: build Mango indexes from dynamic expressions #3912

Open
wants to merge 6 commits into
base: 3.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Next Next commit
Index on a "virtual" field computed from normal fields
This puts in just enough machinery to support indexing on a function of
a document's content inside Mango, without going through the JS engine.
Say we have a document like:

    {
      "bar": "a b c"
    }

If we want to index on the individual "words" of `bar`, i.e. find this
doc via the keys "a", "b" or "c", we'd normally need a JS map function:

    function (doc) {
      for (let word of doc.bar.split(" ")) {
        emit(word)
      }
    }

This patch lets us do this inside Mango by defining an index on a
"virtual" field whose value is a function of the doc's other fields. We
put a view containing this in a design doc:

    "map": {
      "fields": {
        "bar_words": {
          "$explode": { "$field": "bar", "$separator": " " }
        }
      }
    }

And this lets us perform `_find` queries for e.g. { "bar_words": "b" }
to get our original document.

As this is a proof of concept designed to get the index machinery
working, `$explode` is the only function defined.
  • Loading branch information
janl authored and jcoglan committed Jan 25, 2022
commit 661cda304d996e48206265cffd7ea860d7fdbbbf
15 changes: 12 additions & 3 deletions src/mango/src/mango_cursor_view.erl
Original file line number Diff line number Diff line change
Expand Up @@ -236,16 +236,22 @@ choose_best_index(_DbName, IndexRanges) ->
{SelectedIndex, SelectedIndexRanges, _} = hd(lists:sort(Cmp, IndexRanges)),
{SelectedIndex, SelectedIndexRanges}.

% Attach the value of an indexed "virtual" field to a document so that the
% selector can be matched against a field the stored document does not
% physically contain.
%
% Doc      - EJSON object, i.e. {Props} where Props is a list of
%            {Key, Value} pairs.
% Selector - EJSON selector; only a selector with exactly one top-level key
%            is considered.
% RowKey   - key of the view row; only a single-element key is considered.
%
% Returns Doc with {Key, Value} appended when, and only when, Key is a field
% name (not a "$"-prefixed operator such as <<"$and">>) that is absent from
% the top level of Doc. In every other case Doc is returned unchanged, so a
% field that really exists in the document is never duplicated and an
% operator is never injected as a field.
%
% NOTE(review): only top-level keys are checked; a dotted path such as
% <<"a.b">> is compared literally and not resolved into nested objects.
add_virtual_field({_Props} = Doc, {[{<<"$", _/binary>>, _}]}, [_Value]) ->
    Doc;
add_virtual_field({Props} = Doc, {[{Key, _}]}, [Value]) ->
    case lists:keymember(Key, 1, Props) of
        true -> Doc;
        false -> {Props ++ [{Key, Value}]}
    end;
add_virtual_field(Doc, _Selector, _RowKey) ->
    Doc.

view_cb({meta, Meta}, Acc) ->
% Map function starting
put(mango_docs_examined, 0),
set_mango_msg_timestamp(),
ok = rexi:stream2({meta, Meta}),
{ok, Acc};
view_cb({row, Row}, #mrargs{extra = Options} = Acc) ->
Key = couch_util:get_value(key, Row),
ViewRow = #view_row{
id = couch_util:get_value(id, Row),
key = couch_util:get_value(key, Row),
key = Key,
doc = couch_util:get_value(doc, Row)
},
case ViewRow#view_row.doc of
Expand All @@ -259,9 +265,12 @@ view_cb({row, Row}, #mrargs{extra = Options} = Acc) ->
put(mango_docs_examined, get(mango_docs_examined) + 1),
Selector = couch_util:get_value(selector, Options),
couch_stats:increment_counter([mango, docs_examined]),
case mango_selector:match(Selector, Doc) of
io:format("----[view_cb selector] ~p~n", [Selector]),
Doc1 = add_virtual_field(Doc, Selector, Key),
case mango_selector:match(Selector, Doc1) of
true ->
ok = rexi:stream2(ViewRow),
ViewRow1 = ViewRow#view_row{doc = Doc1},
ok = rexi:stream2(ViewRow1),
set_mango_msg_timestamp();
false ->
maybe_send_mango_ping()
Expand Down
15 changes: 15 additions & 0 deletions src/mango/src/mango_doc.erl
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

get_field/2,
get_field/3,
get_field_fun/2,
rem_field/2,
set_field/3
]).
Expand Down Expand Up @@ -415,6 +416,20 @@ get_field(Values, [Name | Rest], Validator) when is_list(Values) ->
get_field(_, [_ | _], _) ->
bad_path.

% Evaluate a Mango index function against a document.
%
% Props    - the document, passed through untouched to the function handler.
% MangoFun - a {FunName, {Args}} pair taken from the index definition, where
%            FunName is the operator name and Args its property list.
%
% Dispatches on the function name; <<"$explode">> is the only function
% implemented. Any other name, or a function whose arguments are not an
% object, yields bad_path rather than raising badmatch, so a malformed
% expression is handled the same way as an unknown one.
get_field_fun(Props, {<<"$explode">>, {Args}}) when is_list(Args) ->
    handle_explode(Props, Args);
get_field_fun(_Props, _MangoFun) ->
    bad_path.

% Evaluate the $explode index function: split one string field of a
% document into its parts.
%
% {Doc} - EJSON object whose property list is searched at the top level.
% Args  - property list of the function's arguments:
%           <<"$field">>     name of the document field to read
%           <<"$separator">> pattern the field's value is split on
%
% Returns {fn, Parts}, where Parts is the list produced by splitting the
% field's value on every occurrence of the separator.
%
% Returns not_found when the field is absent from the document, when its
% value is not a binary, or when either argument is missing or is not a
% binary. Previously each of these cases crashed (badmatch on the keyfind
% result or an error inside string:split/3) instead of reporting that there
% is nothing to index for this document.
handle_explode({Doc}, Args) ->
    FieldName = proplists:get_value(<<"$field">>, Args),
    Separator = proplists:get_value(<<"$separator">>, Args),
    case lists:keyfind(FieldName, 1, Doc) of
        {_, FieldValue} when is_binary(FieldValue), is_binary(Separator) ->
            {fn, string:split(FieldValue, Separator, all)};
        _ ->
            % A missing <<"$field">> argument looks up 'undefined', which
            % never matches a document key, so it lands here as well.
            not_found
    end.

rem_field(Props, Field) when is_binary(Field) ->
{ok, Path} = mango_util:parse_field(Field),
rem_field(Props, Path);
Expand Down
18 changes: 16 additions & 2 deletions src/mango/src/mango_native_proc.erl
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,27 @@ get_index_entries({IdxProps}, Doc) ->
Values = get_index_values(Fields, Doc),
case lists:member(not_found, Values) of
true -> [];
false -> [[Values, null]]
false -> case Values of
[{fn, Values1}] ->
io:format("----[get_index_entries] ~p~n", [Values1]),
[[[V], null] || V <- Values1];
_Else -> [[Values, null]]
end
end
end.

get_index_values(Fields, Doc) ->
io:format("----[get_index_values fields] ~p~n", [Fields]),
lists:map(
fun({Field, _Dir}) ->
fun({_Field, {[MangoFun]}}) ->
case mango_doc:get_field_fun(Doc, MangoFun) of
not_found -> not_found;
bad_path -> not_found;
Value ->
io:format("----[get_field_fun result] ~p~n", [Value]),
Value
end;
({Field, _Dir}) ->
case mango_doc:get_field(Doc, Field) of
not_found -> not_found;
bad_path -> not_found;
Expand Down
49 changes: 49 additions & 0 deletions src/mango/test/22-index-function-test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

import mango


def create_index(db, ddoc_id, view_id, definition):
    """Save a design document that declares a single Mango view index.

    The design document uses the ``query`` language and contains one view,
    ``view_id``, whose ``map`` and ``options.def`` sections both carry
    ``definition`` as their ``fields`` value.

    Args:
        db: object exposing ``save_docs(list_of_docs)``.
        ddoc_id: ``_id`` of the design document to write.
        view_id: name of the view inside the design document.
        definition: the index ``fields`` definition.
    """
    view = {
        "map": {"fields": definition, "partial_filter_selector": {}},
        "reduce": "_count",
        "options": {"def": {"fields": definition}},
    }
    ddoc = {"_id": ddoc_id, "language": "query", "views": {view_id: view}}
    db.save_docs([ddoc])


class IndexFunctionTests(mango.DbPerClass):
    """Checks that a document can be found through a function-based index."""

    @classmethod
    def setUpClass(klass):
        # Create the index on the virtual field "bar_words" first, then
        # store the one document the test queries for.
        super(IndexFunctionTests, klass).setUpClass()

        explode_bar = {"$explode": {"$field": "bar", "$separator": " "}}
        create_index(
            klass.db,
            "_design/jq-split",
            "jq-split-json-index",
            {"bar_words": explode_bar},
        )

        example_doc = {"_id": "example-doc", "bar": "a b c"}
        klass.db.save_docs([example_doc])

    def test_index_by_length(self):
        # NOTE(review): despite the name, this queries by one of the words
        # of "bar", not by a length. Consider renaming.
        matches = self.db.find({"bar_words": "b"})
        found_ids = [match["_id"] for match in matches]
        self.assertEqual(found_ids, ["example-doc"])