Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

On demand indexing #1260

Merged
merged 38 commits into from
Apr 8, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
2ff5acf
Store POIs on-demand
robertoaloi Mar 30, 2022
a9882c5
Store references on demand
robertoaloi Apr 1, 2022
7dd5582
Store signatures on demand
robertoaloi Apr 1, 2022
fffe202
Patches
robertoaloi Apr 1, 2022
75d30f2
Add missing module
robertoaloi Apr 1, 2022
6c20c94
Lower severity for missing config message
robertoaloi Apr 1, 2022
92c7e62
Fix end-per-testcase
robertoaloi Apr 1, 2022
055a922
Lower verbosity
robertoaloi Apr 1, 2022
8934fad
Only index specs when needed
robertoaloi Apr 4, 2022
0af544f
Prune signatures and references during re-indexing
robertoaloi Apr 4, 2022
9a478cc
Only register references when necessary
robertoaloi Apr 4, 2022
03579f9
Refactor text search
robertoaloi Apr 4, 2022
9c73c66
Handle whitespaces, actually store references
robertoaloi Apr 4, 2022
1a59fd8
Fixes
robertoaloi Apr 4, 2022
afecb31
Fix Dialyzer specs
robertoaloi Apr 5, 2022
056aa61
In memory grep
robertoaloi Apr 5, 2022
5ba84b8
Do not rely on order
robertoaloi Apr 5, 2022
ff720ce
Refactoring and fixes
robertoaloi Apr 5, 2022
de7f47a
One buffer per document
robertoaloi Apr 6, 2022
99ac925
Resolve race in index buffer
robertoaloi Apr 6, 2022
b9c83d2
Lower verbosity of logs in case of shutdown
robertoaloi Apr 6, 2022
fc9c456
Set reference to undefined
robertoaloi Apr 6, 2022
cec3040
Show edits in logs
robertoaloi Apr 7, 2022
d136370
Make incremental indexing configurable
robertoaloi Apr 7, 2022
a003d31
Clarify indexing logs
robertoaloi Apr 7, 2022
dc94b07
Show background job title on error
robertoaloi Apr 7, 2022
41ca00b
Revert configuration of Incremental
robertoaloi Apr 7, 2022
413b5dd
Basic words scanner
robertoaloi Apr 7, 2022
5416d7a
Lower verbosity
robertoaloi Apr 7, 2022
d98eb18
Real devs write match specs by hand. Not.
robertoaloi Apr 7, 2022
d59306d
Explicitly store buffers
robertoaloi Apr 7, 2022
8d34dfa
Fixes
robertoaloi Apr 7, 2022
3971114
Debug failing testcase in OTP 22
robertoaloi Apr 7, 2022
bbcb689
Do not delete specs for header files, to avoid conflict
robertoaloi Apr 8, 2022
f054f92
Remove dbg expressions
robertoaloi Apr 8, 2022
8a91a23
Fix Dialyzer issues
robertoaloi Apr 8, 2022
161dc2c
Remove obsolete module
robertoaloi Apr 8, 2022
c505eff
Fix indexing of include attributes
robertoaloi Apr 8, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions apps/els_lsp/src/els_buffer_server.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
%%%=============================================================================
%%% @doc Buffer edits to an open buffer to avoid re-indexing too often.
%%% @end
%%%=============================================================================
-module(els_buffer_server).

%%==============================================================================
%% API
%%==============================================================================
-export([ new/2
, stop/1
, apply_edits/2
, flush/1
]).

-export([ start_link/2 ]).

%%==============================================================================
%% Callbacks for the gen_server behaviour
%%==============================================================================
-behaviour(gen_server).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain a bit why we need a buffer server in the first place, rather than keeping the text directly and using plain functions for processing edits?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The buffer was introduced as part of support for incremental text sync and I just ported it to a gen_server, so it's not new with this feature.

The main intention was to buffer didChange events, so that re-indexing would be retriggered not too often (a didChange event is sent at every keystroke). But I agree that it complicates things and probably introduces some races.

-export([ init/1
, handle_call/3
, handle_cast/2
, handle_info/2
]).

%%==============================================================================
%% Macro Definitions
%%==============================================================================
-define(FLUSH_DELAY, 200). %% ms

%%==============================================================================
%% Type Definitions
%%==============================================================================
-type text() :: binary().
-type state() :: #{ uri := uri()
, text := text()
, ref := undefined | reference()
}.
-type buffer() :: pid().

%%==============================================================================
%% Includes
%%==============================================================================
-include_lib("kernel/include/logger.hrl").
-include("els_lsp.hrl").

%%==============================================================================
%% API
%%==============================================================================
-spec new(uri(), text()) -> {ok, pid()}.
new(Uri, Text) ->
supervisor:start_child(els_buffer_sup, [Uri, Text]).

-spec stop(buffer()) -> ok.
stop(Buffer) ->
supervisor:terminate_child(els_buffer_sup, Buffer).

-spec apply_edits(buffer(), [els_text:edit()]) -> ok.
apply_edits(Buffer, Edits) ->
gen_server:cast(Buffer, {apply_edits, Edits}).

-spec flush(buffer()) -> ok.
flush(Buffer) ->
gen_server:call(Buffer, {flush}).

-spec start_link(uri(), text()) -> {ok, buffer()}.
start_link(Uri, Text) ->
gen_server:start_link(?MODULE, {Uri, Text}, []).

%%==============================================================================
%% Callbacks for the gen_server behaviour
%%==============================================================================
-spec init({uri(), text()}) -> {ok, state()}.
init({Uri, Text}) ->
do_flush(Uri, Text),
{ok, #{ uri => Uri, text => Text, ref => undefined }}.

-spec handle_call(any(), {pid(), any()}, state()) -> {reply, any(), state()}.
handle_call({flush}, _From, #{uri := Uri, text := Text} = State) ->
?LOG_DEBUG("[~p] Flushing request [uri=~p]", [?MODULE, Uri]),
do_flush(Uri, Text),
{reply, ok, State};
handle_call(Request, _From, State) ->
{reply, {not_implemented, Request}, State}.

-spec handle_cast(any(), state()) -> {noreply, state()}.
handle_cast({apply_edits, Edits}, #{uri := Uri} = State) ->
?LOG_DEBUG("[~p] Applying edits [uri=~p]", [?MODULE, Uri]),
#{text := Text0, ref := Ref0} = State,
case Ref0 of
undefined -> ok;
_ -> erlang:cancel_timer(Ref0)
end,
Text = els_text:apply_edits(Text0, Edits),
Ref = erlang:send_after(?FLUSH_DELAY, self(), flush),
{noreply, State#{text => Text, ref => Ref}}.

-spec handle_info(any(), state()) -> {noreply, state()}.
handle_info(flush, #{uri := Uri, text := Text} = State) ->
?LOG_DEBUG("[~p] Scheduled flushing [uri=~p]", [?MODULE, Uri]),
do_flush(Uri, Text),
{noreply, State};
handle_info(_Request, State) ->
{noreply, State}.

%%==============================================================================
%% Internal Functions
%%==============================================================================
-spec do_flush(uri(), text()) -> ok.
do_flush(Uri, Text) ->
{ok, Document} = els_utils:lookup_document(Uri),
els_indexing:deep_index(Document#{text => Text, buffer => self()}).
47 changes: 47 additions & 0 deletions apps/els_lsp/src/els_buffer_sup.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
%%==============================================================================
%% Supervisor for Buffers
%%==============================================================================
-module(els_buffer_sup).

%%==============================================================================
%% Behaviours
%%==============================================================================
-behaviour(supervisor).

%%==============================================================================
%% Exports
%%==============================================================================

%% API
-export([ start_link/0 ]).

%% Supervisor Callbacks
-export([ init/1 ]).

%%==============================================================================
%% Defines
%%==============================================================================
-define(SERVER, ?MODULE).

%%==============================================================================
%% API
%%==============================================================================
-spec start_link() -> {ok, pid()}.
start_link() ->
supervisor:start_link({local, ?SERVER}, ?MODULE, []).

%%==============================================================================
%% Supervisor callbacks
%%==============================================================================
-spec init([]) -> {ok, {supervisor:sup_flags(), [supervisor:child_spec()]}}.
init([]) ->
SupFlags = #{ strategy => simple_one_for_one
, intensity => 5
, period => 60
},
ChildSpecs = [#{ id => els_buffer_sup
, start => {els_buffer_server, start_link, []}
, restart => temporary
, shutdown => 5000
}],
{ok, {SupFlags, ChildSpecs}}.
13 changes: 11 additions & 2 deletions apps/els_lsp/src/els_completion_provider.erl
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,17 @@ handle_request({completion, Params}, State) ->
}
, <<"textDocument">> := #{<<"uri">> := Uri}
} = Params,
ok = els_index_buffer:flush(Uri),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit of context may help here.

Completion requests typically come at the same time as edits. With the current implementation of the els_index_buffer it could happen that, during a completion, edits are still not processed (there's some basic protection against this, but things can still fail).

The completion provider assumes a specific length in the line when reading the text after the flush, but it's not guaranteed that all edits are applied. Fast typers can see the provider (and eventually the whole server) crashing.

With the new approach, the idea is to block callers of the flush for a certain amount of time (right now 200ms), but if edits keep coming the count restarts.

This should also prevent indexing happening multiple times.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm... I'm not sure I fully understand how the processing of everything happens.

Can you confirm that didChange notifications are processed asynchronously - there's no guarantee that a later request will observe the earlier change.
It seems like this one thing introduces a lot of conceptual complexity in how everything fits together

{ok, #{text := Text} = Document} = els_utils:lookup_document(Uri),
{ok, Document} = els_utils:lookup_document(Uri),
#{text := Text, buffer := Buffer} = Document,
%% Ensure there are no pending changes. This causes a heavy sync
%% point, since for a big module indexing could take a while. Maybe
%% just do a 'soft' flush and avoid indexing here?
case Buffer of
undefined ->
ok;
_ ->
ok = els_buffer_server:flush(Buffer)
Copy link
Contributor

@TheGeorge TheGeorge Apr 6, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we cannot be even more clever here and to make flushing delayed. Looking at the LSP, I see that we can return partial completion results. So what we can do to increase responsiveness is this:

  • if no pending changes, just what we do now
  • i pending changes, trigger flushing, and return completion results based on current knowledge
  • when indexing is complete return full completion results

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Partial results are not yet implemented, I will add this as a follow up.

end,
Context = maps:get( <<"context">>
, Params
, #{ <<"triggerKind">> => ?COMPLETION_TRIGGER_KIND_INVOKED }
Expand Down
12 changes: 11 additions & 1 deletion apps/els_lsp/src/els_dt_document.erl
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
-type id() :: atom().
-type kind() :: module | header | other.
-type source() :: otp | app | dep.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were not storing references for OTP modules during the initial indexing.
Now that indexing happens on-demand, we need a way to distinguish OTP modules or app/dep modules when reading a document .

-type buffer() :: pid().
-export_type([source/0]).

%%==============================================================================
Expand All @@ -58,6 +59,7 @@
, md5 :: binary() | '_'
, pois :: [poi()] | '_' | ondemand
, source :: source() | '$2'
, buffer :: buffer() | '_' | undefined
}).
-type els_dt_document() :: #els_dt_document{}.

Expand All @@ -68,6 +70,7 @@
, md5 => binary()
, pois => [poi()] | ondemand
, source => source()
, buffer => buffer() | undefined
}.
-export_type([ id/0
, item/0
Expand Down Expand Up @@ -97,6 +100,7 @@ from_item(#{ uri := Uri
, md5 := MD5
, pois := POIs
, source := Source
, buffer := Buffer
}) ->
#els_dt_document{ uri = Uri
, id = Id
Expand All @@ -105,6 +109,7 @@ from_item(#{ uri := Uri
, md5 = MD5
, pois = POIs
, source = Source
, buffer = Buffer
}.

-spec to_item(els_dt_document()) -> item().
Expand All @@ -115,6 +120,7 @@ to_item(#els_dt_document{ uri = Uri
, md5 = MD5
, pois = POIs
, source = Source
, buffer = Buffer
}) ->
#{ uri => Uri
, id => Id
Expand All @@ -123,6 +129,7 @@ to_item(#els_dt_document{ uri = Uri
, md5 => MD5
, pois => POIs
, source => Source
, buffer => Buffer
}.

-spec insert(item()) -> ok | {error, any()}.
Expand Down Expand Up @@ -162,6 +169,7 @@ new(Uri, Text, Id, Kind, Source) ->
, md5 => MD5
, pois => ondemand
, source => Source
, buffer => undefined
}.

%% @doc Returns the list of POIs for the current document
Expand Down Expand Up @@ -221,10 +229,12 @@ find_candidates(Pattern) ->
%% when Source =/= otp -> {Uri, Text} end).
MS = [{#els_dt_document{ uri = '$1'
, source = '$2'
, buffer = '_'
, kind = '_'
, text = '$3'
, md5 = '_'
,pois = '_'}
, pois = '_'
}
, [{'=/=', '$2', otp}]
, [{{'$1', '$3'}}]}],
All = ets:select(name(), MS),
Expand Down
4 changes: 2 additions & 2 deletions apps/els_lsp/src/els_dt_signatures.erl
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ insert(Map) when is_map(Map) ->

-spec lookup(mfa()) -> {ok, [item()]}.
lookup({M, _F, _A} = MFA) ->
{ok, Uris} = els_utils:find_modules(M),
[els_indexing:ensure_deeply_indexed(Uri) || Uri <- Uris],
%% By finding a module, we also ensure the module is deeply indexed.
{ok, _Uris} = els_utils:find_modules(M),
{ok, Items} = els_db:lookup(name(), MFA),
{ok, [to_item(Item) || Item <- Items]}.

Expand Down
Loading