Skip to content

Commit

Permalink
Merge pull request #5 from cloudant/33243-provide-option-for-running-on-all-cluster-nodes
Browse files Browse the repository at this point in the history

33243 provide option for running on all cluster nodes
  • Loading branch information
mikewallace1979 committed Aug 14, 2014
2 parents 7b90d12 + a564c9d commit 28a1e57
Show file tree
Hide file tree
Showing 16 changed files with 246 additions and 98 deletions.
24 changes: 15 additions & 9 deletions src/weatherreport/src/weatherreport.erl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@
{level, $d, "level", {atom, notice}, "Minimum message severity level (default: notice)"},
{expert, $e, "expert", undefined, "Perform more detailed diagnostics" },
{usage, $h, "help", undefined, "Display help/usage" },
{list, $l, "list", undefined, "Describe available diagnostic tasks" }
{list, $l, "list", undefined, "Describe available diagnostic tasks" },
{all_nodes, $a, "all-nodes", undefined, "Run weatherreport on all cluster nodes" }
]).

-define(USAGE_OPTS, [ O || O <- ?OPTS,
Expand Down Expand Up @@ -109,21 +110,23 @@ run(InputChecks) ->
ShortNames = [{weatherreport_util:short_name(Mod), Mod} || Mod <- weatherreport_check:modules() ],
element(1, lists:foldr(fun validate_checks/2, {[], ShortNames}, InputChecks))
end,
Messages = lists:foldl(
fun(Mod, Acc) -> Acc ++ weatherreport_check:check(Mod) end,
[],
Checks
),
Messages = case application:get_env(weatherreport, all_nodes) of
{ok, true} ->
weatherreport_runner:run(Checks, all);
_ ->
weatherreport_runner:run(Checks)

end,
case Messages of
[] ->
io:format("No diagnostic messages to report.~n"),
halt(0);
_ ->
%% Print the most critical messages first
FilteredMessages = lists:filter(fun({Level,_,_}) ->
FilteredMessages = lists:filter(fun({_,Level,_,_}) ->
weatherreport_util:should_log(Level)
end, Messages),
SortedMessages = lists:sort(fun({ALevel, _, _}, {BLevel, _, _}) ->
SortedMessages = lists:sort(fun({_, ALevel, _, _}, {_, BLevel, _, _}) ->
twig_util:level(ALevel) =< twig_util:level(BLevel)
end, FilteredMessages),
case SortedMessages of
Expand Down Expand Up @@ -161,7 +164,10 @@ process_option({level, Level}, Result) ->
application:set_env(weatherreport, log_level, Level),
Result;
process_option(expert, Result) ->
application:set_env(weatherreport, expert_mode, true),
application:set_env(weatherreport, expert, true),
Result;
process_option(all_nodes, Result) ->
application:set_env(weatherreport, all_nodes, true),
Result;
process_option(list, usage) -> %% Help should have precedence over listing checks
usage;
Expand Down
18 changes: 9 additions & 9 deletions src/weatherreport/src/weatherreport_check.erl
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@

-module(weatherreport_check).
-export([behaviour_info/1]).
-export([check/1,
-export([check/2,
modules/0,
print/1]).

Expand All @@ -67,18 +67,18 @@
behaviour_info(callbacks) ->
[{description, 0},
{valid, 0},
{check, 0},
{check, 1},
{format, 1}];
behaviour_info(_) ->
undefined.

%% @doc Runs the diagnostic in the given module, if it is valid. Returns a
%% list of messages that will be printed later using print/1.
-spec check(Module::module()) -> [{atom(), module(), term()}].
check(Module) ->
-spec check(Module::module(), list()) -> [{atom(), module(), term()}].
check(Module, Opts) ->
case Module:valid() of
true ->
[ {Level, Module, Message} || {Level, Message} <- Module:check() ];
[ {Level, Module, Message} || {Level, Message} <- Module:check(Opts) ];
_ ->
[]
end.
Expand All @@ -97,11 +97,11 @@ modules() ->
%% module's format/1 function will be called to provide a
%% human-readable message. It should return an iolist() or a 2-tuple
%% consisting of a format string and a list of terms.
-spec print({Level::atom(), Module::module(), Data::term()}) -> ok.
print({Level, Mod, Data}) ->
-spec print({Node::atom(), Level::atom(), Module::module(), Data::term()}) -> ok.
print({Node, Level, Mod, Data}) ->
case Mod:format(Data) of
{Format, Terms} ->
weatherreport_util:log(Level, Format, Terms);
weatherreport_util:log(Node, Level, Format, Terms);
String ->
weatherreport_util:log(Level, String)
weatherreport_util:log(Node, Level, String)
end.
6 changes: 3 additions & 3 deletions src/weatherreport/src/weatherreport_check_custodian.erl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-include_lib("eunit/include/eunit.hrl").
Expand All @@ -58,8 +58,8 @@ n_to_level(_) ->
report_to_message({DbName, ShardRange, {Type, N}}, NodeName) ->
{n_to_level(N), {Type, N, DbName, ShardRange, NodeName}}.

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
NodeName = weatherreport_node:nodename(),
case weatherreport_node:local_command(custodian, report, []) of
[] ->
Expand Down
8 changes: 4 additions & 4 deletions src/weatherreport/src/weatherreport_check_disk.erl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-spec description() -> string().
Expand All @@ -59,8 +59,8 @@ description() ->
valid() ->
true.

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
DataDirs = weatherreport_config:data_directories(),
%% Add additional disk checks in the function below
lists:flatmap(fun(Dir) ->
Expand Down Expand Up @@ -157,8 +157,8 @@ check_is_file_readable(Directory) ->
%% Check if the directory is mounted with 'noatime'
check_atime(Directory) ->
File = filename:join([Directory, ?TEST_FILE]),
weatherreport_util:run_command("touch -at 201401010000.00 " ++ File),
{ok, FileInfo1} = file:read_file_info(File),
timer:sleep(1001),
{ok, S} = file:open(File, [read]),
io:get_line(S, ''),
file:close(S),
Expand Down
6 changes: 3 additions & 3 deletions src/weatherreport/src/weatherreport_check_ioq.erl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-define(THRESHOLD, 500).
Expand Down Expand Up @@ -61,8 +61,8 @@ sum_queues([{channels, {Channels}} | Rest], Acc) ->
sum_queues([{_Name, Value} | Rest], Acc) ->
sum_queues(Rest, Acc + Value).

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
case weatherreport_node:local_command(ioq, get_disk_queues, []) of
Queues when is_list(Queues) ->
Total = sum_queues(Queues, 0),
Expand Down
6 changes: 3 additions & 3 deletions src/weatherreport/src/weatherreport_check_mem3_sync.erl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-spec description() -> string().
Expand All @@ -39,8 +39,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
NodeName = weatherreport_node:nodename(),
case weatherreport_node:local_command(erlang, whereis, [mem3_sync]) of
undefined ->
Expand Down
6 changes: 3 additions & 3 deletions src/weatherreport/src/weatherreport_check_membership.erl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-include_lib("eunit/include/eunit.hrl").
Expand All @@ -49,8 +49,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
NodeName = weatherreport_node:nodename(),
Members = weatherreport_node:local_command(mem3, nodes, []),
case lists:member(NodeName, Members) of
Expand Down
6 changes: 3 additions & 3 deletions src/weatherreport/src/weatherreport_check_memory_use.erl
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-spec description() -> string().
Expand All @@ -47,8 +47,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
Pid = weatherreport_node:pid(),
Output = weatherreport_util:run_command("ps -o pmem,rss -p " ++ Pid),
[_,_,Percent, RealSize| _] = string:tokens(Output, "/n \n"),
Expand Down
29 changes: 15 additions & 14 deletions src/weatherreport/src/weatherreport_check_message_queues.erl
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-define(THRESHOLD, 1000).
Expand All @@ -41,32 +41,33 @@ description() ->
valid() ->
weatherreport_node:can_connect().

fold_processes([], Acc) ->
fold_processes([], Acc, _Opts) ->
Acc;
fold_processes([{Pid, MBoxSize, Info} | T], Acc) when MBoxSize < ?THRESHOLD ->
fold_processes([{Pid, MBoxSize, Info} | T], Acc, Opts) when MBoxSize < ?THRESHOLD ->
Message = {info, {mbox_ok, {Pid, MBoxSize, Info}}},
fold_processes(T, [Message | Acc]);
fold_processes([{Pid, MBoxSize, Info} | T], Acc) ->
case application:get_env(weatherreport, expert_mode) of
{ok, true} ->
fold_processes(T, [Message | Acc], Opts);
fold_processes([{Pid, MBoxSize, Info} | T], Acc, Opts) ->
Message = case proplists:get_value(expert, Opts) of
true ->
Pinfo = weatherreport_node:local_command(recon, info, [Pid]),
weatherreport_util:log(warning, "Process info for ~w:~n~p", [Pid, Pinfo]);
{warning, {mbox_large, {Pid, MBoxSize, Info, Pinfo}}};
_ ->
ok
{warning, {mbox_large, {Pid, MBoxSize, Info}}}
end,
Message = {warning, {mbox_large, {Pid, MBoxSize, Info}}},
fold_processes(T, [Message | Acc]).
fold_processes(T, [Message | Acc], Opts).

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(Opts) ->
Processes = weatherreport_node:local_command(
recon,
proc_count,
[message_queue_len, 10]
),
fold_processes(Processes, []).
fold_processes(Processes, [], Opts).

-spec format(term()) -> {io:format(), [term()]}.
format({mbox_large, {Pid, MBoxSize, Info, Pinfo}}) ->
{"Process ~w has excessive mailbox size of ~w: ~w ~w", [Pid, MBoxSize, Info, Pinfo]};
format({mbox_large, {Pid, MBoxSize, Info}}) ->
{"Process ~w has excessive mailbox size of ~w: ~w", [Pid, MBoxSize, Info]};
format({mbox_ok, {Pid, MBoxSize, Info}}) ->
Expand Down
6 changes: 3 additions & 3 deletions src/weatherreport/src/weatherreport_check_nodes_connected.erl
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-spec description() -> string().
Expand All @@ -45,8 +45,8 @@ description() ->
valid() ->
weatherreport_node:can_connect().

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(_Opts) ->
NodeName = weatherreport_node:nodename(),
ConnectedNodes = [NodeName | weatherreport_node:local_command(erlang, nodes, [])],
Members = weatherreport_node:local_command(mem3, nodes, []),
Expand Down
42 changes: 21 additions & 21 deletions src/weatherreport/src/weatherreport_check_process_calls.erl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

-export([description/0,
valid/0,
check/0,
check/1,
format/1]).

-define(THRESHOLD, 1000).
Expand All @@ -40,37 +40,33 @@ description() ->
valid() ->
weatherreport_node:can_connect().

fold_processes([], Acc, _Lim, _) ->
fold_processes([], Acc, _Lim, _CallType, _Opts) ->
Acc;
fold_processes(_, Acc, 0, _) ->
fold_processes(_, Acc, 0, _CallType, _Opts) ->
Acc;
fold_processes([{Count, {M, F, A}} | T], Acc, Lim, CallType) ->
fold_processes([{Count, {M, F, A}} | T], Acc, Lim, CallType, Opts) ->
Level = case Count > ?THRESHOLD of
true ->
warning;
_ ->
info
end,
case application:get_env(weatherreport, expert_mode) of
{ok, true} ->
Message = case proplists:get_value(expert, Opts) of
true ->
PidFun = list_to_atom("find_by_" ++ CallType ++ "_call"),
Pids = weatherreport_node:local_command(recon, PidFun, [M, F]),
lists:map(fun(Pid) ->
Pinfos = lists:map(fun(Pid) ->
Pinfo = weatherreport_node:local_command(recon, info, [Pid]),
weatherreport_util:log(
Level,
"Process info for ~w:~n~p",
[Pid, Pinfo]
)
end, lists:sublist(Pids, 10));
{Pid, Pinfo}
end, lists:sublist(Pids, 10)),
{Level, {process_count, {CallType, Count, M, F, A, Pinfos}}};
_ ->
ok
{Level, {process_count, {CallType, Count, M, F, A}}}
end,
Message = {Level, {process_count, {CallType, Count, M, F, A}}},
fold_processes(T, [Message | Acc], Lim - 1, CallType).
fold_processes(T, [Message | Acc], Lim - 1, CallType, Opts).

-spec check() -> [{atom(), term()}].
check() ->
-spec check(list()) -> [{atom(), term()}].
check(Opts) ->
CurrentCallCounts = weatherreport_node:local_command(
recon,
show_current_call_counts,
Expand All @@ -80,7 +76,8 @@ check() ->
CurrentCallCounts,
[],
10,
"current"
"current",
Opts
),
FirstCallCounts = weatherreport_node:local_command(
recon,
Expand All @@ -91,9 +88,12 @@ check() ->
FirstCallCounts,
CurrentCallMessages,
10,
"first"
"first",
Opts
)).

-spec format(term()) -> {io:format(), [term()]}.
format({process_count, {CallType, Count, M, F, A}}) ->
{"~w processes with ~s call ~w:~w/~w", [Count, CallType, M, F, A]}.
{"~w processes with ~s call ~w:~w/~w", [Count, CallType, M, F, A]};
format({process_count, {CallType, Count, M, F, A, Pinfos}}) ->
{"~w processes with ~s call ~w:~w/~w ~w", [Count, CallType, M, F, A, Pinfos]}.
Loading

0 comments on commit 28a1e57

Please sign in to comment.