From 847e884a5360a295921edd841e1de261ffdfeea4 Mon Sep 17 00:00:00 2001
From: Daniel Moody
Date: Fri, 8 Jul 2022 19:08:41 +0000
Subject: [PATCH] SERVER-67051 SERVER-67052 SERVER-67053 SERVER-67054 add per action metrics

---
 SConstruct                                    |   6 +-
 etc/pip/components/build_metrics.req          |   1 +
 .../site_tools/build_metrics/__init__.py      |  28 +--
 .../build_metrics/build_metrics_format.schema |  23 +++
 site_scons/site_tools/build_metrics/memory.py |  19 +-
 .../build_metrics/per_action_metrics.py       | 186 ++++++++++++++++++
 .../site_tools/build_metrics/protocol.py      |   3 +-
 site_scons/site_tools/build_metrics/util.py   |  33 +++-
 8 files changed, 265 insertions(+), 34 deletions(-)
 create mode 100644 site_scons/site_tools/build_metrics/per_action_metrics.py

diff --git a/SConstruct b/SConstruct
index 00c3614a2e56e..7267eed814d23 100755
--- a/SConstruct
+++ b/SConstruct
@@ -122,9 +122,9 @@ add_option(
     choices=list(BUILD_PROFILES.keys()),
     default='default',
     type='choice',
-    help='''Short hand for common build options. These profiles are well supported by SDP and are 
-    kept up to date. Unless you need something specific, it is recommended that you only build with 
-    these. san is the recommeneded profile since it exposes bugs before they are found in patch
+    help='''Short hand for common build options. These profiles are well supported by SDP and are
+    kept up to date. Unless you need something specific, it is recommended that you only build with
+    these. san is the recommended profile since it exposes bugs before they are found in patch
     builds. Check out site_scons/mongo/build_profiles.py to see each profile.''',
 )
 
diff --git a/etc/pip/components/build_metrics.req b/etc/pip/components/build_metrics.req
index 17a95891ed289..656758aa27942 100644
--- a/etc/pip/components/build_metrics.req
+++ b/etc/pip/components/build_metrics.req
@@ -1,2 +1,3 @@
 psutil
 jsonschema
+memory_profiler
\ No newline at end of file
diff --git a/site_scons/site_tools/build_metrics/__init__.py b/site_scons/site_tools/build_metrics/__init__.py
index f29c332893112..f987c9b99eb67 100644
--- a/site_scons/site_tools/build_metrics/__init__.py
+++ b/site_scons/site_tools/build_metrics/__init__.py
@@ -30,28 +30,20 @@
 from jsonschema import validate
 import psutil
 
-from .util import add_meta_data, get_build_metric_dict
-import build_metrics.memory
+from .util import add_meta_data, get_build_metric_dict, CaptureAtexits
+from .memory import MemoryMonitor
+from .per_action_metrics import PerActionMetrics
 
 _SEC_TO_NANOSEC_FACTOR = 1000000000.0
-
-
-# This section is an excerpt of the original
-# https://stackoverflow.com/a/63029332/1644736
-class CaptureAtexits:
-    def __init__(self):
-        self.captured = []
-
-    def __eq__(self, other):
-        self.captured.append(other)
-        return False
+_METRICS_COLLECTORS = []
 
 
 def finalize_build_metrics(env):
     metrics = get_build_metric_dict()
     metrics['end_time'] = time.time_ns()
     for m in _METRICS_COLLECTORS:
-        m.finalize()
+        key, value = m.finalize()
+        metrics[key] = value
 
     with open(os.path.join(os.path.dirname(__file__), "build_metrics_format.schema")) as f:
         validate(metrics, json.load(f))
@@ -64,9 +56,6 @@ def finalize_build_metrics(env):
         json.dump(metrics, f, indent=4, sort_keys=True)
 
 
-_METRICS_COLLECTORS = []
-
-
 def generate(env, **kwargs):
     global _METRICS_COLLECTORS
 
@@ -89,7 +78,10 @@ def generate(env, **kwargs):
     metrics['start_time'] = int(p.create_time() * _SEC_TO_NANOSEC_FACTOR)
     metrics['scons_command'] = " ".join([sys.executable] + sys.argv)
 
-    _METRICS_COLLECTORS = [memory.MemoryMonitor(psutil.Process().memory_info().vms)]
+    _METRICS_COLLECTORS = [
+        MemoryMonitor(psutil.Process().memory_info().vms),
+        PerActionMetrics(),
+    ]
 
 
 def exists(env):
diff --git a/site_scons/site_tools/build_metrics/build_metrics_format.schema b/site_scons/site_tools/build_metrics/build_metrics_format.schema
index 4b6fec5006858..61343dc819fc1 100644
--- a/site_scons/site_tools/build_metrics/build_metrics_format.schema
+++ b/site_scons/site_tools/build_metrics/build_metrics_format.schema
@@ -33,6 +33,29 @@
                 "arithmetic_mean": {"type": "number"},
                 "start_mem": { "$ref": "#/$defs/memory" }
             }
+        },
+        "build_tasks" : {
+            "type": "array",
+            "task": {
+                "type": "object",
+                "properties": {
+                    "array_index": {"type": "integer"},
+                    "start_time": {"$ref": "#/$defs/timestamp"},
+                    "end_time" : {"$ref": "#/$defs/timestamp"},
+                    "cpu_time" : {"type" : "integer"},
+                    "builder" : {"type" : "str"},
+                    "mem_usage": {"$ref": "#/$defs/memory"},
+                    "inputs" : {
+                        "type": "array",
+                        "input": { "type": "str"}
+                    },
+                    "outputs" : {
+                        "type": "array",
+                        "output": { "type": "str"}
+                    },
+                    "action": {"type" : "str"}
+                }
+            }
         }
     }
 }
diff --git a/site_scons/site_tools/build_metrics/memory.py b/site_scons/site_tools/build_metrics/memory.py
index efa8320eae38b..f3338bbf7ee6e 100644
--- a/site_scons/site_tools/build_metrics/memory.py
+++ b/site_scons/site_tools/build_metrics/memory.py
@@ -2,7 +2,7 @@
 import time
 import psutil
 import sys
-from .util import timestamp_now, get_build_metric_dict
+from .util import timestamp_now
 from .protocol import BuildMetricsCollector
 
 
@@ -11,8 +11,7 @@ class MemoryMonitor(BuildMetricsCollector):
     def __init__(self, starting_memory_adjustment=0):
         self._stop = False
 
-        metrics = get_build_metric_dict()
-        metrics['system_memory'] = {
+        self.system_memory_metrics = {
             "mem_over_time": [],
             "start_mem": used_memory() - starting_memory_adjustment,
         }
@@ -24,20 +23,19 @@ def finalize(self):
         self._stop = True
         self._record_data_point()
 
-        metrics = get_build_metric_dict()
-        sys_mem = metrics["system_memory"]
-
         mean = 0
         max_ = 0
         count = 1
-        for val in sys_mem["mem_over_time"]:
+        for val in self.system_memory_metrics["mem_over_time"]:
             max_ = max(val["memory"], max_)
             # iterative mean calculation algorithm from https://stackoverflow.com/a/1934266
             mean += (val["memory"] - mean) / count
             count += 1
 
-        sys_mem["arithmetic_mean"] = mean
-        sys_mem["max"] = max_
+        self.system_memory_metrics["arithmetic_mean"] = mean
+        self.system_memory_metrics["max"] = max_
+
+        return 'system_memory', self.system_memory_metrics
 
     def memory_monitor(self):
         while not self._stop:
@@ -51,8 +49,7 @@ def _record_data_point(self):
         used_mem = used_memory()
         now_time = timestamp_now()
 
-        metrics = get_build_metric_dict()
-        metrics["system_memory"]["mem_over_time"].append(
+        self.system_memory_metrics["mem_over_time"].append(
             {"timestamp": now_time, "memory": used_mem})
 
 
diff --git a/site_scons/site_tools/build_metrics/per_action_metrics.py b/site_scons/site_tools/build_metrics/per_action_metrics.py
new file mode 100644
index 0000000000000..4ad92deff3351
--- /dev/null
+++ b/site_scons/site_tools/build_metrics/per_action_metrics.py
@@ -0,0 +1,186 @@
+import functools
+import subprocess
+import time
+import psutil
+import memory_profiler
+import SCons
+import sys
+
+from .util import fullname
+from .protocol import BuildMetricsCollector
+
+
+class ProfiledFunction:
+    """
+    A class which mimics a FunctionAction function, behaving exactly the same
+    as the original FunctionAction function, except that it gathers perf metrics
+    during the __call__ of the function.
+ """ + + def __init__(self, per_action_instance, original_func) -> None: + self.original_func = original_func + self.per_action_instance = per_action_instance + + if hasattr(original_func, 'strfunction'): + self.strfunction = original_func.strfunction + + if isinstance(self.original_func, SCons.Action.ActionCaller): + self.original_func = original_func.__call__ + + self.__name__ = "profiled_function" + + def __call__(self, target, source, env): + return self.function_action_execute(target, source, env) + + def __str__(self) -> str: + return str(self.original_func) + + def function_action_execute(self, target, source, env): + + task_metrics = { + 'outputs': [str(t) for t in target], + 'inputs': [str(s) for s in source], + 'action': fullname(self.original_func), + 'start_time': time.time_ns(), + 'builder': target[0].get_builder().get_name(target[0].get_env()), + } + + profile = memory_profiler.LineProfiler(include_children=False) + return_value = profile(self.original_func)(target=target, source=source, env=env) + + memory_increases_per_line = [] + for (file_where_code_is, lines_of_code) in profile.code_map.items(): + + # skip the first item in the list because this is just the initial + # memory state, and we are interested just in the increases + for (line_number, memory_usage) in list(lines_of_code)[1:]: + if memory_usage: + memory_increase = memory_usage[0] + memory_increases_per_line.append(memory_increase) + + task_metrics['cpu_time'] = time.thread_time_ns() + task_metrics['end_time'] = time.time_ns() + task_metrics['mem_usage'] = int(sum(memory_increases_per_line) * 1024 * 1024) + + self.per_action_instance.build_tasks_metrics.append(task_metrics) + task_metrics['array_index'] = self.per_action_instance.build_tasks_metrics.index( + task_metrics) + + return return_value + + +class PerActionMetrics(BuildMetricsCollector): + """ + Creates hooks the CommandAction and FunctionAction execute calls in SCons to track + CPU, memory and duration of execution of said action types. + """ + + def __init__(self) -> None: + self.build_tasks_metrics = [] + + # place hooks into scons internals to give us a chance to + # adjust things to take measurements + original_command_execute = SCons.Action.CommandAction.execute + + def build_metrics_CommandAction_execute(command_action_instance, target, source, env, + executor=None): + if 'conftest' not in str(target[0]): + + # We use the SPAWN var to control the SCons proper execute to call our spawn. + # We set the spawn back after the proper execute is done + original_spawn = env['SPAWN'] + env['SPAWN'] = functools.partial(self.command_spawn_func, target=target, + source=source) + result = original_command_execute(command_action_instance, target, source, env, + executor) + env['SPAWN'] = original_spawn + else: + result = original_command_execute(command_action_instance, target, source, env, + executor) + return result + + SCons.Action.CommandAction.execute = build_metrics_CommandAction_execute + + original_function_action_execute = SCons.Action.FunctionAction.execute + + def build_metrics_FunctionAction_execute(function_action_instance, target, source, env, + executor=None): + if target and 'conftest' not in str(target[0]) and not isinstance( + function_action_instance.execfunction, ProfiledFunction): + + # set our profiled function class as the function action call. Profiled function + # should look and behave exactly as the original function, besides the __call__ + # behaving differently. 
+                # We set the original function back afterwards just in case.
+                original_func = function_action_instance.execfunction
+                function_action_instance.execfunction = ProfiledFunction(
+                    self, function_action_instance.execfunction)
+                original_function_action_execute(function_action_instance, target, source, env,
+                                                 executor)
+                function_action_instance.execfunction = original_func
+            else:
+                return original_function_action_execute(function_action_instance, target, source,
+                                                        env, executor)
+
+        SCons.Action.FunctionAction.execute = build_metrics_FunctionAction_execute
+
+    def get_mem_cpu(self, proc):
+        with proc.oneshot():
+            cpu = (proc.cpu_times().system + proc.cpu_times().user)
+            mem = proc.memory_info().vms
+        for p in proc.children(recursive=True):
+            with p.oneshot():
+                cpu += (p.cpu_times().system + p.cpu_times().user)
+                mem += p.memory_info().vms
+        return cpu, mem
+
+    def track_process(self, proc, target):
+        """ Poll CPU and virtual memory usage of a process and its children. """
+        try:
+            peak_cpu, peak_mem = self.get_mem_cpu(proc)
+        except (psutil.NoSuchProcess, psutil.AccessDenied):
+            return 0, 0
+
+        while proc.poll() is None:
+            try:
+                cpu, mem = self.get_mem_cpu(proc)
+                if peak_cpu < cpu:
+                    peak_cpu = cpu
+                if peak_mem < mem:
+                    peak_mem = mem
+            except (psutil.NoSuchProcess, psutil.AccessDenied):
+                pass
+            else:
+                time.sleep(0.01)
+
+        return peak_cpu, peak_mem
+
+    def command_spawn_func(self, sh, escape, cmd, args, env, target, source):
+
+        task_metrics = {
+            'outputs': [str(t) for t in target],
+            'inputs': [str(s) for s in source],
+            'action': ' '.join(args),
+            'start_time': time.time_ns(),
+            'builder': target[0].get_builder().get_name(target[0].get_env()),
+        }
+
+        if sys.platform[:3] == 'win':
+            # have to use shell=True for windows because of https://github.com/python/cpython/issues/53908
+            proc = psutil.Popen(' '.join(args), env=env, close_fds=True, shell=True)
+        else:
+            proc = psutil.Popen([sh, '-c', ' '.join(args)], env=env, close_fds=True)
+
+        cpu_usage, mem_usage = self.track_process(proc, target[0])
+        return_code = proc.wait()
+
+        task_metrics['end_time'] = time.time_ns()
+        task_metrics['cpu_time'] = int(cpu_usage * (10.0**9.0))
+        task_metrics['mem_usage'] = int(mem_usage)
+
+        self.build_tasks_metrics.append(task_metrics)
+        task_metrics['array_index'] = self.build_tasks_metrics.index(task_metrics)
+
+        return return_code
+
+    def finalize(self):
+        return 'build_tasks', self.build_tasks_metrics
diff --git a/site_scons/site_tools/build_metrics/protocol.py b/site_scons/site_tools/build_metrics/protocol.py
index 39d6fd8b749e9..cc3ea17f64b8e 100644
--- a/site_scons/site_tools/build_metrics/protocol.py
+++ b/site_scons/site_tools/build_metrics/protocol.py
@@ -1,8 +1,9 @@
+from typing import Tuple
 from typing_extensions import Protocol
 from abc import abstractmethod
 
 
 class BuildMetricsCollector(Protocol):
     @abstractmethod
-    def finalize(self):
+    def finalize(self) -> Tuple[str, str]:
         raise NotImplementedError
diff --git a/site_scons/site_tools/build_metrics/util.py b/site_scons/site_tools/build_metrics/util.py
index 967bc5f33cc48..5762033f00770 100644
--- a/site_scons/site_tools/build_metrics/util.py
+++ b/site_scons/site_tools/build_metrics/util.py
@@ -1,9 +1,40 @@
-import datetime
 import time
 
 _BUILD_METRIC_DATA = {}
 
 
+# This section is from the original
+# https://stackoverflow.com/a/70693158/1644736
+def fullname(o):
+    try:
+        # if o is a class or function, get module directly
+        module = o.__module__
+    except AttributeError:
+        # then get module from o's class
+        module = o.__class__.__module__
+    try:
+        # if o is a class or function, get name directly
+        name = o.__qualname__
+    except AttributeError:
+        # then get o's class name
+        name = o.__class__.__qualname__
+    # if o is a method of builtin class, then module will be None
+    if module == 'builtins' or module is None:
+        return name
+    return module + '.' + name
+
+
+# This section is an excerpt of the original
+# https://stackoverflow.com/a/63029332/1644736
+class CaptureAtexits:
+    def __init__(self):
+        self.captured = []
+
+    def __eq__(self, other):
+        self.captured.append(other)
+        return False
+
+
 def get_build_metric_dict():
     global _BUILD_METRIC_DATA
     return _BUILD_METRIC_DATA
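
Editor's note (not part of the patch): for readers wiring up an additional collector, here is a minimal sketch of how the updated BuildMetricsCollector protocol is consumed. The class name WallClockCollector and the 'wall_clock' key are hypothetical. finalize_build_metrics() unpacks each collector's finalize() as "key, value = m.finalize()" and stores the payload under metrics[key] before validating against build_metrics_format.schema, so a new payload would also need a matching schema entry. (protocol.py annotates finalize() as Tuple[str, str], but the built-in collectors actually return a dict or list as the second element.)

    import time

    # relative import, as used by the other collectors inside the build_metrics tool
    from .protocol import BuildMetricsCollector


    class WallClockCollector(BuildMetricsCollector):
        """Hypothetical example collector: records when collection started and finished."""

        def __init__(self) -> None:
            self.start_time = time.time_ns()

        def finalize(self):
            # Returned as a (metric_name, payload) pair; finalize_build_metrics()
            # merges it into the top-level metrics dict under 'wall_clock'.
            return 'wall_clock', {'start_time': self.start_time, 'end_time': time.time_ns()}

To activate such a collector, its instance would be appended to the _METRICS_COLLECTORS list built in generate() in site_scons/site_tools/build_metrics/__init__.py.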