SERVER-67051 SERVER-67052 SERVER-67053 SERVER-67054 add per action me…

…trics
HyaZz · Jul 8, 2022 · 847e884 · 847e884
1 parent ab6951d
commit 847e884
Show file tree

Hide file tree

Showing 8 changed files with 265 additions and 34 deletions.
diff --git a/SConstruct b/SConstruct
@@ -122,9 +122,9 @@ add_option(
  choices=list(BUILD_PROFILES.keys()),
  default='default',
  type='choice',
- help='''Short hand for common build options. These profiles are well supported by SDP and are 
- kept up to date. Unless you need something specific, it is recommended that you only build with 
- these. san is the recommeneded profile since it exposes bugs before they are found in patch 
+ help='''Short hand for common build options. These profiles are well supported by SDP and are
+ kept up to date. Unless you need something specific, it is recommended that you only build with
+ these. san is the recommended profile since it exposes bugs before they are found in patch
  builds. Check out site_scons/mongo/build_profiles.py to see each profile.''',
 )
 

diff --git a/etc/pip/components/build_metrics.req b/etc/pip/components/build_metrics.req
@@ -1,2 +1,3 @@
 psutil
 jsonschema
+memory_profiler
diff --git a/site_scons/site_tools/build_metrics/__init__.py b/site_scons/site_tools/build_metrics/__init__.py
@@ -30,28 +30,20 @@
 from jsonschema import validate
 import psutil
 
-from .util import add_meta_data, get_build_metric_dict
-import build_metrics.memory
+from .util import add_meta_data, get_build_metric_dict, CaptureAtexits
+from .memory import MemoryMonitor
+from .per_action_metrics import PerActionMetrics
 
 _SEC_TO_NANOSEC_FACTOR = 1000000000.0
-
-
-# This section is an excerpt of the original
-# https://stackoverflow.com/a/63029332/1644736
-class CaptureAtexits:
- def __init__(self):
- self.captured = []
-
- def __eq__(self, other):
- self.captured.append(other)
- return False
+_METRICS_COLLECTORS = []
 
 
 def finalize_build_metrics(env):
  metrics = get_build_metric_dict()
  metrics['end_time'] = time.time_ns()
  for m in _METRICS_COLLECTORS:
- m.finalize()
+ key, value = m.finalize()
+ metrics[key] = value
 
  with open(os.path.join(os.path.dirname(__file__), "build_metrics_format.schema")) as f:
  validate(metrics, json.load(f))
@@ -64,9 +56,6 @@ def finalize_build_metrics(env):
  json.dump(metrics, f, indent=4, sort_keys=True)
 
 
-_METRICS_COLLECTORS = []
-
-
 def generate(env, **kwargs):
  global _METRICS_COLLECTORS
 
@@ -89,7 +78,10 @@ def generate(env, **kwargs):
  metrics['start_time'] = int(p.create_time() * _SEC_TO_NANOSEC_FACTOR)
  metrics['scons_command'] = " ".join([sys.executable] + sys.argv)
 
- _METRICS_COLLECTORS = [memory.MemoryMonitor(psutil.Process().memory_info().vms)]
+ _METRICS_COLLECTORS = [
+ MemoryMonitor(psutil.Process().memory_info().vms),
+ PerActionMetrics(),
+ ]
 
 
 def exists(env):

diff --git a/site_scons/site_tools/build_metrics/build_metrics_format.schema b/site_scons/site_tools/build_metrics/build_metrics_format.schema
@@ -33,6 +33,29 @@
  "arithmetic_mean": {"type": "number"},
  "start_mem": { "$ref": "#/$defs/memory" }
  }
+ },
+ "build_tasks" : {
+ "type": "array",
+ "task": {
+ "type": "object",
+ "properties": {
+ "array_index": {"type": "integer"},
+ "start_time": {"$ref": "#/$defs/timestamp"},
+ "end_time" : {"$ref": "#/$defs/timestamp"},
+ "cpu_time" : {"type" : "integer"},
+ "builder" : {"type" : "str"},
+ "mem_usage": {"$ref": "#/$defs/memory"},
+ "inputs" : {
+ "type": "array",
+ "input": { "type": "str"}
+ },
+ "outputs" : {
+ "type": "array",
+ "output": { "type": "str"}
+ },
+ "action": {"type" : "str"}
+ }
+ }
  }
  }
 }
diff --git a/site_scons/site_tools/build_metrics/memory.py b/site_scons/site_tools/build_metrics/memory.py
@@ -2,7 +2,7 @@
 import time
 import psutil
 import sys
-from .util import timestamp_now, get_build_metric_dict
+from .util import timestamp_now
 from .protocol import BuildMetricsCollector
 
 
@@ -11,8 +11,7 @@ class MemoryMonitor(BuildMetricsCollector):
 
  def __init__(self, starting_memory_adjustment=0):
  self._stop = False
- metrics = get_build_metric_dict()
- metrics['system_memory'] = {
+ self.system_memory_metrics = {
  "mem_over_time": [],
  "start_mem": used_memory() - starting_memory_adjustment,
  }
@@ -24,20 +23,19 @@ def finalize(self):
  self._stop = True
  self._record_data_point()
 
- metrics = get_build_metric_dict()
- sys_mem = metrics["system_memory"]
-
  mean = 0
  max_ = 0
  count = 1
- for val in sys_mem["mem_over_time"]:
+ for val in self.system_memory_metrics["mem_over_time"]:
  max_ = max(val["memory"], max_)
  # iterative mean calculation algorithm from https://stackoverflow.com/a/1934266
  mean += (val["memory"] - mean) / count
  count += 1
 
- sys_mem["arithmetic_mean"] = mean
- sys_mem["max"] = max_
+ self.system_memory_metrics["arithmetic_mean"] = mean
+ self.system_memory_metrics["max"] = max_
+
+ return 'system_memory', self.system_memory_metrics
 
  def memory_monitor(self):
  while not self._stop:
@@ -51,8 +49,7 @@ def _record_data_point(self):
  used_mem = used_memory()
  now_time = timestamp_now()
 
- metrics = get_build_metric_dict()
- metrics["system_memory"]["mem_over_time"].append(
+ self.system_memory_metrics["mem_over_time"].append(
  {"timestamp": now_time, "memory": used_mem})
 
 

diff --git a/site_scons/site_tools/build_metrics/per_action_metrics.py b/site_scons/site_tools/build_metrics/per_action_metrics.py
@@ -0,0 +1,186 @@
+import functools
+import subprocess
+import time
+import psutil
+import memory_profiler
+import SCons
+import sys
+
+from .util import fullname
+from .protocol import BuildMetricsCollector
+
+
+class ProfiledFunction:
+ """
+ A class which mimics a FunctionAction function, behaving exactly the same
+ as the original FunctionAction function, except for gather perf metrics
+ during the __call__ of the function.
+ """
+
+ def __init__(self, per_action_instance, original_func) -> None:
+ self.original_func = original_func
+ self.per_action_instance = per_action_instance
+
+ if hasattr(original_func, 'strfunction'):
+ self.strfunction = original_func.strfunction
+
+ if isinstance(self.original_func, SCons.Action.ActionCaller):
+ self.original_func = original_func.__call__
+
+ self.__name__ = "profiled_function"
+
+ def __call__(self, target, source, env):
+ return self.function_action_execute(target, source, env)
+
+ def __str__(self) -> str:
+ return str(self.original_func)
+
+ def function_action_execute(self, target, source, env):
+
+ task_metrics = {
+ 'outputs': [str(t) for t in target],
+ 'inputs': [str(s) for s in source],
+ 'action': fullname(self.original_func),
+ 'start_time': time.time_ns(),
+ 'builder': target[0].get_builder().get_name(target[0].get_env()),
+ }
+
+ profile = memory_profiler.LineProfiler(include_children=False)
+ return_value = profile(self.original_func)(target=target, source=source, env=env)
+
+ memory_increases_per_line = []
+ for (file_where_code_is, lines_of_code) in profile.code_map.items():
+
+ # skip the first item in the list because this is just the initial
+ # memory state, and we are interested just in the increases
+ for (line_number, memory_usage) in list(lines_of_code)[1:]:
+ if memory_usage:
+ memory_increase = memory_usage[0]
+ memory_increases_per_line.append(memory_increase)
+
+ task_metrics['cpu_time'] = time.thread_time_ns()
+ task_metrics['end_time'] = time.time_ns()
+ task_metrics['mem_usage'] = int(sum(memory_increases_per_line) * 1024 * 1024)
+
+ self.per_action_instance.build_tasks_metrics.append(task_metrics)
+ task_metrics['array_index'] = self.per_action_instance.build_tasks_metrics.index(
+ task_metrics)
+
+ return return_value
+
+
+class PerActionMetrics(BuildMetricsCollector):
+ """
+ Creates hooks the CommandAction and FunctionAction execute calls in SCons to track
+ CPU, memory and duration of execution of said action types.
+ """
+
+ def __init__(self) -> None:
+ self.build_tasks_metrics = []
+
+ # place hooks into scons internals to give us a chance to
+ # adjust things to take measurements
+ original_command_execute = SCons.Action.CommandAction.execute
+
+ def build_metrics_CommandAction_execute(command_action_instance, target, source, env,
+ executor=None):
+ if 'conftest' not in str(target[0]):
+
+ # We use the SPAWN var to control the SCons proper execute to call our spawn.
+ # We set the spawn back after the proper execute is done
+ original_spawn = env['SPAWN']
+ env['SPAWN'] = functools.partial(self.command_spawn_func, target=target,
+ source=source)
+ result = original_command_execute(command_action_instance, target, source, env,
+ executor)
+ env['SPAWN'] = original_spawn
+ else:
+ result = original_command_execute(command_action_instance, target, source, env,
+ executor)
+ return result
+
+ SCons.Action.CommandAction.execute = build_metrics_CommandAction_execute
+
+ original_function_action_execute = SCons.Action.FunctionAction.execute
+
+ def build_metrics_FunctionAction_execute(function_action_instance, target, source, env,
+ executor=None):
+ if target and 'conftest' not in str(target[0]) and not isinstance(
+ function_action_instance.execfunction, ProfiledFunction):
+
+ # set our profiled function class as the function action call. Profiled function
+ # should look and behave exactly as the original function, besides the __call__
+ # behaving differently. We set back the original function for posterity just in case
+ original_func = function_action_instance.execfunction
+ function_action_instance.execfunction = ProfiledFunction(
+ self, function_action_instance.execfunction)
+ original_function_action_execute(function_action_instance, target, source, env,
+ executor)
+ function_action_instance.execfunction = original_func
+ else:
+ return original_function_action_execute(function_action_instance, target, source,
+ env, executor)
+
+ SCons.Action.FunctionAction.execute = build_metrics_FunctionAction_execute
+
+ def get_mem_cpu(self, proc):
+ with proc.oneshot():
+ cpu = (proc.cpu_times().system + proc.cpu_times().user)
+ mem = proc.memory_info().vms
+ for p in proc.children(recursive=True):
+ with p.oneshot():
+ cpu += (p.cpu_times().system + p.cpu_times().user)
+ mem += p.memory_info().vms
+ return cpu, mem
+
+ def track_process(self, proc, target):
+ """ Poll virtual memory of a process and children. """
+ try:
+ peak_cpu, peak_mem = self.get_mem_cpu(proc)
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
+ return 0, 0
+
+ while proc.poll() is None:
+ try:
+ cpu, mem = self.get_mem_cpu(proc)
+ if peak_cpu < cpu:
+ peak_cpu = cpu
+ if peak_mem < mem:
+ peak_mem = mem
+ except (psutil.NoSuchProcess, psutil.AccessDenied):
+ pass
+ else:
+ time.sleep(0.01)
+
+ return peak_cpu, peak_mem
+
+ def command_spawn_func(self, sh, escape, cmd, args, env, target, source):
+
+ task_metrics = {
+ 'outputs': [str(t) for t in target],
+ 'inputs': [str(s) for s in source],
+ 'action': ' '.join(args),
+ 'start_time': time.time_ns(),
+ 'builder': target[0].get_builder().get_name(target[0].get_env()),
+ }
+
+ if sys.platform[:3] == 'win':
+ # have to use shell=True for windows because of https://github.com/python/cpython/issues/53908
+ proc = psutil.Popen(' '.join(args), env=env, close_fds=True, shell=True)
+ else:
+ proc = psutil.Popen([sh, '-c', ' '.join(args)], env=env, close_fds=True)
+
+ cpu_usage, mem_usage = self.track_process(proc, target[0])
+ return_code = proc.wait()
+
+ task_metrics['end_time'] = time.time_ns()
+ task_metrics['cpu_time'] = int(cpu_usage * (10.0**9.0))
+ task_metrics['mem_usage'] = int(mem_usage)
+
+ self.build_tasks_metrics.append(task_metrics)
+ task_metrics['array_index'] = self.build_tasks_metrics.index(task_metrics)
+
+ return return_code
+
+ def finalize(self):
+ return 'build_tasks', self.build_tasks_metrics
diff --git a/site_scons/site_tools/build_metrics/protocol.py b/site_scons/site_tools/build_metrics/protocol.py
@@ -1,8 +1,9 @@
+from typing import Tuple
 from typing_extensions import Protocol
 from abc import abstractmethod
 
 
 class BuildMetricsCollector(Protocol):
  @abstractmethod
- def finalize(self):
+ def finalize(self) -> Tuple[str, str]:
  raise NotImplementedError
diff --git a/site_scons/site_tools/build_metrics/util.py b/site_scons/site_tools/build_metrics/util.py
@@ -1,9 +1,40 @@
-import datetime
 import time
 
 _BUILD_METRIC_DATA = {}
 
 
+# This section is from the original
+# https://stackoverflow.com/a/70693158/1644736
+def fullname(o):
+ try:
+ # if o is a class or function, get module directly
+ module = o.__module__
+ except AttributeError:
+ # then get module from o's class
+ module = o.__class__.__module__
+ try:
+ # if o is a class or function, get name directly
+ name = o.__qualname__
+ except AttributeError:
+ # then get o's class name
+ name = o.__class__.__qualname__
+ # if o is a method of builtin class, then module will be None
+ if module == 'builtins' or module is None:
+ return name
+ return module + '.' + name
+
+
+# This section is an excerpt of the original
+# https://stackoverflow.com/a/63029332/1644736
+class CaptureAtexits:
+ def __init__(self):
+ self.captured = []
+
+ def __eq__(self, other):
+ self.captured.append(other)
+ return False
+
+
 def get_build_metric_dict():
  global _BUILD_METRIC_DATA
  return _BUILD_METRIC_DATA