diff --git a/scalene/scalene.py b/scalene/scalene.py index 3e6c348f9..c69f80bea 100644 --- a/scalene/scalene.py +++ b/scalene/scalene.py @@ -1,40 +1,27 @@ """Scalene: a high-performance, high-precision CPU *and* memory profiler for Python. - Scalene uses interrupt-driven sampling for CPU profiling. For memory - profiling, it uses a similar mechanism but with interrupts generated - by a "sampling memory allocator" that produces signals everytime the - heap grows or shrinks by a certain amount. See libscalene.cpp for - details (sampling logic is in include/sampleheap.hpp). + Scalene uses interrupt-driven sampling for CPU profiling. For memory + profiling, it uses a similar mechanism but with interrupts generated + by a "sampling memory allocator" that produces signals everytime the + heap grows or shrinks by a certain amount. See libscalene.cpp for + details (sampling logic is in include/sampleheap.hpp). - by Emery Berger - https://emeryberger.com + by Emery Berger + https://emeryberger.com - usage: # for CPU profiling only - python -m scalene test/testme.py - # for CPU and memory profiling (Mac OS X) - DYLD_INSERT_LIBRARIES=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene test/testme.py - # for CPU and memory profiling (Linux) - LD_PRELOAD=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene test/testme.py + usage: # for CPU profiling only + python -m Scalene test/testme.py + # for CPU and memory profiling (Mac OS X) + DYLD_INSERT_LIBRARIES=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene test/testme.py + # for CPU and memory profiling (Linux) + LD_PRELOAD=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene test/testme.py """ -the_globals = { - '__name__': '__main__', - '__doc__': None, - '__package__': None, - '__loader__': globals()['__loader__'], - '__spec__': None, - '__annotations__': {}, - '__builtins__': globals()['__builtins__'], - '__file__': None, - '__cached__': None, -} - import sys import atexit import signal -import json -import linecache +# import linecache import math from collections import defaultdict import time @@ -44,17 +31,27 @@ import argparse from contextlib import contextmanager - +the_globals = { + '__name__': '__main__', + '__doc__': None, + '__package__': None, + '__loader__': globals()['__loader__'], + '__spec__': None, + '__annotations__': {}, + '__builtins__': globals()['__builtins__'], + '__file__': None, + '__cached__': None, +} assert sys.version_info[0] == 3 and sys.version_info[1] >= 6, "Scalene requires Python version 3.6 or above." # Scalene currently only supports Unix-like operating systems; in particular, Linux and Mac OS X. if sys.platform == 'win32': - print("scalene currently does not support Windows, but works on Linux and Mac OS X.") + print("Scalene currently does not support Windows, but works on Linux and Mac OS X.") sys.exit(-1) -class scalene(object): - +class Scalene(): + """The Scalene profiler itself.""" # Statistics counters. cpu_samples_python = defaultdict(lambda: defaultdict(int)) # CPU samples for each location in the program # spent in the interpreter @@ -86,34 +83,34 @@ def gettime(): def __init__(self, program_being_profiled): # Register the exit handler to run when the program terminates or we quit. - atexit.register(scalene.exit_handler) + atexit.register(Scalene.exit_handler) # Store relevant names (program, path). - scalene.program_being_profiled = os.path.abspath(program_being_profiled) - scalene.program_path = os.path.dirname(scalene.program_being_profiled) + Scalene.program_being_profiled = os.path.abspath(program_being_profiled) + Scalene.program_path = os.path.dirname(Scalene.program_being_profiled) # Set up the signal handler to handle periodic timer interrupts (for CPU). - signal.signal(scalene.cpu_signal, self.cpu_signal_handler) + signal.signal(Scalene.cpu_signal, self.cpu_signal_handler) # Set up the signal handler to handle malloc interrupts (for memory allocations). - signal.signal(scalene.malloc_signal, self.malloc_signal_handler) - signal.signal(scalene.free_signal, self.free_signal_handler) + signal.signal(Scalene.malloc_signal, self.malloc_signal_handler) + signal.signal(Scalene.free_signal, self.free_signal_handler) # Turn on the CPU profiling timer to run every signal_interval seconds. signal.setitimer(signal.ITIMER_PROF, self.signal_interval, self.signal_interval) - scalene.last_signal_time = scalene.gettime() + Scalene.last_signal_time = Scalene.gettime() @staticmethod - def cpu_signal_handler(sig, frame): + def cpu_signal_handler(_, frame): """Handle interrupts for CPU profiling.""" # Record how long it has been since we received a timer # before. See the logic below. - now = scalene.gettime() - elapsed_since_last_signal = now - scalene.last_signal_time + now = Scalene.gettime() + elapsed_since_last_signal = now - Scalene.last_signal_time fname = frame.f_code.co_filename # Record samples only for files we care about. if (len(fname)) == 0: # 'eval/compile' gives no f_code.co_filename. # We have to look back into the outer frame in order to check the co_filename. fname = frame.f_back.f_code.co_filename - if not scalene.should_trace(fname): + if not Scalene.should_trace(fname): return # Here we take advantage of an apparent limitation of Python: # it only delivers signals after the interpreter has given up @@ -121,42 +118,45 @@ def cpu_signal_handler(sig, frame): # running purely in the interpreter, and in fact, that was a limitation # of the first version of Scalene. # - # However, it is in fact a way to separate out time spent in + # (cf. https://docs.python.org/3.9/library/signal.html#execution-of-python-signal-handlers) + # + # However, we can turn lemons into lemonade. This "problem" is in fact + # an effective way to separate out time spent in # Python vs. time spent in native code "for free"! If we get # the signal immediately, we must be running in the - # interpreter; if the signal's delivery is delayed, it's - # because we are running code OUTSIDE the interpreter, e.g., + # interpreter. On the other hand, if it was delayed, that means + # we are running code OUTSIDE the interpreter, e.g., # native code (be it inside of Python or in a library). We # account for this time by tracking the elapsed (process) time # and compare it to the interval, and add any computed delay # (as if it were sampled) to the C counter. - c_time = elapsed_since_last_signal - scalene.signal_interval - scalene.cpu_samples_python[fname][frame.f_lineno] += 1 - scalene.cpu_samples_c[fname][frame.f_lineno] += c_time / scalene.signal_interval - scalene.total_cpu_samples += elapsed_since_last_signal / scalene.signal_interval - scalene.last_signal_time = scalene.gettime() + c_time = elapsed_since_last_signal - Scalene.signal_interval + Scalene.cpu_samples_python[fname][frame.f_lineno] += 1 + Scalene.cpu_samples_c[fname][frame.f_lineno] += c_time / Scalene.signal_interval + Scalene.total_cpu_samples += elapsed_since_last_signal / Scalene.signal_interval + Scalene.last_signal_time = Scalene.gettime() return @staticmethod - def malloc_signal_handler(sig, frame): + def malloc_signal_handler(_, frame): """Handle interrupts for memory profiling (mallocs).""" fname = frame.f_code.co_filename # Record samples only for files we care about. - if not scalene.should_trace(fname): + if not Scalene.should_trace(fname): return - scalene.malloc_samples[fname][frame.f_lineno] += 1 - scalene.total_malloc_samples += 1 + Scalene.malloc_samples[fname][frame.f_lineno] += 1 + Scalene.total_malloc_samples += 1 return @staticmethod - def free_signal_handler(sig, frame): + def free_signal_handler(_, frame): """Handle interrupts for memory profiling (frees).""" fname = frame.f_code.co_filename # Record samples only for files we care about. - if not scalene.should_trace(fname): + if not Scalene.should_trace(fname): return - scalene.free_samples[fname][frame.f_lineno] += 1 - scalene.total_free_samples += 1 + Scalene.free_samples[fname][frame.f_lineno] += 1 + Scalene.total_free_samples += 1 return @staticmethod @@ -171,22 +171,23 @@ def should_trace(filename): # Don't profile the profiler. return False filename = os.path.abspath(filename) - return scalene.program_path in filename + return Scalene.program_path in filename @staticmethod def start(): """Initiate profiling.""" - scalene.elapsed_time = scalene.gettime() + Scalene.elapsed_time = Scalene.gettime() @staticmethod def stop(): """Complete profiling.""" - scalene.disable_signals() - scalene.elapsed_time = scalene.gettime() - scalene.elapsed_time + Scalene.disable_signals() + Scalene.elapsed_time = Scalene.gettime() - Scalene.elapsed_time @staticmethod @contextmanager def file_or_stdout(file_name): + """Returns a file handle for writing; if no argument is passed, returns stdout.""" # from https://stackoverflow.com/questions/9836370/fallback-to-stdout-if-no-file-name-provided if file_name is None: yield sys.stdout @@ -197,38 +198,38 @@ def file_or_stdout(file_name): @staticmethod def output_profiles(output_file): """Write the profile out (currently to stdout).""" - total_mem_samples = scalene.total_malloc_samples + scalene.total_free_samples # use + scalene.total_free_samples for churn. + total_mem_samples = Scalene.total_malloc_samples + Scalene.total_free_samples # use + Scalene.total_free_samples for churn. # Collect all instrumented filenames. - all_instrumented_files = list(set(list(scalene.cpu_samples_python.keys()) + list(scalene.malloc_samples.keys()) + list(scalene.free_samples.keys()))) - with scalene.file_or_stdout(output_file) as out: + all_instrumented_files = list(set(list(Scalene.cpu_samples_python.keys()) + list(Scalene.malloc_samples.keys()) + list(Scalene.free_samples.keys()))) + with Scalene.file_or_stdout(output_file) as out: for fname in sorted(all_instrumented_files): - with open(fname, 'r') as fd: - this_cpu_samples = sum(scalene.cpu_samples_c[fname].values()) + sum(scalene.cpu_samples_python[fname].values()) + with open(fname, 'r') as source_file: + this_cpu_samples = sum(Scalene.cpu_samples_c[fname].values()) + sum(Scalene.cpu_samples_python[fname].values()) if this_cpu_samples == 0: continue percent_cpu_time = 100 * this_cpu_samples * \ - scalene.signal_interval / scalene.elapsed_time - print(f"{fname}: % of CPU time = {percent_cpu_time:6.2f}% out of {scalene.elapsed_time:6.2f}s.", file=out) + Scalene.signal_interval / Scalene.elapsed_time + print(f"{fname}: % of CPU time = {percent_cpu_time:6.2f}% out of {Scalene.elapsed_time:6.2f}s.", file=out) print(f" \t | {'CPU %':9}| {'CPU %':9}| {'Memory (MB) |' if total_mem_samples != 0 else ''}", file=out) print(f" Line\t | {'(Python)':9}| {'(C)':9}|{' |' if total_mem_samples != 0 else ''} [{fname}]", file=out) print("-" * 80, file=out) - contents = fd.readlines() + contents = source_file.readlines() line_no = 1 for line in contents: line = line[:-1] # Strip newline - n_cpu_samples_c = scalene.cpu_samples_c[fname][line_no] + n_cpu_samples_c = Scalene.cpu_samples_c[fname][line_no] if n_cpu_samples_c < 0: # This can happen because of floating point inaccuracies, since we perform subtraction to compute it. n_cpu_samples_c = 0 - n_cpu_samples_python = scalene.cpu_samples_python[fname][line_no] + n_cpu_samples_python = Scalene.cpu_samples_python[fname][line_no] n_mem_mb = 0 - n_mem_samples = scalene.malloc_samples[fname][line_no] - scalene.free_samples[fname][line_no] + n_mem_samples = Scalene.malloc_samples[fname][line_no] - Scalene.free_samples[fname][line_no] if n_mem_samples != 0: - r = n_mem_samples * scalene.memory_sampling_rate - n_mem_mb = r / (1024 * 1024) - if scalene.total_cpu_samples != 0: - n_cpu_percent_c = n_cpu_samples_c * 100 / scalene.total_cpu_samples - n_cpu_percent_python = n_cpu_samples_python * 100 / scalene.total_cpu_samples + rate = n_mem_samples * Scalene.memory_sampling_rate + n_mem_mb = rate / (1024 * 1024) + if Scalene.total_cpu_samples != 0: + n_cpu_percent_c = n_cpu_samples_c * 100 / Scalene.total_cpu_samples + n_cpu_percent_python = n_cpu_samples_python * 100 / Scalene.total_cpu_samples # Print results. n_cpu_percent_c_str = "" if n_cpu_percent_c == 0 else f'{n_cpu_percent_c:6.2f}%' n_cpu_percent_python_str = "" if n_cpu_percent_python == 0 else f'{n_cpu_percent_python:6.2f}%' @@ -245,20 +246,20 @@ def output_profiles(output_file): def disable_signals(): """Turn off the profiling signals.""" signal.signal(signal.ITIMER_PROF, signal.SIG_IGN) - signal.signal(scalene.malloc_signal, signal.SIG_IGN) - signal.signal(scalene.free_signal, signal.SIG_IGN) + signal.signal(Scalene.malloc_signal, signal.SIG_IGN) + signal.signal(Scalene.free_signal, signal.SIG_IGN) signal.setitimer(signal.ITIMER_PROF, 0) - + @staticmethod def exit_handler(): """When we exit, disable all signals.""" - scalene.disable_signals() + Scalene.disable_signals() @staticmethod def main(): - + """Invokes the profiler from the command-line.""" usage = """Scalene: a high-precision CPU and memory profiler. -https://github.com/emeryberger/scalene +https://github.com/emeryberger/Scalene for CPU profiling only: % python -m scalene yourprogram.py @@ -267,19 +268,18 @@ def main(): for CPU and memory profiling (Linux): % LD_PRELOAD=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene yourprogram.py """ - parser = argparse.ArgumentParser(prog='scalene', description=usage, formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument('prog',type=str,help='program to be profiled') - parser.add_argument('-o','--outfile',type=str,default=None,help='file to hold profiler output (default: stdout)') - # Parse out all scalene arguments and jam the remaining ones into argv. + parser.add_argument('prog', type=str, help='program to be profiled') + parser.add_argument('-o', '--outfile', type=str, default=None, help='file to hold profiler output (default: stdout)') + # Parse out all Scalene arguments and jam the remaining ones into argv. # See https://stackoverflow.com/questions/35733262/is-there-any-way-to-instruct-argparse-python-2-7-to-remove-found-arguments-fro args, left = parser.parse_known_args() sys.argv = sys.argv[:1]+left try: - with open(args.prog, 'rb') as fp: + with open(args.prog, 'rb') as prog_being_profiled: original_path = os.getcwd() # Read in the code and compile it. - code = compile(fp.read(), args.prog, "exec") + code = compile(prog_being_profiled.read(), args.prog, "exec") # Push the program's path. program_path = os.path.dirname(os.path.abspath(args.prog)) sys.path.insert(0, program_path) @@ -287,7 +287,7 @@ def main(): # Set the file being executed. the_globals['__file__'] = args.prog # Start the profiler. - profiler = scalene(os.path.join(program_path, os.path.basename(args.prog))) + profiler = Scalene(os.path.join(program_path, os.path.basename(args.prog))) try: profiler.start() # Run the code being profiled. @@ -299,13 +299,13 @@ def main(): if profiler.total_cpu_samples > 0: profiler.output_profiles(args.outfile) else: - print("scalene: Program did not run for long enough to profile.") + print("Scalene: Program did not run for long enough to profile.") except Exception as ex: - template = "scalene: An exception of type {0} occurred. Arguments:\n{1!r}" + template = "Scalene: An exception of type {0} occurred. Arguments:\n{1!r}" message = template.format(type(ex).__name__, ex.args) print(message) print(traceback.format_exc()) except (FileNotFoundError, IOError): - print("scalene: could not find input file.") - -scalene.main() + print("Scalene: could not find input file.") + +Scalene.main()