Skip to content

Commit

Permalink
Cleaned up (per pylint).
Browse files Browse the repository at this point in the history
  • Loading branch information
emeryberger committed Jan 12, 2020
1 parent b09a9fe commit 6ac8f1f
Showing 1 changed file with 96 additions and 96 deletions.
192 changes: 96 additions & 96 deletions scalene/scalene.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,27 @@
"""Scalene: a high-performance, high-precision CPU *and* memory profiler for Python.
Scalene uses interrupt-driven sampling for CPU profiling. For memory
profiling, it uses a similar mechanism but with interrupts generated
by a "sampling memory allocator" that produces signals everytime the
heap grows or shrinks by a certain amount. See libscalene.cpp for
details (sampling logic is in include/sampleheap.hpp).
Scalene uses interrupt-driven sampling for CPU profiling. For memory
profiling, it uses a similar mechanism but with interrupts generated
by a "sampling memory allocator" that produces signals every time the
heap grows or shrinks by a certain amount. See libscalene.cpp for
details (sampling logic is in include/sampleheap.hpp).
by Emery Berger
https://emeryberger.com
by Emery Berger
https://emeryberger.com
usage: # for CPU profiling only
python -m scalene test/testme.py
# for CPU and memory profiling (Mac OS X)
DYLD_INSERT_LIBRARIES=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene test/testme.py
# for CPU and memory profiling (Linux)
LD_PRELOAD=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene test/testme.py
usage: # for CPU profiling only
python -m Scalene test/testme.py
# for CPU and memory profiling (Mac OS X)
DYLD_INSERT_LIBRARIES=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene test/testme.py
# for CPU and memory profiling (Linux)
LD_PRELOAD=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene test/testme.py
"""

the_globals = {
'__name__': '__main__',
'__doc__': None,
'__package__': None,
'__loader__': globals()['__loader__'],
'__spec__': None,
'__annotations__': {},
'__builtins__': globals()['__builtins__'],
'__file__': None,
'__cached__': None,
}

import sys
import atexit
import signal
import json
import linecache
# import linecache
import math
from collections import defaultdict
import time
Expand All @@ -44,17 +31,27 @@
import argparse
from contextlib import contextmanager


the_globals = {
'__name__': '__main__',
'__doc__': None,
'__package__': None,
'__loader__': globals()['__loader__'],
'__spec__': None,
'__annotations__': {},
'__builtins__': globals()['__builtins__'],
'__file__': None,
'__cached__': None,
}

assert sys.version_info[0] == 3 and sys.version_info[1] >= 6, "Scalene requires Python version 3.6 or above."

# Scalene currently only supports Unix-like operating systems; in particular, Linux and Mac OS X.
if sys.platform == 'win32':
print("scalene currently does not support Windows, but works on Linux and Mac OS X.")
print("Scalene currently does not support Windows, but works on Linux and Mac OS X.")
sys.exit(-1)

class scalene(object):

class Scalene():
"""The Scalene profiler itself."""
# Statistics counters.
cpu_samples_python = defaultdict(lambda: defaultdict(int)) # CPU samples for each location in the program
# spent in the interpreter
Expand Down Expand Up @@ -86,77 +83,80 @@ def gettime():

def __init__(self, program_being_profiled):
    """Set up profiler state and install all signal handlers and timers.

    program_being_profiled: path to the script that will be profiled.
    """
    # Register the exit handler so profiling shuts down cleanly when the
    # program terminates or we quit.
    atexit.register(Scalene.exit_handler)
    # Store relevant names (program, path) for should_trace filtering.
    Scalene.program_being_profiled = os.path.abspath(program_being_profiled)
    Scalene.program_path = os.path.dirname(Scalene.program_being_profiled)
    # Set up the signal handler to handle periodic timer interrupts (for CPU).
    signal.signal(Scalene.cpu_signal, self.cpu_signal_handler)
    # Set up the signal handlers to handle malloc/free interrupts
    # (for memory allocations).
    signal.signal(Scalene.malloc_signal, self.malloc_signal_handler)
    signal.signal(Scalene.free_signal, self.free_signal_handler)
    # Turn on the CPU profiling timer to fire every signal_interval seconds.
    signal.setitimer(signal.ITIMER_PROF, self.signal_interval, self.signal_interval)
    Scalene.last_signal_time = Scalene.gettime()


@staticmethod
def cpu_signal_handler(_, frame):
    """Handle interrupts for CPU profiling.

    Attributes samples to the current line, splitting time between
    Python-interpreter execution and native (C) execution.
    """
    # Record how long it has been since we received a timer before.
    # See the attribution logic below.
    now = Scalene.gettime()
    elapsed_since_last_signal = now - Scalene.last_signal_time
    fname = frame.f_code.co_filename
    # Record samples only for files we care about.
    if not fname:
        # 'eval/compile' gives no f_code.co_filename.
        # We have to look back into the outer frame to check the co_filename.
        fname = frame.f_back.f_code.co_filename
    if not Scalene.should_trace(fname):
        return
    # Here we take advantage of an apparent limitation of Python:
    # it only delivers signals after the interpreter has given up control.
    # (cf. https://docs.python.org/3.9/library/signal.html#execution-of-python-signal-handlers)
    #
    # However, we can turn lemons into lemonade. This "problem" is in fact
    # an effective way to separate out time spent in Python vs. time spent
    # in native code "for free"! If we get the signal immediately, we must
    # be running in the interpreter. On the other hand, if it was delayed,
    # that means we are running code OUTSIDE the interpreter, e.g., native
    # code (be it inside of Python or in a library). We account for this
    # time by tracking the elapsed (process) time, comparing it to the
    # interval, and adding any computed delay (as if it were sampled) to
    # the C counter.
    c_time = elapsed_since_last_signal - Scalene.signal_interval
    Scalene.cpu_samples_python[fname][frame.f_lineno] += 1
    Scalene.cpu_samples_c[fname][frame.f_lineno] += c_time / Scalene.signal_interval
    Scalene.total_cpu_samples += elapsed_since_last_signal / Scalene.signal_interval
    Scalene.last_signal_time = Scalene.gettime()
    return

@staticmethod
def malloc_signal_handler(_, frame):
    """Handle interrupts for memory profiling (mallocs)."""
    fname = frame.f_code.co_filename
    # Record samples only for files we care about.
    if not Scalene.should_trace(fname):
        return
    Scalene.malloc_samples[fname][frame.f_lineno] += 1
    Scalene.total_malloc_samples += 1
    return

@staticmethod
def free_signal_handler(_, frame):
    """Handle interrupts for memory profiling (frees)."""
    fname = frame.f_code.co_filename
    # Record samples only for files we care about.
    if not Scalene.should_trace(fname):
        return
    Scalene.free_samples[fname][frame.f_lineno] += 1
    Scalene.total_free_samples += 1
    return

@staticmethod
Expand All @@ -171,22 +171,23 @@ def should_trace(filename):
# Don't profile the profiler.
return False
filename = os.path.abspath(filename)
return scalene.program_path in filename
return Scalene.program_path in filename

@staticmethod
def start():
    """Initiate profiling by recording the start timestamp."""
    Scalene.elapsed_time = Scalene.gettime()

@staticmethod
def stop():
    """Complete profiling: disable signals and finalize the elapsed time."""
    Scalene.disable_signals()
    # elapsed_time held the start timestamp; it now holds the duration.
    Scalene.elapsed_time = Scalene.gettime() - Scalene.elapsed_time

@staticmethod
@contextmanager
def file_or_stdout(file_name):
"""Returns a file handle for writing; if no argument is passed, returns stdout."""
# from https://stackoverflow.com/questions/9836370/fallback-to-stdout-if-no-file-name-provided
if file_name is None:
yield sys.stdout
Expand All @@ -197,38 +198,38 @@ def file_or_stdout(file_name):
@staticmethod
def output_profiles(output_file):
"""Write the profile out (currently to stdout)."""
total_mem_samples = scalene.total_malloc_samples + scalene.total_free_samples # use + scalene.total_free_samples for churn.
total_mem_samples = Scalene.total_malloc_samples + Scalene.total_free_samples # use + Scalene.total_free_samples for churn.
# Collect all instrumented filenames.
all_instrumented_files = list(set(list(scalene.cpu_samples_python.keys()) + list(scalene.malloc_samples.keys()) + list(scalene.free_samples.keys())))
with scalene.file_or_stdout(output_file) as out:
all_instrumented_files = list(set(list(Scalene.cpu_samples_python.keys()) + list(Scalene.malloc_samples.keys()) + list(Scalene.free_samples.keys())))
with Scalene.file_or_stdout(output_file) as out:
for fname in sorted(all_instrumented_files):
with open(fname, 'r') as fd:
this_cpu_samples = sum(scalene.cpu_samples_c[fname].values()) + sum(scalene.cpu_samples_python[fname].values())
with open(fname, 'r') as source_file:
this_cpu_samples = sum(Scalene.cpu_samples_c[fname].values()) + sum(Scalene.cpu_samples_python[fname].values())
if this_cpu_samples == 0:
continue
percent_cpu_time = 100 * this_cpu_samples * \
scalene.signal_interval / scalene.elapsed_time
print(f"{fname}: % of CPU time = {percent_cpu_time:6.2f}% out of {scalene.elapsed_time:6.2f}s.", file=out)
Scalene.signal_interval / Scalene.elapsed_time
print(f"{fname}: % of CPU time = {percent_cpu_time:6.2f}% out of {Scalene.elapsed_time:6.2f}s.", file=out)
print(f" \t | {'CPU %':9}| {'CPU %':9}| {'Memory (MB) |' if total_mem_samples != 0 else ''}", file=out)
print(f" Line\t | {'(Python)':9}| {'(C)':9}|{' |' if total_mem_samples != 0 else ''} [{fname}]", file=out)
print("-" * 80, file=out)
contents = fd.readlines()
contents = source_file.readlines()
line_no = 1
for line in contents:
line = line[:-1] # Strip newline
n_cpu_samples_c = scalene.cpu_samples_c[fname][line_no]
n_cpu_samples_c = Scalene.cpu_samples_c[fname][line_no]
if n_cpu_samples_c < 0:
# This can happen because of floating point inaccuracies, since we perform subtraction to compute it.
n_cpu_samples_c = 0
n_cpu_samples_python = scalene.cpu_samples_python[fname][line_no]
n_cpu_samples_python = Scalene.cpu_samples_python[fname][line_no]
n_mem_mb = 0
n_mem_samples = scalene.malloc_samples[fname][line_no] - scalene.free_samples[fname][line_no]
n_mem_samples = Scalene.malloc_samples[fname][line_no] - Scalene.free_samples[fname][line_no]
if n_mem_samples != 0:
r = n_mem_samples * scalene.memory_sampling_rate
n_mem_mb = r / (1024 * 1024)
if scalene.total_cpu_samples != 0:
n_cpu_percent_c = n_cpu_samples_c * 100 / scalene.total_cpu_samples
n_cpu_percent_python = n_cpu_samples_python * 100 / scalene.total_cpu_samples
rate = n_mem_samples * Scalene.memory_sampling_rate
n_mem_mb = rate / (1024 * 1024)
if Scalene.total_cpu_samples != 0:
n_cpu_percent_c = n_cpu_samples_c * 100 / Scalene.total_cpu_samples
n_cpu_percent_python = n_cpu_samples_python * 100 / Scalene.total_cpu_samples
# Print results.
n_cpu_percent_c_str = "" if n_cpu_percent_c == 0 else f'{n_cpu_percent_c:6.2f}%'
n_cpu_percent_python_str = "" if n_cpu_percent_python == 0 else f'{n_cpu_percent_python:6.2f}%'
Expand All @@ -245,20 +246,20 @@ def output_profiles(output_file):
@staticmethod
def disable_signals():
    """Turn off the profiling signals and stop the interval timer."""
    # BUG FIX: the original passed signal.ITIMER_PROF (an interval-timer
    # id, value 2 — not a signal number) to signal.signal(), which on most
    # platforms silently ignores SIGINT instead of the profiling signal.
    # Ignore the CPU profiling signal installed in __init__ instead.
    signal.signal(Scalene.cpu_signal, signal.SIG_IGN)
    signal.signal(Scalene.malloc_signal, signal.SIG_IGN)
    signal.signal(Scalene.free_signal, signal.SIG_IGN)
    signal.setitimer(signal.ITIMER_PROF, 0)

@staticmethod
def exit_handler():
    """When we exit, disable all profiling signals."""
    Scalene.disable_signals()

@staticmethod
def main():

"""Invokes the profiler from the command-line."""
usage = """Scalene: a high-precision CPU and memory profiler.
https://github.com/emeryberger/scalene
https://github.com/emeryberger/Scalene
for CPU profiling only:
% python -m scalene yourprogram.py
Expand All @@ -267,27 +268,26 @@ def main():
for CPU and memory profiling (Linux):
% LD_PRELOAD=$PWD/libscalene.dylib PYTHONMALLOC=malloc python -m scalene yourprogram.py
"""

parser = argparse.ArgumentParser(prog='scalene', description=usage, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('prog',type=str,help='program to be profiled')
parser.add_argument('-o','--outfile',type=str,default=None,help='file to hold profiler output (default: stdout)')
# Parse out all scalene arguments and jam the remaining ones into argv.
parser.add_argument('prog', type=str, help='program to be profiled')
parser.add_argument('-o', '--outfile', type=str, default=None, help='file to hold profiler output (default: stdout)')
# Parse out all Scalene arguments and jam the remaining ones into argv.
# See https://stackoverflow.com/questions/35733262/is-there-any-way-to-instruct-argparse-python-2-7-to-remove-found-arguments-fro
args, left = parser.parse_known_args()
sys.argv = sys.argv[:1]+left
try:
with open(args.prog, 'rb') as fp:
with open(args.prog, 'rb') as prog_being_profiled:
original_path = os.getcwd()
# Read in the code and compile it.
code = compile(fp.read(), args.prog, "exec")
code = compile(prog_being_profiled.read(), args.prog, "exec")
# Push the program's path.
program_path = os.path.dirname(os.path.abspath(args.prog))
sys.path.insert(0, program_path)
os.chdir(program_path)
# Set the file being executed.
the_globals['__file__'] = args.prog
# Start the profiler.
profiler = scalene(os.path.join(program_path, os.path.basename(args.prog)))
profiler = Scalene(os.path.join(program_path, os.path.basename(args.prog)))
try:
profiler.start()
# Run the code being profiled.
Expand All @@ -299,13 +299,13 @@ def main():
if profiler.total_cpu_samples > 0:
profiler.output_profiles(args.outfile)
else:
print("scalene: Program did not run for long enough to profile.")
print("Scalene: Program did not run for long enough to profile.")
except Exception as ex:
template = "scalene: An exception of type {0} occurred. Arguments:\n{1!r}"
template = "Scalene: An exception of type {0} occurred. Arguments:\n{1!r}"
message = template.format(type(ex).__name__, ex.args)
print(message)
print(traceback.format_exc())
except (FileNotFoundError, IOError):
print("scalene: could not find input file.")
scalene.main()
print("Scalene: could not find input file.")

Scalene.main()

0 comments on commit 6ac8f1f

Please sign in to comment.