Skip to content
This repository has been archived by the owner on Jul 31, 2024. It is now read-only.

Python gotcha fixes + python tools submodule #218

Merged
merged 6 commits into from
Jul 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 9 additions & 30 deletions .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,45 +42,23 @@ environment:
USE_ARCH: "OFF"
USE_PYTHON: "ON"
BUILD_C: "ON"
BUILD_SHARED: "ON"
BUILD_STATIC: "OFF"
BUILD_TESTING: "ON"
LINE_PROFILER: "ON"
- CONDA: 37
CPP: 14
CONFIG: RelWithDebInfo
EXAMPLES: "OFF"
TOOLS: "OFF"
WINSOCK: "OFF"
USE_ARCH: "OFF"
USE_PYTHON: "ON"
BUILD_C: "OFF"
BUILD_STATIC: "ON"
BUILD_TESTING: "OFF"
LINE_PROFILER: "ON"
- PYTHON: 36
- PYTHON: 37
CPP: 14
CONFIG: Debug
EXAMPLES: "ON"
TOOLS: "OFF"
WINSOCK: "OFF"
USE_ARCH: "OFF"
USE_PYTHON: "ON"
BUILD_C: "ON"
BUILD_STATIC: "OFF"
BUILD_TESTING: "OFF"
LINE_PROFILER: "OFF"
- PYTHON: 37
CPP: 17
CONFIG: MinSizeRel
EXAMPLES: "OFF"
TOOLS: "ON"
WINSOCK: "OFF"
USE_ARCH: "OFF"
USE_PYTHON: "ON"
BUILD_C: "OFF"
BUILD_SHARED: "OFF"
BUILD_STATIC: "ON"
BUILD_TESTING: "OFF"
LINE_PROFILER: "ON"
LINE_PROFILER: "OFF"

install:
# Configure environment
Expand Down Expand Up @@ -109,7 +87,7 @@ install:
$env:PYTHON_EXE = "C:\Python$env:PYTHON\python.exe"
}
python -m pip install --disable-pip-version-check --user --upgrade pip wheel
python -m pip install --user cython numpy matplotlib pillow pandas pydot
python -m pip install --user matplotlib numpy pillow cython six pandas pydot multiprocess
} elseif ($env:CONDA) {
if ($env:PLATFORM -eq "x64") {
$env:PATH = "C:\Miniconda$env:CONDA-x64;C:\Miniconda$env:CONDA-x64\Scripts;$env:PATH"
Expand All @@ -122,8 +100,8 @@ install:
$env:PYTHONHOME = "C:\Miniconda$env:CONDA"
$env:PYTHON_EXE = "C:\Miniconda$env:CONDA\python.exe"
}
conda update -y -q -n base conda
conda install -y -c defaults -c conda-forge -q pip setuptools scikit-build numpy matplotlib pillow cython pandas pydot
conda update -y -n base conda
conda install -y -c conda-forge -c defaults matplotlib numpy pillow cython six pandas pydot multiprocess
}
$env:TIMEMORY_FILE_OUTPUT = "OFF"
$env:TIMEMORY_AUTO_OUTPUT = "ON"
Expand All @@ -134,8 +112,9 @@ build_script:
- mkdir build-timemory
- cd build-timemory
- cmake .. -G "%CMAKE_GENERATOR%" -A "%CMAKE_ARCH%"
-DBUILD_SHARED_LIBS=ON
-DBUILD_SHARED_LIBS="%BUILD_SHARED%"
-DBUILD_STATIC_LIBS="%BUILD_STATIC%"
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DCMAKE_BUILD_TYPE="%CONFIG%"
-DCMAKE_CXX_STANDARD="%CPP%"
-DCMAKE_INSTALL_PREFIX=..\install-timemory
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ conda:

python:
setup_py_install: False
version: 3.6
version: 3.7

build:
image: latest
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.2.1
3.2.2.dev0
3 changes: 3 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ def build_doxy_docs():
"-DENABLE_DOXYGEN_LATEX_DOCS=OFF",
"-DENABLE_DOXYGEN_MAN_DOCS=OFF",
"-DTIMEMORY_BUILD_KOKKOS_TOOLS=ON",
"-DTIMEMORY_BUILD_C=OFF",
"-DTIMEMORY_BUILD_CUDA=OFF",
"-DTIMEMORY_BUILD_FORTRAN=OFF",
_srcdir,
]
)
Expand Down
3 changes: 2 additions & 1 deletion docs/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ channels:
- conda-forge
- defaults
dependencies:
- python=3.6
- python=3.7
- cmake
- curl
- doxygen
Expand All @@ -18,4 +18,5 @@ dependencies:
- setuptools
- breathe
- sphinx-markdown-tables
- docutils

3 changes: 3 additions & 0 deletions docs/getting_started/integrating.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ were not available when timemory was installed.
| `timemory::timemory-compile-extra` | Extra optimization flags |
| `timemory::timemory-compile-options` | Adds the standard set of compiler flags used by timemory |
| `timemory::timemory-compile-timing` | Adds compiler flags which report compilation timing metrics |
| `timemory::timemory-compiler-instrument-compile-options` | Provides the compile options used for compiler instrumentation |
| `timemory::timemory-compiler-instrument` | Provides library for compiler instrumentation |
| `timemory::timemory-coverage` | Enables code-coverage flags |
| `timemory::timemory-cpu-roofline` | Enables flags and libraries for proper CPU roofline generation |
Expand Down Expand Up @@ -69,6 +70,7 @@ were not available when timemory was installed.
| `timemory::timemory-hidden-visibility` | Adds -fvisibility=hidden compiler flag |
| `timemory::timemory-instrument-functions` | Adds compiler flags to enable compile-time instrumentation |
| `timemory::timemory-leak-sanitizer` | Adds compiler flags to enable leak sanitizer (-fsanitize=leak) |
| `timemory::timemory-libunwind` | Enables libunwind support |
| `timemory::timemory-likwid` | Enables LIKWID support |
| `timemory::timemory-lto` | Adds link-time-optimization flags |
| `timemory::timemory-mallocp-library` | Provides MALLOCP library for tracking memory allocations |
Expand All @@ -79,6 +81,7 @@ were not available when timemory was installed.
| `timemory::timemory-ncclp-library` | Provides NCCLP library for NCCL performance analysis |
| `timemory::timemory-no-mpi-init` | Disables the generation of MPI_Init and MPI_Init_thread symbols |
| `timemory::timemory-null-sanitizer` | Adds compiler flags to enable null sanitizer (-fsanitize=null) |
| `timemory::timemory-nvml` | Enables NVML support (NVIDIA) |
| `timemory::timemory-ompt-library` | Provides OMPT library for OpenMP performance analysis |
| `timemory::timemory-ompt` | Enables OpenMP-tools support |
| `timemory::timemory-papi-static` | Enables PAPI support + links to static library |
Expand Down
4 changes: 4 additions & 0 deletions docs/tools/timemory-compiler-instrument/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ In other words, `"TIMEMORY_FLAT_PROFILE=ON"` will not be applied to the compiler
to enable flat profiling for the compiler instrumentation, set `"TIMEMORY_COMPILER_FLAT_PROFILE=ON"`,
and so on for `"TIMEMORY_COMPILER_OUTPUT_PATH=..."`, etc.

> **NOTE:** Environment variables `TIMEMORY_COMPILER_MAX_DEPTH`, `TIMEMORY_COMPILER_THROTTLE_COUNT`, and `TIMEMORY_COMPILER_THROTTLE_VALUE`
> can be very useful for reducing the overhead of the instrumentation. For more information, see the descriptions provided via
> `timemory-avail -Sd -r 'THROTTLE|MAX_DEPTH'`.

## Build

Timemory provides a `timemory::timemory-compiler-instrument` target in CMake which provides the necessary
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
// SOFTWARE.
//

#include "timemory/components/papi/papi_tuple.hpp"
#include "timemory/library.h"
#include "timemory/timemory.hpp"

Expand Down
3 changes: 3 additions & 0 deletions examples/ex-optional/ex_optional.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ int main(int argc, char** argv)
ret_sum += ret * ret;
}

// avoid set-but-unused variable warning
if(ret_sum < 0) printf("sum: %li\n", ret_sum);

std::vector<long> ret_reduce;
std::vector<long> ret_send;
for(size_t i = 0; i < fibvalues.size(); ++i)
Expand Down
116 changes: 75 additions & 41 deletions examples/ex-python/ex_bindings.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
#!@PYTHON_EXECUTABLE@


import sys
import numpy
import argparse

use_mpi = True
try:
import mpi4py # noqa: F401
from mpi4py import MPI # noqa: F401
from mpi4py.MPI import Exception as MPIException # noqa: F401
except ImportError:
use_mpi = False
MPIException = RuntimeError
pass

import timemory # noqa: E402
from timemory.profiler import profile # noqa: E402
from timemory.tools import function_wrappers # noqa: E402
import libex_python_bindings as ex_bindings # noqa: E402

if use_mpi:
Expand All @@ -35,37 +38,48 @@ def run_profile(nitr=100, nsize=1000000):

def run_mpi(nitr=100, nsize=1000000):

if size != 2:
return
if use_mpi is False:
_sum = 0.0
for i in range(nitr):
data = numpy.arange(nsize, dtype="i")
_val = numpy.sum(data)
_sum += 1.0 / _val
data = numpy.arange(nsize, dtype=numpy.float64)
_val = numpy.sum(data)
_sum += 1.0 / _val

msgs = set()
for i in range(nitr):
# passing MPI datatypes explicitly
if rank == 0:
data = numpy.arange(nsize, dtype="i")
comm.Send([data, MPI.INT], dest=1, tag=77)
elif rank == 1:
data = numpy.empty(nsize, dtype="i")
comm.Recv([data, MPI.INT], source=0, tag=77)
try:
if rank == 0:
data = numpy.arange(nsize, dtype="i")
comm.Send([data, MPI.INT], dest=1, tag=77)
elif rank == 1:
data = numpy.empty(nsize, dtype="i")
comm.Recv([data, MPI.INT], source=0, tag=77)
except MPIException as e:
msgs.add(f"{e}")

# automatic MPI datatype discovery
if rank == 0:
data = numpy.empty(nsize, dtype=numpy.float64)
comm.Recv(data, source=1, tag=13)
elif rank == 1:
data = numpy.arange(nsize, dtype=numpy.float64)
comm.Send(data, dest=0, tag=13)

try:
if rank == 0 and size == 2:
data = numpy.empty(nsize, dtype=numpy.float64)
comm.Recv(data, source=1, tag=13)
elif rank == 1:
data = numpy.arange(nsize, dtype=numpy.float64)
comm.Send(data, dest=0, tag=13)
except MPIException as e:
msgs.add(f"{e}")

def main(args):
# start MPI wrappers
id = timemory.start_mpip()
for i, itr in enumerate(msgs):
sys.stderr.write("{}: {}\n".format(i, itr))

run_mpi(args.iterations)
ans = run_profile(args.iterations, args.size)

# stop MPI wrappers
timemory.stop_mpip(id)
return ans
def main(args):
# start function wrappers (MPI, OpenMP, etc. if available)
with function_wrappers(*args.profile, nccl=False):
return run_profile(args.iterations, args.size)


if __name__ == "__main__":
Expand All @@ -74,40 +88,60 @@ def main(args):
parser.add_argument(
"-i",
"--iterations",
required=False,
default=100,
type=int,
help="Iterations",
)
parser.add_argument(
"-n",
"--size",
required=False,
default=1000000,
type=int,
help="Array size",
)
parser.add_argument(
"-c",
"--components",
default=[
"wall_clock",
"peak_rss",
"cpu_clock",
"cpu_util",
"thread_cpu_clock",
"thread_cpu_util",
],
type=str,
help="Additional components",
nargs="*",
)
parser.add_argument(
"-p",
"--profile",
default=["mpi", "openmp", "malloc"],
choices=("mpi", "openmp", "malloc", "nccl"),
type=str,
help="Profiling library wrappers to activate",
nargs="*",
)

args = parser.parse_args()
timemory.enable_signal_detection()
timemory.settings.width = 12
timemory.settings.precision = 6
timemory.settings.width = 8
timemory.settings.precision = 2
timemory.settings.scientific = True
timemory.settings.plot_output = True
timemory.settings.dart_output = True
timemory.settings.dart_output = False
timemory.timemory_init([__file__])

with profile(
[
"wall_clock",
"user_clock",
"system_clock",
"cpu_util",
"peak_rss",
"thread_cpu_clock",
"thread_cpu_util",
]
):
@function_wrappers(*args.profile, nccl=False)
def runner(nitr, nsize):
run_mpi(nitr, nsize)

runner(args.iterations, args.size)

with profile(args.components):
ans = main(args)
print("Answer = {}".format(ans))

print("Success! Answer = {}. Finalizing...".format(ans))
timemory.finalize()
print("Python Finished")
Loading