From 605cff94ffaf0a865195f173ecbf088188d1fd15 Mon Sep 17 00:00:00 2001 From: Patrick Flick Date: Tue, 3 Oct 2017 15:46:15 -0400 Subject: [PATCH 1/4] added benchmark executable --- src/CMakeLists.txt | 3 ++ src/benchmark_bw.cpp | 65 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 src/benchmark_bw.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ff34356..086d79c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,3 +5,6 @@ project(mxx-bm) add_executable(mxx-bm-vote-off vote_off.cpp) target_link_libraries(mxx-bm-vote-off ${MPI_LIBRARIES}) + +add_executable(mxx-benchmark benchmark_bw.cpp) +target_link_libraries(mxx-benchmark ${MPI_LIBRARIES}) diff --git a/src/benchmark_bw.cpp b/src/benchmark_bw.cpp new file mode 100644 index 0000000..5dd1876 --- /dev/null +++ b/src/benchmark_bw.cpp @@ -0,0 +1,65 @@ + +#include +#include +#include +#include + +#include + +std::string exec_name; + +void print_usage() { + std::cerr << "Usage: " << exec_name << " " << std::endl; + std::cerr << "where" << std::endl; + std::cerr << " Number of nodes to vote off." << std::endl; + std::cerr << " Filename for the new nodefile, output by this program." << std::endl; +} + +int main(int argc, char* argv[]) { + mxx::env e(argc, argv); + mxx::comm comm; + + // print out node and rank distribution + mxx::print_node_distribution(comm); + + // create shared-mem MPI+MPI hybrid communicator + mxx::hybrid_comm hc(comm); + + // assert same number processors per node + int proc_per_node = hc.local.size(); + if (!mxx::all_same(proc_per_node, comm)) { + std::cerr << "Error: this benchmark assumes the same number of processors per node" << std::endl; + MPI_Abort(comm, -1); + } + + // assert we have an even number of nodes + int num_nodes = hc.num_nodes(); + if (num_nodes % 2 != 0) { + std::cerr << "Error: this benchmark assumes an even number of nodes" << std::endl; + MPI_Abort(comm, -1); + } + + /* + // parse input arguments + exec_name = argv[0]; + if (argc < 3) { + print_usage(); + MPI_Abort(comm, -1); + } + int n_vote_off = atoi(argv[1]); + std::string output_nodefile(argv[2]); + if (n_vote_off < 0) { + print_usage(); + MPI_Abort(comm, -1); + } + */ + + + std::vector bw_row = mxx::pairwise_bw_matrix(hc); + + mxx::print_bw_matrix_stats(hc, bw_row); + + mxx::bw_all2all(hc.global, hc.local); + + return 0; +} From 492ab47be718a635faa4b46584d3ef4c442b7364 Mon Sep 17 00:00:00 2001 From: Patrick Flick Date: Fri, 15 Dec 2017 14:29:15 -0500 Subject: [PATCH 2/4] benchmarks --- include/mxx/benchmark.hpp | 487 ++++++++++++++++++++++++++----------- include/mxx/samplesort.hpp | 7 +- include/mxx/stream.hpp | 8 + src/CMakeLists.txt | 16 +- src/benchmark_a2a.cpp | 89 +++++++ src/benchmark_bw.cpp | 65 ----- src/benchmark_p2p_bw.cpp | 82 +++++++ src/benchmark_sort.cpp | 54 ++++ src/pbs_run.sh | 24 ++ src/vote_off.cpp | 2 +- test/CMakeLists.txt | 3 - test/benchmarks.cpp | 22 -- 12 files changed, 621 insertions(+), 238 deletions(-) create mode 100644 src/benchmark_a2a.cpp delete mode 100644 src/benchmark_bw.cpp create mode 100644 src/benchmark_p2p_bw.cpp create mode 100644 src/benchmark_sort.cpp create mode 100644 src/pbs_run.sh delete mode 100644 test/benchmarks.cpp diff --git a/include/mxx/benchmark.hpp b/include/mxx/benchmark.hpp index a621d5b..23de508 100644 --- a/include/mxx/benchmark.hpp +++ b/include/mxx/benchmark.hpp @@ -79,6 +79,21 @@ class hybrid_comm { return result; } + // split the local communicators in the same way on all processes + // This splits the `local` and `global` communicator and leaves the + // `local_master` as is, assuming that `color` is identical for all + // processes in a `local_master`. + hybrid_comm split_local(int color) const { + // split the processes but assert that each node is only in + // one process + MXX_ASSERT(mxx::all_same(color, local_master)); + hybrid_comm result; + result.local = local.split(color); + result.local_master = local_master.copy(); + result.global = global.split(color); + return result; + } + // move constructor moves all members hybrid_comm(hybrid_comm&& o) = default; @@ -100,6 +115,26 @@ class hybrid_comm { global.barrier(); } + // executes only with `ppn` processes per node + template + void with_ppn(int ppn, Func func) const { + MXX_ASSERT(mxx::all_same(ppn, global)); + MXX_ASSERT(mxx::all_of(ppn <= local.size())); + + int participate = local.rank() < ppn; + if (mxx::all_same(participate, global)) { + if (participate) { + func(*this); + } + } else { + hybrid_comm hc(split_local(participate)); + if (participate) { + func(hc); + } + } + global.barrier(); + } + int num_nodes() const { return global.size() / local.size(); } @@ -152,25 +187,27 @@ std::pair bw_simplex(const mxx::comm& c, int partner, size_t n) { return std::pair(bw1, bw2); } +/** + * Collectively times a duplex sendrecv with a given partner rank. + */ template double time_duplex(const mxx::comm& c, int partner, const std::vector& sendvec, std::vector& recvvec) { MXX_ASSERT(sendvec.size() == recvvec.size()); - size_t n = sendvec.size(); + mxx::datatype dt = mxx::get_datatype(); c.barrier(); auto start = std::chrono::steady_clock::now(); - mxx::datatype dt = mxx::get_datatype(); // sendrecv for full duplex MPI_Sendrecv(const_cast(&sendvec[0]), n, dt.type(), partner, 0, &recvvec[0], n, dt.type(), partner, 0, c, MPI_STATUS_IGNORE); - //c.barrier(); auto end = std::chrono::steady_clock::now(); double time_p2p = std::chrono::duration_cast(end - start).count(); - - // calculate duplex bandwidth - //double bw = 2*8*(double)n*sizeof(size_t)/time_p2p/1000.0; return time_p2p; } +/** + * Times duplex sendrecv with a partner process and compute the + * duplex bandwidth per node. + */ template double bw_duplex_per_node(const mxx::comm& c, int partner, const mxx::comm& smc, const std::vector& sendvec, std::vector& recvvec) { size_t n = sendvec.size(); @@ -181,82 +218,133 @@ double bw_duplex_per_node(const mxx::comm& c, int partner, const mxx::comm& smc, } -void bm(const mxx::comm& c, int partner, const mxx::comm& smc) { - size_t n = 10000000; - n /= smc.size(); - size_t MB = (n*sizeof(size_t))/1024/1024; - std::vector vec(n); - std::vector result(n); - std::generate(vec.begin(), vec.end(), std::rand); - std::string node_name = get_processor_name(); - - double timed = time_duplex(c, partner, vec, result); - double bw_send, bw_recv; - std::tie(bw_send, bw_recv) = bw_simplex(c, partner, n); - - // calculate BW - double maxtime_duplex = mxx::allreduce(timed, mxx::max(), smc); - double sum_bwd = 2*8*n*sizeof(size_t)*smc.size()/maxtime_duplex/1000.0; - double sum_bw_send = mxx::allreduce(bw_send, std::plus(), smc); - double sum_bw_recv = mxx::allreduce(bw_recv, std::plus(), smc); - c.with_subset(smc.rank() == 0, [&](const mxx::comm& subcomm) { - mxx::sync_cout(subcomm) << "[" << node_name << "]: Node BW Duplex = " - << sum_bwd << " Gb/s (Simplex " << sum_bw_send << " Gb/s send, " << sum_bw_recv - << " Gb/s recv) [" << MB*smc.size() << " MiB]" << std::endl; + +/** + * @brief Executes the given function f(i) for each pair of processes where + * there are `size` processes and this process has rank `rank`. + * Processes are paired such that in every iteration + * f(i) is called on rank `rank` iff simultanously f(rank) is called on + * rank `i`. + * + * The function f(i) is called once for all ranks 0,...,size-1, + * excluding i = rank. + * + * If in any iteration, no partner is found, f(-1) is called. This happens + * if for example the number of processes `size` is an odd number. + * + * @param rank The rank of this process + * @param size The number of total processes + * @param f The function to be called for each paired rank. + */ +template +void pairwise_func(int rank, int size, F f) { + // pair up blocks of size 2^i in iteration i + // for each pair of blocks, pair up all combinations via a linear offset + for (int dist = 1; dist < size; dist <<= 1) { + int partner_block; + if ((rank / dist) % 2 == 0) { + // to left block + partner_block = (rank/dist + 1)*dist; + } else { + partner_block = (rank/dist - 1)*dist; + } + int inblock_idx = rank % dist; + for (int i = 0; i < dist; ++i) { + int partner; + if (partner_block >= rank) + partner = partner_block + (inblock_idx + i) % dist; + else + partner = partner_block + (inblock_idx + (dist - i)) % dist; + if (partner < size) { + f(partner); + } else { + // this rank doesn't have a partner in this iteration + f(-1); + } + } + } +} + + +/** + * @brief Calls f(c, partner) for each pair of processes and f(-1) if there is + * no partner in any given round. (see `pairwise_func(int,int,F)`) + */ +template +void pairwise_func(const mxx::comm& c, F f) { + pairwise_func(c.rank(), c.size(), [&](int partner){ f(c, partner);}); +} + +// called once for each pair of nodes +// if there are multiple processes per node, the processes are matched +// up by their local ranks +// The given function is called using global ranks as the partner index +// If in any iteration there is no partner node, this calls f(-1) +template +void pairwise_nodes_func(const hybrid_comm& hc, F f) { + MXX_ASSERT(mxx::all_same(hc.local.size())); + pairwise_func(hc.node_rank(), hc.num_nodes(), [&](int partner_node) { + if (partner_node >= 0) { + int partner_rank = partner_node * hc.local.size() + hc.local.rank(); + f(partner_rank); + } else { + f(-1); + } }); } // returns a row of pairwise bw benchmark results on each process where smc.rank() == 0 -std::vector pairwise_bw_matrix(const hybrid_comm& hc) { - int num_nodes = hc.num_nodes(); - size_t n = 1000000; +std::vector pairwise_bw_matrix(const hybrid_comm& hc, size_t msg_size) { + size_t n = msg_size / 8; std::vector vec(n); std::vector result(n); std::generate(vec.begin(), vec.end(), std::rand); - n /= hc.local.size(); + // nodes get partnered - int node_idx = hc.node_rank(); std::vector bw_row; if (hc.is_local_master()) - bw_row.resize(num_nodes); - for (int dist = 1; dist < num_nodes; dist <<= 1) { - if (hc.global.rank() == 0) { - std::cout << "Benchmarking p2p duplex for dist = " << dist << std::endl; - } - int partner_block; - if ((node_idx / dist) % 2 == 0) { - // to left block - partner_block = (node_idx/dist + 1)*dist; - } else { - partner_block = (node_idx/dist - 1)*dist; - } - int inblock_idx = node_idx % dist; - for (int i = 0; i < dist; ++i) { - int partner_node; - if (partner_block >= node_idx) - partner_node = partner_block + (inblock_idx + i) % dist; - else - partner_node = partner_block + (inblock_idx + (dist - i)) % dist; - int partner = partner_node*hc.local.size() + hc.local.rank(); - // benchmark duplex with partner - if (partner_node < num_nodes) { - double bw = bw_duplex_per_node(hc.global, partner, hc.local, vec, result); - if (hc.is_local_master()) { - bw_row[partner_node] = bw; - } - } else { - // if this node doesn't participate in the benchmarking, it - // still needs to call the barrier that is otherwise called - // inside the time_duplex function - hc.global.barrier(); + bw_row.resize(hc.num_nodes()); + + pairwise_nodes_func(hc, [&](int partner){ + if (partner >= 0) { + double bw = bw_duplex_per_node(hc.global, partner, hc.local, vec, result); + if (hc.is_local_master()) { + int partner_node = partner / hc.local.size(); + bw_row[partner_node] = bw; } + } else { + // if this node doesn't participate in the benchmarking, it + // still needs to call the barrier that is otherwise called + // inside the time_duplex function + hc.global.barrier(); } - } + }); + return bw_row; } + +void save_matrix_pernode(const hybrid_comm& hc, const std::string& filename, const std::vector& values) { + std::ofstream of; + if (hc.global.rank() == 0) { + of.open(filename); + } + hc.with_local_master([&](){ + std::stringstream ss; + ss << hc.node_name << ","; + ss << std::fixed << std::setprecision(2); + for (size_t i = 0; i < values.size(); ++i) { + ss << values[i]; + if (i+1 < values.size()) + ss << ","; + } + // create sync stream + mxx::sync_os(hc.local_master, of) << ss.str() << std::endl; + }); +} + void print_bw_matrix_stats(const hybrid_comm& hc, const std::vector& bw_row) { // print matrix hc.with_local_master([&](){ @@ -402,10 +490,199 @@ void write_new_nodefile(const hybrid_comm& hc, bool participate, const std::stri } +// benchmark all2all function that outputs to csv file: +// n (in bytes), p, nnodes, m, ppn, min_time, avg_time, max_time + +// m: message size, total data send from one process: m*comm.size() +// n = m*p^2 +// returns the time taken by this process for the all2all in microseconds +template +double timed_all2all_impl(const mxx::comm& c, size_t m) { + // create arrays and generate random data + std::vector els(m*c.size()); + //std::generate(els.begin(), els.end(), [](){ return std::rand() % 255; }); + std::vector rcv(m*c.size()); + mxx::datatype dt = mxx::get_datatype(); + + /* run the actual MPI_Alltoall and time it */ + c.barrier(); + auto start = std::chrono::steady_clock::now(); + //MPI_Alltoall(&els[0], m, dt.type(), &rcv[0], m, dt.type(), c); + mxx::all2all(&els[0], m, &rcv[0], c); + auto end = std::chrono::steady_clock::now(); + + /* return the time taken on this process in microseconds */ + double time_all2all = std::chrono::duration_cast(end - start).count(); + return time_all2all; +} + +double timed_all2all(const mxx::comm& c, size_t m) { + if (m >= 8 && m % 8 == 0) { + return timed_all2all_impl(c, m/8); + } else if (m >= 4 && m % 4 == 0) { + return timed_all2all_impl(c, m/4); + } else { + return timed_all2all_impl(c, m); + } +} + +void bm_all2all(const mxx::hybrid_comm& hc, std::ostream& os, size_t max_size_per_node) { + const mxx::comm& c = hc.global; + assert(sizeof(size_t) == 8); + if (hc.global.rank() == 0) { + std::cerr << "bm_all2all with np = " << hc.global.size() << ", ppn = " << hc.local.size() << ", nnodes = " << hc.num_nodes() << std::endl; + } + for (size_t m = 1; (m*c.size()*hc.local.size()) <= max_size_per_node; m <<= 1) { + if (m >= std::numeric_limits::max()) + break; + size_t np = m*c.size(); // data per process + size_t n = np*c.size(); // total number of bytes globally + + if (hc.global.rank() == 0) { + std::cerr << "bm_all2all m = " << m << ", n/p = " << np << ", n = " << n << ", n/node = " << (m*c.size()*hc.local.size()) << std::endl; + } + + double time_all2all = timed_all2all(c, m); + + /* get min, max and average */ + double max_time = mxx::allreduce(time_all2all, mxx::max(), c); + double min_time = mxx::allreduce(time_all2all, mxx::min(), c); + double avg_time = mxx::allreduce(time_all2all, c) / c.size(); + + // p, q, ppn, m, n, time_min, time_avg, time_max + if (c.rank() == 0) { + os << c.size() << "," << hc.num_nodes() << "," << hc.local.size() << "," << m << "," << n << "," << min_time << "," << avg_time << "," << max_time << std::endl; + } + } +} + +#if 0 +template +void bm_coll_function(const mxx::hybrid_comm& hc, std::ostream& os, F f, size_t max_size_per_node) { + const mxx::comm& c = hc.global; + for (size_t m = 1; (m*c.size()*hc.local.size()) <= max_size_per_node; m <<= 1) { + size_t np = m*c.size(); // max data per process + size_t n = np*c.size(); // max total number of bytes globally (all2all and allgather) + + // execute the timed function + double val = f(c, m); + + /* get min, max and average */ + double max_val = mxx::allreduce(val, mxx::max(), comm); + double min_val = mxx::allreduce(val, mxx::min(), comm); + double avg_val = mxx::allreduce(val, comm) / comm.size(); + + // p, q, ppn, m, n, time_min, time_avg, time_max + if (c.rank() == 0) { + os << c.size() << "," << hc.num_nodes() << "," << hc.local.size() << "," << m << "," << n << "," << min_time << "," << avg_time << "," << max_time << std::endl; + } + } +} +#endif + +// next smaller power of 2 +uint32_t flp2 (uint32_t x) +{ + x = x | (x >> 1); + x = x | (x >> 2); + x = x | (x >> 4); + x = x | (x >> 8); + x = x | (x >> 16); + return x - (x >> 1); +} + + +/** + * @brief Executes the given function `f(hybrid_comm hc_ppn)` for all powers of + * 2 between the total ppn downto 1. + * + * @param hc Hybrid communicator which is split for all `ppn` values. + * @param f The function called for each `ppn` value. + */ +template +void forall_p2_ppn(const mxx::hybrid_comm& hc, F f) { + int ppn = hc.local.size(); + MXX_ASSERT(mxx::all_same(ppn, hc.global)); + for (int q = ppn; q >= 1; q = flp2(q-1)) { + // split by ppn + hc.with_ppn(q, std::forward(f)); + } +} + +template +void forall_p2_nnodes_and_ppn(const mxx::hybrid_comm& hc, F f) { + int ppn = hc.local.size(); + MXX_ASSERT(mxx::all_same(ppn, hc.global)); + + // in decreasing powers of two + for (int nn = hc.num_nodes(); nn >= 2; nn = flp2(nn-1)) { + // split by nodes + hc.with_nodes(hc.node_rank() < nn, [&](const mxx::hybrid_comm& hcn) { + for (int q = ppn; q >= 1; q = flp2(q-1)) { + // split by ppn + hcn.with_ppn(q, std::forward(f)); + } + }); + } +} + +void bm_all2all_forall_q(const mxx::hybrid_comm& hc, std::ostream& os, size_t max_size_per_node) { + int ppn = hc.local.size(); + MXX_ASSERT(mxx::all_same(ppn, hc.global)); + + for (int q = ppn; q >= 1; q = flp2(q)) { + hc.with_ppn(q, [&](const mxx::hybrid_comm& hcq) { + bm_all2all(hcq, os, max_size_per_node); + }); + } +} + +// TODO: this isn't working yet +double ping(const mxx::comm& c, int partner, int rounds = 100) { + // pairwise ping measurement with this process and process of rank `partner` + + int msg = 0; + std::chrono::steady_clock::time_point rcv_tp, send_tp; + MPI_Status st; + + if (c.rank() < partner) { + // i initiate first ping + send_tp = std::chrono::steady_clock::now(); + MPI_Send(&msg, 1, MPI_INT, partner, 0, c); + } else { + //MPI_Recv(&msg, 1, MPI_INT, partner, MPI_ANY_TAG, c, &st); + } + + int ping_cnt = 0; + double ping_sum = 0.; + for (int i = 0; i <= rounds; ++i) { + MPI_Recv(&msg, 1, MPI_INT, partner, MPI_ANY_TAG, c, &st); + rcv_tp = std::chrono::steady_clock::now(); + int t = st.MPI_TAG; + if (t > 0) { + // save time diff + double time_diff = std::chrono::duration_cast(rcv_tp - send_tp).count(); + ping_cnt += 1; + ping_sum += time_diff; + } + if (t < 2*rounds) { + send_tp = std::chrono::steady_clock::now(); + MPI_Send(&msg, 1, MPI_INT, partner, t+1, c); + } + if (t+1 == 2*rounds) + break; + } + assert(ping_cnt == rounds); + return ping_sum / ping_cnt; +} + + + void bw_all2all(const mxx::comm& c, const mxx::comm& smc) { // message size per target processor for (int k = 1; k <= 16; k <<= 1) { - int m = k*1024; + size_t n = k*1024*1024; + int m = n/c.size(); std::vector els(m*c.size()); std::generate(els.begin(), els.end(), std::rand); std::vector rcv(m*c.size()); @@ -418,11 +695,11 @@ void bw_all2all(const mxx::comm& c, const mxx::comm& smc) { double time_all2all = std::chrono::duration_cast(end - start).count(); double max_time = mxx::allreduce(time_all2all, mxx::max(), c); double min_time = mxx::allreduce(time_all2all, mxx::min(), c); - size_t bits_sendrecv = 2*8*sizeof(size_t)*m*(c.size() - smc.size()); + size_t bits_sendrecv = 2*8*sizeof(size_t)*smc.size()*m*(c.size() - smc.size()); // bandwidth in Gb/s double bw = bits_sendrecv / max_time / 1000.0; if (c.rank() == 0) { - std::cout << "All2all bandwidth: " << bw << " Gb/s [min=" << min_time/1000.0 << " ms, max=" << max_time/1000.0 << " ms, local_size=" << bits_sendrecv/1024/1024 << " MiB]" << std::endl; + std::cout << "All2all bandwidth (per node) " << bw << " Gb/s [min=" << min_time/1000.0 << " ms, max=" << max_time/1000.0 << " ms, local_size=" << bits_sendrecv/1024/1024 << " MiB]" << std::endl; } } } @@ -516,80 +793,6 @@ void bw_all2all_unaligned_char(const mxx::comm& c, const mxx::comm& smc, bool re } } -void benchmark_nodes_bw_p2p(const mxx::comm& comm = mxx::comm()) { - // pair with another node - hybrid_comm hc(comm); - int proc_per_node = hc.local.size(); - - // assert same number processors per node - if (!mxx::all_same(proc_per_node, comm)) { - std::cerr << "Error: this benchmark assumes the same number of processors per node" << std::endl; - MPI_Abort(comm, -1); - } - - int num_nodes = hc.num_nodes(); - - if (num_nodes % 2 != 0) { - std::cerr << "Error: this benchmark assumes an even number of nodes" << std::endl; - MPI_Abort(comm, -1); - } - - //for (int local_p = 1; local_p <= proc_per_node; local_p <<= 1) { - /* - int local_p = sm_comm.size(); - bool participate = true; //sm_comm.rank() < local_p; - mxx::comm c = comm.split(participate, node_idx*local_p + sm_comm.rank()); - */ - // mxx::comm smc = sm_comm.split(participate); - - if (true) { - std::vector bw_row = pairwise_bw_matrix(hc); - print_bw_matrix_stats(hc, bw_row); - bool part = vote_off(hc, 4, bw_row); // TODO: process result - if (hc.global.rank() == 0) - std::cout << "Before vote off: " << std::endl; - bw_all2all(hc.global, hc.local); - if (hc.global.rank() == 0) - std::cout << "After vote off: " << std::endl; - hc.with_nodes(part, [&](const hybrid_comm& subhc) { - bw_all2all(subhc.global, subhc.local); - bw_all2all_char(subhc.global, subhc.local); - bw_all2all_unaligned_char(subhc.global, subhc.local, false); - if (subhc.global.rank() == 0) - std::cout << "== With re-alignment" << std::endl; - bw_all2all_unaligned_char(subhc.global, subhc.local, true); - }); - write_new_nodefile(hc, part, "blah.nodes"); - - /* - if (c.rank() == 0) { - std::cout << "Running with " << local_p << "/" << proc_per_node << " processes per node" << std::endl; - } - MXX_ASSERT(c.size() % 2 == 0); - if (local_p > 1) { - // intranode BW test - if (c.rank() == 0) - std::cout << "Intranode BW test" << std::endl; - int partner = (c.rank() % 2 == 0) ? c.rank() + 1 : c.rank() - 1; - bm(c, partner, smc); - } - // 1) closest neighbor - if (c.rank() == 0) - std::cout << "Closest Neighbor BW test" << std::endl; - int partner = (node_idx % 2 == 0) ? c.rank() + local_p : c.rank() - local_p; - bm(c, partner, smc); - - // 2) furthest neighbor - if (c.rank() == 0) - std::cout << "Furthest Neighbor BW test" << std::endl; - partner = (c.rank() < c.size()/2) ? c.rank() + c.size()/2 : c.rank() - c.size()/2; - bm(c, partner, smc); - */ - } - //} - // wait for other processes to finish the benchmarking - //comm.barrier(); -} } // namespace mxx diff --git a/include/mxx/samplesort.hpp b/include/mxx/samplesort.hpp index 59d850f..3df9aae 100644 --- a/include/mxx/samplesort.hpp +++ b/include/mxx/samplesort.hpp @@ -47,8 +47,11 @@ #endif -#define SS_ENABLE_TIMER 0 -#if SS_ENABLE_TIMER +#ifndef MXX_SAMPLESORT_TIMER +#define MXX_SAMPLESORT_TIMER 0 +#endif + +#if MXX_SAMPLESORT_TIMER #include "timer.hpp" #define SS_TIMER_START(comm) mxx::section_timer timer(std::cerr, comm, 0); #define SS_TIMER_END_SECTION(str) timer.end_section(str); diff --git a/include/mxx/stream.hpp b/include/mxx/stream.hpp index 9104bf4..c57391f 100644 --- a/include/mxx/stream.hpp +++ b/include/mxx/stream.hpp @@ -100,6 +100,14 @@ inline sync_ostream sync_cerr(const mxx::comm& comm, int root = 0) { return comm.rank() == root ? sync_ostream(comm, root, std::cerr) : sync_ostream(comm, root); } +template +inline sync_ostream sync_os(const mxx::comm& comm, base_stream& bs, int root = 0) { + return comm.rank() == root ? sync_ostream(comm, root, bs) : sync_ostream(comm, root); +}; + +// TODO: a sync ofstream +// TODO: a sync cout/cerr which writes [rank] before every line/msg + } // namespace mxx diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 086d79c..123d76d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 2.8) # project settings project(mxx-bm) @@ -6,5 +6,15 @@ project(mxx-bm) add_executable(mxx-bm-vote-off vote_off.cpp) target_link_libraries(mxx-bm-vote-off ${MPI_LIBRARIES}) -add_executable(mxx-benchmark benchmark_bw.cpp) -target_link_libraries(mxx-benchmark ${MPI_LIBRARIES}) +# benchmark p2p bandwidth +add_executable(mxx-benchmark-p2p-bw benchmark_p2p_bw.cpp) +target_link_libraries(mxx-benchmark-p2p-bw ${MPI_LIBRARIES}) + +# benchmark all2all +add_executable(mxx-benchmark-a2a benchmark_a2a.cpp) +target_link_libraries(mxx-benchmark-a2a ${MPI_LIBRARIES}) + +# benchmark parallel sorting +add_executable(mxx-sort-benchmark benchmark_sort.cpp) +target_compile_definitions(mxx-sort-benchmark PUBLIC -DMXX_SAMPLESORT_TIMER=1) +target_link_libraries(mxx-sort-benchmark ${MPI_LIBRARIES}) diff --git a/src/benchmark_a2a.cpp b/src/benchmark_a2a.cpp new file mode 100644 index 0000000..b9342f9 --- /dev/null +++ b/src/benchmark_a2a.cpp @@ -0,0 +1,89 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include + +std::string exec_name; + +void print_usage() { + std::cerr << "Usage: " << exec_name << " -m " << std::endl; + std::cerr << "where" << std::endl; + std::cerr << " (optional) Filename for the benchmark results (default: 'all2all_benchmark.csv')." << std::endl; + std::cerr << " -m (optional) Maximum message space per node (in GB) (default: 32)." << std::endl; +} + +int main(int argc, char* argv[]) { + mxx::env e(argc, argv); + mxx::comm comm; + + // print out node and rank distribution + mxx::print_node_distribution(comm); + + // create shared-mem MPI+MPI hybrid communicator + mxx::hybrid_comm hc(comm); + + // assert same number processors per node + int proc_per_node = hc.local.size(); + if (!mxx::all_same(proc_per_node, comm)) { + std::cerr << "Error: this benchmark assumes the same number of processors per node" << std::endl; + MPI_Abort(comm, -1); + } + + // assert we have an even number of nodes + int num_nodes = hc.num_nodes(); + if (num_nodes > 1 && num_nodes % 2 != 0) { + std::cerr << "Error: this benchmark assumes an even number of nodes" << std::endl; + MPI_Abort(comm, -1); + } + + // default args + size_t mem_per_node_gb = 32; // setting the max experiment at 32 GB per node + std::string filename = "all2all_benchmark.csv"; + + // parse input arguments + exec_name = argv[0]; + argv++; argc--; + if (argc >= 2) { + std::string x(argv[0]); + if (x == "-m") { + mem_per_node_gb = atoi(argv[1]); + argv+=2; argc-=2; + } + if (x != "-m" || mem_per_node_gb > 1024 || mem_per_node_gb == 0) { + print_usage(); + MPI_Abort(comm, -1); + } + } + if (argc > 0) { + filename = argv[0]; + argv++; argc--; + } + if (argc > 0) { + print_usage(); + MPI_Abort(comm, -1); + } + + MXX_ASSERT(mxx::all_same(mem_per_node_gb, comm)); + + // benchmark all: + std::ofstream of; + if (hc.global.rank() == 0) { + of.open(filename); + of << "p,nnodes,q,m,n,min,avg,max" << std::endl; + } + + // 32 GB/node max? + size_t mempernode = mem_per_node_gb << 30; + + mxx::forall_p2_nnodes_and_ppn(hc, [&](const mxx::hybrid_comm& hc){ + bm_all2all(hc, of, mempernode); + }); + + return 0; +} diff --git a/src/benchmark_bw.cpp b/src/benchmark_bw.cpp deleted file mode 100644 index 5dd1876..0000000 --- a/src/benchmark_bw.cpp +++ /dev/null @@ -1,65 +0,0 @@ - -#include -#include -#include -#include - -#include - -std::string exec_name; - -void print_usage() { - std::cerr << "Usage: " << exec_name << " " << std::endl; - std::cerr << "where" << std::endl; - std::cerr << " Number of nodes to vote off." << std::endl; - std::cerr << " Filename for the new nodefile, output by this program." << std::endl; -} - -int main(int argc, char* argv[]) { - mxx::env e(argc, argv); - mxx::comm comm; - - // print out node and rank distribution - mxx::print_node_distribution(comm); - - // create shared-mem MPI+MPI hybrid communicator - mxx::hybrid_comm hc(comm); - - // assert same number processors per node - int proc_per_node = hc.local.size(); - if (!mxx::all_same(proc_per_node, comm)) { - std::cerr << "Error: this benchmark assumes the same number of processors per node" << std::endl; - MPI_Abort(comm, -1); - } - - // assert we have an even number of nodes - int num_nodes = hc.num_nodes(); - if (num_nodes % 2 != 0) { - std::cerr << "Error: this benchmark assumes an even number of nodes" << std::endl; - MPI_Abort(comm, -1); - } - - /* - // parse input arguments - exec_name = argv[0]; - if (argc < 3) { - print_usage(); - MPI_Abort(comm, -1); - } - int n_vote_off = atoi(argv[1]); - std::string output_nodefile(argv[2]); - if (n_vote_off < 0) { - print_usage(); - MPI_Abort(comm, -1); - } - */ - - - std::vector bw_row = mxx::pairwise_bw_matrix(hc); - - mxx::print_bw_matrix_stats(hc, bw_row); - - mxx::bw_all2all(hc.global, hc.local); - - return 0; -} diff --git a/src/benchmark_p2p_bw.cpp b/src/benchmark_p2p_bw.cpp new file mode 100644 index 0000000..6339ed5 --- /dev/null +++ b/src/benchmark_p2p_bw.cpp @@ -0,0 +1,82 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include + +std::string exec_name; + +void print_usage() { + std::cerr << "Usage: " << exec_name << " -m " << std::endl; + std::cerr << "where" << std::endl; + std::cerr << " (optional) Filename for the pairwise bandwidth matrix (default: 'p2p_bw.csv')." << std::endl; + std::cerr << " -m (optional) Message size for each process pair in kilo bytes. (default: 131072 (128 MB))." << std::endl; +} + +int main(int argc, char* argv[]) { + mxx::env e(argc, argv); + mxx::comm comm; + + // print out node and rank distribution + mxx::print_node_distribution(comm); + + // create shared-mem MPI+MPI hybrid communicator + mxx::hybrid_comm hc(comm); + + // assert same number processors per node + int proc_per_node = hc.local.size(); + if (!mxx::all_same(proc_per_node, comm)) { + std::cerr << "Error: this benchmark assumes the same number of processors per node" << std::endl; + MPI_Abort(comm, -1); + } + + // assert we have an even number of nodes + int num_nodes = hc.num_nodes(); + if (num_nodes > 1 && num_nodes % 2 != 0) { + std::cerr << "Error: this benchmark assumes an even number of nodes" << std::endl; + MPI_Abort(comm, -1); + } + + // default args + size_t msg_size_kB = 128*1024; // 128 MiB per process default + std::string filename = "p2p_bw.csv"; + + // parse input arguments + exec_name = argv[0]; + argv++; argc--; + if (argc >= 2) { + std::string x(argv[0]); + if (x == "-m") { + msg_size_kB = atoi(argv[1]); + argv+=2; argc-=2; + } + if (x != "-m" || msg_size_kB > 4*1024*1024 || msg_size_kB == 0) { + print_usage(); + MPI_Abort(comm, -1); + } + } + if (argc > 0) { + filename = argv[0]; + argv++; argc--; + } + if (argc > 0) { + print_usage(); + MPI_Abort(comm, -1); + } + + MXX_ASSERT(mxx::all_same(msg_size_kB, comm)); + + // perform pairwise bandwidth benchmarking + std::vector bw_row = mxx::pairwise_bw_matrix(hc, msg_size_kB*1024); + + // print out benchmarking results and save as file + mxx::print_bw_matrix_stats(hc, bw_row); + mxx::save_matrix_pernode(hc, filename, bw_row); + + return 0; +} diff --git a/src/benchmark_sort.cpp b/src/benchmark_sort.cpp new file mode 100644 index 0000000..13dccd3 --- /dev/null +++ b/src/benchmark_sort.cpp @@ -0,0 +1,54 @@ +#include +#include +#include + +#include +#include +#include +#include + +#include + +std::string exec_name; + +// TODO fix usage +void print_usage() { + std::cerr << "Usage: " << exec_name << " " << std::endl; + std::cerr << "where" << std::endl; + std::cerr << " Number of nodes to vote off." << std::endl; + std::cerr << " Filename for the new nodefile, output by this program." << std::endl; +} + +int main(int argc, char* argv[]) { + mxx::env e(argc, argv); + mxx::comm comm; + + // print out node and rank distribution + mxx::print_node_distribution(comm); + + // create shared-mem MPI+MPI hybrid communicator + mxx::hybrid_comm hc(comm); + + // create output file for benchmark + std::ofstream of; + if (hc.global.rank() == 0) { + of.open("bm_samplesort.csv"); + of << "p,nnodes,q,m,n,min,avg,max" << std::endl; + } + + size_t mempernode = 16ull << 30; + + // input in growing sizes of 2 + typedef std::tuple T; + for (size_t npn = 1024; npn <= mempernode/sizeof(T)/2; npn <<= 1) { + // generate input + std::vector a(npn); + srand(comm.rank()* 13 + 5); + std::generate(a.begin(), a.end(), [](){ return std::make_pair(rand(), rand()); }); + + } + + // TODO: sorting benchmark + + return 0; +} diff --git a/src/pbs_run.sh b/src/pbs_run.sh new file mode 100644 index 0000000..1551298 --- /dev/null +++ b/src/pbs_run.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +#PBS -q swarm +#PBS -l nodes=16:ppn=28 +#PBS -l walltime=2:00:00 + +# set up env +module load gcc/4.9.0 +module load mvapich2/2.2 +#module load openmpi + + +# Change to directory from which qsub command was issued +cd $PBS_O_WORKDIR + +# Old num nodes and PPN +PPN=$PBS_NUM_PPN +NUM_NODES=$PBS_NUM_NODES +NP=$(expr $NUM_NODES \\* $PPN) + +echo "Running with np = $NP, ppn = $PPN, nnodes = $NUM_NODES" + +mpirun -np $NP -ppn $PPN ./bin/mxx-benchmark-p2p-bw p2p_bw_${NUM_NODES}nodes.csv +mpirun -np $NP -ppn $PPN ./bin/mxx-benchmark-a2a bm_all2all_${NUM_NODES}nodes.csv diff --git a/src/vote_off.cpp b/src/vote_off.cpp index 6d541dd..38afee9 100644 --- a/src/vote_off.cpp +++ b/src/vote_off.cpp @@ -48,7 +48,7 @@ int main(int argc, char* argv[]) { bool benchmark_char_align = false; - std::vector bw_row = mxx::pairwise_bw_matrix(hc); + std::vector bw_row = mxx::pairwise_bw_matrix(hc, 32*1024*1024); mxx::print_bw_matrix_stats(hc, bw_row); bool part = mxx::vote_off(hc, n_vote_off, bw_row); if (hc.global.rank() == 0) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c15b232..b741aa4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -26,9 +26,6 @@ target_link_libraries(mxx-test-sort mxx-gtest-main) add_executable(mxx-test-distribution test_distribution.cpp) target_link_libraries(mxx-test-distribution mxx-gtest-main) -add_executable(mxx-benchmarks benchmarks.cpp) -target_link_libraries(mxx-benchmarks mxx-gtest-main) - # Combination of all parallel tests: add_executable(mxx-test-all test_collective.cpp test_reductions.cpp test_send.cpp test_sort.cpp test_distribution.cpp) target_link_libraries(mxx-test-all mxx-gtest-main) diff --git a/test/benchmarks.cpp b/test/benchmarks.cpp deleted file mode 100644 index e71428e..0000000 --- a/test/benchmarks.cpp +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2015 Georgia Institute of Technology - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include - -TEST(mxx, benchmark_p2p) { - mxx::benchmark_nodes_bw_p2p(); -} From 5f5d152f3a9f2f9c3fbe32b1fc2ace9ecd3e2093 Mon Sep 17 00:00:00 2001 From: Tristan Konolige Date: Fri, 23 Mar 2018 11:56:35 -0600 Subject: [PATCH 3/4] modernize cmake files --- CMakeLists.txt | 67 +++++++++++++++++++++++++++++++--------- cmake/mxxConfig.cmake | 3 ++ gtest/CMakeLists.txt | 7 ++--- src/CMakeLists.txt | 10 +++--- src/benchmark_a2a.cpp | 2 +- src/benchmark_p2p_bw.cpp | 2 +- src/benchmark_sort.cpp | 2 +- test/CMakeLists.txt | 2 +- 8 files changed, 68 insertions(+), 27 deletions(-) create mode 100644 cmake/mxxConfig.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index cb3459c..222bdb1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,14 +1,7 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 3.6) # project settings -project(mxx) - -##### General Compilation Settings - -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wuninitialized --std=c++11") -set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -O0") -set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG -march=native -funroll-loops") -set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELEASE} -g") +project(mxx LANGUAGES CXX) # Add these standard paths to the search paths for FIND_LIBRARY # to find libraries from these locations first @@ -25,6 +18,10 @@ if(COMMAND cmake_policy) cmake_policy(SET CMP0003 NEW) endif() +add_library(mxx INTERFACE) +target_compile_features(mxx INTERFACE cxx_std_11) +target_include_directories(mxx INTERFACE $ $) + #### MPI find_package(MPI REQUIRED) @@ -32,11 +29,57 @@ if (MPI_FOUND) #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MPI_COMPILE_FLAGS}") #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_COMPILE_FLAGS}") #set(CMAKE_LINK_FLAGS "${CMAKE_LINK_FLAGS} ${MPI_LINK_FLAGS}") - include_directories(SYSTEM ${MPI_INCLUDE_PATH}) + target_link_libraries(mxx INTERFACE ${MPI_CXX_LIBRARIES}) + target_include_directories(mxx INTERFACE ${MPI_CXX_INCLUDE_DIRS}) else (MPI_FOUND) message(SEND_ERROR "This application cannot compile without MPI") endif (MPI_FOUND) +#### cxx-prettyprint +target_include_directories(mxx INTERFACE $ $) + +#### Installation +install(DIRECTORY include/mxx/ DESTINATION include/mxx) +install(DIRECTORY ext/ DESTINATION include/mxx FILES_MATCHING PATTERN "*.hpp") + +include(CMakePackageConfigHelpers) + +install(TARGETS mxx + EXPORT mxxTargets + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + RUNTIME DESTINATION bin + INCLUDES DESTINATION include + ) + +export(EXPORT mxxTargets + FILE "${CMAKE_CURRENT_BINARY_DIR}/mxx/mxxTargets.cmake" + NAMESPACE mxx:: +) +set(ConfigPackageLocation lib/cmake/mxx) +configure_package_config_file(cmake/mxxConfig.cmake + "${CMAKE_CURRENT_BINARY_DIR}/mxx/mxxConfig.cmake" + INSTALL_DESTINATION "${ConfigPackageLocation}" +) + +install(EXPORT mxxTargets + FILE + mxxTargets.cmake + NAMESPACE + mxx:: + DESTINATION + ${ConfigPackageLocation} +) +install( + FILES + cmake/mxxConfig.cmake + DESTINATION + ${ConfigPackageLocation} + COMPONENT + Devel +) + + #### Doxygen find_package(Doxygen) if(DOXYGEN_FOUND) @@ -82,10 +125,6 @@ endif(FAKE_BIG_MPI) # Save libs and executables in the same place set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin CACHE PATH "Output directory for applications" ) -include_directories("${PROJECT_SOURCE_DIR}/include") -include_directories("${PROJECT_SOURCE_DIR}/ext") -include_directories("${PROJECT_SOURCE_DIR}") - # build tests add_subdirectory(gtest) add_subdirectory(test) diff --git a/cmake/mxxConfig.cmake b/cmake/mxxConfig.cmake new file mode 100644 index 0000000..f4b02c8 --- /dev/null +++ b/cmake/mxxConfig.cmake @@ -0,0 +1,3 @@ +find_package(MPI REQUIRED QUIET) + +include("${CMAKE_CURRENT_LIST_DIR}/mxxTargets.cmake") diff --git a/gtest/CMakeLists.txt b/gtest/CMakeLists.txt index de6b34e..48bbe34 100644 --- a/gtest/CMakeLists.txt +++ b/gtest/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 3.6) # project settings project(mxx-gtest) @@ -8,7 +8,6 @@ project(mxx-gtest) # MPI Google Test: # ###################### -include_directories("${PROJECT_SOURCE_DIR}/../") - add_library(mxx-gtest-main mxx_gtest_main.cpp gtest-all.cc) -target_link_libraries(mxx-gtest-main ${MPI_LIBRARIES} pthread) +target_link_libraries(mxx-gtest-main PUBLIC mxx ${MPI_LIBRARIES} pthread) +target_include_directories(mxx-gtest-main PUBLIC "../") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 123d76d..b91c260 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,20 +1,20 @@ -cmake_minimum_required(VERSION 2.8) +cmake_minimum_required(VERSION 3.6) # project settings project(mxx-bm) add_executable(mxx-bm-vote-off vote_off.cpp) -target_link_libraries(mxx-bm-vote-off ${MPI_LIBRARIES}) +target_link_libraries(mxx-bm-vote-off mxx ${MPI_LIBRARIES}) # benchmark p2p bandwidth add_executable(mxx-benchmark-p2p-bw benchmark_p2p_bw.cpp) -target_link_libraries(mxx-benchmark-p2p-bw ${MPI_LIBRARIES}) +target_link_libraries(mxx-benchmark-p2p-bw mxx ${MPI_LIBRARIES}) # benchmark all2all add_executable(mxx-benchmark-a2a benchmark_a2a.cpp) -target_link_libraries(mxx-benchmark-a2a ${MPI_LIBRARIES}) +target_link_libraries(mxx-benchmark-a2a mxx ${MPI_LIBRARIES}) # benchmark parallel sorting add_executable(mxx-sort-benchmark benchmark_sort.cpp) target_compile_definitions(mxx-sort-benchmark PUBLIC -DMXX_SAMPLESORT_TIMER=1) -target_link_libraries(mxx-sort-benchmark ${MPI_LIBRARIES}) +target_link_libraries(mxx-sort-benchmark mxx ${MPI_LIBRARIES}) diff --git a/src/benchmark_a2a.cpp b/src/benchmark_a2a.cpp index b9342f9..d9f24f0 100644 --- a/src/benchmark_a2a.cpp +++ b/src/benchmark_a2a.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include std::string exec_name; diff --git a/src/benchmark_p2p_bw.cpp b/src/benchmark_p2p_bw.cpp index 6339ed5..266e07f 100644 --- a/src/benchmark_p2p_bw.cpp +++ b/src/benchmark_p2p_bw.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include std::string exec_name; diff --git a/src/benchmark_sort.cpp b/src/benchmark_sort.cpp index 13dccd3..e49ec4c 100644 --- a/src/benchmark_sort.cpp +++ b/src/benchmark_sort.cpp @@ -7,7 +7,7 @@ #include #include -#include +#include std::string exec_name; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b741aa4..c8a1970 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.6) +cmake_minimum_required(VERSION 3.6) # project settings project(mxx-test) From 2017ae926f8c70e5a7839f7b0b31a40979ff127f Mon Sep 17 00:00:00 2001 From: Tristan Konolige Date: Sat, 24 Mar 2018 20:21:43 -0600 Subject: [PATCH 4/4] try and fix travis mpi issue --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 222bdb1..2c5982b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -30,7 +30,7 @@ if (MPI_FOUND) #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_COMPILE_FLAGS}") #set(CMAKE_LINK_FLAGS "${CMAKE_LINK_FLAGS} ${MPI_LINK_FLAGS}") target_link_libraries(mxx INTERFACE ${MPI_CXX_LIBRARIES}) - target_include_directories(mxx INTERFACE ${MPI_CXX_INCLUDE_DIRS}) + target_include_directories(mxx INTERFACE ${MPI_CXX_INCLUDE_PATH}) else (MPI_FOUND) message(SEND_ERROR "This application cannot compile without MPI") endif (MPI_FOUND)