Skip to content

Commit

Permalink
Fixed seed for shuffle at validate stage (valhalla#2515)
Browse files Browse the repository at this point in the history
* Sort bins after append

* Improve reproduce-test detalization

* Use concurrency field from mjolnir section

* Revert: don't sort bins

* format

* Read concurrency parameter from the correct config section

* Ignore test/ directory for codecov

* s/fixate/fixed

* Revert "Ignore test/ directory for codecov"

This reverts commit f6c864d.

* Test improvements

* Update changelog

Co-authored-by: Svetlana Bayarovich <[email protected]>
Co-authored-by: Kevin Kreiser <[email protected]>
  • Loading branch information
3 people authored Aug 14, 2020
1 parent 5b27ae7 commit 8460b51
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 41 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
* FIXED: Fix bug in trace_route for uturns causing garbage coordinates [#2517](https://github.com/valhalla/valhalla/pull/2517)
* FIXED: Simplify heading calculation for turn type. Remove undefined behavior case. [#2513](https://github.com/valhalla/valhalla/pull/2513)
* FIXED: Always set costing name even if one is not provided for osrm serializer weight_name. [#2528](https://github.com/valhalla/valhalla/pull/2528)
* FIXED: Make single-thread tile building reproducible: fix seed for shuffle, use concurrency configuration from the mjolnir section. [#2515](https://github.com/valhalla/valhalla/pull/2515)

* **Enhancement**
* ADDED: Add explicit include for sstream to be compatible with msvc_x64 toolset. [#2449](https://github.com/valhalla/valhalla/pull/2449)
Expand Down
2 changes: 1 addition & 1 deletion src/mjolnir/elevationbuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ void ElevationBuilder::Build(const boost::property_tree::ptree& pt) {
// Setup threads
uint32_t nthreads =
std::max(static_cast<unsigned int>(1),
pt.get<unsigned int>("concurrency", std::thread::hardware_concurrency()));
pt.get<unsigned int>("mjolnir.concurrency", std::thread::hardware_concurrency()));
std::vector<std::shared_ptr<std::thread>> threads(nthreads);

// Setup promises. Hold the results for the threads
Expand Down
2 changes: 1 addition & 1 deletion src/mjolnir/graphenhancer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1864,7 +1864,7 @@ void GraphEnhancer::Enhance(const boost::property_tree::ptree& pt,
// A place to hold worker threads and their results, exceptions or otherwise
std::vector<std::shared_ptr<std::thread>> threads(
std::max(static_cast<unsigned int>(1),
pt.get<unsigned int>("concurrency", std::thread::hardware_concurrency())));
pt.get<unsigned int>("mjolnir.concurrency", std::thread::hardware_concurrency())));

// A place to hold the results of those threads, exceptions or otherwise
std::list<std::promise<enhancer_stats>> results;
Expand Down
5 changes: 3 additions & 2 deletions src/mjolnir/graphvalidator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,8 @@ void GraphValidator::Validate(const boost::property_tree::ptree& pt) {
for (const auto& id : tileset) {
tilequeue.emplace_back(id);
}
std::random_shuffle(tilequeue.begin(), tilequeue.end());
// fixed seed for reproducible tile build
std::shuffle(tilequeue.begin(), tilequeue.end(), std::mt19937(3));

// Remember what the dataset id is in case we have to make some tiles
auto dataset_id = GraphTile(tile_dir, *tilequeue.begin()).header()->dataset_id();
Expand All @@ -567,7 +568,7 @@ void GraphValidator::Validate(const boost::property_tree::ptree& pt) {
// Setup threads
std::vector<std::shared_ptr<std::thread>> threads(
std::max(static_cast<unsigned int>(1),
pt.get<unsigned int>("concurrency", std::thread::hardware_concurrency())));
pt.get<unsigned int>("mjolnir.concurrency", std::thread::hardware_concurrency())));

// Setup promises
std::list<
Expand Down
2 changes: 1 addition & 1 deletion src/mjolnir/restrictionbuilder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ void RestrictionBuilder::Build(const boost::property_tree::ptree& pt,

std::vector<std::shared_ptr<std::thread>> threads(
std::max(static_cast<unsigned int>(1),
pt.get<unsigned int>("concurrency", std::thread::hardware_concurrency())));
pt.get<unsigned int>("mjolnir.concurrency", std::thread::hardware_concurrency())));
// Hold the results (DataQuality/stats) for the threads
std::vector<std::promise<DataQuality>> results(threads.size());

Expand Down
127 changes: 91 additions & 36 deletions test/gurka/test_reproduce_tile_build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ using namespace valhalla;
using namespace valhalla::baldr;

void assert_tile_equalish(const GraphTile& a, const GraphTile& b) {
const GraphTileHeader *ah = a.header(), *bh = b.header();

// expected size
ASSERT_EQ(a.header()->end_offset(), b.header()->end_offset());

Expand All @@ -20,14 +22,36 @@ void assert_tile_equalish(const GraphTile& a, const GraphTile& b) {
sizeof(GraphTileHeader)),
0);

// check bins
for (size_t bin_index = 0; bin_index < kBinCount; ++bin_index) {
ASSERT_EQ(ah->bin_offset(bin_index), bh->bin_offset(bin_index));
auto a_bin = a.GetBin(bin_index);
auto b_bin = b.GetBin(bin_index);
GraphId* a_pos = a_bin.begin();
GraphId* b_pos = b_bin.begin();

while (true) {
const auto diff = std::mismatch(a_pos, a_bin.end(), b_pos, b_bin.end());
if (diff.first != a_bin.end()) {
ADD_FAILURE() << "Bin[" << bin_index << "] mismatch at position "
<< std::distance(a_bin.begin(), diff.first) << ": " << *diff.first
<< " != " << *diff.second;

a_pos = diff.first + 1;
b_pos = diff.second + 1;
} else {
break;
}
}
}

// check the stuff after the bins
ASSERT_EQ(memcmp(reinterpret_cast<const char*>(a.header()) + a.header()->edgeinfo_offset(),
reinterpret_cast<const char*>(b.header()) + b.header()->edgeinfo_offset(),
b.header()->end_offset() - b.header()->edgeinfo_offset()),
0);

// if the header is as expected
const GraphTileHeader *ah = a.header(), *bh = b.header();
ASSERT_EQ(ah->access_restriction_count(), bh->access_restriction_count());
ASSERT_EQ(ah->admincount(), bh->admincount());
ASSERT_EQ(ah->complex_restriction_forward_offset(), bh->complex_restriction_forward_offset());
Expand Down Expand Up @@ -62,41 +86,72 @@ void assert_tile_equalish(const GraphTile& a, const GraphTile& b) {

// 1. build tiles with the same input twice
// 2. check that the same tile sets are generated
TEST(GraphTileBuilder, TestBuildIsReproducible) {
const auto build_tiles = [](const std::string& dir) -> gurka::map {
const std::string ascii_map = R"(A-----B)";
const gurka::ways ways = {
{"ABA", {{"highway", "motorway"}}},
};
const gurka::nodelayout layout = gurka::detail::map_to_coordinates(ascii_map, 10);
const std::string workdir = "test/data/gurka_reproduce_tile_build/" + dir;
return gurka::buildtiles(layout, ways, {}, {}, workdir, {});
};
const gurka::map first_map = build_tiles("1");
const gurka::map second_map = build_tiles("2");

baldr::GraphReader first_reader(first_map.config.get_child("mjolnir"));
baldr::GraphReader second_reader(second_map.config.get_child("mjolnir"));
const std::unordered_set<GraphId> first_tiles = first_reader.GetTileSet();
ASSERT_EQ(first_tiles.size(), second_reader.GetTileSet().size())
<< "Got different tiles sets: tile count mismatch";
for (const GraphId& tile_id : first_tiles) {
const GraphTile* first_tile = first_reader.GetGraphTile(tile_id);
ASSERT_TRUE(second_reader.DoesTileExist(tile_id))
<< "Tile " << GraphTile::FileSuffix(tile_id) << " isn't found in the second tile set";
const GraphTile* second_tile = second_reader.GetGraphTile(tile_id);

// human readable check
assert_tile_equalish(*first_tile, *second_tile);

// check that raw tiles are equal
const auto raw_tile_bytes = [](const GraphTile* tile) -> std::string {
const GraphTileHeader* header = tile->header();
return std::string(reinterpret_cast<const char*>(header), header->end_offset());
struct ReproducibleBuild : ::testing::Test {
void BuildTiles(const std::string& ascii_map, const gurka::ways& ways, const double gridsize) {
const auto build_tiles = [&](const std::string& dir) -> gurka::map {
const gurka::nodelayout layout = gurka::detail::map_to_coordinates(ascii_map, gridsize);
const std::string workdir = "test/data/gurka_reproduce_tile_build/" + dir;
return gurka::buildtiles(layout, ways, {}, {}, workdir, {});
};
const std::string first_raw_tile = raw_tile_bytes(first_tile);
const std::string second_raw_tile = raw_tile_bytes(second_tile);
EXPECT_EQ(first_raw_tile, second_raw_tile)
<< "Tile{" << GraphTile::FileSuffix(tile_id) << "} mismatch";
const gurka::map first_map = build_tiles("1");
const gurka::map second_map = build_tiles("2");

baldr::GraphReader first_reader(first_map.config.get_child("mjolnir"));
baldr::GraphReader second_reader(second_map.config.get_child("mjolnir"));
const std::unordered_set<GraphId> first_tiles = first_reader.GetTileSet();
ASSERT_EQ(first_tiles.size(), second_reader.GetTileSet().size())
<< "Got different tiles sets: tile count mismatch";
for (const GraphId& tile_id : first_tiles) {
const GraphTile* first_tile = first_reader.GetGraphTile(tile_id);
ASSERT_TRUE(second_reader.DoesTileExist(tile_id))
<< "Tile " << GraphTile::FileSuffix(tile_id) << " isn't found in the second tile set";
const GraphTile* second_tile = second_reader.GetGraphTile(tile_id);

// human readable check
assert_tile_equalish(*first_tile, *second_tile);

// check that raw tiles are equal
const auto raw_tile_bytes = [](const GraphTile* tile) -> std::string {
const GraphTileHeader* header = tile->header();
return std::string(reinterpret_cast<const char*>(header), header->end_offset());
};
const std::string first_raw_tile = raw_tile_bytes(first_tile);
const std::string second_raw_tile = raw_tile_bytes(second_tile);
const auto diff_positions = std::mismatch(first_raw_tile.begin(), first_raw_tile.end(),
second_raw_tile.begin(), second_raw_tile.end());
if (diff_positions.first != first_raw_tile.end())
FAIL() << "Tile{" << GraphTile::FileSuffix(tile_id.Tile_Base())
<< "} mismatch at byte position "
<< std::distance(first_raw_tile.begin(), diff_positions.first);
}
}
};

TEST_F(ReproducibleBuild, OneHighway) {
const std::string ascii_map = R"(A-----B)";
const gurka::ways ways = {
{"ABA", {{"highway", "motorway"}}},
};
BuildTiles(ascii_map, ways, 100);
}

TEST_F(ReproducibleBuild, BigGridSize) {
const std::string ascii_map = R"(
A----B----C
| .
D----E----F
| . \
G----H----I)";

// BE and EH are highway=path, so no cars
// EI is a shortcut that's not accessible to bikes
const gurka::ways ways = {{"AB", {{"highway", "primary"}}},
{"BC", {{"highway", "primary"}}},
{"DEF", {{"highway", "primary"}}},
{"GHI", {{"highway", "primary"}}},
{"ADG", {{"highway", "motorway"}}},
{"BE", {{"highway", "path"}}},
{"EI", {{"highway", "path"}, {"bicycle", "no"}}},
{"EH", {{"highway", "path"}}}};
BuildTiles(ascii_map, ways, 100000);
}

0 comments on commit 8460b51

Please sign in to comment.