From d7fd78dead621a539c20791a93abec34bb1be385 Mon Sep 17 00:00:00 2001 From: Matthew Russotto Date: Tue, 29 Jun 2021 11:16:43 -0400 Subject: [PATCH] SERVER-53643 Wait for FCV to be majority committed before reporting it. (cherry picked from commit f34d72aed2861b91fdd2907058d1fbec7f66e328) --- .../major_version_upgrade.js | 8 +++ jstests/replsets/rollback_set_fcv.js | 20 +++++--- .../feature_compatibility_version.cpp | 51 +++++++++++++++++-- .../commands/feature_compatibility_version.h | 4 +- 4 files changed, 69 insertions(+), 14 deletions(-) diff --git a/jstests/multiVersion/genericSetFCVUsage/major_version_upgrade.js b/jstests/multiVersion/genericSetFCVUsage/major_version_upgrade.js index 273695dbc05c4..eb70923641667 100644 --- a/jstests/multiVersion/genericSetFCVUsage/major_version_upgrade.js +++ b/jstests/multiVersion/genericSetFCVUsage/major_version_upgrade.js @@ -292,6 +292,14 @@ for (let i = 0; i < versions.length; i++) { assert.commandWorked(primaryAdminDB.runCommand( {setFeatureCompatibilityVersion: version.featureCompatibilityVersion})); rst.awaitReplication(); + // Make sure we reach the new featureCompatibilityVersion in the committed snapshot on + // all nodes before continuing to upgrade. + // checkFCV does not work for version 3.4 (and below) + if (version.featureCompatibilityVersion != '3.4') { + for (let n of rst.nodes) { + checkFCV(n.getDB("admin"), version.featureCompatibilityVersion); + } + } } } diff --git a/jstests/replsets/rollback_set_fcv.js b/jstests/replsets/rollback_set_fcv.js index 7a787532e7bbe..a00c30f3bc0a9 100644 --- a/jstests/replsets/rollback_set_fcv.js +++ b/jstests/replsets/rollback_set_fcv.js @@ -21,6 +21,12 @@ function setFCV(fcv) { ErrorCodes.InterruptedDueToReplStateChange); } +// Using getParameter results in waiting for the current FCV to be majority committed. In this +// test, it never will, so we need to get the FCV directly. 
+function getFCVFromDocument(conn) { + return conn.getDB("admin").system.version.find().readConcern("local").toArray()[0]; +} + // fromFCV refers to the FCV we will test rolling back from. // toFCV refers to the FCV we will test rolling back to. function rollbackFCVFromDowngradingOrUpgrading(fromFCV, toFCV) { @@ -40,9 +46,8 @@ function rollbackFCVFromDowngradingOrUpgrading(fromFCV, toFCV) { // Wait for the FCV update to be reflected on the primary. This should eventually be rolled // back. assert.soon(function() { - let res = assert.commandWorked( - primary.adminCommand({getParameter: 1, featureCompatibilityVersion: 1})); - return res.featureCompatibilityVersion.hasOwnProperty('targetVersion'); + let featureCompatibilityVersion = getFCVFromDocument(primary); + return featureCompatibilityVersion.hasOwnProperty('targetVersion'); }, "Failed waiting for the server to set the targetVersion: " + fromFCV); rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); // Secondaries should never have received the FCV update. @@ -85,10 +90,9 @@ function rollbackFCVFromDowngradedOrUpgraded(fromFCV, toFCV, failPoint) { // should never make it to the secondary. hangBeforeUnsettingTargetVersion.off(); assert.soon(function() { - let res = assert.commandWorked( - primary.adminCommand({getParameter: 1, featureCompatibilityVersion: 1})); - return !res.featureCompatibilityVersion.hasOwnProperty('targetVersion') && - res.featureCompatibilityVersion.version === fromFCV; + let featureCompatibilityVersion = getFCVFromDocument(primary); + return !featureCompatibilityVersion.hasOwnProperty('targetVersion') && + featureCompatibilityVersion.version === fromFCV; }, "Failed waiting for server to unset the targetVersion or to set the FCV to " + fromFCV); rollbackTest.transitionToSyncSourceOperationsBeforeRollback(); // The secondary should never have received the update to unset the targetVersion. 
@@ -125,4 +129,4 @@ rollbackFCVFromDowngradedOrUpgraded(lastStableFCV, latestFCV, "hangWhileDowngrad rollbackFCVFromDowngradedOrUpgraded(latestFCV, lastStableFCV, "hangWhileUpgrading"); rollbackTest.stop(); -}()); \ No newline at end of file +}()); diff --git a/src/mongo/db/commands/feature_compatibility_version.cpp b/src/mongo/db/commands/feature_compatibility_version.cpp index 616fee9802b4d..38bc3fc92f81c 100644 --- a/src/mongo/db/commands/feature_compatibility_version.cpp +++ b/src/mongo/db/commands/feature_compatibility_version.cpp @@ -62,6 +62,10 @@ namespace mongo { using repl::UnreplicatedWritesBlock; Lock::ResourceMutex FeatureCompatibilityVersion::fcvLock("featureCompatibilityVersionLock"); +// lastFCVUpdateTimestamp contains the latest oplog entry timestamp which updated the FCV. +// It is reset on rollback. +Timestamp lastFCVUpdateTimestamp; +SimpleMutex lastFCVUpdateTimestampMutex; MONGO_FAIL_POINT_DEFINE(hangBeforeAbortingRunningTransactionsOnFCVDowngrade); @@ -162,7 +166,7 @@ void FeatureCompatibilityVersion::onInsertOrUpdate(OperationContext* opCtx, cons } opCtx->recoveryUnit()->onCommit( - [opCtx, newVersion](boost::optional<Timestamp>) { _setVersion(opCtx, newVersion); }); + [opCtx, newVersion](boost::optional<Timestamp> ts) { _setVersion(opCtx, newVersion, ts); }); } void FeatureCompatibilityVersion::updateMinWireVersion() { @@ -186,7 +190,17 @@ void FeatureCompatibilityVersion::updateMinWireVersion() { } void FeatureCompatibilityVersion::_setVersion( - OperationContext* opCtx, ServerGlobalParams::FeatureCompatibility::Version newVersion) { + OperationContext* opCtx, + ServerGlobalParams::FeatureCompatibility::Version newVersion, + boost::optional<Timestamp> commitTs) { + // We set the last FCV update timestamp before setting the new FCV, to make sure we never + // read an FCV that is not stable. We might still read a stale one. 
+ { + stdx::lock_guard lk(lastFCVUpdateTimestampMutex); + if (commitTs && *commitTs > lastFCVUpdateTimestamp) { + lastFCVUpdateTimestamp = *commitTs; + } + } serverGlobalParams.featureCompatibility.setVersion(newVersion); updateMinWireVersion(); @@ -229,7 +243,10 @@ void FeatureCompatibilityVersion::onReplicationRollback(OperationContext* opCtx) << FeatureCompatibilityVersionParser::toString(memoryFcv) << "' to '" << FeatureCompatibilityVersionParser::toString(diskFcv) << "' as part of rollback."; - _setVersion(opCtx, diskFcv); + _setVersion(opCtx, diskFcv, boost::none); + // The rollback FCV is already in the stable snapshot. + stdx::lock_guard lk(lastFCVUpdateTimestampMutex); + lastFCVUpdateTimestamp = Timestamp(); } } } @@ -301,7 +318,7 @@ void FeatureCompatibilityVersionParameter::append(OperationContext* opCtx, featureCompatibilityVersionBuilder.append( FeatureCompatibilityVersionParser::kVersionField, FeatureCompatibilityVersionParser::kVersion42); - return; + break; case ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo42: featureCompatibilityVersionBuilder.append( FeatureCompatibilityVersionParser::kVersionField, @@ -322,11 +339,35 @@ void FeatureCompatibilityVersionParameter::append(OperationContext* opCtx, featureCompatibilityVersionBuilder.append( FeatureCompatibilityVersionParser::kVersionField, FeatureCompatibilityVersionParser::kVersion40); - return; + break; case ServerGlobalParams::FeatureCompatibility::Version::kUnsetDefault40Behavior: // getVersion() does not return this value. MONGO_UNREACHABLE; } + // If the FCV has been recently set to the fully upgraded FCV but is not part of the majority + // snapshot, then if we do a binary upgrade, we may see the old FCV at startup. + // It is not safe to do oplog application on the new binary at that point. So we make sure + // that when we report the FCV, it is in the majority snapshot. 
+ // (The same consideration applies at downgrade, where if a recently-set fully downgraded FCV + // is not part of the majority snapshot, the downgraded binary will see the upgrade FCV and + // fail.) + const auto replCoordinator = repl::ReplicationCoordinator::get(opCtx); + const bool isReplSet = replCoordinator && + replCoordinator->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet; + auto neededMajorityTimestamp = [] { + stdx::lock_guard lk(lastFCVUpdateTimestampMutex); + return lastFCVUpdateTimestamp; + }(); + if (isReplSet && !neededMajorityTimestamp.isNull()) { + auto status = replCoordinator->awaitTimestampCommitted(opCtx, neededMajorityTimestamp); + // If majority reads are not supported, we will take a full snapshot on clean shutdown + // and the new FCV will be included, so upgrade is possible. + if (status.code() != ErrorCodes::CommandNotSupported) + uassertStatusOK( + status.withContext("Most recent 'featureCompatibilityVersion' was not in the " + "majority snapshot on this node")); + } + return; } Status FeatureCompatibilityVersionParameter::setFromString(const std::string&) { diff --git a/src/mongo/db/commands/feature_compatibility_version.h b/src/mongo/db/commands/feature_compatibility_version.h index 0e06d36488fdb..1262138a017f3 100644 --- a/src/mongo/db/commands/feature_compatibility_version.h +++ b/src/mongo/db/commands/feature_compatibility_version.h @@ -120,9 +120,11 @@ class FeatureCompatibilityVersion { /** * Set the FCV to newVersion, making sure to close any outgoing connections with incompatible * servers and closing open transactions if necessary. Increments the server TopologyVersion. + * If the commitTimestamp is set, advances the lastFCVUpdateTimestamp to it. */ static void _setVersion(OperationContext* opCtx, - ServerGlobalParams::FeatureCompatibility::Version newVersion); + ServerGlobalParams::FeatureCompatibility::Version newVersion, + boost::optional<Timestamp> commitTimestamp); };