Skip to content

Commit

Permalink
[Metrics] Added additional metrics for CASE and Subscription (#33934)
Browse files Browse the repository at this point in the history
* [Metrics] Added additional metrics for CASE and Subscription

- CASE session tracking
- Subscription Setup
- MRP Retries

* Fixed code review comment

* Restyler fixes

* Update src/darwin/Framework/CHIP/MTRDevice.mm

Co-authored-by: Boris Zbarsky <[email protected]>

* Addressed code review comments

* Update src/darwin/Framework/CHIP/MTRMetricKeys.h

Co-authored-by: Boris Zbarsky <[email protected]>

* Fixed build failure when tracing is off

* Resytle fixes

---------

Co-authored-by: Boris Zbarsky <[email protected]>
  • Loading branch information
anush-apple and bzbarsky-apple committed Jun 19, 2024
1 parent 10da30d commit f6ac926
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 14 deletions.
17 changes: 16 additions & 1 deletion src/app/ReadClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <messaging/ReliableMessageMgr.h>
#include <messaging/ReliableMessageProtocolConfig.h>
#include <platform/LockTracker.h>
#include <tracing/metric_event.h>

#include <app-common/zap-generated/cluster-objects.h>
#include <app-common/zap-generated/ids/Attributes.h>
Expand Down Expand Up @@ -186,6 +187,11 @@ void ReadClient::Close(CHIP_ERROR aError, bool allowResubscription)
}
else
{
if (IsAwaitingInitialReport() || IsAwaitingSubscribeResponse())
{
MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, aError);
}

ClearActiveSubscriptionState();
if (aError != CHIP_NO_ERROR)
{
Expand Down Expand Up @@ -491,6 +497,7 @@ CHIP_ERROR ReadClient::OnMessageReceived(Messaging::ExchangeContext * apExchange
ChipLogProgress(DataManagement, "SubscribeResponse is received");
VerifyOrExit(apExchangeContext == mExchange.Get(), err = CHIP_ERROR_INCORRECT_STATE);
err = ProcessSubscribeResponse(std::move(aPayload));
MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, err);
}
else if (aPayloadHeader.HasMessageType(Protocols::InteractionModel::MsgType::StatusResponse))
{
Expand Down Expand Up @@ -684,6 +691,7 @@ void ReadClient::OnResponseTimeout(Messaging::ExchangeContext * apExchangeContex
{
ChipLogError(DataManagement, "Time out! failed to receive report data from Exchange: " ChipLogFormatExchange,
ChipLogValueExchange(apExchangeContext));

Close(CHIP_ERROR_TIMEOUT);
}

Expand Down Expand Up @@ -1096,11 +1104,18 @@ CHIP_ERROR ReadClient::SendSubscribeRequest(const ReadPrepareParams & aReadPrepa
VerifyOrReturnError(aReadPrepareParams.mMinIntervalFloorSeconds <= aReadPrepareParams.mMaxIntervalCeilingSeconds,
CHIP_ERROR_INVALID_ARGUMENT);

return SendSubscribeRequestImpl(aReadPrepareParams);
auto err = SendSubscribeRequestImpl(aReadPrepareParams);
if (CHIP_NO_ERROR != err)
{
MATTER_LOG_METRIC_END(Tracing::kMetricDeviceSubscriptionSetup, err);
}
return err;
}

CHIP_ERROR ReadClient::SendSubscribeRequestImpl(const ReadPrepareParams & aReadPrepareParams)
{
MATTER_LOG_METRIC_BEGIN(Tracing::kMetricDeviceSubscriptionSetup);

VerifyOrReturnError(ClientState::Idle == mState, CHIP_ERROR_INCORRECT_STATE);

if (&aReadPrepareParams != &mReadPrepareParams)
Expand Down
6 changes: 6 additions & 0 deletions src/darwin/Framework/CHIP/MTRDevice.mm
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ - (void)_deviceInternalStateChanged:(MTRDevice *)device;
using namespace chip;
using namespace chip::app;
using namespace chip::Protocols::InteractionModel;
using namespace chip::Tracing::DarwinFramework;

typedef void (^FirstReportHandler)(void);

Expand Down Expand Up @@ -1184,6 +1185,7 @@ - (void)_handleSubscriptionEstablished
if (HadSubscriptionEstablishedOnce(_internalDeviceState)) {
[self _changeInternalState:MTRInternalDeviceStateLaterSubscriptionEstablished];
} else {
MATTER_LOG_METRIC_END(kMetricMTRDeviceInitialSubscriptionSetup, CHIP_NO_ERROR);
[self _changeInternalState:MTRInternalDeviceStateInitialSubscriptionEstablished];
}

Expand Down Expand Up @@ -2340,6 +2342,10 @@ - (void)_setupSubscriptionWithReason:(NSString *)reason
});
}

// This marks begin of initial subscription to the device (before CASE is established). The end is only marked after successfully setting
// up the subscription since it is always retried as long as the MTRDevice is kept running.
MATTER_LOG_METRIC_BEGIN(kMetricMTRDeviceInitialSubscriptionSetup);

// Call directlyGetSessionForNode because the subscription setup already goes through the subscription pool queue
[_deviceController
directlyGetSessionForNode:_nodeID.unsignedLongLongValue
Expand Down
4 changes: 4 additions & 0 deletions src/darwin/Framework/CHIP/MTRMetricKeys.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <lib/core/CHIPError.h>
#include <tracing/metric_event.h>
#include <tracing/metric_keys.h>

namespace chip {
namespace Tracing {
Expand Down Expand Up @@ -87,6 +88,9 @@ constexpr Tracing::MetricKey kMetricBLEDevicesRemoved = "dwnfw_ble_devices_remov
// Unexpected C quality attribute update outside of priming
constexpr Tracing::MetricKey kMetricUnexpectedCQualityUpdate = "dwnpm_bad_c_attr_update";

// Setup from darwin MTRDevice for initial subscription to a device
constexpr Tracing::MetricKey kMetricMTRDeviceInitialSubscriptionSetup = "dwnpm_dev_initial_subscription_setup";

} // namespace DarwinFramework
} // namespace Tracing
} // namespace chip
2 changes: 2 additions & 0 deletions src/messaging/ReliableMessageMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <messaging/ReliableMessageContext.h>
#include <messaging/ReliableMessageMgr.h>
#include <platform/ConnectivityManager.h>
#include <tracing/metric_event.h>

#if CHIP_CONFIG_ENABLE_ICD_SERVER
#include <app/icd/server/ICDConfigurationData.h> // nogncheck
Expand Down Expand Up @@ -170,6 +171,7 @@ void ReliableMessageMgr::ExecuteActions()
"Retransmitting MessageCounter:" ChipLogFormatMessageCounter " on exchange " ChipLogFormatExchange
" Send Cnt %d",
messageCounter, ChipLogValueExchange(&entry->ec.Get()), entry->sendCount);
MATTER_LOG_METRIC(Tracing::kMetricDeviceRMPRetryCount, entry->sendCount);

CalculateNextRetransTime(*entry);
SendFromRetransTable(entry);
Expand Down
75 changes: 62 additions & 13 deletions src/protocols/secure_channel/CASESession.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
#include <system/SystemClock.h>
#include <system/TLVPacketBufferBackingStore.h>
#include <tracing/macros.h>
#include <tracing/metric_event.h>
#include <transport/SessionManager.h>

namespace {
Expand Down Expand Up @@ -101,6 +102,7 @@ using namespace Credentials;
using namespace Messaging;
using namespace Encoding;
using namespace Protocols::SecureChannel;
using namespace Tracing;

constexpr uint8_t kKDFSR2Info[] = { 0x53, 0x69, 0x67, 0x6d, 0x61, 0x32 };
constexpr uint8_t kKDFSR3Info[] = { 0x53, 0x69, 0x67, 0x6d, 0x61, 0x33 };
Expand Down Expand Up @@ -521,14 +523,15 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric
CHIP_ERROR err = CHIP_NO_ERROR;

// Return early on error here, as we have not initialized any state yet
ReturnErrorCodeIf(exchangeCtxt == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
ReturnErrorCodeIf(fabricTable == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, exchangeCtxt == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, fabricTable == nullptr, CHIP_ERROR_INVALID_ARGUMENT);

// Use FabricTable directly to avoid situation of dangling index from stale FabricInfo
// until we factor-out any FabricInfo direct usage.
ReturnErrorCodeIf(peerScopedNodeId.GetFabricIndex() == kUndefinedFabricIndex, CHIP_ERROR_INVALID_ARGUMENT);
ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, peerScopedNodeId.GetFabricIndex() == kUndefinedFabricIndex,
CHIP_ERROR_INVALID_ARGUMENT);
const auto * fabricInfo = fabricTable->FindFabricWithIndex(peerScopedNodeId.GetFabricIndex());
ReturnErrorCodeIf(fabricInfo == nullptr, CHIP_ERROR_INVALID_ARGUMENT);
ReturnErrorCodeWithMetricIf(kMetricDeviceCASESession, fabricInfo == nullptr, CHIP_ERROR_INVALID_ARGUMENT);

err = Init(sessionManager, policy, delegate, peerScopedNodeId);

Expand All @@ -542,9 +545,11 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric

// From here onwards, let's go to exit on error, as some state might have already
// been initialized
SuccessOrExit(err);
SuccessOrExitWithMetric(kMetricDeviceCASESession, err);

SuccessOrExit(err = fabricTable->AddFabricDelegate(this));
SuccessOrExitWithMetric(kMetricDeviceCASESession, err = fabricTable->AddFabricDelegate(this));

MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESession);

// Set the PeerAddress in the secure session up front to indicate the
// Transport Type of the session that is being set up.
Expand All @@ -571,13 +576,16 @@ CHIP_ERROR CASESession::EstablishSession(SessionManager & sessionManager, Fabric
}
else
{
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma1);
err = SendSigma1();
SuccessOrExit(err);
}

exit:
if (err != CHIP_NO_ERROR)
{
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
Clear();
}
return err;
Expand All @@ -601,6 +609,7 @@ void CASESession::OnResponseTimeout(ExchangeContext * ec)

void CASESession::AbortPendingEstablish(CHIP_ERROR err)
{
MATTER_LOG_METRIC_END(kMetricDeviceCASESession, err);
MATTER_TRACE_SCOPE("AbortPendingEstablish", "CASESession");
// This needs to come before Clear() which will reset mState.
SessionEstablishmentStage state = MapCASEStateToSessionEstablishmentStage(mState);
Expand Down Expand Up @@ -851,7 +860,17 @@ CHIP_ERROR CASESession::SendSigma1()
ReturnErrorOnFailure(mExchangeCtxt.Value()->SendMessage(Protocols::SecureChannel::MsgType::CASE_Sigma1, std::move(msg_R1),
SendFlags(SendMessageFlags::kExpectResponse)));

mState = resuming ? State::kSentSigma1Resume : State::kSentSigma1;
if (resuming)
{
mState = State::kSentSigma1Resume;

// Flags that Resume is being attempted
MATTER_LOG_METRIC(kMetricDeviceCASESessionSigma1Resume);
}
else
{
mState = State::kSentSigma1;
}

ChipLogProgress(SecureChannel, "Sent Sigma1 msg");

Expand Down Expand Up @@ -984,7 +1003,13 @@ CHIP_ERROR CASESession::HandleSigma1(System::PacketBufferHandle && msg)
std::copy(resumptionId.begin(), resumptionId.end(), mResumeResumptionId.begin());

// Send Sigma2Resume message to the initiator
SuccessOrExit(err = SendSigma2Resume());
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma2Resume);
err = SendSigma2Resume();
if (CHIP_NO_ERROR != err)
{
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2Resume, err);
}
SuccessOrExit(err);

mDelegate->OnSessionEstablishmentStarted();

Expand Down Expand Up @@ -1013,7 +1038,13 @@ CHIP_ERROR CASESession::HandleSigma1(System::PacketBufferHandle && msg)
// mRemotePubKey.Length() == initiatorPubKey.size() == kP256_PublicKey_Length.
memcpy(mRemotePubKey.Bytes(), initiatorPubKey.data(), mRemotePubKey.Length());

SuccessOrExit(err = SendSigma2());
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma2);
err = SendSigma2();
if (CHIP_NO_ERROR != err)
{
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err);
}
SuccessOrExit(err);

mDelegate->OnSessionEstablishmentStarted();

Expand Down Expand Up @@ -1236,6 +1267,7 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg)

ChipLogDetail(SecureChannel, "Received Sigma2Resume msg");
MATTER_TRACE_COUNTER("Sigma2Resume");
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);

uint8_t sigma2ResumeMIC[CHIP_CRYPTO_AEAD_MIC_LENGTH_BYTES];

Expand Down Expand Up @@ -1278,6 +1310,7 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg)
ChipLogError(SecureChannel, "Unable to save session resumption state: %" CHIP_ERROR_FORMAT, err2.Format());
}

MATTER_LOG_METRIC(kMetricDeviceCASESessionSigmaFinished);
SendStatusReport(mExchangeCtxt, kProtocolCodeSuccess);

mState = State::kFinishedViaResume;
Expand All @@ -1294,10 +1327,17 @@ CHIP_ERROR CASESession::HandleSigma2Resume(System::PacketBufferHandle && msg)
CHIP_ERROR CASESession::HandleSigma2_and_SendSigma3(System::PacketBufferHandle && msg)
{
MATTER_TRACE_SCOPE("HandleSigma2_and_SendSigma3", "CASESession");
ReturnErrorOnFailure(HandleSigma2(std::move(msg)));
ReturnErrorOnFailure(SendSigma3a());
CHIP_ERROR err = HandleSigma2(std::move(msg));
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
ReturnErrorOnFailure(err);

return CHIP_NO_ERROR;
MATTER_LOG_METRIC_BEGIN(kMetricDeviceCASESessionSigma3);
err = SendSigma3a();
if (CHIP_NO_ERROR != err)
{
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma3, err);
}
return err;
}

CHIP_ERROR CASESession::HandleSigma2(System::PacketBufferHandle && msg)
Expand Down Expand Up @@ -1708,6 +1748,7 @@ CHIP_ERROR CASESession::HandleSigma3a(System::PacketBufferHandle && msg)

ChipLogProgress(SecureChannel, "Received Sigma3 msg");
MATTER_TRACE_COUNTER("Sigma3");
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err);

auto helper = WorkHelper<HandleSigma3Data>::Create(*this, &HandleSigma3b, &CASESession::HandleSigma3c);
VerifyOrExit(helper, err = CHIP_ERROR_NO_MEMORY);
Expand Down Expand Up @@ -1888,6 +1929,7 @@ CHIP_ERROR CASESession::HandleSigma3c(HandleSigma3Data & data, CHIP_ERROR status
}
}

MATTER_LOG_METRIC(kMetricDeviceCASESessionSigmaFinished);
SendStatusReport(mExchangeCtxt, kProtocolCodeSuccess);

mState = State::kFinished;
Expand Down Expand Up @@ -2288,6 +2330,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea

case MsgType::StatusReport:
err = HandleStatusReport(std::move(msg), /* successExpected*/ false);
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
break;

default:
Expand All @@ -2308,6 +2351,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea

case MsgType::StatusReport:
err = HandleStatusReport(std::move(msg), /* successExpected*/ false);
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma1, err);
break;

default:
Expand All @@ -2324,6 +2368,7 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea

case MsgType::StatusReport:
err = HandleStatusReport(std::move(msg), /* successExpected*/ false);
MATTER_LOG_METRIC_END(kMetricDeviceCASESessionSigma2, err);
break;

default:
Expand All @@ -2335,7 +2380,11 @@ CHIP_ERROR CASESession::OnMessageReceived(ExchangeContext * ec, const PayloadHea
case State::kSentSigma2Resume:
if (msgType == Protocols::SecureChannel::MsgType::StatusReport)
{
err = HandleStatusReport(std::move(msg), /* successExpected*/ true);
// Need to capture before invoking status report since 'this' might be deallocated on successful completion of sigma3
MetricKey key = (mState == State::kSentSigma3) ? kMetricDeviceCASESessionSigma3 : kMetricDeviceCASESessionSigma2Resume;
err = HandleStatusReport(std::move(msg), /* successExpected*/ true);
MATTER_LOG_METRIC_END(key, err);
IgnoreUnusedVariable(key);
}
break;
default:
Expand Down
24 changes: 24 additions & 0 deletions src/tracing/metric_keys.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,29 @@ constexpr MetricKey kMetricDeviceOperationalDiscoveryAttemptCount = "core_dev_op
// CASE Session
constexpr MetricKey kMetricDeviceCASESession = "core_dev_case_session";

// CASE Session Sigma1
constexpr MetricKey kMetricDeviceCASESessionSigma1 = "core_dev_case_session_sigma1";

// CASE Session Sigma1Resume
constexpr MetricKey kMetricDeviceCASESessionSigma1Resume = "core_dev_case_session_sigma1_resume";

// CASE Session Sigma2
constexpr MetricKey kMetricDeviceCASESessionSigma2 = "core_dev_case_session_sigma2";

// CASE Session Sigma3
constexpr MetricKey kMetricDeviceCASESessionSigma3 = "core_dev_case_session_sigma3";

// CASE Session Sigma2 Resume
constexpr MetricKey kMetricDeviceCASESessionSigma2Resume = "core_dev_case_session_sigma2_resume";

// CASE Session SigmaFinished
constexpr MetricKey kMetricDeviceCASESessionSigmaFinished = "core_dev_case_session_sigma_finished";

// MRP Retry Counter
constexpr MetricKey kMetricDeviceRMPRetryCount = "core_dev_rmp_retry_count";

// Subscription setup
constexpr MetricKey kMetricDeviceSubscriptionSetup = "core_dev_subscription_setup";

} // namespace Tracing
} // namespace chip

0 comments on commit f6ac926

Please sign in to comment.