more metrics

This commit is contained in:
Grant Limberg 2023-05-19 09:40:19 -07:00
commit a6c9460de8
No known key found for this signature in database
GPG key ID: 8F2F97D3BE8D7735
5 changed files with 150 additions and 23 deletions

View file

@ -460,17 +460,39 @@ static bool _parseRule(json &r,ZT_VirtualNetworkRule &rule)
} // anonymous namespace } // anonymous namespace
EmbeddedNetworkController::EmbeddedNetworkController(Node *node,const char *ztPath,const char *dbPath, int listenPort, RedisConfig *rc) : EmbeddedNetworkController::EmbeddedNetworkController(Node *node,const char *ztPath,const char *dbPath, int listenPort, RedisConfig *rc)
_startTime(OSUtils::now()), : _startTime(OSUtils::now())
_listenPort(listenPort), , _listenPort(listenPort)
_node(node), , _node(node)
_ztPath(ztPath), , _ztPath(ztPath)
_path(dbPath), , _path(dbPath)
_sender((NetworkController::Sender *)0), , _sender((NetworkController::Sender *)0)
_db(this), , _db(this)
_ssoExpiryRunning(true), , _ssoExpiryRunning(true)
_ssoExpiry(std::thread(&EmbeddedNetworkController::_ssoExpiryThread, this)), , _ssoExpiry(std::thread(&EmbeddedNetworkController::_ssoExpiryThread, this))
_rc(rc) , _rc(rc)
, _member_status_lookup{"nc_member_status_lookup",""}
, _member_status_lookup_count{"nc_member_status_lookup_count",""}
, _node_is_online{"nc_node_is_online",""}
, _node_is_online_count{"nc_node_is_online_count",""}
, _get_and_init_member{"nc_get_and_init_member",""}
, _get_and_init_member_count{"nc_get_and_init_member_count",""}
, _have_identity{"nc_have_identity",""}
, _have_identity_count{"nc_have_identity_count",""}
, _determine_auth{"nc_determine_auth",""}
, _determine_auth_count{"nc_determine_auth_count",""}
, _sso_check{"nc_sso_check",""}
, _sso_check_count{"nc_sso_check_count",""}
, _auth_check{"nc_auth_check",""}
, _auth_check_count{"nc_auth_check_count",""}
, _json_schlep{"nc_json_schlep",""}
, _json_schlep_count{"nc_json_schlep_count",""}
, _issue_certificate{"nc_issue_certificate", ""}
, _issue_certificate_count{"nc_issue_certificate_count",""}
, _save_member{"nc_save_member",""}
, _save_member_count{"nc_save_member_count",""}
, _send_netconf{"nc_send_netconf2",""}
, _send_netconf_count{"nc_send_netconf2_count",""}
{ {
} }
@ -1177,37 +1199,62 @@ void EmbeddedNetworkController::_request(
const Dictionary<ZT_NETWORKCONFIG_METADATA_DICT_CAPACITY> &metaData) const Dictionary<ZT_NETWORKCONFIG_METADATA_DICT_CAPACITY> &metaData)
{ {
Metrics::network_config_request++; Metrics::network_config_request++;
auto tid = std::this_thread::get_id();
std::stringstream ss; ss << tid;
std::string threadID = ss.str();
auto b1 = _member_status_lookup.Add({{"thread", threadID}});
auto c1 = _member_status_lookup_count.Add({{"thread", threadID}});
c1++;
b1.start();
char nwids[24]; char nwids[24];
DB::NetworkSummaryInfo ns; DB::NetworkSummaryInfo ns;
json network,member; json network,member;
if (((!_signingId)||(!_signingId.hasPrivate()))||(_signingId.address().toInt() != (nwid >> 24))||(!_sender)) if (((!_signingId)||(!_signingId.hasPrivate()))||(_signingId.address().toInt() != (nwid >> 24))||(!_sender)) {
return; return;
}
const int64_t now = OSUtils::now(); const int64_t now = OSUtils::now();
if (requestPacketId) { if (requestPacketId) {
std::lock_guard<std::mutex> l(_memberStatus_l); std::lock_guard<std::mutex> l(_memberStatus_l);
_MemberStatus &ms = _memberStatus[_MemberStatusKey(nwid,identity.address().toInt())]; _MemberStatus &ms = _memberStatus[_MemberStatusKey(nwid,identity.address().toInt())];
if ((now - ms.lastRequestTime) <= ZT_NETCONF_MIN_REQUEST_PERIOD) if ((now - ms.lastRequestTime) <= ZT_NETCONF_MIN_REQUEST_PERIOD) {
return; return;
}
ms.lastRequestTime = now; ms.lastRequestTime = now;
} }
b1.stop();
auto b2 = _node_is_online.Add({{"thread",threadID}});
auto c2 = _node_is_online_count.Add({{"thread",threadID}});
c2++;
b2.start();
_db.nodeIsOnline(nwid,identity.address().toInt(),fromAddr); _db.nodeIsOnline(nwid,identity.address().toInt(),fromAddr);
b2.stop();
auto b3 = _get_and_init_member.Add({{"thread", threadID}});
auto c3 = _get_and_init_member_count.Add({{"thread",threadID}});
c3++;
b3.start();
Utils::hex(nwid,nwids); Utils::hex(nwid,nwids);
_db.get(nwid,network,identity.address().toInt(),member,ns); _db.get(nwid,network,identity.address().toInt(),member,ns);
if ((!network.is_object())||(network.empty())) { if ((!network.is_object())||(network.empty())) {
_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_OBJECT_NOT_FOUND, nullptr, 0); _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_OBJECT_NOT_FOUND, nullptr, 0);
b3.stop();
return; return;
} }
const bool newMember = ((!member.is_object())||(member.empty())); const bool newMember = ((!member.is_object())||(member.empty()));
DB::initMember(member); DB::initMember(member);
_MemberStatusKey msk(nwid,identity.address().toInt()); _MemberStatusKey msk(nwid,identity.address().toInt());
b3.stop();
{ {
auto b4 = _have_identity.Add({{"thread",threadID}});
auto c4 = _have_identity_count.Add({{"thread",threadID}});
c4++;
b4.start();
const std::string haveIdStr(OSUtils::jsonString(member["identity"],"")); const std::string haveIdStr(OSUtils::jsonString(member["identity"],""));
if (haveIdStr.length() > 0) { if (haveIdStr.length() > 0) {
// If we already know this member's identity perform a full compare. This prevents // If we already know this member's identity perform a full compare. This prevents
@ -1216,10 +1263,12 @@ void EmbeddedNetworkController::_request(
try { try {
if (Identity(haveIdStr.c_str()) != identity) { if (Identity(haveIdStr.c_str()) != identity) {
_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0); _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0);
b4.stop();
return; return;
} }
} catch ( ... ) { } catch ( ... ) {
_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0); _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0);
b4.stop();
return; return;
} }
} else { } else {
@ -1227,6 +1276,7 @@ void EmbeddedNetworkController::_request(
char idtmp[1024]; char idtmp[1024];
member["identity"] = identity.toString(false,idtmp); member["identity"] = identity.toString(false,idtmp);
} }
b4.stop();
} }
// These are always the same, but make sure they are set // These are always the same, but make sure they are set
@ -1239,6 +1289,10 @@ void EmbeddedNetworkController::_request(
} }
// Determine whether and how member is authorized // Determine whether and how member is authorized
auto b5 = _determine_auth.Add({{"thread",threadID}});
auto c5 = _determine_auth_count.Add({{"thread",threadID}});
c5++;
b5.start();
bool authorized = false; bool authorized = false;
bool autoAuthorized = false; bool autoAuthorized = false;
json autoAuthCredentialType,autoAuthCredential; json autoAuthCredentialType,autoAuthCredential;
@ -1275,10 +1329,15 @@ void EmbeddedNetworkController::_request(
member["lastAuthorizedCredentialType"] = autoAuthCredentialType; member["lastAuthorizedCredentialType"] = autoAuthCredentialType;
member["lastAuthorizedCredential"] = autoAuthCredential; member["lastAuthorizedCredential"] = autoAuthCredential;
} }
b5.stop();
// Should we check SSO Stuff? // Should we check SSO Stuff?
// If network is configured with SSO, and the member is not marked exempt: yes // If network is configured with SSO, and the member is not marked exempt: yes
// Otherwise no, we use standard auth logic. // Otherwise no, we use standard auth logic.
auto b6 = _sso_check.Add({{"thread",threadID}});
auto c6 = _sso_check_count.Add({{"thread",threadID}});
c6++;
b6.start();
AuthInfo info; AuthInfo info;
int64_t authenticationExpiryTime = -1; int64_t authenticationExpiryTime = -1;
bool networkSSOEnabled = OSUtils::jsonBool(network["ssoEnabled"], false); bool networkSSOEnabled = OSUtils::jsonBool(network["ssoEnabled"], false);
@ -1305,11 +1364,18 @@ void EmbeddedNetworkController::_request(
} }
DB::cleanMember(member); DB::cleanMember(member);
_db.save(member,true); _db.save(member,true);
b6.stop();
return; return;
} }
} }
b6.stop();
auto b7 = _auth_check.Add({{"thread",threadID}});
auto c7 = _auth_check_count.Add({{"thread",threadID}});
c7++;
b7.start();
if (authorized) { if (authorized) {
Metrics::nc_authed_request++;
// Update version info and meta-data if authorized and if this is a genuine request // Update version info and meta-data if authorized and if this is a genuine request
if (requestPacketId) { if (requestPacketId) {
const uint64_t vMajor = metaData.getUI(ZT_NETWORKCONFIG_REQUEST_METADATA_KEY_NODE_MAJOR_VERSION,0); const uint64_t vMajor = metaData.getUI(ZT_NETWORKCONFIG_REQUEST_METADATA_KEY_NODE_MAJOR_VERSION,0);
@ -1340,12 +1406,15 @@ void EmbeddedNetworkController::_request(
} }
} }
} else { } else {
Metrics::nc_unauthed_request++;
// If they are not authorized, STOP! // If they are not authorized, STOP!
DB::cleanMember(member); DB::cleanMember(member);
_db.save(member,true); _db.save(member,true);
_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0); _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_ACCESS_DENIED, nullptr, 0);
b7.stop();
return; return;
} }
b7.stop();
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
// If we made it this far, they are authorized (and authenticated). // If we made it this far, they are authorized (and authenticated).
@ -1353,6 +1422,10 @@ void EmbeddedNetworkController::_request(
// Default timeout: 15 minutes. Maximum: two hours. Can be specified by an optional field in the network config // Default timeout: 15 minutes. Maximum: two hours. Can be specified by an optional field in the network config
// if something longer than 15 minutes is desired. Minimum is 5 minutes since shorter than that would be flaky. // if something longer than 15 minutes is desired. Minimum is 5 minutes since shorter than that would be flaky.
auto b8 = _json_schlep.Add({{"thread",threadID}});
auto c8 = _json_schlep_count.Add({{"thread", threadID}});
c8++;
b8.start();
int64_t credentialtmd = ZT_NETWORKCONFIG_DEFAULT_CREDENTIAL_TIME_DFL_MAX_DELTA; int64_t credentialtmd = ZT_NETWORKCONFIG_DEFAULT_CREDENTIAL_TIME_DFL_MAX_DELTA;
if (network.contains("certificateTimeoutWindowSize")) { if (network.contains("certificateTimeoutWindowSize")) {
credentialtmd = (int64_t)network["certificateTimeoutWindowSize"]; credentialtmd = (int64_t)network["certificateTimeoutWindowSize"];
@ -1420,8 +1493,9 @@ void EmbeddedNetworkController::_request(
nc->remoteTraceLevel = (Trace::Level)OSUtils::jsonInt(network["remoteTraceLevel"],0ULL); nc->remoteTraceLevel = (Trace::Level)OSUtils::jsonInt(network["remoteTraceLevel"],0ULL);
} }
for(std::vector<Address>::const_iterator ab(ns.activeBridges.begin());ab!=ns.activeBridges.end();++ab) for(std::vector<Address>::const_iterator ab(ns.activeBridges.begin());ab!=ns.activeBridges.end();++ab) {
nc->addSpecialist(*ab,ZT_NETWORKCONFIG_SPECIALIST_TYPE_ACTIVE_BRIDGE); nc->addSpecialist(*ab,ZT_NETWORKCONFIG_SPECIALIST_TYPE_ACTIVE_BRIDGE);
}
json &v4AssignMode = network["v4AssignMode"]; json &v4AssignMode = network["v4AssignMode"];
json &v6AssignMode = network["v6AssignMode"]; json &v6AssignMode = network["v6AssignMode"];
@ -1741,12 +1815,18 @@ void EmbeddedNetworkController::_request(
} else { } else {
dns = json::object(); dns = json::object();
} }
b8.stop();
// Issue a certificate of ownership for all static IPs // Issue a certificate of ownership for all static IPs
auto b9 = _issue_certificate.Add({{"thread",threadID}});
auto c9 = _issue_certificate_count.Add({{"thread",threadID}});
c9++;
b9.start();
if (nc->staticIpCount) { if (nc->staticIpCount) {
nc->certificatesOfOwnership[0] = CertificateOfOwnership(nwid,now,identity.address(),1); nc->certificatesOfOwnership[0] = CertificateOfOwnership(nwid,now,identity.address(),1);
for(unsigned int i=0;i<nc->staticIpCount;++i) for(unsigned int i=0;i<nc->staticIpCount;++i) {
nc->certificatesOfOwnership[0].addThing(nc->staticIps[i]); nc->certificatesOfOwnership[0].addThing(nc->staticIps[i]);
}
nc->certificatesOfOwnership[0].sign(_signingId); nc->certificatesOfOwnership[0].sign(_signingId);
nc->certificateOfOwnershipCount = 1; nc->certificateOfOwnershipCount = 1;
} }
@ -1756,19 +1836,33 @@ void EmbeddedNetworkController::_request(
nc->com = com; nc->com = com;
} else { } else {
_sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_INTERNAL_SERVER_ERROR, nullptr, 0); _sender->ncSendError(nwid,requestPacketId,identity.address(),NetworkController::NC_ERROR_INTERNAL_SERVER_ERROR, nullptr, 0);
b9.stop();
return; return;
} }
b9.stop();
auto b10 = _save_member.Add({{"thread",threadID}});
auto c10 = _save_member_count.Add({{"thread",threadID}});
c10++;
b10.start();
DB::cleanMember(member); DB::cleanMember(member);
_db.save(member,true); _db.save(member,true);
b10.stop();
auto b11 = _send_netconf.Add({{"thread",threadID}});
auto c11 = _send_netconf_count.Add({{"thread",threadID}});
c11++;
b11.start();
_sender->ncSendConfig(nwid,requestPacketId,identity.address(),*(nc.get()),metaData.getUI(ZT_NETWORKCONFIG_REQUEST_METADATA_KEY_VERSION,0) < 6); _sender->ncSendConfig(nwid,requestPacketId,identity.address(),*(nc.get()),metaData.getUI(ZT_NETWORKCONFIG_REQUEST_METADATA_KEY_VERSION,0) < 6);
b11.stop();
} }
void EmbeddedNetworkController::_startThreads() void EmbeddedNetworkController::_startThreads()
{ {
std::lock_guard<std::mutex> l(_threads_l); std::lock_guard<std::mutex> l(_threads_l);
if (!_threads.empty()) if (!_threads.empty()) {
return; return;
}
const long hwc = std::max((long)std::thread::hardware_concurrency(),(long)1); const long hwc = std::max((long)std::thread::hardware_concurrency(),(long)1);
for(long t=0;t<hwc;++t) { for(long t=0;t<hwc;++t) {
_threads.emplace_back([this]() { _threads.emplace_back([this]() {

View file

@ -150,6 +150,29 @@ private:
RedisConfig *_rc; RedisConfig *_rc;
std::string _ssoRedirectURL; std::string _ssoRedirectURL;
// Per-stage instrumentation for _request(). Every stage has a benchmark family
// (start()/stop() is called around the stage) and a counter family (incremented
// once each time the stage is entered). Both are obtained with a "thread" label
// holding the calling thread's ID. The exported metric names are assigned in the
// constructor's initializer list.

// Stage 1: signing-identity validation and the per-member request rate check.
prometheus::simpleapi::benchmark_family_t _member_status_lookup;
prometheus::simpleapi::counter_family_t _member_status_lookup_count;
// Stage 2: the _db.nodeIsOnline() call.
prometheus::simpleapi::benchmark_family_t _node_is_online;
prometheus::simpleapi::counter_family_t _node_is_online_count;
// Stage 3: _db.get() for the network/member followed by DB::initMember().
prometheus::simpleapi::benchmark_family_t _get_and_init_member;
prometheus::simpleapi::counter_family_t _get_and_init_member_count;
// Stage 4: comparison of the stored member identity with the requesting identity.
prometheus::simpleapi::benchmark_family_t _have_identity;
prometheus::simpleapi::counter_family_t _have_identity_count;
// Stage 5: deciding whether (and how) the member is authorized.
prometheus::simpleapi::benchmark_family_t _determine_auth;
prometheus::simpleapi::counter_family_t _determine_auth_count;
// Stage 6: SSO handling for networks with "ssoEnabled" set.
prometheus::simpleapi::benchmark_family_t _sso_check;
prometheus::simpleapi::counter_family_t _sso_check_count;
// Stage 7: final authorized / not-authorized branch, including the rejection path.
prometheus::simpleapi::benchmark_family_t _auth_check;
prometheus::simpleapi::counter_family_t _auth_check_count;
// Stage 8: populating the outgoing network config from the network/member JSON.
prometheus::simpleapi::benchmark_family_t _json_schlep;
prometheus::simpleapi::counter_family_t _json_schlep_count;
// Stage 9: issuing the certificate of ownership for static IPs and attaching the membership certificate.
prometheus::simpleapi::benchmark_family_t _issue_certificate;
prometheus::simpleapi::counter_family_t _issue_certificate_count;
// Stage 10: DB::cleanMember() followed by _db.save().
prometheus::simpleapi::benchmark_family_t _save_member;
prometheus::simpleapi::counter_family_t _save_member_count;
// Stage 11: the _sender->ncSendConfig() call.
// NOTE: exported as "nc_send_netconf2" / "nc_send_netconf2_count", not "nc_send_netconf".
prometheus::simpleapi::benchmark_family_t _send_netconf;
prometheus::simpleapi::counter_family_t _send_netconf_count;
}; };
} // namespace ZeroTier } // namespace ZeroTier

View file

@ -374,6 +374,7 @@ void PostgreSQL::nodeIsOnline(const uint64_t networkId, const uint64_t memberId,
AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::string &redirectURL) AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::string &redirectURL)
{ {
Metrics::db_get_sso_info++;
// NONCE is just a random character string. no semantic meaning // NONCE is just a random character string. no semantic meaning
// state = HMAC SHA384 of Nonce based on shared sso key // state = HMAC SHA384 of Nonce based on shared sso key
// //
@ -462,11 +463,11 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
uint64_t sso_version = 0; uint64_t sso_version = 0;
if (r.size() == 1) { if (r.size() == 1) {
client_id = r.at(0)[0].as<std::string>(); client_id = r.at(0)[0].as<std::optional<std::string>>().value_or("");
authorization_endpoint = r.at(0)[1].as<std::string>(); authorization_endpoint = r.at(0)[1].as<std::optional<std::string>>().value_or("");
issuer = r.at(0)[2].as<std::string>(); issuer = r.at(0)[2].as<std::optional<std::string>>().value_or("");
provider = r.at(0)[3].as<std::string>(); provider = r.at(0)[3].as<std::optional<std::string>>().value_or("");
sso_version = r.at(0)[4].as<uint64_t>(); sso_version = r.at(0)[4].as<std::optional<uint64_t>>().value_or(1);
} else if (r.size() > 1) { } else if (r.size() > 1) {
fprintf(stderr, "ERROR: More than one auth endpoint for an organization?!?!? NetworkID: %s\n", networkId.c_str()); fprintf(stderr, "ERROR: More than one auth endpoint for an organization?!?!? NetworkID: %s\n", networkId.c_str());
} else { } else {
@ -1596,7 +1597,6 @@ void PostgreSQL::commitThread()
} }
_pool->unborrow(c); _pool->unborrow(c);
c.reset(); c.reset();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
} }
fprintf(stderr, "%s commitThread finished\n", _myAddressStr.c_str()); fprintf(stderr, "%s commitThread finished\n", _myAddressStr.c_str());

View file

@ -233,7 +233,13 @@ namespace ZeroTier {
{ "controller_db_member_change", "counter" }; { "controller_db_member_change", "counter" };
prometheus::simpleapi::counter_metric_t db_network_change prometheus::simpleapi::counter_metric_t db_network_change
{ "controller_db_network_change", "counter" }; { "controller_db_network_change", "counter" };
// Incremented on every call to PostgreSQL::getSSOAuthInfo().
prometheus::simpleapi::counter_metric_t db_get_sso_info
{ "controller_db_get_sso_info", "counter" };
// Incremented by the controller's _request() when the member is NOT authorized
// and the request is rejected. The exported name must say "unauthorized": it was
// previously swapped with the authorized counter, which inverted the scraped data.
prometheus::simpleapi::counter_metric_t nc_unauthed_request
{ "controller_unauthorized_requests", "counter" };
// Incremented by the controller's _request() when the member is authorized.
prometheus::simpleapi::counter_metric_t nc_authed_request
{ "controller_authorized_requests", "counter" };
#ifdef ZT_CONTROLLER_USE_LIBPQ #ifdef ZT_CONTROLLER_USE_LIBPQ
// Central Controller Metrics // Central Controller Metrics
prometheus::simpleapi::counter_metric_t pgsql_mem_notification prometheus::simpleapi::counter_metric_t pgsql_mem_notification

View file

@ -136,6 +136,10 @@ namespace ZeroTier {
extern prometheus::simpleapi::counter_metric_t db_get_network_list; extern prometheus::simpleapi::counter_metric_t db_get_network_list;
extern prometheus::simpleapi::counter_metric_t db_member_change; extern prometheus::simpleapi::counter_metric_t db_member_change;
extern prometheus::simpleapi::counter_metric_t db_network_change; extern prometheus::simpleapi::counter_metric_t db_network_change;
// Counts calls to PostgreSQL::getSSOAuthInfo().
extern prometheus::simpleapi::counter_metric_t db_get_sso_info;
// Counts network config requests rejected because the member is not authorized.
extern prometheus::simpleapi::counter_metric_t nc_unauthed_request;
// Counts network config requests from authorized members.
extern prometheus::simpleapi::counter_metric_t nc_authed_request;
#ifdef ZT_CONTROLLER_USE_LIBPQ #ifdef ZT_CONTROLLER_USE_LIBPQ
// Central Controller Metrics // Central Controller Metrics