diff --git a/node/Metrics.cpp b/node/Metrics.cpp index 11232cb0e..366f4ab32 100644 --- a/node/Metrics.cpp +++ b/node/Metrics.cpp @@ -13,6 +13,7 @@ // clang-format off #include #include +#include "Metrics.hpp" // clang-format on namespace prometheus { @@ -162,5 +163,68 @@ prometheus::simpleapi::gauge_metric_t pool_avail { "controller_pgsql_available_c prometheus::simpleapi::gauge_metric_t pool_in_use { "controller_pgsql_in_use_conns", "number of postgres database connections in use" }; prometheus::simpleapi::counter_metric_t pool_errors { "controller_pgsql_connection_errors", "number of connection errors the connection pool has seen" }; #endif -} // namespace Metrics -} // namespace ZeroTier + + // Fragmentation Metrics + prometheus::simpleapi::counter_family_t packet_fragmentation + { "zt_packet_fragmentation", "ZeroTier packet fragmentation events" }; + + // VL2 Fragmentation Metrics + prometheus::simpleapi::counter_metric_t vl2_oversized_frame_tx + { packet_fragmentation.Add({{"layer", "VL2"}, {"direction", "tx"}, {"reason", "oversized_frame"}}) }; + prometheus::simpleapi::counter_metric_t vl2_would_fragment_or_drop_rx + { packet_fragmentation.Add({{"layer", "VL2"}, {"direction", "rx"}, {"reason", "would_fragment_or_drop"}}) }; + + // VL1 Fragmentation Metrics + prometheus::simpleapi::counter_metric_t vl1_fragmented_tx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "tx"}, {"reason", "mtu_exceeded"}}) }; + prometheus::simpleapi::counter_metric_t vl1_fragment_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment"}}) }; + prometheus::simpleapi::counter_metric_t vl1_reassembly_failed_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "reassembly_failed"}}) }; + prometheus::simpleapi::counter_metric_t vl1_fragment_without_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment_without_head"}}) }; + prometheus::simpleapi::counter_metric_t 
vl1_fragment_before_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment_before_head"}}) }; + prometheus::simpleapi::counter_metric_t vl1_duplicate_fragment_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "duplicate_fragment"}}) }; + prometheus::simpleapi::counter_metric_t vl1_duplicate_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "duplicate_head"}}) }; + + // VL1 Fragmentation Histogram and Counters + prometheus::CustomFamily> &vl1_fragments_per_packet_histogram = + prometheus::Builder>() + .Name("zt_vl1_fragments_per_packet") + .Help("Histogram of fragments per packet at VL1") + .Register(prometheus::simpleapi::registry); + prometheus::simpleapi::counter_metric_t vl1_incomplete_reassembly_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "incomplete_reassembly"}}) }; + prometheus::simpleapi::counter_metric_t vl1_vl2_double_fragmentation_tx + { packet_fragmentation.Add({{"layer", "VL1_VL2"}, {"direction", "tx"}, {"reason", "double_fragmentation"}}) }; + + prometheus::Histogram &vl1_fragments_per_packet_hist = + vl1_fragments_per_packet_histogram.Add( + {}, + std::vector(std::begin(ZeroTier::Metrics::VL1_FRAGMENTS_PER_PACKET_BUCKETS), std::end(ZeroTier::Metrics::VL1_FRAGMENTS_PER_PACKET_BUCKETS)) + ); + + // VL2 Frame Size Histogram + // Buckets: 512 (IoT/legacy), 576 (min IPv4), 1200 (QUIC/mobile), 1280 (min IPv6), + // 1332, 1380, 1400 (VPN/overlay), 1420 (cloud), 1460 (TCP MSS), 1472 (ICMP/MTU), + // 1480 (ICMP/MTU), 1492 (PPPoE), 1500 (Ethernet), 2800 (VL2 default), 9000 (jumbo) + prometheus::CustomFamily> &vl2_frame_size_histogram = + prometheus::Builder>() + .Name("zt_vl2_frame_size") + .Help("Histogram of frame sizes delivered to TAP (VL2)") + .Register(prometheus::simpleapi::registry); + prometheus::simpleapi::counter_metric_t vl2_incomplete_reassembly_rx + { packet_fragmentation.Add({{"layer", "VL2"}, 
{"direction", "rx"}, {"reason", "incomplete_reassembly"}}) }; + prometheus::simpleapi::counter_metric_t vl2_vl1_double_fragmentation_tx + { packet_fragmentation.Add({{"layer", "VL2_VL1"}, {"direction", "tx"}, {"reason", "double_fragmentation"}}) }; + + prometheus::Histogram &vl2_frame_size_hist = + vl2_frame_size_histogram.Add( + {}, + std::vector(std::begin(ZeroTier::Metrics::VL2_FRAME_SIZE_BUCKETS), std::end(ZeroTier::Metrics::VL2_FRAME_SIZE_BUCKETS)) + ); + } +} diff --git a/node/Metrics.hpp b/node/Metrics.hpp index 8c2c4290d..a8574225a 100644 --- a/node/Metrics.hpp +++ b/node/Metrics.hpp @@ -139,6 +139,38 @@ extern prometheus::simpleapi::counter_metric_t db_get_network_list; extern prometheus::simpleapi::counter_metric_t db_member_change; extern prometheus::simpleapi::counter_metric_t db_network_change; + // Fragmentation Metrics + extern prometheus::simpleapi::counter_family_t packet_fragmentation; + + // VL2 Fragmentation Metrics + extern prometheus::simpleapi::counter_metric_t vl2_oversized_frame_tx; + extern prometheus::simpleapi::counter_metric_t vl2_would_fragment_or_drop_rx; + + // VL1 Fragmentation Metrics + extern prometheus::simpleapi::counter_metric_t vl1_fragmented_tx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_rx; + extern prometheus::simpleapi::counter_metric_t vl1_reassembly_failed_rx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_without_head_rx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_before_head_rx; + extern prometheus::simpleapi::counter_metric_t vl1_duplicate_fragment_rx; + extern prometheus::simpleapi::counter_metric_t vl1_duplicate_head_rx; + + // VL1 Fragmentation Histogram and Counters + extern prometheus::CustomFamily> &vl1_fragments_per_packet_histogram; + extern prometheus::simpleapi::counter_metric_t vl1_incomplete_reassembly_rx; + extern prometheus::simpleapi::counter_metric_t vl1_vl2_double_fragmentation_tx; + + // VL2 Frame Size Histogram + // Buckets: 512 (IoT/legacy), 576 
(min IPv4), 1200 (QUIC/mobile), 1280 (min IPv6), + // 1332, 1380, 1400 (VPN/overlay), 1420 (cloud), 1460 (TCP MSS), 1472 (ICMP/MTU), + // 1480 (ICMP/MTU), 1492 (PPPoE), 1500 (Ethernet), 2800 (VL2 default), 9000 (jumbo) + extern prometheus::CustomFamily> &vl2_frame_size_histogram; + + // Histogram bucket boundaries for VL1 fragments per packet + inline constexpr uint64_t VL1_FRAGMENTS_PER_PACKET_BUCKETS[] = {1,2,3,4,5,6,7,8,9,10,12,16}; + // Histogram bucket boundaries for VL2 frame size + inline constexpr uint64_t VL2_FRAME_SIZE_BUCKETS[] = {512,576,1200,1280,1332,1380,1400,1420,1460,1472,1480,1492,1500,2800,9000}; + #ifdef ZT_CONTROLLER_USE_LIBPQ // Central Controller Metrics extern prometheus::simpleapi::counter_metric_t pgsql_mem_notification; @@ -159,7 +191,10 @@ extern prometheus::simpleapi::gauge_metric_t pool_avail; extern prometheus::simpleapi::gauge_metric_t pool_in_use; extern prometheus::simpleapi::counter_metric_t pool_errors; #endif -} // namespace Metrics -} // namespace ZeroTier + + extern prometheus::Histogram &vl1_fragments_per_packet_hist; + extern prometheus::Histogram &vl2_frame_size_hist; + } // namespace Metrics +}// namespace ZeroTier #endif // METRICS_H_ diff --git a/node/Switch.cpp b/node/Switch.cpp index 6b0e2b081..ed391a134 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -121,7 +121,7 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd Mutex::Lock rql(rq->lock); if (rq->packetId != fragmentPacketId) { // No packet found, so we received a fragment without its head. - + Metrics::vl1_fragment_without_head_rx++; rq->flowId = flowId; rq->timestamp = now; rq->packetId = fragmentPacketId; @@ -132,7 +132,7 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd } else if (! 
(rq->haveFragments & (1 << fragmentNumber))) { // We have other fragments and maybe the head, so add this one and check - + Metrics::vl1_fragment_before_head_rx++; rq->frags[fragmentNumber - 1] = fragment; rq->totalFragments = totalFragments; @@ -148,9 +148,14 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd } else { rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something + Metrics::vl1_reassembly_failed_rx++; } } - } // else this is a duplicate fragment, ignore + } + else { + // This is a duplicate fragment, ignore + Metrics::vl1_duplicate_fragment_rx++; + } } } @@ -201,9 +206,9 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd // Packet is the head of a fragmented packet series const uint64_t packetId = - ((((uint64_t)reinterpret_cast(data)[0]) << 56) | (((uint64_t)reinterpret_cast(data)[1]) << 48) | (((uint64_t)reinterpret_cast(data)[2]) << 40) - | (((uint64_t)reinterpret_cast(data)[3]) << 32) | (((uint64_t)reinterpret_cast(data)[4]) << 24) | (((uint64_t)reinterpret_cast(data)[5]) << 16) - | (((uint64_t)reinterpret_cast(data)[6]) << 8) | ((uint64_t)reinterpret_cast(data)[7])); + ((((uint64_t) reinterpret_cast(data)[0]) << 56) | (((uint64_t) reinterpret_cast(data)[1]) << 48) | (((uint64_t) reinterpret_cast(data)[2]) << 40) + | (((uint64_t) reinterpret_cast(data)[3]) << 32) | (((uint64_t) reinterpret_cast(data)[4]) << 24) | (((uint64_t) reinterpret_cast(data)[5]) << 16) + | (((uint64_t) reinterpret_cast(data)[6]) << 8) | ((uint64_t) reinterpret_cast(data)[7])); RXQueueEntry* const rq = _findRXQueueEntry(packetId); Mutex::Lock rql(rq->lock); @@ -234,13 +239,18 @@ void Switch::onRemotePacket(void* tPtr, const int64_t localSocket, const InetAdd } else { rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something + Metrics::vl1_reassembly_failed_rx++; } } else { // Still waiting on more fragments, but keep the head 
rq->frag0.init(data, len, path, now); } - } // else this is a duplicate head, ignore + } + else { + // This is a duplicate head, ignore + Metrics::vl1_duplicate_head_rx++; + } } else { // Packet is unfragmented, so just process it @@ -272,6 +282,13 @@ void Switch::onLocalEthernet(void* tPtr, const SharedPtr& network, cons return; } + // VL2 fragmentation metric: oversized frame from TAP device (TX) + if (len > network->config().mtu) { + Metrics::vl2_oversized_frame_tx++; + // Measurement only: do not drop the frame or return here, + // so it continues through the normal processing below. + } + // Check if this packet is from someone other than the tap -- i.e. bridged in bool fromBridged; if ((fromBridged = (from != network->mac()))) { @@ -392,7 +409,7 @@ void Switch::onLocalEthernet(void* tPtr, const SharedPtr& network, cons const InetAddress* const sip = &(network->config().staticIps[sipk]); if (sip->ss_family == AF_INET6) { my6 = reinterpret_cast(reinterpret_cast(&(*sip))->sin6_addr.s6_addr); - const unsigned int sipNetmaskBits = Utils::ntoh((uint16_t)reinterpret_cast(&(*sip))->sin6_port); + const unsigned int sipNetmaskBits = Utils::ntoh((uint16_t) reinterpret_cast(&(*sip))->sin6_port); if ((sipNetmaskBits == 88) && (my6[0] == 0xfd) && (my6[9] == 0x99) && (my6[10] == 0x93)) { // ZT-RFC4193 /88 ??? unsigned int ptr = 0; while (ptr != 11) { @@ -963,6 +980,15 @@ void Switch::doAnythingWaitingForPeer(void* tPtr, const SharedPtr& peer) if ((rq->timestamp) && (rq->complete)) { if ((rq->frag0.tryDecode(RR, tPtr, rq->flowId)) || ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) { + if ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT) { // must be evaluated before the timestamp is cleared below + Metrics::vl1_incomplete_reassembly_rx++; + } rq->timestamp = 0; + } + else { + const Address src(rq->frag0.source()); + if (! 
RR->topology->getPeer(tPtr, src)) { + requestWhois(tPtr, now, src); + } } } } @@ -1021,6 +1047,9 @@ unsigned long Switch::doTimerTasks(void* tPtr, int64_t now) Mutex::Lock rql(rq->lock); if ((rq->timestamp) && (rq->complete)) { if ((rq->frag0.tryDecode(RR, tPtr, rq->flowId)) || ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) { + if ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT) { + Metrics::vl1_incomplete_reassembly_rx++; + } rq->timestamp = 0; } else { @@ -1084,7 +1113,7 @@ bool Switch::_trySend(void* tPtr, Packet& packet, bool encrypt, int32_t flowId) for (int i = 0; i < ZT_MAX_PEER_NETWORK_PATHS; ++i) { if (peer->_paths[i].p && peer->_paths[i].p->alive(now)) { uint16_t userSpecifiedMtu = peer->_paths[i].p->mtu(); - _sendViaSpecificPath(tPtr, peer, peer->_paths[i].p, userSpecifiedMtu, now, packet, encrypt, flowId); + _sendViaSpecificPath(tPtr, peer, peer->_paths[i].p, userSpecifiedMtu, now, packet, encrypt, flowId, false); } } return true; @@ -1102,7 +1131,7 @@ bool Switch::_trySend(void* tPtr, Packet& packet, bool encrypt, int32_t flowId) } if (viaPath) { uint16_t userSpecifiedMtu = viaPath->mtu(); - _sendViaSpecificPath(tPtr, peer, viaPath, userSpecifiedMtu, now, packet, encrypt, flowId); + _sendViaSpecificPath(tPtr, peer, viaPath, userSpecifiedMtu, now, packet, encrypt, flowId, false); return true; } } @@ -1110,7 +1139,7 @@ bool Switch::_trySend(void* tPtr, Packet& packet, bool encrypt, int32_t flowId) return false; } -void Switch::_sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId) +void Switch::_sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId, bool fragmentedAtVl2) { unsigned int mtu = ZT_DEFAULT_PHYSMTU; uint64_t trustedPathId = 0; @@ -1137,6 +1166,11 @@ void Switch::_sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtrsend(RR, tPtr, 
packet.data(), chunkSize, now)) { if (chunkSize < packet.size()) { // Too big for one packet, fragment the rest + Metrics::vl1_fragments_per_packet_hist.Observe(2); + if (fragmentedAtVl2) { + Metrics::vl1_vl2_double_fragmentation_tx++; + } + unsigned int fragStart = chunkSize; unsigned int remaining = packet.size() - chunkSize; unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)); @@ -1144,6 +1178,7 @@ void Switch::_sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId); + void _sendViaSpecificPath(void* tPtr, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet& packet, bool encrypt, int32_t flowId, bool fragmentedAtVl2); void _recordOutgoingPacketMetrics(const Packet& p); const RuntimeEnvironment* const RR; diff --git a/osdep/BSDEthernetTap.cpp b/osdep/BSDEthernetTap.cpp index e1e0d42e5..ac137a659 100644 --- a/osdep/BSDEthernetTap.cpp +++ b/osdep/BSDEthernetTap.cpp @@ -51,6 +51,13 @@ #include #include +#include "../node/Constants.hpp" +#include "../node/Utils.hpp" +#include "../node/Mutex.hpp" +#include "OSUtils.hpp" +#include "BSDEthernetTap.hpp" +#include "../node/Metrics.hpp" + #define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv" #define ZT_TAP_BUF_SIZE (1024 * 16) @@ -353,6 +360,11 @@ std::vector BSDEthernetTap::ips() const void BSDEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[ZT_MAX_MTU + 64]; if ((_fd > 0) && (len <= _mtu) && (_enabled)) { to.copyTo(putBuf, 6); diff --git a/osdep/LinuxEthernetTap.cpp b/osdep/LinuxEthernetTap.cpp index 5fe37216d..cb08dfb52 100644 --- a/osdep/LinuxEthernetTap.cpp +++ b/osdep/LinuxEthernetTap.cpp @@ -16,6 +16,7 @@ #endif #include 
"../node/Constants.hpp" +#include "../node/Metrics.hpp" #ifdef __LINUX__ @@ -507,6 +508,11 @@ std::vector LinuxEthernetTap::ips() const void LinuxEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { + // VL2 frame size histogram + ZeroTier::Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + ZeroTier::Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[ZT_MAX_MTU + 64]; if ((_fd > 0) && (len <= _mtu) && (_enabled)) { to.copyTo(putBuf, 6); diff --git a/osdep/MacEthernetTap.cpp b/osdep/MacEthernetTap.cpp index 09278855b..bb8397890 100644 --- a/osdep/MacEthernetTap.cpp +++ b/osdep/MacEthernetTap.cpp @@ -22,6 +22,7 @@ #include "MacEthernetTap.hpp" #include "MacEthernetTapAgent.h" #include "OSUtils.hpp" +#include "../node/Metrics.hpp" #include #include @@ -393,6 +394,11 @@ std::vector MacEthernetTap::ips() const void MacEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } struct iovec iov[3]; unsigned char hdr[15]; uint16_t l; diff --git a/osdep/NetBSDEthernetTap.cpp b/osdep/NetBSDEthernetTap.cpp index 0508246b0..03cb1d183 100644 --- a/osdep/NetBSDEthernetTap.cpp +++ b/osdep/NetBSDEthernetTap.cpp @@ -51,6 +51,15 @@ #include #include #include + +#include "../node/Constants.hpp" +#include "../node/Utils.hpp" +#include "../node/Mutex.hpp" +#include "OSUtils.hpp" +#include "NetBSDEthernetTap.hpp" +#include "../node/Metrics.hpp" + +#include using namespace std; #define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv" @@ -328,6 +337,12 @@ std::vector NetBSDEthernetTap::ips() const void NetBSDEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + + if (len > 
this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[4096]; if ((_fd > 0) && (len <= _mtu) && (_enabled)) { to.copyTo(putBuf, 6); diff --git a/osdep/WindowsEthernetTap.cpp b/osdep/WindowsEthernetTap.cpp index 3043868ae..5ebcf4904 100644 --- a/osdep/WindowsEthernetTap.cpp +++ b/osdep/WindowsEthernetTap.cpp @@ -16,9 +16,10 @@ #include "../node/Constants.hpp" #include "../node/Mutex.hpp" #include "../node/Utils.hpp" -#include "..\windows\TapDriver6\tap-windows.h" +#include "../windows/TapDriver6/tap-windows.h" #include "OSUtils.hpp" #include "WinDNSHelper.hpp" +#include "../node/Metrics.hpp" #include #include @@ -816,7 +817,14 @@ std::vector WindowsEthernetTap::ips() const void WindowsEthernetTap::put(const MAC& from, const MAC& to, unsigned int etherType, const void* data, unsigned int len) { - if ((! _initialized) || (! _enabled) || (_tap == INVALID_HANDLE_VALUE) || (len > _mtu)) + // Check MTU and add to histogram + ZeroTier::Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + ZeroTier::Metrics::vl2_would_fragment_or_drop_rx++; + return; + } + + if ((! _initialized) || (! _enabled) || (_tap == INVALID_HANDLE_VALUE)) return; Mutex::Lock _l(_injectPending_m);