diff --git a/make-linux.mk b/make-linux.mk index efc1badf6..e078828e0 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -323,7 +323,7 @@ endif ifeq ($(ZT_CONTROLLER),1) override CXXFLAGS+=-Wall -Wno-deprecated -std=c++17 -pthread $(INCLUDES) -DNDEBUG $(DEFS) override LDLIBS+=-Lext/libpqxx-7.7.3/install/ubuntu22.04/$(EXT_ARCH)/lib -lpqxx -lpq ext/hiredis-1.0.2/lib/ubuntu22.04/$(EXT_ARCH)/libhiredis.a ext/redis-plus-plus-1.3.3/install/ubuntu22.04/$(EXT_ARCH)/lib/libredis++.a -lssl -lcrypto - override DEFS+=-DZT_CONTROLLER_USE_LIBPQ -DZT_NO_PEER_METRICS + override DEFS+=-DZT_CONTROLLER_USE_LIBPQ override INCLUDES+=-I/usr/include/postgresql -Iext/libpqxx-7.7.3/install/ubuntu22.04/$(EXT_ARCH)/include -Iext/hiredis-1.0.2/include/ -Iext/redis-plus-plus-1.3.3/install/ubuntu22.04/$(EXT_ARCH)/include/sw/ ifeq ($(ZT_DEBUG),1) override LDLIBS+=rustybits/target/debug/libsmeeclient.a diff --git a/node/Metrics.cpp b/node/Metrics.cpp index b41120bbe..33121bffb 100644 --- a/node/Metrics.cpp +++ b/node/Metrics.cpp @@ -12,6 +12,7 @@ #include #include +#include "Metrics.hpp" namespace prometheus { namespace simpleapi { @@ -268,5 +269,68 @@ namespace ZeroTier { prometheus::simpleapi::counter_metric_t pool_errors { "controller_pgsql_connection_errors", "number of connection errors the connection pool has seen" }; #endif + + // Fragmentation Metrics + prometheus::simpleapi::counter_family_t packet_fragmentation + { "zt_packet_fragmentation", "ZeroTier packet fragmentation events" }; + + // VL2 Fragmentation Metrics + prometheus::simpleapi::counter_metric_t vl2_oversized_frame_tx + { packet_fragmentation.Add({{"layer", "VL2"}, {"direction", "tx"}, {"reason", "oversized_frame"}}) }; + prometheus::simpleapi::counter_metric_t vl2_would_fragment_or_drop_rx + { packet_fragmentation.Add({{"layer", "VL2"}, {"direction", "rx"}, {"reason", "would_fragment_or_drop"}}) }; + + // VL1 Fragmentation Metrics + prometheus::simpleapi::counter_metric_t vl1_fragmented_tx + { 
packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "tx"}, {"reason", "mtu_exceeded"}}) }; + prometheus::simpleapi::counter_metric_t vl1_fragment_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment"}}) }; + prometheus::simpleapi::counter_metric_t vl1_reassembly_failed_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "reassembly_failed"}}) }; + prometheus::simpleapi::counter_metric_t vl1_fragment_without_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment_without_head"}}) }; + prometheus::simpleapi::counter_metric_t vl1_fragment_before_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "fragment_before_head"}}) }; + prometheus::simpleapi::counter_metric_t vl1_duplicate_fragment_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "duplicate_fragment"}}) }; + prometheus::simpleapi::counter_metric_t vl1_duplicate_head_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "duplicate_head"}}) }; + + // VL1 Fragmentation Histogram and Counters + prometheus::CustomFamily> &vl1_fragments_per_packet_histogram = + prometheus::Builder>() + .Name("zt_vl1_fragments_per_packet") + .Help("Histogram of fragments per packet at VL1") + .Register(prometheus::simpleapi::registry); + prometheus::simpleapi::counter_metric_t vl1_incomplete_reassembly_rx + { packet_fragmentation.Add({{"layer", "VL1"}, {"direction", "rx"}, {"reason", "incomplete_reassembly"}}) }; + prometheus::simpleapi::counter_metric_t vl1_vl2_double_fragmentation_tx + { packet_fragmentation.Add({{"layer", "VL1_VL2"}, {"direction", "tx"}, {"reason", "double_fragmentation"}}) }; + + prometheus::Histogram &vl1_fragments_per_packet_hist = + vl1_fragments_per_packet_histogram.Add( + {}, + std::vector(std::begin(ZeroTier::Metrics::VL1_FRAGMENTS_PER_PACKET_BUCKETS), 
std::end(ZeroTier::Metrics::VL1_FRAGMENTS_PER_PACKET_BUCKETS)) + ); + + // VL2 Frame Size Histogram + // Buckets: 512 (IoT/legacy), 576 (min IPv4), 1200 (QUIC/mobile), 1280 (min IPv6), + // 1332, 1380, 1400 (VPN/overlay), 1420 (cloud), 1460 (TCP MSS), 1472 (ICMP/MTU), + // 1480 (ICMP/MTU), 1492 (PPPoE), 1500 (Ethernet), 2800 (VL2 default), 9000 (jumbo) + prometheus::CustomFamily> &vl2_frame_size_histogram = + prometheus::Builder>() + .Name("zt_vl2_frame_size") + .Help("Histogram of frame sizes delivered to TAP (VL2)") + .Register(prometheus::simpleapi::registry); + prometheus::simpleapi::counter_metric_t vl2_incomplete_reassembly_rx + { packet_fragmentation.Add({{"layer", "VL2"}, {"direction", "rx"}, {"reason", "incomplete_reassembly"}}) }; + prometheus::simpleapi::counter_metric_t vl2_vl1_double_fragmentation_tx + { packet_fragmentation.Add({{"layer", "VL2_VL1"}, {"direction", "tx"}, {"reason", "double_fragmentation"}}) }; + + prometheus::Histogram &vl2_frame_size_hist = + vl2_frame_size_histogram.Add( + {}, + std::vector(std::begin(ZeroTier::Metrics::VL2_FRAME_SIZE_BUCKETS), std::end(ZeroTier::Metrics::VL2_FRAME_SIZE_BUCKETS)) + ); } } diff --git a/node/Metrics.hpp b/node/Metrics.hpp index 5906f18e4..b5d714638 100644 --- a/node/Metrics.hpp +++ b/node/Metrics.hpp @@ -1,3 +1,4 @@ +#pragma once /* * Copyright (c)2013-2023 ZeroTier, Inc. 
* @@ -137,6 +138,37 @@ namespace ZeroTier { extern prometheus::simpleapi::counter_metric_t db_member_change; extern prometheus::simpleapi::counter_metric_t db_network_change; + // Fragmentation Metrics + extern prometheus::simpleapi::counter_family_t packet_fragmentation; + + // VL2 Fragmentation Metrics + extern prometheus::simpleapi::counter_metric_t vl2_oversized_frame_tx; + extern prometheus::simpleapi::counter_metric_t vl2_would_fragment_or_drop_rx; + + // VL1 Fragmentation Metrics + extern prometheus::simpleapi::counter_metric_t vl1_fragmented_tx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_rx; + extern prometheus::simpleapi::counter_metric_t vl1_reassembly_failed_rx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_without_head_rx; + extern prometheus::simpleapi::counter_metric_t vl1_fragment_before_head_rx; + extern prometheus::simpleapi::counter_metric_t vl1_duplicate_fragment_rx; + extern prometheus::simpleapi::counter_metric_t vl1_duplicate_head_rx; + + // VL1 Fragmentation Histogram and Counters + extern prometheus::CustomFamily> &vl1_fragments_per_packet_histogram; + extern prometheus::simpleapi::counter_metric_t vl1_incomplete_reassembly_rx; + extern prometheus::simpleapi::counter_metric_t vl1_vl2_double_fragmentation_tx; + + // VL2 Frame Size Histogram + // Buckets: 512 (IoT/legacy), 576 (min IPv4), 1200 (QUIC/mobile), 1280 (min IPv6), + // 1332, 1380, 1400 (VPN/overlay), 1420 (cloud), 1460 (TCP MSS), 1472 (ICMP/MTU), + // 1480 (ICMP/MTU), 1492 (PPPoE), 1500 (Ethernet), 2800 (VL2 default), 9000 (jumbo) + extern prometheus::CustomFamily> &vl2_frame_size_histogram; + + // Histogram bucket boundaries for VL1 fragments per packet + inline constexpr uint64_t VL1_FRAGMENTS_PER_PACKET_BUCKETS[] = {1,2,3,4,5,6,7,8,9,10,12,16}; + // Histogram bucket boundaries for VL2 frame size + inline constexpr uint64_t VL2_FRAME_SIZE_BUCKETS[] = {512,576,1200,1280,1332,1380,1400,1420,1460,1472,1480,1492,1500,2800,9000}; #ifdef 
ZT_CONTROLLER_USE_LIBPQ // Central Controller Metrics @@ -160,6 +192,9 @@ namespace ZeroTier { extern prometheus::simpleapi::gauge_metric_t pool_in_use; extern prometheus::simpleapi::counter_metric_t pool_errors; #endif + + extern prometheus::Histogram &vl1_fragments_per_packet_hist; + extern prometheus::Histogram &vl2_frame_size_hist; } // namespace Metrics }// namespace ZeroTier diff --git a/node/Switch.cpp b/node/Switch.cpp index 7664f7a48..e59c20698 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -110,7 +110,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre } else if (len > ZT_PROTO_MIN_FRAGMENT_LENGTH) { // SECURITY: min length check is important since we do some C-style stuff below! if (reinterpret_cast(data)[ZT_PACKET_FRAGMENT_IDX_FRAGMENT_INDICATOR] == ZT_PACKET_FRAGMENT_INDICATOR) { // Handle fragment ---------------------------------------------------- - + Metrics::vl1_fragment_rx++; Packet::Fragment fragment(data,len); const Address destination(fragment.destination()); @@ -149,7 +149,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre Mutex::Lock rql(rq->lock); if (rq->packetId != fragmentPacketId) { // No packet found, so we received a fragment without its head. 
- + Metrics::vl1_fragment_without_head_rx++; rq->flowId = flowId; rq->timestamp = now; rq->packetId = fragmentPacketId; @@ -159,7 +159,7 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre rq->complete = false; } else if (!(rq->haveFragments & (1 << fragmentNumber))) { // We have other fragments and maybe the head, so add this one and check - + Metrics::vl1_fragment_before_head_rx++; rq->frags[fragmentNumber - 1] = fragment; rq->totalFragments = totalFragments; @@ -174,9 +174,13 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre rq->timestamp = 0; // packet decoded, free entry } else { rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something + Metrics::vl1_reassembly_failed_rx++; } } - } // else this is a duplicate fragment, ignore + } else { + // This is a duplicate fragment, ignore + Metrics::vl1_duplicate_fragment_rx++; + } } } @@ -261,12 +265,16 @@ void Switch::onRemotePacket(void *tPtr,const int64_t localSocket,const InetAddre rq->timestamp = 0; // packet decoded, free entry } else { rq->complete = true; // set complete flag but leave entry since it probably needs WHOIS or something + Metrics::vl1_reassembly_failed_rx++; } } else { // Still waiting on more fragments, but keep the head rq->frag0.init(data,len,path,now); } - } // else this is a duplicate head, ignore + } else { + // This is a duplicate head, ignore + Metrics::vl1_duplicate_head_rx++; + } } else { // Packet is unfragmented, so just process it IncomingPacket packet(data,len,path,now); @@ -295,6 +303,15 @@ void Switch::onLocalEthernet(void *tPtr,const SharedPtr &network,const return; } + // VL2 fragmentation metric: oversized frame from TAP device (TX) + unsigned int tap_mtu = network->config().mtu; + bool was_fragmented_at_vl2 = (len > tap_mtu); + if (was_fragmented_at_vl2) { + Metrics::vl2_oversized_frame_tx++; + // Metric only: do not drop or return here, the frame continues through the normal processing below + } + // Check if this
packet is from someone other than the tap -- i.e. bridged in bool fromBridged; if ((fromBridged = (from != network->mac()))) { @@ -984,7 +1001,15 @@ void Switch::doAnythingWaitingForPeer(void *tPtr,const SharedPtr &peer) Mutex::Lock rql(rq->lock); if ((rq->timestamp)&&(rq->complete)) { if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) { + if ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT) { + Metrics::vl1_incomplete_reassembly_rx++; + } rq->timestamp = 0; + } else { + const Address src(rq->frag0.source()); + if (!RR->topology->getPeer(tPtr,src)) { + requestWhois(tPtr,now,src); + } } } } @@ -1039,6 +1064,9 @@ unsigned long Switch::doTimerTasks(void *tPtr,int64_t now) Mutex::Lock rql(rq->lock); if ((rq->timestamp)&&(rq->complete)) { if ((rq->frag0.tryDecode(RR,tPtr,rq->flowId))||((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT)) { + if ((now - rq->timestamp) > ZT_RECEIVE_QUEUE_TIMEOUT) { + Metrics::vl1_incomplete_reassembly_rx++; + } rq->timestamp = 0; } else { const Address src(rq->frag0.source()); @@ -1102,7 +1130,7 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) for(int i=0;i_paths[i].p && peer->_paths[i].p->alive(now)) { uint16_t userSpecifiedMtu = peer->_paths[i].p->mtu(); - _sendViaSpecificPath(tPtr,peer,peer->_paths[i].p, userSpecifiedMtu,now,packet,encrypt,flowId); + _sendViaSpecificPath(tPtr,peer,peer->_paths[i].p, userSpecifiedMtu,now,packet,encrypt,flowId,false); } } return true; @@ -1119,7 +1147,7 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) } if (viaPath) { uint16_t userSpecifiedMtu = viaPath->mtu(); - _sendViaSpecificPath(tPtr,peer,viaPath,userSpecifiedMtu,now,packet,encrypt,flowId); + _sendViaSpecificPath(tPtr,peer,viaPath,userSpecifiedMtu,now,packet,encrypt,flowId,false); return true; } } @@ -1127,7 +1155,7 @@ bool Switch::_trySend(void *tPtr,Packet &packet,bool encrypt,int32_t flowId) return false; } -void 
Switch::_sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtr viaPath,uint16_t userSpecifiedMtu, int64_t now,Packet &packet,bool encrypt,int32_t flowId) +void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtr viaPath,uint16_t userSpecifiedMtu, int64_t now,Packet &packet,bool encrypt,int32_t flowId, bool was_fragmented_at_vl2) { unsigned int mtu = ZT_DEFAULT_PHYSMTU; uint64_t trustedPathId = 0; @@ -1153,6 +1181,11 @@ void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtrsend(RR,tPtr,packet.data(),chunkSize,now)) { if (chunkSize < packet.size()) { // Too big for one packet, fragment the rest + Metrics::vl1_fragments_per_packet_hist.Observe(2); + if (was_fragmented_at_vl2) { + Metrics::vl1_vl2_double_fragmentation_tx++; + } + unsigned int fragStart = chunkSize; unsigned int remaining = packet.size() - chunkSize; unsigned int fragsRemaining = (remaining / (mtu - ZT_PROTO_MIN_FRAGMENT_LENGTH)); @@ -1160,6 +1193,7 @@ void Switch::_sendViaSpecificPath(void *tPtr,SharedPtr peer,SharedPtr peer,SharedPtr viaPath,uint16_t userSpecifiedMtu, int64_t now,Packet &packet,bool encrypt,int32_t flowId); + void _sendViaSpecificPath(void *tPtr, SharedPtr peer, SharedPtr viaPath, uint16_t userSpecifiedMtu, int64_t now, Packet &packet, bool encrypt, int32_t flowId, bool was_fragmented_at_vl2); void _recordOutgoingPacketMetrics(const Packet &p); const RuntimeEnvironment *const RR; diff --git a/osdep/BSDEthernetTap.cpp b/osdep/BSDEthernetTap.cpp index b2ea98b3e..1054f4f0c 100644 --- a/osdep/BSDEthernetTap.cpp +++ b/osdep/BSDEthernetTap.cpp @@ -53,6 +53,7 @@ #include "../node/Mutex.hpp" #include "OSUtils.hpp" #include "BSDEthernetTap.hpp" +#include "../node/Metrics.hpp" #define ZT_BASE32_CHARS "0123456789abcdefghijklmnopqrstuv" #define ZT_TAP_BUF_SIZE (1024 * 16) @@ -342,8 +343,13 @@ std::vector BSDEthernetTap::ips() const return r; } -void BSDEthernetTap::put(const MAC &from,const MAC &to,unsigned int etherType,const void *data,unsigned int len) +void 
BSDEthernetTap::put(const MAC &from, const MAC &to, unsigned int etherType, const void *data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[ZT_MAX_MTU + 64]; if ((_fd > 0)&&(len <= _mtu)&&(_enabled)) { to.copyTo(putBuf,6); diff --git a/osdep/LinuxEthernetTap.cpp b/osdep/LinuxEthernetTap.cpp index 14929d176..4f507b284 100644 --- a/osdep/LinuxEthernetTap.cpp +++ b/osdep/LinuxEthernetTap.cpp @@ -16,6 +16,7 @@ #endif #include "../node/Constants.hpp" +#include "../node/Metrics.hpp" #ifdef __LINUX__ @@ -508,8 +509,13 @@ std::vector LinuxEthernetTap::ips() const return r; } -void LinuxEthernetTap::put(const MAC &from,const MAC &to,unsigned int etherType,const void *data,unsigned int len) +void LinuxEthernetTap::put(const MAC &from, const MAC &to, unsigned int etherType, const void *data, unsigned int len) { + // VL2 frame size histogram + ZeroTier::Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + ZeroTier::Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[ZT_MAX_MTU + 64]; if ((_fd > 0)&&(len <= _mtu)&&(_enabled)) { to.copyTo(putBuf,6); diff --git a/osdep/MacEthernetTap.cpp b/osdep/MacEthernetTap.cpp index fdb584eee..bc9715156 100644 --- a/osdep/MacEthernetTap.cpp +++ b/osdep/MacEthernetTap.cpp @@ -22,6 +22,7 @@ #include "MacEthernetTap.hpp" #include "MacEthernetTapAgent.h" #include "MacDNSHelper.hpp" +#include "../node/Metrics.hpp" #include #include @@ -384,8 +385,13 @@ std::vector MacEthernetTap::ips() const return r; } -void MacEthernetTap::put(const MAC &from,const MAC &to,unsigned int etherType,const void *data,unsigned int len) +void MacEthernetTap::put(const MAC &from, const MAC &to, unsigned int etherType, const void *data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } struct iovec 
iov[3]; unsigned char hdr[15]; uint16_t l; diff --git a/osdep/NetBSDEthernetTap.cpp b/osdep/NetBSDEthernetTap.cpp index 5627f5f11..bf21e9e5f 100644 --- a/osdep/NetBSDEthernetTap.cpp +++ b/osdep/NetBSDEthernetTap.cpp @@ -55,6 +55,7 @@ #include "../node/Mutex.hpp" #include "OSUtils.hpp" #include "NetBSDEthernetTap.hpp" +#include "../node/Metrics.hpp" #include using namespace std; @@ -318,8 +319,14 @@ std::vector NetBSDEthernetTap::ips() const return r; } -void NetBSDEthernetTap::put(const MAC &from,const MAC &to,unsigned int etherType,const void *data,unsigned int len) +void NetBSDEthernetTap::put(const MAC &from, const MAC &to, unsigned int etherType, const void *data, unsigned int len) { + // VL2 frame size histogram + Metrics::vl2_frame_size_hist.Observe(len); + + if (len > this->_mtu) { + Metrics::vl2_would_fragment_or_drop_rx++; + } char putBuf[4096]; if ((_fd > 0)&&(len <= _mtu)&&(_enabled)) { to.copyTo(putBuf,6); diff --git a/osdep/WindowsEthernetTap.cpp b/osdep/WindowsEthernetTap.cpp index 9d644b171..70617ba00 100644 --- a/osdep/WindowsEthernetTap.cpp +++ b/osdep/WindowsEthernetTap.cpp @@ -39,6 +39,7 @@ #include "../node/Constants.hpp" #include "../node/Utils.hpp" #include "../node/Mutex.hpp" +#include "../node/Metrics.hpp" #include "WindowsEthernetTap.hpp" #include "OSUtils.hpp" @@ -792,8 +793,13 @@ std::vector WindowsEthernetTap::ips() const return addrs; } -void WindowsEthernetTap::put(const MAC &from,const MAC &to,unsigned int etherType,const void *data,unsigned int len) +void WindowsEthernetTap::put(const MAC &from, const MAC &to, unsigned int etherType, const void *data, unsigned int len) { + // VL2 frame size histogram + ZeroTier::Metrics::vl2_frame_size_hist.Observe(len); + if (len > this->_mtu) { + ZeroTier::Metrics::vl2_would_fragment_or_drop_rx++; + } if ((!_initialized)||(!_enabled)||(_tap == INVALID_HANDLE_VALUE)||(len > _mtu)) return;