Switch back to using an actor to manage link state, and slighty randomize the delay between multicast announcements. This seems to fix the issue with duplicate connections (and breaks a livelock in the multicast code where both nodes keep closing the listen side of their connection, but that's kind of a hack, we need a better solution)

This commit is contained in:
Arceliar 2023-06-18 03:40:40 -05:00
parent 2eda59d9e4
commit c1ae9ea0d4
3 changed files with 257 additions and 241 deletions

View File

@ -6,8 +6,11 @@ import (
"fmt"
"net"
"net/url"
"sync/atomic"
"time"
"github.com/Arceliar/phony"
"github.com/Arceliar/ironwood/network"
"github.com/yggdrasil-network/yggdrasil-go/src/address"
)
@ -70,12 +73,10 @@ func (c *Core) GetPeers() []PeerInfo {
conns[p.Conn] = p
}
c.links.RLock()
defer c.links.RUnlock()
phony.Block(&c.links, func() {
for info, state := range c.links._links {
var peerinfo PeerInfo
var conn net.Conn
state.RLock()
peerinfo.URI = info.uri
peerinfo.LastError = state._err
peerinfo.LastErrorTime = state._errtime
@ -83,11 +84,10 @@ func (c *Core) GetPeers() []PeerInfo {
conn = c
peerinfo.Up = true
peerinfo.Inbound = state.linkType == linkTypeIncoming
peerinfo.RXBytes = c.rx
peerinfo.TXBytes = c.tx
peerinfo.RXBytes = atomic.LoadUint64(&c.rx)
peerinfo.TXBytes = atomic.LoadUint64(&c.tx)
peerinfo.Uptime = time.Since(c.up)
}
state.RUnlock()
if p, ok := conns[conn]; ok {
peerinfo.Key = p.Key
peerinfo.Root = p.Root
@ -96,6 +96,7 @@ func (c *Core) GetPeers() []PeerInfo {
}
peers = append(peers, peerinfo)
}
})
return peers
}

View File

@ -13,7 +13,6 @@ import (
"net/url"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
@ -30,12 +29,13 @@ const (
)
type links struct {
phony.Inbox
core *Core
tcp *linkTCP // TCP interface support
tls *linkTLS // TLS interface support
unix *linkUNIX // UNIX interface support
socks *linkSOCKS // SOCKS interface support
sync.RWMutex // Protects the below
// _links can only be modified safely from within the links actor
_links map[linkInfo]*link // *link is nil if connection in progress
}
@ -55,7 +55,7 @@ type link struct {
kick chan struct{} // Attempt to reconnect now, if backing off
linkType linkType // Type of link, i.e. outbound/inbound, persistent/ephemeral
linkProto string // Protocol carrier of link, e.g. TCP, AWDL
sync.RWMutex // Protects the below
// The remaining fields can only be modified safely from within the links actor
_conn *linkConn // Connected link, if any, nil if not connected
_err error // Last error on the connection, if any
_errtime time.Time // Last time an error occured
@ -131,6 +131,8 @@ const ErrLinkPinnedKeyInvalid = linkError("pinned public key is invalid")
const ErrLinkUnrecognisedSchema = linkError("link schema unknown")
func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
var retErr error
phony.Block(l, func() {
// Generate the link info and see whether we think we already
// have an open peering to this peer.
lu := urlForLinkInfo(*u)
@ -145,7 +147,8 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
for _, pubkey := range u.Query()["key"] {
sigPub, err := hex.DecodeString(pubkey)
if err != nil {
return ErrLinkPinnedKeyInvalid
retErr = ErrLinkPinnedKeyInvalid
return
}
var sigPubKey keyArray
copy(sigPubKey[:], sigPub)
@ -157,7 +160,8 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
if p := u.Query().Get("priority"); p != "" {
pi, err := strconv.ParseUint(p, 10, 8)
if err != nil {
return ErrLinkPriorityInvalid
retErr = ErrLinkPriorityInvalid
return
}
options.priority = uint8(pi)
}
@ -166,15 +170,14 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
// the existing peer state. Try to kick the peer if possible,
// which will cause an immediate connection attempt if it is
// backing off for some reason.
l.Lock()
state, ok := l._links[info]
if ok && state != nil {
select {
case state.kick <- struct{}{}:
default:
}
l.Unlock()
return ErrLinkAlreadyConfigured
retErr = ErrLinkAlreadyConfigured
return
}
// Create the link entry. This will contain the connection
@ -188,7 +191,6 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
// Store the state of the link so that it can be queried later.
l._links[info] = state
l.Unlock()
// Track how many consecutive connection failures we have had,
// as we will back off exponentially rather than hammering the
@ -220,13 +222,16 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
// Otherwise the loop will end, cleaning up the link entry.
go func() {
defer func() {
l.Lock()
defer l.Unlock()
phony.Block(l, func() {
if l._links[info] == state {
delete(l._links, info)
}
})
}()
// This loop will run each and every time we want to attempt
// a connection to this peer.
// TODO get rid of this loop, this is *exactly* what time.AfterFunc is for, we should just send a signal to the links actor to kick off a goroutine as needed
for {
conn, err := l.connect(u, info, options)
if err != nil {
@ -234,11 +239,11 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
// If the link is a persistent configured peering,
// store information about the connection error so
// that we can report it through the admin socket.
state.Lock()
phony.Block(l, func() {
state._conn = nil
state._err = err
state._errtime = time.Now()
state.Unlock()
})
// Back off for a bit. If true is returned here, we
// can continue onto the next loop iteration to try
@ -266,14 +271,17 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
// Update the link state with our newly wrapped connection.
// Clear the error state.
state.Lock()
var doRet bool
phony.Block(l, func() {
if state._conn != nil {
// If a peering has come up in this time, abort this one.
state.Unlock()
return
doRet = true
}
state._conn = lc
state.Unlock()
})
if doRet {
return
}
// Give the connection to the handler. The handler will block
// for the lifetime of the connection.
@ -287,12 +295,12 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
// try to close the underlying socket just in case and then
// update the link state.
_ = lc.Close()
state.Lock()
phony.Block(l, func() {
state._conn = nil
if state._err = err; state._err != nil {
state._errtime = time.Now()
}
state.Unlock()
})
// If the link is persistently configured, back off if needed
// and then try reconnecting. Otherwise, exit out.
@ -307,7 +315,8 @@ func (l *links) add(u *url.URL, sintf string, linkType linkType) error {
}
}
}()
return nil
})
return retErr
}
func (l *links) listen(u *url.URL, sintf string) (*Listener, error) {
@ -369,8 +378,11 @@ func (l *links) listen(u *url.URL, sintf string) (*Listener, error) {
// If there's an existing link state for this link, get it.
// If this node is already connected to us, just drop the
// connection. This prevents duplicate peerings.
l.Lock()
state, ok := l._links[info]
var lc *linkConn
var state *link
phony.Block(l, func() {
var ok bool
state, ok = l._links[info]
if !ok || state == nil {
state = &link{
linkType: linkTypeIncoming,
@ -378,19 +390,16 @@ func (l *links) listen(u *url.URL, sintf string) (*Listener, error) {
kick: make(chan struct{}),
}
}
state.Lock()
if state._conn != nil {
// If a connection has come up in this time, abort
// this one.
state.Unlock()
l.Unlock()
return
}
// The linkConn wrapper allows us to track the number of
// bytes written to and read from this connection without
// the help of ironwood.
lc := &linkConn{
lc = &linkConn{
Conn: conn,
up: time.Now(),
}
@ -400,11 +409,13 @@ func (l *links) listen(u *url.URL, sintf string) (*Listener, error) {
state._conn = lc
state._err = nil
state._errtime = time.Time{}
state.Unlock()
// Store the state of the link so that it can be queried later.
l._links[info] = state
l.Unlock()
})
if lc == nil {
return
}
// Give the connection to the handler. The handler will block
// for the lifetime of the connection.
@ -416,9 +427,11 @@ func (l *links) listen(u *url.URL, sintf string) (*Listener, error) {
// try to close the underlying socket just in case and then
// drop the link state.
_ = lc.Close()
l.Lock()
phony.Block(l, func() {
if l._links[info] == state {
delete(l._links, info)
l.Unlock()
}
})
}(conn)
}
}()

View File

@ -4,6 +4,7 @@ import (
"context"
"encoding/hex"
"fmt"
"math/rand"
"net"
"net/url"
"time"
@ -337,7 +338,8 @@ func (m *Multicast) _announce() {
break
}
}
m._timer = time.AfterFunc(time.Second, func() {
annInterval := time.Second + time.Microsecond*(time.Duration(rand.Intn(1048576))) // Randomize delay
m._timer = time.AfterFunc(annInterval, func() {
m.Act(nil, m._announce)
})
}