@@ -69,6 +69,7 @@ type ServerInfo struct {
6969 totalQueries uint64 // Total queries sent to this server
7070 failedQueries uint64 // Failed queries count
7171 lastUpdateTime time.Time // Last time metrics were updated
72+ lastDecayTS time.Time // Last time RTT was decayed for recovery
7273}
7374
7475type LBStrategy interface {
@@ -310,7 +311,6 @@ func (serversInfo *ServersInfo) estimatorUpdate(currentActive int) {
310311 serversInfo .inner [currentActive ].rtt .Set (currentActiveRtt )
311312 return
312313 }
313- partialSort := false
314314 if candidateRtt < currentActiveRtt {
315315 serversInfo .inner [candidate ], serversInfo .inner [currentActive ] = serversInfo .inner [currentActive ], serversInfo .inner [candidate ]
316316 dlog .Debugf (
@@ -319,27 +319,65 @@ func (serversInfo *ServersInfo) estimatorUpdate(currentActive int) {
319319 int (candidateRtt ),
320320 int (currentActiveRtt ),
321321 )
322- partialSort = true
323- } else if candidateRtt > 0 && candidateRtt >= (serversInfo .inner [0 ].rtt .Value ()+ serversInfo .inner [activeCount - 1 ].rtt .Value ())/ 2.0 * 4.0 {
324- if time .Since (serversInfo .inner [candidate ].lastActionTS ) > time .Duration (1 * time .Minute ) {
325- serversInfo .inner [candidate ].rtt .Add (candidateRtt / 2.0 )
326- dlog .Debugf (
327- "Giving a new chance to candidate [%s], lowering its RTT from %d to %d (best: %d)" ,
328- serversInfo .inner [candidate ].Name ,
329- int (candidateRtt ),
330- int (serversInfo .inner [candidate ].rtt .Value ()),
331- int (serversInfo .inner [0 ].rtt .Value ()),
332- )
333- partialSort = true
322+ serversInfo .sortByRtt ()
323+ }
324+ }
325+
326+ func (serversInfo * ServersInfo ) sortByRtt () {
327+ for i := 1 ; i < len (serversInfo .inner ); i ++ {
328+ if serversInfo .inner [i - 1 ].rtt .Value () > serversInfo .inner [i ].rtt .Value () {
329+ serversInfo .inner [i - 1 ], serversInfo .inner [i ] = serversInfo .inner [i ], serversInfo .inner [i - 1 ]
334330 }
335331 }
336- if partialSort {
337- for i := 1 ; i < serversCount ; i ++ {
338- if serversInfo .inner [i - 1 ].rtt .Value () > serversInfo .inner [i ].rtt .Value () {
339- serversInfo .inner [i - 1 ], serversInfo .inner [i ] = serversInfo .inner [i ], serversInfo .inner [i - 1 ]
340- }
332+ }
333+
334+ func (serversInfo * ServersInfo ) recoverDormantServers () {
335+ if len (serversInfo .inner ) <= 1 {
336+ return
337+ }
338+ bestRtt := serversInfo .inner [0 ].rtt .Value ()
339+ for _ , server := range serversInfo .inner {
340+ if rtt := server .rtt .Value (); rtt > 0 && rtt < bestRtt {
341+ bestRtt = rtt
341342 }
342343 }
344+ if bestRtt <= 0 {
345+ return
346+ }
347+ now := time .Now ()
348+ needsSort := false
349+ for _ , server := range serversInfo .inner {
350+ currentRtt := server .rtt .Value ()
351+ if currentRtt <= bestRtt * 4.0 {
352+ continue
353+ }
354+ if now .Sub (server .lastActionTS ) <= time .Minute {
355+ continue
356+ }
357+ if now .Sub (server .lastDecayTS ) <= 10 * time .Second {
358+ continue
359+ }
360+ targetRtt := float64 (server .initialRtt )
361+ if targetRtt <= 0 {
362+ targetRtt = bestRtt
363+ }
364+ server .rtt .Add (targetRtt )
365+ newRtt := server .rtt .Value ()
366+ server .totalQueries = 0
367+ server .failedQueries = 0
368+ server .lastDecayTS = now
369+ needsSort = true
370+ dlog .Debugf (
371+ "Giving a new chance to [%s], lowering its RTT from %d to %d (best: %d)" ,
372+ server .Name ,
373+ int (currentRtt ),
374+ int (newRtt ),
375+ int (bestRtt ),
376+ )
377+ }
378+ if needsSort {
379+ serversInfo .sortByRtt ()
380+ }
343381}
344382
345383func (serversInfo * ServersInfo ) getOne () * ServerInfo {
@@ -350,6 +388,8 @@ func (serversInfo *ServersInfo) getOne() *ServerInfo {
350388 return nil
351389 }
352390
391+ serversInfo .recoverDormantServers ()
392+
353393 var candidate int
354394
355395 // Check if using WP2 strategy
0 commit comments