aboutsummaryrefslogtreecommitdiffstats
path: root/includes/libs
diff options
context:
space:
mode:
author: Tim Starling <tstarling@wikimedia.org> 2022-07-19 09:54:21 +1000
committer: Krinkle <krinkle@fastmail.com> 2022-07-20 05:36:06 +0000
commit: 0c92bc8a96fb23a682d8640ba8b5b5076c28c19c (patch)
tree: 82ea0ea6222d998025ee9e6829723a1534e40bfb /includes/libs
parent: 042b53807b08340eb1afed9fb495b9dfdf3ca012 (diff)
download: mediawikicore-0c92bc8a96fb23a682d8640ba8b5b5076c28c19c.tar.gz
download: mediawikicore-0c92bc8a96fb23a682d8640ba8b5b5076c28c19c.zip
rdbms: Instrument LoadBalancer with statsd metrics
* Export the LoadMonitor weight metric to statsd on each global cache miss.
* Export the replication lag at the same time. This can replace the
  getLagTimes.php cron job and should eliminate the sawtooth effect seen in
  the cron job exports, which is presumably due to the offset between cron
  job start time and heartbeat time. It should also work around the bug
  which causes s3 to be missing.

Also:
* Fix a log message in TransactionProfiler. actualSeconds -> actual, since
  this log message is used for counts as well as time.

Bug: T313004
Change-Id: I61e45870d750019ed2c92f45b2f8b9c33a7e7d65
Diffstat (limited to 'includes/libs')
-rw-r--r-- includes/libs/rdbms/TransactionProfiler.php 4
-rw-r--r-- includes/libs/rdbms/lbfactory/LBFactory.php 7
-rw-r--r-- includes/libs/rdbms/loadbalancer/LoadBalancer.php 6
-rw-r--r-- includes/libs/rdbms/loadmonitor/ILoadMonitor.php 3
-rw-r--r-- includes/libs/rdbms/loadmonitor/LoadMonitor.php 45
-rw-r--r-- includes/libs/rdbms/loadmonitor/LoadMonitorNull.php 4
6 files changed, 51 insertions, 18 deletions
diff --git a/includes/libs/rdbms/TransactionProfiler.php b/includes/libs/rdbms/TransactionProfiler.php
index 38e44cd6e008..d42189fdd571 100644
--- a/includes/libs/rdbms/TransactionProfiler.php
+++ b/includes/libs/rdbms/TransactionProfiler.php
@@ -452,7 +452,7 @@ class TransactionProfiler implements LoggerAwareInterface {
$max = $this->expect[$expectation][self::FLD_LIMIT];
$by = $this->expect[$expectation][self::FLD_FNAME];
- $message = "Expectation ($expectation <=) $max by $by not met (actual: {actual})";
+ $message = "Expectation ($expectation <= $max) by $by not met (actual: {actual})";
if ( $trxId ) {
$message .= ' in trx #{trxId}';
}
@@ -463,7 +463,7 @@ class TransactionProfiler implements LoggerAwareInterface {
'measure' => $expectation,
'maxSeconds' => $max,
'by' => $by,
- 'actualSeconds' => $actual,
+ 'actual' => $actual,
'query' => $this->getGeneralizedSql( $query ),
'exception' => new RuntimeException(),
'trxId' => $trxId,
diff --git a/includes/libs/rdbms/lbfactory/LBFactory.php b/includes/libs/rdbms/lbfactory/LBFactory.php
index 36797563c7ef..caa791c9d567 100644
--- a/includes/libs/rdbms/lbfactory/LBFactory.php
+++ b/includes/libs/rdbms/lbfactory/LBFactory.php
@@ -27,7 +27,9 @@ use BagOStuff;
use EmptyBagOStuff;
use Exception;
use Generator;
+use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use LogicException;
+use NullStatsdDataFactory;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use RuntimeException;
@@ -52,6 +54,8 @@ abstract class LBFactory implements ILBFactory {
private $profiler;
/** @var TransactionProfiler */
private $trxProfiler;
+ /** @var StatsdDataFactoryInterface */
+ private $statsd;
/** @var LoggerInterface */
private $replLogger;
/** @var LoggerInterface */
@@ -71,7 +75,6 @@ abstract class LBFactory implements ILBFactory {
protected $srvCache;
/** @var WANObjectCache */
protected $wanCache;
-
/** @var DatabaseDomain Local domain */
protected $localDomain;
@@ -151,6 +154,7 @@ abstract class LBFactory implements ILBFactory {
$this->profiler = $conf['profiler'] ?? null;
$this->trxProfiler = $conf['trxProfiler'] ?? new TransactionProfiler();
+ $this->statsd = $conf['statsdDataFactory'] ?? new NullStatsdDataFactory();
$this->csProvider = $conf['criticalSectionProvider'] ?? null;
@@ -670,6 +674,7 @@ abstract class LBFactory implements ILBFactory {
'perfLogger' => $this->perfLogger,
'errorLogger' => $this->errorLogger,
'deprecationLogger' => $this->deprecationLogger,
+ 'statsdDataFactory' => $this->statsd,
'cliMode' => $this->cliMode,
'agent' => $this->agent,
'maxLag' => $this->maxLag,
diff --git a/includes/libs/rdbms/loadbalancer/LoadBalancer.php b/includes/libs/rdbms/loadbalancer/LoadBalancer.php
index 450a641ea38f..a3ffe2626948 100644
--- a/includes/libs/rdbms/loadbalancer/LoadBalancer.php
+++ b/includes/libs/rdbms/loadbalancer/LoadBalancer.php
@@ -25,7 +25,9 @@ use ArrayUtils;
use BagOStuff;
use EmptyBagOStuff;
use InvalidArgumentException;
+use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use LogicException;
+use NullStatsdDataFactory;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use RuntimeException;
@@ -58,6 +60,8 @@ class LoadBalancer implements ILoadBalancerForOwner {
private $profiler;
/** @var TransactionProfiler */
private $trxProfiler;
+ /** @var StatsdDataFactoryInterface */
+ private $statsd;
/** @var LoggerInterface */
private $connLogger;
/** @var LoggerInterface */
@@ -232,6 +236,7 @@ class LoadBalancer implements ILoadBalancerForOwner {
$this->clusterName = $params['clusterName'] ?? null;
$this->profiler = $params['profiler'] ?? null;
$this->trxProfiler = $params['trxProfiler'] ?? new TransactionProfiler();
+ $this->statsd = $params['statsdDataFactory'] ?? new NullStatsdDataFactory();
$this->csProvider = $params['criticalSectionProvider'] ?? null;
@@ -419,6 +424,7 @@ class LoadBalancer implements ILoadBalancerForOwner {
$this->loadMonitor = new $class(
$this, $this->srvCache, $this->wanCache, $this->loadMonitorConfig );
$this->loadMonitor->setLogger( $this->replLogger );
+ $this->loadMonitor->setStatsdDataFactory( $this->statsd );
}
return $this->loadMonitor;
diff --git a/includes/libs/rdbms/loadmonitor/ILoadMonitor.php b/includes/libs/rdbms/loadmonitor/ILoadMonitor.php
index 2ee49f60b652..f7d5214c2e74 100644
--- a/includes/libs/rdbms/loadmonitor/ILoadMonitor.php
+++ b/includes/libs/rdbms/loadmonitor/ILoadMonitor.php
@@ -25,6 +25,7 @@ namespace Wikimedia\Rdbms;
use BagOStuff;
use Psr\Log\LoggerAwareInterface;
+use StatsdAwareInterface;
use WANObjectCache;
/**
@@ -32,7 +33,7 @@ use WANObjectCache;
*
* @ingroup Database
*/
-interface ILoadMonitor extends LoggerAwareInterface {
+interface ILoadMonitor extends LoggerAwareInterface, StatsdAwareInterface {
/**
* Construct a new LoadMonitor with a given LoadBalancer parent
*
diff --git a/includes/libs/rdbms/loadmonitor/LoadMonitor.php b/includes/libs/rdbms/loadmonitor/LoadMonitor.php
index 5c1a7983ac4a..f7e821d8062c 100644
--- a/includes/libs/rdbms/loadmonitor/LoadMonitor.php
+++ b/includes/libs/rdbms/loadmonitor/LoadMonitor.php
@@ -22,6 +22,8 @@
namespace Wikimedia\Rdbms;
use BagOStuff;
+use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
+use NullStatsdDataFactory;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use RuntimeException;
@@ -47,6 +49,8 @@ class LoadMonitor implements ILoadMonitor {
protected $wanCache;
/** @var LoggerInterface */
protected $replLogger;
+ /** @var StatsdDataFactoryInterface */
+ protected $statsd;
/** @var float Moving average ratio (e.g. 0.1 for 10% weight to new weight) */
private $movingAveRatio;
@@ -83,6 +87,7 @@ class LoadMonitor implements ILoadMonitor {
$this->srvCache = $srvCache;
$this->wanCache = $wCache;
$this->replLogger = new NullLogger();
+ $this->statsd = new NullStatsdDataFactory();
$this->movingAveRatio = $options['movingAveRatio'] ?? 0.1;
$this->lagWarnThreshold = $options['lagWarnThreshold'] ?? LoadBalancer::MAX_LAG_DEFAULT;
@@ -92,6 +97,10 @@ class LoadMonitor implements ILoadMonitor {
$this->replLogger = $logger;
}
+ public function setStatsdDataFactory( StatsdDataFactoryInterface $statsFactory ) {
+ $this->statsd = $statsFactory;
+ }
+
final public function scaleLoads( array &$weightByServer, $domain ) {
$serverIndexes = array_keys( $weightByServer );
$states = $this->getServerStates( $serverIndexes, $domain );
@@ -205,6 +214,7 @@ class LoadMonitor implements ILoadMonitor {
}
$priorScales = $priorStates ? $priorStates['weightScales'] : [];
+ $cluster = $this->lb->getClusterName();
$lagTimes = [];
$weightScales = [];
@@ -241,9 +251,12 @@ class LoadMonitor implements ILoadMonitor {
$naiveScale,
$this->movingAveRatio
);
-
// Scale from 0% to 100% of nominal weight
- $weightScales[$i] = max( $newScale, 0.0 );
+ $newScale = max( $newScale, 0.0 );
+
+ $weightScales[$i] = $newScale;
+ $statHost = str_replace( '.', '_', $host );
+ $this->statsd->gauge( "loadbalancer.weight.$cluster.$statHost", $newScale );
// Mark replication lag on this server as "false" if it is unreachable
if ( !$conn ) {
@@ -257,26 +270,30 @@ class LoadMonitor implements ILoadMonitor {
// Determine the amount of replication lag on this server
try {
- $lagTimes[$i] = $conn->getLag();
+ $lag = $conn->getLag();
} catch ( DBError $e ) {
// Mark the lag time as "false" if it cannot be queried
- $lagTimes[$i] = false;
+ $lag = false;
}
+ $lagTimes[$i] = $lag;
- if ( $lagTimes[$i] === false ) {
+ if ( $lag === false ) {
$this->replLogger->error(
__METHOD__ . ": host {db_server} is not replicating?",
[ 'db_server' => $host ]
);
- } elseif ( $lagTimes[$i] > $this->lagWarnThreshold ) {
- $this->replLogger->warning(
- "Server {db_server} has {lag} seconds of lag (>= {maxlag})",
- [
- 'db_server' => $host,
- 'lag' => $lagTimes[$i],
- 'maxlag' => $this->lagWarnThreshold
- ]
- );
+ } else {
+ $this->statsd->timing( "loadbalancer.lag.$cluster.$statHost", $lag * 1000 );
+ if ( $lag > $this->lagWarnThreshold ) {
+ $this->replLogger->warning(
+ "Server {db_server} has {lag} seconds of lag (>= {maxlag})",
+ [
+ 'db_server' => $host,
+ 'lag' => $lag,
+ 'maxlag' => $this->lagWarnThreshold
+ ]
+ );
+ }
}
if ( $close ) {
diff --git a/includes/libs/rdbms/loadmonitor/LoadMonitorNull.php b/includes/libs/rdbms/loadmonitor/LoadMonitorNull.php
index cbc77f1a903e..a2a21e1048a5 100644
--- a/includes/libs/rdbms/loadmonitor/LoadMonitorNull.php
+++ b/includes/libs/rdbms/loadmonitor/LoadMonitorNull.php
@@ -22,6 +22,7 @@
namespace Wikimedia\Rdbms;
use BagOStuff;
+use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
use Psr\Log\LoggerInterface;
use WANObjectCache;
@@ -34,6 +35,9 @@ class LoadMonitorNull implements ILoadMonitor {
public function setLogger( LoggerInterface $logger ) {
}
+ public function setStatsdDataFactory( StatsdDataFactoryInterface $statsFactory ) {
+ }
+
public function scaleLoads( array &$loads, $domain ) {
}