diff options
author | jenkins-bot <jenkins-bot@gerrit.wikimedia.org> | 2024-05-13 16:07:27 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@wikimedia.org> | 2024-05-13 16:07:27 +0000 |
commit | 17e9c09d0c8423d8f41a26e78028b57093a4d674 (patch) | |
tree | 1b66625ae4910ac099afb49b4130ee80700b938b /includes/RenameUser | |
parent | 12582c344d2254fc5242b79bf17dd0e49fdcfc4f (diff) | |
parent | b432fd2f20b980527bbefc004d6cd77125d140c9 (diff) | |
download | mediawikicore-17e9c09d0c8423d8f41a26e78028b57093a4d674.tar.gz mediawikicore-17e9c09d0c8423d8f41a26e78028b57093a4d674.zip |
Merge "Move various job classes to relevant component directories"
Diffstat (limited to 'includes/RenameUser')
-rw-r--r-- | includes/RenameUser/RenameUserJob.php | 124 |
1 files changed, 124 insertions, 0 deletions
<?php

use MediaWiki\Config\Config;
use MediaWiki\MainConfigNames;
use MediaWiki\Title\Title;
use Wikimedia\Rdbms\ILBFactory;

/**
 * Custom job to perform updates on tables in busier environments
 *
 * Job parameters include:
 *   - table     : DB table to update
 *   - column    : The *_user_text column to update
 *   - oldname   : The old user name
 *   - newname   : The new user name
 *   - count     : The expected number of rows to update in this batch
 *                 (informational; run() does not read it)
 *
 * Additionally, one of the following groups of parameters must be set:
 * a) The timestamp based rename parameters:
 *   - timestampColumn : The *_timestamp column
 *   - minTimestamp    : The minimum bound of the timestamp column range for this batch
 *   - maxTimestamp    : The maximum bound of the timestamp column range for this batch
 *   - uniqueKey       : A column that is unique (preferably the PRIMARY KEY) [optional]
 * b) The unique key based rename parameters:
 *   - uniqueKey : A column that is unique (preferably the PRIMARY KEY)
 *   - keyId     : A list of values for this column to determine rows to update for this batch
 *
 * To avoid some race conditions, the following parameters should be set:
 *   - userID    : The ID of the user to update
 *   - uidColumn : The *_user_id column
 */
class RenameUserJob extends Job {
	/** @var int Number of row IDs handled by a single UPDATE query in the batched path */
	private $updateRowsPerQuery;

	/** @var ILBFactory */
	private $lbFactory;

	/**
	 * @param Title $title
	 * @param array $params Job parameters, see the class documentation
	 * @param Config $config Read once for MainConfigNames::UpdateRowsPerQuery
	 * @param ILBFactory $lbFactory Provides the primary DB connection and replication waits
	 */
	public function __construct(
		Title $title,
		$params,
		Config $config,
		ILBFactory $lbFactory
	) {
		parent::__construct( 'renameUser', $title, $params );

		$this->updateRowsPerQuery = $config->get( MainConfigNames::UpdateRowsPerQuery );
		$this->lbFactory = $lbFactory;
	}

	/**
	 * Replace the old user name with the new one in the rows selected by the job parameters.
	 *
	 * @return bool Always true
	 * @throws InvalidArgumentException If neither a time range nor an ID batch is given,
	 *  or if one of the optional parameter groups is only partially specified
	 */
	public function run() {
		$dbw = $this->lbFactory->getPrimaryDatabase();
		$table = $this->params['table'];
		$column = $this->params['column'];

		$oldname = $this->params['oldname'];
		$newname = $this->params['newname'];

		$uniqueKey = $this->params['uniqueKey'] ?? null;
		$keyId = $this->params['keyId'] ?? null;

		# Conditions like "*_user_text = 'x'"
		$conds = [ $column => $oldname ];

		# If user ID given, add that to condition to avoid rename collisions
		if ( isset( $this->params['userID'] ) ) {
			# Without the column name the condition would be keyed on an empty
			# string and yield a malformed query, so fail early instead.
			if ( !isset( $this->params['uidColumn'] ) ) {
				throw new InvalidArgumentException( 'Expected uidColumn when userID is given' );
			}
			$conds[$this->params['uidColumn']] = $this->params['userID'];
		}

		if ( isset( $this->params['timestampColumn'] ) ) {
			# Bound by timestamp if given; both ends of the range are required
			if ( !isset( $this->params['minTimestamp'], $this->params['maxTimestamp'] ) ) {
				throw new InvalidArgumentException(
					'Expected minTimestamp and maxTimestamp when timestampColumn is given'
				);
			}
			$timestampColumn = $this->params['timestampColumn'];
			$conds[] = $dbw->expr( $timestampColumn, '>=', $this->params['minTimestamp'] );
			$conds[] = $dbw->expr( $timestampColumn, '<=', $this->params['maxTimestamp'] );
		} elseif ( $uniqueKey !== null && $keyId !== null ) {
			# Bound by unique key if given (B/C)
			$conds[$uniqueKey] = $keyId;
		} else {
			throw new InvalidArgumentException( 'Expected ID batch or time range' );
		}

		# Actually update the rows for this job...
		if ( $uniqueKey !== null ) {
			// Select the rows to update by PRIMARY KEY
			$ids = $dbw->newSelectQueryBuilder()
				->select( $uniqueKey )
				->from( $table )
				->where( $conds )
				->caller( __METHOD__ )->fetchFieldValues();

			# Update these rows by PRIMARY KEY to avoid replica lag
			foreach ( array_chunk( $ids, $this->updateRowsPerQuery ) as $batch ) {
				# Commit what has been written so far and let the replicas
				# catch up before sending the next batch of writes.
				$dbw->commit( __METHOD__, 'flush' );
				$this->lbFactory->waitForReplication();

				# The IDs were selected using the full set of conditions, so
				# only the old name and the key values are repeated here.
				$dbw->newUpdateQueryBuilder()
					->update( $table )
					->set( [ $column => $newname ] )
					->where( [ $column => $oldname, $uniqueKey => $batch ] )
					->caller( __METHOD__ )->execute();
			}
		} else {
			# Update the chunk of rows directly
			$dbw->newUpdateQueryBuilder()
				->update( $table )
				->set( [ $column => $newname ] )
				->where( $conds )
				->caller( __METHOD__ )->execute();
		}

		return true;
	}
}