aboutsummaryrefslogtreecommitdiffstats
path: root/maintenance/initEditCount.php
diff options
context:
space:
mode:
authorBrad Jorsch <bjorsch@wikimedia.org>2017-09-12 13:12:29 -0400
committerJames D. Forrester <jforrester@wikimedia.org>2018-02-23 10:06:20 -0800
commit27c61fb1e94da9114314468fd00bcf129ec064b6 (patch)
treef5cc45ec82c54570b24c94cda7e0d0d249e010c6 /maintenance/initEditCount.php
parent0c712ce564e0bd53e28f2581a714ee8cd04b614c (diff)
downloadmediawikicore-27c61fb1e94da9114314468fd00bcf129ec064b6.tar.gz
mediawikicore-27c61fb1e94da9114314468fd00bcf129ec064b6.zip
Add `actor` table and code to start using it
Storing the user name or IP in every row in large tables like revision and logging takes up space and makes operations on these tables slower. This patch begins the process of moving those into one "actor" table which other tables can reference with a single integer field. A subsequent patch will remove the old columns. Bug: T167246 Depends-On: I9293fd6e0f958d87e52965de925046f1bb8f8a50 Change-Id: I8d825eb02c69cc66d90bd41325133fd3f99f0226
Diffstat (limited to 'maintenance/initEditCount.php')
-rw-r--r--maintenance/initEditCount.php112
1 files changed, 99 insertions, 13 deletions
diff --git a/maintenance/initEditCount.php b/maintenance/initEditCount.php
index 3c4336ff28e3..f7ef7a28a2c7 100644
--- a/maintenance/initEditCount.php
+++ b/maintenance/initEditCount.php
@@ -38,9 +38,9 @@ in the load balancer, usually indicating a replication environment.' );
}
public function execute() {
+ global $wgActorTableSchemaMigrationStage;
+
$dbw = $this->getDB( DB_MASTER );
- $user = $dbw->tableName( 'user' );
- $revision = $dbw->tableName( 'revision' );
// Autodetect mode...
if ( $this->hasOption( 'background' ) ) {
@@ -51,6 +51,17 @@ in the load balancer, usually indicating a replication environment.' );
$backgroundMode = wfGetLB()->getServerCount() > 1;
}
+ $actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' );
+
+ $needSpecialQuery = ( $wgActorTableSchemaMigrationStage !== MIGRATION_OLD &&
+ $wgActorTableSchemaMigrationStage !== MIGRATION_NEW );
+ if ( $needSpecialQuery ) {
+ foreach ( $actorQuery['joins'] as &$j ) {
+ $j[0] = 'JOIN'; // replace LEFT JOIN
+ }
+ unset( $j );
+ }
+
if ( $backgroundMode ) {
$this->output( "Using replication-friendly background mode...\n" );
@@ -62,15 +73,55 @@ in the load balancer, usually indicating a replication environment.' );
$migrated = 0;
for ( $min = 0; $min <= $lastUser; $min += $chunkSize ) {
$max = $min + $chunkSize;
- $result = $dbr->query(
- "SELECT
- user_id,
- COUNT(rev_user) AS user_editcount
- FROM $user
- LEFT OUTER JOIN $revision ON user_id=rev_user
- WHERE user_id > $min AND user_id <= $max
- GROUP BY user_id",
- __METHOD__ );
+
+ if ( $needSpecialQuery ) {
+ // Use separate subqueries to collect counts with the old
+ // and new schemas, to avoid having to do whole-table scans.
+ $result = $dbr->select(
+ [
+ 'user',
+ 'rev1' => '('
+ . $dbr->selectSQLText(
+ [ 'revision', 'revision_actor_temp' ],
+ [ 'rev_user', 'ct' => 'COUNT(*)' ],
+ [
+ "rev_user > $min AND rev_user <= $max",
+ 'revactor_rev' => null,
+ ],
+ __METHOD__,
+ [ 'GROUP BY' => 'rev_user' ],
+ [ 'revision_actor_temp' => [ 'LEFT JOIN', 'revactor_rev = rev_id' ] ]
+ ) . ')',
+ 'rev2' => '('
+ . $dbr->selectSQLText(
+ [ 'revision' ] + $actorQuery['tables'],
+ [ 'actor_user', 'ct' => 'COUNT(*)' ],
+ "actor_user > $min AND actor_user <= $max",
+ __METHOD__,
+ [ 'GROUP BY' => 'actor_user' ],
+ $actorQuery['joins']
+ ) . ')',
+ ],
+ [ 'user_id', 'user_editcount' => 'COALESCE(rev1.ct,0) + COALESCE(rev2.ct,0)' ],
+ "user_id > $min AND user_id <= $max",
+ __METHOD__,
+ [],
+ [
+ 'rev1' => [ 'LEFT JOIN', 'user_id = rev_user' ],
+ 'rev2' => [ 'LEFT JOIN', 'user_id = actor_user' ],
+ ]
+ );
+ } else {
+ $revUser = $actorQuery['fields']['rev_user'];
+ $result = $dbr->select(
+ [ 'user', 'rev' => [ 'revision' ] + $actorQuery['tables'] ],
+ [ 'user_id', 'user_editcount' => "COUNT($revUser)" ],
+ "user_id > $min AND user_id <= $max",
+ __METHOD__,
+ [ 'GROUP BY' => 'user_id' ],
+ [ 'rev' => [ 'LEFT JOIN', "user_id = $revUser" ] ] + $actorQuery['joins']
+ );
+ }
foreach ( $result as $row ) {
$dbw->update( 'user',
@@ -93,8 +144,43 @@ in the load balancer, usually indicating a replication environment.' );
}
} else {
$this->output( "Using single-query mode...\n" );
- $sql = "UPDATE $user SET user_editcount=(SELECT COUNT(*) FROM $revision WHERE rev_user=user_id)";
- $dbw->query( $sql );
+
+ $user = $dbw->tableName( 'user' );
+ if ( $needSpecialQuery ) {
+ $subquery1 = $dbw->selectSQLText(
+ [ 'revision', 'revision_actor_temp' ],
+ [ 'COUNT(*)' ],
+ [
+ 'user_id = rev_user',
+ 'revactor_rev' => null,
+ ],
+ __METHOD__,
+ [],
+ [ 'revision_actor_temp' => [ 'LEFT JOIN', 'revactor_rev = rev_id' ] ]
+ );
+ $subquery2 = $dbw->selectSQLText(
+ [ 'revision' ] + $actorQuery['tables'],
+ [ 'COUNT(*)' ],
+ 'user_id = actor_user',
+ __METHOD__,
+ [],
+ $actorQuery['joins']
+ );
+ $dbw->query(
+ "UPDATE $user SET user_editcount=($subquery1) + ($subquery2)",
+ __METHOD__
+ );
+ } else {
+ $subquery = $dbw->selectSQLText(
+ [ 'revision' ] + $actorQuery['tables'],
+ [ 'COUNT(*)' ],
+ [ 'user_id = ' . $actorQuery['fields']['rev_user'] ],
+ __METHOD__,
+ [],
+ $actorQuery['joins']
+ );
+ $dbw->query( "UPDATE $user SET user_editcount=($subquery)", __METHOD__ );
+ }
}
$this->output( "Done!\n" );