diff options
author | Brad Jorsch <bjorsch@wikimedia.org> | 2017-09-12 13:12:29 -0400 |
---|---|---|
committer | James D. Forrester <jforrester@wikimedia.org> | 2018-02-23 10:06:20 -0800 |
commit | 27c61fb1e94da9114314468fd00bcf129ec064b6 (patch) | |
tree | f5cc45ec82c54570b24c94cda7e0d0d249e010c6 /maintenance/initEditCount.php | |
parent | 0c712ce564e0bd53e28f2581a714ee8cd04b614c (diff) | |
download | mediawikicore-27c61fb1e94da9114314468fd00bcf129ec064b6.tar.gz mediawikicore-27c61fb1e94da9114314468fd00bcf129ec064b6.zip |
Add `actor` table and code to start using it
Storing the user name or IP in every row in large tables like revision
and logging takes up space and makes operations on these tables slower.
This patch begins the process of moving those into one "actor" table
which other tables can reference with a single integer field.
A subsequent patch will remove the old columns.
Bug: T167246
Depends-On: I9293fd6e0f958d87e52965de925046f1bb8f8a50
Change-Id: I8d825eb02c69cc66d90bd41325133fd3f99f0226
Diffstat (limited to 'maintenance/initEditCount.php')
-rw-r--r-- | maintenance/initEditCount.php | 112 |
1 file changed, 99 insertions, 13 deletions
diff --git a/maintenance/initEditCount.php b/maintenance/initEditCount.php index 3c4336ff28e3..f7ef7a28a2c7 100644 --- a/maintenance/initEditCount.php +++ b/maintenance/initEditCount.php @@ -38,9 +38,9 @@ in the load balancer, usually indicating a replication environment.' ); } public function execute() { + global $wgActorTableSchemaMigrationStage; + $dbw = $this->getDB( DB_MASTER ); - $user = $dbw->tableName( 'user' ); - $revision = $dbw->tableName( 'revision' ); // Autodetect mode... if ( $this->hasOption( 'background' ) ) { @@ -51,6 +51,17 @@ in the load balancer, usually indicating a replication environment.' ); $backgroundMode = wfGetLB()->getServerCount() > 1; } + $actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' ); + + $needSpecialQuery = ( $wgActorTableSchemaMigrationStage !== MIGRATION_OLD && + $wgActorTableSchemaMigrationStage !== MIGRATION_NEW ); + if ( $needSpecialQuery ) { + foreach ( $actorQuery['joins'] as &$j ) { + $j[0] = 'JOIN'; // replace LEFT JOIN + } + unset( $j ); + } + if ( $backgroundMode ) { $this->output( "Using replication-friendly background mode...\n" ); @@ -62,15 +73,55 @@ in the load balancer, usually indicating a replication environment.' ); $migrated = 0; for ( $min = 0; $min <= $lastUser; $min += $chunkSize ) { $max = $min + $chunkSize; - $result = $dbr->query( - "SELECT - user_id, - COUNT(rev_user) AS user_editcount - FROM $user - LEFT OUTER JOIN $revision ON user_id=rev_user - WHERE user_id > $min AND user_id <= $max - GROUP BY user_id", - __METHOD__ ); + + if ( $needSpecialQuery ) { + // Use separate subqueries to collect counts with the old + // and new schemas, to avoid having to do whole-table scans. + $result = $dbr->select( + [ + 'user', + 'rev1' => '(' + . 
$dbr->selectSQLText( + [ 'revision', 'revision_actor_temp' ], + [ 'rev_user', 'ct' => 'COUNT(*)' ], + [ + "rev_user > $min AND rev_user <= $max", + 'revactor_rev' => null, + ], + __METHOD__, + [ 'GROUP BY' => 'rev_user' ], + [ 'revision_actor_temp' => [ 'LEFT JOIN', 'revactor_rev = rev_id' ] ] + ) . ')', + 'rev2' => '(' + . $dbr->selectSQLText( + [ 'revision' ] + $actorQuery['tables'], + [ 'actor_user', 'ct' => 'COUNT(*)' ], + "actor_user > $min AND actor_user <= $max", + __METHOD__, + [ 'GROUP BY' => 'actor_user' ], + $actorQuery['joins'] + ) . ')', + ], + [ 'user_id', 'user_editcount' => 'COALESCE(rev1.ct,0) + COALESCE(rev2.ct,0)' ], + "user_id > $min AND user_id <= $max", + __METHOD__, + [], + [ + 'rev1' => [ 'LEFT JOIN', 'user_id = rev_user' ], + 'rev2' => [ 'LEFT JOIN', 'user_id = actor_user' ], + ] + ); + } else { + $revUser = $actorQuery['fields']['rev_user']; + $result = $dbr->select( + [ 'user', 'rev' => [ 'revision' ] + $actorQuery['tables'] ], + [ 'user_id', 'user_editcount' => "COUNT($revUser)" ], + "user_id > $min AND user_id <= $max", + __METHOD__, + [ 'GROUP BY' => 'user_id' ], + [ 'rev' => [ 'LEFT JOIN', "user_id = $revUser" ] ] + $actorQuery['joins'] + ); + } foreach ( $result as $row ) { $dbw->update( 'user', @@ -93,8 +144,43 @@ in the load balancer, usually indicating a replication environment.' 
); } } else { $this->output( "Using single-query mode...\n" ); - $sql = "UPDATE $user SET user_editcount=(SELECT COUNT(*) FROM $revision WHERE rev_user=user_id)"; - $dbw->query( $sql ); + + $user = $dbw->tableName( 'user' ); + if ( $needSpecialQuery ) { + $subquery1 = $dbw->selectSQLText( + [ 'revision', 'revision_actor_temp' ], + [ 'COUNT(*)' ], + [ + 'user_id = rev_user', + 'revactor_rev' => null, + ], + __METHOD__, + [], + [ 'revision_actor_temp' => [ 'LEFT JOIN', 'revactor_rev = rev_id' ] ] + ); + $subquery2 = $dbw->selectSQLText( + [ 'revision' ] + $actorQuery['tables'], + [ 'COUNT(*)' ], + 'user_id = actor_user', + __METHOD__, + [], + $actorQuery['joins'] + ); + $dbw->query( + "UPDATE $user SET user_editcount=($subquery1) + ($subquery2)", + __METHOD__ + ); + } else { + $subquery = $dbw->selectSQLText( + [ 'revision' ] + $actorQuery['tables'], + [ 'COUNT(*)' ], + [ 'user_id = ' . $actorQuery['fields']['rev_user'] ], + __METHOD__, + [], + $actorQuery['joins'] + ); + $dbw->query( "UPDATE $user SET user_editcount=($subquery)", __METHOD__ ); + } } $this->output( "Done!\n" ); |