aboutsummaryrefslogtreecommitdiffstats
path: root/maintenance/migrateLinksTable.php
blob: dc151fcebec171a539506fe6f768ac88772dd87f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
<?php

require_once __DIR__ . '/Maintenance.php';

use MediaWiki\Title\TitleValue;

/**
 * Maintenance script that populates normalization column in links tables.
 *
 * For the links table named by --table, every row whose target ID column is
 * still NULL or 0 gets that column set to the ID returned by the link target
 * lookup service for the row's namespace/title pair. The work is split into
 * page ID ranges that are one batch size wide.
 *
 * @ingroup Maintenance
 * @since 1.39
 */
class MigrateLinksTable extends LoggedUpdateMaintenance {
	/**
	 * Registers the script description, the required --table option, the
	 * optional --sleep option, and a default batch size of 1000.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription(
			'Populates normalization column in links tables.'
		);
		$this->addOption(
			'table',
			'Table name. Like pagelinks.',
			true,
			true
		);
		$this->addOption(
			'sleep',
			'Sleep time (in seconds) between every batch. Default: 0',
			false,
			true
		);
		$this->setBatchSize( 1000 );
	}

	/**
	 * Builds the update key from the class name and the --table option, so
	 * that each links table gets its own key.
	 *
	 * @return string
	 */
	protected function getUpdateKey() {
		return __CLASS__ . $this->getOption( 'table', '' );
	}

	/**
	 * Validates the schema preconditions and then backfills the target ID
	 * column of the selected table, one page ID range at a time.
	 *
	 * @return bool True when there is nothing (left) to do; false when a
	 *  precondition is missing and the script has to be run again later.
	 */
	protected function doDBUpdates() {
		$dbw = $this->getDB( DB_PRIMARY );
		$mapping = \MediaWiki\Linker\LinksMigration::$mapping;
		$table = $this->getOption( 'table', '' );
		if ( !isset( $mapping[$table] ) ) {
			$this->output( "Mapping for this table doesn't exist yet.\n" );
			return false;
		}

		// A batch size below 1 would never advance the page ID cursor in the
		// loop below, so refuse to start instead of spinning forever.
		$batchSize = (int)$this->getBatchSize();
		if ( $batchSize < 1 ) {
			$this->output( "Batch size must be a positive integer.\n" );
			return false;
		}

		$targetColumn = $mapping[$table]['target_id'];
		if ( !$dbw->fieldExists( $table, $mapping[$table]['title'], __METHOD__ ) ) {
			$this->output( "Old fields don't exist. There is no need to run this script\n" );
			return true;
		}
		if ( !$dbw->fieldExists( $table, $targetColumn, __METHOD__ ) ) {
			$this->output( "Run update.php to create the $targetColumn column.\n" );
			return false;
		}
		if ( !$dbw->tableExists( 'linktarget', __METHOD__ ) ) {
			$this->output( "Run update.php to create the linktarget table.\n" );
			// Nothing was migrated, so report failure exactly like the missing
			// column case above. NOTE(review): a true result is expected to be
			// recorded by LoggedUpdateMaintenance as "completed", which would
			// make the next run skip the migration - confirm against the parent.
			return false;
		}

		$this->output( "Populating the $targetColumn column\n" );
		$updated = 0;

		$highestPageRow = $dbw->newSelectQueryBuilder()
			->select( 'page_id' )
			->from( 'page' )
			->orderBy( 'page_id', 'DESC' )
			->limit( 1 )
			->caller( __METHOD__ )
			->fetchResultSet()->fetchRow();
		if ( !$highestPageRow ) {
			$this->output( "Page table is empty.\n" );
			return true;
		}
		// Cast so the loop bound is compared as an integer regardless of how
		// the database driver returned the value.
		$highestPageId = (int)$highestPageRow[0];

		// Given the indexes and the structure of links tables,
		// we need to split the update into batches of pages.
		// Otherwise the queries will take a really long time in production and cause read-only.
		for ( $pageId = 0; $pageId <= $highestPageId; $pageId += $batchSize ) {
			$updated += $this->handlePageBatch( $pageId, $mapping, $table );
		}

		$this->output( "Completed normalization of $table, $updated rows updated.\n" );

		return true;
	}

	/**
	 * Backfills the target ID column for all rows of $table whose page ID lies
	 * in the range starting at $lowPageId and spanning one batch size.
	 *
	 * Each iteration picks one not-yet-migrated namespace/title pair in the
	 * range, acquires its link target ID, and updates every matching row in
	 * the range at once. The loop ends when no unmigrated row is left.
	 *
	 * @param int $lowPageId First page ID of the range (inclusive)
	 * @param array $mapping Per-table column mapping; the 'target_id',
	 *  'page_id', 'ns' and 'title' entries of $mapping[$table] are read
	 * @param string $table Name of the links table to update
	 * @return int Number of rows updated in this page range
	 */
	private function handlePageBatch( $lowPageId, $mapping, $table ) {
		$batchSize = $this->getBatchSize();
		$targetColumn = $mapping[$table]['target_id'];
		$pageIdColumn = $mapping[$table]['page_id'];
		$nsColumn = $mapping[$table]['ns'];
		$titleColumn = $mapping[$table]['title'];
		// range is inclusive, let's subtract one.
		$highPageId = $lowPageId + $batchSize - 1;
		$dbw = $this->getPrimaryDB();
		// The option cannot change while the script runs; read it once.
		$sleep = (int)$this->getOption( 'sleep', 0 );
		$updated = 0;

		while ( true ) {
			// Find any one row in the range that still lacks a target ID.
			$res = $dbw->newSelectQueryBuilder()
				->select( [ $nsColumn, $titleColumn ] )
				->from( $table )
				->where( [
					$targetColumn => [ null, 0 ],
					$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
					$dbw->expr( $pageIdColumn, '<=', $highPageId ),
				] )
				->limit( 1 )
				->caller( __METHOD__ )
				->fetchResultSet();
			if ( !$res->numRows() ) {
				break;
			}
			$row = $res->fetchRow();
			$ns = $row[$nsColumn];
			$titleString = $row[$titleColumn];
			$title = new TitleValue( (int)$ns, $titleString );
			$this->output( "Starting backfill of $ns:$titleString " .
				"title on pages between $lowPageId and $highPageId\n" );
			$id = $this->getServiceContainer()->getLinkTargetLookup()->acquireLinkTargetId( $title, $dbw );
			// Update every unmigrated row in the range that points at the same
			// namespace/title, not only the single row selected above.
			$dbw->newUpdateQueryBuilder()
				->update( $table )
				->set( [ $targetColumn => $id ] )
				->where( [
					$targetColumn => [ null, 0 ],
					$nsColumn => $ns,
					$titleColumn => $titleString,
					$dbw->expr( $pageIdColumn, '>=', $lowPageId ),
					$dbw->expr( $pageIdColumn, '<=', $highPageId ),
				] )
				->caller( __METHOD__ )->execute();
			$updatedInThisBatch = $dbw->affectedRows();
			$updated += $updatedInThisBatch;
			$this->output( "Updated $updatedInThisBatch rows\n" );
			// Sleep between batches for replication to catch up
			$this->waitForReplication();
			if ( $sleep > 0 ) {
				sleep( $sleep );
			}
		}
		return $updated;
	}

}

// Name of the maintenance class defined in this file, exposed through the
// $maintClass global for the script runner.
$maintClass = MigrateLinksTable::class;
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably
// it is provided by Maintenance.php (required at the top) and names the runner
// bootstrap file - confirm.
require_once RUN_MAINTENANCE_IF_MAIN;