diff options
author | Tim Starling <tstarling@wikimedia.org> | 2021-12-07 16:37:26 +1100 |
---|---|---|
committer | Tim Starling <tstarling@wikimedia.org> | 2022-01-04 15:35:57 +1100 |
commit | d3b2b800678e91fd1a6177d80fde790c9006d423 (patch) | |
tree | 940b8b37452a41abcb6f16ec0d6824c9e5bcbddc | |
parent | bdae0df4e057ba71e19b491108100d61a1b84faf (diff) | |
download | mediawikicore-d3b2b800678e91fd1a6177d80fde790c9006d423.tar.gz mediawikicore-d3b2b800678e91fd1a6177d80fde790c9006d423.zip |
LinksUpdate refactor
* In LinksUpdate, put each table in its own class, so that the
special code for each table has a more elegant place to go.
* Use generators to abstract and flatten out the underlying ParserOutput
data structures, without requiring the data to be copied. This allows
the incremental logic which was previously duplicated for each table,
i.e. computing insertions and deletions from the old and new state,
to be factored out. The per-table batch classes act as container
abstractions for the underlying data.
Bug: T263437
Depends-On: I472f4a023969bfde6298eb56112c16d2ae842199
Change-Id: Id40be19a8caa7b4669911927964cfd6755c25238
18 files changed, 1984 insertions, 822 deletions
diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 1e77d2247fca..6e64f8c09163 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -148,6 +148,7 @@ class AutoLoader { 'MediaWiki\\Config\\' => __DIR__ . '/config/', 'MediaWiki\\Content\\' => __DIR__ . '/content/', 'MediaWiki\\DB\\' => __DIR__ . '/db/', + 'MediaWiki\\Deferred\\LinksUpdate\\' => __DIR__ . '/deferred/LinksUpdate/', 'MediaWiki\\Diff\\' => __DIR__ . '/diff/', 'MediaWiki\\Edit\\' => __DIR__ . '/edit/', 'MediaWiki\\EditPage\\' => __DIR__ . '/editpage/', diff --git a/includes/collation/CollationFactory.php b/includes/collation/CollationFactory.php index 060f810d674f..6710c5f7e437 100644 --- a/includes/collation/CollationFactory.php +++ b/includes/collation/CollationFactory.php @@ -128,7 +128,11 @@ class CollationFactory { * @return Collation */ public function getCategoryCollation(): Collation { - return $this->makeCollation( $this->options->get( 'CategoryCollation' ) ); + return $this->makeCollation( $this->getDefaultCollationName() ); + } + + public function getDefaultCollationName(): string { + return $this->options->get( 'CategoryCollation' ); } /** diff --git a/includes/deferred/LinksDeletionUpdate.php b/includes/deferred/LinksDeletionUpdate.php index 6b5c5d4ba5e4..d7f52ad0734d 100644 --- a/includes/deferred/LinksDeletionUpdate.php +++ b/includes/deferred/LinksDeletionUpdate.php @@ -19,6 +19,7 @@ * * @file */ + use MediaWiki\MediaWikiServices; /** diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index b2a486149a46..d00abb6f7af5 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -20,10 +20,17 @@ * @file */ +use MediaWiki\Deferred\LinksUpdate\ExternalLinksTable; +use MediaWiki\Deferred\LinksUpdate\LinksTable; +use MediaWiki\Deferred\LinksUpdate\LinksTableGroup; +use MediaWiki\Deferred\LinksUpdate\PageLinksTable; +use MediaWiki\Deferred\LinksUpdate\PagePropsTable; +use MediaWiki\Deferred\LinksUpdate\TitleLinksTable; use MediaWiki\HookContainer\ProtectedHookAccessorTrait; use MediaWiki\Logger\LoggerFactory; use MediaWiki\MediaWikiServices; use MediaWiki\Page\PageIdentity; +use MediaWiki\Page\PageReferenceValue; use MediaWiki\Revision\RevisionRecord; use MediaWiki\User\UserIdentity; use Wikimedia\Rdbms\IDatabase; @@ -84,37 +91,6 @@ class LinksUpdate extends DataUpdate { private $mRevisionRecord; /** - * @var array[]|null Added links if calculated. - * @phan-var array<int,array{pl_from:int,pl_from_namespace:int,pl_namespace:int,pl_title:string}>|null - */ - private $linkInsertions = null; - - /** - * @var null|array Deleted links if calculated. - */ - private $linkDeletions = null; - - /** - * @var null|array[] Added external links if calculated. - */ - private $externalLinkInsertions = null; - - /** - * @var null|array Deleted external links if calculated. - */ - private $externalLinkDeletions = null; - - /** - * @var null|array Added properties if calculated. - */ - private $propertyInsertions = null; - - /** - * @var null|array Deleted properties if calculated. - */ - private $propertyDeletions = null; - - /** * @var UserIdentity|null */ private $user; @@ -122,7 +98,8 @@ class LinksUpdate extends DataUpdate { /** @var IDatabase */ private $db; - private $isStrictTestMode = false; + /** @var LinksTableGroup */ + private $tableFactory; /** * @param PageIdentity $page The page we're updating @@ -165,9 +142,27 @@ class LinksUpdate extends DataUpdate { $this->mRecursive = $recursive; + $services = MediaWikiServices::getInstance(); + $this->tableFactory = new LinksTableGroup( + $services->getObjectFactory(), + $services->getDBLoadBalancerFactory(), + $page, + $services->getMainConfig()->get( 'UpdateRowsPerQuery' ), + function ( $table, $rows ) { + $this->getHookRunner()->onLinksUpdateAfterInsert( $this, $table, $rows ); + } + ); + // TODO: this does not have to be called in LinksDeletionUpdate + $this->tableFactory->setParserOutput( $parserOutput ); + $this->getHookRunner()->onLinksUpdateConstructed( $this ); } + public function setTransactionTicket( $ticket ) { + parent::setTransactionTicket( $ticket ); + $this->tableFactory->setTransactionTicket( $ticket ); + } + /** * Update link tables with outgoing links from an updated article * @@ -244,101 +239,10 @@ class LinksUpdate extends DataUpdate { } protected function doIncrementalUpdate() { - # Page links - $existingPL = $this->getExistingLinks(); - $this->linkDeletions = $this->getLinkDeletions( $existingPL ); - $this->linkInsertions = $this->getLinkInsertions( $existingPL ); - $this->incrTableUpdate( 'pagelinks', 'pl', $this->linkDeletions, $this->linkInsertions ); - - # Image links - $existingIL = $this->getExistingImages(); - $imageDeletes = $this->getImageDeletions( $existingIL ); - $imageAdditions = $this->getImageAdditions( $existingIL ); - $this->incrTableUpdate( - 'imagelinks', - 'il', - $imageDeletes, - $this->getImageInsertions( $existingIL ) ); - - # Image change tags - $enabledTags = ChangeTags::getSoftwareTags(); - $mediaChangeTags = array_filter( [ - count( $imageAdditions ) && in_array( 'mw-add-media', $enabledTags ) ? 'mw-add-media' : '', - count( $imageDeletes ) && in_array( 'mw-remove-media', $enabledTags ) ? 'mw-remove-media' : '', - ] ); - $revisionRecord = $this->getRevisionRecord(); - if ( $revisionRecord && count( $mediaChangeTags ) ) { - ChangeTags::addTags( $mediaChangeTags, null, $revisionRecord->getId() ); + foreach ( $this->tableFactory->getAll() as $table ) { + $table->update(); } - # Invalidate all image description pages which had links added or removed - $imageUpdates = $imageDeletes + $imageAdditions; - $this->invalidateImageDescriptions( $imageUpdates ); - - # External links - $existingEL = $this->getExistingExternals(); - $this->externalLinkDeletions = $this->getExternalDeletions( $existingEL ); - $this->externalLinkInsertions = $this->getExternalInsertions( - $existingEL ); - $this->incrTableUpdate( - 'externallinks', - 'el', - $this->externalLinkDeletions, - $this->externalLinkInsertions ); - - # Language links - $existingLL = $this->getExistingInterlangs(); - $this->incrTableUpdate( - 'langlinks', - 'll', - $this->getInterlangDeletions( $existingLL ), - $this->getInterlangInsertions( $existingLL ) ); - - # Inline interwiki links - $existingIW = $this->getExistingInterwikis(); - $this->incrTableUpdate( - 'iwlinks', - 'iwl', - $this->getInterwikiDeletions( $existingIW ), - $this->getInterwikiInsertions( $existingIW ) ); - - # Template links - $existingTL = $this->getExistingTemplates(); - $this->incrTableUpdate( - 'templatelinks', - 'tl', - $this->getTemplateDeletions( $existingTL ), - $this->getTemplateInsertions( $existingTL ) ); - - # Category links - $existingCL = $this->getExistingCategories(); - $categoryDeletes = $this->getCategoryDeletions( $existingCL ); - $this->incrTableUpdate( - 'categorylinks', - 'cl', - $categoryDeletes, - $this->getCategoryInsertions( $existingCL ) ); - $categoryInserts = array_diff_assoc( $this->mCategories, $existingCL ); - $categoryUpdates = $categoryInserts + $categoryDeletes; - - # Page properties - $existingPP = $this->getExistingProperties(); - $this->propertyDeletions = $this->getPropertyDeletions( $existingPP ); - $this->incrTableUpdate( - 'page_props', - 'pp', - $this->propertyDeletions, - $this->getPropertyInsertions( $existingPP ) ); - - # Invalidate the necessary pages - $this->propertyInsertions = array_diff_assoc( $this->mProperties, $existingPP ); - $changed = $this->propertyDeletions + $this->propertyInsertions; - $this->invalidateProperties( $changed ); - - # Invalidate all categories which were added, deleted or changed (set symmetric difference) - $this->invalidateCategories( $categoryUpdates ); - $this->updateCategoryCounts( $categoryInserts, $categoryDeletes ); - # Refresh links of all pages including this page # This will be in a separate transaction if ( $this->mRecursive ) { @@ -423,655 +327,13 @@ class LinksUpdate extends DataUpdate { } /** - * @param array $cats - */ - private function invalidateCategories( $cats ) { - PurgeJobUtils::invalidatePages( - $this->getDB(), NS_CATEGORY, array_map( 'strval', array_keys( $cats ) ) - ); - } - - /** - * Update all the appropriate counts in the category table. - * @param array $added Associative array of category name => sort key - * @param array $deleted Associative array of category name => sort key - */ - private function updateCategoryCounts( array $added, array $deleted ) { - global $wgUpdateRowsPerQuery; - - if ( !$added && !$deleted ) { - return; - } - - $domainId = $this->getDB()->getDomainID(); - $services = MediaWikiServices::getInstance(); - $wp = $services->getWikiPageFactory()->newFromTitle( $this->mTitle ); - $lbf = $services->getDBLoadBalancerFactory(); - // T163801: try to release any row locks to reduce contention - $lbf->commitAndWaitForReplication( __METHOD__, $this->ticket, [ 'domain' => $domainId ] ); - - foreach ( array_chunk( array_keys( $added ), $wgUpdateRowsPerQuery ) as $addBatch ) { - $wp->updateCategoryCounts( array_map( 'strval', $addBatch ), [], $this->mId ); - $lbf->commitAndWaitForReplication( - __METHOD__, $this->ticket, [ 'domain' => $domainId ] ); - } - - foreach ( array_chunk( array_keys( $deleted ), $wgUpdateRowsPerQuery ) as $deleteBatch ) { - $wp->updateCategoryCounts( [], array_map( 'strval', $deleteBatch ), $this->mId ); - $lbf->commitAndWaitForReplication( - __METHOD__, $this->ticket, [ 'domain' => $domainId ] ); - } - } - - /** - * @param array $images - */ - private function invalidateImageDescriptions( array $images ) { - PurgeJobUtils::invalidatePages( - $this->getDB(), NS_FILE, array_map( 'strval', array_keys( $images ) ) - ); - } - - /** - * Update a table by doing a delete query then an insert query - * @param string $table Table name - * @param string $prefix Field name prefix - * @param array $deletions - * @param array $insertions Rows to insert - */ - private function incrTableUpdate( $table, $prefix, $deletions, $insertions ) { - $services = MediaWikiServices::getInstance(); - $bSize = $services->getMainConfig()->get( 'UpdateRowsPerQuery' ); - $lbf = $services->getDBLoadBalancerFactory(); - - if ( $table === 'page_props' ) { - $fromField = 'pp_page'; - } else { - $fromField = "{$prefix}_from"; - } - - $deleteWheres = []; // list of WHERE clause arrays for each DB delete() call - if ( $table === 'pagelinks' || $table === 'templatelinks' || $table === 'iwlinks' ) { - $baseKey = ( $table === 'iwlinks' ) ? 'iwl_prefix' : "{$prefix}_namespace"; - - $curBatchSize = 0; - $curDeletionBatch = []; - $deletionBatches = []; - foreach ( $deletions as $ns => $dbKeys ) { - foreach ( $dbKeys as $dbKey => $unused ) { - $curDeletionBatch[$ns][$dbKey] = 1; - if ( ++$curBatchSize >= $bSize ) { - $deletionBatches[] = $curDeletionBatch; - $curDeletionBatch = []; - $curBatchSize = 0; - } - } - } - if ( $curDeletionBatch ) { - $deletionBatches[] = $curDeletionBatch; - } - - foreach ( $deletionBatches as $deletionBatch ) { - $deleteWheres[] = [ - $fromField => $this->mId, - $this->getDB()->makeWhereFrom2d( $deletionBatch, $baseKey, "{$prefix}_title" ) - ]; - } - } else { - if ( $table === 'langlinks' ) { - $toField = 'll_lang'; - } elseif ( $table === 'page_props' ) { - $toField = 'pp_propname'; - } else { - $toField = $prefix . '_to'; - } - - $deletionBatches = array_chunk( array_keys( $deletions ), $bSize ); - foreach ( $deletionBatches as $deletionBatch ) { - $deleteWheres[] = [ - $fromField => $this->mId, - $toField => array_map( 'strval', $deletionBatch ) - ]; - } - } - - $domainId = $this->getDB()->getDomainID(); - - foreach ( $deleteWheres as $deleteWhere ) { - $this->getDB()->delete( $table, $deleteWhere, __METHOD__ ); - $lbf->commitAndWaitForReplication( - __METHOD__, $this->ticket, [ 'domain' => $domainId ] - ); - } - - $insertBatches = array_chunk( $insertions, $bSize ); - foreach ( $insertBatches as $insertBatch ) { - $this->getDB()->insert( $table, $insertBatch, __METHOD__, - $this->getConflictOption() ); - $lbf->commitAndWaitForReplication( - __METHOD__, $this->ticket, [ 'domain' => $domainId ] - ); - } - - if ( count( $insertions ) ) { - $this->getHookRunner()->onLinksUpdateAfterInsert( $this, $table, $insertions ); - } - } - - /** * Omit conflict resolution options from the insert query so that testing * can confirm that the incremental update logic was correct. * * @param bool $mode */ public function setStrictTestMode( $mode = true ) { - $this->isStrictTestMode = $mode; - } - - /** - * @return array - */ - private function getConflictOption() { - if ( $this->isStrictTestMode ) { - return []; - } else { - return [ 'IGNORE' ]; - } - } - - /** - * Get an array of pagelinks insertions for passing to the DB - * Skips the titles specified by the 2-D array $existing - * @param array $existing - * @return array[] - * @phan-return array<int,array{pl_from:int,pl_from_namespace:int,pl_namespace:int,pl_title:string}> - */ - private function getLinkInsertions( $existing = [] ) { - $arr = []; - foreach ( $this->mLinks as $ns => $dbkeys ) { - $diffs = isset( $existing[$ns] ) - ? array_diff_key( $dbkeys, $existing[$ns] ) - : $dbkeys; - foreach ( $diffs as $dbk => $id ) { - $arr[] = [ - 'pl_from' => $this->mId, - 'pl_from_namespace' => $this->mTitle->getNamespace(), - 'pl_namespace' => $ns, - 'pl_title' => $dbk - ]; - } - } - - return $arr; - } - - /** - * Get an array of template insertions. Like getLinkInsertions() - * @param array $existing - * @return array - */ - private function getTemplateInsertions( $existing = [] ) { - $arr = []; - foreach ( $this->mTemplates as $ns => $dbkeys ) { - $diffs = isset( $existing[$ns] ) ? array_diff_key( $dbkeys, $existing[$ns] ) : $dbkeys; - foreach ( $diffs as $dbk => $id ) { - $arr[] = [ - 'tl_from' => $this->mId, - 'tl_from_namespace' => $this->mTitle->getNamespace(), - 'tl_namespace' => $ns, - 'tl_title' => $dbk - ]; - } - } - - return $arr; - } - - /** - * Get an array of image insertions - * Skips the names specified in $existing - * @param array $existing - * @return array - */ - private function getImageInsertions( $existing = [] ) { - $arr = []; - $diffs = $this->getImageAdditions( $existing ); - foreach ( $diffs as $iname => $dummy ) { - $arr[] = [ - 'il_from' => $this->mId, - 'il_from_namespace' => $this->mTitle->getNamespace(), - 'il_to' => $iname - ]; - } - - return $arr; - } - - /** - * Get an array of externallinks insertions. Skips the names specified in $existing - * @param array $existing - * @return array[] - */ - private function getExternalInsertions( $existing = [] ) { - $arr = []; - $diffs = array_diff_key( $this->mExternals, $existing ); - foreach ( $diffs as $url => $dummy ) { - foreach ( LinkFilter::makeIndexes( $url ) as $index ) { - $arr[] = [ - 'el_from' => $this->mId, - 'el_to' => $url, - 'el_index' => $index, - 'el_index_60' => substr( $index, 0, 60 ), - ]; - } - } - - return $arr; - } - - /** - * Get an array of category insertions - * - * @param array $existing Mapping existing category names to sort keys. If both - * match a link in $this, the link will be omitted from the output - * - * @return array - */ - private function getCategoryInsertions( $existing = [] ) { - global $wgCategoryCollation; - $diffs = array_diff_assoc( $this->mCategories, $existing ); - $arr = []; - - $languageConverter = MediaWikiServices::getInstance()->getLanguageConverterFactory() - ->getLanguageConverter(); - - $collation = MediaWikiServices::getInstance()->getCollationFactory()->getCategoryCollation(); - foreach ( $diffs as $name => $prefix ) { - $nt = Title::makeTitleSafe( NS_CATEGORY, $name ); - $languageConverter->findVariantLink( $name, $nt, true ); - - $type = MediaWikiServices::getInstance()->getNamespaceInfo()-> - getCategoryLinkType( $this->mTitle->getNamespace() ); - - # Treat custom sortkeys as a prefix, so that if multiple - # things are forced to sort as '*' or something, they'll - # sort properly in the category rather than in page_id - # order or such. - $sortkey = $collation->getSortKey( $this->mTitle->getCategorySortkey( $prefix ) ); - - $arr[] = [ - 'cl_from' => $this->mId, - 'cl_to' => $name, - 'cl_sortkey' => $sortkey, - 'cl_timestamp' => $this->getDB()->timestamp(), - 'cl_sortkey_prefix' => $prefix, - 'cl_collation' => $wgCategoryCollation, - 'cl_type' => $type, - ]; - } - - return $arr; - } - - /** - * Get an array of interlanguage link insertions - * - * @param array $existing Mapping existing language codes to titles - * - * @return array - */ - private function getInterlangInsertions( $existing = [] ) { - $diffs = array_diff_assoc( $this->mInterlangs, $existing ); - $arr = []; - foreach ( $diffs as $lang => $title ) { - $arr[] = [ - 'll_from' => $this->mId, - 'll_lang' => $lang, - 'll_title' => $title - ]; - } - - return $arr; - } - - /** - * Get an array of page property insertions - * @param array $existing - * @return array - */ - private function getPropertyInsertions( $existing = [] ) { - $diffs = array_diff_assoc( $this->mProperties, $existing ); - - $arr = []; - foreach ( array_keys( $diffs ) as $name ) { - $arr[] = $this->getPagePropRowData( (string)$name ); - } - - return $arr; - } - - /** - * Returns an associative array to be used for inserting a row into - * the page_props table. Besides the given property name, this will - * include the page id from $this->mId and any property value from - * $this->mProperties. - * - * The array returned will include the pp_sortkey field. - * The sortkey value is currently determined by getPropertySortKeyValue(). - * - * @note this assumes that $this->mProperties[$prop] is defined. - * - * @param string $prop The name of the property. - * - * @return array - */ - private function getPagePropRowData( $prop ) { - $value = $this->mProperties[$prop]; - - return [ - 'pp_page' => $this->mId, - 'pp_propname' => $prop, - 'pp_value' => $value, - 'pp_sortkey' => $this->getPropertySortKeyValue( $value ) - ]; - } - - /** - * Determines the sort key for the given property value. - * This will return $value if it is a float or int, - * 1 or resp. 0 if it is a bool, and null otherwise. - * - * @note In the future, we may allow the sortkey to be specified explicitly - * in ParserOutput::setProperty. - * - * @param mixed $value - * - * @return float|null - */ - private function getPropertySortKeyValue( $value ) { - if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { - return floatval( $value ); - } - - return null; - } - - /** - * Get an array of interwiki insertions for passing to the DB - * Skips the titles specified by the 2-D array $existing - * @param array $existing - * @return array - */ - private function getInterwikiInsertions( $existing = [] ) { - $arr = []; - foreach ( $this->mInterwikis as $prefix => $dbkeys ) { - $diffs = isset( $existing[$prefix] ) - ? array_diff_key( $dbkeys, $existing[$prefix] ) - : $dbkeys; - - foreach ( $diffs as $dbk => $id ) { - $arr[] = [ - 'iwl_from' => $this->mId, - 'iwl_prefix' => $prefix, - 'iwl_title' => $dbk - ]; - } - } - - return $arr; - } - - /** - * Given an array of existing images, returns $this images that are not in there - * and thus should be added. - * @param array $existing - * @return array - */ - private function getImageAdditions( $existing ) { - return array_diff_key( $this->mImages, $existing ); - } - - /** - * Given an array of existing links, returns those links which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getLinkDeletions( $existing ) { - $del = []; - foreach ( $existing as $ns => $dbkeys ) { - if ( isset( $this->mLinks[$ns] ) ) { - $del[$ns] = array_diff_key( $dbkeys, $this->mLinks[$ns] ); - } else { - $del[$ns] = $dbkeys; - } - } - - return $del; - } - - /** - * Given an array of existing templates, returns those templates which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getTemplateDeletions( $existing ) { - $del = []; - foreach ( $existing as $ns => $dbkeys ) { - if ( isset( $this->mTemplates[$ns] ) ) { - $del[$ns] = array_diff_key( $dbkeys, $this->mTemplates[$ns] ); - } else { - $del[$ns] = $dbkeys; - } - } - - return $del; - } - - /** - * Given an array of existing images, returns those images which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getImageDeletions( $existing ) { - return array_diff_key( $existing, $this->mImages ); - } - - /** - * Given an array of existing external links, returns those links which are not - * in $this and thus should be deleted. - * @param array $existing - * @return array - */ - private function getExternalDeletions( $existing ) { - return array_diff_key( $existing, $this->mExternals ); - } - - /** - * Given an array of existing categories, returns those categories which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getCategoryDeletions( $existing ) { - return array_diff_assoc( $existing, $this->mCategories ); - } - - /** - * Given an array of existing interlanguage links, returns those links which are not - * in $this and thus should be deleted. - * @param array $existing - * @return array - */ - private function getInterlangDeletions( $existing ) { - return array_diff_assoc( $existing, $this->mInterlangs ); - } - - /** - * Get array of properties which should be deleted. - * @param array $existing - * @return array - */ - private function getPropertyDeletions( $existing ) { - return array_diff_assoc( $existing, $this->mProperties ); - } - - /** - * Given an array of existing interwiki links, returns those links which are not in $this - * and thus should be deleted. - * @param array $existing - * @return array - */ - private function getInterwikiDeletions( $existing ) { - $del = []; - foreach ( $existing as $prefix => $dbkeys ) { - if ( isset( $this->mInterwikis[$prefix] ) ) { - $del[$prefix] = array_diff_key( $dbkeys, $this->mInterwikis[$prefix] ); - } else { - $del[$prefix] = $dbkeys; - } - } - - return $del; - } - - /** - * Get an array of existing links, as a 2-D array - * - * @return array - */ - private function getExistingLinks() { - $res = $this->getDB()->select( 'pagelinks', [ 'pl_namespace', 'pl_title' ], - [ 'pl_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - if ( !isset( $arr[$row->pl_namespace] ) ) { - $arr[$row->pl_namespace] = []; - } - $arr[$row->pl_namespace][$row->pl_title] = 1; - } - - return $arr; - } - - /** - * Get an array of existing templates, as a 2-D array - * - * @return array - */ - private function getExistingTemplates() { - $res = $this->getDB()->select( 'templatelinks', [ 'tl_namespace', 'tl_title' ], - [ 'tl_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - if ( !isset( $arr[$row->tl_namespace] ) ) { - $arr[$row->tl_namespace] = []; - } - $arr[$row->tl_namespace][$row->tl_title] = 1; - } - - return $arr; - } - - /** - * Get an array of existing images, image names in the keys - * - * @return array - */ - private function getExistingImages() { - $res = $this->getDB()->select( 'imagelinks', [ 'il_to' ], - [ 'il_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->il_to] = 1; - } - - return $arr; - } - - /** - * Get an array of existing external links, URLs in the keys - * - * @return array - */ - private function getExistingExternals() { - $res = $this->getDB()->select( 'externallinks', [ 'el_to' ], - [ 'el_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->el_to] = 1; - } - - return $arr; - } - - /** - * Get an array of existing categories, with the name in the key and sort key in the value. - * - * @return array - */ - private function getExistingCategories() { - $res = $this->getDB()->select( 'categorylinks', [ 'cl_to', 'cl_sortkey_prefix' ], - [ 'cl_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->cl_to] = $row->cl_sortkey_prefix; - } - - return $arr; - } - - /** - * Get an array of existing interlanguage links, with the language code in the key and the - * title in the value. - * - * @return array - */ - private function getExistingInterlangs() { - $res = $this->getDB()->select( 'langlinks', [ 'll_lang', 'll_title' ], - [ 'll_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->ll_lang] = $row->ll_title; - } - - return $arr; - } - - /** - * Get an array of existing inline interwiki links, as a 2-D array - * @return array [ prefix => [ dbkey => 1 ] ] - */ - private function getExistingInterwikis() { - $res = $this->getDB()->select( 'iwlinks', [ 'iwl_prefix', 'iwl_title' ], - [ 'iwl_from' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - if ( !isset( $arr[$row->iwl_prefix] ) ) { - $arr[$row->iwl_prefix] = []; - } - $arr[$row->iwl_prefix][$row->iwl_title] = 1; - } - - return $arr; - } - - /** - * Get an array of existing categories, with the name in the key and sort key in the value. - * - * @return array Array of property names and values - */ - private function getExistingProperties() { - $res = $this->getDB()->select( 'page_props', [ 'pp_propname', 'pp_value' ], - [ 'pp_page' => $this->mId ], __METHOD__ ); - $arr = []; - foreach ( $res as $row ) { - $arr[$row->pp_propname] = $row->pp_value; - } - - return $arr; + $this->tableFactory->setStrictTestMode( $mode ); } /** @@ -1121,6 +383,7 @@ class LinksUpdate extends DataUpdate { */ public function setRevisionRecord( RevisionRecord $revisionRecord ) { $this->mRevisionRecord = $revisionRecord; + $this->tableFactory->setRevision( $revisionRecord ); } /** @@ -1152,66 +415,49 @@ class LinksUpdate extends DataUpdate { } /** - * Invalidate any necessary link lists related to page property changes - * @param array $changed + * @return PageLinksTable */ - private function invalidateProperties( $changed ) { - global $wgPagePropLinkInvalidations; + protected function getPageLinksTable(): PageLinksTable { + // @phan-suppress-next-line PhanTypeMismatchReturnSuperType + return $this->tableFactory->get( 'pagelinks' ); + } - $jobs = []; - foreach ( $changed as $name => $value ) { - if ( isset( $wgPagePropLinkInvalidations[$name] ) ) { - $inv = $wgPagePropLinkInvalidations[$name]; - if ( !is_array( $inv ) ) { - $inv = [ $inv ]; - } - foreach ( $inv as $table ) { - $jobs[] = HTMLCacheUpdateJob::newForBacklinks( - $this->mTitle, - $table, - [ 'causeAction' => 'page-props' ] - ); - } - } - } + /** + * @return ExternalLinksTable + */ + protected function getExternalLinksTable(): ExternalLinksTable { + // @phan-suppress-next-line PhanTypeMismatchReturnSuperType + return $this->tableFactory->get( 'externallinks' ); + } - JobQueueGroup::singleton()->lazyPush( $jobs ); + /** + * @return PagePropsTable + */ + protected function getPagePropsTable(): PagePropsTable { + // @phan-suppress-next-line PhanTypeMismatchReturnSuperType + return $this->tableFactory->get( 'page_props' ); } /** * Fetch page links added by this LinksUpdate. Only available after the update is complete. + * * @since 1.22 - * @return null|array Array of Titles + * @deprecated since 1.38 use getPageReferenceIterator() or getPageReferenceArray() + * @return Title[] Array of Titles */ public function getAddedLinks() { - if ( $this->linkInsertions === null ) { - return null; - } - $result = []; - foreach ( $this->linkInsertions as $insertion ) { - $result[] = Title::makeTitle( $insertion['pl_namespace'], $insertion['pl_title'] ); - } - - return $result; + return $this->getPageLinksTable()->getTitleArray( LinksTable::INSERTED ); } /** * Fetch page links removed by this LinksUpdate. Only available after the update is complete. + * * @since 1.22 - * @return null|array Array of Titles + * @deprecated since 1.38 use getPageReferenceIterator() or getPageReferenceArray() + * @return Title[] Array of Titles */ public function getRemovedLinks() { - if ( $this->linkDeletions === null ) { - return null; - } - $result = []; - foreach ( $this->linkDeletions as $ns => $titles ) { - foreach ( $titles as $title => $unused ) { - $result[] = Title::makeTitle( $ns, $title ); - } - } - - return $result; + return $this->getPageLinksTable()->getTitleArray( LinksTable::DELETED ); } /** @@ -1221,10 +467,7 @@ class LinksUpdate extends DataUpdate { * @return null|array Array of Strings */ public function getAddedExternalLinks() { - if ( $this->externalLinkInsertions === null ) { - return null; - } - return array_column( $this->externalLinkInsertions, 'el_to' ); + return $this->getExternalLinksTable()->getStringArray( LinksTable::INSERTED ); } /** @@ -1234,10 +477,7 @@ class LinksUpdate extends DataUpdate { * @return null|string[] */ public function getRemovedExternalLinks() { - if ( $this->externalLinkDeletions === null ) { - return null; - } - return array_keys( $this->externalLinkDeletions ); + return $this->getExternalLinksTable()->getStringArray( LinksTable::DELETED ); } /** @@ -1247,7 +487,7 @@ class LinksUpdate extends DataUpdate { * @return null|array */ public function getAddedProperties() { - return $this->propertyInsertions; + return $this->getPagePropsTable()->getAssocArray( LinksTable::INSERTED ); } /** @@ -1257,7 +497,45 @@ class LinksUpdate extends DataUpdate { * @return null|array */ public function getRemovedProperties() { - return $this->propertyDeletions; + return $this->getPagePropsTable()->getAssocArray( LinksTable::DELETED ); + } + + /** + * Get an iterator over PageReferenceValue objects corresponding to a given set + * type in a given table. + * + * @since 1.38 + * @param string $tableName The name of any table that links to local titles + * @param int $setType One of: + * - LinksTable::INSERTED: The inserted links + * - LinksTable::DELETED: The deleted links + * - LinksTable::CHANGED: Both the inserted and deleted links + * - LinksTable::OLD: The old set of links, loaded before the update + * - LinksTable::NEW: The new set of links from the ParserOutput + * @return iterable<PageReferenceValue> + * @phan-return \Traversable + */ + public function getPageReferenceIterator( $tableName, $setType ) { + $table = $this->tableFactory->get( $tableName ); + if ( $table instanceof TitleLinksTable ) { + return $table->getPageReferenceIterator( $setType ); + } else { + throw new \InvalidArgumentException( + __METHOD__ . ": $tableName does not have a list of titles" ); + } + } + + /** + * Same as getPageReferenceIterator() but converted to an array for convenience + * (at the expense of additional time and memory usage) + * + * @since 1.38 + * @param string $tableName + * @param int $setType + * @return PageReferenceValue[] + */ + public function getPageReferenceArray( $tableName, $setType ) { + return iterator_to_array( $this->getPageReferenceIterator( $tableName, $setType ) ); } /** diff --git a/includes/deferred/LinksUpdate/CategoryLinksTable.php b/includes/deferred/LinksUpdate/CategoryLinksTable.php new file mode 100644 index 000000000000..2643d6259fec --- /dev/null +++ b/includes/deferred/LinksUpdate/CategoryLinksTable.php @@ -0,0 +1,247 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\Collation\CollationFactory; +use MediaWiki\DAO\WikiAwareEntity; +use MediaWiki\Languages\LanguageConverterFactory; +use MediaWiki\Page\PageReferenceValue; +use MediaWiki\Page\WikiPageFactory; +use NamespaceInfo; +use ParserOutput; +use PurgeJobUtils; +use Title; + +/** + * categorylinks + * + * Link ID format: string[] + * - 0: Category name + * - 1: User-specified sort key (cl_sortkey_prefix) + * + * @since 1.38 + */ +class CategoryLinksTable extends TitleLinksTable { + /** + * @var array Associative array of new links, with the category name in the + * key and the sort key prefix in the value + */ + private $newLinks = []; + + /** + * @var array|null Associative array of existing links, or null if it has + * not been loaded yet + */ + private $existingLinks; + + /** @var \ILanguageConverter */ + private $languageConverter; + + /** @var \Collation */ + private $collation; + + /** @var string The collation name for cl_collation */ + private $collationName; + + /** @var string The category type, which depends on the source page */ + private $categoryType; + + /** @var NamespaceInfo */ + private $namespaceInfo; + + /** @var WikiPageFactory */ + private $wikiPageFactory; + + public function __construct( + LanguageConverterFactory $converterFactory, + CollationFactory $collationFactory, + NamespaceInfo $namespaceInfo, + WikiPageFactory $wikiPageFactory + ) { + $this->languageConverter = $converterFactory->getLanguageConverter(); + $this->collation = $collationFactory->getCategoryCollation(); + $this->collationName = $collationFactory->getDefaultCollationName(); + $this->namespaceInfo = $namespaceInfo; + $this->wikiPageFactory = $wikiPageFactory; + } + + /** + * Cache the category type after the source page has been set + */ + public function startUpdate() { + $this->categoryType = $this->namespaceInfo + ->getCategoryLinkType( $this->getSourcePage()->getNamespace() ); + } + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = []; + foreach ( $parserOutput->getCategories() as $name => $sortKey ) { + // If the sortkey is longer then 255 bytes, it is truncated by DB, and then doesn't match + // when comparing existing vs current categories, causing T27254. + $sortKey = mb_strcut( $sortKey, 0, 255 ); + $this->newLinks[(string)$name] = $sortKey; + } + } + + protected function getTableName() { + return 'categorylinks'; + } + + protected function getFromField() { + return 'cl_from'; + } + + protected function getExistingFields() { + return [ 'cl_to', 'cl_sortkey_prefix' ]; + } + + /** + * Get the new link IDs. The link ID is a list with the name in the first + * element and the sort key prefix in the second element. + * + * @return iterable<array> + */ + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $name => $sortkey ) { + yield [ $name, $sortkey ]; + } + } + + /** + * Get the existing links as an associative array, with the category name + * in the key and the sort key prefix in the value. + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->cl_to] = $row->cl_sortkey_prefix; + } + } + return $this->existingLinks; + } + + /** + * @return \Generator + */ + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $name => $sortkey ) { + yield [ $name, $sortkey ]; + } + } + + protected function isExisting( $linkId ) { + $links = $this->getExistingLinks(); + [ $name, $prefix ] = $linkId; + return \array_key_exists( $name, $links ) && $links[$name] === $prefix; + } + + protected function isInNewSet( $linkId ) { + [ $name, $prefix ] = $linkId; + return \array_key_exists( $name, $this->newLinks ) + && $this->newLinks[$name] === $prefix; + } + + protected function insertLink( $linkId ) { + [ $name, $prefix ] = $linkId; + $nt = Title::makeTitleSafe( NS_CATEGORY, $name ); + $this->languageConverter->findVariantLink( $name, $nt, true ); + + // Treat custom sortkeys as a prefix, so that if multiple + // things are forced to sort as '*' or something, they'll + // sort properly in the category rather than in page_id + // order or such. + $sortkey = $this->collation->getSortKey( + Title::castFromPageIdentity( $this->getSourcePage() ) + ->getCategorySortkey( $prefix ) ); + + $this->insertRow( [ + 'cl_to' => $name, + 'cl_sortkey' => $sortkey, + 'cl_timestamp' => $this->getDB()->timestamp(), + 'cl_sortkey_prefix' => $prefix, + 'cl_collation' => $this->collationName, + 'cl_type' => $this->categoryType, + ] ); + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ 'cl_to' => $linkId[0] ] ); + } + + protected function makePageReferenceValue( $linkId ): PageReferenceValue { + return new PageReferenceValue( NS_CATEGORY, $linkId[0], WikiAwareEntity::LOCAL ); + } + + protected function makeTitle( $linkId ): Title { + return Title::makeTitle( NS_CATEGORY, $linkId[0] ); + } + + protected function deduplicateLinkIds( $linkIds ) { + $seen = []; + foreach ( $linkIds as $linkId ) { + if ( !\array_key_exists( $linkId[0], $seen ) ) { + $seen[$linkId[0]] = true; + yield $linkId; + } + } + } + + protected function finishUpdate() { + $this->invalidateCategories(); + $this->updateCategoryCounts(); + } + + private function invalidateCategories() { + $changedCategoryNames = array_unique( array_merge( + array_column( $this->insertedLinks, 0 ), + array_column( $this->deletedLinks, 0 ) + ) ); + PurgeJobUtils::invalidatePages( + $this->getDB(), NS_CATEGORY, $changedCategoryNames ); + } + + /** + * Update all the appropriate counts in the category table. + */ + private function updateCategoryCounts() { + if ( !$this->insertedLinks && !$this->deletedLinks ) { + return; + } + + $domainId = $this->getDB()->getDomainID(); + $wp = $this->wikiPageFactory->newFromTitle( $this->getSourcePage() ); + $lbf = $this->getLBFactory(); + $size = $this->getBatchSize(); + // T163801: try to release any row locks to reduce contention + $lbf->commitAndWaitForReplication( + __METHOD__, $this->getTransactionTicket(), [ 'domain' => $domainId ] ); + + if ( count( $this->insertedLinks ) + count( $this->deletedLinks ) < $size ) { + $wp->updateCategoryCounts( + array_column( $this->insertedLinks, 0 ), + array_column( $this->deletedLinks, 0 ), + $this->getSourcePageId() + ); + $lbf->commitAndWaitForReplication( + __METHOD__, $this->getTransactionTicket(), [ 'domain' => $domainId ] ); + } else { + $addedChunks = array_chunk( array_column( $this->insertedLinks, 0 ), $size ); + foreach ( $addedChunks as $chunk ) { + $wp->updateCategoryCounts( $chunk, [], $this->getSourcePageId() ); + $lbf->commitAndWaitForReplication( + __METHOD__, $this->getTransactionTicket(), [ 'domain' => $domainId ] ); + } + + $deletedChunks = array_chunk( array_column( $this->deletedLinks, 0 ), $size ); + foreach ( $deletedChunks as $chunk ) { + $wp->updateCategoryCounts( [], $chunk, $this->getSourcePageId() ); + $lbf->commitAndWaitForReplication( + __METHOD__, $this->getTransactionTicket(), [ 'domain' => $domainId ] ); + } + + } + } +} diff --git a/includes/deferred/LinksUpdate/ExternalLinksTable.php b/includes/deferred/LinksUpdate/ExternalLinksTable.php new file mode 100644 index 000000000000..12a8db877e7b --- /dev/null +++ b/includes/deferred/LinksUpdate/ExternalLinksTable.php @@ -0,0 +1,99 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use LinkFilter; +use ParserOutput; + +/** + * externallinks + * + * Link ID format: string URL + * + * @since 1.38 + */ +class ExternalLinksTable extends LinksTable { + private $newLinks = []; + private $existingLinks; + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getExternalLinks(); + } + + protected function getTableName() { + return 'externallinks'; + } + + protected function getFromField() { + return 'el_from'; + } + + protected function getExistingFields() { + return [ 'el_to' ]; + } + + /** + * Get the existing links as an array, where the key is the URL and the + * value is unused. + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->el_to] = true; + } + } + return $this->existingLinks; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $link => $unused ) { + yield $link; + } + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $link => $unused ) { + yield $link; + } + } + + protected function isExisting( $linkId ) { + return \array_key_exists( $linkId, $this->getExistingLinks() ); + } + + protected function isInNewSet( $linkId ) { + return \array_key_exists( $linkId, $this->newLinks ); + } + + protected function insertLink( $linkId ) { + foreach ( LinkFilter::makeIndexes( $linkId ) as $index ) { + $this->insertRow( [ + 'el_to' => $linkId, + 'el_index' => $index, + 'el_index_60' => substr( $index, 0, 60 ), + ] ); + } + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ 'el_to' => $linkId ] ); + } + + /** + * Get an array of URLs of the given type + * + * @param int $setType One of the link set constants as in LinksTable::getLinkIDs() + * @return string[] + */ + public function getStringArray( $setType ) { + $ids = $this->getLinkIDs( $setType ); + if ( is_array( $ids ) ) { + return $ids; + } else { + return iterator_to_array( $ids ); + } + } +} diff --git a/includes/deferred/LinksUpdate/GenericPageLinksTable.php b/includes/deferred/LinksUpdate/GenericPageLinksTable.php new file mode 100644 index 000000000000..acf06e630ae0 --- /dev/null +++ b/includes/deferred/LinksUpdate/GenericPageLinksTable.php @@ -0,0 +1,136 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\DAO\WikiAwareEntity; +use MediaWiki\Page\PageReferenceValue; +use Title; + +/** + * Shared code for pagelinks and templatelinks. They are very similar tables + * since they both link to an arbitrary page identified by namespace and title. + * + * Link ID format: string[]: + * - 0: namespace ID + * - 1: title DB key + * + * @since 1.38 + */ +abstract class GenericPageLinksTable extends TitleLinksTable { + /** + * A 2d array representing the new links, with the namespace ID in the + * first key, the DB key in the second key, and the value arbitrary. + * + * @var array + */ + protected $newLinks = []; + + /** + * The existing links in the same format as self::$newLinks, or null if it + * has not been loaded yet. + * + * @var array|null + */ + private $existingLinks; + + /** + * Get the namespace field name + * + * @return string + */ + abstract protected function getNamespaceField(); + + /** + * Get the title (DB key) field name + * + * @return string + */ + abstract protected function getTitleField(); + + /** + * @return mixed + */ + abstract protected function getFromNamespaceField(); + + protected function getExistingFields() { + return [ + 'ns' => $this->getNamespaceField(), + 'title' => $this->getTitleField() + ]; + } + + /** + * Get existing links as an associative array + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->ns][$row->title] = 1; + } + } + + return $this->existingLinks; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $ns => $links ) { + foreach ( $links as $dbk => $unused ) { + yield [ $ns, $dbk ]; + } + } + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $ns => $links ) { + foreach ( $links as $dbk => $unused ) { + yield [ $ns, $dbk ]; + } + } + } + + protected function isExisting( $linkId ) { + [ $ns, $dbk ] = $linkId; + return isset( $this->getExistingLinks()[$ns][$dbk] ); + } + + protected function isInNewSet( $linkId ) { + [ $ns, $dbk ] = $linkId; + return isset( $this->newLinks[$ns][$dbk] ); + } + + protected function insertLink( $linkId ) { + $this->insertRow( [ + $this->getFromNamespaceField() => $this->getSourcePage()->getNamespace(), + $this->getNamespaceField() => $linkId[0], + $this->getTitleField() => $linkId[1] + ] ); + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ + $this->getNamespaceField() => $linkId[0], + $this->getTitleField() => $linkId[1] + ] ); + } + + protected function makePageReferenceValue( $linkId ): PageReferenceValue { + return new PageReferenceValue( $linkId[0], $linkId[1], WikiAwareEntity::LOCAL ); + } + + protected function makeTitle( $linkId ): Title { + return Title::makeTitle( $linkId[0], $linkId[1] ); + } + + protected function deduplicateLinkIds( $linkIds ) { + $seen = []; + foreach ( $linkIds as $linkId ) { + if ( !isset( $seen[$linkId[0]][$linkId[1]] ) ) { + $seen[$linkId[0]][$linkId[1]] = true; + yield $linkId; + } + } + } +} diff --git a/includes/deferred/LinksUpdate/ImageLinksTable.php b/includes/deferred/LinksUpdate/ImageLinksTable.php new file mode 100644 index 000000000000..bc0adcdb6a13 --- /dev/null +++ b/includes/deferred/LinksUpdate/ImageLinksTable.php @@ -0,0 +1,138 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ChangeTags; +use MediaWiki\DAO\WikiAwareEntity; +use MediaWiki\Page\PageReferenceValue; +use ParserOutput; +use PurgeJobUtils; +use Title; + +/** + * imagelinks + * + * Link ID format: string image name + * + * @since 1.38 + */ +class ImageLinksTable extends TitleLinksTable { + /** + * @var array New links with the name in the key, value arbitrary + */ + private $newLinks; + + /** + * @var array Existing links with the name in the key, value arbitrary + */ + private $existingLinks; + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getImages(); + } + + protected function getTableName() { + return 'imagelinks'; + } + + protected function getFromField() { + return 'il_from'; + } + + protected function getExistingFields() { + return [ 'il_to' ]; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $link => $unused ) { + yield (string)$link; + } + } + + /** + * Get existing links with the name in the key, value arbitrary. + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->il_to] = true; + } + } + return $this->existingLinks; + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $link => $unused ) { + yield $link; + } + } + + protected function isExisting( $linkId ) { + return \array_key_exists( $linkId, $this->getExistingLinks() ); + } + + protected function isInNewSet( $linkId ) { + return \array_key_exists( $linkId, $this->newLinks ); + } + + protected function insertLink( $linkId ) { + $this->insertRow( [ + 'il_from_namespace' => $this->getSourcePage()->getNamespace(), + 'il_to' => $linkId + ] ); + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ 'il_to' => $linkId ] ); + } + + protected function makePageReferenceValue( $linkId ): PageReferenceValue { + return new PageReferenceValue( NS_FILE, $linkId, WikiAwareEntity::LOCAL ); + } + + protected function makeTitle( $linkId ): Title { + return Title::makeTitle( NS_FILE, $linkId ); + } + + protected function deduplicateLinkIds( $linkIds ) { + if ( !is_array( $linkIds ) ) { + $linkIds = iterator_to_array( $linkIds ); + } + return array_unique( $linkIds ); + } + + protected function finishUpdate() { + $this->updateChangeTags(); + $this->invalidateImageDescriptions(); + } + + /** + * Add the mw-add-media or mw-remove-media change tags to the edit if appropriate + */ + private function updateChangeTags() { + $enabledTags = ChangeTags::getSoftwareTags(); + $mediaChangeTags = []; + if ( count( $this->insertedLinks ) && in_array( 'mw-add-media', $enabledTags ) ) { + $mediaChangeTags[] = 'mw-add-media'; + } + if ( count( $this->deletedLinks ) && in_array( 'mw-remove-media', $enabledTags ) ) { + $mediaChangeTags[] = 'mw-remove-media'; + } + $revisionRecord = $this->getRevision(); + if ( $revisionRecord && count( $mediaChangeTags ) ) { + ChangeTags::addTags( $mediaChangeTags, null, $revisionRecord->getId() ); + } + } + + /** + * Invalidate all image description pages which had links added or removed + */ + private function invalidateImageDescriptions() { + PurgeJobUtils::invalidatePages( + $this->getDB(), NS_FILE, + array_merge( $this->insertedLinks, $this->deletedLinks ) ); + } +} diff --git a/includes/deferred/LinksUpdate/InterwikiLinksTable.php b/includes/deferred/LinksUpdate/InterwikiLinksTable.php new file mode 100644 index 000000000000..a5b4f06099b5 --- /dev/null +++ b/includes/deferred/LinksUpdate/InterwikiLinksTable.php @@ -0,0 +1,97 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ParserOutput; + +/** + * iwlinks + * + * Link ID format: string[] + * - 0: Interwiki prefix + * - 1: Foreign title + * + * @since 1.38 + */ +class InterwikiLinksTable extends LinksTable { + /** @var array */ + private $newLinks = []; + + /** @var array|null */ + private $existingLinks; + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getInterwikiLinks(); + } + + protected function getTableName() { + return 'iwlinks'; + } + + protected function getFromField() { + return 'iwl_from'; + } + + protected function getExistingFields() { + return [ 'iwl_prefix', 'iwl_title' ]; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $prefix => $links ) { + foreach ( $links as $title => $unused ) { + yield [ $prefix, $title ]; + } + } + } + + /** + * Get the existing links as a 2-d array, with the prefix in the first key, + * the title in the second key, and the value arbitrary. + * + * @return array|null + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->iwl_prefix][$row->iwl_title] = true; + } + } + return $this->existingLinks; + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $prefix => $links ) { + foreach ( $links as $title => $unused ) { + yield [ $prefix, $title ]; + } + } + } + + protected function isExisting( $linkId ) { + $links = $this->getExistingLinks(); + [ $prefix, $title ] = $linkId; + return isset( $links[$prefix][$title] ); + } + + protected function isInNewSet( $linkId ) { + [ $prefix, $title ] = $linkId; + return isset( $this->newLinks[$prefix][$title] ); + } + + protected function insertLink( $linkId ) { + [ $prefix, $title ] = $linkId; + $this->insertRow( [ + 'iwl_prefix' => $prefix, + 'iwl_title' => $title + ] ); + } + + protected function deleteLink( $linkId ) { + [ $prefix, $title ] = $linkId; + $this->deleteRow( [ + 'iwl_prefix' => $prefix, + 'iwl_title' => $title + ] ); + } +} diff --git a/includes/deferred/LinksUpdate/LangLinksTable.php b/includes/deferred/LinksUpdate/LangLinksTable.php new file mode 100644 index 000000000000..09722338d58f --- /dev/null +++ b/includes/deferred/LinksUpdate/LangLinksTable.php @@ -0,0 +1,99 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ParserOutput; + +/** + * langlinks + * + * Link ID format: string[] + * - 0: Language code + * - 1: Foreign title + * + * @since 1.38 + */ +class LangLinksTable extends LinksTable { + private $newLinks = []; + private $existingLinks; + + public function setParserOutput( ParserOutput $parserOutput ) { + // Convert the format of the interlanguage links + // I didn't want to change it in the ParserOutput, because that array is passed all + // the way back to the skin, so either a skin API break would be required, or an + // inefficient back-conversion. + $ill = $parserOutput->getLanguageLinks(); + $this->newLinks = []; + foreach ( $ill as $link ) { + [ $key, $title ] = explode( ':', $link, 2 ); + $this->newLinks[$key] = $title; + } + } + + protected function getTableName() { + return 'langlinks'; + } + + protected function getFromField() { + return 'll_from'; + } + + protected function getExistingFields() { + return [ 'll_lang', 'll_title' ]; + } + + protected function getNewLinkIDs() { + foreach ( $this->newLinks as $key => $title ) { + yield [ $key, $title ]; + } + } + + /** + * Get the existing links as an array where the key is the language code + * and the value is the title of the target in that language. + * + * @return array + */ + private function getExistingLinks() { + if ( $this->existingLinks === null ) { + $this->existingLinks = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingLinks[$row->ll_lang] = $row->ll_title; + } + } + return $this->existingLinks; + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingLinks() as $lang => $title ) { + yield [ $lang, $title ]; + } + } + + protected function isExisting( $linkId ) { + $links = $this->getExistingLinks(); + [ $lang, $title ] = $linkId; + return \array_key_exists( $lang, $links ) + && $links[$lang] === $title; + } + + protected function isInNewSet( $linkId ) { + [ $lang, $title ] = $linkId; + return \array_key_exists( $lang, $this->newLinks ) + && $this->newLinks[$lang] === $title; + } + + protected function insertLink( $linkId ) { + [ $lang, $title ] = $linkId; + $this->insertRow( [ + 'll_lang' => $lang, + 'll_title' => $title + ] ); + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ + 'll_lang' => $linkId[0] + ] ); + } +} diff --git a/includes/deferred/LinksUpdate/LinksTable.php b/includes/deferred/LinksUpdate/LinksTable.php new file mode 100644 index 000000000000..c32893220450 --- /dev/null +++ b/includes/deferred/LinksUpdate/LinksTable.php @@ -0,0 +1,472 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\Page\PageIdentity; +use MediaWiki\Revision\RevisionRecord; +use ParserOutput; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\Rdbms\IResultWrapper; +use Wikimedia\Rdbms\LBFactory; + +/** + * The base class for classes which update a single link table. + * + * A LinksTable object is a container for new and existing link sets outbound + * from a single page, and an abstraction of the associated DB schema. The + * object stores state related to an update of the outbound links of a page. + * + * Explanation of link ID concept + * ------------------------------ + * + * Link IDs identify a link in the new or old state, or in the change arrays. + * They are opaque to the base class and are type-hinted here as mixed. + * + * Conventionally, the link ID is string|string[] and contains the link target + * fields. + * + * The link ID should contain enough information so that the base class can + * tell whether an existing link is in the new set, or vice versa, for the + * purposes of incremental updates. If a change to a field would cause a DB + * update, the field should be in the link ID. + * + * For example, a change to cl_timestamp does not trigger an update, so + * cl_timestamp is not in the link ID. + * + * @stable to extend + * @since 1.38 + */ +abstract class LinksTable { + /** Link type: Inserted (added) links */ + public const INSERTED = 1; + + /** Link type: Deleted (removed) links */ + public const DELETED = 2; + + /** Link type: Changed (inserted or removed) links */ + public const CHANGED = 3; + + /** Link type: existing/old links */ + public const OLD = 4; + + /** Link type: new links (from the ParserOutput) */ + public const NEW = 5; + + /** + * Rows to delete. An array of associative arrays, each associative array + * being the conditions for a delete query. Common conditions should be + * leftmost in the associative array so that they can be factored out. + * + * @var array + */ + protected $rowsToDelete = []; + + /** + * Rows to insert. An array of associative arrays, each associative array + * mapping field names to values. + * + * @var array + */ + protected $rowsToInsert = []; + + /** @var array Link IDs for inserted links */ + protected $insertedLinks = []; + + /** @var array Link IDs for deleted links */ + protected $deletedLinks = []; + + /** @var LBFactory */ + private $lbFactory; + + /** @var IDatabase */ + private $db; + + /** @var PageIdentity */ + private $sourcePage; + + /** @var int */ + private $batchSize; + + /** @var mixed */ + private $ticket; + + /** @var RevisionRecord */ + private $revision; + + /** @var callable|null Callback for deprecated hook */ + private $afterUpdateHook; + + /** @var bool */ + protected $strictTestMode; + + /** + * This is called by the factory to inject dependencies for the base class. + * This is used instead of the constructor so that changes can be made to + * the injected parameters without breaking the subclass constructors. + * + * @param LBFactory $lbFactory + * @param PageIdentity $sourcePage + * @param int $batchSize + * @param callable|null $afterUpdateHook + */ + final public function injectBaseDependencies( + LBFactory $lbFactory, + PageIdentity $sourcePage, + $batchSize, + $afterUpdateHook + ) { + $this->lbFactory = $lbFactory; + $this->db = $this->lbFactory->getMainLB()->getConnection( DB_PRIMARY ); + $this->sourcePage = $sourcePage; + $this->batchSize = $batchSize; + $this->afterUpdateHook = $afterUpdateHook; + } + + /** + * Set the empty transaction ticket + * + * @param mixed $ticket + */ + public function setTransactionTicket( $ticket ) { + $this->ticket = $ticket; + } + + /** + * Set the revision associated with the edit. + * + * @param RevisionRecord $revision + */ + public function setRevision( RevisionRecord $revision ) { + $this->revision = $revision; + } + + /** + * Subclasses should implement this to extract the data they need from the + * ParserOutput. + * + * To support a future refactor of LinksDeletionUpdate, if this method is + * not called, the subclass should assume that the new state is empty. + * + * @param ParserOutput $parserOutput + */ + abstract public function setParserOutput( ParserOutput $parserOutput ); + + /** + * Get the table name. + * + * @return string + */ + abstract protected function getTableName(); + + /** + * Get the name of the field which links to page_id. + * + * @return string + */ + abstract protected function getFromField(); + + /** + * Get the fields to be used in fetchExistingRows(). Note that + * fetchExistingRows() is just a helper for subclasses. The value returned + * here is effectively private to the subclass. + * + * @return array + */ + abstract protected function getExistingFields(); + + /** + * Get an array (or iterator) of link IDs for the new state. + * + * See the LinksTable doc comment for an explanation of link IDs. + * + * @return iterable<mixed> + */ + abstract protected function getNewLinkIDs(); + + /** + * Get an array (or iterator) of link IDs for the existing state. The + * subclass should load the data from the database. There is + * fetchExistingRows() to make this easier but the subclass is responsible + * for caching. + * + * See the LinksTable doc comment for an explanation of link IDs. + * + * @return iterable<mixed> + */ + abstract protected function getExistingLinkIDs(); + + /** + * Determine whether a link (from the new set) is in the existing set. + * + * @param mixed $linkId + * @return bool + */ + abstract protected function isExisting( $linkId ); + + /** + * Determine whether a link (from the existing set) is in the new set. + * + * @param mixed $linkId + * @return bool + */ + abstract protected function isInNewSet( $linkId ); + + /** + * Insert a link identified by ID. The subclass is expected to queue the + * insertion by calling insertRow(). + * + * @param mixed $linkId + */ + abstract protected function insertLink( $linkId ); + + /** + * Delete a link identified by ID. The subclass is expected to queue the + * deletion by calling deleteRow(). + * + * @param mixed $linkId + */ + abstract protected function deleteLink( $linkId ); + + /** + * @stable to override + * @return IDatabase + */ + protected function getDB(): IDatabase { + return $this->db; + } + + /** + * @return LBFactory + */ + protected function getLBFactory(): LBFactory { + return $this->lbFactory; + } + + /** + * Get the page_id of the source page + * + * @return int + */ + protected function getSourcePageId(): int { + return $this->sourcePage->getId(); + } + + /** + * Get the source page, i.e. the page which is being updated and is the + * source of links. + * + * @return PageIdentity + */ + protected function getSourcePage(): PageIdentity { + return $this->sourcePage; + } + + /** + * Get the maximum number of rows to update in a batch. + * + * @return int + */ + protected function getBatchSize(): int { + return $this->batchSize; + } + + /** + * Get the empty transaction ticket, or null if there is none. + * + * @return mixed + */ + protected function getTransactionTicket() { + return $this->ticket; + } + + /** + * Get the RevisionRecord of the new revision, if the LinksUpdate caller + * injected one. + * + * @return RevisionRecord|null + */ + protected function getRevision(): ?RevisionRecord { + return $this->revision; + } + + /** + * Get field=>value associative array for the from field(s) + * + * @stable to override + * @return array + */ + protected function getFromConds() { + return [ $this->getFromField() => $this->getSourcePageId() ]; + } + + /** + * Do a select query to fetch the existing rows. This is a helper for + * subclasses. + * + * @return IResultWrapper + */ + protected function fetchExistingRows(): IResultWrapper { + return $this->getDB()->newSelectQueryBuilder() + ->select( $this->getExistingFields() ) + ->from( $this->getTableName() ) + ->where( $this->getFromConds() ) + ->caller( __METHOD__ ) + ->fetchResultSet(); + } + + /** + * Execute the update + */ + final public function update() { + $this->startUpdate(); + foreach ( $this->getNewLinkIDs() as $link ) { + if ( !$this->isExisting( $link ) ) { + $this->insertLink( $link ); + $this->insertedLinks[] = $link; + } + } + + foreach ( $this->getExistingLinkIDs() as $link ) { + if ( !$this->isInNewSet( $link ) ) { + $this->deleteLink( $link ); + $this->deletedLinks[] = $link; + } + } + $this->doWrites(); + $this->finishUpdate(); + } + + /** + * Queue a row for insertion. Subclasses are expected to call this from + * insertLink(). The "from" field should not be included in the row. + * + * @param array $row Associative array mapping fields to values. + */ + protected function insertRow( $row ) { + $row += $this->getFromConds(); + $this->rowsToInsert[] = $row; + } + + /** + * Queue a deletion operation. Subclasses are expected to call this from + * deleteLink(). The "from" field does not need to be included in the + * conditions. + * + * Most often, the conditions match a single row, but this is not required. + * + * @param array $conds Associative array mapping fields to values, + * specifying the conditions for a delete query. + */ + protected function deleteRow( $conds ) { + // Put the "from" field leftmost, so it can be factored out + $conds = $this->getFromConds() + $conds; + $this->rowsToDelete[] = $conds; + } + + /** + * Subclasses can override this to do any necessary setup before individual + * write operations begin. + * + * @stable to override + */ + protected function startUpdate() { + } + + /** + * Subclasses can override this to do any updates associated with their + * link data, for example dispatching HTML update jobs. + * + * @stable to override + */ + protected function finishUpdate() { + } + + /** + * Do the common DB operations + */ + protected function doWrites() { + $db = $this->getDB(); + $table = $this->getTableName(); + $domainId = $db->getDomainID(); + $batchSize = $this->getBatchSize(); + $ticket = $this->getTransactionTicket(); + + foreach ( array_chunk( $this->rowsToDelete, $batchSize ) as $chunk ) { + $factoredConds = $db->factorConds( $chunk ); + $db->delete( + $table, + $factoredConds, + __METHOD__ + ); + $this->lbFactory->commitAndWaitForReplication( + __METHOD__, $ticket, [ 'domain' => $domainId ] + ); + } + + $insertBatches = array_chunk( $this->rowsToInsert, $batchSize ); + foreach ( $insertBatches as $insertBatch ) { + $db->insert( $table, $insertBatch, __METHOD__, $this->getInsertOptions() ); + $this->lbFactory->commitAndWaitForReplication( + __METHOD__, $ticket, [ 'domain' => $domainId ] + ); + } + + if ( count( $this->rowsToInsert ) && $this->afterUpdateHook ) { + ( $this->afterUpdateHook )( $table, $this->rowsToInsert ); + } + } + + /** + * Omit conflict resolution options from the insert query so that testing + * can confirm that the incremental update logic was correct. + * + * @param bool $mode + */ + public function setStrictTestMode( $mode = true ) { + $this->strictTestMode = $mode; + } + + /** + * Get the options for the insert queries + * + * @return array + */ + protected function getInsertOptions() { + if ( $this->strictTestMode ) { + return []; + } else { + return [ 'IGNORE' ]; + } + } + + /** + * Get an array or iterator of link IDs of a given type. Some subclasses + * use this to provide typed data to callers. This is not public because + * link IDs are a private concept. + * + * @param int $setType One of the class constants: self::INSERTED, self::DELETED, + * self::CHANGED, self::OLD or self::NEW. + * @return iterable<mixed> + */ + protected function getLinkIDs( $setType ) { + switch ( $setType ) { + case self::INSERTED: + return $this->insertedLinks; + + case self::DELETED: + return $this->deletedLinks; + + case self::CHANGED: + return array_merge( $this->insertedLinks, $this->deletedLinks ); + + case self::OLD: + return $this->getExistingLinkIDs(); + + case self::NEW: + return $this->getNewLinkIDs(); + + default: + throw new \InvalidArgumentException( __METHOD__ . ": Unknown link type" ); + } + } +} diff --git a/includes/deferred/LinksUpdate/LinksTableGroup.php b/includes/deferred/LinksUpdate/LinksTableGroup.php new file mode 100644 index 000000000000..850a12e3fc33 --- /dev/null +++ b/includes/deferred/LinksUpdate/LinksTableGroup.php @@ -0,0 +1,218 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\Config\ServiceOptions; +use MediaWiki\MediaWikiServices; +use MediaWiki\Page\PageIdentity; +use MediaWiki\Revision\RevisionRecord; +use ParserOutput; +use Wikimedia\ObjectFactory\ObjectFactory; +use Wikimedia\Rdbms\LBFactory; + +/** + * @since 1.38 + */ +class LinksTableGroup { + /** + * ObjectFactory specifications for the subclasses. The following + * additional keys are defined: + * + * - serviceOptions: An array of configuration variable names. If this is + * set, the specified configuration will be sent to the subclass + * constructor as a ServiceOptions object. + */ + private const CORE_LIST = [ + 'categorylinks' => [ + 'class' => CategoryLinksTable::class, + 'services' => [ + 'LanguageConverterFactory', + 'CollationFactory', + 'NamespaceInfo', + 'WikiPageFactory' + ] + ], + 'externallinks' => [ + 'class' => ExternalLinksTable::class + ], + 'imagelinks' => [ + 'class' => ImageLinksTable::class + ], + 'iwlinks' => [ + 'class' => InterwikiLinksTable::class + ], + 'langlinks' => [ + 'class' => LangLinksTable::class + ], + 'pagelinks' => [ + 'class' => PageLinksTable::class + ], + 'page_props' => [ + 'class' => PagePropsTable::class, + 'services' => [ + 'JobQueueGroup' + ], + 'serviceOptions' => PagePropsTable::CONSTRUCTOR_OPTIONS + ], + 'templatelinks' => [ + 'class' => TemplateLinksTable::class + ] + ]; + + /** @var ObjectFactory */ + private $objectFactory; + + /** @var LBFactory */ + private $lbFactory; + + /** @var PageIdentity */ + private $page; + + /** @var ParserOutput|null */ + private $parserOutput; + + /** @var int */ + private $batchSize; + + /** @var callable|null */ + private $afterUpdateHook; + + /** @var mixed */ + private $ticket; + + /** @var RevisionRecord|null */ + private $revision; + + /** @var LinksTable[] */ + private $tables = []; + + /** + * @param ObjectFactory $objectFactory + * @param LBFactory $lbFactory + * @param PageIdentity $page + * @param int $batchSize + * @param callable|null $afterUpdateHook + */ + public function __construct( + ObjectFactory $objectFactory, + LBFactory $lbFactory, + PageIdentity $page, + $batchSize, + $afterUpdateHook + ) { + $this->objectFactory = $objectFactory; + $this->lbFactory = $lbFactory; + $this->page = $page; + $this->batchSize = $batchSize; + $this->afterUpdateHook = $afterUpdateHook; + } + + /** + * Set the ParserOutput object to be used in new and existing objects. + * + * @param ParserOutput $parserOutput + */ + public function setParserOutput( ParserOutput $parserOutput ) { + $this->parserOutput = $parserOutput; + foreach ( $this->tables as $table ) { + $table->setParserOutput( $parserOutput ); + } + } + + /** + * Set the transaction ticket to be used in new and existing objects. + * + * @param mixed $ticket + */ + public function setTransactionTicket( $ticket ) { + $this->ticket = $ticket; + foreach ( $this->tables as $table ) { + $table->setTransactionTicket( $ticket ); + } + } + + /** + * Set the revision to be used in new and existing objects. + * + * @param RevisionRecord $revision + */ + public function setRevision( RevisionRecord $revision ) { + $this->revision = $revision; + foreach ( $this->tables as $table ) { + $table->setRevision( $revision ); + } + } + + /** + * Set the strict test mode + * + * @param bool $mode + */ + public function setStrictTestMode( $mode = true ) { + foreach ( $this->getAll() as $table ) { + $table->setStrictTestMode( $mode ); + } + } + + /** + * Get the spec array for a given table. + * + * @param string $tableName + * @return array + */ + private function getSpec( $tableName ) { + if ( !isset( self::CORE_LIST[$tableName] ) ) { + throw new \InvalidArgumentException( + __CLASS__ . ": unknown table name \"$tableName\"" ); + } + return self::CORE_LIST[$tableName]; + } + + /** + * Get a LinksTable for a given table. + * + * @param string $tableName + * @return LinksTable + */ + public function get( $tableName ) { + if ( !isset( $this->tables[$tableName] ) ) { + $spec = $this->getSpec( $tableName ); + if ( isset( $spec['serviceOptions'] ) ) { + $config = MediaWikiServices::getInstance()->getMainConfig(); + $extraArgs = [ new ServiceOptions( $spec['serviceOptions'], $config ) ]; + unset( $spec['serviceOptions'] ); + } else { + $extraArgs = []; + } + /** @var LinksTable $table */ + $table = $this->objectFactory->createObject( $spec, [ 'extraArgs' => $extraArgs ] ); + $table->injectBaseDependencies( + $this->lbFactory, + $this->page, + $this->batchSize, + $this->afterUpdateHook + ); + if ( $this->parserOutput ) { + $table->setParserOutput( $this->parserOutput ); + } + if ( $this->ticket ) { + $table->setTransactionTicket( $this->ticket ); + } + if ( $this->revision ) { + $table->setRevision( $this->revision ); + } + $this->tables[$tableName] = $table; + } + return $this->tables[$tableName]; + } + + /** + * Get LinksTable objects for all known links tables. + * @return iterable<LinksTable> + */ + public function getAll() { + foreach ( self::CORE_LIST as $tableName => $spec ) { + yield $this->get( $tableName ); + } + } +} diff --git a/includes/deferred/LinksUpdate/PageLinksTable.php b/includes/deferred/LinksUpdate/PageLinksTable.php new file mode 100644 index 000000000000..15a21a55f0a1 --- /dev/null +++ b/includes/deferred/LinksUpdate/PageLinksTable.php @@ -0,0 +1,34 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ParserOutput; + +/** + * pagelinks + */ +class PageLinksTable extends GenericPageLinksTable { + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getLinks(); + } + + protected function getTableName() { + return 'pagelinks'; + } + + protected function getFromField() { + return 'pl_from'; + } + + protected function getNamespaceField() { + return 'pl_namespace'; + } + + protected function getTitleField() { + return 'pl_title'; + } + + protected function getFromNamespaceField() { + return 'pl_from_namespace'; + } +} diff --git a/includes/deferred/LinksUpdate/PagePropsTable.php b/includes/deferred/LinksUpdate/PagePropsTable.php new file mode 100644 index 000000000000..ca7097d16c2b --- /dev/null +++ b/includes/deferred/LinksUpdate/PagePropsTable.php @@ -0,0 +1,190 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use HTMLCacheUpdateJob; +use JobQueueGroup; +use MediaWiki\Config\ServiceOptions; +use ParserOutput; + +/** + * page_props + * + * Link ID format: string[] + * 0: Property name (pp_propname) + * 1: Property value (pp_value) + * + * @since 1.38 + */ +class PagePropsTable extends LinksTable { + /** @var JobQueueGroup */ + private $jobQueueGroup; + + /** @var array */ + private $newProps = []; + + /** @var array|null */ + private $existingProps; + + /** + * The configured PagePropLinkInvalidations. An associative array where the + * key is the property name and the value is a string or array of strings + * giving the link table names which will be used for backlink cache + * invalidation. + * + * @var array + */ + private $linkInvalidations; + + public const CONSTRUCTOR_OPTIONS = [ 'PagePropLinkInvalidations' ]; + + public function __construct( + ServiceOptions $options, + JobQueueGroup $jobQueueGroup + ) { + $this->jobQueueGroup = $jobQueueGroup; + $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); + $this->linkInvalidations = $options->get( 'PagePropLinkInvalidations' ); + } + + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newProps = $parserOutput->getPageProperties(); + } + + protected function getTableName() { + return 'page_props'; + } + + protected function getFromField() { + return 'pp_page'; + } + + protected function getExistingFields() { + return [ 'pp_propname', 'pp_value' ]; + } + + protected function getNewLinkIDs() { + foreach ( $this->newProps as $name => $value ) { + yield [ $name, $value ]; + } + } + + /** + * Get the existing page_props as an associative array + * + * @return array + */ + private function getExistingProps() { + if ( $this->existingProps === null ) { + $this->existingProps = []; + foreach ( $this->fetchExistingRows() as $row ) { + $this->existingProps[$row->pp_propname] = $row->pp_value; + } + } + return $this->existingProps; + } + + protected function getExistingLinkIDs() { + foreach ( $this->getExistingProps() as $name => $value ) { + yield [ $name, $value ]; + } + } + + protected function isExisting( $linkId ) { + $existing = $this->getExistingProps(); + [ $name, $value ] = $linkId; + return \array_key_exists( $name, $existing ) + && $existing[$name] === $value; + } + + protected function isInNewSet( $linkId ) { + [ $name, $value ] = $linkId; + return \array_key_exists( $name, $this->newProps ) + && $this->newProps[$name] === $value; + } + + protected function insertLink( $linkId ) { + [ $name, $value ] = $linkId; + $this->insertRow( [ + 'pp_propname' => $name, + 'pp_value' => $value, + 'pp_sortkey' => $this->getPropertySortKeyValue( $value ) + ] ); + } + + /** + * Determines the sort key for the given property value. + * This will return $value if it is a float or int, + * 1 or resp. 0 if it is a bool, and null otherwise. + * + * @note In the future, we may allow the sortkey to be specified explicitly + * in ParserOutput::setProperty. + * + * @param mixed $value + * + * @return float|null + */ + private function getPropertySortKeyValue( $value ) { + if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { + return floatval( $value ); + } + + return null; + } + + protected function deleteLink( $linkId ) { + $this->deleteRow( [ + 'pp_propname' => $linkId[0] + ] ); + } + + protected function finishUpdate() { + $changed = array_unique( array_merge( + array_column( $this->insertedLinks, 0 ), + array_column( $this->deletedLinks, 0 ) ) ); + $this->invalidateProperties( $changed ); + } + + /** + * Invalidate the properties given the list of changed property names + * + * @param string[] $changed + */ + private function invalidateProperties( array $changed ) { + $jobs = []; + foreach ( $changed as $name ) { + if ( isset( $this->linkInvalidations[$name] ) ) { + $inv = $this->linkInvalidations[$name]; + if ( !is_array( $inv ) ) { + $inv = [ $inv ]; + } + foreach ( $inv as $table ) { + $jobs[] = HTMLCacheUpdateJob::newForBacklinks( + $this->getSourcePage(), + $table, + [ 'causeAction' => 'page-props' ] + ); + } + } + } + + if ( $jobs ) { + $this->jobQueueGroup->lazyPush( $jobs ); + } + } + + /** + * Get the properties for a given link set as an associative array + * + * @param int $setType The set type as in LinksTable::getLinkIDs() + * @return array + */ + public function getAssocArray( $setType ) { + $props = []; + foreach ( $this->getLinkIDs( $setType ) as $linkId ) { + [ $name, $value ] = $linkId; + $props[$name] = $value; + } + return $props; + } +} diff --git a/includes/deferred/LinksUpdate/TemplateLinksTable.php b/includes/deferred/LinksUpdate/TemplateLinksTable.php new file mode 100644 index 000000000000..0cc655fc517c --- /dev/null +++ b/includes/deferred/LinksUpdate/TemplateLinksTable.php @@ -0,0 +1,36 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use ParserOutput; + +/** + * templatelinks + * + * @since 1.38 + */ +class TemplateLinksTable extends GenericPageLinksTable { + public function setParserOutput( ParserOutput $parserOutput ) { + $this->newLinks = $parserOutput->getTemplates(); + } + + protected function getTableName() { + return 'templatelinks'; + } + + protected function getFromField() { + return 'tl_from'; + } + + protected function getNamespaceField() { + return 'tl_namespace'; + } + + protected function getTitleField() { + return 'tl_title'; + } + + protected function getFromNamespaceField() { + return 'tl_from_namespace'; + } +} diff --git a/includes/deferred/LinksUpdate/TitleLinksTable.php b/includes/deferred/LinksUpdate/TitleLinksTable.php new file mode 100644 index 000000000000..8a28638d8ced --- /dev/null +++ b/includes/deferred/LinksUpdate/TitleLinksTable.php @@ -0,0 +1,88 @@ +<?php + +namespace MediaWiki\Deferred\LinksUpdate; + +use MediaWiki\Page\PageReferenceValue; +use Title; + +/** + * An abstract base class for tables that link to local titles. + * + * @stable to extend + * @since 1.38 + */ +abstract class TitleLinksTable extends LinksTable { + /** + * Convert a link ID to a PageReferenceValue + * + * @param mixed $linkId + * @return PageReferenceValue + */ + abstract protected function makePageReferenceValue( $linkId ): PageReferenceValue; + + /** + * Convert a link ID to a Title + * + * @stable to override + * @param mixed $linkId + * @return Title + */ + protected function makeTitle( $linkId ): Title { + return Title::castFromPageReference( $this->makePageReferenceValue( $linkId ) ); + } + + /** + * Given an iterator over link IDs, remove links which go to the same + * title, leaving only one link per title. + * + * @param iterable<mixed> $linkIds + * @return iterable<mixed> + */ + abstract protected function deduplicateLinkIds( $linkIds ); + + /** + * Get link IDs for a given set type, filtering out duplicate links to the + * same title. + * + * @param int $setType + * @return iterable<mixed> + */ + protected function getDeduplicatedLinkIds( $setType ) { + $linkIds = $this->getLinkIDs( $setType ); + // Only the CHANGED set type should have duplicates + if ( $setType === self::CHANGED ) { + $linkIds = $this->deduplicateLinkIds( $linkIds ); + } + return $linkIds; + } + + /** + * Get a link set as an array of Title objects. This is memory-inefficient. + * + * @deprecated since 1.38 + * @param int $setType + * @return Title[] + */ + public function getTitleArray( $setType ) { + $linkIds = $this->getDeduplicatedLinkIds( $setType ); + $titles = []; + foreach ( $linkIds as $linkId ) { + $titles[] = $this->makeTitle( $linkId ); + } + return $titles; + } + + /** + * Get a link set as an iterator over PageReferenceValue objects. + * + * @param int $setType + * @return iterable<PageReferenceValue> + * @phan-return \Traversable + */ + public function getPageReferenceIterator( $setType ) { + $linkIds = $this->getDeduplicatedLinkIds( $setType ); + foreach ( $linkIds as $linkId ) { + yield $this->makePageReferenceValue( $linkId ); + } + } +} diff --git a/tests/phpunit/includes/deferred/LinksDeletionUpdateTest.php b/tests/phpunit/includes/deferred/LinksDeletionUpdateTest.php index 59c6cdeff91f..82db3d71ba6b 100644 --- a/tests/phpunit/includes/deferred/LinksDeletionUpdateTest.php +++ b/tests/phpunit/includes/deferred/LinksDeletionUpdateTest.php @@ -3,6 +3,18 @@ /** * @covers LinksDeletionUpdate * @covers LinksUpdate + * @covers \MediaWiki\Deferred\LinksUpdate\CategoryLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\ExternalLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\GenericPageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\ImageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\InterwikiLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LangLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LinksTableGroup + * @covers \MediaWiki\Deferred\LinksUpdate\PageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\PagePropsTable + * @covers \MediaWiki\Deferred\LinksUpdate\TemplateLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\TitleLinksTable * * @group LinksUpdate * @group Database diff --git a/tests/phpunit/includes/deferred/LinksUpdateTest.php b/tests/phpunit/includes/deferred/LinksUpdateTest.php index 9d090824b4cc..db3efd269ce4 100644 --- a/tests/phpunit/includes/deferred/LinksUpdateTest.php +++ b/tests/phpunit/includes/deferred/LinksUpdateTest.php @@ -5,6 +5,18 @@ use Wikimedia\TestingAccessWrapper; /** * @covers LinksUpdate + * @covers \MediaWiki\Deferred\LinksUpdate\CategoryLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\ExternalLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\GenericPageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\ImageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\InterwikiLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LangLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\LinksTableGroup + * @covers \MediaWiki\Deferred\LinksUpdate\PageLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\PagePropsTable + * @covers \MediaWiki\Deferred\LinksUpdate\TemplateLinksTable + * @covers \MediaWiki\Deferred\LinksUpdate\TitleLinksTable * * @group LinksUpdate * @group Database |