diff options
author | Amir Sarabadani <ladsgroup@gmail.com> | 2023-02-09 19:59:23 +0100 |
---|---|---|
committer | Amir Sarabadani <ladsgroup@gmail.com> | 2023-02-09 20:18:54 +0100 |
commit | c8116223b4ba8d66dc15c4cdd2cb3fd47a083934 (patch) | |
tree | 84b790b50d6482040fffffe844f5972f1227ebc3 /includes/Category | |
parent | 7fa44740499e191b034ca8dea46f6fe359f6a508 (diff) | |
download | mediawikicore-c8116223b4ba8d66dc15c4cdd2cb3fd47a083934.tar.gz mediawikicore-c8116223b4ba8d66dc15c4cdd2cb3fd47a083934.zip |
Reorg: Move category-related classes from includes/ to Category/
Bug: T321882
Change-Id: I0b86acfdeaa3a2a0a14b7763fd088122820bafdc
Diffstat (limited to 'includes/Category')
-rw-r--r-- | includes/Category/CategoriesRdf.php | 140 | ||||
-rw-r--r-- | includes/Category/Category.php | 542 | ||||
-rw-r--r-- | includes/Category/CategoryViewer.php | 827 | ||||
-rw-r--r-- | includes/Category/TrackingCategories.php | 259 |
4 files changed, 1768 insertions, 0 deletions
diff --git a/includes/Category/CategoriesRdf.php b/includes/Category/CategoriesRdf.php new file mode 100644 index 000000000000..b1384feb5ed0 --- /dev/null +++ b/includes/Category/CategoriesRdf.php @@ -0,0 +1,140 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +namespace MediaWiki\Category; + +use Title; +use Wikimedia\Purtle\RdfWriter; + +/** + * Helper class to produce RDF representation of categories. + */ +class CategoriesRdf { + /** + * Prefix used for Mediawiki ontology in the dump. + */ + private const ONTOLOGY_PREFIX = 'mediawiki'; + /** + * Base URL for Mediawiki ontology. + */ + private const ONTOLOGY_URL = 'https://www.mediawiki.org/ontology#'; + /** + * OWL description of the ontology. + */ + public const OWL_URL = 'https://www.mediawiki.org/ontology/ontology.owl'; + /** + * Current version of the dump format. + */ + public const FORMAT_VERSION = "1.1"; + /** + * Special page for Dump identification. 
+ * Used as head URI for each wiki's category dump, e.g.: + * https://en.wikipedia.org/wiki/Special:CategoryDump + */ + private const SPECIAL_DUMP = 'Special:CategoryDump'; + /** + * @var RdfWriter + */ + private $rdfWriter; + + public function __construct( RdfWriter $writer ) { + $this->rdfWriter = $writer; + } + + /** + * Setup prefixes relevant for the dump + */ + public function setupPrefixes() { + $this->rdfWriter->prefix( self::ONTOLOGY_PREFIX, self::ONTOLOGY_URL ); + $this->rdfWriter->prefix( 'rdfs', 'http://www.w3.org/2000/01/rdf-schema#' ); + $this->rdfWriter->prefix( 'owl', 'http://www.w3.org/2002/07/owl#' ); + $this->rdfWriter->prefix( 'schema', 'http://schema.org/' ); + $this->rdfWriter->prefix( 'cc', 'http://creativecommons.org/ns#' ); + } + + /** + * Write RDF data for link between categories. + * @param string $fromName Child category name + * @param string $toName Parent category name + */ + public function writeCategoryLinkData( $fromName, $toName ) { + $titleFrom = Title::makeTitle( NS_CATEGORY, $fromName ); + $titleTo = Title::makeTitle( NS_CATEGORY, $toName ); + $this->rdfWriter->about( $this->titleToUrl( $titleFrom ) ) + ->say( self::ONTOLOGY_PREFIX, 'isInCategory' ) + ->is( $this->titleToUrl( $titleTo ) ); + } + + /** + * Write out the data for single category. + * @param string $categoryName + * @param bool $isHidden Hidden category? 
+ * @param int $pages Page count (note this includes only Wiki articles, not subcats or files) + * @param int $subcategories Subcategory count + */ + public function writeCategoryData( $categoryName, $isHidden, $pages, $subcategories ) { + if ( $pages < 0 ) { + // Bugfix for T201119 + $pages = 0; + } + $title = Title::makeTitle( NS_CATEGORY, $categoryName ); + $this->rdfWriter->about( $this->titleToUrl( $title ) ) + ->say( 'a' ) + ->is( self::ONTOLOGY_PREFIX, 'Category' ); + if ( $isHidden ) { + $this->rdfWriter->is( self::ONTOLOGY_PREFIX, 'HiddenCategory' ); + } + $titletext = $title->getText(); + $this->rdfWriter->say( 'rdfs', 'label' )->value( $titletext ); + // @phan-suppress-next-line PhanTypeMismatchArgument T302667 + $this->rdfWriter->say( self::ONTOLOGY_PREFIX, 'pages' )->value( $pages ); + // @phan-suppress-next-line PhanTypeMismatchArgument T302667 + $this->rdfWriter->say( self::ONTOLOGY_PREFIX, 'subcategories' )->value( $subcategories ); + // TODO: do we want files too here? Easy to add, but don't have use case so far. + } + + /** + * Make URL from title label + * @param string $titleLabel Short label (without namespace) of the category + * @return string URL for the category + */ + public function labelToUrl( $titleLabel ) { + return $this->titleToUrl( Title::makeTitle( NS_CATEGORY, $titleLabel ) ); + } + + /** + * Convert Title to link to target page. + * @param Title $title + * @return string URL for the category + */ + private function titleToUrl( Title $title ) { + return $title->getFullURL( '', false, PROTO_CANONICAL ); + } + + /** + * Get URI of the dump for this particular wiki. 
+ * @return false|string + */ + public function getDumpURI() { + return $this->titleToUrl( Title::makeTitle( NS_MAIN, self::SPECIAL_DUMP ) ); + } + +} + +class_alias( CategoriesRdf::class, 'CategoriesRdf' ); diff --git a/includes/Category/Category.php b/includes/Category/Category.php new file mode 100644 index 000000000000..d290f532babe --- /dev/null +++ b/includes/Category/Category.php @@ -0,0 +1,542 @@ +<?php +/** + * Representation for a category. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @author Simetrical + */ + +namespace MediaWiki\Category; + +use DeferredUpdates; +use MediaWiki\MediaWikiServices; +use MediaWiki\Page\PageIdentity; +use MediaWiki\Title\TitleArray; +use MWException; +use ReadOnlyMode; +use stdClass; +use Title; +use Wikimedia\Rdbms\ILoadBalancer; + +/** + * Category objects are immutable, strictly speaking. If you call methods that change the database, + * like to refresh link counts, the objects will be appropriately reinitialized. + * Member variables are lazy-initialized. 
+ */ +class Category { + /** Name of the category, normalized to DB-key form */ + private $mName = null; + private $mID = null; + /** + * Category page title + * @var PageIdentity + */ + private $mPage = null; + + /** Counts of membership (cat_pages, cat_subcats, cat_files) */ + /** @var int */ + private $mPages = 0; + + /** @var int */ + private $mSubcats = 0; + + /** @var int */ + private $mFiles = 0; + + protected const LOAD_ONLY = 0; + protected const LAZY_INIT_ROW = 1; + + public const ROW_COUNT_SMALL = 100; + + public const COUNT_ALL_MEMBERS = 0; + public const COUNT_CONTENT_PAGES = 1; + + /** @var ILoadBalancer */ + private $loadBalancer; + + /** @var ReadOnlyMode */ + private $readOnlyMode; + + private function __construct() { + $services = MediaWikiServices::getInstance(); + $this->loadBalancer = $services->getDBLoadBalancer(); + $this->readOnlyMode = $services->getReadOnlyMode(); + } + + /** + * Set up all member variables using a database query. + * @param int $mode One of (Category::LOAD_ONLY, Category::LAZY_INIT_ROW) + * @throws MWException + * @return bool True on success, false on failure. + */ + protected function initialize( $mode = self::LOAD_ONLY ) { + if ( $this->mName === null && $this->mID === null ) { + throw new MWException( __METHOD__ . ' has both names and IDs null' ); + } elseif ( $this->mID === null ) { + $where = [ 'cat_title' => $this->mName ]; + } elseif ( $this->mName === null ) { + $where = [ 'cat_id' => $this->mID ]; + } else { + # Already initialized + return true; + } + + $row = $this->loadBalancer->getConnectionRef( DB_REPLICA )->newSelectQueryBuilder() + ->select( [ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ] ) + ->from( 'category' ) + ->where( $where ) + ->caller( __METHOD__ )->fetchRow(); + + if ( !$row ) { + # Okay, there were no contents. Nothing to initialize. + if ( $this->mPage ) { + # If there is a page object but no record in the category table, + # treat this as an empty category. 
+ $this->mID = false; + $this->mName = $this->mPage->getDBkey(); + $this->mPages = 0; + $this->mSubcats = 0; + $this->mFiles = 0; + + # If the page exists, call refreshCounts to add a row for it. + if ( $mode === self::LAZY_INIT_ROW && $this->mPage->exists() ) { + DeferredUpdates::addCallableUpdate( [ $this, 'refreshCounts' ] ); + } + + return true; + } else { + return false; # Fail + } + } + + $this->mID = $row->cat_id; + $this->mName = $row->cat_title; + $this->mPages = (int)$row->cat_pages; + $this->mSubcats = (int)$row->cat_subcats; + $this->mFiles = (int)$row->cat_files; + + # (T15683) If the count is negative, then 1) it's obviously wrong + # and should not be kept, and 2) we *probably* don't have to scan many + # rows to obtain the correct figure, so let's risk a one-time recount. + if ( $this->mPages < 0 || $this->mSubcats < 0 || $this->mFiles < 0 ) { + $this->mPages = max( $this->mPages, 0 ); + $this->mSubcats = max( $this->mSubcats, 0 ); + $this->mFiles = max( $this->mFiles, 0 ); + + if ( $mode === self::LAZY_INIT_ROW ) { + DeferredUpdates::addCallableUpdate( [ $this, 'refreshCounts' ] ); + } + } + + return true; + } + + /** + * Factory function. + * + * @param string $name A category name (no "Category:" prefix). It need + * not be normalized, with spaces replaced by underscores. + * @return Category|bool Category, or false on a totally invalid name + */ + public static function newFromName( $name ) { + $cat = new self(); + $title = Title::makeTitleSafe( NS_CATEGORY, $name ); + + if ( !is_object( $title ) ) { + return false; + } + + $cat->mPage = $title; + $cat->mName = $title->getDBkey(); + + return $cat; + } + + /** + * Factory function. + * + * @param PageIdentity $page Category page. Warning, no validation is performed! + * @return Category + */ + public static function newFromTitle( PageIdentity $page ): self { + $cat = new self(); + + $cat->mPage = $page; + $cat->mName = $page->getDBkey(); + + return $cat; + } + + /** + * Factory function. 
+ * + * @param int $id A category id. Warning, no validation is performed! + * @return Category + */ + public static function newFromID( $id ) { + $cat = new self(); + $cat->mID = intval( $id ); + return $cat; + } + + /** + * Factory function, for constructing a Category object from a result set + * + * @param stdClass $row Result set row, must contain the cat_xxx fields. If the fields are + * null, the resulting Category object will represent an empty category if a page object was + * given. If the fields are null and no PageIdentity was given, this method fails and returns + * false. + * @param PageIdentity|null $page This must be provided if there is no cat_title field in $row. + * @return Category|false + */ + public static function newFromRow( stdClass $row, ?PageIdentity $page = null ) { + $cat = new self(); + $cat->mPage = $page; + + # NOTE: the row often results from a LEFT JOIN on categorylinks. This may result in + # all the cat_xxx fields being null, if the category page exists, but nothing + # was ever added to the category. This case should be treated like an empty + # category, if possible. + + if ( $row->cat_title === null ) { + if ( $page === null ) { + # the name is probably somewhere in the row, for example as page_title, + # but we can't know that here... 
+ return false; + } else { + # if we have a PageIdentity object, fetch the category name from there + $cat->mName = $page->getDBkey(); + } + + $cat->mID = false; + $cat->mSubcats = 0; + $cat->mPages = 0; + $cat->mFiles = 0; + } else { + $cat->mName = $row->cat_title; + $cat->mID = $row->cat_id; + $cat->mSubcats = (int)$row->cat_subcats; + $cat->mPages = (int)$row->cat_pages; + $cat->mFiles = (int)$row->cat_files; + } + + return $cat; + } + + /** + * @return string|false DB key name, or false on failure + */ + public function getName() { + return $this->getX( 'mName' ); + } + + /** + * @return string|false Category ID, or false on failure + */ + public function getID() { + return $this->getX( 'mID' ); + } + + /** + * @return int Total member count (sum of subcats, files and pages) + */ + public function getMemberCount(): int { + $this->initialize( self::LAZY_INIT_ROW ); + + return $this->mPages; + } + + /** + * @param int $type One of self::COUNT_ALL_MEMBERS and self::COUNT_CONTENT_PAGES + * @return int Total member count or content page count + */ + public function getPageCount( $type = self::COUNT_ALL_MEMBERS ): int { + $allCount = $this->getMemberCount(); + + if ( $type === self::COUNT_CONTENT_PAGES ) { + return $allCount - ( $this->getSubcatCount() + $this->getFileCount() ); + } + + return $allCount; + } + + /** + * @return int Number of subcategories + */ + public function getSubcatCount(): int { + return $this->getX( 'mSubcats' ); + } + + /** + * @return int Number of member files + */ + public function getFileCount(): int { + return $this->getX( 'mFiles' ); + } + + /** + * @since 1.37 + * @return ?PageIdentity the page associated with this category, or null on failure. NOTE: This + * returns null on failure, unlike getTitle() which returns false. 
+ */ + public function getPage(): ?PageIdentity { + if ( $this->mPage ) { + return $this->mPage; + } + + if ( !$this->initialize( self::LAZY_INIT_ROW ) ) { + return null; + } + + $this->mPage = Title::makeTitleSafe( NS_CATEGORY, $this->mName ); + return $this->mPage; + } + + /** + * @deprecated since 1.37, use getPage() instead. + * @return Title|bool Title for this category, or false on failure. + */ + public function getTitle() { + return Title::castFromPageIdentity( $this->getPage() ) ?? false; + } + + /** + * Fetch a TitleArray of up to $limit category members, beginning after the + * category sort key $offset. + * @param int|false $limit + * @param string $offset + * @return TitleArray TitleArray object for category members. + */ + public function getMembers( $limit = false, $offset = '' ) { + $dbr = $this->loadBalancer->getConnection( DB_REPLICA ); + $queryBuilder = $dbr->newSelectQueryBuilder(); + $queryBuilder->select( [ 'page_id', 'page_namespace', 'page_title', 'page_len', + 'page_is_redirect', 'page_latest' ] ) + ->from( 'categorylinks' ) + ->join( 'page', null, [ 'cl_from = page_id' ] ) + ->where( [ 'cl_to' => $this->getName() ] ) + ->orderBy( 'cl_sortkey' ); + + if ( $limit ) { + $queryBuilder->limit( $limit ); + } + + if ( $offset !== '' ) { + $queryBuilder->andWhere( $dbr->buildComparison( '>', [ 'cl_sortkey' => $offset ] ) ); + } + + $result = TitleArray::newFromResult( $queryBuilder->caller( __METHOD__ )->fetchResultSet() ); + + return $result; + } + + /** + * Generic accessor + * @param string $key + * @return mixed + */ + private function getX( $key ) { + $this->initialize( self::LAZY_INIT_ROW ); + + return $this->{$key} ?? false; + } + + /** + * Refresh the counts for this category. + * + * @return bool True on success, false on failure + */ + public function refreshCounts() { + if ( $this->readOnlyMode->isReadOnly() ) { + return false; + } + + # If we have just a category name, find out whether there is an + # existing row. 
Or if we have just an ID, get the name, because + # that's what categorylinks uses. + if ( !$this->initialize( self::LOAD_ONLY ) ) { + return false; + } + + $dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY ); + # Avoid excess contention on the same category (T162121) + $name = __METHOD__ . ':' . md5( $this->mName ); + $scopedLock = $dbw->getScopedLockAndFlush( $name, __METHOD__, 0 ); + if ( !$scopedLock ) { + return false; + } + + $dbw->startAtomic( __METHOD__ ); + + // Lock the `category` row before locking `categorylinks` rows to try + // to avoid deadlocks with LinksDeletionUpdate (T195397) + $dbw->lockForUpdate( 'category', [ 'cat_title' => $this->mName ], __METHOD__ ); + + // Lock all the `categorylinks` records and gaps for this category; + // this is a separate query due to postgres limitations + $dbw->newSelectQueryBuilder() + ->select( '*' ) + ->from( 'categorylinks' ) + ->join( 'page', null, 'page_id = cl_from' ) + ->where( [ 'cl_to' => $this->mName ] ) + ->lockInShareMode() + ->caller( __METHOD__ )->fetchRowCount(); + + // Get the aggregate `categorylinks` row counts for this category + $catCond = $dbw->conditional( [ 'page_namespace' => NS_CATEGORY ], '1', 'NULL' ); + $fileCond = $dbw->conditional( [ 'page_namespace' => NS_FILE ], '1', 'NULL' ); + $result = $dbw->newSelectQueryBuilder() + ->select( [ + 'pages' => 'COUNT(*)', + 'subcats' => "COUNT($catCond)", + 'files' => "COUNT($fileCond)" + ] ) + ->from( 'categorylinks' ) + ->join( 'page', null, 'page_id = cl_from' ) + ->where( [ 'cl_to' => $this->mName ] ) + ->caller( __METHOD__ )->fetchRow(); + + $shouldExist = $result->pages > 0 || $this->getPage()->exists(); + + if ( $this->mID ) { + if ( $shouldExist ) { + # The category row already exists, so do a plain UPDATE instead + # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap + # in the cat_id sequence. The row may or may not be "affected". 
+ $dbw->update( + 'category', + [ + 'cat_pages' => $result->pages, + 'cat_subcats' => $result->subcats, + 'cat_files' => $result->files + ], + [ 'cat_title' => $this->mName ], + __METHOD__ + ); + } else { + # The category is empty and has no description page, delete it + $dbw->delete( + 'category', + [ 'cat_title' => $this->mName ], + __METHOD__ + ); + $this->mID = false; + } + } elseif ( $shouldExist ) { + # The category row doesn't exist but should, so create it. Use + # upsert in case of races. + $dbw->upsert( + 'category', + [ + 'cat_title' => $this->mName, + 'cat_pages' => $result->pages, + 'cat_subcats' => $result->subcats, + 'cat_files' => $result->files + ], + 'cat_title', + [ + 'cat_pages' => $result->pages, + 'cat_subcats' => $result->subcats, + 'cat_files' => $result->files + ], + __METHOD__ + ); + // @todo: Should we update $this->mID here? Or not since Category + // objects tend to be short lived enough to not matter? + } + + $dbw->endAtomic( __METHOD__ ); + + # Now we should update our local counts. + $this->mPages = (int)$result->pages; + $this->mSubcats = (int)$result->subcats; + $this->mFiles = (int)$result->files; + + return true; + } + + /** + * Call refreshCounts() if there are no entries in the categorylinks table + * or if the category table has a row that states that there are no entries + * + * Due to lock errors or other failures, the precomputed counts can get out of sync, + * making it hard to know when to delete the category row without checking the + * categorylinks table. + * + * @return bool Whether links were refreshed + * @since 1.32 + */ + public function refreshCountsIfEmpty() { + return $this->refreshCountsIfSmall( 0 ); + } + + /** + * Call refreshCounts() if there are few entries in the categorylinks table + * + * Due to lock errors or other failures, the precomputed counts can get out of sync, + * making it hard to know when to delete the category row without checking the + * categorylinks table. 
+ * + * This method will do a non-locking select first to reduce contention. + * + * @param int $maxSize Only refresh if there are this many or fewer backlinks + * @return bool Whether links were refreshed + * @since 1.34 + */ + public function refreshCountsIfSmall( $maxSize = self::ROW_COUNT_SMALL ) { + $dbw = $this->loadBalancer->getConnectionRef( DB_PRIMARY ); + $dbw->startAtomic( __METHOD__ ); + + $typeOccurances = $dbw->newSelectQueryBuilder() + ->select( 'cl_type' ) + ->from( 'categorylinks' ) + ->where( [ 'cl_to' => $this->getName() ] ) + ->limit( $maxSize + 1 ) + ->caller( __METHOD__ )->fetchFieldValues(); + + if ( !$typeOccurances ) { + $doRefresh = true; // delete any category table entry + } elseif ( count( $typeOccurances ) <= $maxSize ) { + $countByType = array_count_values( $typeOccurances ); + $doRefresh = !$dbw->newSelectQueryBuilder() + ->select( '1' ) + ->from( 'category' ) + ->where( [ + 'cat_title' => $this->getName(), + 'cat_pages' => $countByType['page'] ?? 0, + 'cat_subcats' => $countByType['subcat'] ?? 0, + 'cat_files' => $countByType['file'] ?? 0 + ] ) + ->caller( __METHOD__ )->fetchField(); + } else { + $doRefresh = false; // category is too big + } + + $dbw->endAtomic( __METHOD__ ); + + if ( $doRefresh ) { + $this->refreshCounts(); // update the row + + return true; + } + + return false; + } +} + +class_alias( Category::class, 'Category' ); diff --git a/includes/Category/CategoryViewer.php b/includes/Category/CategoryViewer.php new file mode 100644 index 000000000000..c48d2e91184f --- /dev/null +++ b/includes/Category/CategoryViewer.php @@ -0,0 +1,827 @@ +<?php +/** + * List and paging of category members. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +namespace MediaWiki\Category; + +use Collation; +use ContextSource; +use DeprecationHelper; +use Html; +use HtmlArmor; +use IContextSource; +use ILanguageConverter; +use ImageGalleryBase; +use ImageGalleryClassNotFoundException; +use LinkCache; +use MediaWiki\HookContainer\ProtectedHookAccessorTrait; +use MediaWiki\Linker\LinkTarget; +use MediaWiki\MainConfigNames; +use MediaWiki\MediaWikiServices; +use MediaWiki\Page\PageIdentity; +use MediaWiki\Page\PageReference; +use MWException; +use Title; +use TitleValue; +use Wikimedia\Rdbms\SelectQueryBuilder; + +class CategoryViewer extends ContextSource { + use ProtectedHookAccessorTrait; + use DeprecationHelper; + + /** @var int */ + public $limit; + + /** @var array */ + public $from; + + /** @var array */ + public $until; + + /** @var string[] */ + public $articles; + + /** @var array */ + public $articles_start_char; + + /** @var array */ + public $children; + + /** @var array */ + public $children_start_char; + + /** @var bool */ + public $showGallery; + + /** @var array */ + public $imgsNoGallery_start_char; + + /** @var array */ + public $imgsNoGallery; + + /** @var array */ + public $nextPage; + + /** @var array */ + protected $prevPage; + + /** @var array */ + public $flip; + + /** @var PageIdentity */ + protected $page; + + /** @var Collation */ + public $collation; + + /** @var ImageGalleryBase */ + public $gallery; + + /** @var Category Category object for this page. 
*/ + private $cat; + + /** @var array The original query array, to be used in generating paging links. */ + private $query; + + /** @var ILanguageConverter */ + private $languageConverter; + + /** + * @since 1.19 $context is a second, required parameter + * @param PageIdentity $page + * @param IContextSource $context + * @param array $from An array with keys page, subcat, + * and file for offset of results of each section (since 1.17) + * @param array $until An array with 3 keys for until of each section (since 1.17) + * @param array $query + */ + public function __construct( PageIdentity $page, IContextSource $context, array $from = [], + array $until = [], array $query = [] + ) { + $this->page = $page; + + $this->deprecatePublicPropertyFallback( + 'title', + '1.37', + function (): Title { + // @phan-suppress-next-line PhanTypeMismatchReturnNullable castFrom does not return null here + return Title::castFromPageIdentity( $this->page ); + }, + function ( PageIdentity $page ) { + $this->page = $page; + } + ); + + $this->setContext( $context ); + $this->getOutput()->addModuleStyles( [ + 'mediawiki.action.styles', + ] ); + $this->from = $from; + $this->until = $until; + $this->limit = $context->getConfig()->get( MainConfigNames::CategoryPagingLimit ); + $this->cat = Category::newFromTitle( $page ); + $this->query = $query; + $this->collation = MediaWikiServices::getInstance()->getCollationFactory()->getCategoryCollation(); + $this->languageConverter = MediaWikiServices::getInstance() + ->getLanguageConverterFactory()->getLanguageConverter(); + unset( $this->query['title'] ); + } + + /** + * Format the category data list. + * + * @return string HTML output + */ + public function getHTML() { + $this->showGallery = $this->getConfig()->get( MainConfigNames::CategoryMagicGallery ) + && !$this->getOutput()->mNoGallery; + + $this->clearCategoryState(); + $this->doCategoryQuery(); + $this->finaliseCategoryState(); + + $r = $this->getSubcategorySection() . 
+ $this->getPagesSection() . + $this->getImageSection(); + + if ( $r == '' ) { + // If there is no category content to display, only + // show the top part of the navigation links. + // @todo FIXME: Cannot be completely suppressed because it + // is unknown if 'until' or 'from' makes this + // give 0 results. + $r = $this->getCategoryTop(); + } else { + $r = $this->getCategoryTop() . + $r . + $this->getCategoryBottom(); + } + + // Give a proper message if category is empty + if ( $r == '' ) { + $r = $this->msg( 'category-empty' )->parseAsBlock(); + } + + $lang = $this->getLanguage(); + $attribs = [ + 'class' => 'mw-category-generated', + 'lang' => $lang->getHtmlCode(), + 'dir' => $lang->getDir() + ]; + # put a div around the headings which are in the user language + $r = Html::rawElement( 'div', $attribs, $r ); + + return $r; + } + + protected function clearCategoryState() { + $this->articles = []; + $this->articles_start_char = []; + $this->children = []; + $this->children_start_char = []; + if ( $this->showGallery ) { + // Note that null for mode is taken to mean use default. + $mode = $this->getRequest()->getVal( 'gallerymode', null ); + try { + $this->gallery = ImageGalleryBase::factory( $mode, $this->getContext() ); + } catch ( ImageGalleryClassNotFoundException $e ) { + // User specified something invalid, fallback to default. + $this->gallery = ImageGalleryBase::factory( false, $this->getContext() ); + } + + $this->gallery->setHideBadImages(); + } else { + $this->imgsNoGallery = []; + $this->imgsNoGallery_start_char = []; + } + } + + /** + * Add a subcategory to the internal lists, using a Category object + * @param Category $cat + * @param string $sortkey + * @param int $pageLength + */ + public function addSubcategoryObject( Category $cat, $sortkey, $pageLength ) { + $page = $cat->getPage(); + if ( !$page ) { + return; + } + + // Subcategory; strip the 'Category' namespace from the link text. 
+ $pageRecord = MediaWikiServices::getInstance()->getPageStore() + ->getPageByReference( $page ); + if ( !$pageRecord ) { + return; + } + + $this->children[] = $this->generateLink( + 'subcat', + $pageRecord, + $pageRecord->isRedirect(), + htmlspecialchars( str_replace( '_', ' ', $pageRecord->getDBkey() ) ) + ); + + $this->children_start_char[] = + $this->getSubcategorySortChar( $page, $sortkey ); + } + + /** + * @param string $type + * @param PageReference $page + * @param bool $isRedirect + * @param string|null $html + * @return string + * Annotations needed to tell taint about HtmlArmor, + * due to the use of the hook it is not possible to avoid raw html handling here + * @param-taint $html tainted + * @return-taint escaped + */ + private function generateLink( + string $type, PageReference $page, bool $isRedirect, ?string $html = null + ): string { + $link = null; + $legacyTitle = MediaWikiServices::getInstance()->getTitleFactory() + ->castFromPageReference( $page ); + // @phan-suppress-next-line PhanTypeMismatchArgument castFrom does not return null here + $this->getHookRunner()->onCategoryViewer__generateLink( $type, $legacyTitle, $html, $link ); + if ( $link === null ) { + $linkRenderer = MediaWikiServices::getInstance()->getLinkRenderer(); + if ( $html !== null ) { + $html = new HtmlArmor( $html ); + } + $link = $linkRenderer->makeLink( $page, $html ); + } + if ( $isRedirect ) { + $link = Html::rawElement( + 'span', + [ 'class' => 'redirect-in-category' ], + $link + ); + } + + return $link; + } + + /** + * Get the character to be used for sorting subcategories. + * If there's a link from Category:A to Category:B, the sortkey of the resulting + * entry in the categorylinks table is Category:A, not A, which it SHOULD be. + * Workaround: If sortkey == "Category:".$title, then use $title for sorting, + * else use sortkey... + * + * @param PageIdentity $page + * @param string $sortkey The human-readable sortkey (before transforming to icu or whatever). 
+ * @return string + */ + public function getSubcategorySortChar( PageIdentity $page, string $sortkey ): string { + $titleText = MediaWikiServices::getInstance()->getTitleFormatter() + ->getPrefixedText( $page ); + if ( $titleText === $sortkey ) { + $word = $page->getDBkey(); + } else { + $word = $sortkey; + } + + $firstChar = $this->collation->getFirstLetter( $word ); + + return $this->languageConverter->convert( $firstChar ); + } + + /** + * Add a page in the image namespace + * @param PageReference $page + * @param string $sortkey + * @param int $pageLength + * @param bool $isRedirect + */ + public function addImage( + PageReference $page, string $sortkey, int $pageLength, bool $isRedirect = false + ): void { + $title = MediaWikiServices::getInstance()->getTitleFactory() + ->castFromPageReference( $page ); + if ( $this->showGallery ) { + $flip = $this->flip['file']; + if ( $flip ) { + // @phan-suppress-next-line PhanTypeMismatchArgumentNullable castFrom does not return null here + $this->gallery->insert( $title ); + } else { + // @phan-suppress-next-line PhanTypeMismatchArgumentNullable castFrom does not return null here + $this->gallery->add( $title ); + } + } else { + $this->imgsNoGallery[] = $this->generateLink( 'image', $page, $isRedirect ); + + $this->imgsNoGallery_start_char[] = + $this->languageConverter->convert( $this->collation->getFirstLetter( $sortkey ) ); + } + } + + /** + * Add a miscellaneous page + * @param PageReference $page + * @param string $sortkey + * @param int $pageLength + * @param bool $isRedirect + */ + public function addPage( + PageReference $page, + string $sortkey, + int $pageLength, + bool $isRedirect = false + ): void { + $this->articles[] = $this->generateLink( 'page', $page, $isRedirect ); + + $this->articles_start_char[] = + $this->languageConverter->convert( $this->collation->getFirstLetter( $sortkey ) ); + } + + protected function finaliseCategoryState() { + if ( $this->flip['subcat'] ) { + $this->children = array_reverse( 
$this->children ); + $this->children_start_char = array_reverse( $this->children_start_char ); + } + if ( $this->flip['page'] ) { + $this->articles = array_reverse( $this->articles ); + $this->articles_start_char = array_reverse( $this->articles_start_char ); + } + if ( !$this->showGallery && $this->flip['file'] ) { + $this->imgsNoGallery = array_reverse( $this->imgsNoGallery ); + $this->imgsNoGallery_start_char = array_reverse( $this->imgsNoGallery_start_char ); + } + } + + protected function doCategoryQuery() { + $dbr = wfGetDB( DB_REPLICA, 'category' ); + + $this->nextPage = [ + 'page' => null, + 'subcat' => null, + 'file' => null, + ]; + $this->prevPage = [ + 'page' => null, + 'subcat' => null, + 'file' => null, + ]; + + $this->flip = [ 'page' => false, 'subcat' => false, 'file' => false ]; + + foreach ( [ 'page', 'subcat', 'file' ] as $type ) { + # Get the sortkeys for start/end, if applicable. Note that if + # the collation in the database differs from the one + # set in $wgCategoryCollation, pagination might go totally haywire. + $extraConds = [ 'cl_type' => $type ]; + if ( isset( $this->from[$type] ) ) { + $extraConds[] = 'cl_sortkey >= ' + . $dbr->addQuotes( $this->collation->getSortKey( $this->from[$type] ) ); + } elseif ( isset( $this->until[$type] ) ) { + $extraConds[] = 'cl_sortkey < ' + . 
$dbr->addQuotes( $this->collation->getSortKey( $this->until[$type] ) ); + $this->flip[$type] = true; + } + + $queryBuilder = $dbr->newSelectQueryBuilder(); + $queryBuilder->select( array_merge( + LinkCache::getSelectFields(), + [ + 'cl_sortkey', + 'cat_id', + 'cat_title', + 'cat_subcats', + 'cat_pages', + 'cat_files', + 'cl_sortkey_prefix', + 'cl_collation' + ] + ) ) + ->from( 'page' ) + ->where( [ 'cl_to' => $this->page->getDBkey() ] ) + ->andWhere( $extraConds ) + ->useIndex( [ 'categorylinks' => 'cl_sortkey' ] ); + + if ( $this->flip[$type] ) { + $queryBuilder->orderBy( 'cl_sortkey', SelectQueryBuilder::SORT_DESC ); + } else { + $queryBuilder->orderBy( 'cl_sortkey' ); + } + + $queryBuilder + ->join( 'categorylinks', null, [ 'cl_from = page_id' ] ) + ->leftJoin( 'category', null, [ + 'cat_title = page_title', + 'page_namespace' => NS_CATEGORY + ] ) + ->limit( $this->limit + 1 ) + ->caller( __METHOD__ ); + + $res = $queryBuilder->fetchResultSet(); + + $this->getHookRunner()->onCategoryViewer__doCategoryQuery( $type, $res ); + $linkCache = MediaWikiServices::getInstance()->getLinkCache(); + + $count = 0; + foreach ( $res as $row ) { + $title = Title::newFromRow( $row ); + $linkCache->addGoodLinkObjFromRow( $title, $row ); + + if ( $row->cl_collation === '' ) { + // Hack to make sure that while updating from 1.16 schema + // and db is inconsistent, that the sky doesn't fall. + // See r83544. Could perhaps be removed in a couple decades... + $humanSortkey = $row->cl_sortkey; + } else { + $humanSortkey = $title->getCategorySortkey( $row->cl_sortkey_prefix ); + } + + if ( ++$count > $this->limit ) { + # We've reached the one extra which shows that there + # are additional pages to be had. Stop here... 
+ $this->nextPage[$type] = $humanSortkey; + break; + } + if ( $count == $this->limit ) { + $this->prevPage[$type] = $humanSortkey; + } + + if ( $title->getNamespace() === NS_CATEGORY ) { + $cat = Category::newFromRow( $row, $title ); + $this->addSubcategoryObject( $cat, $humanSortkey, $row->page_len ); + } elseif ( $title->getNamespace() === NS_FILE ) { + $this->addImage( $title, $humanSortkey, $row->page_len, $row->page_is_redirect ); + } else { + $this->addPage( $title, $humanSortkey, $row->page_len, $row->page_is_redirect ); + } + } + } + } + + /** + * @return string + */ + protected function getCategoryTop() { + $r = $this->getCategoryBottom(); + return $r === '' + ? $r + : "<br style=\"clear:both;\"/>\n" . $r; + } + + /** + * @return string + */ + protected function getSubcategorySection() { + # Don't show subcategories section if there are none. + $r = ''; + $rescnt = count( $this->children ); + $dbcnt = $this->cat->getSubcatCount(); + // This function should be called even if the result isn't used, it has side-effects + $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'subcat' ); + + if ( $rescnt > 0 ) { + # Showing subcategories + $r .= Html::openElement( 'div', [ 'id' => 'mw-subcategories' ] ) . "\n"; + $r .= Html::rawElement( 'h2', [], $this->msg( 'subcategories' )->parse() ) . "\n"; + $r .= $countmsg; + $r .= $this->getSectionPagingLinks( 'subcat' ); + $r .= $this->formatList( $this->children, $this->children_start_char ); + $r .= $this->getSectionPagingLinks( 'subcat' ); + $r .= "\n" . Html::closeElement( 'div' ); + } + return $r; + } + + /** + * @return string + */ + protected function getPagesSection() { + $name = $this->getOutput()->getUnprefixedDisplayTitle(); + # Don't show articles section if there are none. + $r = ''; + + # @todo FIXME: Here and in the other two sections: we don't need to bother + # with this rigmarole if the entire category contents fit on one page + # and have already been retrieved. 
We can just use $rescnt in that + # case and save a query and some logic. + $dbcnt = $this->cat->getPageCount( Category::COUNT_CONTENT_PAGES ); + $rescnt = count( $this->articles ); + // This function should be called even if the result isn't used, it has side-effects + $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'article' ); + + if ( $rescnt > 0 ) { + $r .= Html::openElement( 'div', [ 'id' => 'mw-pages' ] ) . "\n"; + $r .= Html::rawElement( + 'h2', + [], + $this->msg( 'category_header' )->rawParams( $name )->parse() + ) . "\n"; + $r .= $countmsg; + $r .= $this->getSectionPagingLinks( 'page' ); + $r .= $this->formatList( $this->articles, $this->articles_start_char ); + $r .= $this->getSectionPagingLinks( 'page' ); + $r .= "\n" . Html::closeElement( 'div' ); + } + return $r; + } + + /** + * @return string + */ + protected function getImageSection() { + $name = $this->getOutput()->getUnprefixedDisplayTitle(); + $r = ''; + $rescnt = $this->showGallery ? $this->gallery->count() : count( $this->imgsNoGallery ); + $dbcnt = $this->cat->getFileCount(); + // This function should be called even if the result isn't used, it has side-effects + $countmsg = $this->getCountMessage( $rescnt, $dbcnt, 'file' ); + + if ( $rescnt > 0 ) { + $r .= Html::openElement( 'div', [ 'id' => 'mw-category-media' ] ) . "\n"; + $r .= Html::rawElement( + 'h2', + [], + $this->msg( 'category-media-header' )->rawParams( $name )->parse() + ) . "\n"; + $r .= $countmsg; + $r .= $this->getSectionPagingLinks( 'file' ); + if ( $this->showGallery ) { + $r .= $this->gallery->toHTML(); + } else { + $r .= $this->formatList( $this->imgsNoGallery, $this->imgsNoGallery_start_char ); + } + $r .= $this->getSectionPagingLinks( 'file' ); + $r .= "\n" . Html::closeElement( 'div' ); + } + return $r; + } + + /** + * Get the paging links for a section (subcats/pages/files), to go at the top and bottom + * of the output. 
+ * + * @param string $type 'page', 'subcat', or 'file' + * @return string HTML output, possibly empty if there are no other pages + */ + private function getSectionPagingLinks( $type ) { + if ( isset( $this->until[$type] ) ) { + // The new value for the until parameter should be pointing to the first + // result displayed on the page which is the second last result retrieved + // from the database.The next link should have a from parameter pointing + // to the until parameter of the current page. + if ( $this->nextPage[$type] !== null ) { + return $this->pagingLinks( $this->prevPage[$type], $this->until[$type], $type ); + } else { + // If the nextPage variable is null, it means that we have reached the first page + // and therefore the previous link should be disabled. + return $this->pagingLinks( '', $this->until[$type], $type ); + } + } elseif ( $this->nextPage[$type] !== null || isset( $this->from[$type] ) ) { + return $this->pagingLinks( $this->from[$type], $this->nextPage[$type], $type ); + } else { + return ''; + } + } + + /** + * @return string + */ + protected function getCategoryBottom() { + return ''; + } + + /** + * Format a list of articles chunked by letter, either as a + * bullet list or a columnar format, depending on the length. + * + * @param array $articles + * @param array $articles_start_char + * @param int $cutoff + * @return string + * @internal + */ + private function formatList( $articles, $articles_start_char, $cutoff = 6 ) { + $list = ''; + if ( count( $articles ) > $cutoff ) { + $list = self::columnList( $articles, $articles_start_char ); + } elseif ( count( $articles ) > 0 ) { + // for short lists of articles in categories. + $list = self::shortList( $articles, $articles_start_char ); + } + + $pageLang = MediaWikiServices::getInstance()->getTitleFactory() + ->castFromPageIdentity( $this->page ) + ->getPageLanguage(); + $attribs = [ 'lang' => $pageLang->getHtmlCode(), 'dir' => $pageLang->getDir(), + 'class' => 'mw-content-' . 
$pageLang->getDir() ]; + $list = Html::rawElement( 'div', $attribs, $list ); + + return $list; + } + + /** + * Format a list of articles chunked by letter in a three-column list, ordered + * vertically. This is used for categories with a significant number of pages. + * + * @param string[] $articles HTML links to each article + * @param string[] $articles_start_char The header characters for each article + * @param string $cssClasses CSS classes for the wrapper element + * @return string HTML to output + * @internal + */ + public static function columnList( + $articles, + $articles_start_char, + $cssClasses = 'mw-category mw-category-columns' + ) { + $columns = array_combine( $articles, $articles_start_char ); + + $ret = Html::openElement( 'div', [ 'class' => $cssClasses ] ); + + $colContents = []; + + # Kind of like array_flip() here, but we keep duplicates in an + # array instead of dropping them. + foreach ( $columns as $article => $char ) { + $colContents[$char][] = $article; + } + + foreach ( $colContents as $char => $articles ) { + # Change space to non-breaking space to keep headers aligned + $h3char = $char === ' ' ? "\u{00A0}" : htmlspecialchars( $char ); + + $ret .= Html::openElement( 'div', [ 'class' => 'mw-category-group' ] ); + $ret .= Html::rawElement( 'h3', [], $h3char ) . "\n"; + $ret .= Html::openElement( 'ul' ); + $ret .= implode( + "\n", + array_map( + static function ( $article ) { + return Html::rawElement( 'li', [], $article ); + }, + $articles + ) + ); + $ret .= Html::closeElement( 'ul' ) . Html::closeElement( 'div' ); + + } + + $ret .= Html::closeElement( 'div' ); + return $ret; + } + + /** + * Format a list of articles chunked by letter in a bullet list. This is used + * for categories with a small number of pages (when columns aren't needed). 
+ * @param string[] $articles HTML links to each article + * @param string[] $articles_start_char The header characters for each article + * @return string HTML to output + * @internal + */ + public static function shortList( $articles, $articles_start_char ) { + return self::columnList( $articles, $articles_start_char, 'mw-category' ); + } + + /** + * Create paging links, as a helper method to getSectionPagingLinks(). + * + * @param string $first The 'until' parameter for the generated URL + * @param string $last The 'from' parameter for the generated URL + * @param string $type A prefix for parameters, 'page' or 'subcat' or + * 'file' + * @return string HTML + */ + private function pagingLinks( $first, $last, $type = '' ) { + $prevLink = $this->msg( 'prev-page' )->escaped(); + + $linkRenderer = MediaWikiServices::getInstance()->getLinkRenderer(); + if ( $first != '' ) { + $prevQuery = $this->query; + $prevQuery["{$type}until"] = $first; + unset( $prevQuery["{$type}from"] ); + $prevLink = $linkRenderer->makeKnownLink( + $this->addFragmentToTitle( $this->page, $type ), + new HtmlArmor( $prevLink ), + [], + $prevQuery + ); + } + + $nextLink = $this->msg( 'next-page' )->escaped(); + + if ( $last != '' ) { + $lastQuery = $this->query; + $lastQuery["{$type}from"] = $last; + unset( $lastQuery["{$type}until"] ); + $nextLink = $linkRenderer->makeKnownLink( + $this->addFragmentToTitle( $this->page, $type ), + new HtmlArmor( $nextLink ), + [], + $lastQuery + ); + } + + return $this->msg( 'categoryviewer-pagedlinks' )->rawParams( $prevLink, $nextLink )->escaped(); + } + + /** + * Takes a title, and adds the fragment identifier that + * corresponds to the correct segment of the category. 
+ * + * @param PageReference $page The title (usually $this->title) + * @param string $section Which section + * @throws MWException + * @return LinkTarget + */ + private function addFragmentToTitle( PageReference $page, string $section ): LinkTarget { + switch ( $section ) { + case 'page': + $fragment = 'mw-pages'; + break; + case 'subcat': + $fragment = 'mw-subcategories'; + break; + case 'file': + $fragment = 'mw-category-media'; + break; + default: + throw new MWException( __METHOD__ . + " Invalid section $section." ); + } + + return new TitleValue( $page->getNamespace(), + $page->getDBkey(), $fragment ); + } + + /** + * What to do if the category table conflicts with the number of results + * returned? This function says what. Each type is considered independently + * of the other types. + * + * @param int $rescnt The number of items returned by our database query. + * @param int $dbcnt The number of items according to the category table. + * @param string $type 'subcat', 'article', or 'file' + * @return string A message giving the number of items, to output to HTML. + */ + private function getCountMessage( $rescnt, $dbcnt, $type ) { + // There are three cases: + // 1) The category table figure seems good. It might be wrong, but + // we can't do anything about it if we don't recalculate it on ev- + // ery category view. + // 2) The category table figure isn't good, like it's smaller than the + // number of actual results, *but* the number of results is less + // than $this->limit and there's no offset. In this case we still + // know the right figure. + // 3) We have no idea. + + // Check if there's a "from" or "until" for anything + + // This is a little ugly, but we seem to use different names + // for the paging types then for the messages. 
+ if ( $type === 'article' ) { + $pagingType = 'page'; + } else { + $pagingType = $type; + } + + $fromOrUntil = false; + if ( isset( $this->from[$pagingType] ) || isset( $this->until[$pagingType] ) ) { + $fromOrUntil = true; + } + + if ( $dbcnt == $rescnt || + ( ( $rescnt == $this->limit || $fromOrUntil ) && $dbcnt > $rescnt ) + ) { + // Case 1: seems good. + $totalcnt = $dbcnt; + } elseif ( $rescnt < $this->limit && !$fromOrUntil ) { + // Case 2: not good, but salvageable. Use the number of results. + $totalcnt = $rescnt; + } else { + // Case 3: hopeless. Don't give a total count at all. + // Messages: category-subcat-count-limited, category-article-count-limited, + // category-file-count-limited + return $this->msg( "category-$type-count-limited" )->numParams( $rescnt )->parseAsBlock(); + } + // Messages: category-subcat-count, category-article-count, category-file-count + return $this->msg( "category-$type-count" )->numParams( $rescnt, $totalcnt )->parseAsBlock(); + } +} + +class_alias( CategoryViewer::class, 'CategoryViewer' ); diff --git a/includes/Category/TrackingCategories.php b/includes/Category/TrackingCategories.php new file mode 100644 index 000000000000..59c288b4fcf5 --- /dev/null +++ b/includes/Category/TrackingCategories.php @@ -0,0 +1,259 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Categories + */ + +namespace MediaWiki\Category; + +use ExtensionRegistry; +use MediaWiki\Config\ServiceOptions; +use MediaWiki\Linker\LinkTarget; +use MediaWiki\MainConfigNames; +use MediaWiki\Page\PageReference; +use NamespaceInfo; +use ParserOutput; +use Psr\Log\LoggerInterface; +use Title; +use TitleParser; + +/** + * This class performs some operations related to tracking categories, such as creating + * a list of all such categories. + * @since 1.29 + */ +class TrackingCategories { + + /** + * @internal For use by ServiceWiring + */ + public const CONSTRUCTOR_OPTIONS = [ + MainConfigNames::TrackingCategories, + MainConfigNames::EnableMagicLinks, + ]; + + /** @var ServiceOptions */ + private $options; + + /** @var NamespaceInfo */ + private $namespaceInfo; + + /** @var TitleParser */ + private $titleParser; + + /** @var ExtensionRegistry */ + private $extensionRegistry; + + /** @var LoggerInterface */ + private $logger; + + /** + * Tracking categories that exist in core + * + * @var array + */ + private const CORE_TRACKING_CATEGORIES = [ + 'broken-file-category', + 'duplicate-args-category', + 'expansion-depth-exceeded-category', + 'expensive-parserfunction-category', + 'hidden-category-category', + 'index-category', + 'node-count-exceeded-category', + 'noindex-category', + 'nonnumeric-formatnum', + 'post-expand-template-argument-category', + 'post-expand-template-inclusion-category', + 'restricted-displaytitle-ignored', + # template-equals-category is unused in MW>=1.39, but the category + # can be left around for a major release or so for an easier + # transition for anyone who didn't do the cleanup. 
T91154 + 'template-equals-category', + 'template-loop-category', + 'unstrip-depth-category', + 'unstrip-size-category', + ]; + + /** + * @param ServiceOptions $options + * @param NamespaceInfo $namespaceInfo + * @param TitleParser $titleParser + * @param LoggerInterface $logger + */ + public function __construct( + ServiceOptions $options, + NamespaceInfo $namespaceInfo, + TitleParser $titleParser, + LoggerInterface $logger + ) { + $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); + $this->options = $options; + $this->namespaceInfo = $namespaceInfo; + $this->titleParser = $titleParser; + $this->logger = $logger; + + // TODO convert ExtensionRegistry to a service and inject it + $this->extensionRegistry = ExtensionRegistry::getInstance(); + } + + /** + * Read the global and extract title objects from the corresponding messages + * + * TODO consider renaming this method, since this class is retrieved from + * MediaWikiServices, resulting in calls like: + * MediaWikiServices::getInstance()->getTrackingCategories()->getTrackingCategories() + * + * @return array[] [ 'msg' => Title, 'cats' => Title[] ] + * @phan-return array<string,array{msg:Title,cats:Title[]}> + */ + public function getTrackingCategories() { + $categories = array_merge( + self::CORE_TRACKING_CATEGORIES, + $this->extensionRegistry->getAttribute( MainConfigNames::TrackingCategories ), + $this->options->get( MainConfigNames::TrackingCategories ) // deprecated + ); + + // Only show magic link tracking categories if they are enabled + $enableMagicLinks = $this->options->get( MainConfigNames::EnableMagicLinks ); + if ( $enableMagicLinks['ISBN'] ) { + $categories[] = 'magiclink-tracking-isbn'; + } + if ( $enableMagicLinks['RFC'] ) { + $categories[] = 'magiclink-tracking-rfc'; + } + if ( $enableMagicLinks['PMID'] ) { + $categories[] = 'magiclink-tracking-pmid'; + } + + $trackingCategories = []; + foreach ( $categories as $catMsg ) { + /* + * Check if the tracking category varies by namespace + * 
Otherwise only pages in the current namespace will be displayed + * If it does vary, show pages considering all namespaces + * + * TODO replace uses of wfMessage with an injected service once that is available + */ + $msgObj = wfMessage( $catMsg )->inContentLanguage(); + $allCats = []; + $catMsgTitle = $this->titleParser->makeTitleValueSafe( NS_MEDIAWIKI, $catMsg ); + if ( !$catMsgTitle ) { + continue; + } + + // Match things like {{NAMESPACE}} and {{NAMESPACENUMBER}}. + // False positives are ok, this is just an efficiency shortcut + if ( strpos( $msgObj->plain(), '{{' ) !== false ) { + $ns = $this->namespaceInfo->getValidNamespaces(); + foreach ( $ns as $namesp ) { + $tempTitle = $this->titleParser->makeTitleValueSafe( $namesp, $catMsg ); + if ( !$tempTitle ) { + continue; + } + // XXX: should be a better way to convert a TitleValue + // to a PageReference! + $tempTitle = Title::newFromLinkTarget( $tempTitle ); + $catName = $msgObj->page( $tempTitle )->text(); + # Allow tracking categories to be disabled by setting them to "-" + if ( $catName !== '-' ) { + $catTitle = $this->titleParser->makeTitleValueSafe( NS_CATEGORY, $catName ); + if ( $catTitle ) { + $allCats[] = $catTitle; + } + } + } + } else { + $catName = $msgObj->text(); + # Allow tracking categories to be disabled by setting them to "-" + if ( $catName !== '-' ) { + $catTitle = $this->titleParser->makeTitleValueSafe( NS_CATEGORY, $catName ); + if ( $catTitle ) { + $allCats[] = $catTitle; + } + } + } + $trackingCategories[$catMsg] = [ + 'cats' => $allCats, + 'msg' => $catMsgTitle, + ]; + } + + return $trackingCategories; + } + + /** + * Resolve a tracking category. 
+ * @param string $msg Message key + * @param ?PageReference $contextPage Context page title + * @return ?LinkTarget the proper category page, or null if + * the tracking category is disabled or unsafe + * @since 1.38 + */ + public function resolveTrackingCategory( string $msg, ?PageReference $contextPage ): ?LinkTarget { + if ( !$contextPage ) { + $this->logger->debug( "Not adding tracking category $msg to missing page!" ); + return null; + } + + if ( $contextPage->getNamespace() === NS_SPECIAL ) { + $this->logger->debug( "Not adding tracking category $msg to special page!" ); + return null; + } + + // Important to parse with correct title (T33469) + // TODO replace uses of wfMessage with an injected service once that is available + $cat = wfMessage( $msg ) + ->page( $contextPage ) + ->inContentLanguage() + ->text(); + + # Allow tracking categories to be disabled by setting them to "-" + if ( $cat === '-' ) { + return null; + } + + $containerCategory = $this->titleParser->makeTitleValueSafe( NS_CATEGORY, $cat ); + if ( $containerCategory === null ) { + $this->logger->debug( "[[MediaWiki:$msg]] is not a valid title!" ); + return null; + } + return $containerCategory; + } + + /** + * Add a tracking category to a ParserOutput. + * @param ParserOutput $parserOutput + * @param string $msg Message key + * @param ?PageReference $contextPage Context page title + * @return bool Whether the addition was successful + * @since 1.38 + */ + public function addTrackingCategory( ParserOutput $parserOutput, string $msg, ?PageReference $contextPage ): bool { + $categoryPage = $this->resolveTrackingCategory( $msg, $contextPage ); + if ( $categoryPage === null ) { + return false; + } + $parserOutput->addCategory( + $categoryPage->getDBkey(), + $parserOutput->getPageProperty( 'defaultsort' ) ?? '' + ); + return true; + } +} + +class_alias( TrackingCategories::class, 'TrackingCategories' ); |