*/
	private MapCacheLRU $localCache;

	private ParserCacheFactory $parserCacheFactory;
	private RevisionLookup $revisionLookup;
	private RevisionRenderer $revisionRenderer;
	private StatsFactory $statsFactory;
	private ILBFactory $lbFactory;
	private ChronologyProtector $chronologyProtector;
	private LoggerSpi $loggerSpi;
	private WikiPageFactory $wikiPageFactory;
	private TitleFormatter $titleFormatter;
	private TracerInterface $tracer;

	/**
	 * Stores the injected services and creates the per-instance output cache.
	 *
	 * @param ParserCacheFactory $parserCacheFactory
	 * @param RevisionLookup $revisionLookup
	 * @param RevisionRenderer $revisionRenderer
	 * @param StatsFactory $statsFactory
	 * @param ILBFactory $lbFactory
	 * @param ChronologyProtector $chronologyProtector
	 * @param LoggerSpi $loggerSpi
	 * @param WikiPageFactory $wikiPageFactory
	 * @param TitleFormatter $titleFormatter
	 * @param TracerInterface $tracer
	 */
	public function __construct(
		ParserCacheFactory $parserCacheFactory,
		RevisionLookup $revisionLookup,
		RevisionRenderer $revisionRenderer,
		StatsFactory $statsFactory,
		ILBFactory $lbFactory,
		ChronologyProtector $chronologyProtector,
		LoggerSpi $loggerSpi,
		WikiPageFactory $wikiPageFactory,
		TitleFormatter $titleFormatter,
		TracerInterface $tracer
	) {
		$this->parserCacheFactory = $parserCacheFactory;
		$this->revisionLookup = $revisionLookup;
		$this->revisionRenderer = $revisionRenderer;
		$this->statsFactory = $statsFactory;
		$this->lbFactory = $lbFactory;
		$this->chronologyProtector = $chronologyProtector;
		$this->loggerSpi = $loggerSpi;
		$this->wikiPageFactory = $wikiPageFactory;
		$this->titleFormatter = $titleFormatter;
		$this->tracer = $tracer;

		// In-process cache of outputs for the latest revision of recently used pages.
		$this->localCache = new MapCacheLRU( 10 );
	}

	/**
	 * Use a cache?
	 *
	 * Decides which cache (if any) applies to the given page/revision pair:
	 * the primary cache for the page's latest revision, the secondary cache
	 * for publicly visible old revisions, or none.
	 *
	 * @param PageRecord $page
	 * @param RevisionRecord|null $rev
	 *
	 * @return string One of the CACHE_XXX constants.
	 */
	private function shouldUseCache(
		PageRecord $page,
		?RevisionRecord $rev
	) {
		if ( $rev && !$rev->getId() ) {
			// The revision isn't from the database, so the output can't safely be cached.
			return self::CACHE_NONE;
		}

		// NOTE: Keep in sync with ParserWikiPage::shouldCheckParserCache().
		// NOTE: when we allow caching of old revisions in the future,
		//       we must not allow caching of deleted revisions.

		$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
		if ( !$page->exists() || !$wikiPage->getContentHandler()->isParserCacheSupported() ) {
			return self::CACHE_NONE;
		}

		$isOld = $rev && $rev->getId() !== $page->getLatest();
		if ( !$isOld ) {
			return self::CACHE_PRIMARY;
		}

		if ( !$rev->audienceCan( RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC ) ) {
			// deleted/suppressed revision
			return self::CACHE_NONE;
		}

		return self::CACHE_SECONDARY;
	}

	/**
	 * Returns the rendered output for the given page if it is present in the cache.
	 *
	 * Unless OPT_IGNORE_PROFILE_VERSION is set, a cached Parsoid output whose
	 * page bundle version differs from Parsoid's default HTML version is
	 * treated as obsolete and not returned.
	 *
	 * @param PageRecord $page
	 * @param ParserOptions $parserOptions
	 * @param RevisionRecord|null $revision
	 * @param int $options Bitfield using the OPT_XXX constants
	 *
	 * @return ParserOutput|null
	 */
	public function getCachedParserOutput(
		PageRecord $page,
		ParserOptions $parserOptions,
		?RevisionRecord $revision = null,
		int $options = 0
	): ?ParserOutput {
		// Held in a local for the duration of the call.
		// NOTE(review): presumably the span ends when it goes out of scope - confirm.
		$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
		$isOld = $revision && $revision->getId() !== $page->getLatest();
		$useCache = $this->shouldUseCache( $page, $revision );
		$primaryCache = $this->getPrimaryCache( $parserOptions );
		$classCacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions );

		if ( $useCache === self::CACHE_PRIMARY ) {
			// Serve from the in-process cache first; note that this path
			// returns without recording hit/miss statistics.
			if ( $this->localCache->hasField( $classCacheKey, $page->getLatest() ) && !$isOld ) {
				return $this->localCache->getField( $classCacheKey, $page->getLatest() );
			}
			$output = $primaryCache->get( $page, $parserOptions );
		} elseif ( $useCache === self::CACHE_SECONDARY && $revision ) {
			$secondaryCache = $this->getSecondaryCache( $parserOptions );
			$output = $secondaryCache->get( $revision, $parserOptions );
		} else {
			$output = null;
		}

		$notHitReason = 'miss';
		if (
			$output && !( $options & self::OPT_IGNORE_PROFILE_VERSION ) &&
			$parserOptions->getUseParsoid()
		) {
			$pageBundleData = $output->getExtensionData(
				PageBundleParserOutputConverter::PARSOID_PAGE_BUNDLE_KEY
			);
			// T333606: Force a reparse if the version coming from cache is not the default
			$cachedVersion = $pageBundleData['version'] ?? null;
			if (
				$cachedVersion !== null && // T325137: BadContentModel, no sense in reparsing
				$cachedVersion !== Parsoid::defaultHTMLVersion()
			) {
				$notHitReason = 'obsolete';
				$output = null;
			}
		}

		if ( $output && !$isOld ) {
			$this->localCache->setField( $classCacheKey, $page->getLatest(), $output );
		}

		if ( $output ) {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_cache' )
				->setLabel( 'cache', $useCache )
				->setLabel( 'reason', 'hit' )
				->setLabel( 'type', 'hit' )
				->copyToStatsdAt( "ParserOutputAccess.Cache.$useCache.hit" )
				->increment();
		} else {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_cache' )
				->setLabel( 'reason', $notHitReason )
				->setLabel( 'cache', $useCache )
				->setLabel( 'type', 'miss' )
				->copyToStatsdAt( "ParserOutputAccess.Cache.$useCache.$notHitReason" )
				->increment();
		}

		return $output ?: null; // convert false to null
	}

	/**
	 * Returns the rendered output for the given page.
	 * Caching and concurrency control is applied.
	 *
	 * @param PageRecord $page
	 * @param ParserOptions $parserOptions
	 * @param RevisionRecord|null $revision
	 * @param int $options Bitfield using the OPT_XXX constants
	 *
	 * @return Status containing a ParserOutput if no error occurred.
	 *         Well known errors and warnings include the following messages:
	 *         - 'view-pool-dirty-output' (warning) The output is dirty (from a stale cache entry).
	 *         - 'view-pool-contention' (warning) Dirty output was returned immediately instead of
	 *           waiting to acquire a work lock (when "fast stale" mode is enabled in PoolCounter).
	 *         - 'view-pool-timeout' (warning) Dirty output was returned after failing to acquire
	 *           a work lock (got QUEUE_FULL or TIMEOUT from PoolCounter).
	 *         - 'pool-queuefull' (error) unable to acquire work lock, and no cached content found.
	 *         - 'pool-timeout' (error) unable to acquire work lock, and no cached content found.
	 *         - 'pool-servererror' (error) PoolCounterWork failed due to a lock service error.
	 *         - 'pool-unknownerror' (error) PoolCounterWork failed for an unknown reason.
	 *         - 'nopagetext' (error) The page does not exist
	 *         - 'missing-revision' (error) No revision was given and the page's latest
	 *           revision could not be loaded.
	 *         - 'missing-revision-permission' (error) The revision's text is not visible to
	 *           the public and OPT_NO_AUDIENCE_CHECK was not set.
	 * @throws InvalidArgumentException If the revision has no ID while cache updates are
	 *         enabled, or if the revision does not belong to the given page.
	 */
	public function getParserOutput(
		PageRecord $page,
		ParserOptions $parserOptions,
		?RevisionRecord $revision = null,
		int $options = 0
	): Status {
		// Held in a local for the duration of the call.
		// NOTE(review): presumably the span ends when it goes out of scope - confirm.
		$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
		$error = $this->checkPreconditions( $page, $revision, $options );
		if ( $error ) {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_case' )
				->setLabel( 'case', 'error' )
				->copyToStatsdAt( 'ParserOutputAccess.Case.error' )
				->increment();
			return $error;
		}

		$isOld = $revision && $revision->getId() !== $page->getLatest();
		if ( $isOld ) {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_case' )
				->setLabel( 'case', 'old' )
				->copyToStatsdAt( 'ParserOutputAccess.Case.old' )
				->increment();
		} else {
			$this->statsFactory
				->getCounter( 'parseroutputaccess_case' )
				->setLabel( 'case', 'current' )
				->copyToStatsdAt( 'ParserOutputAccess.Case.current' )
				->increment();
		}

		if ( !( $options & self::OPT_NO_CHECK_CACHE ) ) {
			// Forward $options so that OPT_IGNORE_PROFILE_VERSION is honoured by the
			// cache lookup; otherwise a cached entry with a non-default Parsoid HTML
			// version would be discarded and re-rendered even when the caller asked
			// for the profile version to be ignored.
			$output = $this->getCachedParserOutput( $page, $parserOptions, $revision, $options );
			if ( $output ) {
				return Status::newGood( $output );
			}
		}

		if ( !$revision ) {
			// No explicit revision requested: fall back to the page's latest revision.
			$revId = $page->getLatest();
			$revision = $revId ? $this->revisionLookup->getRevisionById( $revId ) : null;

			if ( !$revision ) {
				$this->statsFactory
					->getCounter( 'parseroutputaccess_status' )
					->setLabel( 'status', 'norev' )
					->copyToStatsdAt( "ParserOutputAccess.Status.norev" )
					->increment();
				return Status::newFatal( 'missing-revision', $revId );
			}
		}

		if ( $options & self::OPT_FOR_ARTICLE_VIEW ) {
			$work = $this->newPoolWorkArticleView( $page, $parserOptions, $revision, $options );
			/** @var Status $status */
			$status = $work->execute();
		} else {
			// XXX: we could try harder to reuse a cache lookup above to
			// provide the $previous argument here
			$status = $this->renderRevision( $page, $parserOptions, $revision, $options, null );
		}

		$output = $status->getValue();
		Assert::postcondition( $output || !$status->isOK(), 'Inconsistent status' );

		if ( $output && !$isOld ) {
			// Remember the output for the latest revision in the in-process cache.
			$primaryCache = $this->getPrimaryCache( $parserOptions );
			$classCacheKey = $primaryCache->makeParserOutputKey( $page, $parserOptions );
			$this->localCache->setField( $classCacheKey, $page->getLatest(), $output );
		}

		if ( $status->isGood() ) {
			$this->statsFactory->getCounter( 'parseroutputaccess_status' )
				->setLabel( 'status', 'good' )
				->copyToStatsdAt( 'ParserOutputAccess.Status.good' )
				->increment();
		} elseif ( $status->isOK() ) {
			$this->statsFactory->getCounter( 'parseroutputaccess_status' )
				->setLabel( 'status', 'ok' )
				->copyToStatsdAt( 'ParserOutputAccess.Status.ok' )
				->increment();
		} else {
			$this->statsFactory->getCounter( 'parseroutputaccess_status' )
				->setLabel( 'status', 'error' )
				->copyToStatsdAt( 'ParserOutputAccess.Status.error' )
				->increment();
		}

		return $status;
	}

	/**
	 * Render the given revision.
	 *
	 * This method will update the parser cache if appropriate, and will
	 * trigger a links update if OPT_LINKS_UPDATE is set.
	 *
	 * This method does not perform access checks, and will not load content
	 * from caches. The caller is assumed to have taken care of that.
	 *
	 * Where possible, pass in a $previousOutput, which will prevent an
	 * unnecessary double-lookup in the cache.
	 *
	 * @see PoolWorkArticleView::renderRevision
	 *
	 * @param PageRecord $page
	 * @param ParserOptions $parserOptions
	 * @param RevisionRecord $revision
	 * @param int $options Bitfield using the OPT_XXX constants
	 * @param ParserOutput|null $previousOutput Earlier output handed to the renderer
	 *        as 'previous-output'
	 *
	 * @return Status A good status wrapping the rendered ParserOutput
	 */
	private function renderRevision(
		PageRecord $page,
		ParserOptions $parserOptions,
		RevisionRecord $revision,
		int $options,
		?ParserOutput $previousOutput = null
	): Status {
		// Held in a local for the duration of the call.
		// NOTE(review): presumably the span ends when it goes out of scope - confirm.
		$span = $this->startOperationSpan( __FUNCTION__, $page, $revision );
		$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
			->copyToStatsdAt( 'ParserOutputAccess.PoolWork.None' )
			->setLabel( 'cache', self::CACHE_NONE )
			->increment();

		$useCache = $this->shouldUseCache( $page, $revision );

		// T371713: Temporary statistics collection code to determine
		// feasibility of Parsoid selective update
		$sampleRate = MediaWikiServices::getInstance()->getMainConfig()->get(
			MainConfigNames::ParsoidSelectiveUpdateSampleRate
		);
		$doSample = ( $sampleRate && mt_rand( 1, $sampleRate ) === 1 );

		if ( $previousOutput === null && ( $doSample || $parserOptions->getUseParsoid() ) ) {
			// If $useCache === self::CACHE_SECONDARY we could potentially
			// try to reuse the parse of $revision-1 from the secondary cache,
			// but it is likely those template transclusions are out of date.
			// Try to reuse the template transclusions from the most recent
			// parse, which are more likely to reflect the current template.
			if ( !( $options & self::OPT_NO_CHECK_CACHE ) ) {
				$previousOutput = $this->getPrimaryCache( $parserOptions )->getDirty( $page, $parserOptions ) ?: null;
			}
		}

		$renderedRev = $this->revisionRenderer->getRenderedRevision(
			$revision,
			$parserOptions,
			null,
			[
				'audience' => RevisionRecord::RAW,
				'previous-output' => $previousOutput,
			]
		);

		$output = $renderedRev->getRevisionParserOutput();

		if ( $doSample ) {
			$content = $revision->getContent( SlotRecord::MAIN );
			$labels = [
				'source' => 'ParserOutputAccess',
				'type' => $previousOutput === null ? 'full' : 'selective',
				'reason' => $parserOptions->getRenderReason(),
				'parser' => $parserOptions->getUseParsoid() ? 'parsoid' : 'legacy',
				'opportunistic' => 'false',
				'wiki' => WikiMap::getCurrentWikiId(),
				'model' => $content ? $content->getModel() : 'unknown',
			];
			$this->statsFactory
				->getCounter( 'ParserCache_selective_total' )
				->setLabels( $labels )
				->increment();
			$this->statsFactory
				->getCounter( 'ParserCache_selective_cpu_seconds' )
				->setLabels( $labels )
				->incrementBy( $output->getTimeProfile( 'cpu' ) );
		}

		if ( !( $options & self::OPT_NO_UPDATE_CACHE ) && $output->isCacheable() ) {
			if ( $useCache === self::CACHE_PRIMARY ) {
				$primaryCache = $this->getPrimaryCache( $parserOptions );
				$primaryCache->save( $output, $page, $parserOptions );
			} elseif ( $useCache === self::CACHE_SECONDARY ) {
				$secondaryCache = $this->getSecondaryCache( $parserOptions );
				$secondaryCache->save( $output, $revision, $parserOptions );
			}
		}

		if ( $options & self::OPT_LINKS_UPDATE ) {
			$this->wikiPageFactory->newFromTitle( $page )
				->triggerOpportunisticLinksUpdate( $output );
		}

		return Status::newGood( $output );
	}

	/**
	 * Validates the page/revision/options combination before any output is
	 * looked up or rendered.
	 *
	 * @param PageRecord $page
	 * @param RevisionRecord|null $revision
	 * @param int $options
	 *
	 * @return Status|null A fatal Status if the request cannot be served, null if all checks pass
	 * @throws InvalidArgumentException If the revision has no ID while cache updates are
	 *         enabled, or if the revision does not belong to the given page.
	 */
	private function checkPreconditions(
		PageRecord $page,
		?RevisionRecord $revision = null,
		int $options = 0
	): ?Status {
		if ( !$page->exists() ) {
			return Status::newFatal( 'nopagetext' );
		}

		if ( !( $options & self::OPT_NO_UPDATE_CACHE ) && $revision && !$revision->getId() ) {
			throw new InvalidArgumentException(
				'The revision does not have a known ID. Use OPT_NO_CACHE.'
			);
		}

		if ( $revision && $revision->getPageId() !== $page->getId() ) {
			throw new InvalidArgumentException(
				'The revision does not belong to the given page.'
			);
		}

		if ( $revision && !( $options & self::OPT_NO_AUDIENCE_CHECK ) ) {
			// NOTE: If per-user checks are desired, the caller should perform them and
			//       then set OPT_NO_AUDIENCE_CHECK if they passed.
			if ( !$revision->audienceCan( RevisionRecord::DELETED_TEXT, RevisionRecord::FOR_PUBLIC ) ) {
				return Status::newFatal(
					'missing-revision-permission',
					$revision->getId(),
					$revision->getTimestamp(),
					$this->titleFormatter->getPrefixedDBkey( $page )
				);
			}
		}

		return null;
	}

	/**
	 * Creates the PoolCounterWork used to render the revision for an article
	 * view. The concrete work class and its work key depend on which cache
	 * applies to the page/revision pair.
	 *
	 * @param PageRecord $page
	 * @param ParserOptions $parserOptions
	 * @param RevisionRecord $revision
	 * @param int $options
	 *
	 * @return PoolCounterWork
	 */
	protected function newPoolWorkArticleView(
		PageRecord $page,
		ParserOptions $parserOptions,
		RevisionRecord $revision,
		int $options
	): PoolCounterWork {
		$useCache = $this->shouldUseCache( $page, $revision );

		switch ( $useCache ) {
			case self::CACHE_PRIMARY:
				$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
					->setLabel( 'cache', self::CACHE_PRIMARY )
					->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Current' )
					->increment();
				$primaryCache = $this->getPrimaryCache( $parserOptions );
				$parserCacheMetadata = $primaryCache->getMetadata( $page );
				$cacheKey = $primaryCache->makeParserOutputKey(
					$page,
					$parserOptions,
					$parserCacheMetadata ? $parserCacheMetadata->getUsedOptions() : null
				);

				// The work key combines the parser cache key with the revision ID.
				$workKey = $cacheKey . ':revid:' . $revision->getId();

				return new PoolWorkArticleViewCurrent(
					$workKey,
					$page,
					$revision,
					$parserOptions,
					$this->revisionRenderer,
					$primaryCache,
					$this->lbFactory,
					$this->chronologyProtector,
					$this->loggerSpi,
					$this->wikiPageFactory,
					!( $options & self::OPT_NO_UPDATE_CACHE ),
					(bool)( $options & self::OPT_LINKS_UPDATE )
				);

			case self::CACHE_SECONDARY:
				$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
					->setLabel( 'cache', self::CACHE_SECONDARY )
					->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Old' )
					->increment();
				$secondaryCache = $this->getSecondaryCache( $parserOptions );
				$workKey = $secondaryCache->makeParserOutputKey( $revision, $parserOptions );
				return new PoolWorkArticleViewOld(
					$workKey,
					$secondaryCache,
					$revision,
					$parserOptions,
					$this->revisionRenderer,
					$this->loggerSpi
				);

			default:
				$this->statsFactory->getCounter( 'parseroutputaccess_poolwork' )
					->setLabel( 'cache', self::CACHE_NONE )
					->copyToStatsdAt( 'ParserOutputAccess.PoolWork.Uncached' )
					->increment();
				// The secondary cache is only used to derive a work key here;
				// the uncached work object is not given a cache.
				$secondaryCache = $this->getSecondaryCache( $parserOptions );
				$workKey = $secondaryCache->makeParserOutputKeyOptionalRevId( $revision, $parserOptions );
				return new PoolWorkArticleView(
					$workKey,
					$revision,
					$parserOptions,
					$this->revisionRenderer,
					$this->loggerSpi
				);
		}

		// unreachable
	}

	/**
	 * Returns the ParserCache matching the given options: the Parsoid cache
	 * when Parsoid is in use, the default parser cache otherwise.
	 *
	 * @param ParserOptions $pOpts
	 *
	 * @return ParserCache
	 */
	private function getPrimaryCache( ParserOptions $pOpts ): ParserCache {
		if ( $pOpts->getUseParsoid() ) {
			return $this->parserCacheFactory->getParserCache(
				self::PARSOID_PCACHE_NAME
			);
		}

		return $this->parserCacheFactory->getParserCache(
			ParserCacheFactory::DEFAULT_NAME
		);
	}

	/**
	 * Returns the RevisionOutputCache matching the given options: the Parsoid
	 * cache when Parsoid is in use, the default revision output cache otherwise.
	 *
	 * @param ParserOptions $pOpts
	 *
	 * @return RevisionOutputCache
	 */
	private function getSecondaryCache( ParserOptions $pOpts ): RevisionOutputCache {
		if ( $pOpts->getUseParsoid() ) {
			return $this->parserCacheFactory->getRevisionOutputCache(
				self::PARSOID_RCACHE_NAME
			);
		}

		return $this->parserCacheFactory->getRevisionOutputCache(
			ParserCacheFactory::DEFAULT_RCACHE_NAME
		);
	}

	/**
	 * Creates, starts and activates a tracing span for one operation of this class.
	 *
	 * Page and revision attributes are only attached when the span's context
	 * reports that it is sampled.
	 *
	 * @param string $opName Operation name, appended to "ParserOutputAccess::"
	 * @param PageRecord $page
	 * @param RevisionRecord|null $revision
	 *
	 * @return SpanInterface The started and activated span
	 */
	private function startOperationSpan(
		string $opName,
		PageRecord $page,
		?RevisionRecord $revision = null
	): SpanInterface {
		$span = $this->tracer->createSpan( "ParserOutputAccess::$opName" );
		if ( $span->getContext()->isSampled() ) {
			$span->setAttributes( [
				'org.wikimedia.parser.page' => $page->__toString(),
				'org.wikimedia.parser.page.id' => $page->getId(),
				'org.wikimedia.parser.page.wiki' => $page->getWikiId(),
			] );
			if ( $revision ) {
				$span->setAttributes( [
					'org.wikimedia.parser.revision.id' => $revision->getId(),
					'org.wikimedia.parser.revision.parent_id' => $revision->getParentId(),
				] );
			}
		}
		$span->start()->activate();
		return $span;
	}
}