diff options
author | C. Scott Ananian <cscott@cscott.net> | 2024-09-13 18:19:39 -0400 |
---|---|---|
committer | C. Scott Ananian <cscott@cscott.net> | 2024-09-13 19:29:18 -0400 |
commit | 92ca7f68a41132f99b94189a771d7bab254a4e90 (patch) | |
tree | c8267bcc1aac429eee397ed59a13248d7f758c63 /includes/parser | |
parent | ec9488fb27eda760af35002e7642405ea0baa77a (diff) | |
download | mediawikicore-92ca7f68a41132f99b94189a771d7bab254a4e90.tar.gz mediawikicore-92ca7f68a41132f99b94189a771d7bab254a4e90.zip |
Randomly sample statistics for Parsoid Selective Update
Controlled by $wgParsoidSelectiveUpdateSampleRate (which defaults to off),
randomly sample 1 in N parses to collect statistics to inform the design
of Parsoid selective update:
* For both the legacy parser and Parsoid, count how many times a previous
parse is in the cache when a new parse is requested. This needs to
sample the legacy parser as well as Parsoid because Parsoid is not
yet invoked from the RefreshLinksJob. We also count the relative
number of parses from the different
RevisionRenderer::getRenderedRevision() call sites to determine
which pathways might account for the most opportunities for
optimized selective update.
* For sampled parses using the Parsoid parser where a previous parse
result is available, also fetch the previous wikitext source from the
database.
Bug: T371713
Change-Id: I208aeac1b315a96bdb9669427cd03de461b914b4
Diffstat (limited to 'includes/parser')
-rw-r--r-- | includes/parser/Parsoid/ParsoidParser.php | 42 |
1 files changed, 38 insertions, 4 deletions
diff --git a/includes/parser/Parsoid/ParsoidParser.php b/includes/parser/Parsoid/ParsoidParser.php index 11e4978b1941..013e9083339f 100644 --- a/includes/parser/Parsoid/ParsoidParser.php +++ b/includes/parser/Parsoid/ParsoidParser.php @@ -5,6 +5,7 @@ namespace MediaWiki\Parser\Parsoid; use MediaWiki\Content\TextContent; use MediaWiki\Content\WikitextContent; use MediaWiki\Languages\LanguageConverterFactory; +use MediaWiki\MainConfigNames; use MediaWiki\MediaWikiServices; use MediaWiki\Page\PageReference; use MediaWiki\Parser\ParserOutput; @@ -114,6 +115,30 @@ class ParsoidParser /* eventually this will extend \Parser */ { $htmlVariantLanguage = $langCode; } } + $oldPageConfig = null; + $oldPageBundle = null; + + // T371713: Temporary statistics collection code to determine + // feasibility of Parsoid selective update + $sampleRate = MediaWikiServices::getInstance()->getMainConfig()->get( + MainConfigNames::ParsoidSelectiveUpdateSampleRate + ); + $doSample = ( $sampleRate && mt_rand( 1, $sampleRate ) === 1 ); + if ( $doSample && $previousOutput !== null && $previousOutput->getCacheRevisionId() ) { + // Allow fetching the old wikitext corresponding to the + // $previousOutput + $oldPageConfig = $this->pageConfigFactory->create( + Title::newFromLinkTarget( $pageConfig->getLinkTarget() ), + $options->getUserIdentity(), + $previousOutput->getCacheRevisionId(), + null, + $previousOutput->getLanguage(), + ); + $oldPageBundle = + PageBundleParserOutputConverter::pageBundleFromParserOutput( + $previousOutput + ); + } $defaultOptions = [ 'pageBundle' => true, @@ -123,7 +148,9 @@ class ParsoidParser /* eventually this will extend \Parser */ { 'htmlVariantLanguage' => $htmlVariantLanguage, 'offsetType' => 'byte', 'outputContentVersion' => Parsoid::defaultHTMLVersion(), - 'previousOutput' => $previousOutput, + 'previousOutput' => $oldPageBundle, + 'previousInput' => $oldPageConfig, + 'sample_stats' => $doSample, ]; $parserOutput->resetParseStartTime(); @@ -145,7 +172,7 @@ class 
ParsoidParser /* eventually this will extend \Parser */ { Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedDBkey() ); - // Register a watcher again because the $parserOuptut arg + // Register a watcher again because the $parserOutput arg // and $parserOutput return value above are different objects! $options->registerWatcher( [ $parserOutput, 'recordOption' ] ); @@ -154,12 +181,19 @@ class ParsoidParser /* eventually this will extend \Parser */ { $parserOutput->recordTimeProfile(); $this->makeLimitReport( $options, $parserOutput ); - // Collect statistics on parsing time -vs- presence of $previousOutput - MediaWikiServices::getInstance()->getStatsFactory() + // T371713: Collect statistics on parsing time -vs- presence of + // $previousOutput + $stats = MediaWikiServices::getInstance()->getStatsFactory(); + $stats ->getCounter( 'Parsoid_parse_time_total' ) ->setLabel( 'type', $previousOutput === null ? 'full' : 'selective' ) ->setLabel( 'reason', $options->getRenderReason() ?: 'unknown' ) ->incrementBy( $parserOutput->getTimeProfile( 'cpu' ) ); + $stats + ->getCounter( 'Parsoid_parse_count_total' ) + ->setLabel( 'type', $previousOutput === null ? 'full' : 'selective' ) + ->setLabel( 'reason', $options->getRenderReason() ?: 'unknown' ) + ->increment(); // Add Parsoid skinning module $parserOutput->addModuleStyles( [ 'mediawiki.skinning.content.parsoid' ] ); |