about | summary | refs | log | tree | commit | diff | stats
path: root/includes/parser
diff options
context:
space:
mode:
author: C. Scott Ananian <cscott@cscott.net> 2024-09-13 18:19:39 -0400
committer: C. Scott Ananian <cscott@cscott.net> 2024-09-13 19:29:18 -0400
commit: 92ca7f68a41132f99b94189a771d7bab254a4e90 (patch)
tree: c8267bcc1aac429eee397ed59a13248d7f758c63 /includes/parser
parent: ec9488fb27eda760af35002e7642405ea0baa77a (diff)
download: mediawikicore-92ca7f68a41132f99b94189a771d7bab254a4e90.tar.gz
mediawikicore-92ca7f68a41132f99b94189a771d7bab254a4e90.zip
Randomly sample statistics for Parsoid Selective Update
Controlled by $wgParsoidSelectiveUpdateSampleRate (which defaults to off), randomly sample 1 in N parses to collect statistics to inform the design of Parsoid selective update:

* For both legacy parses and Parsoid, count how many times a previous parse is in the cache when a new parse is requested. This needs to sample the legacy parser as well as Parsoid because Parsoid is not yet invoked from the RefreshLinksJob. We also count the relative number of parses from the different RevisionRenderer::getRenderedRevision() call sites to determine which pathways might account for the most opportunities for optimized selective update.

* For sampled parses using the Parsoid parser where a previous parse result is available, also fetch the previous wikitext source from the database.

Bug: T371713
Change-Id: I208aeac1b315a96bdb9669427cd03de461b914b4
Diffstat (limited to 'includes/parser')
-rw-r--r-- includes/parser/Parsoid/ParsoidParser.php 42
1 file changed, 38 insertions, 4 deletions
diff --git a/includes/parser/Parsoid/ParsoidParser.php b/includes/parser/Parsoid/ParsoidParser.php
index 11e4978b1941..013e9083339f 100644
--- a/includes/parser/Parsoid/ParsoidParser.php
+++ b/includes/parser/Parsoid/ParsoidParser.php
@@ -5,6 +5,7 @@ namespace MediaWiki\Parser\Parsoid;
use MediaWiki\Content\TextContent;
use MediaWiki\Content\WikitextContent;
use MediaWiki\Languages\LanguageConverterFactory;
+use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageReference;
use MediaWiki\Parser\ParserOutput;
@@ -114,6 +115,30 @@ class ParsoidParser /* eventually this will extend \Parser */ {
$htmlVariantLanguage = $langCode;
}
}
+ $oldPageConfig = null;
+ $oldPageBundle = null;
+
+ // T371713: Temporary statistics collection code to determine
+ // feasibility of Parsoid selective update
+ $sampleRate = MediaWikiServices::getInstance()->getMainConfig()->get(
+ MainConfigNames::ParsoidSelectiveUpdateSampleRate
+ );
+ $doSample = ( $sampleRate && mt_rand( 1, $sampleRate ) === 1 );
+ if ( $doSample && $previousOutput !== null && $previousOutput->getCacheRevisionId() ) {
+ // Allow fetching the old wikitext corresponding to the
+ // $previousOutput
+ $oldPageConfig = $this->pageConfigFactory->create(
+ Title::newFromLinkTarget( $pageConfig->getLinkTarget() ),
+ $options->getUserIdentity(),
+ $previousOutput->getCacheRevisionId(),
+ null,
+ $previousOutput->getLanguage(),
+ );
+ $oldPageBundle =
+ PageBundleParserOutputConverter::pageBundleFromParserOutput(
+ $previousOutput
+ );
+ }
$defaultOptions = [
'pageBundle' => true,
@@ -123,7 +148,9 @@ class ParsoidParser /* eventually this will extend \Parser */ {
'htmlVariantLanguage' => $htmlVariantLanguage,
'offsetType' => 'byte',
'outputContentVersion' => Parsoid::defaultHTMLVersion(),
- 'previousOutput' => $previousOutput,
+ 'previousOutput' => $oldPageBundle,
+ 'previousInput' => $oldPageConfig,
+ 'sample_stats' => $doSample,
];
$parserOutput->resetParseStartTime();
@@ -145,7 +172,7 @@ class ParsoidParser /* eventually this will extend \Parser */ {
Title::newFromLinkTarget( $pageConfig->getLinkTarget() )->getPrefixedDBkey()
);
- // Register a watcher again because the $parserOuptut arg
+ // Register a watcher again because the $parserOutput arg
// and $parserOutput return value above are different objects!
$options->registerWatcher( [ $parserOutput, 'recordOption' ] );
@@ -154,12 +181,19 @@ class ParsoidParser /* eventually this will extend \Parser */ {
$parserOutput->recordTimeProfile();
$this->makeLimitReport( $options, $parserOutput );
- // Collect statistics on parsing time -vs- presence of $previousOutput
- MediaWikiServices::getInstance()->getStatsFactory()
+ // T371713: Collect statistics on parsing time -vs- presence of
+ // $previousOutput
+ $stats = MediaWikiServices::getInstance()->getStatsFactory();
+ $stats
->getCounter( 'Parsoid_parse_time_total' )
->setLabel( 'type', $previousOutput === null ? 'full' : 'selective' )
->setLabel( 'reason', $options->getRenderReason() ?: 'unknown' )
->incrementBy( $parserOutput->getTimeProfile( 'cpu' ) );
+ $stats
+ ->getCounter( 'Parsoid_parse_count_total' )
+ ->setLabel( 'type', $previousOutput === null ? 'full' : 'selective' )
+ ->setLabel( 'reason', $options->getRenderReason() ?: 'unknown' )
+ ->increment();
// Add Parsoid skinning module
$parserOutput->addModuleStyles( [ 'mediawiki.skinning.content.parsoid' ] );