addDescription( 'Populate parser cache with parsoid output. By default, script attempt to run' . 'for supported content model pages (in a specified batch if provided)' ); $this->addOption( 'force', 'Re-parse pages even if the cached entry seems up to date', false, false ); $this->addOption( 'start-from', 'Start from this page ID', false, true ); $this->addOption( 'namespace', 'Filter pages in this namespace', false, true ); $this->setBatchSize( 100 ); } private function getPageLookup(): PageLookup { $this->pageLookup = $this->getServiceContainer()->getPageStore(); return $this->pageLookup; } private function getRevisionLookup(): RevisionLookup { $this->revisionLookup = $this->getServiceContainer()->getRevisionLookup(); return $this->revisionLookup; } private function getParserOutputAccess(): ParserOutputAccess { $this->parserOutputAccess = $this->getServiceContainer()->getParserOutputAccess(); return $this->parserOutputAccess; } private function getParsoidSiteConfig(): ParsoidSiteConfig { $this->parsoidSiteConfig = $this->getServiceContainer()->getParsoidSiteConfig(); return $this->parsoidSiteConfig; } private function getQueryBuilder(): SelectQueryBuilder { $dbr = $this->getReplicaDB(); return $dbr->newSelectQueryBuilder() ->select( [ 'page_id' ] ) ->from( 'page' ) ->caller( __METHOD__ ) ->orderBy( 'page_id', SelectQueryBuilder::SORT_ASC ); } private function parse( PageRecord $page, RevisionRecord $revision ): Status { $popts = ParserOptions::newFromAnon(); $popts->setUseParsoid(); try { return $this->getParserOutputAccess()->getParserOutput( $page, $popts, $revision, $this->forceParse ); } catch ( ClientError $e ) { return Status::newFatal( 'parsoid-client-error', $e->getMessage() ); } catch ( ResourceLimitExceededException $e ) { return Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() ); } } /* * NamespaceInfo::getCanonicalIndex() requires the namespace to be in lowercase, * so let's do some normalization and return its canonical index. * * @param string $namespace The namespace string from the command line * @return int The canonical index of the namespace */ private function normalizeNamespace( string $namespace ): int { return $this->getServiceContainer()->getNamespaceInfo() ->getCanonicalIndex( strtolower( $namespace ) ); } /** * Populate parser cache with parsoid output. * * @return bool */ public function execute() { $force = $this->getOption( 'force' ); $startFrom = $this->getOption( 'start-from' ); // We need the namespace index instead of the name to perform the query // on, because that's what the page table stores (in the page_namespace field). $namespaceIndex = null; $namespace = $this->getOption( 'namespace' ); if ( $namespace !== null ) { $namespaceIndex = $this->normalizeNamespace( $namespace ); } if ( $force !== null ) { // If --force is supplied, for a parse for supported pages or supported // pages in the specified batch. $this->forceParse = ParserOutputAccess::OPT_FORCE_PARSE; } $startFrom = (int)$startFrom; $this->output( "\nWarming parsoid parser cache with Parsoid output...\n\n" ); while ( true ) { $query = $this->getQueryBuilder(); if ( $namespaceIndex !== null ) { $query = $query->where( [ 'page_namespace' => $namespaceIndex ] ); } $query = $query->where( $this->getReplicaDB()->expr( 'page_id', '>=', $startFrom ) ) ->limit( $this->getBatchSize() ); $result = $query->fetchResultSet(); if ( !$result->numRows() ) { break; } $currentBatch = $startFrom + ( $this->getBatchSize() - 1 ); $this->output( "\n\nBatch: $startFrom - $currentBatch\n----\n" ); // Look through pages by pageId and populate the parserCache foreach ( $result as $row ) { $page = $this->getPageLookup()->getPageById( $row->page_id ); $startFrom = ( (int)$row->page_id + 1 ); if ( $page === null ) { $this->output( "\n[Skipped] Page ID: $row->page_id not found.\n" ); continue; } $latestRevision = $page->getLatest(); $revision = $this->getRevisionLookup()->getRevisionById( $latestRevision ); $mainSlot = $revision->getSlot( SlotRecord::MAIN ); // POA will write a dummy output to PC, but we don't want that here. Just skip! if ( !$this->getParsoidSiteConfig()->supportsContentModel( $mainSlot->getModel() ) ) { $this->output( '[Skipped] Content model "' . $mainSlot->getModel() . "\" not supported for page ID: $row->page_id.\n" ); continue; } $status = $this->parse( $page, $revision ); if ( !$status->isOK() ) { $this->output( __METHOD__ . ": Error parsing page ID: $row->page_id or writing to parser cache\n" ); continue; } $this->output( "[Done] Page ID: $row->page_id ✔️\n" ); } $this->waitForReplication(); } $this->output( "\nDone pre-warming parsoid parser cache...\n" ); return true; } } // @codeCoverageIgnoreStart $maintClass = PrewarmParsoidParserCache::class; require_once RUN_MAINTENANCE_IF_MAIN; // @codeCoverageIgnoreEnd