addDescription(
			'Search the source text of pages for lines matching ' .
			'a given regex, and print the lines.'
		);
		$this->addOption(
			'prefix',
			'Title prefix. Can be specified more than once. ' .
			'Use e.g. --prefix=Talk: to search an entire namespace.',
			false,
			true,
			false,
			true
		);
		$this->addOption( 'show-wiki', 'Add the wiki ID to the output' );
		$this->addOption(
			'pages-with-matches',
			'Suppress normal output; instead print the title of each page ' .
			'from which output would normally have been printed.',
			false,
			false,
			'l'
		);
		$this->addArg( 'regex', 'The regex to search for' );
	}

	/**
	 * Fetch the services this script needs from the service container.
	 *
	 * Populates $this->contLang (content language) and
	 * $this->wikiPageFactory; both are used by findPages().
	 */
	private function init() {
		$services = $this->getServiceContainer();
		$this->contLang = $services->getContentLanguage();
		$this->wikiPageFactory = $services->getWikiPageFactory();
	}

	/**
	 * Search the text of every selected page for the regex given as the
	 * first argument and print the matches to standard output.
	 *
	 * Normal mode prints "title:lineNumber:line" for each matching line.
	 * With --pages-with-matches the regex is applied to the whole page text
	 * and only the title is printed. With --show-wiki every output line is
	 * prefixed by the current wiki ID and a tab.
	 */
	public function execute() {
		$this->init();

		$showWiki = $this->getOption( 'show-wiki' );
		$wikiId = WikiMap::getCurrentWikiId();
		$prefix = $this->getOption( 'prefix' );
		$regex = $this->getArg( 0 );
		$titleOnly = $this->hasOption( 'pages-with-matches' );

		// A pattern starting with "/" is used verbatim (it is taken to carry
		// its own delimiters); anything else is wrapped in {...} delimiters.
		if ( ( $regex[0] ?? '' ) === '/' ) {
			$delimRegex = $regex;
		} else {
			$delimRegex = '{' . $regex . '}';
		}

		// Fail fast on a pattern PCRE cannot compile. Otherwise every
		// preg_match() call below would return false, which the loop would
		// treat as "no match", scanning the whole wiki for nothing.
		if ( preg_match( $delimRegex, '' ) === false ) {
			$this->fatalError( "Invalid regex: $regex" );
		}

		// Text put in front of every output line.
		$outPrefix = $showWiki ? "$wikiId\t" : '';

		foreach ( $this->findPages( $prefix ) as $page ) {
			$content = $page->getContent( RevisionRecord::RAW );
			$titleText = $page->getTitle()->getPrefixedDBkey();
			if ( !$content ) {
				$this->error( "Page has no content: $titleText" );
				continue;
			}
			if ( !$content instanceof TextContent ) {
				$this->error( "Page has a non-text content model: $titleText" );
				continue;
			}

			$text = $content->getText();

			if ( $titleOnly ) {
				// One match anywhere in the page is enough to list it.
				if ( preg_match( $delimRegex, $text ) ) {
					echo "$outPrefix$titleText\n";
				}
				continue;
			}

			foreach ( StringUtils::explode( "\n", $text ) as $lineNum => $line ) {
				// Keys from the split are zero-based; report 1-based line numbers.
				$lineNum++;
				if ( preg_match( $delimRegex, $line ) ) {
					echo "$outPrefix$titleText:$lineNum:$line\n";
				}
			}
		}
	}

	/**
	 * Lazily iterate over the pages whose title starts with any of the
	 * given prefixes, in ascending page_id order.
	 *
	 * A prefix containing ":" is split at the first colon: the part before
	 * it is resolved with the content language's getNsIndex() and the rest
	 * is the title prefix within that namespace. A prefix without a colon is
	 * looked up in NS_MAIN. An empty title part selects the whole namespace.
	 * NOTE(review): the getNsIndex() result is used as-is; a name that is
	 * not a known namespace is not handled specially here.
	 *
	 * @param string[]|null $prefixes Title prefixes; null or an empty array
	 *   selects every page
	 * @return \Generator Yields the page objects created by
	 *   $this->wikiPageFactory->newFromTitle()
	 */
	public function findPages( $prefixes = null ) {
		$dbr = $this->getReplicaDB();
		$orConds = [];
		if ( $prefixes !== null ) {
			foreach ( $prefixes as $prefix ) {
				$colonPos = strpos( $prefix, ':' );
				if ( $colonPos !== false ) {
					$ns = $this->contLang->getNsIndex( substr( $prefix, 0, $colonPos ) );
					$prefixDBkey = substr( $prefix, $colonPos + 1 );
				} else {
					$ns = NS_MAIN;
					$prefixDBkey = $prefix;
				}
				$prefixExpr = $dbr->expr( 'page_namespace', '=', $ns );
				if ( $prefixDBkey !== '' ) {
					$prefixExpr = $prefixExpr->and(
						'page_title',
						IExpression::LIKE,
						new LikeValue( $prefixDBkey, $dbr->anyString() )
					);
				}
				$orConds[] = $prefixExpr;
			}
		}

		// Fetch in batches of 200, resuming after the last page_id seen.
		$lastId = 0;
		do {
			$res = $dbr->newSelectQueryBuilder()
				->queryInfo( WikiPage::getQueryInfo() )
				->where( $orConds ? $dbr->orExpr( $orConds ) : [] )
				->andWhere( $dbr->expr( 'page_id', '>', $lastId ) )
				// The "page_id > $lastId" continuation is only correct when
				// each batch holds the lowest remaining IDs in ascending
				// order; without an explicit ordering pages could be skipped.
				->orderBy( 'page_id' )
				->limit( 200 )
				->caller( __METHOD__ )
				->fetchResultSet();
			foreach ( $res as $row ) {
				$title = Title::newFromRow( $row );
				yield $this->wikiPageFactory->newFromTitle( $title );
				$lastId = $row->page_id;
			}
		} while ( $res->numRows() );
	}
}

// @codeCoverageIgnoreStart
$maintClass = GrepPages::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd