diff options
author | jenkins-bot <jenkins-bot@gerrit.wikimedia.org> | 2020-11-17 17:31:04 +0000 |
---|---|---|
committer | Gerrit Code Review <gerrit@wikimedia.org> | 2020-11-17 17:31:04 +0000 |
commit | 0dd35aa6a88f69f29b129f2a1f0a4ae3e7c12e32 (patch) | |
tree | a0a4f2ddfd4250bab21a3bebb9bd8899c792f0dd /includes/Rest | |
parent | 815ee5bf3731b5c3cea9b70f76322741cdbf0ad5 (diff) | |
parent | d4789dc29aa59e9a615755cf32736f36e9c82e0c (diff) | |
download | mediawikicore-0dd35aa6a88f69f29b129f2a1f0a4ae3e7c12e32.tar.gz mediawikicore-0dd35aa6a88f69f29b129f2a1f0a4ae3e7c12e32.zip |
Merge "Revert "Re-apply "Use parsoid directly in /page/html handler"""
Diffstat (limited to 'includes/Rest')
-rw-r--r-- | includes/Rest/Handler/LatestPageContentHandler.php | 2 | ||||
-rw-r--r-- | includes/Rest/Handler/PageHTMLHandler.php | 264 | ||||
-rw-r--r-- | includes/Rest/coreRoutes.json | 9 | ||||
-rw-r--r-- | includes/Rest/i18n/en.json | 1 | ||||
-rw-r--r-- | includes/Rest/i18n/qqq.json | 3 |
5 files changed, 106 insertions, 173 deletions
diff --git a/includes/Rest/Handler/LatestPageContentHandler.php b/includes/Rest/Handler/LatestPageContentHandler.php index d0d4add787fc..491386478609 100644 --- a/includes/Rest/Handler/LatestPageContentHandler.php +++ b/includes/Rest/Handler/LatestPageContentHandler.php @@ -82,7 +82,7 @@ abstract class LatestPageContentHandler extends SimpleHandler { if ( $this->latestRevision === null ) { $title = $this->getTitle(); if ( $title && $title->getArticleID() ) { - $this->latestRevision = $this->revisionLookup->getRevisionByTitle( $title ); + $this->latestRevision = $this->revisionLookup->getKnownCurrentRevision( $title ); } else { $this->latestRevision = false; } diff --git a/includes/Rest/Handler/PageHTMLHandler.php b/includes/Rest/Handler/PageHTMLHandler.php index aa510d6e7630..7f24c22ee02e 100644 --- a/includes/Rest/Handler/PageHTMLHandler.php +++ b/includes/Rest/Handler/PageHTMLHandler.php @@ -3,27 +3,23 @@ namespace MediaWiki\Rest\Handler; use Config; +use ConfigException; +use Exception; +use GuzzleHttp\Psr7\Uri; use LogicException; use MediaWiki\Linker\LinkTarget; -use MediaWiki\MediaWikiServices; -use MediaWiki\Page\WikiPageFactory; -use MediaWiki\Parser\ParserCacheFactory; use MediaWiki\Permissions\PermissionManager; use MediaWiki\Rest\LocalizedHttpException; use MediaWiki\Rest\Response; -use MediaWiki\Rest\ResponseInterface; use MediaWiki\Rest\StringStream; use MediaWiki\Revision\RevisionLookup; -use ParserCache; -use ParserOptions; -use ParserOutput; +use RestbaseVirtualRESTService; use TitleFactory; use TitleFormatter; +use UIDGenerator; +use VirtualRESTServiceClient; +use WebRequest; use Wikimedia\Message\MessageValue; -use Wikimedia\Parsoid\Config\PageConfig; -use Wikimedia\Parsoid\Core\ClientError; -use Wikimedia\Parsoid\Core\ResourceLimitExceededException; -use Wikimedia\Parsoid\Parsoid; /** * A handler that returns Parsoid HTML for the following routes: @@ -39,23 +35,27 @@ use Wikimedia\Parsoid\Parsoid; class PageHTMLHandler extends LatestPageContentHandler { private const MAX_AGE_200 = 5; - /** @var ParserCache */ - private $parserCache; + /** @var VirtualRESTServiceClient */ + private $restClient; - /** @var WikiPageFactory */ - private $wikiPageFactory; - - /** @var Parsoid|null */ - private $parsoid; + /** @var array */ + private $htmlResponse; + /** + * @param Config $config + * @param PermissionManager $permissionManager + * @param RevisionLookup $revisionLookup + * @param TitleFormatter $titleFormatter + * @param TitleFactory $titleFactory + * @param VirtualRESTServiceClient $virtualRESTServiceClient + */ public function __construct( Config $config, PermissionManager $permissionManager, RevisionLookup $revisionLookup, TitleFormatter $titleFormatter, TitleFactory $titleFactory, - ParserCacheFactory $parserCacheFactory, - WikiPageFactory $wikiPageFactory + VirtualRESTServiceClient $virtualRESTServiceClient ) { parent::__construct( $config, @@ -64,33 +64,72 @@ class PageHTMLHandler extends LatestPageContentHandler { $titleFormatter, $titleFactory ); - $this->parserCache = $parserCacheFactory->getInstance( 'parsoid' ); - $this->wikiPageFactory = $wikiPageFactory; + + $this->restClient = $virtualRESTServiceClient; } /** * @param LinkTarget $title - * @return string + * @return array + * @throws LocalizedHttpException */ - private function constructHtmlUrl( LinkTarget $title ): string { - return $this->getRouter()->getRouteUrl( '/v1/page/{title}/html', [ 'title' => $title ] ); + private function fetchHtmlFromRESTBase( LinkTarget $title ): array { + if ( $this->htmlResponse !== null ) { + return $this->htmlResponse; + } + + list( , $service ) = $this->restClient->getMountAndService( '/restbase/ ' ); + if ( !$service ) { + try { + $restConfig = $this->config->get( 'VirtualRestConfig' ); + if ( !isset( $restConfig['modules']['restbase'] ) ) { + throw new ConfigException( + __CLASS__ . " requires restbase module configured for VirtualRestConfig" + ); + } + $this->restClient->mount( '/restbase/', + new RestbaseVirtualRESTService( $restConfig['modules']['restbase'] ) ); + } catch ( Exception $e ) { + // This would usually be config exception, but let's fail on any exception + throw new LocalizedHttpException( MessageValue::new( 'rest-html-backend-error' ), 500 ); + } + } + + $this->htmlResponse = $this->restClient->run( [ + 'method' => 'GET', + 'url' => '/restbase/local/v1/page/html/' . + urlencode( $this->titleFormatter->getPrefixedDBkey( $title ) ) . + '?redirect=false' + ] ); + return $this->htmlResponse; } /** - * Sets the 'Cache-Control' header no more then provided $expiry. - * @param ResponseInterface $response - * @param int|null $expiry + * @param LinkTarget $title + * @return array + * @throws LocalizedHttpException */ - private function setCacheControl( ResponseInterface $response, int $expiry = null ) { - if ( $expiry === null ) { - $maxAge = self::MAX_AGE_200; - } else { - $maxAge = min( self::MAX_AGE_200, $expiry ); + private function fetch200HtmlFromRESTBase( LinkTarget $title ): array { + $restbaseResp = $this->fetchHtmlFromRESTBase( $title ); + if ( $restbaseResp['code'] !== 200 ) { + throw new LocalizedHttpException( + MessageValue::new( 'rest-html-backend-error' ), + $restbaseResp['code'] + ); } - $response->setHeader( - 'Cache-Control', - 'max-age=' . $maxAge - ); + return $restbaseResp; + } + + /** + * @return string + */ + private function constructHtmlUrl(): string { + $wr = new WebRequest(); + $urlParts = wfParseUrl( $wr->getFullRequestURL() ); + $currentPathParts = explode( '/', $urlParts['path'] ); + $currentPathParts[ count( $currentPathParts ) - 1 ] = 'html'; + $urlParts['path'] = implode( '/', $currentPathParts ); + return Uri::fromParts( $urlParts ); } /** @@ -126,29 +165,26 @@ class PageHTMLHandler extends LatestPageContentHandler { switch ( $htmlType ) { case 'bare': $body = $this->constructMetadata( $titleObj, $revision ); - $body['html_url'] = $this->constructHtmlUrl( $titleObj ); + $body['html_url'] = $this->constructHtmlUrl(); $response = $this->getResponseFactory()->createJson( $body ); - $this->setCacheControl( $response ); break; case 'html': - $parserOutput = $this->getHtmlFromCache( $titleObj ); + $restbaseResp = $this->fetch200HtmlFromRESTBase( $titleObj ); $response = $this->getResponseFactory()->create(); - // TODO: need to respect content-type returned by Parsoid. - $response->setHeader( 'Content-Type', 'text/html' ); - $this->setCacheControl( $response, $parserOutput->getCacheExpiry() ); - $response->setBody( new StringStream( $parserOutput->getText() ) ); + $response->setHeader( 'Content-Type', $restbaseResp[ 'headers' ][ 'content-type' ] ); + $response->setBody( new StringStream( $restbaseResp[ 'body' ] ) ); break; case 'with_html': - $parserOutput = $this->getHtmlFromCache( $titleObj ); + $restbaseResp = $this->fetch200HtmlFromRESTBase( $titleObj ); $body = $this->constructMetadata( $titleObj, $revision ); - $body['html'] = $parserOutput->getText(); + $body['html'] = $restbaseResp['body']; $response = $this->getResponseFactory()->createJson( $body ); - $this->setCacheControl( $response, $parserOutput->getCacheExpiry() ); break; default: throw new LogicException( "Unknown HTML type $htmlType" ); } + $response->setHeader( 'Cache-Control', 'max-age=' . self::MAX_AGE_200 ); return $response; } @@ -157,6 +193,7 @@ class PageHTMLHandler extends LatestPageContentHandler { * if the latest revision of a page has been made private, un-readable for another reason, * or a newer revision exists. * @return string|null + * @throws LocalizedHttpException */ protected function getETag(): ?string { $title = $this->getTitle(); @@ -166,141 +203,42 @@ class PageHTMLHandler extends LatestPageContentHandler { if ( $this->getHtmlType() === 'bare' ) { return '"' . $this->getLatestRevision()->getId() . '"'; } - // While we are not differentiating the output by parser options and - // only provide anon parses, cache time or page touched provides a good - // reference for etag. Once we start doing non-anon parses, this needs - // to start incorporating current users ParserOptions. - return '"' . $this->getLastModified() . '"'; + + $restbaseRes = $this->fetch200HtmlFromRESTBase( $title ); + return $restbaseRes['headers']['etag'] ?? null; } /** * @return string|null + * @throws LocalizedHttpException */ protected function getLastModified(): ?string { $title = $this->getTitle(); if ( !$title || !$title->getArticleID() || !$this->isAccessible( $title ) ) { return null; } + if ( $this->getHtmlType() === 'bare' ) { return $this->getLatestRevision()->getTimestamp(); } - $wikiPage = $this->wikiPageFactory->newFromTitle( $title ); - // The cache time of the metadata belongs to the ParserOutput - // variant cached last. While we are not differentiating the - // parser options, it's fine. Once we start supporting non-anon - // parses, we would need to fetch the actual ParserOutput to find - // out it's cache time. - $cacheMetadata = $this->parserCache->getMetadata( - $wikiPage, - ParserCache::USE_CURRENT_ONLY - ); - if ( $cacheMetadata ) { - return $cacheMetadata->getCacheTime(); - } else { - return $wikiPage->getTouched(); - } - } - - private function getHtmlType(): string { - return $this->getConfig()['format']; - } - /** - * Assert that Parsoid services are available. - * TODO: once parsoid glue services are in core, - * this will become a no-op and will be removed. - * See T265518 - * @throws LocalizedHttpException - */ - private function assertParsoidInstalled() { - $services = MediaWikiServices::getInstance(); - if ( $services->has( 'ParsoidSiteConfig' ) && - $services->has( 'ParsoidPageConfigFactory' ) && - $services->has( 'ParsoidDataAccess' ) ) { - return; + $restbaseRes = $this->fetch200HtmlFromRESTBase( $title ); + $restbaseEtag = $restbaseRes['headers']['etag'] ?? null; + if ( !$restbaseEtag ) { + return null; } - throw new LocalizedHttpException( - MessageValue::new( 'rest-html-backend-error' ), 501 ); - } - /** - * @return Parsoid - * @throws LocalizedHttpException - */ - protected function createParsoid(): Parsoid { - $this->assertParsoidInstalled(); - if ( $this->parsoid === null ) { - // TODO: once parsoid glue services are in core, - // this will need to use normal DI. - // See T265518 - $services = MediaWikiServices::getInstance(); - $this->parsoid = new Parsoid( - $services->get( 'ParsoidSiteConfig' ), - $services->get( 'ParsoidDataAccess' ) - ); + $etagComponents = []; + if ( !preg_match( '/^(?:W\/)?"?[^"\/]+(?:\/([^"\/]+))"?$/', + $restbaseEtag, $etagComponents ) + ) { + return null; } - return $this->parsoid; - } - - /** - * @param LinkTarget $linkTarget - * @return PageConfig - * @throws LocalizedHttpException - */ - private function createPageConfig( - LinkTarget $linkTarget - ): PageConfig { - $this->assertParsoidInstalled(); - // Currently everything is parsed as anon since Parsoid - // can't report the used options. - // Already checked that title/revision exist and accessible. - return MediaWikiServices::getInstance() - ->get( 'ParsoidPageConfigFactory' ) - ->create( $linkTarget ); - } - /** - * @param LinkTarget $title - * @return ParserOutput a tuple with html and content-type - * @throws LocalizedHttpException - */ - private function getHtmlFromCache( LinkTarget $title ): ParserOutput { - $wikiPage = $this->wikiPageFactory->newFromLinkTarget( $title ); - $parserOutput = $this->parserCache->get( $wikiPage, ParserOptions::newFromAnon() ); - if ( $parserOutput ) { - return $parserOutput; - } - $fakeParserOutput = $this->parse( $title ); - $this->parserCache->save( $fakeParserOutput, $wikiPage, ParserOptions::newFromAnon() ); - return $fakeParserOutput; + return UIDGenerator::getTimestampFromUUIDv1( $etagComponents[1] ) ?: null; } - /** - * @param LinkTarget $title - * @return ParserOutput - * @throws LocalizedHttpException - */ - private function parse( LinkTarget $title ): ParserOutput { - $parsoid = $this->createParsoid(); - $pageConfig = $this->createPageConfig( $title ); - try { - $pageBundle = $parsoid->wikitext2html( $pageConfig, [ - 'discardDataParsoid' => true, - 'pageBundle' => true, - ] ); - return new ParserOutput( $pageBundle->html ); - } catch ( ClientError $e ) { - throw new LocalizedHttpException( - MessageValue::new( 'rest-html-backend-error' ), - 400, - [ 'reason' => $e->getMessage() ] - ); - } catch ( ResourceLimitExceededException $e ) { - throw new LocalizedHttpException( - MessageValue::new( 'rest-resource-limit-exceeded' ), - 413, - [ 'reason' => $e->getMessage() ] - ); - } + private function getHtmlType(): string { + return $this->getConfig()['format']; } } diff --git a/includes/Rest/coreRoutes.json b/includes/Rest/coreRoutes.json index f54a50a61c80..108bf99c1548 100644 --- a/includes/Rest/coreRoutes.json +++ b/includes/Rest/coreRoutes.json @@ -90,8 +90,7 @@ "RevisionLookup", "TitleFormatter", "TitleFactory", - "ParserCacheFactory", - "WikiPageFactory" + "VirtualRESTServiceClient" ], "format": "bare" }, @@ -104,8 +103,7 @@ "RevisionLookup", "TitleFormatter", "TitleFactory", - "ParserCacheFactory", - "WikiPageFactory" + "VirtualRESTServiceClient" ], "format": "html" }, @@ -118,8 +116,7 @@ "RevisionLookup", "TitleFormatter", "TitleFactory", - "ParserCacheFactory", - "WikiPageFactory" + "VirtualRESTServiceClient" ], "format": "with_html" }, diff --git a/includes/Rest/i18n/en.json b/includes/Rest/i18n/en.json index c5f8e0e86248..ec986880217c 100644 --- a/includes/Rest/i18n/en.json +++ b/includes/Rest/i18n/en.json @@ -16,7 +16,6 @@ "rest-permission-denied-anon": "Not accessible by anonymous user", "rest-permission-denied-title": "The user does not have rights to read title ($1)", "rest-permission-denied-revision": "User doesn't have access to the requested revision ($1).", - "rest-resource-limit-exceeded": "Resources limits exceeded processing the request.", "rest-pagehistory-incompatible-params": "Parameters \"older_than\" and \"newer_than\" cannot both be specified", "rest-pagehistory-param-range-error": "Revision id must be greater than 0", "rest-pagehistory-timestamp-error": "Unable to retrieve timestamp for the specified revision ($1)", diff --git a/includes/Rest/i18n/qqq.json b/includes/Rest/i18n/qqq.json index 32af0cf076eb..e97dee5277c5 100644 --- a/includes/Rest/i18n/qqq.json +++ b/includes/Rest/i18n/qqq.json @@ -17,7 +17,6 @@ "rest-permission-denied-anon": "Error message for REST API debugging, shown when the user is anonymous and therefore is denied permission to a resource", "rest-permission-denied-title": "Error message for REST API debugging, shown if the user doesn't have read access to the specified title. Parameters:\n* $1 The page title", "rest-permission-denied-revision": "Error message for REST API debugging, shown when the user doesn't have access to the specified revision. Parameters:\n* $1: the specified revision.", - "rest-resource-limit-exceeded": "Error message for REST API debugging, shown when the resource limit was exceeded by the request", "rest-pagehistory-incompatible-params": "Error message for REST API debugging, shown if incompatible parameters are specified.", "rest-pagehistory-param-range-error": "Error message for REST API debugging, shown if a revision id is provided but is out of range.", "rest-pagehistory-timestamp-error": "Error message for REST API debugging, shown if an error occurred loading the timestamp for the specified revision.", @@ -34,7 +33,7 @@ "rest-page-source-type-error": "Error message for REST API debugging, shown when trying to retrieve content for a page that has an unsupported content type", "rest-no-revision": "Error message for REST API debugging, shown when fetching a revision by page ID fails. Parameters:\n* $1: The page ID we are getting revision from", "rest-media-too-many-links": "Error message for REST API debugging, shown when there are too many media links on a page. Parameters:\n* $1: The page title.\n* $2: The number of links allowed.", - "rest-html-backend-error": "Error message for REST API debugging, shown when fetching Parsoid HTML from backend has failed. Parameters", + "rest-html-backend-error": "Error message for REST API debugging, shown when fetching Parsoid HTML from backend has failed.", "rest-bad-json-body": "Error message for REST API debugging, shown when request body did not contain a JSON encoded object.", "rest-json-body-parse-error": "Error message for REST API debugging, shown when parsing the JSON body failed. Parameters:\n* $1: the error message from the JSON parser.", "rest-missing-body-field": "Error message for REST API debugging, shown when there is a mandatory field missing from the request body. Parameters:\n* $1: The field name", |