aboutsummaryrefslogtreecommitdiffstats
path: root/includes/Rest
diff options
context:
space:
mode:
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>2020-11-17 17:31:04 +0000
committerGerrit Code Review <gerrit@wikimedia.org>2020-11-17 17:31:04 +0000
commit0dd35aa6a88f69f29b129f2a1f0a4ae3e7c12e32 (patch)
treea0a4f2ddfd4250bab21a3bebb9bd8899c792f0dd /includes/Rest
parent815ee5bf3731b5c3cea9b70f76322741cdbf0ad5 (diff)
parentd4789dc29aa59e9a615755cf32736f36e9c82e0c (diff)
downloadmediawikicore-0dd35aa6a88f69f29b129f2a1f0a4ae3e7c12e32.tar.gz
mediawikicore-0dd35aa6a88f69f29b129f2a1f0a4ae3e7c12e32.zip
Merge "Revert "Re-apply "Use parsoid directly in /page/html handler"""
Diffstat (limited to 'includes/Rest')
-rw-r--r--includes/Rest/Handler/LatestPageContentHandler.php2
-rw-r--r--includes/Rest/Handler/PageHTMLHandler.php264
-rw-r--r--includes/Rest/coreRoutes.json9
-rw-r--r--includes/Rest/i18n/en.json1
-rw-r--r--includes/Rest/i18n/qqq.json3
5 files changed, 106 insertions, 173 deletions
diff --git a/includes/Rest/Handler/LatestPageContentHandler.php b/includes/Rest/Handler/LatestPageContentHandler.php
index d0d4add787fc..491386478609 100644
--- a/includes/Rest/Handler/LatestPageContentHandler.php
+++ b/includes/Rest/Handler/LatestPageContentHandler.php
@@ -82,7 +82,7 @@ abstract class LatestPageContentHandler extends SimpleHandler {
if ( $this->latestRevision === null ) {
$title = $this->getTitle();
if ( $title && $title->getArticleID() ) {
- $this->latestRevision = $this->revisionLookup->getRevisionByTitle( $title );
+ $this->latestRevision = $this->revisionLookup->getKnownCurrentRevision( $title );
} else {
$this->latestRevision = false;
}
diff --git a/includes/Rest/Handler/PageHTMLHandler.php b/includes/Rest/Handler/PageHTMLHandler.php
index aa510d6e7630..7f24c22ee02e 100644
--- a/includes/Rest/Handler/PageHTMLHandler.php
+++ b/includes/Rest/Handler/PageHTMLHandler.php
@@ -3,27 +3,23 @@
namespace MediaWiki\Rest\Handler;
use Config;
+use ConfigException;
+use Exception;
+use GuzzleHttp\Psr7\Uri;
use LogicException;
use MediaWiki\Linker\LinkTarget;
-use MediaWiki\MediaWikiServices;
-use MediaWiki\Page\WikiPageFactory;
-use MediaWiki\Parser\ParserCacheFactory;
use MediaWiki\Permissions\PermissionManager;
use MediaWiki\Rest\LocalizedHttpException;
use MediaWiki\Rest\Response;
-use MediaWiki\Rest\ResponseInterface;
use MediaWiki\Rest\StringStream;
use MediaWiki\Revision\RevisionLookup;
-use ParserCache;
-use ParserOptions;
-use ParserOutput;
+use RestbaseVirtualRESTService;
use TitleFactory;
use TitleFormatter;
+use UIDGenerator;
+use VirtualRESTServiceClient;
+use WebRequest;
use Wikimedia\Message\MessageValue;
-use Wikimedia\Parsoid\Config\PageConfig;
-use Wikimedia\Parsoid\Core\ClientError;
-use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
-use Wikimedia\Parsoid\Parsoid;
/**
* A handler that returns Parsoid HTML for the following routes:
@@ -39,23 +35,27 @@ use Wikimedia\Parsoid\Parsoid;
class PageHTMLHandler extends LatestPageContentHandler {
private const MAX_AGE_200 = 5;
- /** @var ParserCache */
- private $parserCache;
+ /** @var VirtualRESTServiceClient */
+ private $restClient;
- /** @var WikiPageFactory */
- private $wikiPageFactory;
-
- /** @var Parsoid|null */
- private $parsoid;
+ /** @var array */
+ private $htmlResponse;
+ /**
+ * @param Config $config
+ * @param PermissionManager $permissionManager
+ * @param RevisionLookup $revisionLookup
+ * @param TitleFormatter $titleFormatter
+ * @param TitleFactory $titleFactory
+ * @param VirtualRESTServiceClient $virtualRESTServiceClient
+ */
public function __construct(
Config $config,
PermissionManager $permissionManager,
RevisionLookup $revisionLookup,
TitleFormatter $titleFormatter,
TitleFactory $titleFactory,
- ParserCacheFactory $parserCacheFactory,
- WikiPageFactory $wikiPageFactory
+ VirtualRESTServiceClient $virtualRESTServiceClient
) {
parent::__construct(
$config,
@@ -64,33 +64,72 @@ class PageHTMLHandler extends LatestPageContentHandler {
$titleFormatter,
$titleFactory
);
- $this->parserCache = $parserCacheFactory->getInstance( 'parsoid' );
- $this->wikiPageFactory = $wikiPageFactory;
+
+ $this->restClient = $virtualRESTServiceClient;
}
/**
* @param LinkTarget $title
- * @return string
+ * @return array
+ * @throws LocalizedHttpException
*/
- private function constructHtmlUrl( LinkTarget $title ): string {
- return $this->getRouter()->getRouteUrl( '/v1/page/{title}/html', [ 'title' => $title ] );
+ private function fetchHtmlFromRESTBase( LinkTarget $title ): array {
+ if ( $this->htmlResponse !== null ) {
+ return $this->htmlResponse;
+ }
+
+ list( , $service ) = $this->restClient->getMountAndService( '/restbase/ ' );
+ if ( !$service ) {
+ try {
+ $restConfig = $this->config->get( 'VirtualRestConfig' );
+ if ( !isset( $restConfig['modules']['restbase'] ) ) {
+ throw new ConfigException(
+ __CLASS__ . " requires restbase module configured for VirtualRestConfig"
+ );
+ }
+ $this->restClient->mount( '/restbase/',
+ new RestbaseVirtualRESTService( $restConfig['modules']['restbase'] ) );
+ } catch ( Exception $e ) {
+ // This would usually be config exception, but let's fail on any exception
+ throw new LocalizedHttpException( MessageValue::new( 'rest-html-backend-error' ), 500 );
+ }
+ }
+
+ $this->htmlResponse = $this->restClient->run( [
+ 'method' => 'GET',
+ 'url' => '/restbase/local/v1/page/html/' .
+ urlencode( $this->titleFormatter->getPrefixedDBkey( $title ) ) .
+ '?redirect=false'
+ ] );
+ return $this->htmlResponse;
}
/**
- * Sets the 'Cache-Control' header no more then provided $expiry.
- * @param ResponseInterface $response
- * @param int|null $expiry
+ * @param LinkTarget $title
+ * @return array
+ * @throws LocalizedHttpException
*/
- private function setCacheControl( ResponseInterface $response, int $expiry = null ) {
- if ( $expiry === null ) {
- $maxAge = self::MAX_AGE_200;
- } else {
- $maxAge = min( self::MAX_AGE_200, $expiry );
+ private function fetch200HtmlFromRESTBase( LinkTarget $title ): array {
+ $restbaseResp = $this->fetchHtmlFromRESTBase( $title );
+ if ( $restbaseResp['code'] !== 200 ) {
+ throw new LocalizedHttpException(
+ MessageValue::new( 'rest-html-backend-error' ),
+ $restbaseResp['code']
+ );
}
- $response->setHeader(
- 'Cache-Control',
- 'max-age=' . $maxAge
- );
+ return $restbaseResp;
+ }
+
+ /**
+ * @return string
+ */
+ private function constructHtmlUrl(): string {
+ $wr = new WebRequest();
+ $urlParts = wfParseUrl( $wr->getFullRequestURL() );
+ $currentPathParts = explode( '/', $urlParts['path'] );
+ $currentPathParts[ count( $currentPathParts ) - 1 ] = 'html';
+ $urlParts['path'] = implode( '/', $currentPathParts );
+ return Uri::fromParts( $urlParts );
}
/**
@@ -126,29 +165,26 @@ class PageHTMLHandler extends LatestPageContentHandler {
switch ( $htmlType ) {
case 'bare':
$body = $this->constructMetadata( $titleObj, $revision );
- $body['html_url'] = $this->constructHtmlUrl( $titleObj );
+ $body['html_url'] = $this->constructHtmlUrl();
$response = $this->getResponseFactory()->createJson( $body );
- $this->setCacheControl( $response );
break;
case 'html':
- $parserOutput = $this->getHtmlFromCache( $titleObj );
+ $restbaseResp = $this->fetch200HtmlFromRESTBase( $titleObj );
$response = $this->getResponseFactory()->create();
- // TODO: need to respect content-type returned by Parsoid.
- $response->setHeader( 'Content-Type', 'text/html' );
- $this->setCacheControl( $response, $parserOutput->getCacheExpiry() );
- $response->setBody( new StringStream( $parserOutput->getText() ) );
+ $response->setHeader( 'Content-Type', $restbaseResp[ 'headers' ][ 'content-type' ] );
+ $response->setBody( new StringStream( $restbaseResp[ 'body' ] ) );
break;
case 'with_html':
- $parserOutput = $this->getHtmlFromCache( $titleObj );
+ $restbaseResp = $this->fetch200HtmlFromRESTBase( $titleObj );
$body = $this->constructMetadata( $titleObj, $revision );
- $body['html'] = $parserOutput->getText();
+ $body['html'] = $restbaseResp['body'];
$response = $this->getResponseFactory()->createJson( $body );
- $this->setCacheControl( $response, $parserOutput->getCacheExpiry() );
break;
default:
throw new LogicException( "Unknown HTML type $htmlType" );
}
+ $response->setHeader( 'Cache-Control', 'max-age=' . self::MAX_AGE_200 );
return $response;
}
@@ -157,6 +193,7 @@ class PageHTMLHandler extends LatestPageContentHandler {
* if the latest revision of a page has been made private, un-readable for another reason,
* or a newer revision exists.
* @return string|null
+ * @throws LocalizedHttpException
*/
protected function getETag(): ?string {
$title = $this->getTitle();
@@ -166,141 +203,42 @@ class PageHTMLHandler extends LatestPageContentHandler {
if ( $this->getHtmlType() === 'bare' ) {
return '"' . $this->getLatestRevision()->getId() . '"';
}
- // While we are not differentiating the output by parser options and
- // only provide anon parses, cache time or page touched provides a good
- // reference for etag. Once we start doing non-anon parses, this needs
- // to start incorporating current users ParserOptions.
- return '"' . $this->getLastModified() . '"';
+
+ $restbaseRes = $this->fetch200HtmlFromRESTBase( $title );
+ return $restbaseRes['headers']['etag'] ?? null;
}
/**
* @return string|null
+ * @throws LocalizedHttpException
*/
protected function getLastModified(): ?string {
$title = $this->getTitle();
if ( !$title || !$title->getArticleID() || !$this->isAccessible( $title ) ) {
return null;
}
+
if ( $this->getHtmlType() === 'bare' ) {
return $this->getLatestRevision()->getTimestamp();
}
- $wikiPage = $this->wikiPageFactory->newFromTitle( $title );
- // The cache time of the metadata belongs to the ParserOutput
- // variant cached last. While we are not differentiating the
- // parser options, it's fine. Once we start supporting non-anon
- // parses, we would need to fetch the actual ParserOutput to find
- // out it's cache time.
- $cacheMetadata = $this->parserCache->getMetadata(
- $wikiPage,
- ParserCache::USE_CURRENT_ONLY
- );
- if ( $cacheMetadata ) {
- return $cacheMetadata->getCacheTime();
- } else {
- return $wikiPage->getTouched();
- }
- }
-
- private function getHtmlType(): string {
- return $this->getConfig()['format'];
- }
- /**
- * Assert that Parsoid services are available.
- * TODO: once parsoid glue services are in core,
- * this will become a no-op and will be removed.
- * See T265518
- * @throws LocalizedHttpException
- */
- private function assertParsoidInstalled() {
- $services = MediaWikiServices::getInstance();
- if ( $services->has( 'ParsoidSiteConfig' ) &&
- $services->has( 'ParsoidPageConfigFactory' ) &&
- $services->has( 'ParsoidDataAccess' ) ) {
- return;
+ $restbaseRes = $this->fetch200HtmlFromRESTBase( $title );
+ $restbaseEtag = $restbaseRes['headers']['etag'] ?? null;
+ if ( !$restbaseEtag ) {
+ return null;
}
- throw new LocalizedHttpException(
- MessageValue::new( 'rest-html-backend-error' ), 501 );
- }
- /**
- * @return Parsoid
- * @throws LocalizedHttpException
- */
- protected function createParsoid(): Parsoid {
- $this->assertParsoidInstalled();
- if ( $this->parsoid === null ) {
- // TODO: once parsoid glue services are in core,
- // this will need to use normal DI.
- // See T265518
- $services = MediaWikiServices::getInstance();
- $this->parsoid = new Parsoid(
- $services->get( 'ParsoidSiteConfig' ),
- $services->get( 'ParsoidDataAccess' )
- );
+ $etagComponents = [];
+ if ( !preg_match( '/^(?:W\/)?"?[^"\/]+(?:\/([^"\/]+))"?$/',
+ $restbaseEtag, $etagComponents )
+ ) {
+ return null;
}
- return $this->parsoid;
- }
-
- /**
- * @param LinkTarget $linkTarget
- * @return PageConfig
- * @throws LocalizedHttpException
- */
- private function createPageConfig(
- LinkTarget $linkTarget
- ): PageConfig {
- $this->assertParsoidInstalled();
- // Currently everything is parsed as anon since Parsoid
- // can't report the used options.
- // Already checked that title/revision exist and accessible.
- return MediaWikiServices::getInstance()
- ->get( 'ParsoidPageConfigFactory' )
- ->create( $linkTarget );
- }
- /**
- * @param LinkTarget $title
- * @return ParserOutput a tuple with html and content-type
- * @throws LocalizedHttpException
- */
- private function getHtmlFromCache( LinkTarget $title ): ParserOutput {
- $wikiPage = $this->wikiPageFactory->newFromLinkTarget( $title );
- $parserOutput = $this->parserCache->get( $wikiPage, ParserOptions::newFromAnon() );
- if ( $parserOutput ) {
- return $parserOutput;
- }
- $fakeParserOutput = $this->parse( $title );
- $this->parserCache->save( $fakeParserOutput, $wikiPage, ParserOptions::newFromAnon() );
- return $fakeParserOutput;
+ return UIDGenerator::getTimestampFromUUIDv1( $etagComponents[1] ) ?: null;
}
- /**
- * @param LinkTarget $title
- * @return ParserOutput
- * @throws LocalizedHttpException
- */
- private function parse( LinkTarget $title ): ParserOutput {
- $parsoid = $this->createParsoid();
- $pageConfig = $this->createPageConfig( $title );
- try {
- $pageBundle = $parsoid->wikitext2html( $pageConfig, [
- 'discardDataParsoid' => true,
- 'pageBundle' => true,
- ] );
- return new ParserOutput( $pageBundle->html );
- } catch ( ClientError $e ) {
- throw new LocalizedHttpException(
- MessageValue::new( 'rest-html-backend-error' ),
- 400,
- [ 'reason' => $e->getMessage() ]
- );
- } catch ( ResourceLimitExceededException $e ) {
- throw new LocalizedHttpException(
- MessageValue::new( 'rest-resource-limit-exceeded' ),
- 413,
- [ 'reason' => $e->getMessage() ]
- );
- }
+ private function getHtmlType(): string {
+ return $this->getConfig()['format'];
}
}
diff --git a/includes/Rest/coreRoutes.json b/includes/Rest/coreRoutes.json
index f54a50a61c80..108bf99c1548 100644
--- a/includes/Rest/coreRoutes.json
+++ b/includes/Rest/coreRoutes.json
@@ -90,8 +90,7 @@
"RevisionLookup",
"TitleFormatter",
"TitleFactory",
- "ParserCacheFactory",
- "WikiPageFactory"
+ "VirtualRESTServiceClient"
],
"format": "bare"
},
@@ -104,8 +103,7 @@
"RevisionLookup",
"TitleFormatter",
"TitleFactory",
- "ParserCacheFactory",
- "WikiPageFactory"
+ "VirtualRESTServiceClient"
],
"format": "html"
},
@@ -118,8 +116,7 @@
"RevisionLookup",
"TitleFormatter",
"TitleFactory",
- "ParserCacheFactory",
- "WikiPageFactory"
+ "VirtualRESTServiceClient"
],
"format": "with_html"
},
diff --git a/includes/Rest/i18n/en.json b/includes/Rest/i18n/en.json
index c5f8e0e86248..ec986880217c 100644
--- a/includes/Rest/i18n/en.json
+++ b/includes/Rest/i18n/en.json
@@ -16,7 +16,6 @@
"rest-permission-denied-anon": "Not accessible by anonymous user",
"rest-permission-denied-title": "The user does not have rights to read title ($1)",
"rest-permission-denied-revision": "User doesn't have access to the requested revision ($1).",
- "rest-resource-limit-exceeded": "Resources limits exceeded processing the request.",
"rest-pagehistory-incompatible-params": "Parameters \"older_than\" and \"newer_than\" cannot both be specified",
"rest-pagehistory-param-range-error": "Revision id must be greater than 0",
"rest-pagehistory-timestamp-error": "Unable to retrieve timestamp for the specified revision ($1)",
diff --git a/includes/Rest/i18n/qqq.json b/includes/Rest/i18n/qqq.json
index 32af0cf076eb..e97dee5277c5 100644
--- a/includes/Rest/i18n/qqq.json
+++ b/includes/Rest/i18n/qqq.json
@@ -17,7 +17,6 @@
"rest-permission-denied-anon": "Error message for REST API debugging, shown when the user is anonymous and therefore is denied permission to a resource",
"rest-permission-denied-title": "Error message for REST API debugging, shown if the user doesn't have read access to the specified title. Parameters:\n* $1 The page title",
"rest-permission-denied-revision": "Error message for REST API debugging, shown when the user doesn't have access to the specified revision. Parameters:\n* $1: the specified revision.",
- "rest-resource-limit-exceeded": "Error message for REST API debugging, shown when the resource limit was exceeded by the request",
"rest-pagehistory-incompatible-params": "Error message for REST API debugging, shown if incompatible parameters are specified.",
"rest-pagehistory-param-range-error": "Error message for REST API debugging, shown if a revision id is provided but is out of range.",
"rest-pagehistory-timestamp-error": "Error message for REST API debugging, shown if an error occurred loading the timestamp for the specified revision.",
@@ -34,7 +33,7 @@
"rest-page-source-type-error": "Error message for REST API debugging, shown when trying to retrieve content for a page that has an unsupported content type",
"rest-no-revision": "Error message for REST API debugging, shown when fetching a revision by page ID fails. Parameters:\n* $1: The page ID we are getting revision from",
"rest-media-too-many-links": "Error message for REST API debugging, shown when there are too many media links on a page. Parameters:\n* $1: The page title.\n* $2: The number of links allowed.",
- "rest-html-backend-error": "Error message for REST API debugging, shown when fetching Parsoid HTML from backend has failed. Parameters",
+ "rest-html-backend-error": "Error message for REST API debugging, shown when fetching Parsoid HTML from backend has failed.",
"rest-bad-json-body": "Error message for REST API debugging, shown when request body did not contain a JSON encoded object.",
"rest-json-body-parse-error": "Error message for REST API debugging, shown when parsing the JSON body failed. Parameters:\n* $1: the error message from the JSON parser.",
"rest-missing-body-field": "Error message for REST API debugging, shown when there is a mandatory field missing from the request body. Parameters:\n* $1: The field name",