aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPetr Pchelko <ppchelko@wikimedia.org>2020-12-07 13:00:14 -0600
committerPetr Pchelko <ppchelko@wikimedia.org>2020-12-09 16:36:07 -0600
commit1162411d7f475cfc30b55dc646cd613a9240072d (patch)
treea27e4aa3fde4fb3e5cbd2fef511fb430bc4b4577
parent2b7dac53f3ac38e36a69854160ea981e43bdb7f2 (diff)
downloadmediawikicore-1162411d7f475cfc30b55dc646cd613a9240072d.tar.gz
mediawikicore-1162411d7f475cfc30b55dc646cd613a9240072d.zip
Make /page/{title}/html emit etags in RESTBase format
RESTBase used to emit ETag in the `"<rev_id>/<render_id>" format. For the benefit of the clients, preserve the formar. Render ID is a UUIDv1 uniquely identifying the ParserOutput. In future it would be used as a stashing key for stash deduplication. At this time I decided to just attach the render ID as extension data to our fake ParserOutput. Once we integrate Parsoid more into core, we will likely move it into a ParserOutput property, or even replace CacheTime::mCacheTime with a UUIDv1, but it's too early for that. Bug: T268234 Change-Id: Ie604e9c98021d59eb1a17ca65f227e8f234a45be
-rw-r--r--includes/Rest/Handler/PageHTMLHandler.php7
-rw-r--r--includes/Rest/Handler/ParsoidHTMLHelper.php52
-rw-r--r--includes/Rest/coreRoutes.json6
-rw-r--r--tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php14
-rw-r--r--tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php5
5 files changed, 47 insertions, 37 deletions
diff --git a/includes/Rest/Handler/PageHTMLHandler.php b/includes/Rest/Handler/PageHTMLHandler.php
index 941786d5445e..536ad819e58b 100644
--- a/includes/Rest/Handler/PageHTMLHandler.php
+++ b/includes/Rest/Handler/PageHTMLHandler.php
@@ -16,6 +16,7 @@ use RequestContext;
use TitleFactory;
use TitleFormatter;
use Wikimedia\Assert\Assert;
+use Wikimedia\UUID\GlobalIdGenerator;
/**
* A handler that returns Parsoid HTML for the following routes:
@@ -42,7 +43,8 @@ class PageHTMLHandler extends SimpleHandler {
TitleFormatter $titleFormatter,
TitleFactory $titleFactory,
ParserCacheFactory $parserCacheFactory,
- WikiPageFactory $wikiPageFactory
+ WikiPageFactory $wikiPageFactory,
+ GlobalIdGenerator $globalIdGenerator
) {
$this->contentHelper = new PageContentHelper(
$config,
@@ -53,7 +55,8 @@ class PageHTMLHandler extends SimpleHandler {
);
$this->htmlHelper = new ParsoidHTMLHelper(
$parserCacheFactory->getInstance( 'parsoid' ),
- $wikiPageFactory
+ $wikiPageFactory,
+ $globalIdGenerator
);
}
diff --git a/includes/Rest/Handler/ParsoidHTMLHelper.php b/includes/Rest/Handler/ParsoidHTMLHelper.php
index d20d842c119b..35b6140da2b2 100644
--- a/includes/Rest/Handler/ParsoidHTMLHelper.php
+++ b/includes/Rest/Handler/ParsoidHTMLHelper.php
@@ -34,6 +34,7 @@ use Wikimedia\Parsoid\Config\PageConfig;
use Wikimedia\Parsoid\Core\ClientError;
use Wikimedia\Parsoid\Core\ResourceLimitExceededException;
use Wikimedia\Parsoid\Parsoid;
+use Wikimedia\UUID\GlobalIdGenerator;
use WikiPage;
/**
@@ -46,12 +47,17 @@ use WikiPage;
*/
class ParsoidHTMLHelper {
+ private const RENDER_ID_KEY = 'parsoid-render-id';
+
/** @var ParserCache */
private $parserCache;
/** @var WikiPageFactory */
private $wikiPageFactory;
+ /** @var GlobalIdGenerator */
+ private $globalIdGenerator;
+
/** @var Title|null */
private $title = null;
@@ -61,13 +67,16 @@ class ParsoidHTMLHelper {
/**
* @param ParserCache $parserCache
* @param WikiPageFactory $wikiPageFactory
+ * @param GlobalIdGenerator $globalIdGenerator
*/
public function __construct(
ParserCache $parserCache,
- WikiPageFactory $wikiPageFactory
+ WikiPageFactory $wikiPageFactory,
+ GlobalIdGenerator $globalIdGenerator
) {
$this->parserCache = $parserCache;
$this->wikiPageFactory = $wikiPageFactory;
+ $this->globalIdGenerator = $globalIdGenerator;
}
/**
@@ -90,7 +99,12 @@ class ParsoidHTMLHelper {
'discardDataParsoid' => true,
'pageBundle' => true,
] );
- return new ParserOutput( $pageBundle->html );
+ $fakeParserOutput = new ParserOutput( $pageBundle->html );
+ // TODO: when we make tighter integration with Parsoid, render ID should become
+ // a standard ParserOutput property. Nothing else needs it now, so don't generate
+ // it in ParserCache just yet.
+ $fakeParserOutput->setExtensionData( self::RENDER_ID_KEY, $this->globalIdGenerator->newUUIDv1() );
+ return $fakeParserOutput;
} catch ( ClientError $e ) {
throw new LocalizedHttpException(
MessageValue::new( 'rest-html-backend-error' ),
@@ -192,17 +206,17 @@ class ParsoidHTMLHelper {
}
/**
- * Returns an ETag representing the HTML output. It's based on the timestamp returned
- * by getLastModified().
+ * Returns an ETag uniquely identifying the HTML output.
* @return string|null
*/
public function getETag(): ?string {
- // While we are not differentiating the output by parser options and
- // only provide anon parses, cache time or page touched provides a good
- // reference for etag. Once we start doing non-anon parses, this needs
- // to start incorporating current users ParserOptions.
- // TODO: make this the same as the ETag emitted by RESTbase.
- return '"' . $this->getLastModified() . '"';
+ $parserOutput = $this->getHtml();
+ $renderId = $parserOutput->getExtensionData( self::RENDER_ID_KEY );
+ // Fallback for backwards compatibility with older cached entries.
+ if ( !$renderId ) {
+ $renderId = $this->getLastModified();
+ }
+ return "\"{$parserOutput->getCacheRevisionId()}/{$renderId}\"";
}
/**
@@ -211,23 +225,7 @@ class ParsoidHTMLHelper {
* @return string|null
*/
public function getLastModified(): ?string {
- $wikiPage = $this->wikiPageFactory->newFromTitle( $this->title );
-
- // The cache time of the metadata belongs to the ParserOutput
- // variant cached last. While we are not differentiating the
- // parser options, it's fine. Once we start supporting non-anon
- // parses, we would need to fetch the actual $titleParserOutput to find
- // out it's cache time.
- $cacheMetadata = $this->parserCache->getMetadata(
- $wikiPage,
- ParserCache::USE_CURRENT_ONLY
- );
-
- if ( $cacheMetadata ) {
- return $cacheMetadata->getCacheTime();
- } else {
- return $wikiPage->getTouched();
- }
+ return $this->getHtml()->getCacheTime();
}
}
diff --git a/includes/Rest/coreRoutes.json b/includes/Rest/coreRoutes.json
index a5ef2e33d5ec..714725274ede 100644
--- a/includes/Rest/coreRoutes.json
+++ b/includes/Rest/coreRoutes.json
@@ -104,7 +104,8 @@
"TitleFormatter",
"TitleFactory",
"ParserCacheFactory",
- "WikiPageFactory"
+ "WikiPageFactory",
+ "GlobalIdGenerator"
],
"format": "html"
},
@@ -118,7 +119,8 @@
"TitleFormatter",
"TitleFactory",
"ParserCacheFactory",
- "WikiPageFactory"
+ "WikiPageFactory",
+ "GlobalIdGenerator"
],
"format": "with_html"
},
diff --git a/tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php b/tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php
index f06d6b1c2f58..b3c8f3235a97 100644
--- a/tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php
+++ b/tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php
@@ -84,7 +84,8 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase {
new NullStatsdDataFactory(),
new NullLogger()
),
- $this->getServiceContainer()->getWikiPageFactory()
+ $this->getServiceContainer()->getWikiPageFactory(),
+ $this->getServiceContainer()->getGlobalIdGenerator()
);
if ( $parsoid !== null ) {
@@ -187,13 +188,15 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase {
// First, test it works if nothing was cached yet.
// Make some time pass since page was created:
- MWTimestamp::setFakeTime( $time + 10 );
+ $time += 10;
+ MWTimestamp::setFakeTime( $time );
$handler = $this->newHandler( $cache );
$response = $this->executeHandler( $handler, $request, [
'format' => 'html'
] );
$this->assertArrayHasKey( 'ETag', $response->getHeaders() );
$etag = $response->getHeaderLine( 'ETag' );
+ $this->assertStringMatchesFormat( '"' . $page->getLatest() . '/%x-%x-%x-%x-%x"', $etag );
$this->assertArrayHasKey( 'Last-Modified', $response->getHeaders() );
$this->assertSame( MWTimestamp::convert( TS_RFC2822, $time ),
$response->getHeaderLine( 'Last-Modified' ) );
@@ -204,10 +207,11 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase {
'format' => 'html'
] );
$this->assertArrayHasKey( 'ETag', $response->getHeaders() );
- $this->assertNotSame( $etag, $response->getHeaderLine( 'ETag' ) );
+ $this->assertSame( $etag, $response->getHeaderLine( 'ETag' ) );
$etag = $response->getHeaderLine( 'ETag' );
+ $this->assertStringMatchesFormat( '"' . $page->getLatest() . '/%x-%x-%x-%x-%x"', $etag );
$this->assertArrayHasKey( 'Last-Modified', $response->getHeaders() );
- $this->assertSame( MWTimestamp::convert( TS_RFC2822, $time + 10 ),
+ $this->assertSame( MWTimestamp::convert( TS_RFC2822, $time ),
$response->getHeaderLine( 'Last-Modified' ) );
// Now, expire the cache
@@ -225,6 +229,8 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase {
] );
$this->assertArrayHasKey( 'ETag', $response->getHeaders() );
$this->assertNotSame( $etag, $response->getHeaderLine( 'ETag' ) );
+ $etag = $response->getHeaderLine( 'ETag' );
+ $this->assertStringMatchesFormat( '"' . $page->getLatest() . '/%x-%x-%x-%x-%x"', $etag );
$this->assertArrayHasKey( 'Last-Modified', $response->getHeaders() );
$this->assertSame( MWTimestamp::convert( TS_RFC2822, $time ),
$response->getHeaderLine( 'Last-Modified' ) );
diff --git a/tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php b/tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php
index c295c28e7156..90f0e4b8fb71 100644
--- a/tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php
+++ b/tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php
@@ -69,7 +69,8 @@ class ParsoidHTMLHelperTest extends MediaWikiIntegrationTestCase {
$helper = new ParsoidHTMLHelper(
$parserCache,
- $this->getServiceContainer()->getWikiPageFactory()
+ $this->getServiceContainer()->getWikiPageFactory(),
+ $this->getServiceContainer()->getGlobalIdGenerator()
);
if ( $parsoid !== null ) {
@@ -139,7 +140,7 @@ class ParsoidHTMLHelperTest extends MediaWikiIntegrationTestCase {
$helper = $this->newHelper( $cache );
$helper->init( $page->getTitle() );
- $this->assertNotSame( $etag, $helper->getETag() );
+ $this->assertSame( $etag, $helper->getETag() );
$etag = $helper->getETag();
$this->assertSame(
MWTimestamp::convert( TS_RFC2822, $time + 10 ),