diff options
author | Petr Pchelko <ppchelko@wikimedia.org> | 2020-12-07 13:00:14 -0600 |
---|---|---|
committer | Petr Pchelko <ppchelko@wikimedia.org> | 2020-12-09 16:36:07 -0600 |
commit | 1162411d7f475cfc30b55dc646cd613a9240072d (patch) | |
tree | a27e4aa3fde4fb3e5cbd2fef511fb430bc4b4577 | |
parent | 2b7dac53f3ac38e36a69854160ea981e43bdb7f2 (diff) | |
download | mediawikicore-1162411d7f475cfc30b55dc646cd613a9240072d.tar.gz mediawikicore-1162411d7f475cfc30b55dc646cd613a9240072d.zip |
Make /page/{title}/html emit etags in RESTBase format
RESTBase used to emit ETag in the `"<rev_id>/<render_id>" format.
For the benefit of the clients, preserve the formar.
Render ID is a UUIDv1 uniquely identifying the ParserOutput.
In future it would be used as a stashing key for stash deduplication.
At this time I decided to just attach the render ID as extension data
to our fake ParserOutput. Once we integrate Parsoid more into core,
we will likely move it into a ParserOutput property, or even
replace CacheTime::mCacheTime with a UUIDv1, but it's too early for that.
Bug: T268234
Change-Id: Ie604e9c98021d59eb1a17ca65f227e8f234a45be
5 files changed, 47 insertions, 37 deletions
diff --git a/includes/Rest/Handler/PageHTMLHandler.php b/includes/Rest/Handler/PageHTMLHandler.php index 941786d5445e..536ad819e58b 100644 --- a/includes/Rest/Handler/PageHTMLHandler.php +++ b/includes/Rest/Handler/PageHTMLHandler.php @@ -16,6 +16,7 @@ use RequestContext; use TitleFactory; use TitleFormatter; use Wikimedia\Assert\Assert; +use Wikimedia\UUID\GlobalIdGenerator; /** * A handler that returns Parsoid HTML for the following routes: @@ -42,7 +43,8 @@ class PageHTMLHandler extends SimpleHandler { TitleFormatter $titleFormatter, TitleFactory $titleFactory, ParserCacheFactory $parserCacheFactory, - WikiPageFactory $wikiPageFactory + WikiPageFactory $wikiPageFactory, + GlobalIdGenerator $globalIdGenerator ) { $this->contentHelper = new PageContentHelper( $config, @@ -53,7 +55,8 @@ class PageHTMLHandler extends SimpleHandler { ); $this->htmlHelper = new ParsoidHTMLHelper( $parserCacheFactory->getInstance( 'parsoid' ), - $wikiPageFactory + $wikiPageFactory, + $globalIdGenerator ); } diff --git a/includes/Rest/Handler/ParsoidHTMLHelper.php b/includes/Rest/Handler/ParsoidHTMLHelper.php index d20d842c119b..35b6140da2b2 100644 --- a/includes/Rest/Handler/ParsoidHTMLHelper.php +++ b/includes/Rest/Handler/ParsoidHTMLHelper.php @@ -34,6 +34,7 @@ use Wikimedia\Parsoid\Config\PageConfig; use Wikimedia\Parsoid\Core\ClientError; use Wikimedia\Parsoid\Core\ResourceLimitExceededException; use Wikimedia\Parsoid\Parsoid; +use Wikimedia\UUID\GlobalIdGenerator; use WikiPage; /** @@ -46,12 +47,17 @@ use WikiPage; */ class ParsoidHTMLHelper { + private const RENDER_ID_KEY = 'parsoid-render-id'; + /** @var ParserCache */ private $parserCache; /** @var WikiPageFactory */ private $wikiPageFactory; + /** @var GlobalIdGenerator */ + private $globalIdGenerator; + /** @var Title|null */ private $title = null; @@ -61,13 +67,16 @@ class ParsoidHTMLHelper { /** * @param ParserCache $parserCache * @param WikiPageFactory $wikiPageFactory + * @param GlobalIdGenerator $globalIdGenerator */ public function __construct( ParserCache $parserCache, - WikiPageFactory $wikiPageFactory + WikiPageFactory $wikiPageFactory, + GlobalIdGenerator $globalIdGenerator ) { $this->parserCache = $parserCache; $this->wikiPageFactory = $wikiPageFactory; + $this->globalIdGenerator = $globalIdGenerator; } /** @@ -90,7 +99,12 @@ class ParsoidHTMLHelper { 'discardDataParsoid' => true, 'pageBundle' => true, ] ); - return new ParserOutput( $pageBundle->html ); + $fakeParserOutput = new ParserOutput( $pageBundle->html ); + // TODO: when we make tighter integration with Parsoid, render ID should become + // a standard ParserOutput property. Nothing else needs it now, so don't generate + // it in ParserCache just yet. + $fakeParserOutput->setExtensionData( self::RENDER_ID_KEY, $this->globalIdGenerator->newUUIDv1() ); + return $fakeParserOutput; } catch ( ClientError $e ) { throw new LocalizedHttpException( MessageValue::new( 'rest-html-backend-error' ), @@ -192,17 +206,17 @@ class ParsoidHTMLHelper { } /** - * Returns an ETag representing the HTML output. It's based on the timestamp returned - * by getLastModified(). + * Returns an ETag uniquely identifying the HTML output. * @return string|null */ public function getETag(): ?string { - // While we are not differentiating the output by parser options and - // only provide anon parses, cache time or page touched provides a good - // reference for etag. Once we start doing non-anon parses, this needs - // to start incorporating current users ParserOptions. - // TODO: make this the same as the ETag emitted by RESTbase. - return '"' . $this->getLastModified() . '"'; + $parserOutput = $this->getHtml(); + $renderId = $parserOutput->getExtensionData( self::RENDER_ID_KEY ); + // Fallback for backwards compatibility with older cached entries. + if ( !$renderId ) { + $renderId = $this->getLastModified(); + } + return "\"{$parserOutput->getCacheRevisionId()}/{$renderId}\""; } /** @@ -211,23 +225,7 @@ class ParsoidHTMLHelper { * @return string|null */ public function getLastModified(): ?string { - $wikiPage = $this->wikiPageFactory->newFromTitle( $this->title ); - - // The cache time of the metadata belongs to the ParserOutput - // variant cached last. While we are not differentiating the - // parser options, it's fine. Once we start supporting non-anon - // parses, we would need to fetch the actual $titleParserOutput to find - // out it's cache time. - $cacheMetadata = $this->parserCache->getMetadata( - $wikiPage, - ParserCache::USE_CURRENT_ONLY - ); - - if ( $cacheMetadata ) { - return $cacheMetadata->getCacheTime(); - } else { - return $wikiPage->getTouched(); - } + return $this->getHtml()->getCacheTime(); } } diff --git a/includes/Rest/coreRoutes.json b/includes/Rest/coreRoutes.json index a5ef2e33d5ec..714725274ede 100644 --- a/includes/Rest/coreRoutes.json +++ b/includes/Rest/coreRoutes.json @@ -104,7 +104,8 @@ "TitleFormatter", "TitleFactory", "ParserCacheFactory", - "WikiPageFactory" + "WikiPageFactory", + "GlobalIdGenerator" ], "format": "html" }, @@ -118,7 +119,8 @@ "TitleFormatter", "TitleFactory", "ParserCacheFactory", - "WikiPageFactory" + "WikiPageFactory", + "GlobalIdGenerator" ], "format": "with_html" }, diff --git a/tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php b/tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php index f06d6b1c2f58..b3c8f3235a97 100644 --- a/tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php +++ b/tests/phpunit/integration/includes/Rest/Handler/PageHTMLHandlerTest.php @@ -84,7 +84,8 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase { new NullStatsdDataFactory(), new NullLogger() ), - $this->getServiceContainer()->getWikiPageFactory() + $this->getServiceContainer()->getWikiPageFactory(), + $this->getServiceContainer()->getGlobalIdGenerator() ); if ( $parsoid !== null ) { @@ -187,13 +188,15 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase { // First, test it works if nothing was cached yet. // Make some time pass since page was created: - MWTimestamp::setFakeTime( $time + 10 ); + $time += 10; + MWTimestamp::setFakeTime( $time ); $handler = $this->newHandler( $cache ); $response = $this->executeHandler( $handler, $request, [ 'format' => 'html' ] ); $this->assertArrayHasKey( 'ETag', $response->getHeaders() ); $etag = $response->getHeaderLine( 'ETag' ); + $this->assertStringMatchesFormat( '"' . $page->getLatest() . '/%x-%x-%x-%x-%x"', $etag ); $this->assertArrayHasKey( 'Last-Modified', $response->getHeaders() ); $this->assertSame( MWTimestamp::convert( TS_RFC2822, $time ), $response->getHeaderLine( 'Last-Modified' ) ); @@ -204,10 +207,11 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase { 'format' => 'html' ] ); $this->assertArrayHasKey( 'ETag', $response->getHeaders() ); - $this->assertNotSame( $etag, $response->getHeaderLine( 'ETag' ) ); + $this->assertSame( $etag, $response->getHeaderLine( 'ETag' ) ); $etag = $response->getHeaderLine( 'ETag' ); + $this->assertStringMatchesFormat( '"' . $page->getLatest() . '/%x-%x-%x-%x-%x"', $etag ); $this->assertArrayHasKey( 'Last-Modified', $response->getHeaders() ); - $this->assertSame( MWTimestamp::convert( TS_RFC2822, $time + 10 ), + $this->assertSame( MWTimestamp::convert( TS_RFC2822, $time ), $response->getHeaderLine( 'Last-Modified' ) ); // Now, expire the cache @@ -225,6 +229,8 @@ class PageHTMLHandlerTest extends MediaWikiIntegrationTestCase { ] ); $this->assertArrayHasKey( 'ETag', $response->getHeaders() ); $this->assertNotSame( $etag, $response->getHeaderLine( 'ETag' ) ); + $etag = $response->getHeaderLine( 'ETag' ); + $this->assertStringMatchesFormat( '"' . $page->getLatest() . '/%x-%x-%x-%x-%x"', $etag ); $this->assertArrayHasKey( 'Last-Modified', $response->getHeaders() ); $this->assertSame( MWTimestamp::convert( TS_RFC2822, $time ), $response->getHeaderLine( 'Last-Modified' ) ); diff --git a/tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php b/tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php index c295c28e7156..90f0e4b8fb71 100644 --- a/tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php +++ b/tests/phpunit/integration/includes/Rest/Handler/ParsoidHTMLHelperTest.php @@ -69,7 +69,8 @@ class ParsoidHTMLHelperTest extends MediaWikiIntegrationTestCase { $helper = new ParsoidHTMLHelper( $parserCache, - $this->getServiceContainer()->getWikiPageFactory() + $this->getServiceContainer()->getWikiPageFactory(), + $this->getServiceContainer()->getGlobalIdGenerator() ); if ( $parsoid !== null ) { @@ -139,7 +140,7 @@ class ParsoidHTMLHelperTest extends MediaWikiIntegrationTestCase { $helper = $this->newHelper( $cache ); $helper->init( $page->getTitle() ); - $this->assertNotSame( $etag, $helper->getETag() ); + $this->assertSame( $etag, $helper->getETag() ); $etag = $helper->getETag(); $this->assertSame( MWTimestamp::convert( TS_RFC2822, $time + 10 ), |