'', 'oldid' => null, 'body_only' => null, 'errorEnc' => 'plain', 'iwp' => 'exwiki', 'subst' => null, 'offsetType' => 'byte', 'opts' => [], 'envOptions' => [ 'prefix' => 'exwiki', 'domain' => 'wiki.example.com', 'pageName' => '', 'cookie' => '', 'reqId' => 'test+test+test', 'userAgent' => 'UTAgent', 'htmlVariantLanguage' => null, 'outputContentVersion' => Parsoid::AVAILABLE_VERSIONS[0], ], ]; /** @var string Imperfect wikitext to be preserved if selser is applied. Corresponds to Selser.html. */ private const IMPERFECT_WIKITEXT = "
hi
'; $originalHtml = 'ho
'; $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ]; $dataMediaWiki = [ 'ids' => [ 'mwAQ' => [ 'parts' => [ [ 'template' => [ 'target' => [ 'wt' => '1x', 'href' => './Template:1x' ], 'params' => [ '1' => [ 'wt' => 'hi' ] ], 'i' => 0 ] ] ] ] ] ]; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $originalHtml ], 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ 'body' => $dataMediaWiki, ], ] ], ]; yield 'should apply original data-mw' => [ $attribs, $html, [ '{{1x|hi}}' ], ]; // should give precedence to inline data-mw over original //////// $html = 'hi
'; $originalHtml = 'ho
'; $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ]; $dataMediaWiki = [ 'ids' => [ 'mwAQ' => [] ] ]; // Missing data-mw.parts! $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $originalHtml ], 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ 'body' => $dataMediaWiki, ], ] ], ]; yield 'should give precedence to inline data-mw over original' => [ $attribs, $html, [ '{{1x|hi}}' ], ]; // should not apply original data-mw if modified is supplied /////////// $html = 'hi
'; $originalHtml = 'ho
'; $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ]; $dataMediaWiki = [ 'ids' => [ 'mwAQ' => [] ] ]; // Missing data-mw.parts! $dataMediaWikiModified = [ 'ids' => [ 'mwAQ' => [ 'parts' => [ [ 'template' => [ 'target' => [ 'wt' => '1x', 'href' => './Template:1x' ], 'params' => [ '1' => [ 'wt' => 'hi' ] ], 'i' => 0 ] ] ] ] ] ]; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'data-mw' => [ // modified data 'body' => $dataMediaWikiModified, ], 'original' => [ 'html' => [ 'headers' => $htmlHeaders999, 'body' => $originalHtml ], 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ // original data 'body' => $dataMediaWiki, ], ] ], ]; yield 'should not apply original data-mw if modified is supplied' => [ $attribs, $html, [ '{{1x|hi}}' ], ]; // should apply original data-mw when modified is absent (captions 1) /////////// $html = $this->getTextFromFile( 'Image.html' ); $dataParsoid = [ 'ids' => [ 'mwAg' => [ 'optList' => [ [ 'ck' => 'caption', 'ak' => 'Testing 123' ] ] ], 'mwAw' => [ 'a' => [ 'href' => './File:Foobar.jpg' ], 'sa' => [] ], 'mwBA' => [ 'a' => [ 'resource' => './File:Foobar.jpg', 'height' => '28', 'width' => '240' ], 'sa' => [ 'resource' => 'File:Foobar.jpg' ] ] ] ]; $dataMediaWiki = [ 'ids' => [ 'mwAg' => [ 'caption' => 'Testing 123' ] ] ]; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ // original data 'body' => $dataMediaWiki, ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $html ], ] ], ]; yield 'should apply original data-mw when modified is absent (captions 1)' => [ $attribs, $html, // modified HTML [ '[[File:Foobar.jpg|Testing 123]]' ], ]; // should give precedence to inline data-mw over modified (captions 2) ///////////// $htmlModified = $this->getTextFromFile( 'Image-data-mw.html' ); $dataMediaWikiModified = [ 'ids' => [ 'mwAg' => [ 'caption' => 'Testing 123' ] ] ]; $attribs 
= [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'data-mw' => [ 'body' => $dataMediaWikiModified, ], 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ // original data 'body' => $dataMediaWiki, ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $html ], ] ], ]; yield 'should give precedence to inline data-mw over modified (captions 2)' => [ $attribs, $htmlModified, // modified HTML [ '[[File:Foobar.jpg]]' ], ]; // should give precedence to modified data-mw over original (captions 3) ///////////// $dataMediaWikiModified = [ 'ids' => [ 'mwAg' => [] ] ]; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'data-mw' => [ 'body' => $dataMediaWikiModified, ], 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ // original data 'body' => $dataMediaWiki, ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $html ], ] ], ]; yield 'should give precedence to modified data-mw over original (captions 3)' => [ $attribs, $html, // modified HTML [ '[[File:Foobar.jpg]]' ], ]; // should apply extra normalizations /////////////////// $htmlModified = 'FooBar'; $attribs = [ 'opts' => [ 'original' => [] ], ]; yield 'should apply extra normalizations' => [ $attribs, $htmlModified, // modified HTML [ 'FooBar' ], // empty tag was stripped ]; // should apply version downgrade /////////// $htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0 $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => [ // Specify newer profile version for original HTML 'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"' ], // The profile version given inline in the original HTML doesn't matter, it's ignored 'body' => $htmlOfMinimal, ], 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ], 'data-mw' => [ 'body' => [ 'ids' => [] ] ], // required by version 999.0.0 ] ], ]; yield 'should 
apply version downgrade' => [ $attribs, $htmlOfMinimal, [ '123' ] ]; // should not apply version downgrade if versions are the same /////////// $htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0 $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => [ // Specify the exact same version specified inline in Minimal.html 2.4.0 'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2.4.0"' ], // The profile version given inline in the original HTML doesn't matter, it's ignored 'body' => $htmlOfMinimal, ], 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ], ] ], ]; yield 'should not apply version downgrade if versions are the same' => [ $attribs, $htmlOfMinimal, [ '123' ] ]; // should convert html to json /////////////////////////////////// $html = $this->getTextFromFile( 'JsonConfig.html' ); $expectedText = [ '{"a":4,"b":3}', ]; $attribs = [ 'opts' => [ // even if the path says "wikitext", the contentmodel from the body should win. 
'format' => ParsoidFormatHelper::FORMAT_WIKITEXT,
				'contentmodel' => CONTENT_MODEL_JSON,
			],
		];
		yield 'should convert html to json' => [
			$attribs,
			$html,
			$expectedText,
			[ 'content-type' => 'application/json' ],
		];

		// page bundle input should work with no original data present ///////////
		$htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' ); // Uses profile version 2.4.0
		$attribs = [
			'opts' => [
				'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
				'original' => [],
			],
		];
		yield 'page bundle input should work with no original data present' => [
			$attribs,
			$htmlOfMinimal,
			[ '123' ]
		];
	}

	/**
	 * Get a revision of the main-namespace page with the given name, creating
	 * the page with the given wikitext when the lookup finds no revision.
	 *
	 * @param string $title Page name (looked up in NS_MAIN)
	 * @param string $wikitext Content to use if the page has to be created
	 *
	 * @return RevisionRecord
	 */
	private function makePage( $title, $wikitext ): RevisionRecord {
		$linkTarget = new TitleValue( NS_MAIN, $title );
		$rev = $this->getServiceContainer()->getRevisionLookup()->getRevisionByTitle( $linkTarget );

		if ( $rev ) {
			return $rev;
		}

		// Create the page that was asked for. The name used to be hard-coded to
		// 'Test_html2wt' here, which silently ignored $title.
		/** @var RevisionRecord $rev */
		[ 'revision-record' => $rev ] = $this->editPage( $title, $wikitext )->getValue();

		return $rev;
	}

	/**
	 * Runs html2wt on a page whose stored content is IMPERFECT_WIKITEXT and
	 * checks the response headers and that the resulting wikitext contains
	 * every expected snippet.
	 *
	 * @dataProvider provideHtml2wt
	 *
	 * @param array $attribs
	 * @param string $html
	 * @param string[] $expectedText
	 * @param string[] $expectedHeaders
	 *
	 * @covers \MediaWiki\Parser\Parsoid\HtmlToContentTransform
	 * @covers \MediaWiki\Rest\Handler\ParsoidHandler::html2wt
	 */
	public function testHtml2wt(
		array $attribs,
		string $html,
		array $expectedText,
		array $expectedHeaders = []
	) {
		$wikitextProfileUri = 'https://www.mediawiki.org/wiki/Specs/wikitext/1.0.0';
		// Only applies when the data set did not specify its own content-type.
		$expectedHeaders += [
			'content-type' => "text/plain; charset=utf-8; profile=\"$wikitextProfileUri\"",
		];

		$rev = $this->makePage( 'Test_html2wt', self::IMPERFECT_WIKITEXT );
		$page = $rev->getPage();

		$pageConfig = $this->getPageConfig( $page );

		// Fill in everything the data set left unspecified.
		$attribs += self::DEFAULT_ATTRIBS;
		$attribs['opts'] += self::DEFAULT_ATTRIBS['opts'];
		$attribs['opts']['from'] ??= 'html';
		$attribs['envOptions'] += self::DEFAULT_ATTRIBS['envOptions'];

		if ( $attribs['oldid'] ) {
			// Set the actual ID of an existing revision
			$attribs['oldid'] = $rev->getId();
		}

		$handler = $this->newParsoidHandler();

		$response = $handler->html2wt( $pageConfig, $attribs, $html );
		$body = $response->getBody();
		$body->rewind();
		$actualWikitext = $body->getContents();

		foreach ( $expectedHeaders as $name => $value ) {
			$this->assertSame( $value, $response->getHeaderLine( $name ) );
		}

		foreach ( (array)$expectedText as $exp ) {
			$this->assertStringContainsString( $exp, $actualWikitext );
		}
	}

	public function provideHtml2wtThrows() {
		$html = '123';

		$profileVersion = '2.4.0';
		$htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $profileVersion;
		$htmlContentType = "text/html;profile=\"$htmlProfileUri\"";
		$htmlHeaders = [
			'content-type' => $htmlContentType,
		];

		// XXX: what does version 999.0.0 mean?!
		$htmlContentType999 = 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/999.0.0"';
		$htmlHeaders999 = [
			'content-type' => $htmlContentType999,
		];

		// Content-type of original html is missing ////////////////////////////
		$attribs = [
			'opts' => [
				'original' => [
					'html' => [
						// no headers with content type
						'body' => $html,
					],
				]
			],
		];
		yield 'Content-type of original html is missing' => [
			$attribs,
			$html,
			new LocalizedHttpException(
				new MessageValue( 'rest-html-backend-error', [ 'Content-type of original html is missing.' ] ),
				400,
				[ 'reason' => 'Content-type of original html is missing.' ]
			)
		];

		// should fail to downgrade the original version for an unknown transition ///////////
		$htmlOfMinimal = $this->getTextFromFile( 'Minimal.html' );
		$htmlOfMinimal2222 = $this->getTextFromFile( 'Minimal-2222.html' );
		$attribs = [
			'opts' => [
				'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
				'original' => [
					'html' => [
						'headers' => [
							// Specify version 2222.0.0!
'content-type' => 'text/html;profile="https://www.mediawiki.org/wiki/Specs/HTML/2222.0.0"' ], 'body' => $htmlOfMinimal2222, ], 'data-parsoid' => [ 'body' => [ 'ids' => [] ] ], ] ], ]; yield 'should fail to downgrade the original version for an unknown transition' => [ $attribs, $htmlOfMinimal, new LocalizedHttpException( new MessageValue( 'rest-html-backend-error', [ 'No downgrade possible from schema version 2222.0.0 to 2.4.0.' ] ), 400, [ 'reason' => 'No downgrade possible from schema version 2222.0.0 to 2.4.0.' ] ) ]; // DSR offsetType mismatch: UCS2 vs byte /////////////////////////////// $attribs = [ 'offsetType' => 'byte', 'envOptions' => [], 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $html, ], 'data-parsoid' => [ 'body' => [ 'offsetType' => 'UCS2', 'ids' => [], ] ], ] ], ]; yield 'DSR offsetType mismatch: UCS2 vs byte' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( 'rest-html-backend-error', [ 'DSR offsetType mismatch: UCS2 vs byte' ] ), 400, [ 'reason' => 'DSR offsetType mismatch: UCS2 vs byte' ] ) ]; // DSR offsetType mismatch: byte vs UCS2 /////////////////////////////// $attribs = [ 'offsetType' => 'UCS2', 'envOptions' => [], 'opts' => [ // Enable selser 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $html, ], 'data-parsoid' => [ 'body' => [ 'offsetType' => 'byte', 'ids' => [], ] ], ] ], ]; yield 'DSR offsetType mismatch: byte vs UCS2' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( 'rest-html-backend-error', [ 'DSR offsetType mismatch: byte vs UCS2' ] ), 400, [ 'reason' => 'DSR offsetType mismatch: byte vs UCS2' ] ) ]; // Could not find previous revision //////////////////////////// $attribs = [ 'oldid' => 1155779922, 'opts' => [ // set original HTML to enable selser 'original' => [ 'html' => [ 'headers' => $htmlHeaders, 'body' => $html, ] ] ] ]; yield 'Could not 
find previous revision' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( "rest-specified-revision-unavailable" ), 404 ) ]; // should return a 400 for missing inline data-mw (2.x) /////////////////// $html = 'hi
'; $dataParsoid = [ 'ids' => [ 'mwAQ' => [ 'pi' => [ [ [ 'k' => '1' ] ] ] ] ] ]; $htmlOrig = 'ho
'; $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'html' => [ 'headers' => $htmlHeaders, // slightly modified 'body' => $htmlOrig, ] ] ], ]; yield 'should return a 400 for missing inline data-mw (2.x)' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( 'rest-parsoid-error', [ 'Cannot serialize mw:Transclusion without data-mw.parts or data-parsoid.src' ] ), 400 ) ]; // should return a 400 for not supplying data-mw ////////////////////// $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $htmlOrig, ] ] ], ]; yield 'should return a 400 for not supplying data-mw' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( 'rest-html-backend-error', [ 'Invalid data-mw was provided.' ] ), 400, [ 'reason' => 'Invalid data-mw was provided.' ] ) ]; // should return a 400 for missing modified data-mw $attribs = [ 'opts' => [ 'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE, 'original' => [ 'data-parsoid' => [ 'body' => $dataParsoid, ], 'data-mw' => [ 'body' => [ // Missing data-mw.parts! 'ids' => [ 'mwAQ' => [] ], ] ], 'html' => [ 'headers' => $htmlHeaders999, 'body' => $htmlOrig, ] ] ], ]; yield 'should return a 400 for missing modified data-mw' => [ $attribs, $html, new LocalizedHttpException( new MessageValue( 'rest-parsoid-error', [ 'Cannot serialize mw:Transclusion without data-mw.parts or data-parsoid.src' ] ), 400 ) ]; // should return http 400 if supplied data-parsoid is empty //////////// $html = 'hi
'; $htmlOrig = 'ho
';
		$attribs = [
			'opts' => [
				'from' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE,
				'original' => [
					'data-parsoid' => [
						'body' => [],
					],
					'html' => [
						'headers' => $htmlHeaders,
						'body' => $htmlOrig,
					]
				]
			],
		];
		yield 'should return http 400 if supplied data-parsoid is empty' => [
			$attribs,
			$html,
			new LocalizedHttpException(
				new MessageValue( 'rest-html-backend-error', [ 'Invalid data-parsoid was provided.' ] ),
				400,
				[ 'reason' => 'Invalid data-parsoid was provided.' ]
			)
		];

		// TODO: ResourceLimitExceededException from $parsoid->dom2wikitext -> 413
		// TODO: ClientError from $parsoid->dom2wikitext -> 413
		// TODO: Errors from PageBundle->validate
	}

	/**
	 * Asserts that html2wt throws an exception matching the expected one in
	 * class, code, message and (for HTTP exceptions) error data.
	 *
	 * @dataProvider provideHtml2wtThrows
	 *
	 * @param array $attribs
	 * @param string $html
	 * @param Exception $expectedException
	 */
	public function testHtml2wtThrows(
		array $attribs,
		string $html,
		Exception $expectedException
	) {
		if ( isset( $attribs['oldid'] ) ) {
			// If a specific revision ID is requested, it's almost certain to not exist.
			// So we are testing with a non-existing page.
			$page = $this->getNonexistingTestPage();
		} else {
			$page = $this->getExistingTestPage();
		}

		$pageConfig = $this->getPageConfig( $page );

		// Fill in everything the data set left unspecified.
		$attribs += self::DEFAULT_ATTRIBS;
		$attribs['opts'] += self::DEFAULT_ATTRIBS['opts'];
		$attribs['opts']['from'] ??= 'html';
		$attribs['envOptions'] += self::DEFAULT_ATTRIBS['envOptions'];

		$handler = $this->newParsoidHandler();

		try {
			$handler->html2wt( $pageConfig, $attribs, $html );
		} catch ( Exception $e ) {
			$this->assertInstanceOf( get_class( $expectedException ), $e );
			$this->assertSame( $expectedException->getCode(), $e->getCode() );

			if ( $expectedException instanceof HttpException ) {
				/** @var HttpException $e */
				// Every expected error-data key must be present on the actual exception.
				$this->assertSame(
					$expectedException->getErrorData(),
					array_intersect_key( $expectedException->getErrorData(), $e->getErrorData() )
				);
			}

			if ( $expectedException instanceof LocalizedHttpException ) {
				/** @var LocalizedHttpException $expectedException */
				$this->assertInstanceOf( LocalizedHttpException::class, $e );
				$this->assertEquals( $expectedException->getMessageValue(), $e->getMessageValue() );
				$this->assertSame( $expectedException->getErrorData(), $e->getErrorData() );
			}

			$this->assertSame( $expectedException->getMessage(), $e->getMessage() );
			return;
		}

		// Kept outside the try block: the failure raised by fail() would otherwise
		// be swallowed by the generic catch above and reported as a confusing
		// instance-of mismatch.
		$this->fail( 'Expected exception: ' . $expectedException );
	}

	/**
	 * Pairs of [ exception thrown by Parsoid::dom2wikitext, exception expected from html2wt ].
	 */
	public static function provideDom2wikitextException() {
		yield 'ClientError' => [
			new ClientError( 'test' ),
			new LocalizedHttpException(
				new MessageValue( 'rest-parsoid-error', [ 'test' ] ),
				400
			)
		];

		yield 'ResourceLimitExceededException' => [
			new ResourceLimitExceededException( 'test' ),
			new LocalizedHttpException(
				new MessageValue( 'rest-parsoid-resource-exceeded', [ 'test' ] ),
				413
			)
		];
	}

	/**
	 * Asserts that an exception thrown by Parsoid::dom2wikitext is converted by
	 * html2wt into the expected HTTP exception.
	 *
	 * @dataProvider provideDom2wikitextException
	 *
	 * @param Exception $throw
	 * @param Exception $expectedException
	 */
	public function testHtml2wtHandlesDom2wikitextException(
		Exception $throw,
		Exception $expectedException
	) {
		$html = 'hi
';
		$page = $this->getExistingTestPage();
		$attribs = [
			'opts' => [
				'from' => ParsoidFormatHelper::FORMAT_HTML
			]
		] + self::DEFAULT_ATTRIBS;

		// Make a fake Parsoid that throws
		/** @var Parsoid|MockObject $parsoid */
		$parsoid = $this->createNoOpMock( Parsoid::class, [ 'dom2wikitext' ] );
		$parsoid->method( 'dom2wikitext' )->willThrowException( $throw );

		// Make a fake HtmlTransformFactory that returns an HtmlToContentTransform that uses the fake Parsoid.
		/** @var HtmlTransformFactory|MockObject $factory */
		$factory = $this->createNoOpMock( HtmlTransformFactory::class, [ 'getHtmlToContentTransform' ] );
		$factory->method( 'getHtmlToContentTransform' )->willReturn(
			new HtmlToContentTransform(
				$html,
				$page,
				$parsoid,
				[],
				$this->getPageConfigFactory( $page ),
				$this->getServiceContainer()->getContentHandlerFactory()
			)
		);

		// Use an HtmlInputTransformHelper that uses the fake HtmlTransformFactory, so it ends up
		// using the HtmlToContentTransform that has the fake Parsoid which throws an exception.
		$handler = $this->newParsoidHandler( [
			'getHtmlInputHelper' => function () use ( $factory, $page, $html ) {
				return new HtmlInputTransformHelper(
					new NullStatsdDataFactory(),
					$factory,
					$this->getServiceContainer()->getParsoidOutputStash(),
					$this->getServiceContainer()->getParserOutputAccess(),
					$this->getServiceContainer()->getPageStore(),
					$this->getServiceContainer()->getRevisionLookup(),
					[],
					$page,
					[ 'html' => $html ],
					[]
				);
			}
		] );

		try {
			$handler->html2wt( $page, $attribs, $html );
		} catch ( Exception $e ) {
			$this->assertSame( $expectedException->getCode(), $e->getCode() );
			$this->assertSame( $expectedException->getMessage(), $e->getMessage() );

			if ( $expectedException instanceof LocalizedHttpException ) {
				$this->assertEquals( $expectedException->getMessageValue(), $e->getMessageValue() );
				$this->assertSame( $expectedException->getErrorData(), $e->getErrorData() );
			}
			return;
		}

		// Kept outside the try block so the generic catch above cannot swallow it.
		$this->fail( 'Expected exception ' . get_class( $expectedException ) . ' not thrown' );
	}

	/**
	 * Data sets for testTryToCreatePageConfig(). The keys of each data set
	 * match the parameter names of the test method.
	 *
	 * @return Generator
	 */
	public function provideTryToCreatePageConfigData() {
		$en = $this->createLanguageMock( 'en' );
		$ar = $this->createLanguageMock( 'ar' );
		$de = $this->createLanguageMock( 'de' );

		yield 'Default attribs for tryToCreatePageConfig()' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => $en ],
			'wikitext' => null,
			'html2WtMode' => false,
			'expectedLanguage' => $en,
		];

		yield 'tryToCreatePageConfig with wikitext' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => $en ],
			'wikitext' => "=test=",
			'html2WtMode' => false,
			'expectedLanguage' => $en,
		];

		yield 'tryToCreatePageConfig with html2WtMode set to true' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => null ],
			'wikitext' => null,
			'html2WtMode' => true,
			'expectedLanguage' => $en,
		];

		yield 'tryToCreatePageConfig with both wikitext and html2WtMode' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => 'Test', 'pagelanguage' => $ar ],
			'wikitext' => "=header=",
			'html2WtMode' => true,
			'expectedLanguage' => $ar,
		];

		yield 'Try to create a page config with pageName set to empty string' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => '', 'pagelanguage' => $de ],
			'wikitext' => null,
			'html2WtMode' => false,
			'expectedLanguage' => $de,
		];

		yield 'Try to create a page config with pageName set to zero string' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => '0', 'pagelanguage' => $de ],
			'wikitext' => null,
			'html2WtMode' => false,
			'expectedLanguage' => $de,
		];

		yield 'Try to create a page config with no page language' => [
			'attribs' => [ 'oldid' => 1, 'pageName' => '', 'pagelanguage' => null ],
			'wikitext' => null,
			'html2WtMode' => false,
			'expectedLanguage' => $en,
		];
	}

	/**
	 * Checks content, link target and page language of the PageConfig
	 * returned by tryToCreatePageConfig().
	 *
	 * @covers \MediaWiki\Rest\Handler\ParsoidHandler::tryToCreatePageConfig
	 *
	 * @dataProvider provideTryToCreatePageConfigData
	 *
	 * @param array $attribs
	 * @param string|null $wikitext
	 * @param bool $html2WtMode
	 * @param Language $expectedLanguage
	 */
	public function testTryToCreatePageConfig(
		array $attribs,
		?string $wikitext,
		$html2WtMode,
		Language $expectedLanguage
	) {
		// Create a page, if needed, to test with oldid
		$origContent = 'Test content for ' . __METHOD__;
		$page = $this->getNonexistingTestPage();
		$this->editPage( $page, $origContent );

		// Explicitly supplied wikitext takes the place of the stored content.
		$expectedWikitext = $wikitext ?? $origContent;

		$pageConfig = $this->newParsoidHandler()->tryToCreatePageConfig( $attribs, $wikitext, $html2WtMode );

		$this->assertSame(
			$expectedWikitext,
			$pageConfig->getRevisionContent()->getContent( SlotRecord::MAIN )
		);

		// An empty page name is expected to resolve to 'Main Page'.
		$pageName = ( $attribs['pageName'] === '' ) ? 'Main Page' : $attribs['pageName'];
		$this->assertSame( $pageName, $pageConfig->getLinkTarget()->getPrefixedText() );

		$this->assertSame( $expectedLanguage->getCode(), $pageConfig->getPageLanguageBcp47()->getCode() );
	}

	/**
	 * Data sets for testTryToCreatePageConfigThrows(). The keys of each data
	 * set match the parameter names of the test method.
	 *
	 * @return Generator
	 */
	public function provideTryToCreatePageConfigDataThrows() {
		$en = $this->createLanguageMock( 'en' );

		yield "PageConfig with oldid that doesn't exist" => [
			'attribs' => [ 'oldid' => null, 'pageName' => 'Test', 'pagelanguage' => $en ],
			'wikitext' => null,
			'html2WtMode' => false,
		];

		yield 'PageConfig with a bad title' => [
			'attribs' => [ 'oldid' => null, 'pageName' => 'Special:Badtitle', 'pagelanguage' => $en ],
			'wikitext' => null,
			'html2WtMode' => false,
		];

		yield "PageConfig with a revision that doesn't exist" => [
			// 'oldid' is so large because we want to emulate a revision
			// that doesn't exist.
			'attribs' => [ 'oldid' => 12345678, 'pageName' => 'Test', 'pagelanguage' => $en ],
			'wikitext' => null,
			'html2WtMode' => false,
		];
	}

	/**
	 * Asserts that tryToCreatePageConfig() throws an HttpException with code 404.
	 *
	 * @covers \MediaWiki\Rest\Handler\ParsoidHandler::tryToCreatePageConfig
	 *
	 * @dataProvider provideTryToCreatePageConfigDataThrows
	 *
	 * @param array $attribs
	 * @param string|null $wikitext
	 * @param bool $html2WtMode
	 */
	public function testTryToCreatePageConfigThrows( array $attribs, $wikitext, $html2WtMode ) {
		$this->expectException( HttpException::class );
		$this->expectExceptionCode( 404 );

		$this->newParsoidHandler()->tryToCreatePageConfig( $attribs, $wikitext, $html2WtMode );
	}

	public static function provideRoundTripNoSelser() {
		yield 'space in heading' => [
			"==foo==\nsomething\n"
		];
	}

	public static function provideRoundTripNeedingSelser() {
		yield 'uppercase tags' => [ "test language conversion
', ], ]; yield [ $attribs, $revision, '>esttay anguagelay onversioncay<', [ 'content-type' => $htmlContentType, 'content-language' => $enPigLatin->toBcp47Code(), ] ]; }

	/**
	 * Runs languageConversion() and checks the JSON response: the 'html' body
	 * must contain the expected text and the 'html' headers must carry each
	 * expected header with exactly the expected value.
	 *
	 * @dataProvider provideLanguageConversion
	 *
	 * @param array $attribs
	 * @param array $revision
	 * @param string $expectedText
	 * @param array $expectedHeaders
	 */
	public function testLanguageConversion(
		array $attribs,
		array $revision,
		string $expectedText,
		array $expectedHeaders
	) {
		$handler = $this->newParsoidHandler();
		$pageConfig = $handler->tryToCreatePageConfig( $attribs, null, true );

		$stream = $handler->languageConversion( $pageConfig, $attribs, $revision )->getBody();
		$stream->rewind();
		$bundle = json_decode( $stream->getContents(), true );

		$this->assertNotEmpty( $bundle );
		$this->assertArrayHasKey( 'html', $bundle );

		$htmlPart = $bundle['html'];
		$this->assertArrayHasKey( 'body', $htmlPart );
		$this->assertStringContainsString( $expectedText, $htmlPart['body'] );

		foreach ( $expectedHeaders as $headerName => $headerValue ) {
			$this->assertArrayHasKey( $headerName, $bundle['html']['headers'] );
			$this->assertSame( $headerValue, $bundle['html']['headers'][$headerName] );
		}
	}

	public static function provideWt2html() {
		$profileVersion = '2.6.0';
		$htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/HTML/' . $profileVersion;
		$pbProfileUri = 'https://www.mediawiki.org/wiki/Specs/pagebundle/' . $profileVersion;
		$dpProfileUri = 'https://www.mediawiki.org/wiki/Specs/data-parsoid/' .
$profileVersion; $htmlContentType = "text/html; charset=utf-8; profile=\"$htmlProfileUri\""; $pbContentType = "application/json; charset=utf-8; profile=\"$pbProfileUri\""; $dpContentType = "application/json; charset=utf-8; profile=\"$dpProfileUri\""; $lintContentType = "application/json"; $htmlHeaders = [ 'content-type' => $htmlContentType, ]; $pbHeaders = [ 'content-type' => $pbContentType, ]; $lintHeaders = [ 'content-type' => $lintContentType, ]; // should get from a title and revision (html) /////////////////////////////////// $expectedText = [ '>First Revision Content<', ' 1, // will be replaced by a real revision id ]; yield 'should get from a title and revision (html)' => [ $attribs, null, $expectedText, $unexpectedText, $htmlHeaders ]; // should get from a title and revision (pagebundle) /////////////////////////////////// $expectedText = [ // bits of json '"body":"', 'First Revision Content', 'contentmodel' => 'wikitext', 'data-parsoid' => [ 'headers' => [ 'content-type' => $dpContentType, ], 'body' => [ 'counter' => 2, 'ids' => [ // NOTE: match "First Revision Content" 'mwAA' => [ 'dsr' => [ 0, 22, 0, 0 ] ], 'mwAQ' => [], 'mwAg' => [ 'dsr' => [ 0, 22, 0, 0 ] ], ], 'offsetType' => 'ucs2', // as provided in the input ] ], ]; $unexpectedText = []; $attribs = [ 'oldid' => 1, // will be replaced by a real revision id 'opts' => [ 'format' => ParsoidFormatHelper::FORMAT_PAGEBUNDLE ], // Ensure this is ucs2 so we have a ucs2 offsetType test since // Parsoid's rt-testing script is node.js based and hence needs // ucs2 offsets to function correctly! 
'offsetType' => 'ucs2', // make sure this is looped through to data-parsoid attribute ]; yield 'should get from a title and revision (pagebundle)' => [ $attribs, null, $expectedText, $unexpectedText, $pbHeaders ]; // should parse the given wikitext /////////////////////////////////// $wikitext = 'lorem ipsum'; $expectedText = [ '>lorem ipsum<', ' [ $attribs, $wikitext, $expectedText, $unexpectedText, $htmlHeaders ]; // should parse the given wikitext (body_only) /////////////////////////////////// $wikitext = 'lorem ipsum'; $expectedText = [ '>lorem ipsum<' ]; $unexpectedText = [ ' true ]; yield 'should parse the given wikitext (body_only)' => [ $attribs, $wikitext, $expectedText, $unexpectedText, $htmlHeaders ]; // should lint the given wikitext /////////////////////////////////// $wikitext = "{|\nhi\n|ho\n|}"; $expectedText = [ '"type":"fostered"', '"dsr"' ]; $unexpectedText = [ ' [ 'format' => ParsoidFormatHelper::FORMAT_LINT ] ]; yield 'should lint the given wikitext' => [ $attribs, $wikitext, $expectedText, $unexpectedText, $lintHeaders ]; // should lint the given wikitext 2 /////////////////////////////////// $wikitext = "{|\n|wide\n|wide\n|wide\n|wide\n|wide\n|wide\n|}"; if ( ExtensionRegistry::getInstance()->isLoaded( 'Linter' ) ) { $expectedText = []; } else { $expectedText = [ '"type":"large-tables"', '"dsr"' ]; } $unexpectedText = [ ' [ 'format' => ParsoidFormatHelper::FORMAT_LINT ] ]; yield 'should lint the given wikitext 2' => [ $attribs, $wikitext, $expectedText, $unexpectedText, $lintHeaders ]; // should lint the given wikitext 3 /////////////////////////////////// // Multibyte characters before lint error $wikitext = "ăăă ''test"; $expectedText = [ '"type":"missing-end-tag"', // '"dsr":[7,13,2,0]', // 'byte' offsets '"dsr":[4,10,2,0]', // 'ucs2' offsets ]; $unexpectedText = [ ' [ 'format' => ParsoidFormatHelper::FORMAT_LINT ], 'offsetType' => 'ucs2', ]; yield 'should lint the given wikitext 3' => [ $attribs, $wikitext, $expectedText, 
$unexpectedText, $lintHeaders ];

		// should parse the given JSON ///////////////////////////////////
		$wikitext = '{ "color": "green" }';
		// should be rendered as table, not interpreted as wikitext
		$expectedText = [ '>color', '>green', '' ];
		$attribs = [
			'opts' => [
				'contentmodel' => CONTENT_MODEL_JSON,
			]
		];
		yield 'should parse the given JSON' => [
			$attribs,
			$wikitext,
			$expectedText,
			$unexpectedText,
			$htmlHeaders
		];
	}

	/**
	 * Runs wt2html either on the given text or on a stored, non-current
	 * revision, then checks the response headers and body.
	 *
	 * Entries of $expectedData with integer keys are snippets that must occur
	 * in the raw body. Entries with string keys are compared against the body
	 * decoded as JSON; for 'data-parsoid' only the 'body' member is compared.
	 *
	 * @dataProvider provideWt2html
	 *
	 * @param array $attribs
	 * @param string|null $text
	 * @param array $expectedData
	 * @param string[] $unexpectedHtml
	 * @param string[] $expectedHeaders
	 */
	public function testWt2html(
		array $attribs,
		?string $text,
		array $expectedData,
		array $unexpectedHtml,
		array $expectedHeaders = []
	) {
		$htmlProfileUri = 'https://www.mediawiki.org/wiki/Specs/html/2.6.0';
		// Only applies when the data set did not specify its own content-type.
		// NOTE(review): a 'text/x-wiki' default looks odd for HTML output - confirm.
		$expectedHeaders += [
			'content-type' => "text/x-wiki; charset=utf-8; profile=\"$htmlProfileUri\"",
		];

		$page = $this->getNonexistingTestPage( __METHOD__ );
		$status = $this->editPage( $page, 'First Revision Content' );
		$currentRev = $status->getNewRevision();

		// Fill in everything the data set left unspecified.
		$attribs += self::DEFAULT_ATTRIBS;
		$attribs['opts'] += self::DEFAULT_ATTRIBS['opts'];
		$attribs['opts']['from'] ??= 'wikitext';
		$attribs['opts']['format'] ??= 'html';
		$attribs['envOptions'] += self::DEFAULT_ATTRIBS['envOptions'];

		if ( $attribs['oldid'] ) {
			// Set the actual ID of an existing revision
			$attribs['oldid'] = $currentRev->getId();

			// Make sure we are testing against a non-current revision
			$this->editPage( $page, 'this is not the content you are looking for' );
		}

		$handler = $this->newParsoidHandler();

		$revTextOrId = $text ?? $attribs['oldid'] ?? null;
		$pageConfig = $this->getPageConfig( $page, $revTextOrId );

		$response = $handler->wt2html( $pageConfig, $attribs, $text );
		$body = $response->getBody();
		$body->rewind();
		$data = $body->getContents();

		foreach ( $expectedHeaders as $name => $value ) {
			$responseHeaderValue = $response->getHeaderLine( $name );
			if ( $name === 'content-type' ) {
				$this->assertTrue( $this->contentTypeMatcher( $value, $responseHeaderValue ) );
			} else {
				$this->assertSame( $value, $responseHeaderValue );
			}
		}

		// HACK: try to parse as json, just in case:
		// The second argument of json_decode() is the boolean $associative flag,
		// not a bitmask, so pass true rather than JSON_OBJECT_AS_ARRAY.
		$jsonData = json_decode( $data, true );

		foreach ( $expectedData as $index => $exp ) {
			if ( is_int( $index ) ) {
				$this->assertStringContainsString( $exp, $data );
			} else {
				// Fail with a clear message when the body was not a JSON object.
				$this->assertIsArray( $jsonData, 'Response body should decode to a JSON object' );
				$this->assertArrayHasKey( $index, $jsonData );
				if ( $index === 'data-parsoid' ) {
					// FIXME: Assert headers as well
					$this->assertArrayHasKey( 'body', $jsonData[$index] );
					$this->assertSame( $exp['body'], $jsonData[$index]['body'] );
				} else {
					$this->assertSame( $exp, $jsonData[$index] );
				}
			}
		}

		foreach ( $unexpectedHtml as $exp ) {
			$this->assertStringNotContainsString( $exp, $data );
		}
	}

	public function testLenientRevisionHandling() {
		$page1 = $this->getNonexistingTestPage( "Page1" );
		$status = $this->editPage( $page1, 'Page 1 revision content' );
		$rev1 = $status->getNewRevision();

		$page2 = $this->getNonexistingTestPage( "Page2" );
		$status = $this->editPage( $page2, '#REDIRECT [[Page1]]' );
		$rev2 = $status->getNewRevision();

		$handler = $this->newParsoidHandler();

		// Test 1: