diff options
-rw-r--r-- | includes/diff/TextSlotDiffRenderer.php | 5 | ||||
-rw-r--r-- | includes/languages/LanguageZh.php | 15 | ||||
-rw-r--r-- | tests/phpunit/includes/languages/LanguageZhTest.php | 20 |
3 files changed, 35 insertions, 5 deletions
diff --git a/includes/diff/TextSlotDiffRenderer.php b/includes/diff/TextSlotDiffRenderer.php
index c254c08f1897..22ca94ce9124 100644
--- a/includes/diff/TextSlotDiffRenderer.php
+++ b/includes/diff/TextSlotDiffRenderer.php
@@ -92,14 +92,15 @@ class TextSlotDiffRenderer extends SlotDiffRenderer {
 	 * Convenience helper to use getTextDiff without an instance.
 	 * @param string $oldText
 	 * @param string $newText
+	 * @param array $options
 	 * @return string
 	 */
-	public static function diff( $oldText, $newText ) {
+	public static function diff( $oldText, $newText, $options = [] ) {
 		/** @var TextSlotDiffRenderer $slotDiffRenderer */
 		$slotDiffRenderer = MediaWikiServices::getInstance()
 			->getContentHandlerFactory()
 			->getContentHandler( CONTENT_MODEL_TEXT )
-			->getSlotDiffRenderer( RequestContext::getMain() );
+			->getSlotDiffRenderer( RequestContext::getMain(), $options );
 		'@phan-var TextSlotDiffRenderer $slotDiffRenderer';
 		return $slotDiffRenderer->getTextDiff( $oldText, $newText );
 	}
diff --git a/includes/languages/LanguageZh.php b/includes/languages/LanguageZh.php
index 4e23ca3934bd..11aa6be44480 100644
--- a/includes/languages/LanguageZh.php
+++ b/includes/languages/LanguageZh.php
@@ -29,13 +29,22 @@
  */
 class LanguageZh extends LanguageZh_hans {
 	/**
-	 * this should give much better diff info
+	 * Add a formfeed character between each non-ASCII character, so that
+	 * "word-level" diffs will effectively operate on a character level. The FF
+	 * characters are stripped out by unsegmentForDiff().
+	 *
+	 * We use FF because it is the least used character that is matched by
+	 * PCRE's \s class.
+	 *
+	 * In the unlikely event that an FF character appears in the input, it will
+	 * be displayed in the diff as a replacement character.
 	 *
 	 * @param string $text
 	 * @return string
 	 */
 	public function segmentForDiff( $text ) {
-		return preg_replace( '/[\xc0-\xff][\x80-\xbf]*/', ' $0', $text );
+		$text = str_replace( "\x0c", "\u{FFFD}", $text );
+		return preg_replace( '/[\xc0-\xff][\x80-\xbf]*/', "\x0c$0", $text );
 	}
 
 	/**
@@ -43,7 +52,7 @@ class LanguageZh extends LanguageZh_hans {
 	 * @return string
 	 */
 	public function unsegmentForDiff( $text ) {
-		return preg_replace( '/ ([\xc0-\xff][\x80-\xbf]*)/', '$1', $text );
+		return str_replace( "\x0c", '', $text );
 	}
 
 	/**
diff --git a/tests/phpunit/includes/languages/LanguageZhTest.php b/tests/phpunit/includes/languages/LanguageZhTest.php
new file mode 100644
index 000000000000..815890d82d90
--- /dev/null
+++ b/tests/phpunit/includes/languages/LanguageZhTest.php
@@ -0,0 +1,20 @@
+<?php
+
+use MediaWiki\MainConfigNames;
+
+/**
+ * @covers LanguageZh
+ */
+class LanguageZhTest extends LanguageClassesTestCase {
+	public function testSegmentForDiff() {
+		$this->overrideConfigValue( MainConfigNames::DiffEngine, 'php' );
+		$lhs = '维基';
+		$rhs = '维基百科';
+		$diff = TextSlotDiffRenderer::diff( $lhs, $rhs, [ 'contentLanguage' => 'zh' ] );
+		// Check that only the second part is highlighted, and word segmentation markers are not present
+		$this->assertStringContainsString(
+			'<div>维基<ins class="diffchange diffchange-inline">百科</ins></div>',
+			$diff
+		);
+	}
+}