diff options
Diffstat (limited to 'includes/language')
-rw-r--r-- | includes/language/FormatterFactory.php | 9 | ||||
-rw-r--r-- | includes/language/Language.php | 50 | ||||
-rw-r--r-- | includes/language/Message/Message.php | 4 | ||||
-rw-r--r-- | includes/language/MessageCache.php | 156 | ||||
-rw-r--r-- | includes/language/MessageParser.php | 243 |
5 files changed, 288 insertions, 174 deletions
diff --git a/includes/language/FormatterFactory.php b/includes/language/FormatterFactory.php index 6a038713ca41..bf92fffb0e7b 100644 --- a/includes/language/FormatterFactory.php +++ b/includes/language/FormatterFactory.php @@ -8,7 +8,6 @@ use MediaWiki\Languages\LanguageFactory; use MediaWiki\Status\StatusFormatter; use MediaWiki\Title\TitleFormatter; use MediaWiki\User\UserIdentityUtils; -use MessageCache; use MessageLocalizer; use Psr\Log\LoggerInterface; @@ -19,7 +18,7 @@ use Psr\Log\LoggerInterface; */ class FormatterFactory { - private MessageCache $messageCache; + private MessageParser $messageParser; private TitleFormatter $titleFormatter; private HookContainer $hookContainer; private UserIdentityUtils $userIdentityUtils; @@ -27,14 +26,14 @@ class FormatterFactory { private LoggerInterface $logger; public function __construct( - MessageCache $messageCache, + MessageParser $messageParser, TitleFormatter $titleFormatter, HookContainer $hookContainer, UserIdentityUtils $userIdentityUtils, LanguageFactory $languageFactory, LoggerInterface $logger ) { - $this->messageCache = $messageCache; + $this->messageParser = $messageParser; $this->titleFormatter = $titleFormatter; $this->hookContainer = $hookContainer; $this->userIdentityUtils = $userIdentityUtils; @@ -43,7 +42,7 @@ class FormatterFactory { } public function getStatusFormatter( MessageLocalizer $messageLocalizer ): StatusFormatter { - return new StatusFormatter( $messageLocalizer, $this->messageCache, $this->logger ); + return new StatusFormatter( $messageLocalizer, $this->messageParser, $this->logger ); } public function getBlockErrorFormatter( LocalizationContext $context ): BlockErrorFormatter { diff --git a/includes/language/Language.php b/includes/language/Language.php index 1c9b047550d4..34ba77884d19 100644 --- a/includes/language/Language.php +++ b/includes/language/Language.php @@ -3143,19 +3143,18 @@ class Language implements Bcp47Code { * @return string */ public function formatNum( $number ) { - 
return $this->formatNumInternal( (string)$number, false, false ); + return $this->formatNumInternal( (string)$number, false ); } /** * Internal implementation function, shared between formatNum and formatNumNoSeparators. * * @param string $number The stringification of a valid PHP number - * @param bool $noTranslate Whether to translate digits and separators * @param bool $noSeparators Whether to add separators * @return string */ private function formatNumInternal( - string $number, bool $noTranslate, bool $noSeparators + string $number, bool $noSeparators ): string { $translateNumerals = $this->config->get( MainConfigNames::TranslateNumerals ); @@ -3184,8 +3183,8 @@ class Language implements Bcp47Code { // numbers in the string. Don't split on NAN/INF in this legacy // case as they are likely to be found embedded inside non-numeric // text. - return preg_replace_callback( "/{$validNumberRe}/", function ( $m ) use ( $noTranslate, $noSeparators ) { - return $this->formatNumInternal( $m[0], $noTranslate, $noSeparators ); + return preg_replace_callback( "/{$validNumberRe}/", function ( $m ) use ( $noSeparators ) { + return $this->formatNumInternal( $m[0], $noSeparators ); }, $number ); } @@ -3211,15 +3210,11 @@ class Language implements Bcp47Code { // enough. We still need to do decimal separator // transformation, though. For example, 1234.56 becomes 1234,56 // in pl with $minimumGroupingDigits = 2. - if ( !$noTranslate ) { - $number = strtr( $number, $separatorTransformTable ?: [] ); - } + $number = strtr( $number, $separatorTransformTable ?: [] ); } elseif ( $number === '-0' ) { // Special case to ensure we don't lose the minus sign by // converting to an int. 
- if ( !$noTranslate ) { - $number = strtr( $number, $separatorTransformTable ?: [] ); - } + $number = strtr( $number, $separatorTransformTable ?: [] ); } else { // NumberFormatter supports separator transformation, // but it does not know all languages MW @@ -3227,16 +3222,7 @@ class Language implements Bcp47Code { // customisation. So manually set it. $fmt = clone $fmt; - if ( $noTranslate ) { - $fmt->setSymbol( - NumberFormatter::DECIMAL_SEPARATOR_SYMBOL, - '.' - ); - $fmt->setSymbol( - NumberFormatter::GROUPING_SEPARATOR_SYMBOL, - ',' - ); - } elseif ( $separatorTransformTable ) { + if ( $separatorTransformTable ) { $fmt->setSymbol( NumberFormatter::DECIMAL_SEPARATOR_SYMBOL, $separatorTransformTable[ '.' ] ?? '.' @@ -3260,19 +3246,17 @@ class Language implements Bcp47Code { } } - if ( !$noTranslate ) { - if ( $translateNumerals ) { - // This is often unnecessary: PHP's NumberFormatter will often - // do the digit transform itself (T267614) - $s = $this->digitTransformTable(); - if ( $s ) { - $number = strtr( $number, $s ); - } + if ( $translateNumerals ) { + // This is often unnecessary: PHP's NumberFormatter will often + // do the digit transform itself (T267614) + $s = $this->digitTransformTable(); + if ( $s ) { + $number = strtr( $number, $s ); } - # T10327: Make our formatted numbers prettier by using a - # proper Unicode 'minus' character. - $number = strtr( $number, [ '-' => "\u{2212}" ] ); } + # T10327: Make our formatted numbers prettier by using a + # proper Unicode 'minus' character. + $number = strtr( $number, [ '-' => "\u{2212}" ] ); // Remove any LRM or RLM characters generated from NumberFormatter, // since directionality is handled outside of this context. 
@@ -3293,7 +3277,7 @@ class Language implements Bcp47Code { * @return string */ public function formatNumNoSeparators( $number ) { - return $this->formatNumInternal( (string)$number, false, true ); + return $this->formatNumInternal( (string)$number, true ); } /** diff --git a/includes/language/Message/Message.php b/includes/language/Message/Message.php index afec3409d43a..04c52a133244 100644 --- a/includes/language/Message/Message.php +++ b/includes/language/Message/Message.php @@ -1450,7 +1450,7 @@ class Message implements Stringable, MessageSpecifier, Serializable { * @return ParserOutput Wikitext parsed into HTML. */ protected function parseText( string $string ): ParserOutput { - $out = MediaWikiServices::getInstance()->getMessageCache()->parseWithPostprocessing( + $out = MediaWikiServices::getInstance()->getMessageParser()->parse( $string, $this->contextPage ?? PageReferenceValue::localReference( NS_SPECIAL, 'Badtitle/Message' ), /*linestart*/ true, @@ -1472,7 +1472,7 @@ class Message implements Stringable, MessageSpecifier, Serializable { * @return string Wikitext with {{-constructs substituted with its parsed result. 
*/ protected function transformText( $string ) { - return MediaWikiServices::getInstance()->getMessageCache()->transform( + return MediaWikiServices::getInstance()->getMessageParser()->transform( $string, $this->isInterface, $this->getLanguage(), diff --git a/includes/language/MessageCache.php b/includes/language/MessageCache.php index 45eff8cf547b..490f9f0fc4bc 100644 --- a/includes/language/MessageCache.php +++ b/includes/language/MessageCache.php @@ -20,13 +20,13 @@ use MediaWiki\Config\ServiceOptions; use MediaWiki\Content\Content; -use MediaWiki\Context\RequestContext; use MediaWiki\Deferred\DeferredUpdates; use MediaWiki\HookContainer\HookContainer; use MediaWiki\HookContainer\HookRunner; use MediaWiki\Language\ILanguageConverter; use MediaWiki\Language\Language; use MediaWiki\Language\MessageCacheUpdate; +use MediaWiki\Language\MessageParser; use MediaWiki\Languages\LanguageConverterFactory; use MediaWiki\Languages\LanguageFactory; use MediaWiki\Languages\LanguageFallback; @@ -37,9 +37,6 @@ use MediaWiki\MediaWikiServices; use MediaWiki\Page\PageIdentity; use MediaWiki\Page\PageReference; use MediaWiki\Page\PageReferenceValue; -use MediaWiki\Parser\Parser; -use MediaWiki\Parser\ParserFactory; -use MediaWiki\Parser\ParserOptions; use MediaWiki\Parser\ParserOutput; use MediaWiki\Revision\SlotRecord; use MediaWiki\StubObject\StubObject; @@ -139,23 +136,6 @@ class MessageCache implements LoggerAwareInterface { /** @var string[] */ private $rawHtmlMessages; - /** - * Message cache has its own parser which it uses to transform messages - * @var ParserOptions - */ - private $parserOptions; - - /** @var Parser[] Lazy-created via self::getParser() */ - private array $parsers = []; - private int $curParser = -1; - - /** - * Parsing some messages may require parsing another message first, due to special page - * transclusion and some hooks (T372891). This constant is the limit of nesting depth where - * we'll display an error instead of the other message. 
- */ - private const MAX_PARSER_DEPTH = 5; - /** @var WANObjectCache */ private $wanCache; /** @var BagOStuff */ @@ -178,8 +158,8 @@ class MessageCache implements LoggerAwareInterface { private $languageFallback; /** @var HookRunner */ private $hookRunner; - /** @var ParserFactory */ - private $parserFactory; + /** @var MessageParser */ + private $messageParser; /** @var (string|callable)[]|null */ private $messageKeyOverrides; @@ -220,7 +200,7 @@ class MessageCache implements LoggerAwareInterface { * @param LanguageNameUtils $languageNameUtils * @param LanguageFallback $languageFallback * @param HookContainer $hookContainer - * @param ParserFactory $parserFactory + * @param MessageParser $messageParser */ public function __construct( WANObjectCache $wanCache, @@ -235,7 +215,7 @@ class MessageCache implements LoggerAwareInterface { LanguageNameUtils $languageNameUtils, LanguageFallback $languageFallback, HookContainer $hookContainer, - ParserFactory $parserFactory + MessageParser $messageParser ) { $this->wanCache = $wanCache; $this->clusterCache = $clusterCache; @@ -249,7 +229,7 @@ class MessageCache implements LoggerAwareInterface { $this->languageNameUtils = $languageNameUtils; $this->languageFallback = $languageFallback; $this->hookRunner = new HookRunner( $hookContainer ); - $this->parserFactory = $parserFactory; + $this->messageParser = $messageParser; // limit size $this->cache = new MapCacheLRU( self::MAX_REQUEST_LANGUAGES ); @@ -267,34 +247,6 @@ class MessageCache implements LoggerAwareInterface { } /** - * ParserOptions is lazily initialised. - * - * @return ParserOptions - */ - private function getParserOptions() { - if ( !$this->parserOptions ) { - $context = RequestContext::getMain(); - $user = $context->getUser(); - if ( !$user->isSafeToLoad() ) { - // It isn't safe to use the context user yet, so don't try to get a - // ParserOptions for it. And don't cache this ParserOptions - // either. 
- $po = ParserOptions::newFromAnon(); - $po->setAllowUnsafeRawHtml( false ); - return $po; - } - - $this->parserOptions = ParserOptions::newFromContext( $context ); - // Messages may take parameters that could come - // from malicious sources. As a precaution, disable - // the <html> parser tag when parsing messages. - $this->parserOptions->setAllowUnsafeRawHtml( false ); - } - - return $this->parserOptions; - } - - /** * Try to load the cache from APC. * * @param string $code Optional language code, see documentation of load(). @@ -1456,6 +1408,8 @@ class MessageCache implements LoggerAwareInterface { } /** + * @deprecated since 1.44 use MessageParser::transform() + * * @param string $message * @param bool $interface * @param Language|null $language @@ -1463,48 +1417,14 @@ class MessageCache implements LoggerAwareInterface { * @return string */ public function transform( $message, $interface = false, $language = null, ?PageReference $page = null ) { - // Avoid creating parser if nothing to transform - if ( !str_contains( $message, '{{' ) ) { - return $message; - } - - $popts = $this->getParserOptions(); - $popts->setInterfaceMessage( $interface ); - $popts->setTargetLanguage( $language ); - - $userlang = $popts->setUserLang( $language ); - try { - $this->curParser++; - $parser = $this->getParser(); - if ( !$parser ) { - return '<span class="error">Message transform depth limit exceeded</span>'; - } - $message = $parser->transformMsg( $message, $popts, $page ); - } finally { - $this->curParser--; - } - $popts->setUserLang( $userlang ); - - return $message; - } - - /** - * You should increment $this->curParser before calling this method and decrement it after - * to support recursive calls to message parsing. - */ - private function getParser(): ?Parser { - if ( $this->curParser >= self::MAX_PARSER_DEPTH ) { - $this->logger->debug( __METHOD__ . 
": Refusing to create a new parser with index {$this->curParser}" ); - return null; - } - if ( !isset( $this->parsers[ $this->curParser ] ) ) { - $this->logger->debug( __METHOD__ . ": Creating a new parser with index {$this->curParser}" ); - $this->parsers[ $this->curParser ] = $this->parserFactory->create(); - } - return $this->parsers[ $this->curParser ]; + return $this->messageParser->transform( + $message, $interface, $language, $page ); } /** + * @deprecated since 1.44 use MessageParser::parse() + * @internal + * * @param string $text * @param PageReference $contextPage * @param bool $linestart Whether this should be parsed in start-of-line @@ -1513,7 +1433,6 @@ class MessageCache implements LoggerAwareInterface { * (defaults to false) * @param Language|StubUserLang|string|null $language Language code * @return ParserOutput - * @internal */ public function parseWithPostprocessing( string $text, PageReference $contextPage, @@ -1521,48 +1440,25 @@ class MessageCache implements LoggerAwareInterface { bool $interface = false, $language = null ): ParserOutput { - $options = [ - 'allowTOC' => false, - 'enableSectionEditLinks' => false, - // Wrapping messages in an extra <div> is probably not expected. If - // they're outside the content area they probably shouldn't be - // targeted by CSS that's targeting the parser output, and if - // they're inside they already are from the outer div. 
- 'unwrap' => true, - 'userLang' => $language, - ]; - // Parse $text to yield a ParserOutput - $po = $this->parse( $text, $contextPage, $linestart, $interface, $language ); - if ( is_string( $po ) ) { - $po = new ParserOutput( $po ); - } - // Run the post-processing pipeline - return MediaWikiServices::getInstance()->getDefaultOutputPipeline() - ->run( $po, $this->getParserOptions(), $options ); + return $this->messageParser->parse( + $text, $contextPage, $linestart, $interface, $language ); } /** + * @deprecated since 1.44 use MessageParser::parseWithoutPostprocessing() + * * @param string $text * @param PageReference|null $page * @param bool $linestart Whether this is at the start of a line * @param bool $interface Whether this is an interface message * @param Language|StubUserLang|string|null $language Language code - * @return ParserOutput|string + * @return ParserOutput */ - public function parse( $text, ?PageReference $page = null, $linestart = true, - $interface = false, $language = null + public function parse( $text, ?PageReference $page = null, + $linestart = true, $interface = false, $language = null ) { // phpcs:ignore MediaWiki.Usage.DeprecatedGlobalVariables.Deprecated$wgTitle global $wgTitle; - - $popts = $this->getParserOptions(); - $popts->setInterfaceMessage( $interface ); - - if ( is_string( $language ) ) { - $language = $this->langFactory->getLanguage( $language ); - } - $popts->setTargetLanguage( $language ); - if ( !$page ) { $logger = LoggerFactory::getInstance( 'GlobalTitleFail' ); $logger->info( @@ -1581,16 +1477,8 @@ class MessageCache implements LoggerAwareInterface { ); } - try { - $this->curParser++; - $parser = $this->getParser(); - if ( !$parser ) { - return '<span class="error">Message parse depth limit exceeded</span>'; - } - return $parser->parse( $text, $page, $popts, $linestart ); - } finally { - $this->curParser--; - } + return $this->messageParser->parseWithoutPostprocessing( + $text, $page, $linestart, $interface, $language ); 
} public function disable() {
diff --git a/includes/language/MessageParser.php b/includes/language/MessageParser.php
new file mode 100644
index 000000000000..8f595fbe8f23
--- /dev/null
+++ b/includes/language/MessageParser.php
@@ -0,0 +1,243 @@
+<?php
+
+namespace MediaWiki\Language;
+
+use MediaWiki\Context\RequestContext;
+use MediaWiki\DAO\WikiAwareEntity;
+use MediaWiki\Languages\LanguageFactory;
+use MediaWiki\OutputTransform\OutputTransformPipeline;
+use MediaWiki\Page\PageReference;
+use MediaWiki\Page\PageReferenceValue;
+use MediaWiki\Parser\Parser;
+use MediaWiki\Parser\ParserFactory;
+use MediaWiki\Parser\ParserOptions;
+use MediaWiki\Parser\ParserOutput;
+use MediaWiki\StubObject\StubUserLang;
+use Psr\Log\LoggerInterface;
+
+/**
+ * Service for transformation of interface message text.
+ *
+ * @since 1.44
+ */
+class MessageParser {
+	private const DEPTH_EXCEEDED_MESSAGE =
+		'<span class="error">Message parse depth limit exceeded</span>';
+
+	private ParserFactory $parserFactory;
+	private OutputTransformPipeline $outputPipeline;
+	private LanguageFactory $langFactory;
+	private LoggerInterface $logger;
+
+	/** @var ParserOptions|null Lazy-initialised */
+	private ?ParserOptions $parserOptions = null;
+
+	/** @var Parser[] Cached Parser objects */
+	private array $parsers = [];
+	/** @var int Index into $this->parsers for the active Parser */
+	private int $curParser = -1;
+
+	/**
+	 * Parsing some messages may require parsing another message first, due to special page
+	 * transclusion and some hooks (T372891). This constant is the limit of nesting depth where
+	 * we'll display an error instead of the other message.
+	 */
+	private const MAX_PARSER_DEPTH = 5;
+
+	public function __construct(
+		ParserFactory $parserFactory,
+		OutputTransformPipeline $outputPipeline,
+		LanguageFactory $languageFactory,
+		LoggerInterface $logger
+	) {
+		$this->parserFactory = $parserFactory;
+		$this->outputPipeline = $outputPipeline;
+		$this->langFactory = $languageFactory;
+		$this->logger = $logger;
+	}
+
+	/**
+	 * Get the ParserOptions shared by all parsing done here, with raw <html> disabled.
+	 * Cached once the context user is safe to load; until then a fresh anonymous one is used.
+	 */
+	private function getParserOptions(): ParserOptions {
+		if ( !$this->parserOptions ) {
+			$context = RequestContext::getMain();
+			$user = $context->getUser();
+			if ( !$user->isSafeToLoad() ) {
+				// It isn't safe to use the context user yet, so don't try to get a
+				// ParserOptions for it. And don't cache this ParserOptions either.
+				$po = ParserOptions::newFromAnon();
+				$po->setAllowUnsafeRawHtml( false );
+				return $po;
+			}
+
+			$this->parserOptions = ParserOptions::newFromContext( $context );
+			// Messages may take parameters that could come from malicious sources.
+			// As a precaution, disable the <html> parser tag when parsing messages.
+			$this->parserOptions->setAllowUnsafeRawHtml( false );
+		}
+
+		return $this->parserOptions;
+	}
+
+	/**
+	 * Run message text through the preprocessor, expanding parser functions
+	 *
+	 * @param string $message Message text; returned unchanged if it contains no "{{"
+	 * @param bool $interface Whether this is an interface message
+	 * @param Language|string|null $language Language object or language code
+	 * @param PageReference|null $page The context page, or null to use a placeholder
+	 * @return string Transformed text, or an error message if the depth limit is exceeded
+	 */
+	public function transform(
+		$message,
+		$interface = false,
+		$language = null,
+		?PageReference $page = null
+	) {
+		// Avoid creating parser if nothing to transform
+		if ( !str_contains( $message, '{{' ) ) {
+			return $message;
+		}
+		if ( is_string( $language ) ) {
+			$language = $this->langFactory->getLanguage( $language );
+		}
+
+		$popts = $this->getParserOptions();
+		$popts->setInterfaceMessage( $interface );
+		$popts->setTargetLanguage( $language );
+		$page ??= $this->getPlaceholderTitle();
+
+		$parser = $this->acquireParser();
+		if ( !$parser ) {
+			return self::DEPTH_EXCEEDED_MESSAGE;
+		}
+		// Only override the user language once a parser is held, so the early return
+		// above cannot leave the shared ParserOptions stuck in the wrong language.
+		$oldUserLang = $language ? $popts->setUserLang( $language ) : null;
+		try {
+			return $parser->transformMsg( $message, $popts, $page );
+		} finally {
+			$this->releaseParser( $parser );
+			if ( $oldUserLang ) {
+				$popts->setUserLang( $oldUserLang );
+			}
+		}
+	}
+
+	/**
+	 * Parse message text and run the output through the post-processing pipeline
+	 *
+	 * @param string $text
+	 * @param ?PageReference $contextPage The context page, or null to use a placeholder
+	 * @param bool $lineStart Whether this should be parsed in start-of-line context
+	 * @param bool $interface Whether this is an interface message
+	 * @param Language|StubUserLang|string|null $language Language code
+	 * @return ParserOutput
+	 */
+	public function parse(
+		string $text,
+		?PageReference $contextPage = null,
+		bool $lineStart = true,
+		bool $interface = false,
+		$language = null
+	): ParserOutput {
+		$options = [
+			'allowTOC' => false,
+			'enableSectionEditLinks' => false,
+			// Wrapping messages in an extra <div> is probably not expected. If they're outside
+			// the content area they probably shouldn't be targeted by CSS that's targeting the
+			// parser output, and if they're inside they already are from the outer div.
+			'unwrap' => true,
+			'userLang' => $language,
+		];
+		// Parse $text to yield a ParserOutput
+		$po = $this->parseWithoutPostprocessing( $text, $contextPage, $lineStart, $interface, $language );
+		// Run the post-processing pipeline
+		return $this->outputPipeline->run( $po, $this->getParserOptions(), $options );
+	}
+
+	/**
+	 * Parse message text with a cached parser, skipping the post-processing pipeline
+	 *
+	 * @param string $text
+	 * @param ?PageReference $page The context title, or null to use a placeholder
+	 * @param bool $lineStart Whether this is at the start of a line
+	 * @param bool $interface Whether this is an interface message
+	 * @param Language|StubUserLang|string|null $language Language code
+	 * @return ParserOutput Parser output, or an error message if the depth limit is exceeded
+	 */
+	public function parseWithoutPostprocessing(
+		$text,
+		?PageReference $page = null,
+		$lineStart = true,
+		$interface = false,
+		$language = null
+	): ParserOutput {
+		$popts = $this->getParserOptions();
+		$popts->setInterfaceMessage( $interface );
+
+		if ( is_string( $language ) ) {
+			$language = $this->langFactory->getLanguage( $language );
+		}
+		$popts->setTargetLanguage( $language );
+
+		$page ??= $this->getPlaceholderTitle();
+
+		$parser = $this->acquireParser();
+		if ( !$parser ) {
+			return new ParserOutput( self::DEPTH_EXCEEDED_MESSAGE );
+		}
+		try {
+			return $parser->parse( $text, $page, $popts, $lineStart );
+		} finally {
+			$this->releaseParser( $parser );
+		}
+	}
+
+	/** Get the placeholder page used when the caller does not supply a context page */
+	private function getPlaceholderTitle(): PageReference {
+		return new PageReferenceValue(
+			NS_SPECIAL,
+			'Badtitle/MessageParser',
+			WikiAwareEntity::LOCAL
+		);
+	}
+
+	/**
+	 * Attempt to get a free parser from the cache. If none exists, create one,
+	 * up to a limit of MAX_PARSER_DEPTH. If the limit is exceeded, return null.
+	 *
+	 * If a parser is returned, it must be released with releaseParser().
+	 *
+	 * @return Parser|null
+	 */
+	private function acquireParser(): ?Parser {
+		$index = $this->curParser + 1;
+		if ( $index >= self::MAX_PARSER_DEPTH ) {
+			$this->logger->debug( __METHOD__ . ": Refusing to create a new parser with index {$index}" );
+			return null;
+		}
+		if ( !isset( $this->parsers[ $index ] ) ) {
+			$this->logger->debug( __METHOD__ . ": Creating a new parser with index {$index}" );
+			$this->parsers[ $index ] = $this->parserFactory->create();
+		}
+		$this->curParser = $index;
+		return $this->parsers[ $index ];
+	}
+
+	/**
+	 * Release a parser previously acquired by acquireParser().
+	 *
+	 * @param Parser $parser
+	 */
+	private function releaseParser( Parser $parser ) {
+		if ( ( $this->parsers[$this->curParser] ?? null ) !== $parser ) {
+			throw new \LogicException( 'releaseParser called with the wrong ' .
+				"parser instance: #{$this->curParser} = " .
+				gettype( $this->parsers[$this->curParser] ?? null ) );
+		}
+		$this->curParser--;
+	}
+}
|