diff options
Diffstat (limited to 'includes/language/LanguageNameUtils.php')
-rw-r--r-- | includes/language/LanguageNameUtils.php | 319 |
1 files changed, 319 insertions, 0 deletions
diff --git a/includes/language/LanguageNameUtils.php b/includes/language/LanguageNameUtils.php new file mode 100644 index 000000000000..08d9ab3e0d87 --- /dev/null +++ b/includes/language/LanguageNameUtils.php @@ -0,0 +1,319 @@ +<?php +/** + * Internationalisation code. + * See https://www.mediawiki.org/wiki/Special:MyLanguage/Localisation for more information. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Language + */ + +/** + * @defgroup Language Language + */ + +namespace MediaWiki\Languages; + +use HashBagOStuff; +use Hooks; +use MediaWiki\Config\ServiceOptions; +use MediaWikiTitleCodec; +use MWException; +use Wikimedia\Assert\Assert; + +/** + * @ingroup Language + * + * A service that provides utilities to do with language names and codes. + * + * @since 1.34 + */ +class LanguageNameUtils { + /** + * Return autonyms in getLanguageName(s). + */ + const AUTONYMS = null; + + /** + * Return all known languages in getLanguageName(s). + */ + const ALL = 'all'; + + /** + * Return in getLanguageName(s) only the languages that are defined by MediaWiki. + */ + const DEFINED = 'mw'; + + /** + * Return in getLanguageName(s) only the languages for which we have at least some localisation. + */ + const SUPPORTED = 'mwfile'; + + /** @var ServiceOptions */ + private $options; + + /** + * Cache for language names + * @var HashBagOStuff|null + */ + private $languageNameCache; + + /** + * Cache for validity of language codes + * @var array + */ + private $validCodeCache = []; + + public static $constructorOptions = [ + 'ExtraLanguageNames', + 'UsePigLatinVariant', + ]; + + /** + * @param ServiceOptions $options + */ + public function __construct( ServiceOptions $options ) { + $options->assertRequiredOptions( self::$constructorOptions ); + $this->options = $options; + } + + /** + * Checks whether any localisation is available for that language tag in MediaWiki + * (MessagesXx.php or xx.json exists). + * + * @param string $code Language tag (in lower case) + * @return bool Whether language is supported + */ + public function isSupportedLanguage( $code ) { + if ( !$this->isValidBuiltInCode( $code ) ) { + return false; + } + + if ( $code === 'qqq' ) { + // Special code for internal use, not supported even though there is a qqq.json + return false; + } + + return is_readable( $this->getMessagesFileName( $code ) ) || + is_readable( $this->getJsonMessagesFileName( $code ) ); + } + + /** + * Returns true if a language code string is of a valid form, whether or not it exists. This + * includes codes which are used solely for customisation via the MediaWiki namespace. + * + * @param string $code + * + * @return bool + */ + public function isValidCode( $code ) { + Assert::parameterType( 'string', $code, '$code' ); + if ( !isset( $this->validCodeCache[$code] ) ) { + // People think language codes are HTML-safe, so enforce it. Ideally we should only + // allow a-zA-Z0-9- but .+ and other chars are often used for {{int:}} hacks. See bugs + // T39564, T39587, T38938. + $this->validCodeCache[$code] = + // Protect against path traversal + strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code ) && + !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code ); + } + return $this->validCodeCache[$code]; + } + + /** + * Returns true if a language code is of a valid form for the purposes of internal customisation + * of MediaWiki, via Messages*.php or *.json. + * + * @param string $code + * @return bool + */ + public function isValidBuiltInCode( $code ) { + Assert::parameterType( 'string', $code, '$code' ); + + return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code ); + } + + /** + * Returns true if a language code is an IETF tag known to MediaWiki. + * + * @param string $tag + * + * @return bool + */ + public function isKnownLanguageTag( $tag ) { + // Quick escape for invalid input to avoid exceptions down the line when code tries to + // process tags which are not valid at all. + if ( !$this->isValidBuiltInCode( $tag ) ) { + return false; + } + + if ( isset( Data\Names::$names[$tag] ) || $this->getLanguageName( $tag, $tag ) !== '' ) { + return true; + } + + return false; + } + + /** + * Get an array of language names, indexed by code. + * @param null|string $inLanguage Code of language in which to return the names + * Use self::AUTONYMS for autonyms (native names) + * @param string $include One of: + * self::ALL all available languages + * self::DEFINED only if the language is defined in MediaWiki or wgExtraLanguageNames + * (default) + * self::SUPPORTED only if the language is in self::DEFINED *and* has a message file + * @return array Language code => language name (sorted by key) + */ + public function getLanguageNames( $inLanguage = self::AUTONYMS, $include = self::DEFINED ) { + $cacheKey = $inLanguage === self::AUTONYMS ? 'null' : $inLanguage; + $cacheKey .= ":$include"; + if ( !$this->languageNameCache ) { + $this->languageNameCache = new HashBagOStuff( [ 'maxKeys' => 20 ] ); + } + + $ret = $this->languageNameCache->get( $cacheKey ); + if ( !$ret ) { + $ret = $this->getLanguageNamesUncached( $inLanguage, $include ); + $this->languageNameCache->set( $cacheKey, $ret ); + } + return $ret; + } + + /** + * Uncached helper for getLanguageNames + * @param null|string $inLanguage As getLanguageNames + * @param string $include As getLanguageNames + * @return array Language code => language name (sorted by key) + */ + private function getLanguageNamesUncached( $inLanguage, $include ) { + // If passed an invalid language code to use, fallback to en + if ( $inLanguage !== self::AUTONYMS && !$this->isValidCode( $inLanguage ) ) { + $inLanguage = 'en'; + } + + $names = []; + + if ( $inLanguage !== self::AUTONYMS ) { + # TODO: also include for self::AUTONYMS, when this code is more efficient + Hooks::run( 'LanguageGetTranslatedLanguageNames', [ &$names, $inLanguage ] ); + } + + $mwNames = $this->options->get( 'ExtraLanguageNames' ) + Data\Names::$names; + if ( $this->options->get( 'UsePigLatinVariant' ) ) { + // Pig Latin (for variant development) + $mwNames['en-x-piglatin'] = 'Igpay Atinlay'; + } + + foreach ( $mwNames as $mwCode => $mwName ) { + # - Prefer own MediaWiki native name when not using the hook + # - For other names just add if not added through the hook + if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) { + $names[$mwCode] = $mwName; + } + } + + if ( $include === self::ALL ) { + ksort( $names ); + return $names; + } + + $returnMw = []; + $coreCodes = array_keys( $mwNames ); + foreach ( $coreCodes as $coreCode ) { + $returnMw[$coreCode] = $names[$coreCode]; + } + + if ( $include === self::SUPPORTED ) { + $namesMwFile = []; + # We do this using a foreach over the codes instead of a directory loop so that messages + # files in extensions will work correctly. + foreach ( $returnMw as $code => $value ) { + if ( is_readable( $this->getMessagesFileName( $code ) ) || + is_readable( $this->getJsonMessagesFileName( $code ) ) + ) { + $namesMwFile[$code] = $names[$code]; + } + } + + ksort( $namesMwFile ); + return $namesMwFile; + } + + ksort( $returnMw ); + # self::DEFINED option; default if it's not one of the other two options + # (self::ALL/self::SUPPORTED) + return $returnMw; + } + + /** + * @param string $code The code of the language for which to get the name + * @param null|string $inLanguage Code of language in which to return the name (self::AUTONYMS + * for autonyms) + * @param string $include See getLanguageNames(), except this defaults to self::ALL instead of + * self::DEFINED + * @return string Language name or empty + * @since 1.20 + */ + public function getLanguageName( $code, $inLanguage = self::AUTONYMS, $include = self::ALL ) { + $code = strtolower( $code ); + $array = $this->getLanguageNames( $inLanguage, $include ); + return $array[$code] ?? ''; + } + + /** + * Get the name of a file for a certain language code + * @param string $prefix Prepend this to the filename + * @param string $code Language code + * @param string $suffix Append this to the filename + * @throws MWException + * @return string $prefix . $mangledCode . $suffix + */ + public function getFileName( $prefix, $code, $suffix = '.php' ) { + if ( !$this->isValidBuiltInCode( $code ) ) { + throw new MWException( "Invalid language code \"$code\"" ); + } + + return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix; + } + + /** + * @param string $code + * @return string + */ + public function getMessagesFileName( $code ) { + global $IP; + $file = $this->getFileName( "$IP/languages/messages/Messages", $code, '.php' ); + Hooks::run( 'Language::getMessagesFileName', [ $code, &$file ] ); + return $file; + } + + /** + * @param string $code + * @return string + * @throws MWException + */ + public function getJsonMessagesFileName( $code ) { + global $IP; + + if ( !$this->isValidBuiltInCode( $code ) ) { + throw new MWException( "Invalid language code \"$code\"" ); + } + + return "$IP/languages/i18n/$code.json"; + } +} |