diff options
author | Timo Tijhof <krinkle@fastmail.com> | 2022-06-22 23:18:27 +0100 |
---|---|---|
committer | Timo Tijhof <krinkle@fastmail.com> | 2022-06-28 16:27:20 -0700 |
commit | 051e127bdb4e76837ea0dcd66f36ba07ed23d808 (patch) | |
tree | 6a75ec85b93f17ab47006c2d7eb5f850a15478e8 /includes/language | |
parent | 7f3de1a582acd6bfa80ac0b7ce90f84efda31520 (diff) | |
download | mediawikicore-051e127bdb4e76837ea0dcd66f36ba07ed23d808.tar.gz mediawikicore-051e127bdb4e76837ea0dcd66f36ba07ed23d808.zip |
language: Move l10n-related classes from /cache to /language
* Make the code easier to own and maintain by making it part of a
topical area of expertise rather than a generic catch-all bucket.
This also helps Gerrit team dashboards and [[mw:Git/Reviewers]]
subscription patterns, and gives a more meaningful component
prefix for [[mw:Commit_message_guidelines]].
* Prep for possible CODEOWNERS adoption.
* Prep for T166010, so that the better PSR-4 namespaces it gives us
won't have to be broken/migrated again shortly afterwards for the
reasons above.
Bug: T225756
Change-Id: I62c701d574d2a79365b1c5350f92bef47770e3ce
Diffstat (limited to 'includes/language')
19 files changed, 3835 insertions, 0 deletions
diff --git a/includes/language/Hook/LocalisationCacheRecacheFallbackHook.php b/includes/language/Hook/LocalisationCacheRecacheFallbackHook.php new file mode 100644 index 000000000000..ca699108fa56 --- /dev/null +++ b/includes/language/Hook/LocalisationCacheRecacheFallbackHook.php @@ -0,0 +1,26 @@ +<?php + +namespace MediaWiki\Hook; + +/** + * This is a hook handler interface, see docs/Hooks.md. + * Use the hook name "LocalisationCacheRecacheFallback" to register handlers implementing this interface. + * + * @stable to implement + * @ingroup Hooks + */ +interface LocalisationCacheRecacheFallbackHook { + /** + * Called for each language when merging + * fallback data into the cache. + * + * @since 1.35 + * + * @param ?mixed $cache The LocalisationCache object + * @param ?mixed $code language code + * @param ?mixed &$alldata The localisation data from core and extensions. Note some keys may + * be omitted if they won't be merged into the final result. + * @return bool|void True or no return value to continue or false to abort + */ + public function onLocalisationCacheRecacheFallback( $cache, $code, &$alldata ); +} diff --git a/includes/language/Hook/LocalisationCacheRecacheHook.php b/includes/language/Hook/LocalisationCacheRecacheHook.php new file mode 100644 index 000000000000..c489694f9907 --- /dev/null +++ b/includes/language/Hook/LocalisationCacheRecacheHook.php @@ -0,0 +1,26 @@ +<?php + +namespace MediaWiki\Hook; + +/** + * This is a hook handler interface, see docs/Hooks.md. + * Use the hook name "LocalisationCacheRecache" to register handlers implementing this interface. + * + * @stable to implement + * @ingroup Hooks + */ +interface LocalisationCacheRecacheHook { + /** + * Called when loading the localisation data into + * cache. 
+ * + * @since 1.35 + * + * @param ?mixed $cache The LocalisationCache object + * @param ?mixed $code language code + * @param ?mixed &$alldata The localisation data from core and extensions + * @param ?mixed $unused Used to be $purgeBlobs, removed in 1.34 + * @return bool|void True or no return value to continue or false to abort + */ + public function onLocalisationCacheRecache( $cache, $code, &$alldata, $unused ); +} diff --git a/includes/language/Hook/MessageCacheReplaceHook.php b/includes/language/Hook/MessageCacheReplaceHook.php new file mode 100644 index 000000000000..7d555320fd46 --- /dev/null +++ b/includes/language/Hook/MessageCacheReplaceHook.php @@ -0,0 +1,25 @@ +<?php + +namespace MediaWiki\Cache\Hook; + +use Title; + +/** + * This is a hook handler interface, see docs/Hooks.md. + * Use the hook name "MessageCacheReplace" to register handlers implementing this interface. + * + * @stable to implement + * @ingroup Hooks + */ +interface MessageCacheReplaceHook { + /** + * This hook is called when a message page is changed. Use this hook to update caches. + * + * @since 1.35 + * + * @param Title $title Name of the page changed + * @param string $text New contents of the page + * @return bool|void True or no return value to continue or false to abort + */ + public function onMessageCacheReplace( $title, $text ); +} diff --git a/includes/language/Hook/MessageCache__getHook.php b/includes/language/Hook/MessageCache__getHook.php new file mode 100644 index 000000000000..aad00a1af688 --- /dev/null +++ b/includes/language/Hook/MessageCache__getHook.php @@ -0,0 +1,26 @@ +<?php + +namespace MediaWiki\Cache\Hook; + +// phpcs:disable Squiz.Classes.ValidClassName.NotCamelCaps +/** + * This is a hook handler interface, see docs/Hooks.md. + * Use the hook name "MessageCache::get" to register handlers implementing this interface. + * + * @stable to implement + * @ingroup Hooks + */ +interface MessageCache__getHook { + /** + * This hook is called when fetching a message. 
Use this hook to override the key + * for customisations. Given and returned message key must be formatted with: + * 1) the first letter in lower case according to the content language + * 2) spaces replaced with underscores + * + * @since 1.35 + * + * @param string &$key Message key + * @return bool|void True or no return value to continue or false to abort + */ + public function onMessageCache__get( &$key ); +} diff --git a/includes/language/Hook/MessagesPreLoadHook.php b/includes/language/Hook/MessagesPreLoadHook.php new file mode 100644 index 000000000000..e020f1d2b3db --- /dev/null +++ b/includes/language/Hook/MessagesPreLoadHook.php @@ -0,0 +1,24 @@ +<?php + +namespace MediaWiki\Cache\Hook; + +/** + * This is a hook handler interface, see docs/Hooks.md. + * Use the hook name "MessagesPreLoad" to register handlers implementing this interface. + * + * @stable to implement + * @ingroup Hooks + */ +interface MessagesPreLoadHook { + /** + * This hook is called when loading a message from the database. + * + * @since 1.35 + * + * @param string $title Title of the message + * @param string &$message Message you want to define + * @param string $code Language code + * @return bool|void True or no return value to continue or false to abort + */ + public function onMessagesPreLoad( $title, &$message, $code ); +} diff --git a/includes/language/LCStore.php b/includes/language/LCStore.php new file mode 100644 index 000000000000..dbd996e537c7 --- /dev/null +++ b/includes/language/LCStore.php @@ -0,0 +1,66 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Interface for the persistence layer of LocalisationCache. + * + * The persistence layer is two-level hierarchical cache. The first level + * is the language, the second level is the item or subitem. + * + * Since the data for a whole language is rebuilt in one operation, it needs + * to have a fast and atomic method for deleting or replacing all of the + * current data for a given language. The interface reflects this bulk update + * operation. Callers writing to the cache must first call startWrite(), then + * will call set() a couple of thousand times, then will call finishWrite() + * to commit the operation. When finishWrite() is called, the cache is + * expected to delete all data previously stored for that language. + * + * The values stored are PHP variables suitable for serialize(). Implementations + * of LCStore are responsible for serializing and unserializing. + */ +interface LCStore { + + /** + * Get a value. + * @param string $code Language code + * @param string $key Cache key + */ + public function get( $code, $key ); + + /** + * Start a write transaction. + * @param string $code Language code + */ + public function startWrite( $code ); + + /** + * Finish a write transaction. + */ + public function finishWrite(); + + /** + * Set a key to a given value. startWrite() must be called before this + * is called, and finishWrite() must be called afterwards. 
+ * @param string $key + * @param mixed $value + */ + public function set( $key, $value ); + +} diff --git a/includes/language/LCStoreCDB.php b/includes/language/LCStoreCDB.php new file mode 100644 index 000000000000..e9cac7c08ba9 --- /dev/null +++ b/includes/language/LCStoreCDB.php @@ -0,0 +1,134 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ +use Cdb\Exception as CdbException; +use Cdb\Reader; +use Cdb\Writer; + +/** + * LCStore implementation which stores data as a collection of CDB files. + * + * Profiling indicates that on Linux, this implementation outperforms MySQL if + * the directory is on a local filesystem and there is ample kernel cache + * space. The performance advantage is greater when the DBA extension is + * available than it is with the PHP port. 
+ * + * See Cdb.php and https://cr.yp.to/cdb.html + */ +class LCStoreCDB implements LCStore { + + /** @var Reader[]|false[] */ + private $readers; + + /** @var Writer|null */ + private $writer; + + /** @var string|null Current language code */ + private $currentLang; + + /** @var string Cache directory */ + private $directory; + + public function __construct( $conf = [] ) { + $this->directory = $conf['directory']; + } + + public function get( $code, $key ) { + if ( !isset( $this->readers[$code] ) ) { + $fileName = $this->getFileName( $code ); + + $this->readers[$code] = false; + if ( is_file( $fileName ) ) { + try { + $this->readers[$code] = Reader::open( $fileName ); + } catch ( CdbException $e ) { + wfDebug( __METHOD__ . ": unable to open cdb file for reading" ); + } + } + } + + if ( !$this->readers[$code] ) { + return null; + } else { + $value = false; + try { + $value = $this->readers[$code]->get( $key ); + } catch ( CdbException $e ) { + wfDebug( __METHOD__ . ": \Cdb\Exception caught, error message was " + . $e->getMessage() ); + } + if ( $value === false ) { + return null; + } + + return unserialize( $value ); + } + } + + public function startWrite( $code ) { + if ( !is_dir( $this->directory ) && !wfMkdirParents( $this->directory, null, __METHOD__ ) ) { + throw new MWException( "Unable to create the localisation store " . 
+ "directory \"{$this->directory}\"" ); + } + + // Close reader to stop permission errors on write + if ( !empty( $this->readers[$code] ) ) { + $this->readers[$code]->close(); + } + + try { + $this->writer = Writer::open( $this->getFileName( $code ) ); + } catch ( CdbException $e ) { + throw new MWException( $e->getMessage() ); + } + $this->currentLang = $code; + } + + public function finishWrite() { + // Close the writer + try { + $this->writer->close(); + } catch ( CdbException $e ) { + throw new MWException( $e->getMessage() ); + } + $this->writer = null; + unset( $this->readers[$this->currentLang] ); + $this->currentLang = null; + } + + public function set( $key, $value ) { + if ( $this->writer === null ) { + throw new MWException( __CLASS__ . ': must call startWrite() before calling set()' ); + } + try { + $this->writer->set( $key, serialize( $value ) ); + } catch ( CdbException $e ) { + throw new MWException( $e->getMessage() ); + } + } + + protected function getFileName( $code ) { + if ( strval( $code ) === '' || strpos( $code, '/' ) !== false ) { + throw new MWException( __METHOD__ . ": Invalid language \"$code\"" ); + } + + return "{$this->directory}/l10n_cache-$code.cdb"; + } + +} diff --git a/includes/language/LCStoreDB.php b/includes/language/LCStoreDB.php new file mode 100644 index 000000000000..d6a9866bb711 --- /dev/null +++ b/includes/language/LCStoreDB.php @@ -0,0 +1,147 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +use Wikimedia\Rdbms\Database; +use Wikimedia\Rdbms\DBQueryError; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\ScopedCallback; + +/** + * LCStore implementation which uses the standard DB functions to store data. + * This will work on any MediaWiki installation. + */ +class LCStoreDB implements LCStore { + /** @var string|null Language code */ + private $code; + /** @var array Server configuration map */ + private $server; + + /** @var array Rows buffered for insertion */ + private $batch = []; + + /** @var IDatabase|null */ + private $dbw; + /** @var bool Whether a batch of writes were recently written */ + private $writesDone = false; + /** @var bool Whether the DB is read-only or otherwise unavailable for writes */ + private $readOnly = false; + + public function __construct( $params ) { + $this->server = $params['server'] ?? []; + } + + public function get( $code, $key ) { + if ( $this->server || $this->writesDone ) { + // If a server configuration map is specified, always used that connection + // for reads and writes. Otherwise, if writes occurred in finishWrite(), make + // sure those changes are always visible. + $db = $this->getWriteConnection(); + } else { + $db = wfGetDB( DB_REPLICA ); + } + + $value = $db->selectField( + 'l10n_cache', + 'lc_value', + [ 'lc_lang' => $code, 'lc_key' => $key ], + __METHOD__ + ); + + return ( $value !== false ) ? unserialize( $db->decodeBlob( $value ) ) : null; + } + + public function startWrite( $code ) { + if ( $this->readOnly ) { + return; + } elseif ( !$code ) { + throw new MWException( __METHOD__ . 
": Invalid language \"$code\"" ); + } + + $dbw = $this->getWriteConnection(); + $this->readOnly = $dbw->isReadOnly(); + + $this->code = $code; + $this->batch = []; + } + + public function finishWrite() { + if ( $this->readOnly ) { + return; + } elseif ( $this->code === null ) { + throw new MWException( __CLASS__ . ': must call startWrite() before finishWrite()' ); + } + + $scope = Profiler::instance()->getTransactionProfiler()->silenceForScope(); + $dbw = $this->getWriteConnection(); + $dbw->startAtomic( __METHOD__ ); + try { + $dbw->delete( 'l10n_cache', [ 'lc_lang' => $this->code ], __METHOD__ ); + foreach ( array_chunk( $this->batch, 500 ) as $rows ) { + $dbw->insert( 'l10n_cache', $rows, __METHOD__ ); + } + $this->writesDone = true; + } catch ( DBQueryError $e ) { + if ( $dbw->wasReadOnlyError() ) { + $this->readOnly = true; // just avoid site down time + } else { + throw $e; + } + } + $dbw->endAtomic( __METHOD__ ); + ScopedCallback::consume( $scope ); + + $this->code = null; + $this->batch = []; + } + + public function set( $key, $value ) { + if ( $this->readOnly ) { + return; + } elseif ( $this->code === null ) { + throw new MWException( __CLASS__ . ': must call startWrite() before set()' ); + } + + $dbw = $this->getWriteConnection(); + + $this->batch[] = [ + 'lc_lang' => $this->code, + 'lc_key' => $key, + 'lc_value' => $dbw->encodeBlob( serialize( $value ) ) + ]; + } + + /** + * @return IDatabase + */ + private function getWriteConnection() { + if ( !$this->dbw ) { + if ( $this->server ) { + $this->dbw = Database::factory( $this->server['type'], $this->server ); + if ( !$this->dbw ) { + throw new MWException( __CLASS__ . 
': failed to obtain a DB connection' ); + } + } else { + $this->dbw = wfGetDB( DB_PRIMARY ); + } + } + + return $this->dbw; + } +} diff --git a/includes/language/LCStoreNull.php b/includes/language/LCStoreNull.php new file mode 100644 index 000000000000..62f88ebf2d73 --- /dev/null +++ b/includes/language/LCStoreNull.php @@ -0,0 +1,39 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * Null store backend, used to avoid DB errors during install + */ +class LCStoreNull implements LCStore { + + public function get( $code, $key ) { + return null; + } + + public function startWrite( $code ) { + } + + public function finishWrite() { + } + + public function set( $key, $value ) { + } + +} diff --git a/includes/language/LCStoreStaticArray.php b/includes/language/LCStoreStaticArray.php new file mode 100644 index 000000000000..ca7c997d1bcd --- /dev/null +++ b/includes/language/LCStoreStaticArray.php @@ -0,0 +1,166 @@ +<?php +/** + * Localisation cache storage based on PHP files and static arrays. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +use Wikimedia\StaticArrayWriter; + +/** + * @since 1.26 + */ +class LCStoreStaticArray implements LCStore { + /** @var string|null Current language code. */ + private $currentLang = null; + + /** @var array Localisation data. */ + private $data = []; + + /** @var string|null File name. */ + private $fname = null; + + /** @var string Directory for cache files. */ + private $directory; + + public function __construct( $conf = [] ) { + $this->directory = $conf['directory']; + } + + public function startWrite( $code ) { + if ( !is_dir( $this->directory ) && !wfMkdirParents( $this->directory, null, __METHOD__ ) ) { + throw new MWException( "Unable to create the localisation store " . + "directory \"{$this->directory}\"" ); + } + + $this->currentLang = $code; + $this->fname = $this->directory . '/' . $code . '.l10n.php'; + $this->data[$code] = []; + if ( is_file( $this->fname ) ) { + $this->data[$code] = require $this->fname; + } + } + + public function set( $key, $value ) { + $this->data[$this->currentLang][$key] = self::encode( $value ); + } + + /** + * Determine whether this array contains only scalar values. 
+ * + * @param array $arr + * @return bool + */ + private static function isValueArray( array $arr ) { + foreach ( $arr as $key => $value ) { + if ( is_scalar( $value ) + || $value === null + || ( is_array( $value ) && self::isValueArray( $value ) ) + ) { + continue; + } + return false; + } + return true; + } + + /** + * Encodes a value into an array format + * + * @param mixed $value + * @return array|mixed + * @throws RuntimeException + */ + public static function encode( $value ) { + if ( is_array( $value ) && self::isValueArray( $value ) ) { + // Type: scalar [v]alue. + // Optimization: Write large arrays as one value to avoid recursive decoding cost. + return [ 'v', $value ]; + } + if ( is_array( $value ) || is_object( $value ) ) { + // Type: [s]serialized. + // Optimization: Avoid recursive decoding cost. Write arrays with an objects + // as one serialised value. + return [ 's', serialize( $value ) ]; + } + if ( is_scalar( $value ) || $value === null ) { + // Optimization: Reduce file size by not wrapping scalar values. + return $value; + } + + throw new RuntimeException( 'Cannot encode ' . var_export( $value, true ) ); + } + + /** + * Decode something that was encoded with encode + * + * @param mixed $encoded + * @return array|mixed + * @throws RuntimeException + */ + public static function decode( $encoded ) { + if ( !is_array( $encoded ) ) { + // Unwrapped scalar value + return $encoded; + } + + list( $type, $data ) = $encoded; + + switch ( $type ) { + case 'v': + // Value array (1.35+) or unwrapped scalar value (1.32 and earlier) + return $data; + case 's': + return unserialize( $data ); + case 'a': + // Support: MediaWiki 1.34 and earlier (older file format) + return array_map( [ __CLASS__, 'decode' ], $data ); + default: + throw new RuntimeException( + 'Unable to decode ' . 
var_export( $encoded, true ) ); + } + } + + public function finishWrite() { + $writer = new StaticArrayWriter(); + $out = $writer->create( + $this->data[$this->currentLang], + 'Generated by LCStoreStaticArray.php -- do not edit!' + ); + file_put_contents( $this->fname, $out ); + // Release the data to manage the memory in rebuildLocalisationCache + unset( $this->data[$this->currentLang] ); + $this->currentLang = null; + $this->fname = null; + } + + public function get( $code, $key ) { + if ( !array_key_exists( $code, $this->data ) ) { + $fname = $this->directory . '/' . $code . '.l10n.php'; + if ( !is_file( $fname ) ) { + return null; + } + $this->data[$code] = require $fname; + } + $data = $this->data[$code]; + if ( array_key_exists( $key, $data ) ) { + return self::decode( $data[$key] ); + } + return null; + } +} diff --git a/includes/language/LocalisationCache.php b/includes/language/LocalisationCache.php new file mode 100644 index 000000000000..92615d9867d9 --- /dev/null +++ b/includes/language/LocalisationCache.php @@ -0,0 +1,1125 @@ +<?php +/** + * Cache of the contents of localisation files. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +use CLDRPluralRuleParser\Error as CLDRPluralRuleError; +use CLDRPluralRuleParser\Evaluator; +use MediaWiki\Config\ServiceOptions; +use MediaWiki\HookContainer\HookContainer; +use MediaWiki\HookContainer\HookRunner; +use MediaWiki\Languages\LanguageNameUtils; +use MediaWiki\MainConfigNames; +use Psr\Log\LoggerInterface; + +/** + * Class for caching the contents of localisation files, Messages*.php + * and *.i18n.php. + * + * An instance of this class is available using MediaWikiServices. + * + * The values retrieved from here are merged, containing items from extension + * files, core messages files and the language fallback sequence (e.g. zh-cn -> + * zh-hans -> en ). Some common errors are corrected, for example namespace + * names with spaces instead of underscores, but heavyweight processing, such + * as grammatical transformation, is done by the caller. + */ +class LocalisationCache { + public const VERSION = 4; + + /** @var ServiceOptions */ + private $options; + + /** + * True if recaching should only be done on an explicit call to recache(). + * Setting this reduces the overhead of cache freshness checking, which + * requires doing a stat() for every extension i18n file. + */ + private $manualRecache; + + /** + * The cache data. 3-d array, where the first key is the language code, + * the second key is the item key e.g. 'messages', and the third key is + * an item specific subkey index. Some items are not arrays and so for those + * items, there are no subkeys. + */ + protected $data = []; + + /** + * The persistent store object. An instance of LCStore. 
+ * + * @var LCStore + */ + private $store; + + /** + * @var LoggerInterface + */ + private $logger; + + /** @var HookRunner */ + private $hookRunner; + + /** @var callable[] See comment for parameter in constructor */ + private $clearStoreCallbacks; + + /** @var LanguageNameUtils */ + private $langNameUtils; + + /** + * A 2-d associative array, code/key, where presence indicates that the item + * is loaded. Value arbitrary. + * + * For split items, if set, this indicates that all of the subitems have been + * loaded. + * + */ + private $loadedItems = []; + + /** + * A 3-d associative array, code/key/subkey, where presence indicates that + * the subitem is loaded. Only used for the split items, i.e. messages. + */ + private $loadedSubitems = []; + + /** + * An array where presence of a key indicates that that language has been + * initialised. Initialisation includes checking for cache expiry and doing + * any necessary updates. + */ + private $initialisedLangs = []; + + /** + * An array mapping non-existent pseudo-languages to fallback languages. This + * is filled by initShallowFallback() when data is requested from a language + * that lacks a Messages*.php file. + */ + private $shallowFallbacks = []; + + /** + * An array where the keys are codes that have been recached by this instance. 
+ */ + private $recachedLangs = []; + + /** + * All item keys + */ + public static $allKeys = [ + 'fallback', 'namespaceNames', 'bookstoreList', + 'magicWords', 'messages', 'rtl', + 'digitTransformTable', 'separatorTransformTable', + 'minimumGroupingDigits', 'fallback8bitEncoding', + 'linkPrefixExtension', 'linkTrail', 'linkPrefixCharset', + 'namespaceAliases', 'dateFormats', 'datePreferences', + 'datePreferenceMigrationMap', 'defaultDateFormat', + 'specialPageAliases', 'imageFiles', 'preloadedMessages', + 'namespaceGenderAliases', 'digitGroupingPattern', 'pluralRules', + 'pluralRuleTypes', 'compiledPluralRules', + ]; + + /** + * Keys for items which consist of associative arrays, which may be merged + * by a fallback sequence. + */ + public static $mergeableMapKeys = [ 'messages', 'namespaceNames', + 'namespaceAliases', 'dateFormats', 'imageFiles', 'preloadedMessages' + ]; + + /** + * Keys for items which are a numbered array. + */ + public static $mergeableListKeys = []; + + /** + * Keys for items which contain an array of arrays of equivalent aliases + * for each subitem. The aliases may be merged by a fallback sequence. + */ + public static $mergeableAliasListKeys = [ 'specialPageAliases' ]; + + /** + * Keys for items which contain an associative array, and may be merged if + * the primary value contains the special array key "inherit". That array + * key is removed after the first merge. + */ + public static $optionalMergeKeys = [ 'bookstoreList' ]; + + /** + * Keys for items that are formatted like $magicWords + */ + public static $magicWordKeys = [ 'magicWords' ]; + + /** + * Keys for items where the subitems are stored in the backend separately. + */ + public static $splitKeys = [ 'messages' ]; + + /** + * Keys which are loaded automatically by initLanguage() + */ + public static $preloadedKeys = [ 'dateFormats', 'namespaceNames' ]; + + /** + * Associative array of cached plural rules. 
The key is the language code, + * the value is an array of plural rules for that language. + */ + private $pluralRules = null; + + /** + * Associative array of cached plural rule types. The key is the language + * code, the value is an array of plural rule types for that language. For + * example, $pluralRuleTypes['ar'] = ['zero', 'one', 'two', 'few', 'many']. + * The index for each rule type matches the index for the rule in + * $pluralRules, thus allowing correlation between the two. The reason we + * don't just use the type names as the keys in $pluralRules is because + * Language::convertPlural applies the rules based on numeric order (or + * explicit numeric parameter), not based on the name of the rule type. For + * example, {{plural:count|wordform1|wordform2|wordform3}}, rather than + * {{plural:count|one=wordform1|two=wordform2|many=wordform3}}. + */ + private $pluralRuleTypes = null; + + private $mergeableKeys = null; + + /** + * Return a suitable LCStore as specified by the given configuration. + * + * @since 1.34 + * @param array $conf In the format of $wgLocalisationCacheConf + * @param string|false|null $fallbackCacheDir In case 'storeDirectory' isn't specified + * @return LCStore + */ + public static function getStoreFromConf( array $conf, $fallbackCacheDir ): LCStore { + $storeArg = []; + $storeArg['directory'] = + $conf['storeDirectory'] ?: $fallbackCacheDir; + + if ( !empty( $conf['storeClass'] ) ) { + $storeClass = $conf['storeClass']; + } elseif ( $conf['store'] === 'files' || $conf['store'] === 'file' || + ( $conf['store'] === 'detect' && $storeArg['directory'] ) + ) { + $storeClass = LCStoreCDB::class; + } elseif ( $conf['store'] === 'db' || $conf['store'] === 'detect' ) { + $storeClass = LCStoreDB::class; + $storeArg['server'] = $conf['storeServer'] ?? 
[]; + } elseif ( $conf['store'] === 'array' ) { + $storeClass = LCStoreStaticArray::class; + } else { + throw new MWException( + 'Please set $wgLocalisationCacheConf[\'store\'] to something sensible.' + ); + } + + return new $storeClass( $storeArg ); + } + + /** + * @internal For use by ServiceWiring + */ + public const CONSTRUCTOR_OPTIONS = [ + // True to treat all files as expired until they are regenerated by this object. + 'forceRecache', + 'manualRecache', + MainConfigNames::ExtensionMessagesFiles, + MainConfigNames::MessagesDirs, + ]; + + /** + * For constructor parameters, see the documentation for the LocalisationCacheConf + * setting in docs/Configuration.md. + * + * Do not construct this directly. Use MediaWikiServices. + * + * @param ServiceOptions $options + * @param LCStore $store What backend to use for storage + * @param LoggerInterface $logger + * @param callable[] $clearStoreCallbacks To be called whenever the cache is cleared. Can be + * used to clear other caches that depend on this one, such as ResourceLoader's + * MessageBlobStore. + * @param LanguageNameUtils $langNameUtils + * @param HookContainer $hookContainer + * @throws MWException + */ + public function __construct( + ServiceOptions $options, + LCStore $store, + LoggerInterface $logger, + array $clearStoreCallbacks, + LanguageNameUtils $langNameUtils, + HookContainer $hookContainer + ) { + $options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); + + $this->options = $options; + $this->store = $store; + $this->logger = $logger; + $this->clearStoreCallbacks = $clearStoreCallbacks; + $this->langNameUtils = $langNameUtils; + $this->hookRunner = new HookRunner( $hookContainer ); + + // Keep this separate from $this->options so it can be mutable + $this->manualRecache = $options->get( 'manualRecache' ); + } + + /** + * Returns true if the given key is mergeable, that is, if it is an associative + * array which can be merged through a fallback sequence. 
+ * @param string $key + * @return bool + */ + public function isMergeableKey( $key ) { + if ( $this->mergeableKeys === null ) { + $this->mergeableKeys = array_fill_keys( array_merge( + self::$mergeableMapKeys, + self::$mergeableListKeys, + self::$mergeableAliasListKeys, + self::$optionalMergeKeys, + self::$magicWordKeys + ), true ); + } + + return isset( $this->mergeableKeys[$key] ); + } + + /** + * Get a cache item. + * + * Warning: this may be slow for split items (messages), since it will + * need to fetch all of the subitems from the cache individually. + * @param string $code + * @param string $key + * @return mixed + */ + public function getItem( $code, $key ) { + if ( !isset( $this->loadedItems[$code][$key] ) ) { + $this->loadItem( $code, $key ); + } + + if ( $key === 'fallback' && isset( $this->shallowFallbacks[$code] ) ) { + return $this->shallowFallbacks[$code]; + } + + return $this->data[$code][$key]; + } + + /** + * Get a subitem, for instance a single message for a given language. + * @param string $code + * @param string $key + * @param string $subkey + * @return mixed|null + */ + public function getSubitem( $code, $key, $subkey ) { + if ( !isset( $this->loadedSubitems[$code][$key][$subkey] ) && + !isset( $this->loadedItems[$code][$key] ) + ) { + $this->loadSubitem( $code, $key, $subkey ); + } + + return $this->data[$code][$key][$subkey] ?? null; + } + + /** + * Get the list of subitem keys for a given item. + * + * This is faster than array_keys($lc->getItem(...)) for the items listed in + * self::$splitKeys. + * + * Will return null if the item is not found, or false if the item is not an + * array. 
+ * @param string $code + * @param string $key + * @return bool|null|string|string[] + */ + public function getSubitemList( $code, $key ) { + if ( in_array( $key, self::$splitKeys ) ) { + return $this->getSubitem( $code, 'list', $key ); + } else { + $item = $this->getItem( $code, $key ); + if ( is_array( $item ) ) { + return array_keys( $item ); + } else { + return false; + } + } + } + + /** + * Load an item into the cache. + * @param string $code + * @param string $key + */ + protected function loadItem( $code, $key ) { + if ( !isset( $this->initialisedLangs[$code] ) ) { + $this->initLanguage( $code ); + } + + // Check to see if initLanguage() loaded it for us + if ( isset( $this->loadedItems[$code][$key] ) ) { + return; + } + + if ( isset( $this->shallowFallbacks[$code] ) ) { + $this->loadItem( $this->shallowFallbacks[$code], $key ); + + return; + } + + if ( in_array( $key, self::$splitKeys ) ) { + $subkeyList = $this->getSubitem( $code, 'list', $key ); + foreach ( $subkeyList as $subkey ) { + if ( isset( $this->data[$code][$key][$subkey] ) ) { + continue; + } + $this->data[$code][$key][$subkey] = $this->getSubitem( $code, $key, $subkey ); + } + } else { + $this->data[$code][$key] = $this->store->get( $code, $key ); + } + + $this->loadedItems[$code][$key] = true; + } + + /** + * Load a subitem into the cache + * @param string $code + * @param string $key + * @param string $subkey + */ + protected function loadSubitem( $code, $key, $subkey ) { + if ( !in_array( $key, self::$splitKeys ) ) { + $this->loadItem( $code, $key ); + + return; + } + + if ( !isset( $this->initialisedLangs[$code] ) ) { + $this->initLanguage( $code ); + } + + // Check to see if initLanguage() loaded it for us + if ( isset( $this->loadedItems[$code][$key] ) || + isset( $this->loadedSubitems[$code][$key][$subkey] ) + ) { + return; + } + + if ( isset( $this->shallowFallbacks[$code] ) ) { + $this->loadSubitem( $this->shallowFallbacks[$code], $key, $subkey ); + + return; + } + + $value = 
$this->store->get( $code, "$key:$subkey" ); + $this->data[$code][$key][$subkey] = $value; + $this->loadedSubitems[$code][$key][$subkey] = true; + } + + /** + * Returns true if the cache identified by $code is missing or expired. + * + * @param string $code + * + * @return bool + */ + public function isExpired( $code ) { + if ( $this->options->get( 'forceRecache' ) && !isset( $this->recachedLangs[$code] ) ) { + $this->logger->debug( __METHOD__ . "($code): forced reload" ); + + return true; + } + + $deps = $this->store->get( $code, 'deps' ); + $keys = $this->store->get( $code, 'list' ); + $preload = $this->store->get( $code, 'preload' ); + // Different keys may expire separately for some stores + if ( $deps === null || $keys === null || $preload === null ) { + $this->logger->debug( __METHOD__ . "($code): cache missing, need to make one" ); + + return true; + } + + foreach ( $deps as $dep ) { + // Because we're unserializing stuff from cache, we + // could receive objects of classes that don't exist + // anymore (e.g. uninstalled extensions) + // When this happens, always expire the cache + if ( !$dep instanceof CacheDependency || $dep->isExpired() ) { + $this->logger->debug( __METHOD__ . "($code): cache for $code expired due to " . + get_class( $dep ) ); + + return true; + } + } + + return false; + } + + /** + * Initialise a language in this object. Rebuild the cache if necessary. 
+ * @param string $code + * @throws MWException + */ + protected function initLanguage( $code ) { + if ( isset( $this->initialisedLangs[$code] ) ) { + return; + } + + $this->initialisedLangs[$code] = true; + + # If the code is of the wrong form for a Messages*.php file, do a shallow fallback + if ( !$this->langNameUtils->isValidBuiltInCode( $code ) ) { + $this->initShallowFallback( $code, 'en' ); + + return; + } + + # Recache the data if necessary + if ( !$this->manualRecache && $this->isExpired( $code ) ) { + if ( $this->langNameUtils->isSupportedLanguage( $code ) ) { + $this->recache( $code ); + } elseif ( $code === 'en' ) { + throw new MWException( 'MessagesEn.php is missing.' ); + } else { + $this->initShallowFallback( $code, 'en' ); + } + + return; + } + + # Preload some stuff + $preload = $this->getItem( $code, 'preload' ); + if ( $preload === null ) { + if ( $this->manualRecache ) { + // No Messages*.php file. Do shallow fallback to en. + if ( $code === 'en' ) { + throw new MWException( 'No localisation cache found for English. ' . + 'Please run maintenance/rebuildLocalisationCache.php.' ); + } + $this->initShallowFallback( $code, 'en' ); + + return; + } else { + throw new MWException( 'Invalid or missing localisation cache.' ); + } + } + $this->data[$code] = $preload; + foreach ( $preload as $key => $item ) { + if ( in_array( $key, self::$splitKeys ) ) { + foreach ( $item as $subkey => $subitem ) { + $this->loadedSubitems[$code][$key][$subkey] = true; + } + } else { + $this->loadedItems[$code][$key] = true; + } + } + } + + /** + * Create a fallback from one language to another, without creating a + * complete persistent cache. 
+ * @param string $primaryCode + * @param string $fallbackCode + */ + public function initShallowFallback( $primaryCode, $fallbackCode ) { + $this->data[$primaryCode] =& $this->data[$fallbackCode]; + $this->loadedItems[$primaryCode] =& $this->loadedItems[$fallbackCode]; + $this->loadedSubitems[$primaryCode] =& $this->loadedSubitems[$fallbackCode]; + $this->shallowFallbacks[$primaryCode] = $fallbackCode; + } + + /** + * Read a PHP file containing localisation data. + * @param string $_fileName + * @param string $_fileType + * @throws MWException + * @return array + */ + protected function readPHPFile( $_fileName, $_fileType ) { + include $_fileName; + + $data = []; + if ( $_fileType == 'core' || $_fileType == 'extension' ) { + foreach ( self::$allKeys as $key ) { + // Not all keys are set in language files, so + // check they exist first + if ( isset( $$key ) ) { + $data[$key] = $$key; + } + } + } elseif ( $_fileType == 'aliases' ) { + // @phan-suppress-next-line PhanImpossibleCondition May be set in included file + if ( isset( $aliases ) ) { + $data['aliases'] = $aliases; + } + } else { + throw new MWException( __METHOD__ . ": Invalid file type: $_fileType" ); + } + + return $data; + } + + /** + * Read a JSON file containing localisation messages. + * @param string $fileName Name of file to read + * @throws MWException If there is a syntax error in the JSON file + * @return array Array with a 'messages' key, or empty array if the file doesn't exist + */ + public function readJSONFile( $fileName ) { + if ( !is_readable( $fileName ) ) { + return []; + } + + $json = file_get_contents( $fileName ); + if ( $json === false ) { + return []; + } + + $data = FormatJson::decode( $json, true ); + if ( $data === null ) { + throw new MWException( __METHOD__ . 
": Invalid JSON file: $fileName" ); + } + + // Remove keys starting with '@', they're reserved for metadata and non-message data + foreach ( $data as $key => $unused ) { + if ( $key === '' || $key[0] === '@' ) { + unset( $data[$key] ); + } + } + + // The JSON format only supports messages, none of the other variables, so wrap the data + return [ 'messages' => $data ]; + } + + /** + * Get the compiled plural rules for a given language from the XML files. + * @since 1.20 + * @param string $code + * @return array|null + */ + public function getCompiledPluralRules( $code ) { + $rules = $this->getPluralRules( $code ); + if ( $rules === null ) { + return null; + } + try { + $compiledRules = Evaluator::compile( $rules ); + } catch ( CLDRPluralRuleError $e ) { + $this->logger->debug( $e->getMessage() ); + + return []; + } + + return $compiledRules; + } + + /** + * Get the plural rules for a given language from the XML files. + * Cached. + * @since 1.20 + * @param string $code + * @return array|null + */ + public function getPluralRules( $code ) { + if ( $this->pluralRules === null ) { + $this->loadPluralFiles(); + } + return $this->pluralRules[$code] ?? null; + } + + /** + * Get the plural rule types for a given language from the XML files. + * Cached. + * @since 1.22 + * @param string $code + * @return array|null + */ + public function getPluralRuleTypes( $code ) { + if ( $this->pluralRuleTypes === null ) { + $this->loadPluralFiles(); + } + return $this->pluralRuleTypes[$code] ?? null; + } + + /** + * Load the plural XML files. 
+ */ + protected function loadPluralFiles() { + foreach ( $this->getPluralFiles() as $fileName ) { + $this->loadPluralFile( $fileName ); + } + } + + private function getPluralFiles(): array { + global $IP; + return [ + // Load CLDR plural rules + "$IP/languages/data/plurals.xml", + // Override or extend with MW-specific rules + "$IP/languages/data/plurals-mediawiki.xml", + ]; + } + + /** + * Load a plural XML file with the given filename, compile the relevant + * rules, and save the compiled rules in a process-local cache. + * + * @param string $fileName + * @throws MWException + */ + protected function loadPluralFile( $fileName ) { + // Use file_get_contents instead of DOMDocument::load (T58439) + $xml = file_get_contents( $fileName ); + if ( !$xml ) { + throw new MWException( "Unable to read plurals file $fileName" ); + } + $doc = new DOMDocument; + $doc->loadXML( $xml ); + $rulesets = $doc->getElementsByTagName( "pluralRules" ); + foreach ( $rulesets as $ruleset ) { + $codes = $ruleset->getAttribute( 'locales' ); + $rules = []; + $ruleTypes = []; + $ruleElements = $ruleset->getElementsByTagName( "pluralRule" ); + foreach ( $ruleElements as $elt ) { + $ruleType = $elt->getAttribute( 'count' ); + if ( $ruleType === 'other' ) { + // Don't record "other" rules, which have an empty condition + continue; + } + $rules[] = $elt->nodeValue; + $ruleTypes[] = $ruleType; + } + foreach ( explode( ' ', $codes ) as $code ) { + $this->pluralRules[$code] = $rules; + $this->pluralRuleTypes[$code] = $ruleTypes; + } + } + } + + /** + * Read the data from the source files for a given language, and register + * the relevant dependencies in the $deps array. If the localisation + * exists, the data array is returned, otherwise false is returned. 
+ * + * @param string $code + * @param array &$deps + * @return array + */ + protected function readSourceFilesAndRegisterDeps( $code, &$deps ) { + // This reads in the PHP i18n file with non-messages l10n data + $fileName = $this->langNameUtils->getMessagesFileName( $code ); + if ( !is_file( $fileName ) ) { + $data = []; + } else { + $deps[] = new FileDependency( $fileName ); + $data = $this->readPHPFile( $fileName, 'core' ); + } + + // Load CLDR plural rules for JavaScript + $data['pluralRules'] = $this->getPluralRules( $code ); + // And for PHP + $data['compiledPluralRules'] = $this->getCompiledPluralRules( $code ); + // Load plural rule types + $data['pluralRuleTypes'] = $this->getPluralRuleTypes( $code ); + + foreach ( $this->getPluralFiles() as $fileName ) { + $deps[] = new FileDependency( $fileName ); + } + + return $data; + } + + /** + * Merge two localisation values, a primary and a fallback, overwriting the + * primary value in place. + * @param string $key + * @param mixed &$value + * @param mixed $fallbackValue + */ + protected function mergeItem( $key, &$value, $fallbackValue ) { + if ( $value !== null ) { + if ( $fallbackValue !== null ) { + if ( in_array( $key, self::$mergeableMapKeys ) ) { + $value += $fallbackValue; + } elseif ( in_array( $key, self::$mergeableListKeys ) ) { + $value = array_unique( array_merge( $fallbackValue, $value ) ); + } elseif ( in_array( $key, self::$mergeableAliasListKeys ) ) { + $value = array_merge_recursive( $value, $fallbackValue ); + } elseif ( in_array( $key, self::$optionalMergeKeys ) ) { + if ( !empty( $value['inherit'] ) ) { + $value = array_merge( $fallbackValue, $value ); + } + + unset( $value['inherit'] ); + } elseif ( in_array( $key, self::$magicWordKeys ) ) { + $this->mergeMagicWords( $value, $fallbackValue ); + } + } + } else { + $value = $fallbackValue; + } + } + + /** + * @param mixed &$value + * @param mixed $fallbackValue + */ + protected function mergeMagicWords( &$value, $fallbackValue ) { + foreach ( 
$fallbackValue as $magicName => $fallbackInfo ) { + if ( !isset( $value[$magicName] ) ) { + $value[$magicName] = $fallbackInfo; + } else { + $oldSynonyms = array_slice( $fallbackInfo, 1 ); + $newSynonyms = array_slice( $value[$magicName], 1 ); + $synonyms = array_values( array_unique( array_merge( + $newSynonyms, $oldSynonyms ) ) ); + $value[$magicName] = array_merge( [ $fallbackInfo[0] ], $synonyms ); + } + } + } + + /** + * Given an array mapping language code to localisation value, such as is + * found in extension *.i18n.php files, iterate through a fallback sequence + * to merge the given data with an existing primary value. + * + * Returns true if any data from the extension array was used, false + * otherwise. + * @param array $codeSequence + * @param string $key + * @param mixed &$value + * @param mixed $fallbackValue + * @return bool + */ + protected function mergeExtensionItem( $codeSequence, $key, &$value, $fallbackValue ) { + $used = false; + foreach ( $codeSequence as $code ) { + if ( isset( $fallbackValue[$code] ) ) { + $this->mergeItem( $key, $value, $fallbackValue[$code] ); + $used = true; + } + } + + return $used; + } + + /** + * Gets the combined list of messages dirs from + * core and extensions + * + * @since 1.25 + * @return array + */ + public function getMessagesDirs() { + global $IP; + + return [ + 'core' => "$IP/languages/i18n", + 'exif' => "$IP/languages/i18n/exif", + 'api' => "$IP/includes/api/i18n", + 'rest' => "$IP/includes/Rest/i18n", + 'oojs-ui' => "$IP/resources/lib/ooui/i18n", + 'paramvalidator' => "$IP/includes/libs/ParamValidator/i18n", + ] + $this->options->get( MainConfigNames::MessagesDirs ); + } + + /** + * Load localisation data for a given language for both core and extensions + * and save it to the persistent cache store and the process cache + * @param string $code + * @throws MWException + */ + public function recache( $code ) { + if ( !$code ) { + throw new MWException( "Invalid language code requested" ); + } + 
$this->recachedLangs[ $code ] = true; + + # Initial values + $initialData = array_fill_keys( self::$allKeys, null ); + $coreData = $initialData; + $deps = []; + + # Load the primary localisation from the source file + $data = $this->readSourceFilesAndRegisterDeps( $code, $deps ); + $this->logger->debug( __METHOD__ . ": got localisation for $code from source" ); + + # Merge primary localisation + foreach ( $data as $key => $value ) { + $this->mergeItem( $key, $coreData[ $key ], $value ); + } + + # Fill in the fallback if it's not there already + // @phan-suppress-next-line PhanSuspiciousValueComparison + if ( ( $coreData['fallback'] === null || $coreData['fallback'] === false ) && $code === 'en' ) { + $coreData['fallback'] = false; + $coreData['originalFallbackSequence'] = $coreData['fallbackSequence'] = []; + } else { + if ( $coreData['fallback'] !== null ) { + $coreData['fallbackSequence'] = array_map( 'trim', explode( ',', $coreData['fallback'] ) ); + } else { + $coreData['fallbackSequence'] = []; + } + $len = count( $coreData['fallbackSequence'] ); + + # Before we add the 'en' fallback for messages, keep a copy of + # the original fallback sequence + $coreData['originalFallbackSequence'] = $coreData['fallbackSequence']; + + # Ensure that the sequence ends at 'en' for messages + if ( !$len || $coreData['fallbackSequence'][$len - 1] !== 'en' ) { + $coreData['fallbackSequence'][] = 'en'; + } + } + + $codeSequence = array_merge( [ $code ], $coreData['fallbackSequence'] ); + $messageDirs = $this->getMessagesDirs(); + + # Load non-JSON localisation data for extensions + $extensionData = array_fill_keys( $codeSequence, $initialData ); + foreach ( $this->options->get( MainConfigNames::ExtensionMessagesFiles ) as $extension => $fileName ) { + if ( isset( $messageDirs[$extension] ) ) { + # This extension has JSON message data; skip the PHP shim + continue; + } + + $data = $this->readPHPFile( $fileName, 'extension' ); + $used = false; + + foreach ( $data as $key => $item ) 
{ + foreach ( $codeSequence as $csCode ) { + if ( isset( $item[$csCode] ) ) { + $this->mergeItem( $key, $extensionData[$csCode][$key], $item[$csCode] ); + $used = true; + } + } + } + + if ( $used ) { + $deps[] = new FileDependency( $fileName ); + } + } + + # Load the localisation data for each fallback, then merge it into the full array + $allData = $initialData; + foreach ( $codeSequence as $csCode ) { + $csData = $initialData; + + # Load core messages and the extension localisations. + foreach ( $messageDirs as $dirs ) { + foreach ( (array)$dirs as $dir ) { + $fileName = "$dir/$csCode.json"; + $data = $this->readJSONFile( $fileName ); + + foreach ( $data as $key => $item ) { + $this->mergeItem( $key, $csData[$key], $item ); + } + + $deps[] = new FileDependency( $fileName ); + } + } + + # Merge non-JSON extension data + if ( isset( $extensionData[$csCode] ) ) { + foreach ( $extensionData[$csCode] as $key => $item ) { + $this->mergeItem( $key, $csData[$key], $item ); + } + } + + if ( $csCode === $code ) { + # Merge core data into extension data + foreach ( $coreData as $key => $item ) { + $this->mergeItem( $key, $csData[$key], $item ); + } + } else { + # Load the secondary localisation from the source file to + # avoid infinite cycles on cyclic fallbacks + $fbData = $this->readSourceFilesAndRegisterDeps( $csCode, $deps ); + # Only merge the keys that make sense to merge + foreach ( self::$allKeys as $key ) { + if ( !isset( $fbData[ $key ] ) ) { + continue; + } + + if ( ( $coreData[ $key ] ) === null || $this->isMergeableKey( $key ) ) { + $this->mergeItem( $key, $csData[ $key ], $fbData[ $key ] ); + } + } + } + + # Allow extensions an opportunity to adjust the data for this + # fallback + $this->hookRunner->onLocalisationCacheRecacheFallback( $this, $csCode, $csData ); + + # Merge the data for this fallback into the final array + if ( $csCode === $code ) { + $allData = $csData; + } else { + foreach ( self::$allKeys as $key ) { + if ( !isset( $csData[$key] ) ) { + 
continue; + } + + // @phan-suppress-next-line PhanTypeArraySuspiciousNullable + if ( $allData[$key] === null || $this->isMergeableKey( $key ) ) { + $this->mergeItem( $key, $allData[$key], $csData[$key] ); + } + } + } + } + + # Add cache dependencies for any referenced globals + $deps['wgExtensionMessagesFiles'] = new GlobalDependency( 'wgExtensionMessagesFiles' ); + // The 'MessagesDirs' config setting is used in LocalisationCache::getMessagesDirs(). + // We use the key 'wgMessagesDirs' for historical reasons. + $deps['wgMessagesDirs'] = new MainConfigDependency( MainConfigNames::MessagesDirs ); + $deps['version'] = new ConstantDependency( 'LocalisationCache::VERSION' ); + + # Add dependencies to the cache entry + $allData['deps'] = $deps; + + # Replace spaces with underscores in namespace names + $allData['namespaceNames'] = str_replace( ' ', '_', $allData['namespaceNames'] ); + + # And do the same for special page aliases. $page is an array. + foreach ( $allData['specialPageAliases'] as &$page ) { + $page = str_replace( ' ', '_', $page ); + } + # Decouple the reference to prevent accidental damage + unset( $page ); + + # If there were no plural rules, return an empty array + if ( $allData['pluralRules'] === null ) { + $allData['pluralRules'] = []; + } + if ( $allData['compiledPluralRules'] === null ) { + $allData['compiledPluralRules'] = []; + } + # If there were no plural rule types, return an empty array + if ( $allData['pluralRuleTypes'] === null ) { + $allData['pluralRuleTypes'] = []; + } + + # Set the list keys + $allData['list'] = []; + foreach ( self::$splitKeys as $key ) { + $allData['list'][$key] = array_keys( $allData[$key] ); + } + # Run hooks + $unused = true; // Used to be $purgeBlobs, removed in 1.34 + $this->hookRunner->onLocalisationCacheRecache( $this, $code, $allData, $unused ); + + // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive + if ( $allData['namespaceNames'] === null ) { + throw new MWException( __METHOD__ . 
': Localisation data failed validation check! ' . + 'Check that your languages/messages/MessagesEn.php file is intact.' ); + } + + # Set the preload key + $allData['preload'] = $this->buildPreload( $allData ); + + # Save to the process cache and register the items loaded + $this->data[$code] = $allData; + foreach ( $allData as $key => $item ) { + $this->loadedItems[$code][$key] = true; + } + + # Save to the persistent cache + $this->store->startWrite( $code ); + foreach ( $allData as $key => $value ) { + if ( in_array( $key, self::$splitKeys ) ) { + foreach ( $value as $subkey => $subvalue ) { + $this->store->set( "$key:$subkey", $subvalue ); + } + } else { + $this->store->set( $key, $value ); + } + } + $this->store->finishWrite(); + + # Clear out the MessageBlobStore + # HACK: If using a null (i.e. disabled) storage backend, we + # can't write to the MessageBlobStore either + if ( !$this->store instanceof LCStoreNull ) { + foreach ( $this->clearStoreCallbacks as $callback ) { + $callback(); + } + } + } + + /** + * Build the preload item from the given pre-cache data. + * + * The preload item will be loaded automatically, improving performance + * for the commonly-requested items it contains. + * @param array $data + * @return array + */ + protected function buildPreload( $data ) { + $preload = [ 'messages' => [] ]; + foreach ( self::$preloadedKeys as $key ) { + $preload[$key] = $data[$key]; + } + + foreach ( $data['preloadedMessages'] as $subkey ) { + $subitem = $data['messages'][$subkey] ?? null; + $preload['messages'][$subkey] = $subitem; + } + + return $preload; + } + + /** + * Unload the data for a given language from the object cache. + * Reduces memory usage. 
+ * @param string $code + */ + public function unload( $code ) { + unset( $this->data[$code] ); + unset( $this->loadedItems[$code] ); + unset( $this->loadedSubitems[$code] ); + unset( $this->initialisedLangs[$code] ); + unset( $this->shallowFallbacks[$code] ); + + foreach ( $this->shallowFallbacks as $shallowCode => $fbCode ) { + if ( $fbCode === $code ) { + $this->unload( $shallowCode ); + } + } + } + + /** + * Unload all data + */ + public function unloadAll() { + foreach ( $this->initialisedLangs as $lang => $unused ) { + $this->unload( $lang ); + } + } + + /** + * Disable the storage backend + */ + public function disableBackend() { + $this->store = new LCStoreNull; + $this->manualRecache = false; + } +} diff --git a/includes/language/LocalisationCacheBulkLoad.php b/includes/language/LocalisationCacheBulkLoad.php new file mode 100644 index 000000000000..5c962d3a040e --- /dev/null +++ b/includes/language/LocalisationCacheBulkLoad.php @@ -0,0 +1,126 @@ +<?php +/** + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + */ + +/** + * A localisation cache optimised for loading large amounts of data for many + * languages. Used by rebuildLocalisationCache.php. 
/**
 * Localisation cache variant optimised for loading large amounts of data for
 * many languages. Used by rebuildLocalisationCache.php.
 *
 * It keeps the contents of data files in a per-instance cache and limits how
 * many languages stay loaded in $this->data by unloading the least recently
 * used ones after each recache().
 */
class LocalisationCacheBulkLoad extends LocalisationCache {

	/**
	 * Contents of data files, keyed by file name and then by file type.
	 * Files of type 'core' are stored as serialize() strings (the original
	 * author notes this avoids using ~1GB of RAM during a recache); every
	 * other type is stored as returned by the parent reader.
	 */
	private $fileCache = [];

	/**
	 * Map of (language code => true) ordered by recency of use. PHP arrays
	 * preserve insertion order, so re-inserting a code moves it to the end:
	 * the first key is always the next candidate for unloading.
	 */
	private $mruLangs = [];

	/**
	 * Maximum number of languages that may stay loaded in $this->data
	 */
	private $maxLoadedLangs = 10;

	/**
	 * Read a data file, going through $this->fileCache.
	 *
	 * @param string $fileName
	 * @param string $fileType
	 * @return array|mixed
	 */
	protected function readPHPFile( $fileName, $fileType ) {
		$isCore = ( $fileType === 'core' );

		if ( isset( $this->fileCache[$fileName][$fileType] ) ) {
			$cached = $this->fileCache[$fileName][$fileType];

			// Core entries were stored serialized, see below
			return $isCore ? unserialize( $cached ) : $cached;
		}

		$data = parent::readPHPFile( $fileName, $fileType );
		$this->fileCache[$fileName][$fileType] = $isCore ? serialize( $data ) : $data;

		return $data;
	}

	/**
	 * @param string $code
	 * @param string $key
	 * @return mixed
	 */
	public function getItem( $code, $key ) {
		$this->markRecentlyUsed( $code );

		return parent::getItem( $code, $key );
	}

	/**
	 * @param string $code
	 * @param string $key
	 * @param string $subkey
	 * @return mixed
	 */
	public function getSubitem( $code, $key, $subkey ) {
		$this->markRecentlyUsed( $code );

		return parent::getSubitem( $code, $key, $subkey );
	}

	/**
	 * Recache a language, then mark it as most recently used and trim the
	 * set of loaded languages.
	 *
	 * @param string $code
	 */
	public function recache( $code ) {
		parent::recache( $code );
		$this->markRecentlyUsed( $code );
		$this->trimCache();
	}

	/**
	 * @param string $code
	 */
	public function unload( $code ) {
		unset( $this->mruLangs[$code] );
		parent::unload( $code );
	}

	/**
	 * Unload the least recently used languages until no more than
	 * $this->maxLoadedLangs remain in $this->data, or until there is no
	 * tracked language left to unload.
	 */
	protected function trimCache() {
		while ( $this->mruLangs && count( $this->data ) > $this->maxLoadedLangs ) {
			// The oldest entry is the first key of the map
			reset( $this->mruLangs );
			$code = key( $this->mruLangs );
			wfDebug( __METHOD__ . ": unloading $code" );
			$this->unload( $code );
		}
	}

	/**
	 * Move a language code to the "most recently used" end of $this->mruLangs.
	 *
	 * @param string $code
	 */
	private function markRecentlyUsed( $code ) {
		// Remove first so that the re-insert lands at the end of the array
		unset( $this->mruLangs[$code] );
		$this->mruLangs[$code] = true;
	}

}
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +use MediaWiki\HookContainer\HookContainer; +use MediaWiki\HookContainer\HookRunner; +use MediaWiki\Languages\LanguageConverterFactory; +use MediaWiki\Languages\LanguageFactory; +use MediaWiki\Languages\LanguageFallback; +use MediaWiki\Languages\LanguageNameUtils; +use MediaWiki\Linker\LinkTarget; +use MediaWiki\Logger\LoggerFactory; +use MediaWiki\MainConfigNames; +use MediaWiki\MediaWikiServices; +use MediaWiki\Page\PageReference; +use MediaWiki\Page\PageReferenceValue; +use MediaWiki\Revision\SlotRecord; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerInterface; +use Wikimedia\Rdbms\Database; +use Wikimedia\RequestTimeout\TimeoutException; +use Wikimedia\ScopedCallback; + +/** + * MediaWiki message cache structure version. + * Bump this whenever the message cache format has changed. + */ +define( 'MSG_CACHE_VERSION', 2 ); + +/** + * Cache of messages that are defined by MediaWiki namespace pages or by hooks + * + * Performs various MediaWiki namespace-related functions + * @ingroup Cache + */ +class MessageCache implements LoggerAwareInterface { + /** + * The size of the MapCacheLRU which stores message data. The maximum + * number of languages which can be efficiently loaded in a given request. + */ + public const MAX_REQUEST_LANGUAGES = 10; + + private const FOR_UPDATE = 1; // force message reload + + /** How long to wait for memcached locks */ + private const WAIT_SEC = 15; + /** How long memcached locks last */ + private const LOCK_TTL = 30; + + /** + * Lifetime for cache, for keys stored in $wanCache, in seconds. 
/**
 * Normalize message key input: spaces become underscores and the first
 * character is lowercased.
 *
 * @param string $key Input message key to be normalized
 * @return string Normalized message key
 */
public static function normalizeKey( $key ) {
	$normalized = strtr( $key, ' ', '_' );

	// T300792: an empty key has no first character to lowercase
	if ( $normalized === '' ) {
		return $normalized;
	}

	// First byte is outside the ASCII range: let the content language
	// lowercase the first character.
	if ( ord( $normalized ) >= 128 ) {
		return MediaWikiServices::getInstance()->getContentLanguage()->lcfirst( $normalized );
	}

	// ASCII fast path: lowercase the first byte in place
	$normalized[0] = strtolower( $normalized[0] );

	return $normalized;
}
/**
 * Get the ParserOptions used for transforming messages, creating them on
 * first use.
 *
 * @return ParserOptions
 */
private function getParserOptions() {
	if ( $this->mParserOptions ) {
		return $this->mParserOptions;
	}

	$mainContext = RequestContext::getMain();
	if ( !$mainContext->getUser()->isSafeToLoad() ) {
		// The context user cannot be used yet. Hand out anonymous options
		// for this call only; deliberately not stored on the instance.
		$anonOptions = ParserOptions::newFromAnon();
		$anonOptions->setAllowUnsafeRawHtml( false );

		return $anonOptions;
	}

	$this->mParserOptions = ParserOptions::newFromContext( $mainContext );
	// Message parameters may come from malicious sources, so as a
	// precaution the <html> parser tag is disabled when parsing messages.
	$this->mParserOptions->setAllowUnsafeRawHtml( false );

	return $this->mParserOptions;
}
/**
 * Loads messages from caches or from database in this order:
 * (1) local message cache (if $wgUseLocalMessageCache is enabled)
 * (2) memcached
 * (3) from the database.
 *
 * When successfully loading from (2) or (3), all higher level caches are
 * updated for the newest version.
 *
 * If the message cache is disabled (member variable mDisable is true),
 * nothing is loaded and true is returned. mDisable is also set by this
 * method itself when every loading attempt fails.
 *
 * Returns true if the process cache is already populated or was populated
 * successfully (possibly from a stale copy), and true if MessageCache is
 * disabled.
 *
 * @param string $code Language code to load messages for
 * @param int|null $mode Use MessageCache::FOR_UPDATE to skip process cache [optional]
 * @throws InvalidArgumentException If $code is not a string
 * @throws LogicException If the process cache has no 'VERSION' field for $code afterwards
 * @return bool
 */
protected function load( $code, $mode = null ) {
	if ( !is_string( $code ) ) {
		throw new InvalidArgumentException( "Missing language code" );
	}

	# Don't do double loading, unless the caller explicitly asked for a reload
	if ( $this->isLanguageLoaded( $code ) && $mode != self::FOR_UPDATE ) {
		return true;
	}

	# Log (once per process) that the message cache is disabled
	if ( $this->mDisable ) {
		static $shownDisabled = false;
		if ( !$shownDisabled ) {
			$this->logger->debug( __METHOD__ . ': disabled' );
			$shownDisabled = true;
		}

		return true;
	}

	# Loading code starts
	$success = false; # Keep track of success
	$staleCache = false; # a cache array with expired data, or false if none has been loaded
	$where = []; # Debug info, delayed to avoid spamming debug log too much

	# Hash of the contents is stored in memcache, to detect if data-center cache
	# or local cache goes out of date (e.g. due to replace() on some other server)
	list( $hash, $hashVolatile ) = $this->getValidationHash( $code );
	$this->cacheVolatile[$code] = $hashVolatile;

	# Try the local cache and check against the cluster hash key...
	$cache = $this->getLocalCache( $code );
	if ( !$cache ) {
		$where[] = 'local cache is empty';
	} elseif ( !isset( $cache['HASH'] ) || $cache['HASH'] !== $hash ) {
		$where[] = 'local cache has the wrong hash';
		$staleCache = $cache;
	} elseif ( $this->isCacheExpired( $cache ) ) {
		$where[] = 'local cache is expired';
		$staleCache = $cache;
	} elseif ( $hashVolatile ) {
		$where[] = 'local cache validation key is expired/volatile';
		$staleCache = $cache;
	} else {
		$where[] = 'got from local cache';
		$this->cache->set( $code, $cache );
		$success = true;
	}

	if ( !$success ) {
		$cacheKey = $this->clusterCache->makeKey( 'messages', $code );
		# Try the global cache. If it is empty, try to acquire a lock. If
		# the lock can't be acquired, wait for the other thread to finish
		# and then try the global cache a second time.
		# The loop body runs at most twice ($failedAttempts 0 and 1).
		for ( $failedAttempts = 0; $failedAttempts <= 1; $failedAttempts++ ) {
			if ( $hashVolatile && $staleCache ) {
				# Do not bother fetching the whole cache blob to avoid I/O.
				# Instead, just try to get the non-blocking lock in
				# loadFromDBWithLock() below, and use the local stale value
				# if it was not acquired.
				$where[] = 'global cache is presumed expired';
			} else {
				$cache = $this->clusterCache->get( $cacheKey );
				if ( !$cache ) {
					$where[] = 'global cache is empty';
				} elseif ( $this->isCacheExpired( $cache ) ) {
					$where[] = 'global cache is expired';
					$staleCache = $cache;
				} elseif ( $hashVolatile ) {
					# DB results are replica DB lag prone until the holdoff TTL passes.
					# By then, updates should be reflected in loadFromDBWithLock().
					# One thread regenerates the cache while others use old values.
					$where[] = 'global cache is expired/volatile';
					$staleCache = $cache;
				} else {
					$where[] = 'got from global cache';
					$this->cache->set( $code, $cache );
					# Copy the fresh global value down into the local cache
					$this->saveToCaches( $cache, 'local-only', $code );
					$success = true;
				}
			}

			if ( $success ) {
				# Done, no need to retry
				break;
			}

			# We need to call loadFromDB. Limit the concurrency to one process.
			# This prevents the site from going down when the cache expires.
			# Note that the DB slam protection lock here is non-blocking.
			$loadStatus = $this->loadFromDBWithLock( $code, $where, $mode );
			if ( $loadStatus === true ) {
				$success = true;
				break;
			} elseif ( $staleCache ) {
				# Use the stale cache while some other thread constructs the new one
				$where[] = 'using stale cache';
				$this->cache->set( $code, $staleCache );
				$success = true;
				break;
			} elseif ( $failedAttempts > 0 ) {
				# Already blocked once, so avoid another lock/unlock cycle.
				# This case will typically be hit if memcached is down, or if
				# loadFromDB() takes longer than the lock wait timeout
				# (self::WAIT_SEC, the default of getReentrantScopedLock()).
				$where[] = "could not acquire status key.";
				break;
			} elseif ( $loadStatus === 'cantacquire' ) {
				# Wait for the other thread to finish, then retry. Normally,
				# the memcached get() will then yield the other thread's result.
				# The returned lock is not kept, so the call is only used to block.
				$where[] = 'waited for other thread to complete';
				$this->getReentrantScopedLock( $cacheKey );
			} else {
				# Disable cache; $loadStatus is 'disabled'
				break;
			}
		}
	}

	if ( !$success ) {
		$where[] = 'loading FAILED - cache is disabled';
		$this->mDisable = true;
		$this->cache->set( $code, [] );
		$this->logger->error( __METHOD__ . ": Failed to load $code" );
		# This used to throw an exception, but that led to nasty side effects like
		# the whole wiki being instantly down if the memcached server died
	}

	# NOTE(review): on the failure path above the process cache was just set
	# to [], which has no 'VERSION' field, so isLanguageLoaded() presumably
	# returns false and this throws after all - confirm whether that is intended.
	if ( !$this->isLanguageLoaded( $code ) ) {
		throw new LogicException( "Process cache for '$code' should be set by now." );
	}

	# Emit the collected debug notes as a single log line
	$info = implode( ', ', $where );
	$this->logger->debug( __METHOD__ . ": Loading $code... $info" );

	return $success;
}
/**
 * Loads cacheable messages from the database. Messages bigger than
 * $wgMaxMsgCacheEntrySize are assigned a special value ('!TOO BIG'), and
 * are loaded on-demand from the database later.
 *
 * The returned array maps page titles to " <text>", '!TOO BIG' or '!ERROR',
 * plus the administrative keys 'VERSION', 'HASH' and 'EXPIRY'.
 *
 * @param string $code Language code
 * @param int|null $mode Use MessageCache::FOR_UPDATE to read from DB_PRIMARY
 *   instead of DB_REPLICA
 * @return array Loaded messages for storing in caches
 */
protected function loadFromDB( $code, $mode = null ) {
	$maxMsgCacheEntrySize = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::MaxMsgCacheEntrySize );
	$adaptiveMessageCache = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::AdaptiveMessageCache );
	// (T164666) The query here performs really poorly on WMF's
	// contributions replicas. We don't have a way to say "any group except
	// contributions", so for the moment let's specify 'api'.
	// @todo: Get rid of this hack.
	$dbr = wfGetDB( ( $mode == self::FOR_UPDATE ) ? DB_PRIMARY : DB_REPLICA, 'api' );

	$cache = [];

	$mostused = []; // list of "<cased message key>/<code>"
	if ( $adaptiveMessageCache && $code !== $this->contLangCode ) {
		// Adaptive mode: only look at the titles already present in the
		// content language cache, suffixed with "/<code>".
		if ( !$this->cache->has( $this->contLangCode ) ) {
			$this->load( $this->contLangCode );
		}
		// NOTE(review): the key list presumably also contains administrative
		// keys such as VERSION/HASH/EXPIRY of the content language cache;
		// they would just not match any page title - confirm.
		$mostused = array_keys( $this->cache->get( $this->contLangCode ) );
		foreach ( $mostused as $key => $value ) {
			$mostused[$key] = "$value/$code";
		}
	}

	// Common conditions
	$conds = [
		'page_is_redirect' => 0,
		'page_namespace' => NS_MEDIAWIKI,
	];
	if ( count( $mostused ) ) {
		// Exact title list from adaptive mode
		$conds['page_title'] = $mostused;
	} elseif ( $code !== $this->contLangCode ) {
		// Any title ending in "/<code>"
		$conds[] = 'page_title' . $dbr->buildLike( $dbr->anyString(), '/', $code );
	} else {
		# Effectively disallows use of '/' character in NS_MEDIAWIKI for uses
		# other than language code.
		$conds[] = 'page_title NOT' .
			$dbr->buildLike( $dbr->anyString(), '/', $dbr->anyString() );
	}

	// Set the stubs for oversized software-defined messages in the main cache map
	$res = $dbr->select(
		'page',
		[ 'page_title', 'page_latest' ],
		array_merge( $conds, [ 'page_len > ' . intval( $maxMsgCacheEntrySize ) ] ),
		__METHOD__ . "($code)-big"
	);
	foreach ( $res as $row ) {
		// Include entries/stubs for all keys in $mostused in adaptive mode
		if ( $adaptiveMessageCache || $this->isMainCacheable( $row->page_title )
		) {
			$cache[$row->page_title] = '!TOO BIG';
		}
		// At least include revision ID so page changes are reflected in the hash
		$cache['EXCESSIVE'][$row->page_title] = $row->page_latest;
	}

	// Can not inject the RevisionStore as it would break the installer since
	// it instantiates MessageCache before the DB.
	$revisionStore = MediaWikiServices::getInstance()->getRevisionStore();
	// Set the text for small software-defined messages in the main cache map
	$revQuery = $revisionStore->getQueryInfo( [ 'page' ] );

	// T231196: MySQL/MariaDB (10.1.37) can sometimes irrationally decide that querying `actor` then
	// `revision` then `page` is somehow better than starting with `page`. Tell it not to reorder the
	// query (and also reorder it ourselves because as generated by RevisionStore it'll have
	// `revision` first rather than `page`).
	$revQuery['joins']['revision'] = $revQuery['joins']['page'];
	unset( $revQuery['joins']['page'] );
	// It isn't actually necessary to reorder $revQuery['tables'] as Database does the right thing
	// when join conditions are given for all joins, but Gergő is wary of relying on that so pull
	// `page` to the start.
	$revQuery['tables'] = array_merge(
		[ 'page' ],
		array_diff( $revQuery['tables'], [ 'page' ] )
	);

	$res = $dbr->select(
		$revQuery['tables'],
		$revQuery['fields'],
		array_merge( $conds, [
			'page_len <= ' . intval( $maxMsgCacheEntrySize ),
			'page_latest = rev_id' // get the latest revision only
		] ),
		__METHOD__ . "($code)-small",
		[ 'STRAIGHT_JOIN' ],
		$revQuery['joins']
	);
	// Build revision objects (main slot, with content) for the whole batch;
	// if that fails, every row below ends up as '!ERROR'.
	$result = $revisionStore->newRevisionsFromBatch( $res, [
		'slots' => [ SlotRecord::MAIN ],
		'content' => true
	] );
	$revisions = $result->isOK() ? $result->getValue() : [];
	foreach ( $res as $row ) {
		// Include entries/stubs for all keys in $mostused in adaptive mode
		if ( $adaptiveMessageCache || $this->isMainCacheable( $row->page_title )
		) {
			try {
				$rev = $revisions[$row->rev_id] ?? null;
				$content = $rev ? $rev->getContent( SlotRecord::MAIN ) : null;
				$text = $this->getMessageTextFromContent( $content );
			} catch ( TimeoutException $e ) {
				// Request timeouts must propagate, never be swallowed
				throw $e;
			} catch ( Exception $ex ) {
				$text = false;
			}

			if ( !is_string( $text ) ) {
				$entry = '!ERROR';
				$this->logger->error(
					__METHOD__
					. ": failed to load message page text for {$row->page_title} ($code)"
				);
			} else {
				// Stored message text is prefixed with a single space
				$entry = ' ' . $text;
			}
			$cache[$row->page_title] = $entry;
		} else {
			// T193271: cache object gets too big and slow to generate.
			// At least include revision ID so page changes are reflected in the hash.
			$cache['EXCESSIVE'][$row->page_title] = $row->page_latest;
		}
	}

	$cache['VERSION'] = MSG_CACHE_VERSION;
	ksort( $cache );

	# Hash for validating the local cache (APC). At this point $cache still
	# contains 'EXCESSIVE' (revision IDs of oversized or non-cacheable pages),
	# so changes to those pages alter the hash too; the key is removed again
	# right after hashing.
	$cache['HASH'] = md5( serialize( $cache ) );
	$cache['EXPIRY'] = wfTimestamp( TS_MW, time() + self::WAN_TTL );
	unset( $cache['EXCESSIVE'] ); // only needed for hash

	return $cache;
}
/**
 * Can the given DB key be added to the main cache blob? To reduce the
 * impact of abuse of the MediaWiki namespace by {{int:}} and CentralNotice,
 * this is only true if the page overrides a predefined message (or is a
 * conversion table page).
 *
 * @param string $name Message name (possibly with /code suffix)
 * @param string|null $code The language code. If this is null, message
 *   presence is checked against the bulk-loaded list of content language
 *   messages. Otherwise, presence is detected by loading the specified
 *   message for that language.
 * @return bool
 */
private function isMainCacheable( $name, $code = null ) {
	// Lowercase the first letter
	$lcName = $this->contLang->lcfirst( $name );

	// Common conversion table pages are always included. This also avoids
	// problems with Installer::parse() bailing out due to disallowed DB
	// queries (T207979).
	if ( strpos( $lcName, 'conversiontable/' ) === 0 ) {
		return true;
	}

	// Strip a trailing /code suffix
	$msgKey = preg_replace( '/\/[a-z0-9-]{2,}$/', '', $lcName );

	if ( $code !== null ) {
		// Look up the individual subitem
		return $this->localisationCache->getSubitem( $code, 'messages', $msgKey ) !== null;
	}

	// Bulk mode: build the lookup map of message names on first use
	if ( $this->systemMessageNames === null ) {
		$names = $this->localisationCache->getSubitemList( $this->contLangCode, 'messages' );
		$this->systemMessageNames = array_fill_keys( $names, true );
	}

	return isset( $this->systemMessageNames[$msgKey] );
}
/**
 * Refresh the cached messages of one language for a list of changed pages:
 * re-read each page with READ_LATEST, update the process and shared caches
 * under a lock, touch the check key, then update the message blob store and
 * run the MessageCacheReplace hook.
 *
 * If the lock cannot be acquired, an error is logged per title and nothing
 * is updated.
 *
 * @param string $code
 * @param array[] $replacements List of (title, message key) pairs
 * @throws MWException
 */
public function refreshAndReplaceInternal( $code, array $replacements ) {
	$maxMsgCacheEntrySize = MediaWikiServices::getInstance()->getMainConfig()
		->get( MainConfigNames::MaxMsgCacheEntrySize );

	// Allow one caller at a time to avoid race conditions
	$scopedLock = $this->getReentrantScopedLock(
		$this->clusterCache->makeKey( 'messages', $code )
	);
	if ( !$scopedLock ) {
		foreach ( $replacements as list( $title ) ) {
			$this->logger->error(
				__METHOD__ . ': could not acquire lock to update {title} ({code})',
				[ 'title' => $title, 'code' => $code ] );
		}

		return;
	}

	// Load the existing cache to update it in the local DC cache.
	// The other DCs will see a hash mismatch.
	if ( $this->load( $code, self::FOR_UPDATE ) ) {
		$cache = $this->cache->get( $code );
	} else {
		// Err? Fall back to loading from the database.
		$cache = $this->loadFromDB( $code, self::FOR_UPDATE );
	}
	// Check if individual cache keys should exist and update cache accordingly
	$newTextByTitle = []; // map of (title => content)
	$newBigTitles = []; // map of (title => latest revision ID), like EXCESSIVE in loadFromDB()
	// Can not inject the WikiPageFactory as it would break the installer since
	// it instantiates MessageCache before the DB.
	$wikiPageFactory = MediaWikiServices::getInstance()->getWikiPageFactory();
	foreach ( $replacements as list( $title ) ) {
		$page = $wikiPageFactory->newFromTitle( Title::makeTitle( NS_MEDIAWIKI, $title ) );
		$page->loadPageData( $page::READ_LATEST );
		$text = $this->getMessageTextFromContent( $page->getContent() );
		// Remember the text for the blob store update later on
		// (only null is replaced by ''; any other non-string value is kept as-is)
		$newTextByTitle[$title] = $text ?? '';
		// Note that if $text is not a string, then $cache gets a !NONEXISTENT entry
		if ( !is_string( $text ) ) {
			$cache[$title] = '!NONEXISTENT';
		} elseif ( strlen( $text ) > $maxMsgCacheEntrySize ) {
			$cache[$title] = '!TOO BIG';
			$newBigTitles[$title] = $page->getLatest();
		} else {
			$cache[$title] = ' ' . $text;
		}
	}
	// Update HASH for the new key. Incorporates various administrative keys,
	// including the old HASH (and thereby the EXCESSIVE value from loadFromDB()
	// and previous replace() calls), but that doesn't really matter since we
	// only ever compare it for equality with a copy saved by saveToCaches().
	$cache['HASH'] = md5( serialize( $cache + [ 'EXCESSIVE' => $newBigTitles ] ) );
	// Update the too-big WAN cache entries now that we have the new HASH
	foreach ( $newBigTitles as $title => $id ) {
		// Match logic of loadCachedMessagePageEntry()
		$this->wanCache->set(
			$this->bigMessageCacheKey( $cache['HASH'], $title ),
			' ' . $newTextByTitle[$title],
			self::WAN_TTL
		);
	}
	// Mark this cache as definitely being "latest" (non-volatile) so
	// load() calls do not try to refresh the cache with replica DB data
	$cache['LATEST'] = time();
	// Update the process cache
	$this->cache->set( $code, $cache );
	// Pre-emptively update the local datacenter cache so things like edit filter and
	// prevented changes are reflected immediately; these often use MediaWiki: pages.
	// The datacenter handling replace() calls should be the same one handling edits
	// as they require HTTP POST.
	$this->saveToCaches( $cache, 'all', $code );
	// Release the lock now that the cache is saved
	ScopedCallback::consume( $scopedLock );

	// Relay the purge. Touching this check key expires cache contents
	// and local cache (APC) validation hash across all datacenters.
	$this->wanCache->touchCheckKey( $this->getCheckKey( $code ) );

	// Purge the messages in the message blob store and fire any hook handlers
	$blobStore = MediaWikiServices::getInstance()->getResourceLoader()->getMessageBlobStore();
	foreach ( $replacements as list( $title, $msg ) ) {
		$blobStore->updateMessage( $this->contLang->lcfirst( $msg ) );
		$this->hookRunner->onMessageCacheReplace( $title, $newTextByTitle[$title] );
	}
}
/**
 * Get the md5 used to validate the local APC cache
 *
 * @param string $code
 * @return array (hash or false, bool expiry/volatility status)
 */
protected function getValidationHash( $code ) {
	$curTTL = null;
	$hashKey = $this->wanCache->makeKey( 'messages', $code, 'hash', 'v1' );
	$checkKeys = [ $this->getCheckKey( $code ) ];
	$stored = $this->wanCache->get( $hashKey, $curTTL, $checkKeys );

	if ( !$stored ) {
		// No hash found at all; cache must regenerate to be safe
		return [ false, true ];
	}

	if ( ( time() - $stored['latest'] ) < WANObjectCache::TTL_MINUTE ) {
		// Cache was recently updated via replace() and should be up-to-date.
		// That method is only called in the primary datacenter and uses FOR_UPDATE.
		// Also, it is unlikely that the current datacenter is *now* secondary one.
		$volatile = false;
	} else {
		// See if the "check" key was bumped after the hash was generated
		$volatile = ( $curTTL < 0 );
	}

	return [ $stored['hash'], $volatile ];
}
/**
 * Store the md5 used to validate the local cache, together with the
 * 'LATEST' timestamp of the cache array (0 when it has none).
 *
 * If $cache has a 'LATEST' UNIX timestamp key, then the hash will not
 * be treated as "volatile" by getValidationHash() while that timestamp is
 * less than WANObjectCache::TTL_MINUTE old.
 *
 * @param string $code
 * @param array $cache Cached messages with a version
 */
protected function setValidationHash( $code, array $cache ) {
	$hashKey = $this->wanCache->makeKey( 'messages', $code, 'hash', 'v1' );
	$payload = [
		'hash' => $cache['HASH'],
		'latest' => $cache['LATEST'] ?? 0
	];

	$this->wanCache->set( $hashKey, $payload, WANObjectCache::TTL_INDEFINITE );
}
+ * @param bool|string|Language $langcode Code of the language to get the message for. + * - If string and a valid code, will create a standard language object + * - If string but not a valid code, will create a basic language object + * - If boolean and false, create object from the current users language + * - If boolean and true, create object from the wikis content language + * - If language object, use it as given + * + * @throws MWException When given an invalid key + * @return string|false False if the message doesn't exist, otherwise the + * message (which can be empty) + */ + public function get( $key, $useDB = true, $langcode = true ) { + if ( is_int( $key ) ) { + // Fix numerical strings that somehow become ints + // on their way here + $key = (string)$key; + } elseif ( !is_string( $key ) ) { + throw new MWException( 'Non-string key given' ); + } elseif ( $key === '' ) { + // Shortcut: the empty key is always missing + return false; + } + + // Normalise title-case input (with some inlining) + $lckey = self::normalizeKey( $key ); + + $this->hookRunner->onMessageCache__get( $lckey ); + + // Loop through each language in the fallback list until we find something useful + $message = $this->getMessageFromFallbackChain( + wfGetLangObj( $langcode ), + $lckey, + !$this->mDisable && $useDB + ); + + // If we still have no message, maybe the key was in fact a full key so try that + if ( $message === false ) { + $parts = explode( '/', $lckey ); + // We may get calls for things that are http-urls from sidebar + // Let's not load nonexistent languages for those + // They usually have more than one slash. 
+ if ( count( $parts ) == 2 && $parts[1] !== '' ) { + $message = $this->localisationCache->getSubitem( $parts[1], 'messages', $parts[0] ); + if ( $message === null ) { + $message = false; + } + } + } + + // Post-processing if the message exists + if ( $message !== false ) { + // Fix whitespace + $message = str_replace( + [ + # Fix for trailing whitespace, removed by textarea + ' ', + # Fix for NBSP, converted to space by firefox + ' ', + ' ', + '­' + ], + [ + ' ', + "\u{00A0}", + "\u{00A0}", + "\u{00AD}" + ], + $message + ); + } + + return $message; + } + + /** + * Given a language, try and fetch messages from that language. + * + * Will also consider fallbacks of that language, the site language, and fallbacks for + * the site language. + * + * @see MessageCache::get + * @param Language|StubObject $lang Preferred language + * @param string $lckey Lowercase key for the message (as for localisation cache) + * @param bool $useDB Whether to include messages from the wiki database + * @return string|false The message, or false if not found + */ + protected function getMessageFromFallbackChain( $lang, $lckey, $useDB ) { + $alreadyTried = []; + + // First try the requested language. + $message = $this->getMessageForLang( $lang, $lckey, $useDB, $alreadyTried ); + if ( $message !== false ) { + return $message; + } + + // Now try checking the site language. + $message = $this->getMessageForLang( $this->contLang, $lckey, $useDB, $alreadyTried ); + return $message; + } + + /** + * Given a language, try and fetch messages from that language and its fallbacks. 
+ * + * @see MessageCache::get + * @param Language|StubObject $lang Preferred language + * @param string $lckey Lowercase key for the message (as for localisation cache) + * @param bool $useDB Whether to include messages from the wiki database + * @param bool[] &$alreadyTried Contains true for each language that has been tried already + * @return string|false The message, or false if not found + */ + private function getMessageForLang( $lang, $lckey, $useDB, &$alreadyTried ) { + $langcode = $lang->getCode(); + + // Try checking the database for the requested language + if ( $useDB ) { + $uckey = $this->contLang->ucfirst( $lckey ); + + if ( !isset( $alreadyTried[$langcode] ) ) { + $message = $this->getMsgFromNamespace( + $this->getMessagePageName( $langcode, $uckey ), + $langcode + ); + if ( $message !== false ) { + return $message; + } + $alreadyTried[$langcode] = true; + } + } else { + $uckey = null; + } + + // Check the CDB cache + $message = $lang->getMessage( $lckey ); + if ( $message !== null ) { + return $message; + } + + // Try checking the database for all of the fallback languages + if ( $useDB ) { + $fallbackChain = $this->languageFallback->getAll( $langcode ); + + foreach ( $fallbackChain as $code ) { + if ( isset( $alreadyTried[$code] ) ) { + continue; + } + + $message = $this->getMsgFromNamespace( + // @phan-suppress-next-line PhanTypeMismatchArgumentNullable uckey is set when used + $this->getMessagePageName( $code, $uckey ), $code ); + + if ( $message !== false ) { + return $message; + } + $alreadyTried[$code] = true; + } + } + + return false; + } + + /** + * Get the message page name for a given language + * + * @param string $langcode + * @param string $uckey Uppercase key for the message + * @return string The page name + */ + private function getMessagePageName( $langcode, $uckey ) { + if ( $langcode === $this->contLangCode ) { + // Messages created in the content language will not have the /lang extension + return $uckey; + } else { + return 
"$uckey/$langcode"; + } + } + + /** + * Get a message from the MediaWiki namespace, with caching. The key must + * first be converted to two-part lang/msg form if necessary. + * + * Unlike self::get(), this function doesn't resolve fallback chains, and + * some callers require this behavior. LanguageConverter::parseCachedTable() + * and self::get() are some examples in core. + * + * @param string $title Message cache key with initial uppercase letter + * @param string $code Code denoting the language to try + * @return string|false The message, or false if it does not exist or on error + */ + public function getMsgFromNamespace( $title, $code ) { + // Load all MediaWiki page definitions into cache. Note that individual keys + // already loaded into cache during this request remain in the cache, which + // includes the value of hook-defined messages. + $this->load( $code ); + + $entry = $this->cache->getField( $code, $title ); + + if ( $entry !== null ) { + // Message page exists as an override of a software messages + if ( substr( $entry, 0, 1 ) === ' ' ) { + // The message exists and is not '!TOO BIG' or '!ERROR' + return (string)substr( $entry, 1 ); + } elseif ( $entry === '!NONEXISTENT' ) { + // The text might be '-' or missing due to some data loss + return false; + } + // Load the message page, utilizing the individual message cache. + // If the page does not exist, there will be no hook handler fallbacks. + $entry = $this->loadCachedMessagePageEntry( + $title, + $code, + $this->cache->getField( $code, 'HASH' ) + ); + } else { + // Message page either does not exist or does not override a software message + if ( !$this->isMainCacheable( $title, $code ) ) { + // Message page does not override any software-defined message. A custom + // message might be defined to have content or settings specific to the wiki. + // Load the message page, utilizing the individual message cache as needed. 
+ $entry = $this->loadCachedMessagePageEntry( + $title, + $code, + $this->cache->getField( $code, 'HASH' ) + ); + } + if ( $entry === null || substr( $entry, 0, 1 ) !== ' ' ) { + // Message does not have a MediaWiki page definition; try hook handlers + $message = false; + // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args + $this->hookRunner->onMessagesPreLoad( $title, $message, $code ); + if ( $message !== false ) { + $this->cache->setField( $code, $title, ' ' . $message ); + } else { + $this->cache->setField( $code, $title, '!NONEXISTENT' ); + } + + return $message; + } + } + + if ( $entry !== false && substr( $entry, 0, 1 ) === ' ' ) { + if ( $this->cacheVolatile[$code] ) { + // Make sure that individual keys respect the WAN cache holdoff period too + $this->logger->debug( + __METHOD__ . ': loading volatile key \'{titleKey}\'', + [ 'titleKey' => $title, 'code' => $code ] ); + } else { + $this->cache->setField( $code, $title, $entry ); + } + // The message exists, so make sure a string is returned + return (string)substr( $entry, 1 ); + } + + $this->cache->setField( $code, $title, '!NONEXISTENT' ); + + return false; + } + + /** + * @param string $dbKey + * @param string $code + * @param string $hash + * @return string Either " <MESSAGE>" or "!NONEXISTANT" + */ + private function loadCachedMessagePageEntry( $dbKey, $code, $hash ) { + $fname = __METHOD__; + return $this->srvCache->getWithSetCallback( + $this->srvCache->makeKey( 'messages-big', $hash, $dbKey ), + BagOStuff::TTL_HOUR, + function () use ( $code, $dbKey, $hash, $fname ) { + return $this->wanCache->getWithSetCallback( + $this->bigMessageCacheKey( $hash, $dbKey ), + self::WAN_TTL, + function ( $oldValue, &$ttl, &$setOpts ) use ( $dbKey, $code, $fname ) { + // Try loading the message from the database + $setOpts += Database::getCacheSetOptions( wfGetDB( DB_REPLICA ) ); + // Use newKnownCurrent() to avoid querying revision/user tables + $title = Title::makeTitle( 
NS_MEDIAWIKI, $dbKey ); + // Injecting RevisionStore breaks installer since it + // instantiates MessageCache before DB. + $revision = MediaWikiServices::getInstance() + ->getRevisionLookup() + ->getKnownCurrentRevision( $title ); + if ( !$revision ) { + // The wiki doesn't have a local override page. Cache absence with normal TTL. + // When overrides are created, self::replace() takes care of the cache. + return '!NONEXISTENT'; + } + $content = $revision->getContent( SlotRecord::MAIN ); + if ( $content ) { + $message = $this->getMessageTextFromContent( $content ); + } else { + $this->logger->warning( + $fname . ': failed to load page text for \'{titleKey}\'', + [ 'titleKey' => $dbKey, 'code' => $code ] + ); + $message = null; + } + + if ( !is_string( $message ) ) { + // Revision failed to load Content, or Content is incompatible with wikitext. + // Possibly a temporary loading failure. + $ttl = 5; + + return '!NONEXISTENT'; + } + + return ' ' . $message; + } + ); + } + ); + } + + /** + * @param string $message + * @param bool $interface + * @param Language|null $language + * @param PageReference|null $page + * @return string + */ + public function transform( $message, $interface = false, $language = null, PageReference $page = null ) { + // Avoid creating parser if nothing to transform + if ( strpos( $message, '{{' ) === false ) { + return $message; + } + + if ( $this->mInParser ) { + return $message; + } + + $parser = $this->getParser(); + if ( $parser ) { + $popts = $this->getParserOptions(); + $popts->setInterfaceMessage( $interface ); + $popts->setTargetLanguage( $language ); + + $userlang = $popts->setUserLang( $language ); + $this->mInParser = true; + $message = $parser->transformMsg( $message, $popts, $page ); + $this->mInParser = false; + $popts->setUserLang( $userlang ); + } + + return $message; + } + + /** + * @return Parser + */ + public function getParser() { + if ( !$this->mParser ) { + $parser = MediaWikiServices::getInstance()->getParser(); + # 
Clone it and store it + $this->mParser = clone $parser; + } + + return $this->mParser; + } + + /** + * @param string $text + * @param PageReference|null $page + * @param bool $linestart Whether or not this is at the start of a line + * @param bool $interface Whether this is an interface message + * @param Language|string|null $language Language code + * @return ParserOutput|string + */ + public function parse( $text, PageReference $page = null, $linestart = true, + $interface = false, $language = null + ) { + global $wgTitle; + + if ( $this->mInParser ) { + return htmlspecialchars( $text ); + } + + $parser = $this->getParser(); + $popts = $this->getParserOptions(); + $popts->setInterfaceMessage( $interface ); + + if ( is_string( $language ) ) { + $language = $this->langFactory->getLanguage( $language ); + } + $popts->setTargetLanguage( $language ); + + if ( !$page ) { + $logger = LoggerFactory::getInstance( 'GlobalTitleFail' ); + $logger->info( + __METHOD__ . ' called with no title set.', + [ 'exception' => new Exception ] + ); + $page = $wgTitle; + } + // Sometimes $wgTitle isn't set either... + if ( !$page ) { + # It's not uncommon having a null $wgTitle in scripts. See r80898 + # Create a ghost title in such case + $page = PageReferenceValue::localReference( + NS_SPECIAL, + 'Badtitle/title not set in ' . __METHOD__ + ); + } + + $this->mInParser = true; + $res = $parser->parse( $text, $page, $popts, $linestart ); + $this->mInParser = false; + + return $res; + } + + public function disable() { + $this->mDisable = true; + } + + public function enable() { + $this->mDisable = false; + } + + /** + * Whether DB/cache usage is disabled for determining messages + * + * If so, this typically indicates either: + * - a) load() failed to find a cached copy nor query the DB + * - b) we are in a special context or error mode that cannot use the DB + * If the DB is ignored, any derived HTML output or cached objects may be wrong. 
+ * To avoid long-term cache pollution, TTLs can be adjusted accordingly. + * + * @return bool + * @since 1.27 + */ + public function isDisabled() { + return $this->mDisable; + } + + /** + * Clear all stored messages in global and local cache + * + * Mainly used after a mass rebuild + */ + public function clear() { + $langs = $this->languageNameUtils->getLanguageNames(); + foreach ( array_keys( $langs ) as $code ) { + $this->wanCache->touchCheckKey( $this->getCheckKey( $code ) ); + } + $this->cache->clear(); + } + + /** + * @param string $key + * @return array + */ + public function figureMessage( $key ) { + $pieces = explode( '/', $key ); + if ( count( $pieces ) < 2 ) { + return [ $key, $this->contLangCode ]; + } + + $lang = array_pop( $pieces ); + if ( !$this->languageNameUtils->getLanguageName( + $lang, + LanguageNameUtils::AUTONYMS, + LanguageNameUtils::DEFINED + ) ) { + return [ $key, $this->contLangCode ]; + } + + $message = implode( '/', $pieces ); + + return [ $message, $lang ]; + } + + /** + * Get all message keys stored in the message cache for a given language. + * If $code is the content language code, this will return all message keys + * for which MediaWiki:msgkey exists. If $code is another language code, this + * will ONLY return message keys for which MediaWiki:msgkey/$code exists. + * @param string $code Language code + * @return string[]|null Array of message keys + */ + public function getAllMessageKeys( $code ) { + $this->load( $code ); + if ( !$this->cache->has( $code ) ) { + // Apparently load() failed + return null; + } + // Remove administrative keys + $cache = $this->cache->get( $code ); + unset( $cache['VERSION'] ); + unset( $cache['EXPIRY'] ); + unset( $cache['EXCESSIVE'] ); + // Remove any !NONEXISTENT keys + $cache = array_diff( $cache, [ '!NONEXISTENT' ] ); + + // Keys may appear with a capital first letter. lcfirst them. 
+ return array_map( [ $this->contLang, 'lcfirst' ], array_keys( $cache ) ); + } + + /** + * Purge message caches when a MediaWiki: page is created, updated, or deleted + * + * @param LinkTarget $linkTarget Message page title + * @param Content|null $content New content for edit/create, null on deletion + * @since 1.29 + */ + public function updateMessageOverride( LinkTarget $linkTarget, Content $content = null ) { + $msgText = $this->getMessageTextFromContent( $content ); + if ( $msgText === null ) { + $msgText = false; // treat as not existing + } + + $this->replace( $linkTarget->getDBkey(), $msgText ); + + if ( $this->contLangConverter->hasVariants() ) { + $this->contLangConverter->updateConversionTable( $linkTarget ); + } + } + + /** + * @param string $code Language code + * @return string WAN cache key usable as a "check key" against language page edits + */ + public function getCheckKey( $code ) { + return $this->wanCache->makeKey( 'messages', $code ); + } + + /** + * @param Content|null $content Content or null if the message page does not exist + * @return string|false|null Returns false if $content is null and null on error + */ + private function getMessageTextFromContent( Content $content = null ) { + // @TODO: could skip pseudo-messages like js/css here, based on content model + if ( $content ) { + // Message page exists... + // XXX: Is this the right way to turn a Content object into a message? + // NOTE: $content is typically either WikitextContent, JavaScriptContent or + // CssContent. MessageContent is *not* used for storing messages, it's + // only used for wrapping them when needed. + $msgText = $content->getWikitextForTransclusion(); + if ( $msgText === false || $msgText === null ) { + // This might be due to some kind of misconfiguration... + $msgText = null; + $this->logger->warning( + __METHOD__ . ": message content doesn't provide wikitext " + . "(content model: " . $content->getModel() . 
")" ); + } + } else { + // Message page does not exist... + $msgText = false; + } + + return $msgText; + } + + /** + * @param string $hash Hash for this version of the entire key/value overrides map + * @param string $title Message cache key with initial uppercase letter + * @return string + */ + private function bigMessageCacheKey( $hash, $title ) { + return $this->wanCache->makeKey( 'messages-big', $hash, $title ); + } +} diff --git a/includes/language/dependency/CacheDependency.php b/includes/language/dependency/CacheDependency.php new file mode 100644 index 000000000000..9bc71ea845be --- /dev/null +++ b/includes/language/dependency/CacheDependency.php @@ -0,0 +1,40 @@ +<?php +/** + * Data caching with dependencies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +/** + * @stable to extend + * @ingroup Cache + */ +abstract class CacheDependency { + /** + * Returns true if the dependency is expired, false otherwise + */ + abstract public function isExpired(); + + /** + * Hook to perform any expensive pre-serialize loading of dependency values. 
+ * @stable to override + */ + public function loadDependencyValues() { + } +} diff --git a/includes/language/dependency/ConstantDependency.php b/includes/language/dependency/ConstantDependency.php new file mode 100644 index 000000000000..1f62093640f4 --- /dev/null +++ b/includes/language/dependency/ConstantDependency.php @@ -0,0 +1,42 @@ +<?php +/** + * Data caching with dependencies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +/** + * @ingroup Cache + */ +class ConstantDependency extends CacheDependency { + private $name; + private $value; + + public function __construct( $name ) { + $this->name = $name; + $this->value = constant( $name ); + } + + /** + * @return bool + */ + public function isExpired() { + return constant( $this->name ) != $this->value; + } +} diff --git a/includes/language/dependency/DependencyWrapper.php b/includes/language/dependency/DependencyWrapper.php new file mode 100644 index 000000000000..789a24a0b095 --- /dev/null +++ b/includes/language/dependency/DependencyWrapper.php @@ -0,0 +1,130 @@ +<?php +/** + * Data caching with dependencies. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +/** + * This class stores an arbitrary value along with its dependencies. + * Users should typically only use DependencyWrapper::getValueFromCache(), + * rather than instantiating one of these objects directly. + * @ingroup Cache + */ +class DependencyWrapper { + private $value; + /** @var CacheDependency[] */ + private $deps; + + /** + * @param mixed $value The user-supplied value + * @param CacheDependency|CacheDependency[] $deps A dependency or dependency + * array. All dependencies must be objects implementing CacheDependency. + */ + public function __construct( $value = false, $deps = [] ) { + $this->value = $value; + + if ( !is_array( $deps ) ) { + $deps = [ $deps ]; + } + + $this->deps = $deps; + } + + /** + * Returns true if any of the dependencies have expired + * + * @return bool + */ + public function isExpired() { + foreach ( $this->deps as $dep ) { + if ( $dep->isExpired() ) { + return true; + } + } + + return false; + } + + /** + * Initialise dependency values in preparation for storing. This must be + * called before serialization. 
+ */ + public function initialiseDeps() { + foreach ( $this->deps as $dep ) { + $dep->loadDependencyValues(); + } + } + + /** + * Get the user-defined value + * @return bool|mixed + */ + public function getValue() { + return $this->value; + } + + /** + * Store the wrapper to a cache + * + * @param BagOStuff $cache + * @param string $key + * @param int $expiry + */ + public function storeToCache( $cache, $key, $expiry = 0 ) { + $this->initialiseDeps(); + $cache->set( $key, $this, $expiry ); + } + + /** + * Attempt to get a value from the cache. If the value is expired or missing, + * it will be generated with the callback function (if present), and the newly + * calculated value will be stored to the cache in a wrapper. + * + * @param BagOStuff $cache + * @param string $key The cache key + * @param int $expiry The expiry timestamp or interval in seconds + * @param bool|callable $callback The callback for generating the value, or false + * @param array $callbackParams The function parameters for the callback + * @param array $deps The dependencies to store on a cache miss. Note: these + * are not the dependencies used on a cache hit! Cache hits use the stored + * dependency array. + * + * @return mixed The value, or null if it was not present in the cache and no + * callback was defined. 
+ */ + public static function getValueFromCache( $cache, $key, $expiry = 0, $callback = false, + $callbackParams = [], $deps = [] + ) { + $obj = $cache->get( $key ); + + if ( is_object( $obj ) && $obj instanceof DependencyWrapper && !$obj->isExpired() ) { + $value = $obj->value; + } elseif ( $callback ) { + $value = $callback( ...$callbackParams ); + # Cache the newly-generated value + $wrapper = new DependencyWrapper( $value, $deps ); + $wrapper->storeToCache( $cache, $key, $expiry ); + } else { + $value = null; + } + + return $value; + } +} diff --git a/includes/language/dependency/FileDependency.php b/includes/language/dependency/FileDependency.php new file mode 100644 index 000000000000..1cc2379ff668 --- /dev/null +++ b/includes/language/dependency/FileDependency.php @@ -0,0 +1,101 @@ +<?php +/** + * Data caching with dependencies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +use Wikimedia\AtEase\AtEase; + +/** + * @newable + * @ingroup Cache + */ +class FileDependency extends CacheDependency { + private $filename; + private $timestamp; + + /** + * Create a file dependency + * + * @stable to call + * + * @param string $filename The name of the file, preferably fully qualified + * @param null|bool|int $timestamp The unix last modified timestamp, or false if the + * file does not exist. If omitted, the timestamp will be loaded from + * the file. + * + * A dependency on a nonexistent file will be triggered when the file is + * created. A dependency on an existing file will be triggered when the + * file is changed. + */ + public function __construct( $filename, $timestamp = null ) { + $this->filename = $filename; + $this->timestamp = $timestamp; + } + + /** + * @return array + */ + public function __sleep() { + $this->loadDependencyValues(); + + return [ 'filename', 'timestamp' ]; + } + + public function loadDependencyValues() { + if ( $this->timestamp === null ) { + AtEase::suppressWarnings(); + # Dependency on a non-existent file stores "false" + # This is a valid concept! + $this->timestamp = filemtime( $this->filename ); + AtEase::restoreWarnings(); + } + } + + /** + * @return bool + */ + public function isExpired() { + AtEase::suppressWarnings(); + $lastmod = filemtime( $this->filename ); + AtEase::restoreWarnings(); + if ( $lastmod === false ) { + if ( $this->timestamp === false ) { + # Still nonexistent + return false; + } + + # Deleted + wfDebug( "Dependency triggered: {$this->filename} deleted." ); + + return true; + } + + if ( $lastmod > $this->timestamp ) { + # Modified or created + wfDebug( "Dependency triggered: {$this->filename} changed." 
); + + return true; + } + + # Not modified + return false; + } +} diff --git a/includes/language/dependency/GlobalDependency.php b/includes/language/dependency/GlobalDependency.php new file mode 100644 index 000000000000..15a3e90b113d --- /dev/null +++ b/includes/language/dependency/GlobalDependency.php @@ -0,0 +1,46 @@ +<?php +/** + * Data caching with dependencies. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +/** + * @ingroup Cache + */ +class GlobalDependency extends CacheDependency { + private $name; + private $value; + + public function __construct( $name ) { + $this->name = $name; + $this->value = $GLOBALS[$name]; + } + + /** + * @return bool + */ + public function isExpired() { + if ( !isset( $GLOBALS[$this->name] ) ) { + return true; + } + + return $GLOBALS[$this->name] != $this->value; + } +} diff --git a/includes/language/dependency/MainConfigDependency.php b/includes/language/dependency/MainConfigDependency.php new file mode 100644 index 000000000000..4eab7268eee6 --- /dev/null +++ b/includes/language/dependency/MainConfigDependency.php @@ -0,0 +1,51 @@ +<?php +/** + * Data caching with dependencies. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ +use MediaWiki\MediaWikiServices; + +/** + * @ingroup Cache + */ +class MainConfigDependency extends CacheDependency { + private $name; + private $value; + + public function __construct( $name ) { + $this->name = $name; + $this->value = $this->getConfig()->get( $this->name ); + } + + private function getConfig() { + return MediaWikiServices::getInstance()->getMainConfig(); + } + + /** + * @return bool + */ + public function isExpired() { + if ( !$this->getConfig()->has( $this->name ) ) { + return true; + } + + return $this->getConfig()->get( $this->name ) != $this->value; + } +} |