aboutsummaryrefslogtreecommitdiffstats
path: root/includes/language
diff options
context:
space:
mode:
authorTimo Tijhof <krinklemail@gmail.com>2020-07-02 01:57:56 +0100
committerTimo Tijhof <krinklemail@gmail.com>2020-07-02 01:57:56 +0100
commitf5644ba90454cba1d58b42f6b79e8a3c140afa68 (patch)
treeead2f4fc6bcefef5b385e72531fc2cb0aca6eed9 /includes/language
parent3806d1b08179f857dc704671047de16ce01276b2 (diff)
downloadmediawikicore-f5644ba90454cba1d58b42f6b79e8a3c140afa68.tar.gz
mediawikicore-f5644ba90454cba1d58b42f6b79e8a3c140afa68.zip
language: Move converters/ to includes/language/converters
These are easy to move. They contain regular, testable, source code, are loaded only via the autoloader, and have no references to their file paths from anywhere else in or outside of core (as far as Codesearch can see). Change-Id: Ibe94e541637bb273bd11dba6c2bc5b59f601dd19
Diffstat (limited to 'includes/language')
-rw-r--r--includes/language/converters/CrhConverter.php300
-rw-r--r--includes/language/converters/EnConverter.php74
-rw-r--r--includes/language/converters/GanConverter.php74
-rw-r--r--includes/language/converters/IuConverter.php161
-rw-r--r--includes/language/converters/KkConverter.php361
-rw-r--r--includes/language/converters/KuConverter.php238
-rw-r--r--includes/language/converters/ShiConverter.php137
-rw-r--r--includes/language/converters/SrConverter.php185
-rw-r--r--includes/language/converters/TgConverter.php120
-rw-r--r--includes/language/converters/UzConverter.php138
-rw-r--r--includes/language/converters/ZhConverter.php125
11 files changed, 1913 insertions, 0 deletions
diff --git a/includes/language/converters/CrhConverter.php b/includes/language/converters/CrhConverter.php
new file mode 100644
index 000000000000..550b07cae87d
--- /dev/null
+++ b/includes/language/converters/CrhConverter.php
@@ -0,0 +1,300 @@
+<?php
+/**
+ * Crimean Tatar (Qırımtatarca) specific code.
+ *
+ * Adapted from https://crh.wikipedia.org/wiki/Qullan%C4%B1c%C4%B1:Don_Alessandro/Translit
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * Crimean Tatar (Qırımtatarca) converter routines
+ *
+ * @ingroup Language
+ */
+class CrhConverter extends LanguageConverter {
+ // Defines working character ranges
+
+ // Cyrillic
+ # Crimean Tatar Cyrillic uppercase
+ public const C_UC = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ';
+ # Crimean Tatar Cyrillic lowercase
+ public const C_LC = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя';
+ # Crimean Tatar Cyrillic + CÑ uppercase consonants
+ public const C_CONS_UC = 'БВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ';
+ # Crimean Tatar Cyrillic + CÑ lowercase consonants
+ public const C_CONS_LC = 'бвгджзйклмнпрстфхцчшщcñ';
+ # Crimean Tatar Cyrillic M-type consonants
+ public const C_M_CONS = 'бгкмшcБГКМШC';
+
+ // Crimean Tatar Cyrillic + CÑ consonants
+ public const C_CONS = 'бвгджзйклмнпрстфхцчшщcñБВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ';
+
+ // Latin
+ # Crimean Tatar Latin uppercase
+ public const L_UC = 'AÂBCÇDEFGĞHIİJKLMNÑOÖPQRSŞTUÜVYZ';
+ # Crimean Tatar Latin lowercase
+ public const L_LC = 'aâbcçdefgğhıijklmnñoöpqrsştuüvyz';
+ # Crimean Tatar Latin N-type upper case consonants
+ public const L_N_CONS_UC = 'ÇNRSTZ';
+ # Crimean Tatar Latin N-type lower case consonants
+ public const L_N_CONS_LC = 'çnrstz';
+ # Crimean Tatar Latin N-type consonants
+ public const L_N_CONS = 'çnrstzÇNRSTZ';
+ # Crimean Tatar Latin M-type consonants
+ public const L_M_CONS = 'bcgkmpşBCGKMPŞ';
+ # Crimean Tatar Latin uppercase consonants
+ public const L_CONS_UC = 'BCÇDFGĞHJKLMNÑPQRSŞTVZ';
+ # Crimean Tatar Latin lowercase consonants
+ public const L_CONS_LC = 'bcçdfgğhjklmnñpqrsştvz';
+ # Crimean Tatar Latin consonants
+ public const L_CONS = 'bcçdfgğhjklmnñpqrsştvzBCÇDFGĞHJKLMNÑPQRSŞTVZ';
+ # Crimean Tatar Latin uppercase vowels
+ public const L_VOW_UC = 'AÂEIİOÖUÜ';
+ # Crimean Tatar Latin vowels
+ public const L_VOW = 'aâeıioöuüAÂEIİOÖUÜ';
+ # Crimean Tatar Latin uppercase front vowels
+ public const L_F_UC = 'EİÖÜ';
+ # Crimean Tatar Latin front vowels
+ public const L_F = 'eiöüEİÖÜ';
+
+	/**
+	 * Register the three Crimean Tatar variants with the parent converter and
+	 * load the hard-coded exception tables straight away.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		parent::__construct(
+			$langobj,
+			'crh',
+			[ 'crh', 'crh-cyrl', 'crh-latn' ],
+			[
+				'crh' => 'crh-latn',
+				'crh-cyrl' => 'crh-latn',
+				'crh-latn' => 'crh-cyrl',
+			],
+			[]
+		);
+
+		// The exceptions are defined in code, so there is nothing to gain by
+		// deferring them. Hooking them into loadDefaultTables() instead would
+		// mean they are never loaded when the tables come from cache.
+		$this->loadExceptions();
+	}
+
+ public $mCyrillicToLatin = [
+
+ ## these are independent of location in the word, but have
+ ## to go first so other transforms don't bleed them
+ 'гъ' => 'ğ', 'Гъ' => 'Ğ', 'ГЪ' => 'Ğ',
+ 'къ' => 'q', 'Къ' => 'Q', 'КЪ' => 'Q',
+ 'нъ' => 'ñ', 'Нъ' => 'Ñ', 'НЪ' => 'Ñ',
+ 'дж' => 'c', 'Дж' => 'C', 'ДЖ' => 'C',
+
+ 'А' => 'A', 'а' => 'a', 'Б' => 'B', 'б' => 'b',
+ 'В' => 'V', 'в' => 'v', 'Г' => 'G', 'г' => 'g',
+ 'Д' => 'D', 'д' => 'd', 'Ж' => 'J', 'ж' => 'j',
+ 'З' => 'Z', 'з' => 'z', 'И' => 'İ', 'и' => 'i',
+ 'Й' => 'Y', 'й' => 'y', 'К' => 'K', 'к' => 'k',
+ 'Л' => 'L', 'л' => 'l', 'М' => 'M', 'м' => 'm',
+ 'Н' => 'N', 'н' => 'n', 'П' => 'P', 'п' => 'p',
+ 'Р' => 'R', 'р' => 'r', 'С' => 'S', 'с' => 's',
+ 'Т' => 'T', 'т' => 't', 'Ф' => 'F', 'ф' => 'f',
+ 'Х' => 'H', 'х' => 'h', 'Ч' => 'Ç', 'ч' => 'ç',
+ 'Ш' => 'Ş', 'ш' => 'ş', 'Ы' => 'I', 'ы' => 'ı',
+ 'Э' => 'E', 'э' => 'e', 'Е' => 'E', 'е' => 'e',
+ 'Я' => 'Â', 'я' => 'â', 'У' => 'U', 'у' => 'u',
+ 'О' => 'O', 'о' => 'o',
+
+ 'Ё' => 'Yo', 'ё' => 'yo', 'Ю' => 'Yu', 'ю' => 'yu',
+ 'Ц' => 'Ts', 'ц' => 'ts', 'Щ' => 'Şç', 'щ' => 'şç',
+ 'Ь' => '', 'ь' => '', 'Ъ' => '', 'ъ' => '',
+
+ ];
+
+ public $mLatinToCyrillic = [
+ 'Â' => 'Я', 'â' => 'я', 'B' => 'Б', 'b' => 'б',
+ 'Ç' => 'Ч', 'ç' => 'ч', 'D' => 'Д', 'd' => 'д',
+ 'F' => 'Ф', 'f' => 'ф', 'G' => 'Г', 'g' => 'г',
+ 'H' => 'Х', 'h' => 'х', 'I' => 'Ы', 'ı' => 'ы',
+ 'İ' => 'И', 'i' => 'и', 'J' => 'Ж', 'j' => 'ж',
+ 'K' => 'К', 'k' => 'к', 'L' => 'Л', 'l' => 'л',
+ 'M' => 'М', 'm' => 'м', 'N' => 'Н', 'n' => 'н',
+ 'O' => 'О', 'o' => 'о', 'P' => 'П', 'p' => 'п',
+ 'R' => 'Р', 'r' => 'р', 'S' => 'С', 's' => 'с',
+ 'Ş' => 'Ш', 'ş' => 'ш', 'T' => 'Т', 't' => 'т',
+ 'V' => 'В', 'v' => 'в', 'Z' => 'З', 'z' => 'з',
+
+ 'ya' => 'я', 'Ya' => 'Я', 'YA' => 'Я',
+ 'ye' => 'е', 'YE' => 'Е', 'Ye' => 'Е',
+
+ // hack, hack, hack
+ 'A' => 'А', 'a' => 'а', 'E' => 'Е', 'e' => 'е',
+ 'Ö' => 'Ё', 'ö' => 'ё', 'U' => 'У', 'u' => 'у',
+ 'Ü' => 'Ю', 'ü' => 'ю', 'Y' => 'Й', 'y' => 'й',
+ 'C' => 'Дж', 'c' => 'дж', 'Ğ' => 'Гъ', 'ğ' => 'гъ',
+ 'Ñ' => 'Нъ', 'ñ' => 'нъ', 'Q' => 'Къ', 'q' => 'къ',
+
+ ];
+
+ public $mCyrl2LatnExceptions = [];
+ public $mLatn2CyrlExceptions = [];
+
+ public $mCyrl2LatnPatterns = [];
+ public $mLatn2CyrlPatterns = [];
+
+ public $mCyrlCleanUpRegexes = [];
+
+ public $mExceptionsLoaded = false;
+
+	/**
+	 * Build $this->mTables: one ReplacementArray per variant, using the
+	 * character maps declared on this class. The 'crh' table is empty.
+	 */
+	protected function loadDefaultTables() {
+		$toLatin = new ReplacementArray( $this->mCyrillicToLatin );
+		$toCyrillic = new ReplacementArray( $this->mLatinToCyrillic );
+
+		$this->mTables = [
+			'crh-latn' => $toLatin,
+			'crh-cyrl' => $toCyrillic,
+			'crh' => new ReplacementArray(),
+		];
+	}
+
+	/**
+	 * Fill the exception maps, pattern lists and clean-up regexes from
+	 * CrhExceptions. Runs at most once per instance; later calls are no-ops.
+	 */
+	private function loadExceptions() {
+		if ( !$this->mExceptionsLoaded ) {
+			// Mark as loaded before doing the work, as the original did
+			$this->mExceptionsLoaded = true;
+
+			$lowerCase = self::L_LC . self::C_LC;
+			$upperCase = self::L_UC . self::C_UC;
+			$source = new MediaWiki\Languages\Data\CrhExceptions();
+
+			[
+				$this->mCyrl2LatnExceptions,
+				$this->mLatn2CyrlExceptions,
+				$this->mCyrl2LatnPatterns,
+				$this->mLatn2CyrlPatterns,
+				$this->mCyrlCleanUpRegexes
+			] = $source->loadExceptions( $lowerCase, $upperCase );
+		}
+	}
+
+	/**
+	 * Wrapper around the parent's variant link lookup:
+	 *  - links into the User and User talk namespaces are never converted
+	 *  - when the preferred variant is the main language code (i.e. no
+	 *    specific variant is selected), the link text is left as it was
+	 *
+	 * @param string &$link
+	 * @param Title &$nt
+	 * @param bool $ignoreOtherCond
+	 */
+	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
+		// Usernames must not be run through variant lookup
+		if ( is_object( $nt ) && in_array( $nt->getNamespace(), [ NS_USER, NS_USER_TALK ] ) ) {
+			return;
+		}
+
+		$untouched = $link;
+		parent::findVariantLink( $link, $nt, $ignoreOtherCond );
+
+		// Undo any change to the link text when no variant is selected
+		if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
+			$link = $untouched;
+		}
+	}
+
+ /**
+ * Translate text into the given variant.
+ *
+ * Only 'crh-cyrl' and 'crh-latn' are converted; any other variant code
+ * returns the text unchanged. When converting to Cyrillic, runs that look
+ * like Roman numerals (VII, XIX, ...) are copied over as they are and only
+ * the text between them is converted.
+ *
+ * @param string $text
+ * @param string $toVariant Variant code
+ *
+ * @throws MWException If no table exists for $toVariant after loading
+ * @return string
+ */
+ public function translate( $text, $toVariant ) {
+ // Bail out early for variants this converter does not handle
+ switch ( $toVariant ) {
+ case 'crh-cyrl':
+ case 'crh-latn':
+ break;
+ default:
+ return $text;
+ }
+
+ $this->loadTables();
+
+ if ( !isset( $this->mTables[$toVariant] ) ) {
+ throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
+ }
+
+ switch ( $toVariant ) {
+ case 'crh-cyrl':
+ /* Check for Roman numerals like VII, XIX...
+ * We only need to split on Roman numerals when converting to Cyrillic.
+ * The lookahead assertion ensures $roman doesn't match the empty string, and
+ * requiring a non-period after the first "Roman" character allows initials
+ * (e.g. "I.") to be converted.
+ */
+ $roman = '(?=[MDCLXVI]([^.]|$))M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';
+
+ // A single byte that is neither a word character nor in the 0x80-0xff range
+ $breaks = '([^\w\x80-\xff])';
+
+ // allow for multiple Roman numerals in a row; rare but it happens
+ $romanRegex = '/^' . $roman . '$|^(' . $roman . $breaks . ')+|(' . $breaks . $roman . ')+$|' .
+ $breaks . '(' . $roman . $breaks . ')+/';
+
+ // The split pieces are the text to convert; each comes with its byte offset,
+ // so the delimiters (the numerals) can be recovered from the gaps between pieces
+ $matches = preg_split( $romanRegex, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
+ $mstart = 0;
+ $ret = '';
+ foreach ( $matches as $m ) {
+ // copy over Roman numerals (the gap between the previous piece and this one)
+ $ret .= substr( $text, $mstart, $m[1] - $mstart );
+
+ // process everything else
+ if ( $m[0] !== '' ) {
+ $ret .= $this->regsConverter( $m[0], $toVariant );
+ }
+
+ // byte offset just past the end of this piece
+ $mstart = $m[1] + strlen( $m[0] );
+ }
+
+ return $ret;
+ default:
+ // Just process the whole string in one go
+ return $this->regsConverter( $text, $toVariant );
+ }
+ }
+
+	/**
+	 * Convert one chunk of text to the given script variant.
+	 *
+	 * Steps, in order: apply the exception map with strtr(), run the regex
+	 * patterns, run the parent's table-based translation, then swap the
+	 * quotation marks. For Cyrillic output the clean-up regexes run last.
+	 *
+	 * @param string $text
+	 * @param string $toVariant 'crh-latn' or 'crh-cyrl'; any other value
+	 *  returns $text unchanged
+	 * @return string
+	 */
+	private function regsConverter( $text, $toVariant ) {
+		if ( $text == '' ) {
+			return $text;
+		}
+
+		switch ( $toVariant ) {
+			case 'crh-latn':
+				$text = strtr( $text, $this->mCyrl2LatnExceptions );
+				foreach ( $this->mCyrl2LatnPatterns as $pat => $rep ) {
+					$text = preg_replace( $pat, $rep, $text );
+				}
+				$text = parent::translate( $text, $toVariant );
+				// Guillemets become plain double quotes in Latin output
+				$text = strtr( $text, [ '«' => '"', '»' => '"', ] );
+				return $text;
+			case 'crh-cyrl':
+				$text = strtr( $text, $this->mLatn2CyrlExceptions );
+				foreach ( $this->mLatn2CyrlPatterns as $pat => $rep ) {
+					$text = preg_replace( $pat, $rep, $text );
+				}
+				$text = parent::translate( $text, $toVariant );
+				// Curly double quotes become guillemets in Cyrillic output
+				$text = strtr( $text, [ '“' => '«', '”' => '»', ] );
+				foreach ( $this->mCyrlCleanUpRegexes as $pat => $rep ) {
+					$text = preg_replace( $pat, $rep, $text );
+				}
+				return $text;
+			default:
+				return $text;
+		}
+	}
+
+}
diff --git a/includes/language/converters/EnConverter.php b/includes/language/converters/EnConverter.php
new file mode 100644
index 000000000000..c9270d75bdee
--- /dev/null
+++ b/includes/language/converters/EnConverter.php
@@ -0,0 +1,74 @@
+<?php
+/**
+ * English specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * English converter. Its only non-trivial variant is Pig Latin
+ * ('en-x-piglatin').
+ *
+ * @ingroup Language
+ */
+class EnConverter extends LanguageConverter {
+
+	/**
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		$variants = [ 'en', 'en-x-piglatin' ];
+		parent::__construct( $langobj, 'en', $variants );
+	}
+
+	/**
+	 * Required by the base class; both tables are empty because the
+	 * conversion is done in translate() rather than by table lookup.
+	 */
+	protected function loadDefaultTables() {
+		$tables = [];
+		$tables['en'] = new ReplacementArray();
+		$tables['en-x-piglatin'] = new ReplacementArray();
+		$this->mTables = $tables;
+	}
+
+	/**
+	 * Translates text into Pig Latin. This lets developers exercise the language
+	 * variant functionality and user interface without switching the wiki
+	 * language away from the default.
+	 *
+	 * @param string $text
+	 * @param string $toVariant
+	 * @return string $text unchanged unless $toVariant is 'en-x-piglatin'
+	 */
+	public function translate( $text, $toVariant ) {
+		if ( $toVariant === 'en-x-piglatin' ) {
+			// Moves the leading consonant cluster (or "qu"/"squ") to the end and
+			// appends "ay", keeping an uppercase-first word uppercase-first.
+			$moveOnset = function ( $parts ) {
+				$onset = $parts[1];
+				$rest = $parts[2];
+				if ( strtoupper( $onset[0] ) === $onset[0] ) {
+					return ucfirst( $rest ) . lcfirst( $onset ) . 'ay';
+				}
+
+				return $rest . $onset . 'ay';
+			};
+
+			// Words starting with a vowel just get "way" appended.
+			$convertWord = function ( $matches ) use ( $moveOnset ) {
+				$word = $matches[0];
+				return preg_match( '/^[aeiou]/i', $word )
+					? $word . 'way'
+					: preg_replace_callback( '/^(s?qu|[^aeiou][^aeiouy]*)(.*)$/i', $moveOnset, $word );
+			};
+
+			// Only words made of the standard English alphabet are processed; the rest
+			// is left unchanged. That skips words like 'naïve' or 'résumé', which is
+			// acceptable. Single letters and words that are neither lowercase nor
+			// uppercase-first are ignored too.
+			return preg_replace_callback( '/[A-Za-z][a-z\']+/', $convertWord, $text );
+		}
+
+		return $text;
+	}
+}
diff --git a/includes/language/converters/GanConverter.php b/includes/language/converters/GanConverter.php
new file mode 100644
index 000000000000..4435b196dba5
--- /dev/null
+++ b/includes/language/converters/GanConverter.php
@@ -0,0 +1,74 @@
+<?php
+/**
+ * Gan Chinese specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Gan Chinese converter between the 'gan', 'gan-hans' and 'gan-hant' variants.
+ *
+ * @ingroup Language
+ */
+class GanConverter extends LanguageConverter {
+	/**
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		// Separators are assigned before the parent constructor runs
+		$this->mDescCodeSep = ':';
+		$this->mDescVarSep = ';';
+
+		parent::__construct(
+			$langobj,
+			'gan',
+			[ 'gan', 'gan-hans', 'gan-hant' ],
+			[
+				'gan' => [ 'gan-hans', 'gan-hant' ],
+				'gan-hans' => [ 'gan' ],
+				'gan-hant' => [ 'gan' ],
+			],
+			[],
+			[ 'gan' => 'disable' ]
+		);
+
+		// Display names for the variants, layered over whatever the parent set
+		$this->mVariantNames = array_merge( $this->mVariantNames, [
+			'gan' => '原文',
+			'gan-hans' => '简体',
+			'gan-hant' => '繁體',
+		] );
+	}
+
+	/**
+	 * Build the conversion tables from the shared ZhConversion data; the
+	 * 'gan' table is empty.
+	 */
+	protected function loadDefaultTables() {
+		$simplified = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans );
+		$traditional = new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant );
+
+		$this->mTables = [
+			'gan-hans' => $simplified,
+			'gan-hant' => $traditional,
+			'gan' => new ReplacementArray(),
+		];
+	}
+
+	/**
+	 * Convert a category key to the 'gan' variant via autoConvert().
+	 *
+	 * @param string $key
+	 * @return string
+	 */
+	public function convertCategoryKey( $key ) {
+		$converted = $this->autoConvert( $key, 'gan' );
+		return $converted;
+	}
+}
diff --git a/includes/language/converters/IuConverter.php b/includes/language/converters/IuConverter.php
new file mode 100644
index 000000000000..71c5b5057116
--- /dev/null
+++ b/includes/language/converters/IuConverter.php
@@ -0,0 +1,161 @@
+<?php
+/**
+ * Inuktitut specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * Conversion script between Latin and Syllabics for Inuktitut.
+ * - Syllabics -> lowercase Latin
+ * - lowercase/uppercase Latin -> Syllabics
+ *
+ *
+ * Based on:
+ * - https://commons.wikimedia.org/wiki/Image:Inuktitut.png
+ * - LanguageSr.php
+ *
+ * @ingroup Language
+ */
+class IuConverter extends LanguageConverter {
+ protected $mDoContentConvert;
+
+	/**
+	 * Syllabics => lowercase Latin replacement map.
+	 *
+	 * Every value must be plain Latin text: 'ᖅᑰ' used to map to 'ᖅqquu',
+	 * which leaked a syllabic character into Latin output and did not agree
+	 * with the reverse entry 'qquu' => 'ᖅᑰ' in $mToSyllabics.
+	 *
+	 * @var string[]
+	 */
+	public $mToLatin = [
+		'ᐦ' => 'h', 'ᐃ' => 'i', 'ᐄ' => 'ii', 'ᐅ' => 'u', 'ᐆ' => 'uu', 'ᐊ' => 'a', 'ᐋ' => 'aa',
+		'ᑉ' => 'p', 'ᐱ' => 'pi', 'ᐲ' => 'pii', 'ᐳ' => 'pu', 'ᐴ' => 'puu', 'ᐸ' => 'pa', 'ᐹ' => 'paa',
+		'ᑦ' => 't', 'ᑎ' => 'ti', 'ᑏ' => 'tii', 'ᑐ' => 'tu', 'ᑑ' => 'tuu', 'ᑕ' => 'ta', 'ᑖ' => 'taa',
+		'ᒃ' => 'k', 'ᑭ' => 'ki', 'ᑮ' => 'kii', 'ᑯ' => 'ku', 'ᑰ' => 'kuu', 'ᑲ' => 'ka', 'ᑳ' => 'kaa',
+		'ᖅᒃ' => 'qq', 'ᖅᑭ' => 'qqi', 'ᖅᑮ' => 'qqii', 'ᖅᑯ' => 'qqu', 'ᖅᑰ' => 'qquu', 'ᖅᑲ' => 'qqa',
+		'ᖅᑳ' => 'qqaa', 'ᒡ' => 'g', 'ᒋ' => 'gi', 'ᒌ' => 'gii', 'ᒍ' => 'gu', 'ᒎ' => 'guu',
+		'ᒐ' => 'ga', 'ᒑ' => 'gaa', 'ᒻ' => 'm', 'ᒥ' => 'mi', 'ᒦ' => 'mii', 'ᒧ' => 'mu', 'ᒨ' => 'muu',
+		'ᒪ' => 'ma', 'ᒫ' => 'maa', 'ᓐ' => 'n', 'ᓂ' => 'ni', 'ᓃ' => 'nii', 'ᓄ' => 'nu', 'ᓅ' => 'nuu',
+		'ᓇ' => 'na', 'ᓈ' => 'naa', 'ᔅ' => 's', 'ᓯ' => 'si', 'ᓰ' => 'sii', 'ᓱ' => 'su', 'ᓲ' => 'suu',
+		'ᓴ' => 'sa', 'ᓵ' => 'saa', 'ᓪ' => 'l', 'ᓕ' => 'li', 'ᓖ' => 'lii', 'ᓗ' => 'lu', 'ᓘ' => 'luu',
+		'ᓚ' => 'la', 'ᓛ' => 'laa', 'ᔾ' => 'j', 'ᔨ' => 'ji', 'ᔩ' => 'jii', 'ᔪ' => 'ju', 'ᔫ' => 'juu',
+		'ᔭ' => 'ja', 'ᔮ' => 'jaa', 'ᕝ' => 'v', 'ᕕ' => 'vi', 'ᕖ' => 'vii', 'ᕗ' => 'vu', 'ᕘ' => 'vuu',
+		'ᕙ' => 'va', 'ᕚ' => 'vaa', 'ᕐ' => 'r', 'ᕆ' => 'ri', 'ᕇ' => 'rii', 'ᕈ' => 'ru', 'ᕉ' => 'ruu',
+		'ᕋ' => 'ra', 'ᕌ' => 'raa', 'ᖅ' => 'q', 'ᕿ' => 'qi', 'ᖀ' => 'qii', 'ᖁ' => 'qu', 'ᖂ' => 'quu',
+		'ᖃ' => 'qa', 'ᖄ' => 'qaa', 'ᖕ' => 'ng', 'ᖏ' => 'ngi', 'ᖐ' => 'ngii', 'ᖑ' => 'ngu',
+		'ᖒ' => 'nguu', 'ᖓ' => 'nga', 'ᖔ' => 'ngaa', 'ᖖ' => 'nng', 'ᙱ' => 'nngi', 'ᙲ' => 'nngii',
+		'ᙳ' => 'nngu', 'ᙴ' => 'nnguu', 'ᙵ' => 'nnga', 'ᙶ' => 'nngaa', 'ᖦ' => 'ɫ', 'ᖠ' => 'ɫi',
+		'ᖡ' => 'ɫii', 'ᖢ' => 'ɫu', 'ᖣ' => 'ɫuu', 'ᖤ' => 'ɫa', 'ᖥ' => 'ɫaa',
+	];
+
+ public $mUpperToLowerCaseLatin = [
+ 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
+ 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j',
+ 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o',
+ 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't',
+ 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y',
+ 'Z' => 'z',
+ ];
+
+ public $mToSyllabics = [
+ 'h' => 'ᐦ', 'i' => 'ᐃ', 'ii' => 'ᐄ', 'u' => 'ᐅ', 'uu' => 'ᐆ', 'a' => 'ᐊ', 'aa' => 'ᐋ',
+ 'p' => 'ᑉ', 'pi' => 'ᐱ', 'pii' => 'ᐲ', 'pu' => 'ᐳ', 'puu' => 'ᐴ', 'pa' => 'ᐸ', 'paa' => 'ᐹ',
+ 't' => 'ᑦ', 'ti' => 'ᑎ', 'tii' => 'ᑏ', 'tu' => 'ᑐ', 'tuu' => 'ᑑ', 'ta' => 'ᑕ', 'taa' => 'ᑖ',
+ 'k' => 'ᒃ', 'ki' => 'ᑭ', 'kii' => 'ᑮ', 'ku' => 'ᑯ', 'kuu' => 'ᑰ', 'ka' => 'ᑲ', 'kaa' => 'ᑳ',
+ 'g' => 'ᒡ', 'gi' => 'ᒋ', 'gii' => 'ᒌ', 'gu' => 'ᒍ', 'guu' => 'ᒎ', 'ga' => 'ᒐ', 'gaa' => 'ᒑ',
+ 'm' => 'ᒻ', 'mi' => 'ᒥ', 'mii' => 'ᒦ', 'mu' => 'ᒧ', 'muu' => 'ᒨ', 'ma' => 'ᒪ', 'maa' => 'ᒫ',
+ 'n' => 'ᓐ', 'ni' => 'ᓂ', 'nii' => 'ᓃ', 'nu' => 'ᓄ', 'nuu' => 'ᓅ', 'na' => 'ᓇ', 'naa' => 'ᓈ',
+ 's' => 'ᔅ', 'si' => 'ᓯ', 'sii' => 'ᓰ', 'su' => 'ᓱ', 'suu' => 'ᓲ', 'sa' => 'ᓴ', 'saa' => 'ᓵ',
+ 'l' => 'ᓪ', 'li' => 'ᓕ', 'lii' => 'ᓖ', 'lu' => 'ᓗ', 'luu' => 'ᓘ', 'la' => 'ᓚ', 'laa' => 'ᓛ',
+ 'j' => 'ᔾ', 'ji' => 'ᔨ', 'jii' => 'ᔩ', 'ju' => 'ᔪ', 'juu' => 'ᔫ', 'ja' => 'ᔭ', 'jaa' => 'ᔮ',
+ 'v' => 'ᕝ', 'vi' => 'ᕕ', 'vii' => 'ᕖ', 'vu' => 'ᕗ', 'vuu' => 'ᕘ', 'va' => 'ᕙ', 'vaa' => 'ᕚ',
+ 'r' => 'ᕐ', 'ri' => 'ᕆ', 'rii' => 'ᕇ', 'ru' => 'ᕈ', 'ruu' => 'ᕉ', 'ra' => 'ᕋ', 'raa' => 'ᕌ',
+ 'qq' => 'ᖅᒃ', 'qqi' => 'ᖅᑭ', 'qqii' => 'ᖅᑮ', 'qqu' => 'ᖅᑯ', 'qquu' => 'ᖅᑰ', 'qqa' => 'ᖅᑲ',
+ 'qqaa' => 'ᖅᑳ', 'q' => 'ᖅ', 'qi' => 'ᕿ', 'qii' => 'ᖀ', 'qu' => 'ᖁ', 'quu' => 'ᖂ',
+ 'qa' => 'ᖃ', 'qaa' => 'ᖄ', 'ng' => 'ᖕ', 'ngi' => 'ᖏ', 'ngii' => 'ᖐ', 'ngu' => 'ᖑ',
+ 'nguu' => 'ᖒ', 'nga' => 'ᖓ', 'ngaa' => 'ᖔ', 'nng' => 'ᖖ', 'nngi' => 'ᙱ', 'nngii' => 'ᙲ',
+ 'nngu' => 'ᙳ', 'nnguu' => 'ᙴ', 'nnga' => 'ᙵ', 'nngaa' => 'ᙶ', 'ɫ' => 'ᖦ', 'ɫi' => 'ᖠ',
+ 'ɫii' => 'ᖡ', 'ɫu' => 'ᖢ', 'ɫuu' => 'ᖣ', 'ɫa' => 'ᖤ', 'ɫaa' => 'ᖥ',
+ ];
+
+	/**
+	 * Register the Inuktitut variants ('iu', 'ike-cans', 'ike-latn') and their
+	 * fallbacks with the parent converter.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		parent::__construct(
+			$langobj,
+			'iu',
+			[ 'iu', 'ike-cans', 'ike-latn' ],
+			[
+				'iu' => 'ike-cans',
+				'ike-cans' => 'iu',
+				'ike-latn' => 'iu',
+			],
+			[]
+		);
+	}
+
+	/**
+	 * Build $this->mTables from the maps declared on this class. Besides the
+	 * per-variant tables there is a 'lowercase' helper table that translate()
+	 * applies before converting to syllabics.
+	 */
+	protected function loadDefaultTables() {
+		$lowercase = new ReplacementArray( $this->mUpperToLowerCaseLatin );
+		$toSyllabics = new ReplacementArray( $this->mToSyllabics );
+		$toLatin = new ReplacementArray( $this->mToLatin );
+
+		$this->mTables = [
+			'lowercase' => $lowercase,
+			'ike-cans' => $toSyllabics,
+			'ike-latn' => $toLatin,
+			'iu' => new ReplacementArray(),
+		];
+	}
+
+	/**
+	 * Wrapper around the parent's variant link lookup:
+	 *  - links into the User and User talk namespaces are skipped entirely
+	 *  - when the preferred variant equals the main language code (no
+	 *    specific variant selected), the link text is restored afterwards
+	 *
+	 * @param string &$link
+	 * @param Title &$nt
+	 * @param bool $ignoreOtherCond
+	 */
+	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
+		// Do not look for variants of usernames
+		$isUserPage = is_object( $nt )
+			&& in_array( $nt->getNamespace(), [ NS_USER, NS_USER_TALK ] );
+		if ( $isUserPage ) {
+			return;
+		}
+
+		$savedLink = $link;
+		parent::findVariantLink( $link, $nt, $ignoreOtherCond );
+
+		if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
+			// No variant selected: keep the link name as it was
+			$link = $savedLink;
+		}
+	}
+
+	/**
+	 * Translate text into the given variant.
+	 *
+	 * Empty or whitespace-only text is returned untouched, and so is any text
+	 * when no conversion table exists for $toVariant. When converting to
+	 * syllabics, uppercase Latin letters are lowercased first, because the
+	 * syllabics table only has lowercase keys.
+	 *
+	 * @param string $text
+	 * @param string $toVariant Variant code, e.g. 'ike-cans' or 'ike-latn'
+	 *
+	 * @return string
+	 */
+	public function translate( $text, $toVariant ) {
+		// Explicit emptiness check instead of relying on string truthiness
+		if ( trim( $text ) === '' ) {
+			return $text;
+		}
+
+		$this->loadTables();
+
+		// Unknown variant: there is no table to apply, so leave the text
+		// alone instead of calling replace() on a missing entry
+		if ( !isset( $this->mTables[$toVariant] ) ) {
+			return $text;
+		}
+
+		// To syllabics, first translate uppercase to lowercase Latin
+		if ( $toVariant == 'ike-cans' ) {
+			$text = $this->mTables['lowercase']->replace( $text );
+		}
+
+		return $this->mTables[$toVariant]->replace( $text );
+	}
+}
diff --git a/includes/language/converters/KkConverter.php b/includes/language/converters/KkConverter.php
new file mode 100644
index 000000000000..2ead0425a4b5
--- /dev/null
+++ b/includes/language/converters/KkConverter.php
@@ -0,0 +1,361 @@
+<?php
+/**
+ * Kazakh (Қазақша) specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' ); # Kazakh Cyrillic uppercase
+define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase
+define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase
+define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase
+// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic
+define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA
+// define( 'ZWNJ', '‌' ); # U+200C ZERO WIDTH NON-JOINER
+
+/**
+ * Kazakh (Қазақша) converter routines
+ *
+ * @ingroup Language
+ */
+class KkConverter extends LanguageConverter {
+
+ protected $mCyrl2Latn, $mLatn2Cyrl, $mCyLa2Arab;
+
+	/**
+	 * Register the Kazakh script and regional variants with the parent
+	 * converter, then build the regex tables.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		parent::__construct(
+			$langobj,
+			'kk',
+			[ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ],
+			[
+				'kk' => 'kk-cyrl',
+				'kk-cyrl' => 'kk',
+				'kk-latn' => 'kk',
+				'kk-arab' => 'kk',
+				'kk-kz' => 'kk-cyrl',
+				'kk-tr' => 'kk-latn',
+				'kk-cn' => 'kk-arab'
+			],
+			[]
+		);
+
+		// The regexes are defined in code, so build them now. Doing it from
+		// loadDefaultTables() would skip them whenever the tables themselves
+		// are loaded from cache.
+		$this->loadRegs();
+	}
+
+	/**
+	 * Build $this->mTables. All source arrays are currently empty
+	 * placeholders, so every table starts out empty.
+	 */
+	protected function loadDefaultTables() {
+		// require __DIR__."/../../includes/KkConversion.php";
+		// Placeholders until KkConversion.php is generated; drop these
+		// declarations once that file exists.
+		$kk2Cyrl = [];
+		$kk2Latn = [];
+		$kk2Arab = [];
+		$kk2KZ = [];
+		$kk2TR = [];
+		$kk2CN = [];
+
+		$tables = [];
+		$tables['kk-cyrl'] = new ReplacementArray( $kk2Cyrl );
+		$tables['kk-latn'] = new ReplacementArray( $kk2Latn );
+		$tables['kk-arab'] = new ReplacementArray( $kk2Arab );
+		// Regional variants start from their script's table plus regional overrides
+		$tables['kk-kz'] = new ReplacementArray( array_merge( $kk2Cyrl, $kk2KZ ) );
+		$tables['kk-tr'] = new ReplacementArray( array_merge( $kk2Latn, $kk2TR ) );
+		$tables['kk-cn'] = new ReplacementArray( array_merge( $kk2Arab, $kk2CN ) );
+		$tables['kk'] = new ReplacementArray();
+
+		$this->mTables = $tables;
+	}
+
+	/**
+	 * Merge each script table into the table of its regional variant
+	 * (kk-cyrl into kk-kz, kk-latn into kk-tr, kk-arab into kk-cn).
+	 */
+	protected function postLoadTables() {
+		$scriptOfRegion = [
+			'kk-kz' => 'kk-cyrl',
+			'kk-tr' => 'kk-latn',
+			'kk-cn' => 'kk-arab',
+		];
+		foreach ( $scriptOfRegion as $region => $script ) {
+			$this->mTables[$region]->merge( $this->mTables[$script] );
+		}
+	}
+
+ private function loadRegs() {
+ $this->mCyrl2Latn = [
+ # # Punctuation
+ '/№/u' => 'No.',
+ # # Е after vowels
+ '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE',
+ '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye',
+ # # leading ЁЮЯЩ
+ '/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1', '/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1',
+ '/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1',
+ '/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1',
+ '/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1', '/^Щ([' . KK_C_LC . ']|$)/u' => 'Şç$1',
+ # # other ЁЮЯ
+ '/Ё/u' => 'YO', '/ё/u' => 'yo',
+ '/Ю/u' => 'YU', '/ю/u' => 'yu',
+ '/Я/u' => 'YA', '/я/u' => 'ya',
+ '/Щ/u' => 'ŞÇ', '/щ/u' => 'şç',
+ # # soft and hard signs
+ '/[ъЪ]/u' => 'ʺ', '/[ьЬ]/u' => 'ʹ',
+ # # other characters
+ '/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä',
+ '/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
+ '/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
+ '/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e',
+ '/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z',
+ '/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý',
+ '/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q',
+ '/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm',
+ '/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ',
+ '/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö',
+ '/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
+ '/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
+ '/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u',
+ '/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f',
+ '/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h',
+ '/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç',
+ '/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı',
+ '/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é',
+ ];
+
+ $this->mLatn2Cyrl = [
+ # # Punctuation
+ '/#|No\./' => '№',
+ # # Şç
+ '/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь',
+ '/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ',
+ # # soft and hard signs
+ '/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2',
+ '/ʺ([' . KK_L_LC . '])/u' => 'ъ$1',
+ '/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2',
+ '/ʹ([' . KK_L_LC . '])/u' => 'ь$1',
+ '/ʺ/u' => 'ъ',
+ '/ʹ/u' => 'ь',
+ # # Ye Yo Yu Ya.
+ '/Y[Ee]/u' => 'Е', '/ye/u' => 'е',
+ '/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё',
+ '/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю',
+ '/Y[Aa]/u' => 'Я', '/ya/u' => 'я',
+ # # other characters
+ '/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә',
+ '/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц',
+ '/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д',
+ '/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э',
+ '/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г',
+ '/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ',
+ '/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і',
+ '/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж',
+ '/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л',
+ '/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н',
+ '/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о',
+ '/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п',
+ '/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р',
+ '/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш',
+ '/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ',
+ '/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в',
+ '/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й',
+ '/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з',
+ ];
+
+ $this->mCyLa2Arab = [
+ # # Punctuation -> Arabic
+ '/#|№|No\./u' => '؀', # U+0600
+ '/\,/' => '،', # U+060C
+ '/;/' => '؛', # U+061B
+ '/\?/' => '؟', # U+061F
+ '/%/' => '٪', # U+066A
+ '/\*/' => '٭', # U+066D
+ # # Digits -> Arabic
+ '/0/' => '۰', # U+06F0
+ '/1/' => '۱', # U+06F1
+ '/2/' => '۲', # U+06F2
+ '/3/' => '۳', # U+06F3
+ '/4/' => '۴', # U+06F4
+ '/5/' => '۵', # U+06F5
+ '/6/' => '۶', # U+06F6
+ '/7/' => '۷', # U+06F7
+ '/8/' => '۸', # U+06F8
+ '/9/' => '۹', # U+06F9
+ # # Cyrillic -> Arabic
+ '/Аллаһ/ui' => 'ﷲ',
+ '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە',
+ '/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
+ '/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
+ '/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
+ '/ц/ui' => 'تس', '/щ/ui' => 'شش',
+ '/һ/ui' => 'ح', '/ч/ui' => 'تش',
+ # '/һ/ui' => 'ھ', '/ч/ui' => 'چ',
+ '/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
+ '/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
+ '/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
+ '/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
+ '/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
+ '/ш/ui' => 'ش',
+ # # Latin -> Arabic // commented for now...
+ /*'/Allah/ui' => 'ﷲ',
+ '/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '',
+ '/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ',
+ '/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى',
+ '/c/ui' => 'تس',
+ '/ç/ui' => 'تش', '/h/ui' => 'ح',
+ #'/ç/ui' => 'چ', '/h/ui' => 'ھ',
+ '/b/ui' => 'ب','/d/ui' => 'د',
+ '/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع',
+ '/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م',
+ '/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق',
+ '/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت',
+ '/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/
+ ];
+ }
+
+	/**
+	 * Wrapper around the parent's variant link lookup:
+	 *  - nothing is done for links into the User and User talk namespaces
+	 *  - if the preferred variant is the main language code (no specific
+	 *    variant selected), the original link text is put back
+	 *
+	 * @param string &$link
+	 * @param Title &$nt
+	 * @param bool $ignoreOtherCond
+	 */
+	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
+		// Usernames are never converted
+		if ( is_object( $nt ) ) {
+			$userNamespaces = [ NS_USER, NS_USER_TALK ];
+			if ( in_array( $nt->getNamespace(), $userNamespaces ) ) {
+				return;
+			}
+		}
+
+		$before = $link;
+		parent::findVariantLink( $link, $nt, $ignoreOtherCond );
+
+		$noVariantSelected = $this->getPreferredVariant() == $this->mMainLanguageCode;
+		if ( $noVariantSelected ) {
+			$link = $before;
+		}
+	}
+
+ /**
+ * Translate text into the given variant.
+ *
+ * The parent's table-based translation runs first. For the script and
+ * regional variants the result is then split with a regex; the split pieces
+ * go through regsConverter() and the matched delimiters are copied over
+ * unchanged. Any other variant code returns the parent's result as it is.
+ *
+ * @param string $text
+ * @param string $toVariant
+ *
+ * @return string
+ */
+ public function translate( $text, $toVariant ) {
+ $text = parent::translate( $text, $toVariant );
+
+ // $letters lists the characters of the *source* script(s) for this target;
+ // it is used below inside a negated character class
+ switch ( $toVariant ) {
+ case 'kk-cyrl':
+ case 'kk-kz':
+ $letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
+ break;
+ case 'kk-latn':
+ case 'kk-tr':
+ $letters = KK_C_UC . KK_C_LC . '№0123456789';
+ break;
+ case 'kk-arab':
+ case 'kk-cn':
+ $letters = KK_C_UC . KK_C_LC . /*KK_L_UC.KK_L_LC.'ʺʹ'.*/',;\?%\*№0123456789';
+ break;
+ default:
+ return $text;
+ }
+ // disable conversion variables like $1, $2...
+ $varsfix = '\$[0-9]';
+
+ // NOTE(review): as written, the pattern only matches "$N" immediately followed
+ // by a run of characters outside $letters. If the intent is to split on either
+ // "$N" or such a run, a '|' is missing between the two parts - confirm before
+ // changing, since that would alter what gets converted.
+ $matches = preg_split(
+ '/' . $varsfix . '[^' . $letters . ']+/u',
+ $text,
+ -1,
+ PREG_SPLIT_OFFSET_CAPTURE
+ );
+
+ $mstart = 0;
+ $ret = '';
+
+ foreach ( $matches as $m ) {
+ // copy the delimiter between the previous piece and this one unchanged
+ $ret .= substr( $text, $mstart, $m[1] - $mstart );
+ // convert the piece itself
+ $ret .= $this->regsConverter( $m[0], $toVariant );
+ // byte offset just past the end of this piece
+ $mstart = $m[1] + strlen( $m[0] );
+ }
+
+ return $ret;
+ }
+
+ /**
+ * Convert one chunk of text to the given variant using the regex tables
+ * built by loadRegs().
+ *
+ * For the Arabic-script variants, a high hamza is first put in front of the
+ * letter runs of words that contain a front vowel and none of the excluded
+ * letters; then the Cyrillic-to-Arabic regexes are applied. Any variant not
+ * listed in the switch returns $text unchanged.
+ *
+ * @param string $text
+ * @param string $toVariant
+ * @return mixed|string
+ */
+ private function regsConverter( $text, $toVariant ) {
+ if ( $text == '' ) {
+ return $text;
+ }
+
+ switch ( $toVariant ) {
+ case 'kk-arab':
+ case 'kk-cn':
+ $letters = KK_C_LC . KK_C_UC; /*.KK_L_LC.KK_L_UC*/
+ $front = 'әөүіӘӨҮІ'; /*.'äöüiÄÖÜİ'*/
+ $excludes = 'еэгғкқЕЭГҒКҚ'; /*.'eégğkqEÉGĞKQ'*/
+ // split text into words
+ // NOTE(review): inside a character class PCRE reads \b as a backspace
+ // character, not a word boundary - confirm this is intended.
+ $matches = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
+ $mstart = 0;
+ $ret = '';
+ foreach ( $matches as $m ) {
+ // copy the separator between the previous word and this one unchanged
+ $ret .= substr( $text, $mstart, $m[1] - $mstart );
+ // Does the word contain a front vowel?
+ // Words that also contain е, э, г, ғ, к or қ are excluded;
+ // they should be written without the hamza.
+ if ( preg_match( '/[' . $front . ']/u', $m[0] ) &&
+ !preg_match( '/[' . $excludes . ']/u', $m[0] )
+ ) {
+ // prefix every run of Cyrillic letters in the word with the high hamza
+ $ret .= preg_replace( '/[' . $letters . ']+/u', H_HAMZA . '$0', $m[0] );
+ } else {
+ $ret .= $m[0];
+ }
+ $mstart = $m[1] + strlen( $m[0] );
+ }
+ // NOTE(review): this binds $text to $ret by reference, so the assignments
+ // below also update $ret; a plain assignment would appear to be enough.
+ $text =& $ret;
+ foreach ( $this->mCyLa2Arab as $pat => $rep ) {
+ $text = preg_replace( $pat, $rep, $text );
+ }
+ return $text;
+ case 'kk-latn':
+ case 'kk-tr':
+ // apply the Cyrillic-to-Latin regexes in their declared order
+ foreach ( $this->mCyrl2Latn as $pat => $rep ) {
+ $text = preg_replace( $pat, $rep, $text );
+ }
+ return $text;
+ case 'kk-cyrl':
+ case 'kk-kz':
+ // apply the Latin-to-Cyrillic regexes in their declared order
+ foreach ( $this->mLatn2Cyrl as $pat => $rep ) {
+ $text = preg_replace( $pat, $rep, $text );
+ }
+ return $text;
+ default:
+ return $text;
+ }
+ }
+
+	/**
+	 * Convert a category key to the 'kk' variant via autoConvert().
+	 *
+	 * @param string $key
+	 * @return string
+	 */
+	public function convertCategoryKey( $key ) {
+		$converted = $this->autoConvert( $key, 'kk' );
+		return $converted;
+	}
+}
diff --git a/includes/language/converters/KuConverter.php b/includes/language/converters/KuConverter.php
new file mode 100644
index 000000000000..0183ba807547
--- /dev/null
+++ b/includes/language/converters/KuConverter.php
@@ -0,0 +1,238 @@
+<?php
+/**
+ * Kurdish specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * Kurdish converter routines
+ *
+ * @ingroup Language
+ */
+class KuConverter extends LanguageConverter {
+ public $mArabicToLatin = [
+ 'ب' => 'b', 'ج' => 'c', 'چ' => 'ç', 'د' => 'd', 'ف' => 'f', 'گ' => 'g', 'ھ' => 'h',
+ 'ہ' => 'h', 'ه' => 'h', 'ح' => 'h', 'ژ' => 'j', 'ك' => 'k', 'ک' => 'k', 'ل' => 'l',
+ 'م' => 'm', 'ن' => 'n', 'پ' => 'p', 'ق' => 'q', 'ر' => 'r', 'س' => 's', 'ش' => 'ş',
+ 'ت' => 't', 'ڤ' => 'v', 'خ' => 'x', 'غ' => 'x', 'ز' => 'z',
+
+// ک و => ku -- ist richtig
+// و ك=> ku -- ist auch richtig
+
+ /* Double vowels and semivowels */
+ 'ڵ' => 'll', # ll
+ 'ڕ' => 'rr', # rr
+ 'ا' => 'a',
+ # 'ئێ' => 'ê', # initial e
+ 'ە' => 'e',
+ 'ه‌' => 'e', # with one non-joiner
+ 'ه‌‌' => 'e', # with two non-joiner
+ 'ة' => 'e',
+ 'ێ' => 'ê',
+ 'ي' => 'î',
+ 'ی' => 'î', # U+06CC db 8c ARABIC LETTER FARSI YEH
+ 'ى' => 'î', # U+0649 d9 89 ARABIC LETTER ALEF MAKSURA
+ 'ۆ' => 'o',
+ 'و' => 'w',
+ 'ئ' => '', # initial hemze should not be shown
+ '،' => ',',
+ 'ع' => '\'', # ayn
+ '؟' => '?',
+
+ # digits
+ '٠' => '0', # U+0660
+ '١' => '1', # U+0661
+ '٢' => '2', # U+0662
+ '٣' => '3', # U+0663
+ '٤' => '4', # U+0664
+ '٥' => '5', # U+0665
+ '٦' => '6', # U+0666
+ '٧' => '7', # U+0667
+ '٨' => '8', # U+0668
+ '٩' => '9', # U+0669
+ ];
+
+ public $mLatinToArabic = [
+ 'b' => 'ب', 'c' => 'ج', 'ç' => 'چ', 'd' => 'د', 'f' => 'ف', 'g' => 'گ',
+ 'h' => 'ه', 'j' => 'ژ', 'k' => 'ک', 'l' => 'ل',
+ 'm' => 'م', 'n' => 'ن', 'p' => 'پ', 'q' => 'ق', 'r' => 'ر', 's' => 'س', 'ş' => 'ش',
+ 't' => 'ت', 'v' => 'ڤ',
+ 'x' => 'خ', 'y' => 'ی', 'z' => 'ز',
+
+ 'B' => 'ب', 'C' => 'ج', 'Ç' => 'چ', 'D' => 'د', 'F' => 'ف', 'G' => 'گ',
+ 'H' => 'ح', 'J' => 'ژ', 'K' => 'ک', 'L' => 'ل',
+ 'M' => 'م', 'N' => 'ن', 'P' => 'پ', 'Q' => 'ق', 'R' => 'ر', 'S' => 'س', 'Ş' => 'ش',
+ 'T' => 'ت', 'V' => 'ڤ', 'W' => 'و', 'X' => 'خ',
+ 'Y' => 'ی', 'Z' => 'ز',
+
+ /* Double consonants */
+ # 'll' => 'ڵ', # wenn es geht, doppel-l und l getrennt zu behandeln
+ # 'rr' => 'ڕ', # selbiges für doppel-r
+
+ /* Single capital letters */
+ // ' C' => 'ج',
+
+ /* Vowels */
+ 'a' => 'ا',
+ 'e' => 'ە',
+ 'ê' => 'ێ',
+ 'i' => '',
+ 'î' => 'ی',
+ 'o' => 'ۆ',
+ 'u' => 'و',
+ 'û' => 'وو',
+ 'w' => 'و',
+ ',' => '،',
+ '?' => '؟',
+
+ # Try to replace the leading vowel
+ ' a' => 'ئا ',
+ ' e' => 'ئە ',
+ ' ê' => 'ئێ ',
+ ' î' => 'ئی ',
+ ' o' => 'ئۆ ',
+ ' u' => 'ئو ',
+ ' û' => 'ئوو ',
+ 'A' => 'ئا',
+ 'E' => 'ئە',
+ 'Ê' => 'ئێ',
+ 'Î' => 'ئی',
+ 'O' => 'ئۆ',
+ 'U' => 'ئو',
+ 'Û' => 'ئوو',
+ ' A' => 'ئا ',
+ ' E' => 'ئە ',
+ ' Ê' => 'ئێ ',
+ ' Î' => 'ئی ',
+ ' O' => 'ئۆ ',
+ ' U' => 'ئو ',
+ ' Û' => 'ئوو ',
+ # ayn is disabled for now: plain single quotes are simply too
+ # common to be interpreted as ayn.
+ # '\'' => 'ع',
+
+/* # deactivated for now, breaks links i.e. in header of Special:Recentchanges :-(
+ # digits
+ '0' => '٠', # U+0660
+ '1' => '١', # U+0661
+ '2' => '٢', # U+0662
+ '3' => '٣', # U+0663
+ '4' => '٤', # U+0664
+ '5' => '٥', # U+0665
+ '6' => '٦', # U+0666
+ '7' => '٧', # U+0667
+ '8' => '٨', # U+0668
+ '9' => '٩', # U+0669
+*/
+ ];
+
+	/**
+	 * Register the Kurdish variants ('ku', 'ku-arab', 'ku-latn') and their
+	 * fallbacks with the parent converter, using 'ku' as the main code.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		parent::__construct(
+			$langobj,
+			'ku',
+			[ 'ku', 'ku-arab', 'ku-latn' ],
+			[
+				'ku' => 'ku-latn',
+				'ku-arab' => 'ku-latn',
+				'ku-latn' => 'ku-arab',
+			]
+		);
+	}
+
+	/**
+	 * Fill mTables with one ReplacementArray per variant: Arabic-to-Latin for
+	 * 'ku-latn', Latin-to-Arabic for 'ku-arab', and an empty one for 'ku'.
+	 */
+	protected function loadDefaultTables() {
+		$toLatin = new ReplacementArray( $this->mArabicToLatin );
+		$toArabic = new ReplacementArray( $this->mLatinToArabic );
+		$noConversion = new ReplacementArray();
+
+		$this->mTables = [
+			'ku-latn' => $toLatin,
+			'ku-arab' => $toArabic,
+			'ku' => $noConversion,
+		];
+	}
+
+	/**
+	 * Wrapper around the parent variant-link lookup:
+	 * - links in the user and user-talk namespaces are never looked up
+	 * - when the preferred variant is the main language code, the link
+	 *   text is put back to what it was before the lookup
+	 *
+	 * @param string &$link
+	 * @param Title &$nt
+	 * @param bool $ignoreOtherCond
+	 */
+	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
+		// Leave user pages alone entirely.
+		if ( is_object( $nt ) ) {
+			$namespace = $nt->getNamespace();
+			$isUserPage = $namespace == NS_USER || $namespace == NS_USER_TALK;
+			if ( $isUserPage ) {
+				return;
+			}
+		}
+
+		$untouchedLink = $link;
+		parent::findVariantLink( $link, $nt, $ignoreOtherCond );
+
+		// No specific variant selected: restore the original link text.
+		$noVariantSelected = $this->getPreferredVariant() == $this->mMainLanguageCode;
+		if ( $noVariantSelected ) {
+			$link = $untouchedLink;
+		}
+	}
+
+	/**
+	 * Translate text into the given variant.
+	 *
+	 * Loads the conversion tables, verifies that a table exists for the
+	 * requested variant and then delegates the conversion to the parent
+	 * class. No special handling of Roman numerals is performed here.
+	 *
+	 * @param string $text Text to convert
+	 * @param string $toVariant Variant code, used as the key into mTables
+	 *
+	 * @throws MWException If no conversion table exists for $toVariant
+	 * @return string
+	 */
+	public function translate( $text, $toVariant ) {
+		$this->loadTables();
+
+		// Fail loudly, listing the tables that do exist, rather than
+		// converting with a missing table.
+		if ( !isset( $this->mTables[$toVariant] ) ) {
+			throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
+		}
+
+		return parent::translate( $text, $toVariant );
+	}
+}
diff --git a/includes/language/converters/ShiConverter.php b/includes/language/converters/ShiConverter.php
new file mode 100644
index 000000000000..cacf4d69d1ab
--- /dev/null
+++ b/includes/language/converters/ShiConverter.php
@@ -0,0 +1,137 @@
+<?php
+/**
+ * Shilha specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * Conversion script between Latin and Tifinagh for Tachelhit.
+ * - Tifinagh -> lowercase Latin
+ * - lowercase/uppercase Latin -> Tifinagh
+ *
+ *
+ * Based on:
+ * - https://en.wikipedia.org/wiki/Shilha_language
+ * - LanguageSr.php
+ *
+ * @ingroup Language
+ */
+class ShiConverter extends LanguageConverter {
+ protected $mDoContentConvert;
+
+ public $mToLatin = [
+ 'ⴰ' => 'a', 'ⴱ' => 'b', 'ⴳ' => 'g', 'ⴷ' => 'd', 'ⴹ' => 'ḍ', 'ⴻ' => 'e',
+ 'ⴼ' => 'f', 'ⴽ' => 'k', 'ⵀ' => 'h', 'ⵃ' => 'ḥ', 'ⵄ' => 'ε', 'ⵅ' => 'x',
+ 'ⵇ' => 'q', 'ⵉ' => 'i', 'ⵊ' => 'j', 'ⵍ' => 'l', 'ⵎ' => 'm', 'ⵏ' => 'n',
+ 'ⵓ' => 'u', 'ⵔ' => 'r', 'ⵕ' => 'ṛ', 'ⵙ' => 's', 'ⵚ' => 'ṣ',
+ 'ⵛ' => 'š', 'ⵜ' => 't', 'ⵟ' => 'ṭ', 'ⵡ' => 'w', 'ⵢ' => 'y', 'ⵣ' => 'z',
+ 'ⵥ' => 'ẓ', 'ⵯ' => 'ʷ', 'ⵖ' => 'ɣ', 'ⵠ' => 'v', 'ⵒ' => 'p',
+ ];
+
+ public $mUpperToLowerCaseLatin = [
+ 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
+ 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j',
+ 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o',
+ 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't',
+ 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y',
+ 'Z' => 'z', 'Ɣ' => 'ɣ',
+ ];
+
+ public $mToTifinagh = [
+ 'a' => 'ⴰ', 'b' => 'ⴱ', 'g' => 'ⴳ', 'd' => 'ⴷ', 'ḍ' => 'ⴹ', 'e' => 'ⴻ',
+ 'f' => 'ⴼ', 'k' => 'ⴽ', 'h' => 'ⵀ', 'ḥ' => 'ⵃ', 'ε' => 'ⵄ', 'x' => 'ⵅ',
+ 'q' => 'ⵇ', 'i' => 'ⵉ', 'j' => 'ⵊ', 'l' => 'ⵍ', 'm' => 'ⵎ', 'n' => 'ⵏ',
+ 'u' => 'ⵓ', 'r' => 'ⵔ', 'ṛ' => 'ⵕ', 'γ' => 'ⵖ', 's' => 'ⵙ', 'ṣ' => 'ⵚ',
+ 'š' => 'ⵛ', 't' => 'ⵜ', 'ṭ' => 'ⵟ', 'w' => 'ⵡ', 'y' => 'ⵢ', 'z' => 'ⵣ',
+ 'ẓ' => 'ⵥ', 'ʷ' => 'ⵯ', 'ɣ' => 'ⵖ', 'v' => 'ⵠ', 'p' => 'ⵒ',
+ ];
+
+	/**
+	 * Register the Shilha variants ('shi', 'shi-tfng', 'shi-latn') and their
+	 * fallbacks with the parent converter; no conversion flags are defined.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		$fallbackByVariant = [
+			'shi' => 'shi-tfng',
+			'shi-tfng' => 'shi',
+			'shi-latn' => 'shi',
+		];
+
+		parent::__construct(
+			$langobj,
+			'shi',
+			[ 'shi', 'shi-tfng', 'shi-latn' ],
+			$fallbackByVariant,
+			[]
+		);
+	}
+
+	/**
+	 * Fill mTables with the default replacement tables: an upper-to-lowercase
+	 * Latin helper table under 'lowercase', the Tifinagh and Latin tables for
+	 * their variants, and an empty table for 'shi'.
+	 */
+	protected function loadDefaultTables() {
+		$lowercase = new ReplacementArray( $this->mUpperToLowerCaseLatin );
+		$toTifinagh = new ReplacementArray( $this->mToTifinagh );
+		$toLatin = new ReplacementArray( $this->mToLatin );
+		$noConversion = new ReplacementArray();
+
+		$this->mTables = [
+			'lowercase' => $lowercase,
+			'shi-tfng' => $toTifinagh,
+			'shi-latn' => $toLatin,
+			'shi' => $noConversion,
+		];
+	}
+
+	/**
+	 * Wrapper around the parent variant-link lookup:
+	 * - links in the user and user-talk namespaces are skipped
+	 * - if the preferred variant equals the main language code, the link
+	 *   text is reset to its value from before the lookup
+	 *
+	 * @param string &$link
+	 * @param Title &$nt
+	 * @param bool $ignoreOtherCond
+	 */
+	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
+		if ( is_object( $nt ) ) {
+			$targetNs = $nt->getNamespace();
+			// Usernames are never converted.
+			if ( $targetNs == NS_USER || $targetNs == NS_USER_TALK ) {
+				return;
+			}
+		}
+
+		$linkBefore = $link;
+		parent::findVariantLink( $link, $nt, $ignoreOtherCond );
+
+		$preferred = $this->getPreferredVariant();
+		if ( $preferred == $this->mMainLanguageCode ) {
+			// No variant selected: keep the link name as written.
+			$link = $linkBefore;
+		}
+	}
+
+	/**
+	 * Translate text into the given variant.
+	 *
+	 * Text that is empty or consists only of whitespace is returned as is.
+	 * When converting to Tifinagh, uppercase Latin letters are lowercased
+	 * first (via the 'lowercase' table) so that the Tifinagh table, which
+	 * is keyed on lowercase letters, can match them.
+	 *
+	 * @param string $text
+	 * @param string $toVariant
+	 *
+	 * @throws MWException If no conversion table exists for $toVariant
+	 * @return string
+	 */
+	public function translate( $text, $toVariant ) {
+		// Compare against '' explicitly: a bare truthiness test would also
+		// treat the text "0" as empty and skip it.
+		if ( trim( $text ) === '' ) {
+			return $text;
+		}
+
+		$this->loadTables();
+
+		// Report an unknown variant clearly instead of calling replace() on null.
+		if ( !isset( $this->mTables[$toVariant] ) ) {
+			throw new MWException( "Broken variant table: "
+				. implode( ',', array_keys( $this->mTables ) ) );
+		}
+
+		// To Tifinagh, first translate uppercase to lowercase Latin
+		if ( $toVariant == 'shi-tfng' ) {
+			$text = $this->mTables['lowercase']->replace( $text );
+		}
+
+		return $this->mTables[$toVariant]->replace( $text );
+	}
+}
diff --git a/includes/language/converters/SrConverter.php b/includes/language/converters/SrConverter.php
new file mode 100644
index 000000000000..df71d6a56143
--- /dev/null
+++ b/includes/language/converters/SrConverter.php
@@ -0,0 +1,185 @@
+<?php
+/**
+ * Serbian (Српски / Srpski) specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * There are two levels of conversion for Serbian: the script level
+ * (Cyrillics <-> Latin), and the variant level (ekavian
+ * <->iyekavian). The two are orthogonal. So we really only need two
+ * dictionaries: one for Cyrillics and Latin, and one for ekavian and
+ * iyekavian.
+ *
+ * @ingroup Language
+ */
+class SrConverter extends LanguageConverter {
+ public $mToLatin = [
+ 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd',
+ 'ђ' => 'đ', 'е' => 'e', 'ж' => 'ž', 'з' => 'z', 'и' => 'i',
+ 'ј' => 'j', 'к' => 'k', 'л' => 'l', 'љ' => 'lj', 'м' => 'm',
+ 'н' => 'n', 'њ' => 'nj', 'о' => 'o', 'п' => 'p', 'р' => 'r',
+ 'с' => 's', 'т' => 't', 'ћ' => 'ć', 'у' => 'u', 'ф' => 'f',
+ 'х' => 'h', 'ц' => 'c', 'ч' => 'č', 'џ' => 'dž', 'ш' => 'š',
+
+ 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D',
+ 'Ђ' => 'Đ', 'Е' => 'E', 'Ж' => 'Ž', 'З' => 'Z', 'И' => 'I',
+ 'Ј' => 'J', 'К' => 'K', 'Л' => 'L', 'Љ' => 'Lj', 'М' => 'M',
+ 'Н' => 'N', 'Њ' => 'Nj', 'О' => 'O', 'П' => 'P', 'Р' => 'R',
+ 'С' => 'S', 'Т' => 'T', 'Ћ' => 'Ć', 'У' => 'U', 'Ф' => 'F',
+ 'Х' => 'H', 'Ц' => 'C', 'Ч' => 'Č', 'Џ' => 'Dž', 'Ш' => 'Š',
+ ];
+
+ public $mToCyrillics = [
+ 'a' => 'а', 'b' => 'б', 'c' => 'ц', 'č' => 'ч', 'ć' => 'ћ',
+ 'd' => 'д', 'dž' => 'џ', 'đ' => 'ђ', 'e' => 'е', 'f' => 'ф',
+ 'g' => 'г', 'h' => 'х', 'i' => 'и', 'j' => 'ј', 'k' => 'к',
+ 'l' => 'л', 'lj' => 'љ', 'm' => 'м', 'n' => 'н', 'nj' => 'њ',
+ 'o' => 'о', 'p' => 'п', 'r' => 'р', 's' => 'с', 'š' => 'ш',
+ 't' => 'т', 'u' => 'у', 'v' => 'в', 'z' => 'з', 'ž' => 'ж',
+
+ 'A' => 'А', 'B' => 'Б', 'C' => 'Ц', 'Č' => 'Ч', 'Ć' => 'Ћ',
+ 'D' => 'Д', 'Dž' => 'Џ', 'Đ' => 'Ђ', 'E' => 'Е', 'F' => 'Ф',
+ 'G' => 'Г', 'H' => 'Х', 'I' => 'И', 'J' => 'Ј', 'K' => 'К',
+ 'L' => 'Л', 'LJ' => 'Љ', 'M' => 'М', 'N' => 'Н', 'NJ' => 'Њ',
+ 'O' => 'О', 'P' => 'П', 'R' => 'Р', 'S' => 'С', 'Š' => 'Ш',
+ 'T' => 'Т', 'U' => 'У', 'V' => 'В', 'Z' => 'З', 'Ž' => 'Ж',
+
+ 'DŽ' => 'Џ', 'd!ž' => 'дж', 'D!ž' => 'Дж', 'D!Ž' => 'ДЖ',
+ 'Lj' => 'Љ', 'l!j' => 'лј', 'L!j' => 'Лј', 'L!J' => 'ЛЈ',
+ 'Nj' => 'Њ', 'n!j' => 'нј', 'N!j' => 'Нј', 'N!J' => 'НЈ'
+ ];
+
+	/**
+	 * Register the Serbian variants ('sr', 'sr-ec', 'sr-el'), their fallbacks
+	 * and the flag aliases (Latin and Cyrillic spellings mapped to the 'S'
+	 * and 'W' flags) with the parent converter.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		$fallbackByVariant = [
+			'sr' => 'sr-ec',
+			'sr-ec' => 'sr',
+			'sr-el' => 'sr',
+		];
+
+		$flagAliases = [
+			'S' => 'S',
+			'писмо' => 'S',
+			'pismo' => 'S',
+			'W' => 'W',
+			'реч' => 'W',
+			'reč' => 'W',
+			'ријеч' => 'W',
+			'riječ' => 'W',
+		];
+
+		parent::__construct(
+			$langobj,
+			'sr',
+			[ 'sr', 'sr-ec', 'sr-el' ],
+			$fallbackByVariant,
+			$flagAliases
+		);
+	}
+
+	/**
+	 * Fill mTables with the default replacement tables: Latin-to-Cyrillic for
+	 * 'sr-ec', Cyrillic-to-Latin for 'sr-el', and an empty table for 'sr'.
+	 */
+	protected function loadDefaultTables() {
+		$toCyrillic = new ReplacementArray( $this->mToCyrillics );
+		$toLatin = new ReplacementArray( $this->mToLatin );
+		$noConversion = new ReplacementArray();
+
+		$this->mTables = [
+			'sr-ec' => $toCyrillic,
+			'sr-el' => $toLatin,
+			'sr' => $noConversion,
+		];
+	}
+
+	/**
+	 * Wrapper around the parent variant-link lookup:
+	 * - nothing is looked up for links in the user and user-talk namespaces
+	 * - with the main language code as preferred variant, the link text is
+	 *   restored to what it was before the lookup
+	 *
+	 * @param string &$link
+	 * @param Title &$nt
+	 * @param bool $ignoreOtherCond
+	 */
+	public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
+		// Do not try to find variants for usernames.
+		if ( is_object( $nt ) ) {
+			$ns = $nt->getNamespace();
+			$inUserSpace = ( $ns == NS_USER ) || ( $ns == NS_USER_TALK );
+			if ( $inUserSpace ) {
+				return;
+			}
+		}
+
+		$savedLink = $link;
+		parent::findVariantLink( $link, $nt, $ignoreOtherCond );
+
+		if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
+			// No variant selected: leave the link name as it was.
+			$link = $savedLink;
+		}
+	}
+
+	/**
+	 * Translate text into the given variant, leaving Roman numerals alone.
+	 *
+	 * The text is split on Roman numerals that stand alone or are delimited
+	 * by break characters. The first piece is converted directly with the
+	 * variant's table, the remaining pieces through the parent translate(),
+	 * and the matched numerals (with their delimiters) are copied verbatim
+	 * between them.
+	 *
+	 * @param string $text
+	 * @param string $toVariant
+	 *
+	 * @throws MWException If no conversion table exists for $toVariant
+	 * @return string
+	 */
+	public function translate( $text, $toVariant ) {
+		$boundary = '[^\w\x80-\xff]';
+
+		// Roman numeral; the lookahead keeps it from matching the empty string.
+		$numeral = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';
+
+		$alternatives = [
+			'^' . $numeral . '$',
+			'^' . $numeral . $boundary,
+			$boundary . $numeral . '$',
+			$boundary . $numeral . $boundary,
+		];
+		$pattern = '/' . implode( '|', $alternatives ) . '/';
+
+		$pieces = preg_split( $pattern, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
+		$firstPiece = array_shift( $pieces );
+
+		$this->loadTables();
+		if ( !isset( $this->mTables[$toVariant] ) ) {
+			throw new MWException( "Broken variant table: "
+				. implode( ',', array_keys( $this->mTables ) ) );
+		}
+
+		$converted = $this->mTables[$toVariant]->replace( $firstPiece[0] );
+		$cursor = $firstPiece[1] + strlen( $firstPiece[0] );
+
+		foreach ( $pieces as $piece ) {
+			$chunk = $piece[0];
+			$offset = $piece[1];
+			// Text between two pieces is the numeral match: copy it unchanged.
+			$converted .= substr( $text, $cursor, $offset - $cursor )
+				. parent::translate( $chunk, $toVariant );
+			$cursor = $offset + strlen( $chunk );
+		}
+
+		return $converted;
+	}
+
+	/**
+	 * Guess if a text is written in Cyrillic or Latin.
+	 * Overrides LanguageConverter::guessVariant()
+	 *
+	 * The guess counts occurrences of five script-specific letters (both
+	 * cases) in each script and compares the two totals; a tie yields false
+	 * for either variant.
+	 *
+	 * @param string $text The text to be checked
+	 * @param string $variant Language code of the variant to be checked for
+	 * @return bool True if $text appears to be written in $variant
+	 *
+	 * @author Nikola Smolenski <smolensk@eunet.rs>
+	 * @since 1.19
+	 */
+	public function guessVariant( $text, $variant ) {
+		// The two classes must stay letter-for-letter counterparts of each
+		// other (ш/š, ђ/đ, ч/č, ћ/ć, ж/ž), otherwise the comparison is biased.
+		$numCyrillic = preg_match_all( "/[шђчћжШЂЧЋЖ]/u", $text );
+		$numLatin = preg_match_all( "/[šđčćžŠĐČĆŽ]/u", $text );
+
+		if ( $variant == 'sr-ec' ) {
+			return $numCyrillic > $numLatin;
+		}
+		if ( $variant == 'sr-el' ) {
+			return $numLatin > $numCyrillic;
+		}
+
+		// Any other variant cannot be guessed from the script.
+		return false;
+	}
+
+}
diff --git a/includes/language/converters/TgConverter.php b/includes/language/converters/TgConverter.php
new file mode 100644
index 000000000000..1f31377d24cf
--- /dev/null
+++ b/includes/language/converters/TgConverter.php
@@ -0,0 +1,120 @@
+<?php
+/**
+ * Tajik (Тоҷикӣ) specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * Converts Tajiki to Latin orthography
+ *
+ * @ingroup Language
+ */
+class TgConverter extends LanguageConverter {
+ private $table = [
+ 'а' => 'a',
+ 'б' => 'b',
+ 'в' => 'v',
+ 'г' => 'g',
+ 'д' => 'd',
+ 'е' => 'e',
+ 'ё' => 'jo',
+ 'ж' => 'ƶ',
+ 'з' => 'z',
+ 'ии ' => 'iji ',
+ 'и' => 'i',
+ 'й' => 'j',
+ 'к' => 'k',
+ 'л' => 'l',
+ 'м' => 'm',
+ 'н' => 'n',
+ 'о' => 'o',
+ 'п' => 'p',
+ 'р' => 'r',
+ 'с' => 's',
+ 'т' => 't',
+ 'у' => 'u',
+ 'ф' => 'f',
+ 'х' => 'x',
+ 'ч' => 'c',
+ 'ш' => 'ş',
+ 'ъ' => '\'',
+ 'э' => 'e',
+ 'ю' => 'ju',
+ 'я' => 'ja',
+ 'ғ' => 'ƣ',
+ 'ӣ' => 'ī',
+ 'қ' => 'q',
+ 'ӯ' => 'ū',
+ 'ҳ' => 'h',
+ 'ҷ' => 'ç',
+ 'ц' => 'ts',
+ 'А' => 'A',
+ 'Б' => 'B',
+ 'В' => 'V',
+ 'Г' => 'G',
+ 'Д' => 'D',
+ 'Е' => 'E',
+ 'Ё' => 'Jo',
+ 'Ж' => 'Ƶ',
+ 'З' => 'Z',
+ 'И' => 'I',
+ 'Й' => 'J',
+ 'К' => 'K',
+ 'Л' => 'L',
+ 'М' => 'M',
+ 'Н' => 'N',
+ 'О' => 'O',
+ 'П' => 'P',
+ 'Р' => 'R',
+ 'С' => 'S',
+ 'Т' => 'T',
+ 'У' => 'U',
+ 'Ф' => 'F',
+ 'Х' => 'X',
+ 'Ч' => 'C',
+ 'Ш' => 'Ş',
+ 'Ъ' => '\'',
+ 'Э' => 'E',
+ 'Ю' => 'Ju',
+ 'Я' => 'Ja',
+ 'Ғ' => 'Ƣ',
+ 'Ӣ' => 'Ī',
+ 'Қ' => 'Q',
+ 'Ӯ' => 'Ū',
+ 'Ҳ' => 'H',
+ 'Ҷ' => 'Ç',
+ 'Ц' => 'Ts',
+ ];
+
+	/**
+	 * Register the Tajik variants ('tg' and 'tg-latn') with the parent
+	 * converter, using 'tg' as the main code.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		parent::__construct( $langobj, 'tg', [ 'tg', 'tg-latn' ] );
+	}
+
+	/**
+	 * Fill mTables with the Cyrillic-to-Latin table for 'tg-latn' and an
+	 * empty table for 'tg'.
+	 */
+	protected function loadDefaultTables() {
+		$toLatin = new ReplacementArray( $this->table );
+		$noConversion = new ReplacementArray();
+
+		$this->mTables = [
+			'tg-latn' => $toLatin,
+			'tg' => $noConversion,
+		];
+	}
+}
diff --git a/includes/language/converters/UzConverter.php b/includes/language/converters/UzConverter.php
new file mode 100644
index 000000000000..186664cc82c3
--- /dev/null
+++ b/includes/language/converters/UzConverter.php
@@ -0,0 +1,138 @@
+<?php
+/**
+ * Uzbek specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * @ingroup Language
+ */
+class UzConverter extends LanguageConverter {
+ public $toLatin = [
+ 'а' => 'a', 'А' => 'A',
+ 'б' => 'b', 'Б' => 'B',
+ 'д' => 'd', 'Д' => 'D',
+ 'е' => 'e', 'Е' => 'E',
+ 'э' => 'e', 'Э' => 'E',
+ 'в' => 'v', 'В' => 'V',
+ 'х' => 'x', 'Х' => 'X',
+ 'ғ' => 'gʻ', 'Ғ' => 'Gʻ',
+ 'г' => 'g', 'Г' => 'G',
+ 'ҳ' => 'h', 'Ҳ' => 'H',
+ 'ж' => 'j', 'Ж' => 'J',
+ 'з' => 'z', 'З' => 'Z',
+ 'и' => 'i', 'И' => 'I',
+ 'к' => 'k', 'К' => 'K',
+ 'л' => 'l', 'Л' => 'L',
+ 'м' => 'm', 'М' => 'M',
+ 'н' => 'n', 'Н' => 'N',
+ 'о' => 'o', 'О' => 'O',
+ 'п' => 'p', 'П' => 'P',
+ 'р' => 'r', 'Р' => 'R',
+ 'с' => 's', 'С' => 'S',
+ 'т' => 't', 'Т' => 'T',
+ 'у' => 'u', 'У' => 'U',
+ 'ф' => 'f', 'Ф' => 'F',
+ 'ў' => 'oʻ', 'Ў' => 'Oʻ',
+ // note: at the beginning of a word and right after a consonant, only "s" is used
+ 'ц' => 'ts', 'Ц' => 'Ts',
+ 'қ' => 'q', 'Қ' => 'Q',
+ 'ё' => 'yo', 'Ё' => 'Yo',
+ 'ю' => 'yu', 'Ю' => 'Yu',
+ 'ч' => 'ch', 'Ч' => 'Ch',
+ 'ш' => 'sh', 'Ш' => 'Sh',
+ 'й' => 'y', 'Й' => 'Y',
+ 'я' => 'ya', 'Я' => 'Ya',
+ 'ъ' => 'ʼ',
+ ];
+
+ public $toCyrillic = [
+ 'a' => 'а', 'A' => 'А',
+ 'b' => 'б', 'B' => 'Б',
+ 'd' => 'д', 'D' => 'Д',
+ // at the beginning of a word and after a vowel, "э" is used instead of "e"
+ // (see regex below)
+ 'e' => 'э', 'E' => 'Э',
+ 'f' => 'ф', 'F' => 'Ф',
+ 'g' => 'г', 'G' => 'Г',
+ 'g‘' => 'ғ', 'G‘' => 'Ғ', 'gʻ' => 'ғ', 'Gʻ' => 'Ғ',
+ 'h' => 'ҳ', 'H' => 'Ҳ',
+ 'i' => 'и', 'I' => 'И',
+ 'k' => 'к', 'K' => 'К',
+ 'l' => 'л', 'L' => 'Л',
+ 'm' => 'м', 'M' => 'М',
+ 'n' => 'н', 'N' => 'Н',
+ 'o' => 'о', 'O' => 'О',
+ 'p' => 'п', 'P' => 'П',
+ 'r' => 'р', 'R' => 'Р',
+ 's' => 'с', 'S' => 'С',
+ 't' => 'т', 'T' => 'Т',
+ 'u' => 'у', 'U' => 'У',
+ 'v' => 'в', 'V' => 'В',
+ 'x' => 'х', 'X' => 'Х',
+ 'z' => 'з', 'Z' => 'З',
+ 'j' => 'ж', 'J' => 'Ж',
+ 'o‘' => 'ў', 'O‘' => 'Ў', 'oʻ' => 'ў', 'Oʻ' => 'Ў',
+ 'yo‘' => 'йў', 'Yo‘' => 'Йў', 'yoʻ' => 'йў', 'Yoʻ' => 'Йў',
+ 'ts' => 'ц', 'Ts' => 'Ц',
+ 'q' => 'қ', 'Q' => 'Қ',
+ 'yo' => 'ё', 'Yo' => 'Ё',
+ 'yu' => 'ю', 'Yu' => 'Ю',
+ 'ch' => 'ч', 'Ch' => 'Ч',
+ 'sh' => 'ш', 'Sh' => 'Ш',
+ 'y' => 'й', 'Y' => 'Й',
+ 'ya' => 'я', 'Ya' => 'Я',
+ 'ʼ' => 'ъ',
+ ];
+
+	/**
+	 * Register the Uzbek variants ('uz', 'uz-latn', 'uz-cyrl') and their
+	 * fallbacks with the parent converter, using 'uz' as the main code.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		parent::__construct(
+			$langobj,
+			'uz',
+			[ 'uz', 'uz-latn', 'uz-cyrl' ],
+			[
+				'uz' => 'uz-latn',
+				'uz-cyrl' => 'uz',
+				'uz-latn' => 'uz',
+			]
+		);
+	}
+
+	/**
+	 * Fill mTables with the default replacement tables: Latin-to-Cyrillic for
+	 * 'uz-cyrl', Cyrillic-to-Latin for 'uz-latn', and an empty table for 'uz'.
+	 */
+	protected function loadDefaultTables() {
+		$toCyrillic = new ReplacementArray( $this->toCyrillic );
+		$toLatin = new ReplacementArray( $this->toLatin );
+		$noConversion = new ReplacementArray();
+
+		$this->mTables = [
+			'uz-cyrl' => $toCyrillic,
+			'uz-latn' => $toLatin,
+			'uz' => $noConversion,
+		];
+	}
+
+	/**
+	 * Translate text into the given variant.
+	 *
+	 * For 'uz-cyrl' the Latin "e" is pre-processed before the table-based
+	 * conversion of the parent class runs: "ye" becomes Cyrillic "е", and an
+	 * "e" directly after a consonant becomes "е" as well, so that only the
+	 * remaining "e" is left for the table.
+	 *
+	 * @param string $text
+	 * @param string $toVariant
+	 * @return string
+	 */
+	public function translate( $text, $toVariant ) {
+		if ( $toVariant != 'uz-cyrl' ) {
+			return parent::translate( $text, $toVariant );
+		}
+
+		// str_replace() applies the pairs one after another, in this order.
+		$text = str_replace( [ 'ye', 'Ye', 'YE' ], [ 'е', 'Е', 'Е' ], $text );
+
+		// "е" after consonants, otherwise "э" (left to the conversion table)
+		$upperAfterConsonant = '/([BVGDJZYKLMNPRSTFXCWQʻ‘H])E/u';
+		$lowerAfterConsonant = '/([bvgdjzyklmnprstfxcwqʻ‘h])e/ui';
+		$text = preg_replace( $upperAfterConsonant, '$1Е', $text );
+		$text = preg_replace( $lowerAfterConsonant, '$1е', $text );
+
+		return parent::translate( $text, $toVariant );
+	}
+
+}
diff --git a/includes/language/converters/ZhConverter.php b/includes/language/converters/ZhConverter.php
new file mode 100644
index 000000000000..11e596546e42
--- /dev/null
+++ b/includes/language/converters/ZhConverter.php
@@ -0,0 +1,125 @@
+<?php
+/**
+ * Chinese specific code.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Language
+ */
+
+/**
+ * @ingroup Language
+ */
+class ZhConverter extends LanguageConverter {
+	/**
+	 * Set the description separators, register the Chinese variants with
+	 * their fallback chains with the parent converter, and then add the
+	 * variant display names on top of the existing mVariantNames.
+	 *
+	 * @param Language $langobj
+	 */
+	public function __construct( $langobj ) {
+		$this->mDescCodeSep = ':';
+		$this->mDescVarSep = ';';
+
+		$variantCodes = [
+			'zh', 'zh-hans', 'zh-hant',
+			'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw',
+		];
+
+		$fallbackChains = [
+			'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
+			'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
+			'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
+			'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
+			'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
+			'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
+			'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
+			'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
+			'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
+		];
+
+		// NOTE(review): presumably the per-variant manual-conversion level
+		// expected by the parent constructor - confirm against LanguageConverter.
+		$levelByVariant = [
+			'zh' => 'disable',
+			'zh-hans' => 'unidirectional',
+			'zh-hant' => 'unidirectional',
+		];
+
+		parent::__construct( $langobj, 'zh', $variantCodes, $fallbackChains, [], $levelByVariant );
+
+		$displayNames = [
+			'zh' => '原文',
+			'zh-hans' => '简体',
+			'zh-hant' => '繁體',
+			'zh-cn' => '大陆',
+			'zh-tw' => '台灣',
+			'zh-hk' => '香港',
+			'zh-mo' => '澳門',
+			'zh-sg' => '新加坡',
+			'zh-my' => '大马',
+		];
+		$this->mVariantNames = array_merge( $this->mVariantNames, $displayNames );
+	}
+
+	/**
+	 * Fill mTables with one ReplacementArray per variant, built from the
+	 * static ZhConversion tables. 'zh-mo' starts from the same data as
+	 * 'zh-hk', and 'zh-my' / 'zh-sg' from the same data as 'zh-cn'; 'zh'
+	 * gets an empty table.
+	 */
+	protected function loadDefaultTables() {
+		$sourceByVariant = [
+			'zh-hans' => MediaWiki\Languages\Data\ZhConversion::$zh2Hans,
+			'zh-hant' => MediaWiki\Languages\Data\ZhConversion::$zh2Hant,
+			'zh-cn' => MediaWiki\Languages\Data\ZhConversion::$zh2CN,
+			'zh-hk' => MediaWiki\Languages\Data\ZhConversion::$zh2HK,
+			'zh-mo' => MediaWiki\Languages\Data\ZhConversion::$zh2HK,
+			'zh-my' => MediaWiki\Languages\Data\ZhConversion::$zh2CN,
+			'zh-sg' => MediaWiki\Languages\Data\ZhConversion::$zh2CN,
+			'zh-tw' => MediaWiki\Languages\Data\ZhConversion::$zh2TW,
+		];
+
+		$tables = [];
+		foreach ( $sourceByVariant as $variant => $mapping ) {
+			// Each variant gets its own ReplacementArray instance.
+			$tables[$variant] = new ReplacementArray( $mapping );
+		}
+		$tables['zh'] = new ReplacementArray;
+
+		$this->mTables = $tables;
+	}
+
+	/**
+	 * Extend each regional table with the entries of its script-level table
+	 * ('zh-hans' or 'zh-hant'). The array union keeps the regional entry
+	 * whenever both tables define the same key.
+	 */
+	protected function postLoadTables() {
+		$scriptByRegion = [
+			'zh-cn' => 'zh-hans',
+			'zh-hk' => 'zh-hant',
+			'zh-mo' => 'zh-hant',
+			'zh-my' => 'zh-hans',
+			'zh-sg' => 'zh-hans',
+			'zh-tw' => 'zh-hant',
+		];
+
+		foreach ( $scriptByRegion as $region => $script ) {
+			$regional = $this->mTables[$region];
+			$regional->setArray(
+				$regional->getArray() + $this->mTables[$script]->getArray()
+			);
+		}
+	}
+
+	/**
+	 * Convert a category sort key by auto-converting it to the 'zh' variant.
+	 *
+	 * @param string $key
+	 * @return string
+	 */
+	public function convertCategoryKey( $key ) {
+		$converted = $this->autoConvert( $key, 'zh' );
+
+		return $converted;
+	}
+}