diff options
Diffstat (limited to 'languages/classes')
-rw-r--r-- | languages/classes/LanguageCrh.php | 284 | ||||
-rw-r--r-- | languages/classes/LanguageEn.php | 56 | ||||
-rw-r--r-- | languages/classes/LanguageGan.php | 54 | ||||
-rw-r--r-- | languages/classes/LanguageIu.php | 161 | ||||
-rw-r--r-- | languages/classes/LanguageKk.php | 338 | ||||
-rw-r--r-- | languages/classes/LanguageKu.php | 238 | ||||
-rw-r--r-- | languages/classes/LanguageShi.php | 137 | ||||
-rw-r--r-- | languages/classes/LanguageSr.php | 185 | ||||
-rw-r--r-- | languages/classes/LanguageTg.php | 120 | ||||
-rw-r--r-- | languages/classes/LanguageUz.php | 138 | ||||
-rw-r--r-- | languages/classes/LanguageZh.php | 105 |
11 files changed, 1 insertions, 1815 deletions
diff --git a/languages/classes/LanguageCrh.php b/languages/classes/LanguageCrh.php deleted file mode 100644 index 0f1050559ba6..000000000000 --- a/languages/classes/LanguageCrh.php +++ /dev/null @@ -1,284 +0,0 @@ -<?php -/** - * Crimean Tatar (Qırımtatarca) specific code. - * - * Adapted from https://crh.wikipedia.org/wiki/Qullan%C4%B1c%C4%B1:Don_Alessandro/Translit - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Language - */ - -/** - * Crimean Tatar (Qırımtatarca) converter routines - * - * @ingroup Language - */ -class CrhConverter extends LanguageConverter { - // Defines working character ranges - - // Cyrillic - const C_UC = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'; # Crimean Tatar Cyrillic uppercase - const C_LC = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'; # Crimean Tatar Cyrillic lowercase - const C_CONS_UC = 'БВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ'; # Crimean Tatar Cyrillic + CÑ uppercase consonants - const C_CONS_LC = 'бвгджзйклмнпрстфхцчшщcñ'; # Crimean Tatar Cyrillic + CÑ lowercase consonants - const C_M_CONS = 'бгкмшcБГКМШC'; # Crimean Tatar Cyrillic M-type consonants - - // Crimean Tatar Cyrillic + CÑ consonants - const C_CONS = 'бвгджзйклмнпрстфхцчшщcñБВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ'; - - // Latin - const L_UC = 'AÂBCÇDEFGĞHIİJKLMNÑOÖPQRSŞTUÜVYZ'; # Crimean Tatar Latin uppercase - const L_LC = 'aâbcçdefgğhıijklmnñoöpqrsştuüvyz'; # Crimean Tatar Latin lowercase - const L_N_CONS_UC = 'ÇNRSTZ'; # Crimean Tatar Latin N-type upper case consonants - const L_N_CONS_LC = 'çnrstz'; # Crimean Tatar Latin N-type lower case consonants - const L_N_CONS = 'çnrstzÇNRSTZ'; # Crimean Tatar Latin N-type consonants - const L_M_CONS = 'bcgkmpşBCGKMPŞ'; # Crimean Tatar Latin M-type consonants - const L_CONS_UC = 'BCÇDFGĞHJKLMNÑPQRSŞTVZ'; # Crimean Tatar Latin uppercase consonants - const L_CONS_LC = 'bcçdfgğhjklmnñpqrsştvz'; # Crimean Tatar Latin lowercase consonants - const L_CONS = 'bcçdfgğhjklmnñpqrsştvzBCÇDFGĞHJKLMNÑPQRSŞTVZ'; # Crimean Tatar Latin consonants - const L_VOW_UC = 'AÂEIİOÖUÜ'; # Crimean Tatar Latin uppercase vowels - const L_VOW = 'aâeıioöuüAÂEIİOÖUÜ'; # Crimean Tatar Latin vowels - const L_F_UC = 'EİÖÜ'; # Crimean Tatar Latin uppercase front vowels - const L_F = 'eiöüEİÖÜ'; # Crimean Tatar Latin front vowels - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $variants = [ 'crh', 'crh-cyrl', 'crh-latn' ]; - $variantfallbacks = [ - 'crh' => 'crh-latn', - 'crh-cyrl' => 'crh-latn', - 'crh-latn' => 'crh-cyrl', - ]; - - parent::__construct( $langobj, 'crh', - $variants, $variantfallbacks, [] ); - - // No point delaying this since they're in code. - // Waiting until loadDefaultTables() means they never get loaded - // when the tables themselves are loaded from cache. - $this->loadExceptions(); - } - - public $mCyrillicToLatin = [ - - ## these are independent of location in the word, but have - ## to go first so other transforms don't bleed them - 'гъ' => 'ğ', 'Гъ' => 'Ğ', 'ГЪ' => 'Ğ', - 'къ' => 'q', 'Къ' => 'Q', 'КЪ' => 'Q', - 'нъ' => 'ñ', 'Нъ' => 'Ñ', 'НЪ' => 'Ñ', - 'дж' => 'c', 'Дж' => 'C', 'ДЖ' => 'C', - - 'А' => 'A', 'а' => 'a', 'Б' => 'B', 'б' => 'b', - 'В' => 'V', 'в' => 'v', 'Г' => 'G', 'г' => 'g', - 'Д' => 'D', 'д' => 'd', 'Ж' => 'J', 'ж' => 'j', - 'З' => 'Z', 'з' => 'z', 'И' => 'İ', 'и' => 'i', - 'Й' => 'Y', 'й' => 'y', 'К' => 'K', 'к' => 'k', - 'Л' => 'L', 'л' => 'l', 'М' => 'M', 'м' => 'm', - 'Н' => 'N', 'н' => 'n', 'П' => 'P', 'п' => 'p', - 'Р' => 'R', 'р' => 'r', 'С' => 'S', 'с' => 's', - 'Т' => 'T', 'т' => 't', 'Ф' => 'F', 'ф' => 'f', - 'Х' => 'H', 'х' => 'h', 'Ч' => 'Ç', 'ч' => 'ç', - 'Ш' => 'Ş', 'ш' => 'ş', 'Ы' => 'I', 'ы' => 'ı', - 'Э' => 'E', 'э' => 'e', 'Е' => 'E', 'е' => 'e', - 'Я' => 'Â', 'я' => 'â', 'У' => 'U', 'у' => 'u', - 'О' => 'O', 'о' => 'o', - - 'Ё' => 'Yo', 'ё' => 'yo', 'Ю' => 'Yu', 'ю' => 'yu', - 'Ц' => 'Ts', 'ц' => 'ts', 'Щ' => 'Şç', 'щ' => 'şç', - 'Ь' => '', 'ь' => '', 'Ъ' => '', 'ъ' => '', - - ]; - - public $mLatinToCyrillic = [ - 'Â' => 'Я', 'â' => 'я', 'B' => 'Б', 'b' => 'б', - 'Ç' => 'Ч', 'ç' => 'ч', 'D' => 'Д', 'd' => 'д', - 'F' => 'Ф', 'f' => 'ф', 'G' => 'Г', 'g' => 'г', - 'H' => 'Х', 'h' => 'х', 'I' => 'Ы', 'ı' => 'ы', - 'İ' => 'И', 'i' => 'и', 'J' => 'Ж', 'j' => 'ж', - 'K' => 'К', 'k' => 'к', 'L' => 'Л', 'l' => 'л', - 'M' => 'М', 'm' => 'м', 'N' => 'Н', 'n' => 'н', - 'O' => 'О', 'o' => 'о', 'P' => 'П', 'p' => 'п', - 'R' => 'Р', 'r' => 'р', 'S' => 'С', 's' => 'с', - 'Ş' => 'Ш', 'ş' => 'ш', 'T' => 'Т', 't' => 'т', - 'V' => 'В', 'v' => 'в', 'Z' => 'З', 'z' => 'з', - - 'ya' => 'я', 'Ya' => 'Я', 'YA' => 'Я', - 'ye' => 'е', 'YE' => 'Е', 'Ye' => 'Е', - - // hack, hack, hack - 'A' => 'А', 'a' => 'а', 'E' => 'Е', 'e' => 'е', - 'Ö' => 'Ё', 'ö' => 'ё', 'U' => 'У', 'u' => 'у', - 'Ü' => 'Ю', 'ü' => 'ю', 'Y' => 'Й', 'y' => 'й', - 'C' => 'Дж', 'c' => 'дж', 'Ğ' => 'Гъ', 'ğ' => 'гъ', - 'Ñ' => 'Нъ', 'ñ' => 'нъ', 'Q' => 'Къ', 'q' => 'къ', - - ]; - - public $mCyrl2LatnExceptions = []; - public $mLatn2CyrlExceptions = []; - - public $mCyrl2LatnPatterns = []; - public $mLatn2CyrlPatterns = []; - - public $mCyrlCleanUpRegexes = []; - - public $mExceptionsLoaded = false; - - protected function loadDefaultTables() { - $this->mTables = [ - 'crh-latn' => new ReplacementArray( $this->mCyrillicToLatin ), - 'crh-cyrl' => new ReplacementArray( $this->mLatinToCyrillic ), - 'crh' => new ReplacementArray() - ]; - } - - private function loadExceptions() { - if ( $this->mExceptionsLoaded ) { - return; - } - - $this->mExceptionsLoaded = true; - $crhExceptions = new MediaWiki\Languages\Data\CrhExceptions(); - list( $this->mCyrl2LatnExceptions, $this->mLatn2CyrlExceptions, - $this->mCyrl2LatnPatterns, $this->mLatn2CyrlPatterns, $this->mCyrlCleanUpRegexes ) = - $crhExceptions->loadExceptions( self::L_LC . self::C_LC, self::L_UC . self::C_UC ); - } - - /** - * A function wrapper: - * - if there is no selected variant, leave the link - * names as they were - * - do not try to find variants for usernames - * - * @param string &$link - * @param Title &$nt - * @param bool $ignoreOtherCond - */ - public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { - // check for user namespace - if ( is_object( $nt ) ) { - $ns = $nt->getNamespace(); - if ( $ns == NS_USER || $ns == NS_USER_TALK ) { - return; - } - } - - $oldlink = $link; - parent::findVariantLink( $link, $nt, $ignoreOtherCond ); - if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) { - $link = $oldlink; - } - } - - /** - * It translates text into variant, specials: - * - omitting roman numbers - * - * @param string $text - * @param bool $toVariant - * - * @throws MWException - * @return string - */ - public function translate( $text, $toVariant ) { - switch ( $toVariant ) { - case 'crh-cyrl': - case 'crh-latn': - break; - default: - return $text; - } - - if ( !$this->mTablesLoaded ) { - $this->loadTables(); - } - - if ( !isset( $this->mTables[$toVariant] ) ) { - throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) ); - } - - switch ( $toVariant ) { - case 'crh-cyrl': - /* Check for roman numbers like VII, XIX... - * Only need to split on Roman numerals when converting to Cyrillic - * Lookahead assertion ensures $roman doesn't match the empty string, and - * non-period after first "Roman" character allows initials to be converted - */ - $roman = '(?=[MDCLXVI]([^.]|$))M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})'; - - $breaks = '([^\w\x80-\xff])'; - - // allow for multiple Roman numerals in a row; rare but it happens - $romanRegex = '/^' . $roman . '$|^(' . $roman . $breaks . ')+|(' . $breaks . $roman . ')+$|' . - $breaks . '(' . $roman . $breaks . ')+/'; - - $matches = preg_split( $romanRegex, $text, -1, PREG_SPLIT_OFFSET_CAPTURE ); - $mstart = 0; - $ret = ''; - foreach ( $matches as $m ) { - // copy over Roman numerals - $ret .= substr( $text, $mstart, $m[1] - $mstart ); - - // process everything else - if ( $m[0] !== '' ) { - $ret .= $this->regsConverter( $m[0], $toVariant ); - } - - $mstart = $m[1] + strlen( $m[0] ); - } - - return $ret; - default: - // Just process the whole string in one go - return $this->regsConverter( $text, $toVariant ); - } - } - - private function regsConverter( $text, $toVariant ) { - if ( $text == '' ) return $text; - - $pat = []; - $rep = []; - switch ( $toVariant ) { - case 'crh-latn': - $text = strtr( $text, $this->mCyrl2LatnExceptions ); - foreach ( $this->mCyrl2LatnPatterns as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - $text = parent::translate( $text, $toVariant ); - $text = strtr( $text, [ '«' => '"', '»' => '"', ] ); - return $text; - case 'crh-cyrl': - $text = strtr( $text, $this->mLatn2CyrlExceptions ); - foreach ( $this->mLatn2CyrlPatterns as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - $text = parent::translate( $text, $toVariant ); - $text = strtr( $text, [ '“' => '«', '”' => '»', ] ); - foreach ( $this->mCyrlCleanUpRegexes as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - return $text; - default: - return $text; - } - } - -} diff --git a/languages/classes/LanguageEn.php b/languages/classes/LanguageEn.php index 0ea06f58be89..7f1e2cf2c64b 100644 --- a/languages/classes/LanguageEn.php +++ b/languages/classes/LanguageEn.php @@ -21,62 +21,6 @@ */ /** - * @ingroup Language - */ -class EnConverter extends LanguageConverter { - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - parent::__construct( $langobj, 'en', [ 'en', 'en-x-piglatin' ] ); - } - - /** - * Dummy methods required by base class. - */ - protected function loadDefaultTables() { - $this->mTables = [ - 'en' => new ReplacementArray(), - 'en-x-piglatin' => new ReplacementArray(), - ]; - } - - /** - * Translates text into Pig Latin. This allows developers to test the language variants - * functionality and user interface without having to switch wiki language away from default. - * - * @param string $text - * @param string $toVariant - * @return string - */ - public function translate( $text, $toVariant ) { - if ( $toVariant !== 'en-x-piglatin' ) { - return $text; - } - - // Only process words composed of standard English alphabet, leave the rest unchanged. - // This skips some English words like 'naïve' or 'résumé', but we can live with that. - // Ignore single letters and words which aren't lowercase or uppercase-first. - return preg_replace_callback( '/[A-Za-z][a-z\']+/', function ( $matches ) { - $word = $matches[0]; - if ( preg_match( '/^[aeiou]/i', $word ) ) { - return $word . 'way'; - } - - return preg_replace_callback( '/^(s?qu|[^aeiou][^aeiouy]*)(.*)$/i', function ( $m ) { - $ucfirst = strtoupper( $m[1][0] ) === $m[1][0]; - if ( $ucfirst ) { - return ucfirst( $m[2] ) . lcfirst( $m[1] ) . 'ay'; - } - - return $m[2] . $m[1] . 'ay'; - }, $word ); - }, $text ); - } -} - -/** * English * * @ingroup Language diff --git a/languages/classes/LanguageGan.php b/languages/classes/LanguageGan.php index 42d36c39759f..567e379a5633 100644 --- a/languages/classes/LanguageGan.php +++ b/languages/classes/LanguageGan.php @@ -21,59 +21,6 @@ */ /** - * @ingroup Language - */ -class GanConverter extends LanguageConverter { - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $this->mDescCodeSep = ':'; - $this->mDescVarSep = ';'; - - $variants = [ 'gan', 'gan-hans', 'gan-hant' ]; - $variantfallbacks = [ - 'gan' => [ 'gan-hans', 'gan-hant' ], - 'gan-hans' => [ 'gan' ], - 'gan-hant' => [ 'gan' ], - ]; - $ml = [ - 'gan' => 'disable', - ]; - - parent::__construct( $langobj, 'gan', - $variants, - $variantfallbacks, - [], - $ml - ); - - $names = [ - 'gan' => '原文', - 'gan-hans' => '简体', - 'gan-hant' => '繁體', - ]; - $this->mVariantNames = array_merge( $this->mVariantNames, $names ); - } - - protected function loadDefaultTables() { - $this->mTables = [ - 'gan-hans' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans ), - 'gan-hant' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant ), - 'gan' => new ReplacementArray - ]; - } - - /** - * @param string $key - * @return string - */ - public function convertCategoryKey( $key ) { - return $this->autoConvert( $key, 'gan' ); - } -} - -/** * Gan Chinese * * class that handles both Traditional and Simplified Chinese @@ -93,5 +40,4 @@ class LanguageGan extends LanguageZh { // LanguageZh::normalizeForSearch return parent::normalizeForSearch( $string, $autoVariant ); } - } diff --git a/languages/classes/LanguageIu.php b/languages/classes/LanguageIu.php deleted file mode 100644 index 209c145ebf3a..000000000000 --- a/languages/classes/LanguageIu.php +++ /dev/null @@ -1,161 +0,0 @@ -<?php -/** - * Inuktitut specific code. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Language - */ - -/** - * Conversion script between Latin and Syllabics for Inuktitut. - * - Syllabics -> lowercase Latin - * - lowercase/uppercase Latin -> Syllabics - * - * - * Based on: - * - https://commons.wikimedia.org/wiki/Image:Inuktitut.png - * - LanguageSr.php - * - * @ingroup Language - */ -class IuConverter extends LanguageConverter { - protected $mDoContentConvert; - - public $mToLatin = [ - 'ᐦ' => 'h', 'ᐃ' => 'i', 'ᐄ' => 'ii', 'ᐅ' => 'u', 'ᐆ' => 'uu', 'ᐊ' => 'a', 'ᐋ' => 'aa', - 'ᑉ' => 'p', 'ᐱ' => 'pi', 'ᐲ' => 'pii', 'ᐳ' => 'pu', 'ᐴ' => 'puu', 'ᐸ' => 'pa', 'ᐹ' => 'paa', - 'ᑦ' => 't', 'ᑎ' => 'ti', 'ᑏ' => 'tii', 'ᑐ' => 'tu', 'ᑑ' => 'tuu', 'ᑕ' => 'ta', 'ᑖ' => 'taa', - 'ᒃ' => 'k', 'ᑭ' => 'ki', 'ᑮ' => 'kii', 'ᑯ' => 'ku', 'ᑰ' => 'kuu', 'ᑲ' => 'ka', 'ᑳ' => 'kaa', - 'ᖅᒃ' => 'qq', 'ᖅᑭ' => 'qqi', 'ᖅᑮ' => 'qqii', 'ᖅᑯ' => 'qqu', 'ᖅᑰ' => 'ᖅqquu', 'ᖅᑲ' => 'qqa', - 'ᖅᑳ' => 'qqaa', 'ᒡ' => 'g', 'ᒋ' => 'gi', 'ᒌ' => 'gii', 'ᒍ' => 'gu', 'ᒎ' => 'guu', - 'ᒐ' => 'ga', 'ᒑ' => 'gaa', 'ᒻ' => 'm', 'ᒥ' => 'mi', 'ᒦ' => 'mii', 'ᒧ' => 'mu', 'ᒨ' => 'muu', - 'ᒪ' => 'ma', 'ᒫ' => 'maa', 'ᓐ' => 'n', 'ᓂ' => 'ni', 'ᓃ' => 'nii', 'ᓄ' => 'nu', 'ᓅ' => 'nuu', - 'ᓇ' => 'na', 'ᓈ' => 'naa', 'ᔅ' => 's', 'ᓯ' => 'si', 'ᓰ' => 'sii', 'ᓱ' => 'su', 'ᓲ' => 'suu', - 'ᓴ' => 'sa', 'ᓵ' => 'saa', 'ᓪ' => 'l', 'ᓕ' => 'li', 'ᓖ' => 'lii', 'ᓗ' => 'lu', 'ᓘ' => 'luu', - 'ᓚ' => 'la', 'ᓛ' => 'laa', 'ᔾ' => 'j', 'ᔨ' => 'ji', 'ᔩ' => 'jii', 'ᔪ' => 'ju', 'ᔫ' => 'juu', - 'ᔭ' => 'ja', 'ᔮ' => 'jaa', 'ᕝ' => 'v', 'ᕕ' => 'vi', 'ᕖ' => 'vii', 'ᕗ' => 'vu', 'ᕘ' => 'vuu', - 'ᕙ' => 'va', 'ᕚ' => 'vaa', 'ᕐ' => 'r', 'ᕆ' => 'ri', 'ᕇ' => 'rii', 'ᕈ' => 'ru', 'ᕉ' => 'ruu', - 'ᕋ' => 'ra', 'ᕌ' => 'raa', 'ᖅ' => 'q', 'ᕿ' => 'qi', 'ᖀ' => 'qii', 'ᖁ' => 'qu', 'ᖂ' => 'quu', - 'ᖃ' => 'qa', 'ᖄ' => 'qaa', 'ᖕ' => 'ng', 'ᖏ' => 'ngi', 'ᖐ' => 'ngii', 'ᖑ' => 'ngu', - 'ᖒ' => 'nguu', 'ᖓ' => 'nga', 'ᖔ' => 'ngaa', 'ᖖ' => 'nng', 'ᙱ' => 'nngi', 'ᙲ' => 'nngii', - 'ᙳ' => 'nngu', 'ᙴ' => 'nnguu', 'ᙵ' => 'nnga', 'ᙶ' => 'nngaa', 'ᖦ' => 'ɫ', 'ᖠ' => 'ɫi', - 'ᖡ' => 'ɫii', 'ᖢ' => 'ɫu', 'ᖣ' => 'ɫuu', 'ᖤ' => 'ɫa', 'ᖥ' => 'ɫaa', - ]; - - public $mUpperToLowerCaseLatin = [ - 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', - 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', - 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o', - 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't', - 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y', - 'Z' => 'z', - ]; - - public $mToSyllabics = [ - 'h' => 'ᐦ', 'i' => 'ᐃ', 'ii' => 'ᐄ', 'u' => 'ᐅ', 'uu' => 'ᐆ', 'a' => 'ᐊ', 'aa' => 'ᐋ', - 'p' => 'ᑉ', 'pi' => 'ᐱ', 'pii' => 'ᐲ', 'pu' => 'ᐳ', 'puu' => 'ᐴ', 'pa' => 'ᐸ', 'paa' => 'ᐹ', - 't' => 'ᑦ', 'ti' => 'ᑎ', 'tii' => 'ᑏ', 'tu' => 'ᑐ', 'tuu' => 'ᑑ', 'ta' => 'ᑕ', 'taa' => 'ᑖ', - 'k' => 'ᒃ', 'ki' => 'ᑭ', 'kii' => 'ᑮ', 'ku' => 'ᑯ', 'kuu' => 'ᑰ', 'ka' => 'ᑲ', 'kaa' => 'ᑳ', - 'g' => 'ᒡ', 'gi' => 'ᒋ', 'gii' => 'ᒌ', 'gu' => 'ᒍ', 'guu' => 'ᒎ', 'ga' => 'ᒐ', 'gaa' => 'ᒑ', - 'm' => 'ᒻ', 'mi' => 'ᒥ', 'mii' => 'ᒦ', 'mu' => 'ᒧ', 'muu' => 'ᒨ', 'ma' => 'ᒪ', 'maa' => 'ᒫ', - 'n' => 'ᓐ', 'ni' => 'ᓂ', 'nii' => 'ᓃ', 'nu' => 'ᓄ', 'nuu' => 'ᓅ', 'na' => 'ᓇ', 'naa' => 'ᓈ', - 's' => 'ᔅ', 'si' => 'ᓯ', 'sii' => 'ᓰ', 'su' => 'ᓱ', 'suu' => 'ᓲ', 'sa' => 'ᓴ', 'saa' => 'ᓵ', - 'l' => 'ᓪ', 'li' => 'ᓕ', 'lii' => 'ᓖ', 'lu' => 'ᓗ', 'luu' => 'ᓘ', 'la' => 'ᓚ', 'laa' => 'ᓛ', - 'j' => 'ᔾ', 'ji' => 'ᔨ', 'jii' => 'ᔩ', 'ju' => 'ᔪ', 'juu' => 'ᔫ', 'ja' => 'ᔭ', 'jaa' => 'ᔮ', - 'v' => 'ᕝ', 'vi' => 'ᕕ', 'vii' => 'ᕖ', 'vu' => 'ᕗ', 'vuu' => 'ᕘ', 'va' => 'ᕙ', 'vaa' => 'ᕚ', - 'r' => 'ᕐ', 'ri' => 'ᕆ', 'rii' => 'ᕇ', 'ru' => 'ᕈ', 'ruu' => 'ᕉ', 'ra' => 'ᕋ', 'raa' => 'ᕌ', - 'qq' => 'ᖅᒃ', 'qqi' => 'ᖅᑭ', 'qqii' => 'ᖅᑮ', 'qqu' => 'ᖅᑯ', 'qquu' => 'ᖅᑰ', 'qqa' => 'ᖅᑲ', - 'qqaa' => 'ᖅᑳ', 'q' => 'ᖅ', 'qi' => 'ᕿ', 'qii' => 'ᖀ', 'qu' => 'ᖁ', 'quu' => 'ᖂ', - 'qa' => 'ᖃ', 'qaa' => 'ᖄ', 'ng' => 'ᖕ', 'ngi' => 'ᖏ', 'ngii' => 'ᖐ', 'ngu' => 'ᖑ', - 'nguu' => 'ᖒ', 'nga' => 'ᖓ', 'ngaa' => 'ᖔ', 'nng' => 'ᖖ', 'nngi' => 'ᙱ', 'nngii' => 'ᙲ', - 'nngu' => 'ᙳ', 'nnguu' => 'ᙴ', 'nnga' => 'ᙵ', 'nngaa' => 'ᙶ', 'ɫ' => 'ᖦ', 'ɫi' => 'ᖠ', - 'ɫii' => 'ᖡ', 'ɫu' => 'ᖢ', 'ɫuu' => 'ᖣ', 'ɫa' => 'ᖤ', 'ɫaa' => 'ᖥ', - ]; - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $variants = [ 'iu', 'ike-cans', 'ike-latn' ]; - $variantfallbacks = [ - 'iu' => 'ike-cans', - 'ike-cans' => 'iu', - 'ike-latn' => 'iu', - ]; - $flags = []; - - parent::__construct( $langobj, 'iu', $variants, $variantfallbacks, $flags ); - } - - protected function loadDefaultTables() { - $this->mTables = [ - 'lowercase' => new ReplacementArray( $this->mUpperToLowerCaseLatin ), - 'ike-cans' => new ReplacementArray( $this->mToSyllabics ), - 'ike-latn' => new ReplacementArray( $this->mToLatin ), - 'iu' => new ReplacementArray() - ]; - } - - /** - * A function wrapper: - * - if there is no selected variant, leave the link - * names as they were - * - do not try to find variants for usernames - * - * @param string &$link - * @param Title &$nt - * @param bool $ignoreOtherCond - */ - public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { - // check for user namespace - if ( is_object( $nt ) ) { - $ns = $nt->getNamespace(); - if ( $ns == NS_USER || $ns == NS_USER_TALK ) { - return; - } - } - - $oldlink = $link; - parent::findVariantLink( $link, $nt, $ignoreOtherCond ); - if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) { - $link = $oldlink; - } - } - - /** - * It translates text into variant - * - * @param string $text - * @param bool $toVariant - * - * @return string - */ - public function translate( $text, $toVariant ) { - // If $text is empty or only includes spaces, do nothing - // Otherwise translate it - if ( trim( $text ) ) { - $this->loadTables(); - // To syllabics, first translate uppercase to lowercase Latin - if ( $toVariant == 'ike-cans' ) { - $text = $this->mTables['lowercase']->replace( $text ); - } - $text = $this->mTables[$toVariant]->replace( $text ); - } - return $text; - } -} diff --git a/languages/classes/LanguageKk.php b/languages/classes/LanguageKk.php index d2467ef3a111..44149c3327f6 100644 --- a/languages/classes/LanguageKk.php +++ b/languages/classes/LanguageKk.php @@ -21,344 +21,6 @@ * @ingroup Language */ -define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' ); # Kazakh Cyrillic uppercase -define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase -define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase -define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase -// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic -define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA -// define( 'ZWNJ', '' ); # U+200C ZERO WIDTH NON-JOINER - -/** - * Kazakh (Қазақша) converter routines - * - * @ingroup Language - */ -class KkConverter extends LanguageConverter { - protected $mCyrl2Latn, $mLatn2Cyrl, $mCyLa2Arab; - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $variants = [ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ]; - $variantfallbacks = [ - 'kk' => 'kk-cyrl', - 'kk-cyrl' => 'kk', - 'kk-latn' => 'kk', - 'kk-arab' => 'kk', - 'kk-kz' => 'kk-cyrl', - 'kk-tr' => 'kk-latn', - 'kk-cn' => 'kk-arab' - ]; - - parent::__construct( $langobj, 'kk', - $variants, $variantfallbacks, [] ); - - // No point delaying this since they're in code. - // Waiting until loadDefaultTables() means they never get loaded - // when the tables themselves are loaded from cache. - $this->loadRegs(); - } - - protected function loadDefaultTables() { - // require __DIR__."/../../includes/KkConversion.php"; - // Placeholder for future implementing. Remove variables declarations - // after generating KkConversion.php - $kk2Cyrl = []; - $kk2Latn = []; - $kk2Arab = []; - $kk2KZ = []; - $kk2TR = []; - $kk2CN = []; - - $this->mTables = [ - 'kk-cyrl' => new ReplacementArray( $kk2Cyrl ), - 'kk-latn' => new ReplacementArray( $kk2Latn ), - 'kk-arab' => new ReplacementArray( $kk2Arab ), - 'kk-kz' => new ReplacementArray( array_merge( $kk2Cyrl, $kk2KZ ) ), - 'kk-tr' => new ReplacementArray( array_merge( $kk2Latn, $kk2TR ) ), - 'kk-cn' => new ReplacementArray( array_merge( $kk2Arab, $kk2CN ) ), - 'kk' => new ReplacementArray() - ]; - } - - protected function postLoadTables() { - $this->mTables['kk-kz']->merge( $this->mTables['kk-cyrl'] ); - $this->mTables['kk-tr']->merge( $this->mTables['kk-latn'] ); - $this->mTables['kk-cn']->merge( $this->mTables['kk-arab'] ); - } - - private function loadRegs() { - $this->mCyrl2Latn = [ - # # Punctuation - '/№/u' => 'No.', - # # Е after vowels - '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE', - '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye', - # # leading ЁЮЯЩ - '/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1', '/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1', - '/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1', - '/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1', - '/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1', '/^Щ([' . KK_C_LC . ']|$)/u' => 'Şç$1', - # # other ЁЮЯ - '/Ё/u' => 'YO', '/ё/u' => 'yo', - '/Ю/u' => 'YU', '/ю/u' => 'yu', - '/Я/u' => 'YA', '/я/u' => 'ya', - '/Щ/u' => 'ŞÇ', '/щ/u' => 'şç', - # # soft and hard signs - '/[ъЪ]/u' => 'ʺ', '/[ьЬ]/u' => 'ʹ', - # # other characters - '/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä', - '/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v', - '/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ', - '/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e', - '/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z', - '/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý', - '/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q', - '/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm', - '/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ', - '/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö', - '/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r', - '/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't', - '/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u', - '/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f', - '/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h', - '/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç', - '/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı', - '/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é', - ]; - - $this->mLatn2Cyrl = [ - # # Punctuation - '/#|No\./' => '№', - # # Şç - '/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь', - '/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ', - # # soft and hard signs - '/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2', - '/ʺ([' . KK_L_LC . '])/u' => 'ъ$1', - '/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2', - '/ʹ([' . KK_L_LC . '])/u' => 'ь$1', - '/ʺ/u' => 'ъ', - '/ʹ/u' => 'ь', - # # Ye Yo Yu Ya. - '/Y[Ee]/u' => 'Е', '/ye/u' => 'е', - '/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё', - '/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю', - '/Y[Aa]/u' => 'Я', '/ya/u' => 'я', - # # other characters - '/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә', - '/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц', - '/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д', - '/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э', - '/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г', - '/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ', - '/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і', - '/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж', - '/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л', - '/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н', - '/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о', - '/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п', - '/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р', - '/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш', - '/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ', - '/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в', - '/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й', - '/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з', - ]; - - $this->mCyLa2Arab = [ - # # Punctuation -> Arabic - '/#|№|No\./u' => '', # U+0600 - '/\,/' => '،', # U+060C - '/;/' => '؛', # U+061B - '/\?/' => '؟', # U+061F - '/%/' => '٪', # U+066A - '/\*/' => '٭', # U+066D - # # Digits -> Arabic - '/0/' => '۰', # U+06F0 - '/1/' => '۱', # U+06F1 - '/2/' => '۲', # U+06F2 - '/3/' => '۳', # U+06F3 - '/4/' => '۴', # U+06F4 - '/5/' => '۵', # U+06F5 - '/6/' => '۶', # U+06F6 - '/7/' => '۷', # U+06F7 - '/8/' => '۸', # U+06F8 - '/9/' => '۹', # U+06F9 - # # Cyrillic -> Arabic - '/Аллаһ/ui' => 'ﷲ', - '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە', - '/[еэ]/ui' => 'ە', '/[ъь]/ui' => '', - '/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى', - '/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي', - '/ц/ui' => 'تس', '/щ/ui' => 'شش', - '/һ/ui' => 'ح', '/ч/ui' => 'تش', - # '/һ/ui' => 'ھ', '/ч/ui' => 'چ', - '/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع', - '/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك', - '/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن', - '/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س', - '/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح', - '/ш/ui' => 'ش', - # # Latin -> Arabic // commented for now... - /*'/Allah/ui' => 'ﷲ', - '/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '', - '/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ', - '/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى', - '/c/ui' => 'تس', - '/ç/ui' => 'تش', '/h/ui' => 'ح', - #'/ç/ui' => 'چ', '/h/ui' => 'ھ', - '/b/ui' => 'ب','/d/ui' => 'د', - '/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع', - '/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م', - '/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق', - '/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت', - '/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/ - ]; - } - - /** - * A function wrapper: - * - if there is no selected variant, leave the link - * names as they were - * - do not try to find variants for usernames - * - * @param string &$link - * @param Title &$nt - * @param bool $ignoreOtherCond - */ - public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { - // check for user namespace - if ( is_object( $nt ) ) { - $ns = $nt->getNamespace(); - if ( $ns == NS_USER || $ns == NS_USER_TALK ) { - return; - } - } - - $oldlink = $link; - parent::findVariantLink( $link, $nt, $ignoreOtherCond ); - if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) { - $link = $oldlink; - } - } - - /** - * It translates text into variant - * - * @param string $text - * @param string $toVariant - * - * @return string - */ - public function translate( $text, $toVariant ) { - $text = parent::translate( $text, $toVariant ); - - switch ( $toVariant ) { - case 'kk-cyrl': - case 'kk-kz': - $letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789'; - break; - case 'kk-latn': - case 'kk-tr': - $letters = KK_C_UC . KK_C_LC . '№0123456789'; - break; - case 'kk-arab': - case 'kk-cn': - $letters = KK_C_UC . KK_C_LC . /*KK_L_UC.KK_L_LC.'ʺʹ'.*/',;\?%\*№0123456789'; - break; - default: - return $text; - } - // disable conversion variables like $1, $2... - $varsfix = '\$[0-9]'; - - $matches = preg_split( - '/' . $varsfix . '[^' . $letters . ']+/u', - $text, - -1, - PREG_SPLIT_OFFSET_CAPTURE - ); - - $mstart = 0; - $ret = ''; - - foreach ( $matches as $m ) { - $ret .= substr( $text, $mstart, $m[1] - $mstart ); - $ret .= $this->regsConverter( $m[0], $toVariant ); - $mstart = $m[1] + strlen( $m[0] ); - } - - return $ret; - } - - /** - * @param string $text - * @param string $toVariant - * @return mixed|string - */ - private function regsConverter( $text, $toVariant ) { - if ( $text == '' ) { - return $text; - } - - switch ( $toVariant ) { - case 'kk-arab': - case 'kk-cn': - $letters = KK_C_LC . KK_C_UC; /*.KK_L_LC.KK_L_UC*/ - $front = 'әөүіӘӨҮІ'; /*.'äöüiÄÖÜİ'*/ - $excludes = 'еэгғкқЕЭГҒКҚ'; /*.'eégğkqEÉGĞKQ'*/ - // split text to words - $matches = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE ); - $mstart = 0; - $ret = ''; - foreach ( $matches as $m ) { - $ret .= substr( $text, $mstart, $m[1] - $mstart ); - // is matched the word to front vowels? - // exclude a words matched to е, э, г, к, к, қ, - // them should be without hamza - if ( preg_match( '/[' . $front . ']/u', $m[0] ) && - !preg_match( '/[' . $excludes . ']/u', $m[0] ) - ) { - $ret .= preg_replace( '/[' . $letters . ']+/u', H_HAMZA . '$0', $m[0] ); - } else { - $ret .= $m[0]; - } - $mstart = $m[1] + strlen( $m[0] ); - } - $text =& $ret; - foreach ( $this->mCyLa2Arab as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - return $text; - case 'kk-latn': - case 'kk-tr': - foreach ( $this->mCyrl2Latn as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - return $text; - case 'kk-cyrl': - case 'kk-kz': - foreach ( $this->mLatn2Cyrl as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - return $text; - default: - return $text; - } - } - - /** - * @param string $key - * @return string - */ - public function convertCategoryKey( $key ) { - return $this->autoConvert( $key, 'kk' ); - } -} - /** * class that handles Cyrillic, Latin and Arabic scripts for Kazakh * right now it only distinguish kk_cyrl, kk_latn, kk_arab and kk_kz, kk_tr, kk_cn. diff --git a/languages/classes/LanguageKu.php b/languages/classes/LanguageKu.php deleted file mode 100644 index bf3575483982..000000000000 --- a/languages/classes/LanguageKu.php +++ /dev/null @@ -1,238 +0,0 @@ -<?php -/** - * Kurdish specific code. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Language - */ - -/** - * Kurdish converter routines - * - * @ingroup Language - */ -class KuConverter extends LanguageConverter { - public $mArabicToLatin = [ - 'ب' => 'b', 'ج' => 'c', 'چ' => 'ç', 'د' => 'd', 'ف' => 'f', 'گ' => 'g', 'ھ' => 'h', - 'ہ' => 'h', 'ه' => 'h', 'ح' => 'h', 'ژ' => 'j', 'ك' => 'k', 'ک' => 'k', 'ل' => 'l', - 'م' => 'm', 'ن' => 'n', 'پ' => 'p', 'ق' => 'q', 'ر' => 'r', 'س' => 's', 'ش' => 'ş', - 'ت' => 't', 'ڤ' => 'v', 'خ' => 'x', 'غ' => 'x', 'ز' => 'z', - -// ک و => ku -- ist richtig -// و ك=> ku -- ist auch richtig - - /* Doppel- und Halbvokale */ - 'ڵ' => 'll', # ll - 'ڕ' => 'rr', # rr - 'ا' => 'a', - # 'ئێ' => 'ê', # initial e - 'ە' => 'e', - 'ه' => 'e', # with one non-joiner - 'ه' => 'e', # with two non-joiner - 'ة' => 'e', - 'ێ' => 'ê', - 'ي' => 'î', - 'ی' => 'î', # U+06CC db 8c ARABIC LETTER FARSI YEH - 'ى' => 'î', # U+0649 d9 89 ARABIC LETTER ALEF MAKSURA - 'ۆ' => 'o', - 'و' => 'w', - 'ئ' => '', # initial hemze should not be shown - '،' => ',', - 'ع' => '\'', # ayn - '؟' => '?', - - # digits - '٠' => '0', # U+0660 - '١' => '1', # U+0661 - '٢' => '2', # U+0662 - '٣' => '3', # U+0663 - '٤' => '4', # U+0664 - '٥' => '5', # U+0665 - '٦' => '6', # U+0666 - '٧' => '7', # U+0667 - '٨' => '8', # U+0668 - '٩' => '9', # U+0669 - ]; - - public $mLatinToArabic = [ - 'b' => 'ب', 'c' => 'ج', 'ç' => 'چ', 'd' => 'د', 'f' => 'ف', 'g' => 'گ', - 'h' => 'ه', 'j' => 'ژ', 'k' => 'ک', 'l' => 'ل', - 'm' => 'م', 'n' => 'ن', 'p' => 'پ', 'q' => 'ق', 'r' => 'ر', 's' => 'س', 'ş' => 'ش', - 't' => 'ت', 'v' => 'ڤ', - 'x' => 'خ', 'y' => 'ی', 'z' => 'ز', - - 'B' => 'ب', 'C' => 'ج', 'Ç' => 'چ', 'D' => 'د', 'F' => 'ف', 'G' => 'گ', - 'H' => 'ح', 'J' => 'ژ', 'K' => 'ک', 'L' => 'ل', - 'M' => 'م', 'N' => 'ن', 'P' => 'پ', 'Q' => 'ق', 'R' => 'ر', 'S' => 'س', 'Ş' => 'ش', - 'T' => 'ت', 'V' => 'ڤ', 'W' => 'و', 'X' => 'خ', - 'Y' => 'ی', 'Z' => 'ز', - - /* Doppelkonsonanten */ - # 'll' => 'ڵ', # wenn es geht, doppel-l und l getrennt zu behandeln - # 'rr' => 'ڕ', # selbiges für doppel-r - - /* Einzelne Großbuchstaben */ - // ' C' => 'ج', - - /* Vowels */ - 'a' => 'ا', - 'e' => 'ە', - 'ê' => 'ێ', - 'i' => '', - 'î' => 'ی', - 'o' => 'ۆ', - 'u' => 'و', - 'û' => 'وو', - 'w' => 'و', - ',' => '،', - '?' => '؟', - - # Try to replace the leading vowel - ' a' => 'ئا ', - ' e' => 'ئە ', - ' ê' => 'ئێ ', - ' î' => 'ئی ', - ' o' => 'ئۆ ', - ' u' => 'ئو ', - ' û' => 'ئوو ', - 'A' => 'ئا', - 'E' => 'ئە', - 'Ê' => 'ئێ', - 'Î' => 'ئی', - 'O' => 'ئۆ', - 'U' => 'ئو', - 'Û' => 'ئوو', - ' A' => 'ئا ', - ' E' => 'ئە ', - ' Ê' => 'ئێ ', - ' Î' => 'ئی ', - ' O' => 'ئۆ ', - ' U' => 'ئو ', - ' Û' => 'ئوو ', - # eyn erstmal deaktivieren, einfache Anführungsstriche sind einfach zu - # häufig, um sie als eyn zu interpretieren. - # '\'' => 'ع', - -/* # deactivated for now, breaks links i.e. in header of Special:Recentchanges :-( - # digits - '0' => '٠', # U+0660 - '1' => '١', # U+0661 - '2' => '٢', # U+0662 - '3' => '٣', # U+0663 - '4' => '٤', # U+0664 - '5' => '٥', # U+0665 - '6' => '٦', # U+0666 - '7' => '٧', # U+0667 - '8' => '٨', # U+0668 - '9' => '٩', # U+0669 -*/ - ]; - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $variants = [ 'ku', 'ku-arab', 'ku-latn' ]; - $variantfallbacks = [ - 'ku' => 'ku-latn', - 'ku-arab' => 'ku-latn', - 'ku-latn' => 'ku-arab', - ]; - - parent::__construct( $langobj, 'ku', $variants, $variantfallbacks ); - } - - protected function loadDefaultTables() { - $this->mTables = [ - 'ku-latn' => new ReplacementArray( $this->mArabicToLatin ), - 'ku-arab' => new ReplacementArray( $this->mLatinToArabic ), - 'ku' => new ReplacementArray() - ]; - } - - /** - * A function wrapper: - * - if there is no selected variant, leave the link - * names as they were - * - do not try to find variants for usernames - * - * @param string &$link - * @param Title &$nt - * @param bool $ignoreOtherCond - */ - public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { - // check for user namespace - if ( is_object( $nt ) ) { - $ns = $nt->getNamespace(); - if ( $ns == NS_USER || $ns == NS_USER_TALK ) { - return; - } - } - - $oldlink = $link; - parent::findVariantLink( $link, $nt, $ignoreOtherCond ); - if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) { - $link = $oldlink; - } - } - - /** - * It translates text into variant, specials: - * - ommiting roman numbers - * - * @param string $text - * @param bool $toVariant - * - * @throws MWException - * @return string - */ - public function translate( $text, $toVariant ) { - $this->loadTables(); - /* From Kazakh interface, maybe we need it later - $breaks = '[^\w\x80-\xff]'; - // regexp for roman numbers - // Lookahead assertion ensures $roman doesn't match the empty string - $roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})'; - $roman = ''; - - $reg = '/^'.$roman.'$|^'.$roman.$breaks.'|'.$breaks.$roman.'$|'.$breaks.$roman.$breaks.'/'; - - $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE); - - $m = array_shift($matches); - if( !isset( $this->mTables[$toVariant] ) ) { - throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) ); - } - $ret = $this->mTables[$toVariant]->replace( $m[0] ); - $mstart = $m[1]+strlen($m[0]); - foreach($matches as $m) { - $ret .= substr($text, $mstart, $m[1]-$mstart); - $ret .= parent::translate($m[0], $toVariant); - $mstart = $m[1] + strlen($m[0]); - } - - return $ret; - */ - - if ( !isset( $this->mTables[$toVariant] ) ) { - throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) ); - } - - return parent::translate( $text, $toVariant ); - } -} diff --git a/languages/classes/LanguageShi.php b/languages/classes/LanguageShi.php deleted file mode 100644 index 6565b51a1f63..000000000000 --- a/languages/classes/LanguageShi.php +++ /dev/null @@ -1,137 +0,0 @@ -<?php -/** - * Shilha specific code. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Language - */ - -/** - * Conversion script between Latin and Tifinagh for Tachelhit. - * - Tifinagh -> lowercase Latin - * - lowercase/uppercase Latin -> Tifinagh - * - * - * Based on: - * - https://en.wikipedia.org/wiki/Shilha_language - * - LanguageSr.php - * - * @ingroup Language - */ -class ShiConverter extends LanguageConverter { - protected $mDoContentConvert; - - public $mToLatin = [ - 'ⴰ' => 'a', 'ⴱ' => 'b', 'ⴳ' => 'g', 'ⴷ' => 'd', 'ⴹ' => 'ḍ', 'ⴻ' => 'e', - 'ⴼ' => 'f', 'ⴽ' => 'k', 'ⵀ' => 'h', 'ⵃ' => 'ḥ', 'ⵄ' => 'ε', 'ⵅ' => 'x', - 'ⵇ' => 'q', 'ⵉ' => 'i', 'ⵊ' => 'j', 'ⵍ' => 'l', 'ⵎ' => 'm', 'ⵏ' => 'n', - 'ⵓ' => 'u', 'ⵔ' => 'r', 'ⵕ' => 'ṛ', 'ⵙ' => 's', 'ⵚ' => 'ṣ', - 'ⵛ' => 'š', 'ⵜ' => 't', 'ⵟ' => 'ṭ', 'ⵡ' => 'w', 'ⵢ' => 'y', 'ⵣ' => 'z', - 'ⵥ' => 'ẓ', 'ⵯ' => 'ʷ', 'ⵖ' => 'ɣ', 'ⵠ' => 'v', 'ⵒ' => 'p', - ]; - - public $mUpperToLowerCaseLatin = [ - 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', - 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', - 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o', - 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't', - 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y', - 'Z' => 'z', 'Ɣ' => 'ɣ', - ]; - - public $mToTifinagh = [ - 'a' => 'ⴰ', 'b' => 'ⴱ', 'g' => 'ⴳ', 'd' => 'ⴷ', 'ḍ' => 'ⴹ', 'e' => 'ⴻ', - 'f' => 'ⴼ', 'k' => 'ⴽ', 'h' => 'ⵀ', 'ḥ' => 'ⵃ', 'ε' => 'ⵄ', 'x' => 'ⵅ', - 'q' => 'ⵇ', 'i' => 'ⵉ', 'j' => 'ⵊ', 'l' => 'ⵍ', 'm' => 'ⵎ', 'n' => 'ⵏ', - 'u' => 'ⵓ', 'r' => 'ⵔ', 'ṛ' => 'ⵕ', 'γ' => 'ⵖ', 's' => 'ⵙ', 'ṣ' => 'ⵚ', - 'š' => 'ⵛ', 't' => 'ⵜ', 'ṭ' => 'ⵟ', 'w' => 'ⵡ', 'y' => 'ⵢ', 'z' => 'ⵣ', - 'ẓ' => 'ⵥ', 'ʷ' => 'ⵯ', 'ɣ' => 'ⵖ', 'v' => 'ⵠ', 'p' => 'ⵒ', - ]; - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $variants = [ 'shi', 'shi-tfng', 'shi-latn' ]; - $variantfallbacks = [ - 'shi' => 'shi-tfng', - 'shi-tfng' => 'shi', - 'shi-latn' => 'shi', - ]; - - $flags = []; - parent::__construct( $langobj, 'shi', $variants, $variantfallbacks, $flags ); - } - - protected function loadDefaultTables() { - $this->mTables = [ - 'lowercase' => new ReplacementArray( $this->mUpperToLowerCaseLatin ), - 'shi-tfng' => new ReplacementArray( $this->mToTifinagh ), - 'shi-latn' => new ReplacementArray( $this->mToLatin ), - 'shi' => new ReplacementArray() - ]; - } - - /** - * A function wrapper: - * - if there is no selected variant, leave the link - * names as they were - * - do not try to find variants for usernames - * - * @param string &$link - * @param Title &$nt - * @param bool $ignoreOtherCond - */ - public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { - // check for user namespace - if ( is_object( $nt ) ) { - $ns = $nt->getNamespace(); - if ( $ns == NS_USER || $ns == NS_USER_TALK ) { - return; - } - } - - $oldlink = $link; - parent::findVariantLink( $link, $nt, $ignoreOtherCond ); - if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) { - $link = $oldlink; - } - } - - /** - * It translates text into variant - * - * @param string $text - * @param string $toVariant - * - * @return string - */ - public function translate( $text, $toVariant ) { - // If $text is empty or only includes spaces, do nothing - // Otherwise translate it - if ( trim( $text ) ) { - $this->loadTables(); - // To Tifinagh, first translate uppercase to lowercase Latin - if ( $toVariant == 'shi-tfng' ) { - $text = $this->mTables['lowercase']->replace( $text ); - } - $text = $this->mTables[$toVariant]->replace( $text ); - } - return $text; - } -} diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php deleted file mode 100644 index 2672f1f0c63f..000000000000 --- a/languages/classes/LanguageSr.php +++ /dev/null @@ -1,185 +0,0 @@ -<?php -/** - * Serbian (Српски / Srpski) specific code. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Language - */ - -/** - * There are two levels of conversion for Serbian: the script level - * (Cyrillics <-> Latin), and the variant level (ekavian - * <->iyekavian). The two are orthogonal. So we really only need two - * dictionaries: one for Cyrillics and Latin, and one for ekavian and - * iyekavian. - * - * @ingroup Language - */ -class SrConverter extends LanguageConverter { - public $mToLatin = [ - 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd', - 'ђ' => 'đ', 'е' => 'e', 'ж' => 'ž', 'з' => 'z', 'и' => 'i', - 'ј' => 'j', 'к' => 'k', 'л' => 'l', 'љ' => 'lj', 'м' => 'm', - 'н' => 'n', 'њ' => 'nj', 'о' => 'o', 'п' => 'p', 'р' => 'r', - 'с' => 's', 'т' => 't', 'ћ' => 'ć', 'у' => 'u', 'ф' => 'f', - 'х' => 'h', 'ц' => 'c', 'ч' => 'č', 'џ' => 'dž', 'ш' => 'š', - - 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D', - 'Ђ' => 'Đ', 'Е' => 'E', 'Ж' => 'Ž', 'З' => 'Z', 'И' => 'I', - 'Ј' => 'J', 'К' => 'K', 'Л' => 'L', 'Љ' => 'Lj', 'М' => 'M', - 'Н' => 'N', 'Њ' => 'Nj', 'О' => 'O', 'П' => 'P', 'Р' => 'R', - 'С' => 'S', 'Т' => 'T', 'Ћ' => 'Ć', 'У' => 'U', 'Ф' => 'F', - 'Х' => 'H', 'Ц' => 'C', 'Ч' => 'Č', 'Џ' => 'Dž', 'Ш' => 'Š', - ]; - - public $mToCyrillics = [ - 'a' => 'а', 'b' => 'б', 'c' => 'ц', 'č' => 'ч', 'ć' => 'ћ', - 'd' => 'д', 'dž' => 'џ', 'đ' => 'ђ', 'e' => 'е', 'f' => 'ф', - 'g' => 'г', 'h' => 'х', 'i' => 'и', 'j' => 'ј', 'k' => 'к', - 'l' => 'л', 'lj' => 'љ', 'm' => 'м', 'n' => 'н', 'nj' => 'њ', - 'o' => 'о', 'p' => 'п', 'r' => 'р', 's' => 'с', 'š' => 'ш', - 't' => 'т', 'u' => 'у', 'v' => 'в', 'z' => 'з', 'ž' => 'ж', - - 'A' => 'А', 'B' => 'Б', 'C' => 'Ц', 'Č' => 'Ч', 'Ć' => 'Ћ', - 'D' => 'Д', 'Dž' => 'Џ', 'Đ' => 'Ђ', 'E' => 'Е', 'F' => 'Ф', - 'G' => 'Г', 'H' => 'Х', 'I' => 'И', 'J' => 'Ј', 'K' => 'К', - 'L' => 'Л', 'LJ' => 'Љ', 'M' => 'М', 'N' => 'Н', 'NJ' => 'Њ', - 'O' => 'О', 'P' => 'П', 'R' => 'Р', 'S' => 'С', 'Š' => 'Ш', - 'T' => 'Т', 'U' => 'У', 'V' => 'В', 'Z' => 'З', 'Ž' => 'Ж', - - 'DŽ' => 'Џ', 'd!ž' => 'дж', 'D!ž' => 'Дж', 'D!Ž' => 'ДЖ', - 'Lj' => 'Љ', 'l!j' => 'лј', 'L!j' => 'Лј', 'L!J' => 'ЛЈ', - 'Nj' => 'Њ', 'n!j' => 'нј', 'N!j' => 'Нј', 'N!J' => 'НЈ' - ]; - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $variants = [ 'sr', 'sr-ec', 'sr-el' ]; - $variantfallbacks = [ - 'sr' => 'sr-ec', - 'sr-ec' => 'sr', - 'sr-el' => 'sr', - ]; - - $flags = [ - 'S' => 'S', 'писмо' => 'S', 'pismo' => 'S', - 'W' => 'W', 'реч' => 'W', 'reč' => 'W', 'ријеч' => 'W', 'riječ' => 'W' - ]; - parent::__construct( $langobj, 'sr', $variants, $variantfallbacks, $flags ); - } - - protected function loadDefaultTables() { - $this->mTables = [ - 'sr-ec' => new ReplacementArray( $this->mToCyrillics ), - 'sr-el' => new ReplacementArray( $this->mToLatin ), - 'sr' => new ReplacementArray() - ]; - } - - /** - * A function wrapper: - * - if there is no selected variant, leave the link - * names as they were - * - do not try to find variants for usernames - * - * @param string &$link - * @param Title &$nt - * @param bool $ignoreOtherCond - */ - public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) { - // check for user namespace - if ( is_object( $nt ) ) { - $ns = $nt->getNamespace(); - if ( $ns == NS_USER || $ns == NS_USER_TALK ) { - return; - } - } - - $oldlink = $link; - parent::findVariantLink( $link, $nt, $ignoreOtherCond ); - if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) { - $link = $oldlink; - } - } - - /** - * It translates text into variant, specials: - * - ommiting roman numbers - * - * @param string $text - * @param string $toVariant - * - * @throws MWException - * @return string - */ - public function translate( $text, $toVariant ) { - $breaks = '[^\w\x80-\xff]'; - - // regexp for roman numbers - // Lookahead assertion ensures $roman doesn't match the empty string - $roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})'; - - $reg = '/^' . $roman . '$|^' . $roman . $breaks . '|' . $breaks - . $roman . '$|' . $breaks . $roman . $breaks . '/'; - - $matches = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE ); - - $m = array_shift( $matches ); - $this->loadTables(); - if ( !isset( $this->mTables[$toVariant] ) ) { - throw new MWException( "Broken variant table: " - . implode( ',', array_keys( $this->mTables ) ) ); - } - $ret = $this->mTables[$toVariant]->replace( $m[0] ); - $mstart = $m[1] + strlen( $m[0] ); - foreach ( $matches as $m ) { - $ret .= substr( $text, $mstart, $m[1] - $mstart ); - $ret .= parent::translate( $m[0], $toVariant ); - $mstart = $m[1] + strlen( $m[0] ); - } - - return $ret; - } - - /** - * Guess if a text is written in Cyrillic or Latin. - * Overrides LanguageConverter::guessVariant() - * - * @param string $text The text to be checked - * @param string $variant Language code of the variant to be checked for - * @return bool True if $text appears to be written in $variant - * - * @author Nikola Smolenski <smolensk@eunet.rs> - * @since 1.19 - */ - public function guessVariant( $text, $variant ) { - $numCyrillic = preg_match_all( "/[шђчћжШЂЧЋЖ]/u", $text, $dummy ); - $numLatin = preg_match_all( "/[šđč枊ĐČĆŽ]/u", $text, $dummy ); - - if ( $variant == 'sr-ec' ) { - return $numCyrillic > $numLatin; - } elseif ( $variant == 'sr-el' ) { - return $numLatin > $numCyrillic; - } else { - return false; - } - } - -} diff --git a/languages/classes/LanguageTg.php b/languages/classes/LanguageTg.php deleted file mode 100644 index 2cec7d031fee..000000000000 --- a/languages/classes/LanguageTg.php +++ /dev/null @@ -1,120 +0,0 @@ -<?php -/** - * Tajik (Тоҷикӣ) specific code. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Language - */ - -/** - * Converts Tajiki to Latin orthography - * - * @ingroup Language - */ -class TgConverter extends LanguageConverter { - private $table = [ - 'а' => 'a', - 'б' => 'b', - 'в' => 'v', - 'г' => 'g', - 'д' => 'd', - 'е' => 'e', - 'ё' => 'jo', - 'ж' => 'ƶ', - 'з' => 'z', - 'ии ' => 'iji ', - 'и' => 'i', - 'й' => 'j', - 'к' => 'k', - 'л' => 'l', - 'м' => 'm', - 'н' => 'n', - 'о' => 'o', - 'п' => 'p', - 'р' => 'r', - 'с' => 's', - 'т' => 't', - 'у' => 'u', - 'ф' => 'f', - 'х' => 'x', - 'ч' => 'c', - 'ш' => 'ş', - 'ъ' => '\'', - 'э' => 'e', - 'ю' => 'ju', - 'я' => 'ja', - 'ғ' => 'ƣ', - 'ӣ' => 'ī', - 'қ' => 'q', - 'ӯ' => 'ū', - 'ҳ' => 'h', - 'ҷ' => 'ç', - 'ц' => 'ts', - 'А' => 'A', - 'Б' => 'B', - 'В' => 'V', - 'Г' => 'G', - 'Д' => 'D', - 'Е' => 'E', - 'Ё' => 'Jo', - 'Ж' => 'Ƶ', - 'З' => 'Z', - 'И' => 'I', - 'Й' => 'J', - 'К' => 'K', - 'Л' => 'L', - 'М' => 'M', - 'Н' => 'N', - 'О' => 'O', - 'П' => 'P', - 'Р' => 'R', - 'С' => 'S', - 'Т' => 'T', - 'У' => 'U', - 'Ф' => 'F', - 'Х' => 'X', - 'Ч' => 'C', - 'Ш' => 'Ş', - 'Ъ' => '\'', - 'Э' => 'E', - 'Ю' => 'Ju', - 'Я' => 'Ja', - 'Ғ' => 'Ƣ', - 'Ӣ' => 'Ī', - 'Қ' => 'Q', - 'Ӯ' => 'Ū', - 'Ҳ' => 'H', - 'Ҷ' => 'Ç', - 'Ц' => 'Ts', - ]; - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $variants = [ 'tg', 'tg-latn' ]; - parent::__construct( $langobj, 'tg', $variants ); - } - - protected function loadDefaultTables() { - $this->mTables = [ - 'tg-latn' => new ReplacementArray( $this->table ), - 'tg' => new ReplacementArray() - ]; - } -} diff --git a/languages/classes/LanguageUz.php b/languages/classes/LanguageUz.php deleted file mode 100644 index ae822efd680b..000000000000 --- a/languages/classes/LanguageUz.php +++ /dev/null @@ -1,138 +0,0 @@ -<?php -/** - * Uzbek specific code. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - * @ingroup Language - */ - -/** - * @ingroup Language - */ -class UzConverter extends LanguageConverter { - public $toLatin = [ - 'а' => 'a', 'А' => 'A', - 'б' => 'b', 'Б' => 'B', - 'д' => 'd', 'Д' => 'D', - 'е' => 'e', 'Е' => 'E', - 'э' => 'e', 'Э' => 'E', - 'в' => 'v', 'В' => 'V', - 'х' => 'x', 'Х' => 'X', - 'ғ' => 'gʻ', 'Ғ' => 'Gʻ', - 'г' => 'g', 'Г' => 'G', - 'ҳ' => 'h', 'Ҳ' => 'H', - 'ж' => 'j', 'Ж' => 'J', - 'з' => 'z', 'З' => 'Z', - 'и' => 'i', 'И' => 'I', - 'к' => 'k', 'К' => 'K', - 'л' => 'l', 'Л' => 'L', - 'м' => 'm', 'М' => 'M', - 'н' => 'n', 'Н' => 'N', - 'о' => 'o', 'О' => 'O', - 'п' => 'p', 'П' => 'P', - 'р' => 'r', 'Р' => 'R', - 'с' => 's', 'С' => 'S', - 'т' => 't', 'Т' => 'T', - 'у' => 'u', 'У' => 'U', - 'ф' => 'f', 'Ф' => 'F', - 'ў' => 'oʻ', 'Ў' => 'Oʻ', - // note: at the beginning of a word and right after a consonant, only "s" is used - 'ц' => 'ts', 'Ц' => 'Ts', - 'қ' => 'q', 'Қ' => 'Q', - 'ё' => 'yo', 'Ё' => 'Yo', - 'ю' => 'yu', 'Ю' => 'Yu', - 'ч' => 'ch', 'Ч' => 'Ch', - 'ш' => 'sh', 'Ш' => 'Sh', - 'й' => 'y', 'Й' => 'Y', - 'я' => 'ya', 'Я' => 'Ya', - 'ъ' => 'ʼ', - ]; - - public $toCyrillic = [ - 'a' => 'а', 'A' => 'А', - 'b' => 'б', 'B' => 'Б', - 'd' => 'д', 'D' => 'Д', - // at the beginning of a word and after a vowel, "э" is used instead of "e" - // (see regex below) - 'e' => 'э', 'E' => 'Э', - 'f' => 'ф', 'F' => 'Ф', - 'g' => 'г', 'G' => 'Г', - 'g‘' => 'ғ', 'G‘' => 'Ғ', 'gʻ' => 'ғ', 'Gʻ' => 'Ғ', - 'h' => 'ҳ', 'H' => 'Ҳ', - 'i' => 'и', 'I' => 'И', - 'k' => 'к', 'K' => 'К', - 'l' => 'л', 'L' => 'Л', - 'm' => 'м', 'M' => 'М', - 'n' => 'н', 'N' => 'Н', - 'o' => 'о', 'O' => 'О', - 'p' => 'п', 'P' => 'П', - 'r' => 'р', 'R' => 'Р', - 's' => 'с', 'S' => 'С', - 't' => 'т', 'T' => 'Т', - 'u' => 'у', 'U' => 'У', - 'v' => 'в', 'V' => 'В', - 'x' => 'х', 'X' => 'Х', - 'z' => 'з', 'Z' => 'З', - 'j' => 'ж', 'J' => 'Ж', - 'o‘' => 'ў', 'O‘' => 'Ў', 'oʻ' => 'ў', 'Oʻ' => 'Ў', - 'yo‘' => 'йў', 'Yo‘' => 'Йў', 'yoʻ' => 'йў', 'Yoʻ' => 'Йў', - 'ts' => 'ц', 'Ts' => 'Ц', - 'q' => 'қ', 'Q' => 'Қ', - 'yo' => 'ё', 'Yo' => 'Ё', - 'yu' => 'ю', 'Yu' => 'Ю', - 'ch' => 'ч', 'Ch' => 'Ч', - 'sh' => 'ш', 'Sh' => 'Ш', - 'y' => 'й', 'Y' => 'Й', - 'ya' => 'я', 'Ya' => 'Я', - 'ʼ' => 'ъ', - ]; - - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $variants = [ 'uz', 'uz-latn', 'uz-cyrl' ]; - $variantfallbacks = [ - 'uz' => 'uz-latn', - 'uz-cyrl' => 'uz', - 'uz-latn' => 'uz', - ]; - parent::__construct( $langobj, 'uz', $variants, $variantfallbacks ); - } - - protected function loadDefaultTables() { - $this->mTables = [ - 'uz-cyrl' => new ReplacementArray( $this->toCyrillic ), - 'uz-latn' => new ReplacementArray( $this->toLatin ), - 'uz' => new ReplacementArray() - ]; - } - - public function translate( $text, $toVariant ) { - if ( $toVariant == 'uz-cyrl' ) { - $text = str_replace( 'ye', 'е', $text ); - $text = str_replace( 'Ye', 'Е', $text ); - $text = str_replace( 'YE', 'Е', $text ); - // "е" after consonants, otherwise "э" (see above) - $text = preg_replace( '/([BVGDJZYKLMNPRSTFXCWQʻ‘H])E/u', '$1Е', $text ); - $text = preg_replace( '/([bvgdjzyklmnprstfxcwqʻ‘h])e/ui', '$1е', $text ); - } - return parent::translate( $text, $toVariant ); - } - -} diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php index 9adcb582eb31..9de4965320ed 100644 --- a/languages/classes/LanguageZh.php +++ b/languages/classes/LanguageZh.php @@ -22,109 +22,6 @@ */ /** - * @ingroup Language - */ -class ZhConverter extends LanguageConverter { - /** - * @param Language $langobj - */ - public function __construct( Language $langobj ) { - $this->mDescCodeSep = ':'; - $this->mDescVarSep = ';'; - - $variants = [ - 'zh', - 'zh-hans', - 'zh-hant', - 'zh-cn', - 'zh-hk', - 'zh-mo', - 'zh-my', - 'zh-sg', - 'zh-tw' - ]; - - $variantfallbacks = [ - 'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ], - 'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ], - 'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ], - 'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ], - 'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ], - 'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ], - 'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ], - 'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ], - 'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ], - ]; - $ml = [ - 'zh' => 'disable', - 'zh-hans' => 'unidirectional', - 'zh-hant' => 'unidirectional', - ]; - - parent::__construct( $langobj, 'zh', - $variants, - $variantfallbacks, - [], - $ml ); - $names = [ - 'zh' => '原文', - 'zh-hans' => '简体', - 'zh-hant' => '繁體', - 'zh-cn' => '大陆', - 'zh-tw' => '台灣', - 'zh-hk' => '香港', - 'zh-mo' => '澳門', - 'zh-sg' => '新加坡', - 'zh-my' => '大马', - ]; - $this->mVariantNames = array_merge( $this->mVariantNames, $names ); - } - - protected function loadDefaultTables() { - $this->mTables = [ - 'zh-hans' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans ), - 'zh-hant' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant ), - 'zh-cn' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ), - 'zh-hk' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ), - 'zh-mo' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ), - 'zh-my' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ), - 'zh-sg' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ), - 'zh-tw' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2TW ), - 'zh' => new ReplacementArray - ]; - } - - protected function postLoadTables() { - $this->mTables['zh-cn']->setArray( - $this->mTables['zh-cn']->getArray() + $this->mTables['zh-hans']->getArray() - ); - $this->mTables['zh-hk']->setArray( - $this->mTables['zh-hk']->getArray() + $this->mTables['zh-hant']->getArray() - ); - $this->mTables['zh-mo']->setArray( - $this->mTables['zh-mo']->getArray() + $this->mTables['zh-hant']->getArray() - ); - $this->mTables['zh-my']->setArray( - $this->mTables['zh-my']->getArray() + $this->mTables['zh-hans']->getArray() - ); - $this->mTables['zh-sg']->setArray( - $this->mTables['zh-sg']->getArray() + $this->mTables['zh-hans']->getArray() - ); - $this->mTables['zh-tw']->setArray( - $this->mTables['zh-tw']->getArray() + $this->mTables['zh-hant']->getArray() - ); - } - - /** - * @param string $key - * @return string - */ - public function convertCategoryKey( $key ) { - return $this->autoConvert( $key, 'zh' ); - } -} - -/** * class that handles both Traditional and Simplified Chinese * right now it only distinguish zh_hans, zh_hant, zh_cn, zh_tw, zh_sg and zh_hk. * @@ -174,7 +71,7 @@ class LanguageZh extends LanguageZh_hans { public function convertForSearchResult( $termsArray ) { $terms = implode( '|', $termsArray ); $terms = self::convertDoubleWidth( $terms ); - $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) ); + $terms = implode( '|', $this->getConverter()->autoConvertToAllVariants( $terms ) ); $ret = array_unique( explode( '|', $terms ) ); return $ret; } |