aboutsummaryrefslogtreecommitdiffstats
path: root/languages/classes
diff options
context:
space:
mode:
Diffstat (limited to 'languages/classes')
-rw-r--r--languages/classes/LanguageCrh.php284
-rw-r--r--languages/classes/LanguageEn.php56
-rw-r--r--languages/classes/LanguageGan.php54
-rw-r--r--languages/classes/LanguageIu.php161
-rw-r--r--languages/classes/LanguageKk.php338
-rw-r--r--languages/classes/LanguageKu.php238
-rw-r--r--languages/classes/LanguageShi.php137
-rw-r--r--languages/classes/LanguageSr.php185
-rw-r--r--languages/classes/LanguageTg.php120
-rw-r--r--languages/classes/LanguageUz.php138
-rw-r--r--languages/classes/LanguageZh.php105
11 files changed, 1 insertions, 1815 deletions
diff --git a/languages/classes/LanguageCrh.php b/languages/classes/LanguageCrh.php
deleted file mode 100644
index 0f1050559ba6..000000000000
--- a/languages/classes/LanguageCrh.php
+++ /dev/null
@@ -1,284 +0,0 @@
-<?php
-/**
- * Crimean Tatar (Qırımtatarca) specific code.
- *
- * Adapted from https://crh.wikipedia.org/wiki/Qullan%C4%B1c%C4%B1:Don_Alessandro/Translit
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Language
- */
-
-/**
- * Crimean Tatar (Qırımtatarca) converter routines
- *
- * @ingroup Language
- */
-class CrhConverter extends LanguageConverter {
- // Defines working character ranges
-
- // Cyrillic
- const C_UC = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'; # Crimean Tatar Cyrillic uppercase
- const C_LC = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'; # Crimean Tatar Cyrillic lowercase
- const C_CONS_UC = 'БВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ'; # Crimean Tatar Cyrillic + CÑ uppercase consonants
- const C_CONS_LC = 'бвгджзйклмнпрстфхцчшщcñ'; # Crimean Tatar Cyrillic + CÑ lowercase consonants
- const C_M_CONS = 'бгкмшcБГКМШC'; # Crimean Tatar Cyrillic M-type consonants
-
- // Crimean Tatar Cyrillic + CÑ consonants
- const C_CONS = 'бвгджзйклмнпрстфхцчшщcñБВГДЖЗЙКЛМНПРСТФХЦЧШЩCÑ';
-
- // Latin
- const L_UC = 'AÂBCÇDEFGĞHIİJKLMNÑOÖPQRSŞTUÜVYZ'; # Crimean Tatar Latin uppercase
- const L_LC = 'aâbcçdefgğhıijklmnñoöpqrsştuüvyz'; # Crimean Tatar Latin lowercase
- const L_N_CONS_UC = 'ÇNRSTZ'; # Crimean Tatar Latin N-type upper case consonants
- const L_N_CONS_LC = 'çnrstz'; # Crimean Tatar Latin N-type lower case consonants
- const L_N_CONS = 'çnrstzÇNRSTZ'; # Crimean Tatar Latin N-type consonants
- const L_M_CONS = 'bcgkmpşBCGKMPŞ'; # Crimean Tatar Latin M-type consonants
- const L_CONS_UC = 'BCÇDFGĞHJKLMNÑPQRSŞTVZ'; # Crimean Tatar Latin uppercase consonants
- const L_CONS_LC = 'bcçdfgğhjklmnñpqrsştvz'; # Crimean Tatar Latin lowercase consonants
- const L_CONS = 'bcçdfgğhjklmnñpqrsştvzBCÇDFGĞHJKLMNÑPQRSŞTVZ'; # Crimean Tatar Latin consonants
- const L_VOW_UC = 'AÂEIİOÖUÜ'; # Crimean Tatar Latin uppercase vowels
- const L_VOW = 'aâeıioöuüAÂEIİOÖUÜ'; # Crimean Tatar Latin vowels
- const L_F_UC = 'EİÖÜ'; # Crimean Tatar Latin uppercase front vowels
- const L_F = 'eiöüEİÖÜ'; # Crimean Tatar Latin front vowels
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $variants = [ 'crh', 'crh-cyrl', 'crh-latn' ];
- $variantfallbacks = [
- 'crh' => 'crh-latn',
- 'crh-cyrl' => 'crh-latn',
- 'crh-latn' => 'crh-cyrl',
- ];
-
- parent::__construct( $langobj, 'crh',
- $variants, $variantfallbacks, [] );
-
- // No point delaying this since they're in code.
- // Waiting until loadDefaultTables() means they never get loaded
- // when the tables themselves are loaded from cache.
- $this->loadExceptions();
- }
-
- public $mCyrillicToLatin = [
-
- ## these are independent of location in the word, but have
- ## to go first so other transforms don't bleed them
- 'гъ' => 'ğ', 'Гъ' => 'Ğ', 'ГЪ' => 'Ğ',
- 'къ' => 'q', 'Къ' => 'Q', 'КЪ' => 'Q',
- 'нъ' => 'ñ', 'Нъ' => 'Ñ', 'НЪ' => 'Ñ',
- 'дж' => 'c', 'Дж' => 'C', 'ДЖ' => 'C',
-
- 'А' => 'A', 'а' => 'a', 'Б' => 'B', 'б' => 'b',
- 'В' => 'V', 'в' => 'v', 'Г' => 'G', 'г' => 'g',
- 'Д' => 'D', 'д' => 'd', 'Ж' => 'J', 'ж' => 'j',
- 'З' => 'Z', 'з' => 'z', 'И' => 'İ', 'и' => 'i',
- 'Й' => 'Y', 'й' => 'y', 'К' => 'K', 'к' => 'k',
- 'Л' => 'L', 'л' => 'l', 'М' => 'M', 'м' => 'm',
- 'Н' => 'N', 'н' => 'n', 'П' => 'P', 'п' => 'p',
- 'Р' => 'R', 'р' => 'r', 'С' => 'S', 'с' => 's',
- 'Т' => 'T', 'т' => 't', 'Ф' => 'F', 'ф' => 'f',
- 'Х' => 'H', 'х' => 'h', 'Ч' => 'Ç', 'ч' => 'ç',
- 'Ш' => 'Ş', 'ш' => 'ş', 'Ы' => 'I', 'ы' => 'ı',
- 'Э' => 'E', 'э' => 'e', 'Е' => 'E', 'е' => 'e',
- 'Я' => 'Â', 'я' => 'â', 'У' => 'U', 'у' => 'u',
- 'О' => 'O', 'о' => 'o',
-
- 'Ё' => 'Yo', 'ё' => 'yo', 'Ю' => 'Yu', 'ю' => 'yu',
- 'Ц' => 'Ts', 'ц' => 'ts', 'Щ' => 'Şç', 'щ' => 'şç',
- 'Ь' => '', 'ь' => '', 'Ъ' => '', 'ъ' => '',
-
- ];
-
- public $mLatinToCyrillic = [
- 'Â' => 'Я', 'â' => 'я', 'B' => 'Б', 'b' => 'б',
- 'Ç' => 'Ч', 'ç' => 'ч', 'D' => 'Д', 'd' => 'д',
- 'F' => 'Ф', 'f' => 'ф', 'G' => 'Г', 'g' => 'г',
- 'H' => 'Х', 'h' => 'х', 'I' => 'Ы', 'ı' => 'ы',
- 'İ' => 'И', 'i' => 'и', 'J' => 'Ж', 'j' => 'ж',
- 'K' => 'К', 'k' => 'к', 'L' => 'Л', 'l' => 'л',
- 'M' => 'М', 'm' => 'м', 'N' => 'Н', 'n' => 'н',
- 'O' => 'О', 'o' => 'о', 'P' => 'П', 'p' => 'п',
- 'R' => 'Р', 'r' => 'р', 'S' => 'С', 's' => 'с',
- 'Ş' => 'Ш', 'ş' => 'ш', 'T' => 'Т', 't' => 'т',
- 'V' => 'В', 'v' => 'в', 'Z' => 'З', 'z' => 'з',
-
- 'ya' => 'я', 'Ya' => 'Я', 'YA' => 'Я',
- 'ye' => 'е', 'YE' => 'Е', 'Ye' => 'Е',
-
- // hack, hack, hack
- 'A' => 'А', 'a' => 'а', 'E' => 'Е', 'e' => 'е',
- 'Ö' => 'Ё', 'ö' => 'ё', 'U' => 'У', 'u' => 'у',
- 'Ü' => 'Ю', 'ü' => 'ю', 'Y' => 'Й', 'y' => 'й',
- 'C' => 'Дж', 'c' => 'дж', 'Ğ' => 'Гъ', 'ğ' => 'гъ',
- 'Ñ' => 'Нъ', 'ñ' => 'нъ', 'Q' => 'Къ', 'q' => 'къ',
-
- ];
-
- public $mCyrl2LatnExceptions = [];
- public $mLatn2CyrlExceptions = [];
-
- public $mCyrl2LatnPatterns = [];
- public $mLatn2CyrlPatterns = [];
-
- public $mCyrlCleanUpRegexes = [];
-
- public $mExceptionsLoaded = false;
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'crh-latn' => new ReplacementArray( $this->mCyrillicToLatin ),
- 'crh-cyrl' => new ReplacementArray( $this->mLatinToCyrillic ),
- 'crh' => new ReplacementArray()
- ];
- }
-
- private function loadExceptions() {
- if ( $this->mExceptionsLoaded ) {
- return;
- }
-
- $this->mExceptionsLoaded = true;
- $crhExceptions = new MediaWiki\Languages\Data\CrhExceptions();
- list( $this->mCyrl2LatnExceptions, $this->mLatn2CyrlExceptions,
- $this->mCyrl2LatnPatterns, $this->mLatn2CyrlPatterns, $this->mCyrlCleanUpRegexes ) =
- $crhExceptions->loadExceptions( self::L_LC . self::C_LC, self::L_UC . self::C_UC );
- }
-
- /**
- * A function wrapper:
- * - if there is no selected variant, leave the link
- * names as they were
- * - do not try to find variants for usernames
- *
- * @param string &$link
- * @param Title &$nt
- * @param bool $ignoreOtherCond
- */
- public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
- // check for user namespace
- if ( is_object( $nt ) ) {
- $ns = $nt->getNamespace();
- if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
- return;
- }
- }
-
- $oldlink = $link;
- parent::findVariantLink( $link, $nt, $ignoreOtherCond );
- if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
- $link = $oldlink;
- }
- }
-
- /**
- * It translates text into variant, specials:
- * - omitting roman numbers
- *
- * @param string $text
- * @param bool $toVariant
- *
- * @throws MWException
- * @return string
- */
- public function translate( $text, $toVariant ) {
- switch ( $toVariant ) {
- case 'crh-cyrl':
- case 'crh-latn':
- break;
- default:
- return $text;
- }
-
- if ( !$this->mTablesLoaded ) {
- $this->loadTables();
- }
-
- if ( !isset( $this->mTables[$toVariant] ) ) {
- throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
- }
-
- switch ( $toVariant ) {
- case 'crh-cyrl':
- /* Check for roman numbers like VII, XIX...
- * Only need to split on Roman numerals when converting to Cyrillic
- * Lookahead assertion ensures $roman doesn't match the empty string, and
- * non-period after first "Roman" character allows initials to be converted
- */
- $roman = '(?=[MDCLXVI]([^.]|$))M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';
-
- $breaks = '([^\w\x80-\xff])';
-
- // allow for multiple Roman numerals in a row; rare but it happens
- $romanRegex = '/^' . $roman . '$|^(' . $roman . $breaks . ')+|(' . $breaks . $roman . ')+$|' .
- $breaks . '(' . $roman . $breaks . ')+/';
-
- $matches = preg_split( $romanRegex, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
- $mstart = 0;
- $ret = '';
- foreach ( $matches as $m ) {
- // copy over Roman numerals
- $ret .= substr( $text, $mstart, $m[1] - $mstart );
-
- // process everything else
- if ( $m[0] !== '' ) {
- $ret .= $this->regsConverter( $m[0], $toVariant );
- }
-
- $mstart = $m[1] + strlen( $m[0] );
- }
-
- return $ret;
- default:
- // Just process the whole string in one go
- return $this->regsConverter( $text, $toVariant );
- }
- }
-
- private function regsConverter( $text, $toVariant ) {
- if ( $text == '' ) return $text;
-
- $pat = [];
- $rep = [];
- switch ( $toVariant ) {
- case 'crh-latn':
- $text = strtr( $text, $this->mCyrl2LatnExceptions );
- foreach ( $this->mCyrl2LatnPatterns as $pat => $rep ) {
- $text = preg_replace( $pat, $rep, $text );
- }
- $text = parent::translate( $text, $toVariant );
- $text = strtr( $text, [ '«' => '"', '»' => '"', ] );
- return $text;
- case 'crh-cyrl':
- $text = strtr( $text, $this->mLatn2CyrlExceptions );
- foreach ( $this->mLatn2CyrlPatterns as $pat => $rep ) {
- $text = preg_replace( $pat, $rep, $text );
- }
- $text = parent::translate( $text, $toVariant );
- $text = strtr( $text, [ '“' => '«', '”' => '»', ] );
- foreach ( $this->mCyrlCleanUpRegexes as $pat => $rep ) {
- $text = preg_replace( $pat, $rep, $text );
- }
- return $text;
- default:
- return $text;
- }
- }
-
-}
diff --git a/languages/classes/LanguageEn.php b/languages/classes/LanguageEn.php
index 0ea06f58be89..7f1e2cf2c64b 100644
--- a/languages/classes/LanguageEn.php
+++ b/languages/classes/LanguageEn.php
@@ -21,62 +21,6 @@
*/
/**
- * @ingroup Language
- */
-class EnConverter extends LanguageConverter {
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- parent::__construct( $langobj, 'en', [ 'en', 'en-x-piglatin' ] );
- }
-
- /**
- * Dummy methods required by base class.
- */
- protected function loadDefaultTables() {
- $this->mTables = [
- 'en' => new ReplacementArray(),
- 'en-x-piglatin' => new ReplacementArray(),
- ];
- }
-
- /**
- * Translates text into Pig Latin. This allows developers to test the language variants
- * functionality and user interface without having to switch wiki language away from default.
- *
- * @param string $text
- * @param string $toVariant
- * @return string
- */
- public function translate( $text, $toVariant ) {
- if ( $toVariant !== 'en-x-piglatin' ) {
- return $text;
- }
-
- // Only process words composed of standard English alphabet, leave the rest unchanged.
- // This skips some English words like 'naïve' or 'résumé', but we can live with that.
- // Ignore single letters and words which aren't lowercase or uppercase-first.
- return preg_replace_callback( '/[A-Za-z][a-z\']+/', function ( $matches ) {
- $word = $matches[0];
- if ( preg_match( '/^[aeiou]/i', $word ) ) {
- return $word . 'way';
- }
-
- return preg_replace_callback( '/^(s?qu|[^aeiou][^aeiouy]*)(.*)$/i', function ( $m ) {
- $ucfirst = strtoupper( $m[1][0] ) === $m[1][0];
- if ( $ucfirst ) {
- return ucfirst( $m[2] ) . lcfirst( $m[1] ) . 'ay';
- }
-
- return $m[2] . $m[1] . 'ay';
- }, $word );
- }, $text );
- }
-}
-
-/**
* English
*
* @ingroup Language
diff --git a/languages/classes/LanguageGan.php b/languages/classes/LanguageGan.php
index 42d36c39759f..567e379a5633 100644
--- a/languages/classes/LanguageGan.php
+++ b/languages/classes/LanguageGan.php
@@ -21,59 +21,6 @@
*/
/**
- * @ingroup Language
- */
-class GanConverter extends LanguageConverter {
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $this->mDescCodeSep = ':';
- $this->mDescVarSep = ';';
-
- $variants = [ 'gan', 'gan-hans', 'gan-hant' ];
- $variantfallbacks = [
- 'gan' => [ 'gan-hans', 'gan-hant' ],
- 'gan-hans' => [ 'gan' ],
- 'gan-hant' => [ 'gan' ],
- ];
- $ml = [
- 'gan' => 'disable',
- ];
-
- parent::__construct( $langobj, 'gan',
- $variants,
- $variantfallbacks,
- [],
- $ml
- );
-
- $names = [
- 'gan' => '原文',
- 'gan-hans' => '简体',
- 'gan-hant' => '繁體',
- ];
- $this->mVariantNames = array_merge( $this->mVariantNames, $names );
- }
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'gan-hans' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans ),
- 'gan-hant' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant ),
- 'gan' => new ReplacementArray
- ];
- }
-
- /**
- * @param string $key
- * @return string
- */
- public function convertCategoryKey( $key ) {
- return $this->autoConvert( $key, 'gan' );
- }
-}
-
-/**
* Gan Chinese
*
* class that handles both Traditional and Simplified Chinese
@@ -93,5 +40,4 @@ class LanguageGan extends LanguageZh {
// LanguageZh::normalizeForSearch
return parent::normalizeForSearch( $string, $autoVariant );
}
-
}
diff --git a/languages/classes/LanguageIu.php b/languages/classes/LanguageIu.php
deleted file mode 100644
index 209c145ebf3a..000000000000
--- a/languages/classes/LanguageIu.php
+++ /dev/null
@@ -1,161 +0,0 @@
-<?php
-/**
- * Inuktitut specific code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Language
- */
-
-/**
- * Conversion script between Latin and Syllabics for Inuktitut.
- * - Syllabics -> lowercase Latin
- * - lowercase/uppercase Latin -> Syllabics
- *
- *
- * Based on:
- * - https://commons.wikimedia.org/wiki/Image:Inuktitut.png
- * - LanguageSr.php
- *
- * @ingroup Language
- */
-class IuConverter extends LanguageConverter {
- protected $mDoContentConvert;
-
- public $mToLatin = [
- 'ᐦ' => 'h', 'ᐃ' => 'i', 'ᐄ' => 'ii', 'ᐅ' => 'u', 'ᐆ' => 'uu', 'ᐊ' => 'a', 'ᐋ' => 'aa',
- 'ᑉ' => 'p', 'ᐱ' => 'pi', 'ᐲ' => 'pii', 'ᐳ' => 'pu', 'ᐴ' => 'puu', 'ᐸ' => 'pa', 'ᐹ' => 'paa',
- 'ᑦ' => 't', 'ᑎ' => 'ti', 'ᑏ' => 'tii', 'ᑐ' => 'tu', 'ᑑ' => 'tuu', 'ᑕ' => 'ta', 'ᑖ' => 'taa',
- 'ᒃ' => 'k', 'ᑭ' => 'ki', 'ᑮ' => 'kii', 'ᑯ' => 'ku', 'ᑰ' => 'kuu', 'ᑲ' => 'ka', 'ᑳ' => 'kaa',
- 'ᖅᒃ' => 'qq', 'ᖅᑭ' => 'qqi', 'ᖅᑮ' => 'qqii', 'ᖅᑯ' => 'qqu', 'ᖅᑰ' => 'ᖅqquu', 'ᖅᑲ' => 'qqa',
- 'ᖅᑳ' => 'qqaa', 'ᒡ' => 'g', 'ᒋ' => 'gi', 'ᒌ' => 'gii', 'ᒍ' => 'gu', 'ᒎ' => 'guu',
- 'ᒐ' => 'ga', 'ᒑ' => 'gaa', 'ᒻ' => 'm', 'ᒥ' => 'mi', 'ᒦ' => 'mii', 'ᒧ' => 'mu', 'ᒨ' => 'muu',
- 'ᒪ' => 'ma', 'ᒫ' => 'maa', 'ᓐ' => 'n', 'ᓂ' => 'ni', 'ᓃ' => 'nii', 'ᓄ' => 'nu', 'ᓅ' => 'nuu',
- 'ᓇ' => 'na', 'ᓈ' => 'naa', 'ᔅ' => 's', 'ᓯ' => 'si', 'ᓰ' => 'sii', 'ᓱ' => 'su', 'ᓲ' => 'suu',
- 'ᓴ' => 'sa', 'ᓵ' => 'saa', 'ᓪ' => 'l', 'ᓕ' => 'li', 'ᓖ' => 'lii', 'ᓗ' => 'lu', 'ᓘ' => 'luu',
- 'ᓚ' => 'la', 'ᓛ' => 'laa', 'ᔾ' => 'j', 'ᔨ' => 'ji', 'ᔩ' => 'jii', 'ᔪ' => 'ju', 'ᔫ' => 'juu',
- 'ᔭ' => 'ja', 'ᔮ' => 'jaa', 'ᕝ' => 'v', 'ᕕ' => 'vi', 'ᕖ' => 'vii', 'ᕗ' => 'vu', 'ᕘ' => 'vuu',
- 'ᕙ' => 'va', 'ᕚ' => 'vaa', 'ᕐ' => 'r', 'ᕆ' => 'ri', 'ᕇ' => 'rii', 'ᕈ' => 'ru', 'ᕉ' => 'ruu',
- 'ᕋ' => 'ra', 'ᕌ' => 'raa', 'ᖅ' => 'q', 'ᕿ' => 'qi', 'ᖀ' => 'qii', 'ᖁ' => 'qu', 'ᖂ' => 'quu',
- 'ᖃ' => 'qa', 'ᖄ' => 'qaa', 'ᖕ' => 'ng', 'ᖏ' => 'ngi', 'ᖐ' => 'ngii', 'ᖑ' => 'ngu',
- 'ᖒ' => 'nguu', 'ᖓ' => 'nga', 'ᖔ' => 'ngaa', 'ᖖ' => 'nng', 'ᙱ' => 'nngi', 'ᙲ' => 'nngii',
- 'ᙳ' => 'nngu', 'ᙴ' => 'nnguu', 'ᙵ' => 'nnga', 'ᙶ' => 'nngaa', 'ᖦ' => 'ɫ', 'ᖠ' => 'ɫi',
- 'ᖡ' => 'ɫii', 'ᖢ' => 'ɫu', 'ᖣ' => 'ɫuu', 'ᖤ' => 'ɫa', 'ᖥ' => 'ɫaa',
- ];
-
- public $mUpperToLowerCaseLatin = [
- 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
- 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j',
- 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o',
- 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't',
- 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y',
- 'Z' => 'z',
- ];
-
- public $mToSyllabics = [
- 'h' => 'ᐦ', 'i' => 'ᐃ', 'ii' => 'ᐄ', 'u' => 'ᐅ', 'uu' => 'ᐆ', 'a' => 'ᐊ', 'aa' => 'ᐋ',
- 'p' => 'ᑉ', 'pi' => 'ᐱ', 'pii' => 'ᐲ', 'pu' => 'ᐳ', 'puu' => 'ᐴ', 'pa' => 'ᐸ', 'paa' => 'ᐹ',
- 't' => 'ᑦ', 'ti' => 'ᑎ', 'tii' => 'ᑏ', 'tu' => 'ᑐ', 'tuu' => 'ᑑ', 'ta' => 'ᑕ', 'taa' => 'ᑖ',
- 'k' => 'ᒃ', 'ki' => 'ᑭ', 'kii' => 'ᑮ', 'ku' => 'ᑯ', 'kuu' => 'ᑰ', 'ka' => 'ᑲ', 'kaa' => 'ᑳ',
- 'g' => 'ᒡ', 'gi' => 'ᒋ', 'gii' => 'ᒌ', 'gu' => 'ᒍ', 'guu' => 'ᒎ', 'ga' => 'ᒐ', 'gaa' => 'ᒑ',
- 'm' => 'ᒻ', 'mi' => 'ᒥ', 'mii' => 'ᒦ', 'mu' => 'ᒧ', 'muu' => 'ᒨ', 'ma' => 'ᒪ', 'maa' => 'ᒫ',
- 'n' => 'ᓐ', 'ni' => 'ᓂ', 'nii' => 'ᓃ', 'nu' => 'ᓄ', 'nuu' => 'ᓅ', 'na' => 'ᓇ', 'naa' => 'ᓈ',
- 's' => 'ᔅ', 'si' => 'ᓯ', 'sii' => 'ᓰ', 'su' => 'ᓱ', 'suu' => 'ᓲ', 'sa' => 'ᓴ', 'saa' => 'ᓵ',
- 'l' => 'ᓪ', 'li' => 'ᓕ', 'lii' => 'ᓖ', 'lu' => 'ᓗ', 'luu' => 'ᓘ', 'la' => 'ᓚ', 'laa' => 'ᓛ',
- 'j' => 'ᔾ', 'ji' => 'ᔨ', 'jii' => 'ᔩ', 'ju' => 'ᔪ', 'juu' => 'ᔫ', 'ja' => 'ᔭ', 'jaa' => 'ᔮ',
- 'v' => 'ᕝ', 'vi' => 'ᕕ', 'vii' => 'ᕖ', 'vu' => 'ᕗ', 'vuu' => 'ᕘ', 'va' => 'ᕙ', 'vaa' => 'ᕚ',
- 'r' => 'ᕐ', 'ri' => 'ᕆ', 'rii' => 'ᕇ', 'ru' => 'ᕈ', 'ruu' => 'ᕉ', 'ra' => 'ᕋ', 'raa' => 'ᕌ',
- 'qq' => 'ᖅᒃ', 'qqi' => 'ᖅᑭ', 'qqii' => 'ᖅᑮ', 'qqu' => 'ᖅᑯ', 'qquu' => 'ᖅᑰ', 'qqa' => 'ᖅᑲ',
- 'qqaa' => 'ᖅᑳ', 'q' => 'ᖅ', 'qi' => 'ᕿ', 'qii' => 'ᖀ', 'qu' => 'ᖁ', 'quu' => 'ᖂ',
- 'qa' => 'ᖃ', 'qaa' => 'ᖄ', 'ng' => 'ᖕ', 'ngi' => 'ᖏ', 'ngii' => 'ᖐ', 'ngu' => 'ᖑ',
- 'nguu' => 'ᖒ', 'nga' => 'ᖓ', 'ngaa' => 'ᖔ', 'nng' => 'ᖖ', 'nngi' => 'ᙱ', 'nngii' => 'ᙲ',
- 'nngu' => 'ᙳ', 'nnguu' => 'ᙴ', 'nnga' => 'ᙵ', 'nngaa' => 'ᙶ', 'ɫ' => 'ᖦ', 'ɫi' => 'ᖠ',
- 'ɫii' => 'ᖡ', 'ɫu' => 'ᖢ', 'ɫuu' => 'ᖣ', 'ɫa' => 'ᖤ', 'ɫaa' => 'ᖥ',
- ];
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $variants = [ 'iu', 'ike-cans', 'ike-latn' ];
- $variantfallbacks = [
- 'iu' => 'ike-cans',
- 'ike-cans' => 'iu',
- 'ike-latn' => 'iu',
- ];
- $flags = [];
-
- parent::__construct( $langobj, 'iu', $variants, $variantfallbacks, $flags );
- }
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'lowercase' => new ReplacementArray( $this->mUpperToLowerCaseLatin ),
- 'ike-cans' => new ReplacementArray( $this->mToSyllabics ),
- 'ike-latn' => new ReplacementArray( $this->mToLatin ),
- 'iu' => new ReplacementArray()
- ];
- }
-
- /**
- * A function wrapper:
- * - if there is no selected variant, leave the link
- * names as they were
- * - do not try to find variants for usernames
- *
- * @param string &$link
- * @param Title &$nt
- * @param bool $ignoreOtherCond
- */
- public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
- // check for user namespace
- if ( is_object( $nt ) ) {
- $ns = $nt->getNamespace();
- if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
- return;
- }
- }
-
- $oldlink = $link;
- parent::findVariantLink( $link, $nt, $ignoreOtherCond );
- if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
- $link = $oldlink;
- }
- }
-
- /**
- * It translates text into variant
- *
- * @param string $text
- * @param bool $toVariant
- *
- * @return string
- */
- public function translate( $text, $toVariant ) {
- // If $text is empty or only includes spaces, do nothing
- // Otherwise translate it
- if ( trim( $text ) ) {
- $this->loadTables();
- // To syllabics, first translate uppercase to lowercase Latin
- if ( $toVariant == 'ike-cans' ) {
- $text = $this->mTables['lowercase']->replace( $text );
- }
- $text = $this->mTables[$toVariant]->replace( $text );
- }
- return $text;
- }
-}
diff --git a/languages/classes/LanguageKk.php b/languages/classes/LanguageKk.php
index d2467ef3a111..44149c3327f6 100644
--- a/languages/classes/LanguageKk.php
+++ b/languages/classes/LanguageKk.php
@@ -21,344 +21,6 @@
* @ingroup Language
*/
-define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' ); # Kazakh Cyrillic uppercase
-define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase
-define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase
-define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase
-// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic
-define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA
-// define( 'ZWNJ', '‌' ); # U+200C ZERO WIDTH NON-JOINER
-
-/**
- * Kazakh (Қазақша) converter routines
- *
- * @ingroup Language
- */
-class KkConverter extends LanguageConverter {
- protected $mCyrl2Latn, $mLatn2Cyrl, $mCyLa2Arab;
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $variants = [ 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn' ];
- $variantfallbacks = [
- 'kk' => 'kk-cyrl',
- 'kk-cyrl' => 'kk',
- 'kk-latn' => 'kk',
- 'kk-arab' => 'kk',
- 'kk-kz' => 'kk-cyrl',
- 'kk-tr' => 'kk-latn',
- 'kk-cn' => 'kk-arab'
- ];
-
- parent::__construct( $langobj, 'kk',
- $variants, $variantfallbacks, [] );
-
- // No point delaying this since they're in code.
- // Waiting until loadDefaultTables() means they never get loaded
- // when the tables themselves are loaded from cache.
- $this->loadRegs();
- }
-
- protected function loadDefaultTables() {
- // require __DIR__."/../../includes/KkConversion.php";
- // Placeholder for future implementing. Remove variables declarations
- // after generating KkConversion.php
- $kk2Cyrl = [];
- $kk2Latn = [];
- $kk2Arab = [];
- $kk2KZ = [];
- $kk2TR = [];
- $kk2CN = [];
-
- $this->mTables = [
- 'kk-cyrl' => new ReplacementArray( $kk2Cyrl ),
- 'kk-latn' => new ReplacementArray( $kk2Latn ),
- 'kk-arab' => new ReplacementArray( $kk2Arab ),
- 'kk-kz' => new ReplacementArray( array_merge( $kk2Cyrl, $kk2KZ ) ),
- 'kk-tr' => new ReplacementArray( array_merge( $kk2Latn, $kk2TR ) ),
- 'kk-cn' => new ReplacementArray( array_merge( $kk2Arab, $kk2CN ) ),
- 'kk' => new ReplacementArray()
- ];
- }
-
- protected function postLoadTables() {
- $this->mTables['kk-kz']->merge( $this->mTables['kk-cyrl'] );
- $this->mTables['kk-tr']->merge( $this->mTables['kk-latn'] );
- $this->mTables['kk-cn']->merge( $this->mTables['kk-arab'] );
- }
-
- private function loadRegs() {
- $this->mCyrl2Latn = [
- # # Punctuation
- '/№/u' => 'No.',
- # # Е after vowels
- '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE',
- '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye',
- # # leading ЁЮЯЩ
- '/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1', '/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1',
- '/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1',
- '/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1',
- '/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1', '/^Щ([' . KK_C_LC . ']|$)/u' => 'Şç$1',
- # # other ЁЮЯ
- '/Ё/u' => 'YO', '/ё/u' => 'yo',
- '/Ю/u' => 'YU', '/ю/u' => 'yu',
- '/Я/u' => 'YA', '/я/u' => 'ya',
- '/Щ/u' => 'ŞÇ', '/щ/u' => 'şç',
- # # soft and hard signs
- '/[ъЪ]/u' => 'ʺ', '/[ьЬ]/u' => 'ʹ',
- # # other characters
- '/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä',
- '/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v',
- '/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ',
- '/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e',
- '/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z',
- '/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý',
- '/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q',
- '/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm',
- '/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ',
- '/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö',
- '/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r',
- '/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't',
- '/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u',
- '/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f',
- '/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h',
- '/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç',
- '/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı',
- '/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é',
- ];
-
- $this->mLatn2Cyrl = [
- # # Punctuation
- '/#|No\./' => '№',
- # # Şç
- '/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь',
- '/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ',
- # # soft and hard signs
- '/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2',
- '/ʺ([' . KK_L_LC . '])/u' => 'ъ$1',
- '/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2',
- '/ʹ([' . KK_L_LC . '])/u' => 'ь$1',
- '/ʺ/u' => 'ъ',
- '/ʹ/u' => 'ь',
- # # Ye Yo Yu Ya.
- '/Y[Ee]/u' => 'Е', '/ye/u' => 'е',
- '/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё',
- '/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю',
- '/Y[Aa]/u' => 'Я', '/ya/u' => 'я',
- # # other characters
- '/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә',
- '/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц',
- '/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д',
- '/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э',
- '/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г',
- '/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ',
- '/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і',
- '/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж',
- '/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л',
- '/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н',
- '/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о',
- '/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п',
- '/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р',
- '/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш',
- '/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ',
- '/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в',
- '/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й',
- '/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з',
- ];
-
- $this->mCyLa2Arab = [
- # # Punctuation -> Arabic
- '/#|№|No\./u' => '؀', # U+0600
- '/\,/' => '،', # U+060C
- '/;/' => '؛', # U+061B
- '/\?/' => '؟', # U+061F
- '/%/' => '٪', # U+066A
- '/\*/' => '٭', # U+066D
- # # Digits -> Arabic
- '/0/' => '۰', # U+06F0
- '/1/' => '۱', # U+06F1
- '/2/' => '۲', # U+06F2
- '/3/' => '۳', # U+06F3
- '/4/' => '۴', # U+06F4
- '/5/' => '۵', # U+06F5
- '/6/' => '۶', # U+06F6
- '/7/' => '۷', # U+06F7
- '/8/' => '۸', # U+06F8
- '/9/' => '۹', # U+06F9
- # # Cyrillic -> Arabic
- '/Аллаһ/ui' => 'ﷲ',
- '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە',
- '/[еэ]/ui' => 'ە', '/[ъь]/ui' => '',
- '/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى',
- '/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي',
- '/ц/ui' => 'تس', '/щ/ui' => 'شش',
- '/һ/ui' => 'ح', '/ч/ui' => 'تش',
- # '/һ/ui' => 'ھ', '/ч/ui' => 'چ',
- '/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع',
- '/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك',
- '/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن',
- '/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س',
- '/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح',
- '/ш/ui' => 'ش',
- # # Latin -> Arabic // commented for now...
- /*'/Allah/ui' => 'ﷲ',
- '/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '',
- '/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ',
- '/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى',
- '/c/ui' => 'تس',
- '/ç/ui' => 'تش', '/h/ui' => 'ح',
- #'/ç/ui' => 'چ', '/h/ui' => 'ھ',
- '/b/ui' => 'ب','/d/ui' => 'د',
- '/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع',
- '/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م',
- '/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق',
- '/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت',
- '/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/
- ];
- }
-
- /**
- * A function wrapper:
- * - if there is no selected variant, leave the link
- * names as they were
- * - do not try to find variants for usernames
- *
- * @param string &$link
- * @param Title &$nt
- * @param bool $ignoreOtherCond
- */
- public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
- // check for user namespace
- if ( is_object( $nt ) ) {
- $ns = $nt->getNamespace();
- if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
- return;
- }
- }
-
- $oldlink = $link;
- parent::findVariantLink( $link, $nt, $ignoreOtherCond );
- if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
- $link = $oldlink;
- }
- }
-
- /**
- * It translates text into variant
- *
- * @param string $text
- * @param string $toVariant
- *
- * @return string
- */
- public function translate( $text, $toVariant ) {
- $text = parent::translate( $text, $toVariant );
-
- switch ( $toVariant ) {
- case 'kk-cyrl':
- case 'kk-kz':
- $letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789';
- break;
- case 'kk-latn':
- case 'kk-tr':
- $letters = KK_C_UC . KK_C_LC . '№0123456789';
- break;
- case 'kk-arab':
- case 'kk-cn':
- $letters = KK_C_UC . KK_C_LC . /*KK_L_UC.KK_L_LC.'ʺʹ'.*/',;\?%\*№0123456789';
- break;
- default:
- return $text;
- }
- // disable conversion variables like $1, $2...
- $varsfix = '\$[0-9]';
-
- $matches = preg_split(
- '/' . $varsfix . '[^' . $letters . ']+/u',
- $text,
- -1,
- PREG_SPLIT_OFFSET_CAPTURE
- );
-
- $mstart = 0;
- $ret = '';
-
- foreach ( $matches as $m ) {
- $ret .= substr( $text, $mstart, $m[1] - $mstart );
- $ret .= $this->regsConverter( $m[0], $toVariant );
- $mstart = $m[1] + strlen( $m[0] );
- }
-
- return $ret;
- }
-
- /**
- * @param string $text
- * @param string $toVariant
- * @return mixed|string
- */
- private function regsConverter( $text, $toVariant ) {
- if ( $text == '' ) {
- return $text;
- }
-
- switch ( $toVariant ) {
- case 'kk-arab':
- case 'kk-cn':
- $letters = KK_C_LC . KK_C_UC; /*.KK_L_LC.KK_L_UC*/
- $front = 'әөүіӘӨҮІ'; /*.'äöüiÄÖÜİ'*/
- $excludes = 'еэгғкқЕЭГҒКҚ'; /*.'eégğkqEÉGĞKQ'*/
- // split text to words
- $matches = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
- $mstart = 0;
- $ret = '';
- foreach ( $matches as $m ) {
- $ret .= substr( $text, $mstart, $m[1] - $mstart );
- // is matched the word to front vowels?
- // exclude a words matched to е, э, г, к, к, қ,
- // them should be without hamza
- if ( preg_match( '/[' . $front . ']/u', $m[0] ) &&
- !preg_match( '/[' . $excludes . ']/u', $m[0] )
- ) {
- $ret .= preg_replace( '/[' . $letters . ']+/u', H_HAMZA . '$0', $m[0] );
- } else {
- $ret .= $m[0];
- }
- $mstart = $m[1] + strlen( $m[0] );
- }
- $text =& $ret;
- foreach ( $this->mCyLa2Arab as $pat => $rep ) {
- $text = preg_replace( $pat, $rep, $text );
- }
- return $text;
- case 'kk-latn':
- case 'kk-tr':
- foreach ( $this->mCyrl2Latn as $pat => $rep ) {
- $text = preg_replace( $pat, $rep, $text );
- }
- return $text;
- case 'kk-cyrl':
- case 'kk-kz':
- foreach ( $this->mLatn2Cyrl as $pat => $rep ) {
- $text = preg_replace( $pat, $rep, $text );
- }
- return $text;
- default:
- return $text;
- }
- }
-
- /**
- * @param string $key
- * @return string
- */
- public function convertCategoryKey( $key ) {
- return $this->autoConvert( $key, 'kk' );
- }
-}
-
/**
* class that handles Cyrillic, Latin and Arabic scripts for Kazakh
* right now it only distinguish kk_cyrl, kk_latn, kk_arab and kk_kz, kk_tr, kk_cn.
diff --git a/languages/classes/LanguageKu.php b/languages/classes/LanguageKu.php
deleted file mode 100644
index bf3575483982..000000000000
--- a/languages/classes/LanguageKu.php
+++ /dev/null
@@ -1,238 +0,0 @@
-<?php
-/**
- * Kurdish specific code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Language
- */
-
-/**
- * Kurdish converter routines
- *
- * @ingroup Language
- */
-class KuConverter extends LanguageConverter {
- public $mArabicToLatin = [
- 'ب' => 'b', 'ج' => 'c', 'چ' => 'ç', 'د' => 'd', 'ف' => 'f', 'گ' => 'g', 'ھ' => 'h',
- 'ہ' => 'h', 'ه' => 'h', 'ح' => 'h', 'ژ' => 'j', 'ك' => 'k', 'ک' => 'k', 'ل' => 'l',
- 'م' => 'm', 'ن' => 'n', 'پ' => 'p', 'ق' => 'q', 'ر' => 'r', 'س' => 's', 'ش' => 'ş',
- 'ت' => 't', 'ڤ' => 'v', 'خ' => 'x', 'غ' => 'x', 'ز' => 'z',
-
-// ک و => ku -- ist richtig
-// و ك=> ku -- ist auch richtig
-
- /* Doppel- und Halbvokale */
- 'ڵ' => 'll', # ll
- 'ڕ' => 'rr', # rr
- 'ا' => 'a',
- # 'ئێ' => 'ê', # initial e
- 'ە' => 'e',
- 'ه‌' => 'e', # with one non-joiner
- 'ه‌‌' => 'e', # with two non-joiner
- 'ة' => 'e',
- 'ێ' => 'ê',
- 'ي' => 'î',
- 'ی' => 'î', # U+06CC db 8c ARABIC LETTER FARSI YEH
- 'ى' => 'î', # U+0649 d9 89 ARABIC LETTER ALEF MAKSURA
- 'ۆ' => 'o',
- 'و' => 'w',
- 'ئ' => '', # initial hemze should not be shown
- '،' => ',',
- 'ع' => '\'', # ayn
- '؟' => '?',
-
- # digits
- '٠' => '0', # U+0660
- '١' => '1', # U+0661
- '٢' => '2', # U+0662
- '٣' => '3', # U+0663
- '٤' => '4', # U+0664
- '٥' => '5', # U+0665
- '٦' => '6', # U+0666
- '٧' => '7', # U+0667
- '٨' => '8', # U+0668
- '٩' => '9', # U+0669
- ];
-
- public $mLatinToArabic = [
- 'b' => 'ب', 'c' => 'ج', 'ç' => 'چ', 'd' => 'د', 'f' => 'ف', 'g' => 'گ',
- 'h' => 'ه', 'j' => 'ژ', 'k' => 'ک', 'l' => 'ل',
- 'm' => 'م', 'n' => 'ن', 'p' => 'پ', 'q' => 'ق', 'r' => 'ر', 's' => 'س', 'ş' => 'ش',
- 't' => 'ت', 'v' => 'ڤ',
- 'x' => 'خ', 'y' => 'ی', 'z' => 'ز',
-
- 'B' => 'ب', 'C' => 'ج', 'Ç' => 'چ', 'D' => 'د', 'F' => 'ف', 'G' => 'گ',
- 'H' => 'ح', 'J' => 'ژ', 'K' => 'ک', 'L' => 'ل',
- 'M' => 'م', 'N' => 'ن', 'P' => 'پ', 'Q' => 'ق', 'R' => 'ر', 'S' => 'س', 'Ş' => 'ش',
- 'T' => 'ت', 'V' => 'ڤ', 'W' => 'و', 'X' => 'خ',
- 'Y' => 'ی', 'Z' => 'ز',
-
- /* Doppelkonsonanten */
- # 'll' => 'ڵ', # wenn es geht, doppel-l und l getrennt zu behandeln
- # 'rr' => 'ڕ', # selbiges für doppel-r
-
- /* Einzelne Großbuchstaben */
- // ' C' => 'ج',
-
- /* Vowels */
- 'a' => 'ا',
- 'e' => 'ە',
- 'ê' => 'ێ',
- 'i' => '',
- 'î' => 'ی',
- 'o' => 'ۆ',
- 'u' => 'و',
- 'û' => 'وو',
- 'w' => 'و',
- ',' => '،',
- '?' => '؟',
-
- # Try to replace the leading vowel
- ' a' => 'ئا ',
- ' e' => 'ئە ',
- ' ê' => 'ئێ ',
- ' î' => 'ئی ',
- ' o' => 'ئۆ ',
- ' u' => 'ئو ',
- ' û' => 'ئوو ',
- 'A' => 'ئا',
- 'E' => 'ئە',
- 'Ê' => 'ئێ',
- 'Î' => 'ئی',
- 'O' => 'ئۆ',
- 'U' => 'ئو',
- 'Û' => 'ئوو',
- ' A' => 'ئا ',
- ' E' => 'ئە ',
- ' Ê' => 'ئێ ',
- ' Î' => 'ئی ',
- ' O' => 'ئۆ ',
- ' U' => 'ئو ',
- ' Û' => 'ئوو ',
- # eyn erstmal deaktivieren, einfache Anführungsstriche sind einfach zu
- # häufig, um sie als eyn zu interpretieren.
- # '\'' => 'ع',
-
-/* # deactivated for now, breaks links i.e. in header of Special:Recentchanges :-(
- # digits
- '0' => '٠', # U+0660
- '1' => '١', # U+0661
- '2' => '٢', # U+0662
- '3' => '٣', # U+0663
- '4' => '٤', # U+0664
- '5' => '٥', # U+0665
- '6' => '٦', # U+0666
- '7' => '٧', # U+0667
- '8' => '٨', # U+0668
- '9' => '٩', # U+0669
-*/
- ];
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $variants = [ 'ku', 'ku-arab', 'ku-latn' ];
- $variantfallbacks = [
- 'ku' => 'ku-latn',
- 'ku-arab' => 'ku-latn',
- 'ku-latn' => 'ku-arab',
- ];
-
- parent::__construct( $langobj, 'ku', $variants, $variantfallbacks );
- }
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'ku-latn' => new ReplacementArray( $this->mArabicToLatin ),
- 'ku-arab' => new ReplacementArray( $this->mLatinToArabic ),
- 'ku' => new ReplacementArray()
- ];
- }
-
- /**
- * A function wrapper:
- * - if there is no selected variant, leave the link
- * names as they were
- * - do not try to find variants for usernames
- *
- * @param string &$link
- * @param Title &$nt
- * @param bool $ignoreOtherCond
- */
- public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
- // check for user namespace
- if ( is_object( $nt ) ) {
- $ns = $nt->getNamespace();
- if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
- return;
- }
- }
-
- $oldlink = $link;
- parent::findVariantLink( $link, $nt, $ignoreOtherCond );
- if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
- $link = $oldlink;
- }
- }
-
- /**
- * It translates text into variant, specials:
- * - ommiting roman numbers
- *
- * @param string $text
- * @param bool $toVariant
- *
- * @throws MWException
- * @return string
- */
- public function translate( $text, $toVariant ) {
- $this->loadTables();
- /* From Kazakh interface, maybe we need it later
- $breaks = '[^\w\x80-\xff]';
- // regexp for roman numbers
- // Lookahead assertion ensures $roman doesn't match the empty string
- $roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';
- $roman = '';
-
- $reg = '/^'.$roman.'$|^'.$roman.$breaks.'|'.$breaks.$roman.'$|'.$breaks.$roman.$breaks.'/';
-
- $matches = preg_split($reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE);
-
- $m = array_shift($matches);
- if( !isset( $this->mTables[$toVariant] ) ) {
- throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
- }
- $ret = $this->mTables[$toVariant]->replace( $m[0] );
- $mstart = $m[1]+strlen($m[0]);
- foreach($matches as $m) {
- $ret .= substr($text, $mstart, $m[1]-$mstart);
- $ret .= parent::translate($m[0], $toVariant);
- $mstart = $m[1] + strlen($m[0]);
- }
-
- return $ret;
- */
-
- if ( !isset( $this->mTables[$toVariant] ) ) {
- throw new MWException( "Broken variant table: " . implode( ',', array_keys( $this->mTables ) ) );
- }
-
- return parent::translate( $text, $toVariant );
- }
-}
diff --git a/languages/classes/LanguageShi.php b/languages/classes/LanguageShi.php
deleted file mode 100644
index 6565b51a1f63..000000000000
--- a/languages/classes/LanguageShi.php
+++ /dev/null
@@ -1,137 +0,0 @@
-<?php
-/**
- * Shilha specific code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Language
- */
-
-/**
- * Conversion script between Latin and Tifinagh for Tachelhit.
- * - Tifinagh -> lowercase Latin
- * - lowercase/uppercase Latin -> Tifinagh
- *
- *
- * Based on:
- * - https://en.wikipedia.org/wiki/Shilha_language
- * - LanguageSr.php
- *
- * @ingroup Language
- */
-class ShiConverter extends LanguageConverter {
- protected $mDoContentConvert;
-
- public $mToLatin = [
- 'ⴰ' => 'a', 'ⴱ' => 'b', 'ⴳ' => 'g', 'ⴷ' => 'd', 'ⴹ' => 'ḍ', 'ⴻ' => 'e',
- 'ⴼ' => 'f', 'ⴽ' => 'k', 'ⵀ' => 'h', 'ⵃ' => 'ḥ', 'ⵄ' => 'ε', 'ⵅ' => 'x',
- 'ⵇ' => 'q', 'ⵉ' => 'i', 'ⵊ' => 'j', 'ⵍ' => 'l', 'ⵎ' => 'm', 'ⵏ' => 'n',
- 'ⵓ' => 'u', 'ⵔ' => 'r', 'ⵕ' => 'ṛ', 'ⵙ' => 's', 'ⵚ' => 'ṣ',
- 'ⵛ' => 'š', 'ⵜ' => 't', 'ⵟ' => 'ṭ', 'ⵡ' => 'w', 'ⵢ' => 'y', 'ⵣ' => 'z',
- 'ⵥ' => 'ẓ', 'ⵯ' => 'ʷ', 'ⵖ' => 'ɣ', 'ⵠ' => 'v', 'ⵒ' => 'p',
- ];
-
- public $mUpperToLowerCaseLatin = [
- 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
- 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j',
- 'K' => 'k', 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o',
- 'P' => 'p', 'Q' => 'q', 'R' => 'r', 'S' => 's', 'T' => 't',
- 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x', 'Y' => 'y',
- 'Z' => 'z', 'Ɣ' => 'ɣ',
- ];
-
- public $mToTifinagh = [
- 'a' => 'ⴰ', 'b' => 'ⴱ', 'g' => 'ⴳ', 'd' => 'ⴷ', 'ḍ' => 'ⴹ', 'e' => 'ⴻ',
- 'f' => 'ⴼ', 'k' => 'ⴽ', 'h' => 'ⵀ', 'ḥ' => 'ⵃ', 'ε' => 'ⵄ', 'x' => 'ⵅ',
- 'q' => 'ⵇ', 'i' => 'ⵉ', 'j' => 'ⵊ', 'l' => 'ⵍ', 'm' => 'ⵎ', 'n' => 'ⵏ',
- 'u' => 'ⵓ', 'r' => 'ⵔ', 'ṛ' => 'ⵕ', 'γ' => 'ⵖ', 's' => 'ⵙ', 'ṣ' => 'ⵚ',
- 'š' => 'ⵛ', 't' => 'ⵜ', 'ṭ' => 'ⵟ', 'w' => 'ⵡ', 'y' => 'ⵢ', 'z' => 'ⵣ',
- 'ẓ' => 'ⵥ', 'ʷ' => 'ⵯ', 'ɣ' => 'ⵖ', 'v' => 'ⵠ', 'p' => 'ⵒ',
- ];
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $variants = [ 'shi', 'shi-tfng', 'shi-latn' ];
- $variantfallbacks = [
- 'shi' => 'shi-tfng',
- 'shi-tfng' => 'shi',
- 'shi-latn' => 'shi',
- ];
-
- $flags = [];
- parent::__construct( $langobj, 'shi', $variants, $variantfallbacks, $flags );
- }
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'lowercase' => new ReplacementArray( $this->mUpperToLowerCaseLatin ),
- 'shi-tfng' => new ReplacementArray( $this->mToTifinagh ),
- 'shi-latn' => new ReplacementArray( $this->mToLatin ),
- 'shi' => new ReplacementArray()
- ];
- }
-
- /**
- * A function wrapper:
- * - if there is no selected variant, leave the link
- * names as they were
- * - do not try to find variants for usernames
- *
- * @param string &$link
- * @param Title &$nt
- * @param bool $ignoreOtherCond
- */
- public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
- // check for user namespace
- if ( is_object( $nt ) ) {
- $ns = $nt->getNamespace();
- if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
- return;
- }
- }
-
- $oldlink = $link;
- parent::findVariantLink( $link, $nt, $ignoreOtherCond );
- if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
- $link = $oldlink;
- }
- }
-
- /**
- * It translates text into variant
- *
- * @param string $text
- * @param string $toVariant
- *
- * @return string
- */
- public function translate( $text, $toVariant ) {
- // If $text is empty or only includes spaces, do nothing
- // Otherwise translate it
- if ( trim( $text ) ) {
- $this->loadTables();
- // To Tifinagh, first translate uppercase to lowercase Latin
- if ( $toVariant == 'shi-tfng' ) {
- $text = $this->mTables['lowercase']->replace( $text );
- }
- $text = $this->mTables[$toVariant]->replace( $text );
- }
- return $text;
- }
-}
diff --git a/languages/classes/LanguageSr.php b/languages/classes/LanguageSr.php
deleted file mode 100644
index 2672f1f0c63f..000000000000
--- a/languages/classes/LanguageSr.php
+++ /dev/null
@@ -1,185 +0,0 @@
-<?php
-/**
- * Serbian (Српски / Srpski) specific code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Language
- */
-
-/**
- * There are two levels of conversion for Serbian: the script level
- * (Cyrillics <-> Latin), and the variant level (ekavian
- * <->iyekavian). The two are orthogonal. So we really only need two
- * dictionaries: one for Cyrillics and Latin, and one for ekavian and
- * iyekavian.
- *
- * @ingroup Language
- */
-class SrConverter extends LanguageConverter {
- public $mToLatin = [
- 'а' => 'a', 'б' => 'b', 'в' => 'v', 'г' => 'g', 'д' => 'd',
- 'ђ' => 'đ', 'е' => 'e', 'ж' => 'ž', 'з' => 'z', 'и' => 'i',
- 'ј' => 'j', 'к' => 'k', 'л' => 'l', 'љ' => 'lj', 'м' => 'm',
- 'н' => 'n', 'њ' => 'nj', 'о' => 'o', 'п' => 'p', 'р' => 'r',
- 'с' => 's', 'т' => 't', 'ћ' => 'ć', 'у' => 'u', 'ф' => 'f',
- 'х' => 'h', 'ц' => 'c', 'ч' => 'č', 'џ' => 'dž', 'ш' => 'š',
-
- 'А' => 'A', 'Б' => 'B', 'В' => 'V', 'Г' => 'G', 'Д' => 'D',
- 'Ђ' => 'Đ', 'Е' => 'E', 'Ж' => 'Ž', 'З' => 'Z', 'И' => 'I',
- 'Ј' => 'J', 'К' => 'K', 'Л' => 'L', 'Љ' => 'Lj', 'М' => 'M',
- 'Н' => 'N', 'Њ' => 'Nj', 'О' => 'O', 'П' => 'P', 'Р' => 'R',
- 'С' => 'S', 'Т' => 'T', 'Ћ' => 'Ć', 'У' => 'U', 'Ф' => 'F',
- 'Х' => 'H', 'Ц' => 'C', 'Ч' => 'Č', 'Џ' => 'Dž', 'Ш' => 'Š',
- ];
-
- public $mToCyrillics = [
- 'a' => 'а', 'b' => 'б', 'c' => 'ц', 'č' => 'ч', 'ć' => 'ћ',
- 'd' => 'д', 'dž' => 'џ', 'đ' => 'ђ', 'e' => 'е', 'f' => 'ф',
- 'g' => 'г', 'h' => 'х', 'i' => 'и', 'j' => 'ј', 'k' => 'к',
- 'l' => 'л', 'lj' => 'љ', 'm' => 'м', 'n' => 'н', 'nj' => 'њ',
- 'o' => 'о', 'p' => 'п', 'r' => 'р', 's' => 'с', 'š' => 'ш',
- 't' => 'т', 'u' => 'у', 'v' => 'в', 'z' => 'з', 'ž' => 'ж',
-
- 'A' => 'А', 'B' => 'Б', 'C' => 'Ц', 'Č' => 'Ч', 'Ć' => 'Ћ',
- 'D' => 'Д', 'Dž' => 'Џ', 'Đ' => 'Ђ', 'E' => 'Е', 'F' => 'Ф',
- 'G' => 'Г', 'H' => 'Х', 'I' => 'И', 'J' => 'Ј', 'K' => 'К',
- 'L' => 'Л', 'LJ' => 'Љ', 'M' => 'М', 'N' => 'Н', 'NJ' => 'Њ',
- 'O' => 'О', 'P' => 'П', 'R' => 'Р', 'S' => 'С', 'Š' => 'Ш',
- 'T' => 'Т', 'U' => 'У', 'V' => 'В', 'Z' => 'З', 'Ž' => 'Ж',
-
- 'DŽ' => 'Џ', 'd!ž' => 'дж', 'D!ž' => 'Дж', 'D!Ž' => 'ДЖ',
- 'Lj' => 'Љ', 'l!j' => 'лј', 'L!j' => 'Лј', 'L!J' => 'ЛЈ',
- 'Nj' => 'Њ', 'n!j' => 'нј', 'N!j' => 'Нј', 'N!J' => 'НЈ'
- ];
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $variants = [ 'sr', 'sr-ec', 'sr-el' ];
- $variantfallbacks = [
- 'sr' => 'sr-ec',
- 'sr-ec' => 'sr',
- 'sr-el' => 'sr',
- ];
-
- $flags = [
- 'S' => 'S', 'писмо' => 'S', 'pismo' => 'S',
- 'W' => 'W', 'реч' => 'W', 'reč' => 'W', 'ријеч' => 'W', 'riječ' => 'W'
- ];
- parent::__construct( $langobj, 'sr', $variants, $variantfallbacks, $flags );
- }
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'sr-ec' => new ReplacementArray( $this->mToCyrillics ),
- 'sr-el' => new ReplacementArray( $this->mToLatin ),
- 'sr' => new ReplacementArray()
- ];
- }
-
- /**
- * A function wrapper:
- * - if there is no selected variant, leave the link
- * names as they were
- * - do not try to find variants for usernames
- *
- * @param string &$link
- * @param Title &$nt
- * @param bool $ignoreOtherCond
- */
- public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
- // check for user namespace
- if ( is_object( $nt ) ) {
- $ns = $nt->getNamespace();
- if ( $ns == NS_USER || $ns == NS_USER_TALK ) {
- return;
- }
- }
-
- $oldlink = $link;
- parent::findVariantLink( $link, $nt, $ignoreOtherCond );
- if ( $this->getPreferredVariant() == $this->mMainLanguageCode ) {
- $link = $oldlink;
- }
- }
-
- /**
- * It translates text into variant, specials:
- * - ommiting roman numbers
- *
- * @param string $text
- * @param string $toVariant
- *
- * @throws MWException
- * @return string
- */
- public function translate( $text, $toVariant ) {
- $breaks = '[^\w\x80-\xff]';
-
- // regexp for roman numbers
- // Lookahead assertion ensures $roman doesn't match the empty string
- $roman = '(?=[MDCLXVI])M{0,4}(C[DM]|D?C{0,3})(X[LC]|L?X{0,3})(I[VX]|V?I{0,3})';
-
- $reg = '/^' . $roman . '$|^' . $roman . $breaks . '|' . $breaks
- . $roman . '$|' . $breaks . $roman . $breaks . '/';
-
- $matches = preg_split( $reg, $text, -1, PREG_SPLIT_OFFSET_CAPTURE );
-
- $m = array_shift( $matches );
- $this->loadTables();
- if ( !isset( $this->mTables[$toVariant] ) ) {
- throw new MWException( "Broken variant table: "
- . implode( ',', array_keys( $this->mTables ) ) );
- }
- $ret = $this->mTables[$toVariant]->replace( $m[0] );
- $mstart = $m[1] + strlen( $m[0] );
- foreach ( $matches as $m ) {
- $ret .= substr( $text, $mstart, $m[1] - $mstart );
- $ret .= parent::translate( $m[0], $toVariant );
- $mstart = $m[1] + strlen( $m[0] );
- }
-
- return $ret;
- }
-
- /**
- * Guess if a text is written in Cyrillic or Latin.
- * Overrides LanguageConverter::guessVariant()
- *
- * @param string $text The text to be checked
- * @param string $variant Language code of the variant to be checked for
- * @return bool True if $text appears to be written in $variant
- *
- * @author Nikola Smolenski <smolensk@eunet.rs>
- * @since 1.19
- */
- public function guessVariant( $text, $variant ) {
- $numCyrillic = preg_match_all( "/[шђчћжШЂЧЋЖ]/u", $text, $dummy );
- $numLatin = preg_match_all( "/[šđč枊ĐČĆŽ]/u", $text, $dummy );
-
- if ( $variant == 'sr-ec' ) {
- return $numCyrillic > $numLatin;
- } elseif ( $variant == 'sr-el' ) {
- return $numLatin > $numCyrillic;
- } else {
- return false;
- }
- }
-
-}
diff --git a/languages/classes/LanguageTg.php b/languages/classes/LanguageTg.php
deleted file mode 100644
index 2cec7d031fee..000000000000
--- a/languages/classes/LanguageTg.php
+++ /dev/null
@@ -1,120 +0,0 @@
-<?php
-/**
- * Tajik (Тоҷикӣ) specific code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Language
- */
-
-/**
- * Converts Tajiki to Latin orthography
- *
- * @ingroup Language
- */
-class TgConverter extends LanguageConverter {
- private $table = [
- 'а' => 'a',
- 'б' => 'b',
- 'в' => 'v',
- 'г' => 'g',
- 'д' => 'd',
- 'е' => 'e',
- 'ё' => 'jo',
- 'ж' => 'ƶ',
- 'з' => 'z',
- 'ии ' => 'iji ',
- 'и' => 'i',
- 'й' => 'j',
- 'к' => 'k',
- 'л' => 'l',
- 'м' => 'm',
- 'н' => 'n',
- 'о' => 'o',
- 'п' => 'p',
- 'р' => 'r',
- 'с' => 's',
- 'т' => 't',
- 'у' => 'u',
- 'ф' => 'f',
- 'х' => 'x',
- 'ч' => 'c',
- 'ш' => 'ş',
- 'ъ' => '\'',
- 'э' => 'e',
- 'ю' => 'ju',
- 'я' => 'ja',
- 'ғ' => 'ƣ',
- 'ӣ' => 'ī',
- 'қ' => 'q',
- 'ӯ' => 'ū',
- 'ҳ' => 'h',
- 'ҷ' => 'ç',
- 'ц' => 'ts',
- 'А' => 'A',
- 'Б' => 'B',
- 'В' => 'V',
- 'Г' => 'G',
- 'Д' => 'D',
- 'Е' => 'E',
- 'Ё' => 'Jo',
- 'Ж' => 'Ƶ',
- 'З' => 'Z',
- 'И' => 'I',
- 'Й' => 'J',
- 'К' => 'K',
- 'Л' => 'L',
- 'М' => 'M',
- 'Н' => 'N',
- 'О' => 'O',
- 'П' => 'P',
- 'Р' => 'R',
- 'С' => 'S',
- 'Т' => 'T',
- 'У' => 'U',
- 'Ф' => 'F',
- 'Х' => 'X',
- 'Ч' => 'C',
- 'Ш' => 'Ş',
- 'Ъ' => '\'',
- 'Э' => 'E',
- 'Ю' => 'Ju',
- 'Я' => 'Ja',
- 'Ғ' => 'Ƣ',
- 'Ӣ' => 'Ī',
- 'Қ' => 'Q',
- 'Ӯ' => 'Ū',
- 'Ҳ' => 'H',
- 'Ҷ' => 'Ç',
- 'Ц' => 'Ts',
- ];
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $variants = [ 'tg', 'tg-latn' ];
- parent::__construct( $langobj, 'tg', $variants );
- }
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'tg-latn' => new ReplacementArray( $this->table ),
- 'tg' => new ReplacementArray()
- ];
- }
-}
diff --git a/languages/classes/LanguageUz.php b/languages/classes/LanguageUz.php
deleted file mode 100644
index ae822efd680b..000000000000
--- a/languages/classes/LanguageUz.php
+++ /dev/null
@@ -1,138 +0,0 @@
-<?php
-/**
- * Uzbek specific code.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Language
- */
-
-/**
- * @ingroup Language
- */
-class UzConverter extends LanguageConverter {
- public $toLatin = [
- 'а' => 'a', 'А' => 'A',
- 'б' => 'b', 'Б' => 'B',
- 'д' => 'd', 'Д' => 'D',
- 'е' => 'e', 'Е' => 'E',
- 'э' => 'e', 'Э' => 'E',
- 'в' => 'v', 'В' => 'V',
- 'х' => 'x', 'Х' => 'X',
- 'ғ' => 'gʻ', 'Ғ' => 'Gʻ',
- 'г' => 'g', 'Г' => 'G',
- 'ҳ' => 'h', 'Ҳ' => 'H',
- 'ж' => 'j', 'Ж' => 'J',
- 'з' => 'z', 'З' => 'Z',
- 'и' => 'i', 'И' => 'I',
- 'к' => 'k', 'К' => 'K',
- 'л' => 'l', 'Л' => 'L',
- 'м' => 'm', 'М' => 'M',
- 'н' => 'n', 'Н' => 'N',
- 'о' => 'o', 'О' => 'O',
- 'п' => 'p', 'П' => 'P',
- 'р' => 'r', 'Р' => 'R',
- 'с' => 's', 'С' => 'S',
- 'т' => 't', 'Т' => 'T',
- 'у' => 'u', 'У' => 'U',
- 'ф' => 'f', 'Ф' => 'F',
- 'ў' => 'oʻ', 'Ў' => 'Oʻ',
- // note: at the beginning of a word and right after a consonant, only "s" is used
- 'ц' => 'ts', 'Ц' => 'Ts',
- 'қ' => 'q', 'Қ' => 'Q',
- 'ё' => 'yo', 'Ё' => 'Yo',
- 'ю' => 'yu', 'Ю' => 'Yu',
- 'ч' => 'ch', 'Ч' => 'Ch',
- 'ш' => 'sh', 'Ш' => 'Sh',
- 'й' => 'y', 'Й' => 'Y',
- 'я' => 'ya', 'Я' => 'Ya',
- 'ъ' => 'ʼ',
- ];
-
- public $toCyrillic = [
- 'a' => 'а', 'A' => 'А',
- 'b' => 'б', 'B' => 'Б',
- 'd' => 'д', 'D' => 'Д',
- // at the beginning of a word and after a vowel, "э" is used instead of "e"
- // (see regex below)
- 'e' => 'э', 'E' => 'Э',
- 'f' => 'ф', 'F' => 'Ф',
- 'g' => 'г', 'G' => 'Г',
- 'g‘' => 'ғ', 'G‘' => 'Ғ', 'gʻ' => 'ғ', 'Gʻ' => 'Ғ',
- 'h' => 'ҳ', 'H' => 'Ҳ',
- 'i' => 'и', 'I' => 'И',
- 'k' => 'к', 'K' => 'К',
- 'l' => 'л', 'L' => 'Л',
- 'm' => 'м', 'M' => 'М',
- 'n' => 'н', 'N' => 'Н',
- 'o' => 'о', 'O' => 'О',
- 'p' => 'п', 'P' => 'П',
- 'r' => 'р', 'R' => 'Р',
- 's' => 'с', 'S' => 'С',
- 't' => 'т', 'T' => 'Т',
- 'u' => 'у', 'U' => 'У',
- 'v' => 'в', 'V' => 'В',
- 'x' => 'х', 'X' => 'Х',
- 'z' => 'з', 'Z' => 'З',
- 'j' => 'ж', 'J' => 'Ж',
- 'o‘' => 'ў', 'O‘' => 'Ў', 'oʻ' => 'ў', 'Oʻ' => 'Ў',
- 'yo‘' => 'йў', 'Yo‘' => 'Йў', 'yoʻ' => 'йў', 'Yoʻ' => 'Йў',
- 'ts' => 'ц', 'Ts' => 'Ц',
- 'q' => 'қ', 'Q' => 'Қ',
- 'yo' => 'ё', 'Yo' => 'Ё',
- 'yu' => 'ю', 'Yu' => 'Ю',
- 'ch' => 'ч', 'Ch' => 'Ч',
- 'sh' => 'ш', 'Sh' => 'Ш',
- 'y' => 'й', 'Y' => 'Й',
- 'ya' => 'я', 'Ya' => 'Я',
- 'ʼ' => 'ъ',
- ];
-
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $variants = [ 'uz', 'uz-latn', 'uz-cyrl' ];
- $variantfallbacks = [
- 'uz' => 'uz-latn',
- 'uz-cyrl' => 'uz',
- 'uz-latn' => 'uz',
- ];
- parent::__construct( $langobj, 'uz', $variants, $variantfallbacks );
- }
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'uz-cyrl' => new ReplacementArray( $this->toCyrillic ),
- 'uz-latn' => new ReplacementArray( $this->toLatin ),
- 'uz' => new ReplacementArray()
- ];
- }
-
- public function translate( $text, $toVariant ) {
- if ( $toVariant == 'uz-cyrl' ) {
- $text = str_replace( 'ye', 'е', $text );
- $text = str_replace( 'Ye', 'Е', $text );
- $text = str_replace( 'YE', 'Е', $text );
- // "е" after consonants, otherwise "э" (see above)
- $text = preg_replace( '/([BVGDJZYKLMNPRSTFXCWQʻ‘H])E/u', '$1Е', $text );
- $text = preg_replace( '/([bvgdjzyklmnprstfxcwqʻ‘h])e/ui', '$1е', $text );
- }
- return parent::translate( $text, $toVariant );
- }
-
-}
diff --git a/languages/classes/LanguageZh.php b/languages/classes/LanguageZh.php
index 9adcb582eb31..9de4965320ed 100644
--- a/languages/classes/LanguageZh.php
+++ b/languages/classes/LanguageZh.php
@@ -22,109 +22,6 @@
*/
/**
- * @ingroup Language
- */
-class ZhConverter extends LanguageConverter {
- /**
- * @param Language $langobj
- */
- public function __construct( Language $langobj ) {
- $this->mDescCodeSep = ':';
- $this->mDescVarSep = ';';
-
- $variants = [
- 'zh',
- 'zh-hans',
- 'zh-hant',
- 'zh-cn',
- 'zh-hk',
- 'zh-mo',
- 'zh-my',
- 'zh-sg',
- 'zh-tw'
- ];
-
- $variantfallbacks = [
- 'zh' => [ 'zh-hans', 'zh-hant', 'zh-cn', 'zh-tw', 'zh-hk', 'zh-sg', 'zh-mo', 'zh-my' ],
- 'zh-hans' => [ 'zh-cn', 'zh-sg', 'zh-my' ],
- 'zh-hant' => [ 'zh-tw', 'zh-hk', 'zh-mo' ],
- 'zh-cn' => [ 'zh-hans', 'zh-sg', 'zh-my' ],
- 'zh-sg' => [ 'zh-hans', 'zh-cn', 'zh-my' ],
- 'zh-my' => [ 'zh-hans', 'zh-sg', 'zh-cn' ],
- 'zh-tw' => [ 'zh-hant', 'zh-hk', 'zh-mo' ],
- 'zh-hk' => [ 'zh-hant', 'zh-mo', 'zh-tw' ],
- 'zh-mo' => [ 'zh-hant', 'zh-hk', 'zh-tw' ],
- ];
- $ml = [
- 'zh' => 'disable',
- 'zh-hans' => 'unidirectional',
- 'zh-hant' => 'unidirectional',
- ];
-
- parent::__construct( $langobj, 'zh',
- $variants,
- $variantfallbacks,
- [],
- $ml );
- $names = [
- 'zh' => '原文',
- 'zh-hans' => '简体',
- 'zh-hant' => '繁體',
- 'zh-cn' => '大陆',
- 'zh-tw' => '台灣',
- 'zh-hk' => '香港',
- 'zh-mo' => '澳門',
- 'zh-sg' => '新加坡',
- 'zh-my' => '大马',
- ];
- $this->mVariantNames = array_merge( $this->mVariantNames, $names );
- }
-
- protected function loadDefaultTables() {
- $this->mTables = [
- 'zh-hans' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hans ),
- 'zh-hant' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2Hant ),
- 'zh-cn' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
- 'zh-hk' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ),
- 'zh-mo' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2HK ),
- 'zh-my' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
- 'zh-sg' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2CN ),
- 'zh-tw' => new ReplacementArray( MediaWiki\Languages\Data\ZhConversion::$zh2TW ),
- 'zh' => new ReplacementArray
- ];
- }
-
- protected function postLoadTables() {
- $this->mTables['zh-cn']->setArray(
- $this->mTables['zh-cn']->getArray() + $this->mTables['zh-hans']->getArray()
- );
- $this->mTables['zh-hk']->setArray(
- $this->mTables['zh-hk']->getArray() + $this->mTables['zh-hant']->getArray()
- );
- $this->mTables['zh-mo']->setArray(
- $this->mTables['zh-mo']->getArray() + $this->mTables['zh-hant']->getArray()
- );
- $this->mTables['zh-my']->setArray(
- $this->mTables['zh-my']->getArray() + $this->mTables['zh-hans']->getArray()
- );
- $this->mTables['zh-sg']->setArray(
- $this->mTables['zh-sg']->getArray() + $this->mTables['zh-hans']->getArray()
- );
- $this->mTables['zh-tw']->setArray(
- $this->mTables['zh-tw']->getArray() + $this->mTables['zh-hant']->getArray()
- );
- }
-
- /**
- * @param string $key
- * @return string
- */
- public function convertCategoryKey( $key ) {
- return $this->autoConvert( $key, 'zh' );
- }
-}
-
-/**
* class that handles both Traditional and Simplified Chinese
* right now it only distinguish zh_hans, zh_hant, zh_cn, zh_tw, zh_sg and zh_hk.
*
@@ -174,7 +71,7 @@ class LanguageZh extends LanguageZh_hans {
public function convertForSearchResult( $termsArray ) {
$terms = implode( '|', $termsArray );
$terms = self::convertDoubleWidth( $terms );
- $terms = implode( '|', $this->mConverter->autoConvertToAllVariants( $terms ) );
+ $terms = implode( '|', $this->getConverter()->autoConvertToAllVariants( $terms ) );
$ret = array_unique( explode( '|', $terms ) );
return $ret;
}