diff options
author | Amir Sarabadani <ladsgroup@gmail.com> | 2023-11-07 22:22:36 +0100 |
---|---|---|
committer | James D. Forrester <jforrester@wikimedia.org> | 2023-11-20 10:31:16 -0500 |
commit | beb3261b8d4cf52edc286b43381e8e92aca7ac09 (patch) | |
tree | 20fc1cd06d59803b16441143be4d9f8b80a7a0c5 /includes | |
parent | 8c51e4b03c34483db81930ef054b94942f224ce3 (diff) | |
download | mediawikicore-beb3261b8d4cf52edc286b43381e8e92aca7ac09.tar.gz mediawikicore-beb3261b8d4cf52edc286b43381e8e92aca7ac09.zip |
Remove language converter for Kazakh
This has been constantly mentioned as buggy and broken, and there is no
official version of Latin or Arabic (see the ticket for more details).
This can be brought back as an extension if needed by third-party users.
Bug: T350684
Bug: T268143
Depends-On: I6180dca2c49b3119751766268acc56087aaf8414
Change-Id: Ifbf3c8954d885daf891f8d9efc11743d898302f0
Diffstat (limited to 'includes')
-rw-r--r-- | includes/language/LanguageConverter.php | 2 | ||||
-rw-r--r-- | includes/language/LanguageConverterFactory.php | 4 | ||||
-rw-r--r-- | includes/language/converters/KkConverter.php | 353 | ||||
-rw-r--r-- | includes/languages/LanguageKk.php | 50 | ||||
-rw-r--r-- | includes/languages/LanguageKk_cyrl.php | 486 |
5 files changed, 1 insertions, 894 deletions
diff --git a/includes/language/LanguageConverter.php b/includes/language/LanguageConverter.php index 192bf8338bff..5cadb8defb3a 100644 --- a/includes/language/LanguageConverter.php +++ b/includes/language/LanguageConverter.php @@ -55,7 +55,6 @@ abstract class LanguageConverter implements ILanguageConverter { 'crh', 'gan', 'iu', - 'kk', 'ku', 'sh', 'shi', @@ -79,7 +78,6 @@ abstract class LanguageConverter implements ILanguageConverter { 'crh' => 'crh', 'gan' => 'gan', 'iu' => 'iu', - 'kk' => 'kk', 'ku' => 'ku', 'sh' => 'sh-latn', 'shi' => 'shi', diff --git a/includes/language/LanguageConverterFactory.php b/includes/language/LanguageConverterFactory.php index 29064f1fcdd9..05a0f2dc7436 100644 --- a/includes/language/LanguageConverterFactory.php +++ b/includes/language/LanguageConverterFactory.php @@ -26,7 +26,6 @@ use EnConverter; use GanConverter; use ILanguageConverter; use IuConverter; -use KkConverter; use KuConverter; use Language; use MediaWiki\StubObject\StubUserLang; @@ -66,9 +65,6 @@ class LanguageConverterFactory { 'iu' => [ 'class' => IuConverter::class, ], - 'kk' => [ - 'class' => KkConverter::class, - ], 'ku' => [ 'class' => KuConverter::class, ], diff --git a/includes/language/converters/KkConverter.php b/includes/language/converters/KkConverter.php deleted file mode 100644 index 0894d8daf7f5..000000000000 --- a/includes/language/converters/KkConverter.php +++ /dev/null @@ -1,353 +0,0 @@ -<?php -/** - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * http://www.gnu.org/copyleft/gpl.html - * - * @file - */ - -define( 'KK_C_UC', 'АӘБВГҒДЕЁЖЗИЙКҚЛМНҢОӨПРСТУҰҮФХҺЦЧШЩЪЫІЬЭЮЯ' ); # Kazakh Cyrillic uppercase -define( 'KK_C_LC', 'аәбвгғдеёжзийкқлмнңоөпрстуұүфхһцчшщъыіьэюя' ); # Kazakh Cyrillic lowercase -define( 'KK_L_UC', 'AÄBCÇDEÉFGĞHIİÏJKLMNÑOÖPQRSŞTUÜVWXYÝZ' ); # Kazakh Latin uppercase -define( 'KK_L_LC', 'aäbcçdeéfgğhıiïjklmnñoöpqrsştuüvwxyýz' ); # Kazakh Latin lowercase -// define( 'KK_A', 'ٴابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ' ); # Kazakh Arabic -define( 'H_HAMZA', 'ٴ' ); # U+0674 ARABIC LETTER HIGH HAMZA -// define( 'ZWNJ', '' ); # U+200C ZERO WIDTH NON-JOINER - -/** - * Kazakh (Қазақша) converter routines - * - * @ingroup Languages - */ -class KkConverter extends LanguageConverterSpecific { - - public function getMainCode(): string { - return 'kk'; - } - - public function getLanguageVariants(): array { - return [ - 'kk', - 'kk-cyrl', - 'kk-latn', - 'kk-arab', - 'kk-kz', - 'kk-tr', - 'kk-cn' - ]; - } - - public function getVariantsFallbacks(): array { - return [ - 'kk' => 'kk-cyrl', - 'kk-cyrl' => 'kk', - 'kk-latn' => 'kk', - 'kk-arab' => 'kk', - 'kk-kz' => 'kk-cyrl', - 'kk-tr' => 'kk-latn', - 'kk-cn' => 'kk-arab' - ]; - } - - protected function loadDefaultTables(): array { - // require __DIR__."/../../includes/KkConversion.php"; - // Placeholder for future implementing. 
Remove the variables declarations - // after generating KkConversion.php - $kk2Cyrl = []; - $kk2Latn = []; - $kk2Arab = []; - $kk2KZ = []; - $kk2TR = []; - $kk2CN = []; - - return [ - 'kk-cyrl' => new ReplacementArray( $kk2Cyrl ), - 'kk-latn' => new ReplacementArray( $kk2Latn ), - 'kk-arab' => new ReplacementArray( $kk2Arab ), - 'kk-kz' => new ReplacementArray( array_merge( $kk2Cyrl, $kk2KZ ) ), - 'kk-tr' => new ReplacementArray( array_merge( $kk2Latn, $kk2TR ) ), - 'kk-cn' => new ReplacementArray( array_merge( $kk2Arab, $kk2CN ) ), - 'kk' => new ReplacementArray() - ]; - } - - protected function postLoadTables( &$tables ) { - $tables['kk-kz']->merge( $tables['kk-cyrl'] ); - $tables['kk-tr']->merge( $tables['kk-latn'] ); - $tables['kk-cn']->merge( $tables['kk-arab'] ); - } - - /** - * Return cyrillic to latin reg conversion table - * @since 1.36 - * - * @return array - */ - protected function getMCyrl2Latn(): array { - return [ - # # Punctuation - '/№/u' => 'No.', - # # Е after vowels - '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])Е/u' => '$1YE', - '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1ye', - # # leading ЁЮЯЩ - '/^Ё([' . KK_C_UC . ']|$)/u' => 'YO$1', '/^Ё([' . KK_C_LC . ']|$)/u' => 'Yo$1', - '/^Ю([' . KK_C_UC . ']|$)/u' => 'YU$1', '/^Ю([' . KK_C_LC . ']|$)/u' => 'Yu$1', - '/^Я([' . KK_C_UC . ']|$)/u' => 'YA$1', '/^Я([' . KK_C_LC . ']|$)/u' => 'Ya$1', - '/^Щ([' . KK_C_UC . ']|$)/u' => 'ŞÇ$1', '/^Щ([' . KK_C_LC . 
']|$)/u' => 'Şç$1', - # # other ЁЮЯ - '/Ё/u' => 'YO', '/ё/u' => 'yo', - '/Ю/u' => 'YU', '/ю/u' => 'yu', - '/Я/u' => 'YA', '/я/u' => 'ya', - '/Щ/u' => 'ŞÇ', '/щ/u' => 'şç', - # # soft and hard signs - '/[ъЪ]/u' => 'ʺ', '/[ьЬ]/u' => 'ʹ', - # # other characters - '/А/u' => 'A', '/а/u' => 'a', '/Ә/u' => 'Ä', '/ә/u' => 'ä', - '/Б/u' => 'B', '/б/u' => 'b', '/В/u' => 'V', '/в/u' => 'v', - '/Г/u' => 'G', '/г/u' => 'g', '/Ғ/u' => 'Ğ', '/ғ/u' => 'ğ', - '/Д/u' => 'D', '/д/u' => 'd', '/Е/u' => 'E', '/е/u' => 'e', - '/Ж/u' => 'J', '/ж/u' => 'j', '/З/u' => 'Z', '/з/u' => 'z', - '/И/u' => 'Ï', '/и/u' => 'ï', '/Й/u' => 'Ý', '/й/u' => 'ý', - '/К/u' => 'K', '/к/u' => 'k', '/Қ/u' => 'Q', '/қ/u' => 'q', - '/Л/u' => 'L', '/л/u' => 'l', '/М/u' => 'M', '/м/u' => 'm', - '/Н/u' => 'N', '/н/u' => 'n', '/Ң/u' => 'Ñ', '/ң/u' => 'ñ', - '/О/u' => 'O', '/о/u' => 'o', '/Ө/u' => 'Ö', '/ө/u' => 'ö', - '/П/u' => 'P', '/п/u' => 'p', '/Р/u' => 'R', '/р/u' => 'r', - '/С/u' => 'S', '/с/u' => 's', '/Т/u' => 'T', '/т/u' => 't', - '/У/u' => 'W', '/у/u' => 'w', '/Ұ/u' => 'U', '/ұ/u' => 'u', - '/Ү/u' => 'Ü', '/ү/u' => 'ü', '/Ф/u' => 'F', '/ф/u' => 'f', - '/Х/u' => 'X', '/х/u' => 'x', '/Һ/u' => 'H', '/һ/u' => 'h', - '/Ц/u' => 'C', '/ц/u' => 'c', '/Ч/u' => 'Ç', '/ч/u' => 'ç', - '/Ш/u' => 'Ş', '/ш/u' => 'ş', '/Ы/u' => 'I', '/ы/u' => 'ı', - '/І/u' => 'İ', '/і/u' => 'i', '/Э/u' => 'É', '/э/u' => 'é', - ]; - } - - /** - * Return latin to cyrillic reg conversion table - * @since 1.36 - * - * @return array - */ - protected function getMLatn2Cyrl(): array { - return [ - # # Punctuation - '/#|No\./' => '№', - # # Şç - '/ŞÇʹ/u' => 'ЩЬ', '/Şçʹ/u' => 'Щь', - '/Ş[Çç]/u' => 'Щ', '/şç/u' => 'щ', - # # soft and hard signs - '/([' . KK_L_UC . '])ʺ([' . KK_L_UC . '])/u' => '$1Ъ$2', - '/ʺ([' . KK_L_LC . '])/u' => 'ъ$1', - '/([' . KK_L_UC . '])ʹ([' . KK_L_UC . '])/u' => '$1Ь$2', - '/ʹ([' . KK_L_LC . '])/u' => 'ь$1', - '/ʺ/u' => 'ъ', - '/ʹ/u' => 'ь', - # # Ye Yo Yu Ya. 
- '/Y[Ee]/u' => 'Е', '/ye/u' => 'е', - '/Y[Oo]/u' => 'Ё', '/yo/u' => 'ё', - '/Y[UWuw]/u' => 'Ю', '/y[uw]/u' => 'ю', - '/Y[Aa]/u' => 'Я', '/ya/u' => 'я', - # # other characters - '/A/u' => 'А', '/a/u' => 'а', '/Ä/u' => 'Ә', '/ä/u' => 'ә', - '/B/u' => 'Б', '/b/u' => 'б', '/C/u' => 'Ц', '/c/u' => 'ц', - '/Ç/u' => 'Ч', '/ç/u' => 'ч', '/D/u' => 'Д', '/d/u' => 'д', - '/E/u' => 'Е', '/e/u' => 'е', '/É/u' => 'Э', '/é/u' => 'э', - '/F/u' => 'Ф', '/f/u' => 'ф', '/G/u' => 'Г', '/g/u' => 'г', - '/Ğ/u' => 'Ғ', '/ğ/u' => 'ғ', '/H/u' => 'Һ', '/h/u' => 'һ', - '/I/u' => 'Ы', '/ı/u' => 'ы', '/İ/u' => 'І', '/i/u' => 'і', - '/Ï/u' => 'И', '/ï/u' => 'и', '/J/u' => 'Ж', '/j/u' => 'ж', - '/K/u' => 'К', '/k/u' => 'к', '/L/u' => 'Л', '/l/u' => 'л', - '/M/u' => 'М', '/m/u' => 'м', '/N/u' => 'Н', '/n/u' => 'н', - '/Ñ/u' => 'Ң', '/ñ/u' => 'ң', '/O/u' => 'О', '/o/u' => 'о', - '/Ö/u' => 'Ө', '/ö/u' => 'ө', '/P/u' => 'П', '/p/u' => 'п', - '/Q/u' => 'Қ', '/q/u' => 'қ', '/R/u' => 'Р', '/r/u' => 'р', - '/S/u' => 'С', '/s/u' => 'с', '/Ş/u' => 'Ш', '/ş/u' => 'ш', - '/T/u' => 'Т', '/t/u' => 'т', '/U/u' => 'Ұ', '/u/u' => 'ұ', - '/Ü/u' => 'Ү', '/ü/u' => 'ү', '/V/u' => 'В', '/v/u' => 'в', - '/W/u' => 'У', '/w/u' => 'у', '/Ý/u' => 'Й', '/ý/u' => 'й', - '/X/u' => 'Х', '/x/u' => 'х', '/Z/u' => 'З', '/z/u' => 'з', - ]; - } - - /** - * Return latin or cyrillic to arab reg conversion table. 
- * @since 1.36 - * - * @return array - */ - public function getMCyLa2Arab() { - return [ - # # Punctuation -> Arabic - '/#|№|No\./u' => '', # U+0600 - '/\,/' => '،', # U+060C - '/;/' => '؛', # U+061B - '/\?/' => '؟', # U+061F - '/%/' => '٪', # U+066A - '/\*/' => '٭', # U+066D - # # Digits -> Arabic - '/0/' => '۰', # U+06F0 - '/1/' => '۱', # U+06F1 - '/2/' => '۲', # U+06F2 - '/3/' => '۳', # U+06F3 - '/4/' => '۴', # U+06F4 - '/5/' => '۵', # U+06F5 - '/6/' => '۶', # U+06F6 - '/7/' => '۷', # U+06F7 - '/8/' => '۸', # U+06F8 - '/9/' => '۹', # U+06F9 - # # Cyrillic -> Arabic - '/Аллаһ/ui' => 'ﷲ', - '/([АӘЕЁИОӨҰҮЭЮЯЪЬ])е/ui' => '$1يە', - '/[еэ]/ui' => 'ە', '/[ъь]/ui' => '', - '/[аә]/ui' => 'ا', '/[оө]/ui' => 'و', '/[ұү]/ui' => 'ۇ', '/[ыі]/ui' => 'ى', - '/[и]/ui' => 'ىي', '/ё/ui' => 'يو', '/ю/ui' => 'يۋ', '/я/ui' => 'يا', '/[й]/ui' => 'ي', - '/ц/ui' => 'تس', '/щ/ui' => 'شش', - '/һ/ui' => 'ح', '/ч/ui' => 'تش', - # '/һ/ui' => 'ھ', '/ч/ui' => 'چ', - '/б/ui' => 'ب', '/в/ui' => 'ۆ', '/г/ui' => 'گ', '/ғ/ui' => 'ع', - '/д/ui' => 'د', '/ж/ui' => 'ج', '/з/ui' => 'ز', '/к/ui' => 'ك', - '/қ/ui' => 'ق', '/л/ui' => 'ل', '/м/ui' => 'م', '/н/ui' => 'ن', - '/ң/ui' => 'ڭ', '/п/ui' => 'پ', '/р/ui' => 'ر', '/с/ui' => 'س', - '/т/ui' => 'ت', '/у/ui' => 'ۋ', '/ф/ui' => 'ف', '/х/ui' => 'ح', - '/ш/ui' => 'ش', - # # Latin -> Arabic // commented for now... 
- /*'/Allah/ui' => 'ﷲ', - '/[eé]/ui' => 'ە', '/[yý]/ui' => 'ي', '/[ʺʹ]/ui' => '', - '/[aä]/ui' => 'ا', '/[oö]/ui' => 'و', '/[uü]/ui' => 'ۇ', - '/[ï]/ui' => 'ىي', '/[ıIiİ]/u' => 'ى', - '/c/ui' => 'تس', - '/ç/ui' => 'تش', '/h/ui' => 'ح', - #'/ç/ui' => 'چ', '/h/ui' => 'ھ', - '/b/ui' => 'ب','/d/ui' => 'د', - '/f/ui' => 'ف', '/g/ui' => 'گ', '/ğ/ui' => 'ع', - '/j/ui' => 'ج', '/k/ui' => 'ك', '/l/ui' => 'ل', '/m/ui' => 'م', - '/n/ui' => 'ن', '/ñ/ui' => 'ڭ', '/p/ui' => 'پ', '/q/ui' => 'ق', - '/r/ui' => 'ر', '/s/ui' => 'س', '/ş/ui' => 'ش', '/t/ui' => 'ت', - '/v/ui' => 'ۆ', '/w/ui' => 'ۋ', '/x/ui' => 'ح', '/z/ui' => 'ز',*/ - ]; - } - - public function translate( $text, $toVariant ) { - $text = parent::translate( $text, $toVariant ); - - switch ( $toVariant ) { - case 'kk-cyrl': - case 'kk-kz': - $letters = KK_L_UC . KK_L_LC . 'ʺʹ#0123456789'; - break; - - case 'kk-latn': - case 'kk-tr': - $letters = KK_C_UC . KK_C_LC . '№0123456789'; - break; - - case 'kk-arab': - case 'kk-cn': - $letters = KK_C_UC . KK_C_LC . /*KK_L_UC.KK_L_LC.'ʺʹ'.*/',;\?%\*№0123456789'; - break; - - default: - return $text; - } - // disable conversion variables like $1, $2... - $varsfix = '\$[0-9]'; - - $matches = preg_split( - '/' . $varsfix . '[^' . $letters . ']+/u', - $text, - -1, - PREG_SPLIT_OFFSET_CAPTURE - ); - - $mstart = 0; - $ret = ''; - - foreach ( $matches as $m ) { - $ret .= substr( $text, $mstart, (int)$m[1] - $mstart ); - $ret .= $this->regsConverter( $m[0], $toVariant ); - $mstart = (int)$m[1] + strlen( $m[0] ); - } - - return $ret; - } - - /** - * @param string $text - * @param string $toVariant - * @return mixed|string - */ - private function regsConverter( $text, $toVariant ) { - if ( $text == '' ) { - return $text; - } - - switch ( $toVariant ) { - case 'kk-arab': - case 'kk-cn': - $letters = KK_C_LC . 
KK_C_UC; /*.KK_L_LC.KK_L_UC*/ - $front = 'әөүіӘӨҮІ'; /*.'äöüiÄÖÜİ'*/ - $excludes = 'еэгғкқЕЭГҒКҚ'; /*.'eégğkqEÉGĞKQ'*/ - // split text to words - $matches = preg_split( '/[\b\s\-\.:]+/', $text, -1, PREG_SPLIT_OFFSET_CAPTURE ); - $mstart = 0; - $ret = ''; - foreach ( $matches as $m ) { - $ret .= substr( $text, $mstart, (int)$m[1] - $mstart ); - // is matched the word to front vowels? - // exclude a words matched to е, э, г, к, к, қ, - // them should be without hamza - if ( preg_match( '/[' . $front . ']/u', $m[0] ) && - !preg_match( '/[' . $excludes . ']/u', $m[0] ) - ) { - $ret .= preg_replace( '/[' . $letters . ']+/u', H_HAMZA . '$0', $m[0] ); - } else { - $ret .= $m[0]; - } - $mstart = (int)$m[1] + strlen( $m[0] ); - } - $text =& $ret; - $mCyLa2Arab = $this->getMCyLa2Arab(); - foreach ( $mCyLa2Arab as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - return $text; - - case 'kk-latn': - case 'kk-tr': - $mCyrl2Latn = $this->getMCyrl2Latn(); - foreach ( $mCyrl2Latn as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - return $text; - - case 'kk-cyrl': - case 'kk-kz': - $mLatn2Cyrl = $this->getMLatn2Cyrl(); - foreach ( $mLatn2Cyrl as $pat => $rep ) { - $text = preg_replace( $pat, $rep, $text ); - } - return $text; - - default: - return $text; - } - } - - public function convertCategoryKey( $key ) { - return $this->autoConvert( $key, 'kk' ); - } -} diff --git a/includes/languages/LanguageKk.php b/includes/languages/LanguageKk.php index 9576cc1e96d1..b2fe443e3814 100644 --- a/includes/languages/LanguageKk.php +++ b/includes/languages/LanguageKk.php @@ -28,59 +28,11 @@ * @ingroup Languages */ class LanguageKk extends LanguageKk_cyrl { - /** - * Fixes an issue with ucfirst for transforming 'i' to 'İ' - * - * @inheritDoc - */ - public function ucfirst( $str ) { - if ( substr( $str, 0, 1 ) === 'i' ) { - $variant = $this->getConverterInternal()->getPreferredVariant(); - if ( $variant == 'kk-latn' || $variant == 'kk-tr' ) { - return 'İ' . 
substr( $str, 1 ); - } - } - return parent::ucfirst( $str ); - } - - /** - * Fixes issue with lcfirst for transforming 'I' to 'ı' - * - * @inheritDoc - */ - public function lcfirst( $str ) { - if ( substr( $str, 0, 1 ) === 'I' ) { - $variant = $this->getConverterInternal()->getPreferredVariant(); - if ( $variant == 'kk-latn' || $variant == 'kk-tr' ) { - return 'ı' . substr( $str, 1 ); - } - } - return parent::lcfirst( $str ); - } - public function convertGrammar( $word, $case ) { // T277689: If there's no word, then there's nothing to convert. if ( $word === '' ) { return ''; } - - $variant = $this->getConverterInternal()->getPreferredVariant(); - switch ( $variant ) { - case 'kk-arab': - case 'kk-cn': - $word = parent::convertGrammarKk_arab( $word, $case ); - break; - case 'kk-latn': - case 'kk-tr': - $word = parent::convertGrammarKk_latn( $word, $case ); - break; - case 'kk-cyrl': - case 'kk-kz': - case 'kk': - default: - $word = parent::convertGrammarKk_cyrl( $word, $case ); - } - - return $word; + return parent::convertGrammarKk_cyrl( $word, $case ); } } diff --git a/includes/languages/LanguageKk_cyrl.php b/includes/languages/LanguageKk_cyrl.php index 6cc4fd4905f5..8dedc3afbae0 100644 --- a/includes/languages/LanguageKk_cyrl.php +++ b/includes/languages/LanguageKk_cyrl.php @@ -280,492 +280,6 @@ class LanguageKk_cyrl extends Language { /** * @param string $word - * @param string $case - * @return string - */ - protected function convertGrammarKk_latn( $word, $case ) { - $grammarForms = - MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::GrammarForms ); - if ( isset( $grammarForms['kk-tr'][$case][$word] ) ) { - return $grammarForms['kk-tr'][$case][$word]; - } - if ( isset( $grammarForms['kk-latn'][$case][$word] ) ) { - return $grammarForms['kk-latn'][$case][$word]; - } - // Set up some constants... 
- // Vowels in last syllable - $frontVowels = [ "e", "ö", "ü", "i", "ä", "é" ]; - $backVowels = [ "a", "o", "u", "ı" ]; - $allVowels = [ "e", "ö", "ü", "i", "ä", "é", "a", "o", "u", "ı" ]; - // Preceding letters - $Nasals = [ "m", "n", "ñ" ]; - $Sonants = [ "ï", "y", "ý", "l", "r", "w" ]; - $Consonants = [ "p", "f", "k", "q", "t", "ş", "s", "x", "c", "ç", "b", "v", "g", "d" ]; - $Sibilants = [ "j", "z" ]; - $Sonorants = [ "ï", "y", "ý", "l", "r", "w", "m", "n", "ñ", "j", "z" ]; - - // Possessives - $firstPerson = [ "m", "ñ" ]; // 1st singular, 2nd informal - $secondPerson = [ "z" ]; // 1st plural, 2nd formal - $thirdPerson = [ "ı", "i" ]; // 3rd - - [ $wordEnding, $wordLastVowel ] = $this->lastLetter( $word, $allVowels ); - - // Now convert the word - switch ( $case ) { - case "dc1": - case "genitive": # ilik - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "tiñ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "tıñ"; - } - } elseif ( in_array( $wordEnding, $allVowels ) || in_array( $wordEnding, $Nasals ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "niñ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "nıñ"; - } - } elseif ( in_array( $wordEnding, $Sonants ) || in_array( $wordEnding, $Sibilants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "diñ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "dıñ"; - } - } - break; - - case "dc2": - case "dative": # barıs - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ke"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "qa"; - } - } elseif ( in_array( $wordEnding, $allVowels ) || in_array( $wordEnding, $Sonorants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ge"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "ğa"; - } - } - break; - - 
case "dc21": - case "possessive dative": # täweldık + barıs - if ( in_array( $wordEnding, $firstPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "e"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "a"; - } - } elseif ( in_array( $wordEnding, $secondPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ge"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "ğa"; - } - } elseif ( in_array( $wordEnding, $thirdPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ne"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "na"; - } - } - break; - - case "dc3": - case "accusative": # tabıs - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ti"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "tı"; - } - } elseif ( in_array( $wordEnding, $allVowels ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ni"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "nı"; - } - } elseif ( in_array( $wordEnding, $Sonorants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "di"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "dı"; - } - } - break; - - case "dc31": - case "possessive accusative": # täweldık + tabıs - if ( in_array( $wordEnding, $firstPerson ) || in_array( $wordEnding, $secondPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "di"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "dı"; - } - } elseif ( in_array( $wordEnding, $thirdPerson ) ) { - $word .= "n"; - } - break; - - case "dc4": - case "locative": # jatıs - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "te"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "ta"; - } - } elseif ( in_array( $wordEnding, $allVowels 
) || in_array( $wordEnding, $Sonorants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "de"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "da"; - } - } - break; - - case "dc41": - case "possessive locative": # täweldık + jatıs - if ( in_array( $wordEnding, $firstPerson ) || in_array( $wordEnding, $secondPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "de"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "da"; - } - } elseif ( in_array( $wordEnding, $thirdPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "nde"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "nda"; - } - } - break; - - case "dc5": - case "ablative": # şığıs - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ten"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "tan"; - } - } elseif ( in_array( $wordEnding, $allVowels ) - || in_array( $wordEnding, $Sonants ) - || in_array( $wordEnding, $Sibilants ) - ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "den"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "dan"; - } - } elseif ( in_array( $wordEnding, $Nasals ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "nen"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "nan"; - } - } - break; - - case "dc51": - case "possessive ablative": # täweldık + şığıs - if ( in_array( $wordEnding, $firstPerson ) || in_array( $wordEnding, $thirdPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "nen"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "nan"; - } - } elseif ( in_array( $wordEnding, $secondPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "den"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "dan"; - } - } - break; - - case 
"dc6": - case "comitative": # kömektes - if ( in_array( $wordEnding, $Consonants ) ) { - $word .= "pen"; - } elseif ( in_array( $wordEnding, $allVowels ) - || in_array( $wordEnding, $Nasals ) - || in_array( $wordEnding, $Sonants ) - ) { - $word .= "men"; - } elseif ( in_array( $wordEnding, $Sibilants ) ) { - $word .= "ben"; - } - break; - - case "dc61": - case "possessive comitative": # täweldık + kömektes - if ( in_array( $wordEnding, $Consonants ) ) { - $word .= "penen"; - } elseif ( in_array( $wordEnding, $allVowels ) - || in_array( $wordEnding, $Nasals ) - || in_array( $wordEnding, $Sonants ) - ) { - $word .= "menen"; - } elseif ( in_array( $wordEnding, $Sibilants ) ) { - $word .= "benen"; - } - break; - default: # dc0 #nominative #ataw - } - return $word; - } - - /** - * @param string $word - * @param string $case - * @return string - */ - protected function convertGrammarKk_arab( $word, $case ) { - $grammarForms = - MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::GrammarForms ); - if ( isset( $grammarForms['kk-cn'][$case][$word] ) ) { - return $grammarForms['kk-cn'][$case][$word]; - } - if ( isset( $grammarForms['kk-arab'][$case][$word] ) ) { - return $grammarForms['kk-arab'][$case][$word]; - } - // Set up some constants... 
- // Vowels in last syllable - $frontVowels = [ "ە", "ٶ", "ٷ", "ٸ", "ٵ", "ە" ]; - $backVowels = [ "ا", "و", "ۇ", "ى" ]; - $allVowels = [ "ە", "ٶ", "ٷ", "ٸ", "ٵ", "ە", "ا", "و", "ۇ", "ى" ]; - // Preceding letters - $Nasals = [ "م", "ن", "ڭ" ]; - $Sonants = [ "ي", "ي", "ل", "ر", "ۋ" ]; - $Consonants = [ "پ", "ف", "ك", "ق", "ت", "ش", "س", "ح", "تس", "چ", "ب", "ۆ", "گ", "د" ]; - $Sibilants = [ "ج", "ز" ]; - $Sonorants = [ "ي", "ي", "ل", "ر", "ۋ", "م", "ن", "ڭ", "ج", "ز" ]; - - // Possessives - $firstPerson = [ "م", "ڭ" ]; // 1st singular, 2nd informal - $secondPerson = [ "ز" ]; // 1st plural, 2nd formal - $thirdPerson = [ "ى", "ٸ" ]; // 3rd - - [ $wordEnding, $wordLastVowel ] = $this->lastLetter( $word, $allVowels ); - - // Now convert the word - switch ( $case ) { - case "dc1": - case "genitive": # ilik - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "تٸڭ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "تىڭ"; - } - } elseif ( in_array( $wordEnding, $allVowels ) || in_array( $wordEnding, $Nasals ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "نٸڭ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "نىڭ"; - } - } elseif ( in_array( $wordEnding, $Sonants ) || in_array( $wordEnding, $Sibilants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "دٸڭ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "دىڭ"; - } - } - break; - - case "dc2": - case "dative": # barıs - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "كە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "قا"; - } - } elseif ( in_array( $wordEnding, $allVowels ) || in_array( $wordEnding, $Sonorants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "گە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "عا"; - } - } - break; - - case 
"dc21": - case "possessive dative": # täweldık + barıs - if ( in_array( $wordEnding, $firstPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "ا"; - } - } elseif ( in_array( $wordEnding, $secondPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "گە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "عا"; - } - } elseif ( in_array( $wordEnding, $thirdPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "نە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "نا"; - } - } - break; - case "dc3": - case "accusative": # tabıs - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "تٸ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "تى"; - } - } elseif ( in_array( $wordEnding, $allVowels ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "نٸ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "نى"; - } - } elseif ( in_array( $wordEnding, $Sonorants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "دٸ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "دى"; - } - } - break; - - case "dc31": - case "possessive accusative": # täweldık + tabıs - if ( in_array( $wordEnding, $firstPerson ) || in_array( $wordEnding, $secondPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "دٸ"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "دى"; - } - } elseif ( in_array( $wordEnding, $thirdPerson ) ) { - $word .= "ن"; - } - break; - - case "dc4": - case "locative": # jatıs - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "تە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "تا"; - } - } elseif ( in_array( $wordEnding, $allVowels ) || 
in_array( $wordEnding, $Sonorants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "دە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "دا"; - } - } - break; - - case "dc41": - case "possessive locative": # täweldık + jatıs - if ( in_array( $wordEnding, $firstPerson ) || in_array( $wordEnding, $secondPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "دە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "دا"; - } - } elseif ( in_array( $wordEnding, $thirdPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "ندە"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "ندا"; - } - } - break; - - case "dc5": - case "ablative": # şığıs - if ( in_array( $wordEnding, $Consonants ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "تەن"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "تان"; - } - } elseif ( in_array( $wordEnding, $allVowels ) - || in_array( $wordEnding, $Sonants ) - || in_array( $wordEnding, $Sibilants ) - ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "دەن"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "دان"; - } - } elseif ( in_array( $wordEnding, $Nasals ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "نەن"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "نان"; - } - } - break; - - case "dc51": - case "possessive ablative": # täweldık + şığıs - if ( in_array( $wordEnding, $firstPerson ) || in_array( $wordEnding, $thirdPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "نەن"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "نان"; - } - } elseif ( in_array( $wordEnding, $secondPerson ) ) { - if ( in_array( $wordLastVowel, $frontVowels ) ) { - $word .= "دەن"; - } elseif ( in_array( $wordLastVowel, $backVowels ) ) { - $word .= "دان"; - } - } - break; - - case "dc6": - 
case "comitative": # kömektes - if ( in_array( $wordEnding, $Consonants ) ) { - $word .= "پەن"; - } elseif ( in_array( $wordEnding, $allVowels ) - || in_array( $wordEnding, $Nasals ) - || in_array( $wordEnding, $Sonants ) - ) { - $word .= "مەن"; - } elseif ( in_array( $wordEnding, $Sibilants ) ) { - $word .= "بەن"; - } - break; - case "dc61": - case "possessive comitative": # täweldık + kömektes - if ( in_array( $wordEnding, $Consonants ) ) { - $word .= "پەنەن"; - } elseif ( in_array( $wordEnding, $allVowels ) - || in_array( $wordEnding, $Nasals ) - || in_array( $wordEnding, $Sonants ) - ) { - $word .= "مەنەن"; - } elseif ( in_array( $wordEnding, $Sibilants ) ) { - $word .= "بەنەن"; - } - break; - - default: # dc0 #nominative #ataw - break; - } - return $word; - } - - /** - * @param string $word * @param string[] $allVowels * @return array */ |