'ban',
			'ban-x-dharma' => 'ban',
			'ban-x-palmleaf' => 'ban',
			'ban-x-pku' => 'ban',
		];
	}

	/**
	 * Display names of the variants handled by this converter.
	 *
	 * The names declared here are merged over the parent's list, so an entry
	 * with the same variant code replaces the inherited one.
	 *
	 * @return string[] Map of variant code => human-readable name
	 */
	public function getVariantNames(): array {
		$names = [
			'ban' => 'Basa Bali',
			'ban-bali' => 'ᬩᬲᬩᬮᬶ',
			'ban-x-dharma' => 'Basa Bali (alih aksara DHARMA)',
			'ban-x-palmleaf' => 'Basa Bali (alih aksara Palmleaf.org)',
			'ban-x-pku' => 'Basa Bali (alih aksara Puri Kauhan Ubud)',
		];

		return array_merge( parent::getVariantNames(), $names );
	}

	/**
	 * Build the transliteration rule sets, one per Latin variant.
	 *
	 * Each value is a nowdoc (no PHP interpolation, so "$name" inside the
	 * rules is a rule variable, not a PHP variable). Inside the rules a "#"
	 * starts a comment that runs to the end of the line, so the line breaks
	 * in the rule text are significant.
	 *
	 * @return string[] Map of variant code => rule text
	 */
	protected function getIcuRules() {
		$rules = [];

		# transliteration rules developed for Palmleaf.org
		# The Balinese code points are first mapped to private-use code points
		# (\uE0xx) and then converted to Latin by the rules further down.
		# Every consonant uses the same Latin base in its three contexts
		# (before a dependent vowel, before adeg-adeg, bare); SA SAPA ($ssa)
		# is therefore "ṣ" in all three.
		$rules['ban-x-palmleaf'] = <<<'EOF'
::NFC;
ᬒᬁ → \uE050; # OM
ᬁ → \uE001; # SIGN ULU CANDRA
ᬂ → \uE002; # SIGN CECEK
ᬄ → \uE003; # SIGN BISAH
ᬅ → \uE005; # LETTER AKARA
ᬆ → \uE006; # LETTER AKARA TEDUNG
ᬇ → \uE007; # LETTER IKARA
ᬈ → \uE008; # LETTER IKARA TEDUNG
ᬉ → \uE009; # LETTER UKARA
ᬊ → \uE00A; # LETTER UKARA TEDUNG
ᬋ → \uE00B; # LETTER RA REPA
ᬌ → \uE060; # LETTER RA REPA TEDUNG
ᬍ → \uE00C; # LETTER LA LENGA
ᬎ → \uE061; # LETTER LA LENGA TEDUNG
ᬏ → \uE00F; # LETTER EKARA
ᬐ → \uE010; # LETTER AIKARA
ᬑ → \uE013; # LETTER OKARA
ᬒ → \uE014; # LETTER OKARA TEDUNG
ᬓ → \uE015; # LETTER KA
ᬔ → \uE016; # LETTER KA MAHAPRANA
ᬕ → \uE017; # LETTER GA
ᬖ → \uE018; # LETTER GA GORA
ᬗ → \uE019; # LETTER NGA
ᬘ → \uE01A; # LETTER CA
ᬙ → \uE01B; # LETTER CA LACA
ᬚ → \uE01C; # LETTER JA
ᬛ → \uE01D; # LETTER JA JERA
ᬜ → \uE01E; # LETTER NYA
ᬝ → \uE01F; # LETTER TA LATIK
ᬞ → \uE020; # LETTER TA MURDA MAHAPRANA
ᬟ → \uE021; # LETTER DA MURDA ALPAPRANA
ᬠ → \uE022; # LETTER DA MURDA MAHAPRANA
ᬡ → \uE023; # LETTER NA RAMBAT
ᬢ → \uE024; # LETTER TA
ᬣ → \uE025; # LETTER TA TAWA
ᬤ → \uE026; # LETTER DA
ᬥ → \uE027; # LETTER DA MADU
ᬦ → \uE028; # LETTER NA
ᬧ → \uE02A; # LETTER PA
ᬨ → \uE02B; # LETTER PA KAPAL
ᬩ → \uE02C; # LETTER BA
ᬪ → \uE02D; # LETTER BA KEMBANG
ᬫ → \uE02E; # LETTER MA
ᬬ → \uE02F; # LETTER YA
ᬭ → \uE030; # LETTER RA
ᬮ → \uE032; # LETTER LA
ᬯ → \uE035; # LETTER WA
ᬰ → \uE036; # LETTER SA SAGA
ᬱ → \uE037; # LETTER SA SAPA
ᬲ → \uE038; # LETTER SA
ᬳ → \uE039; # LETTER HA
᬴ → \uE03C; # SIGN REREKAN
ᬵ → \uE03E; # VOWEL SIGN TEDUNG
ᬶ → \uE03F; # VOWEL SIGN ULU
ᬷ → \uE040; # VOWEL SIGN ULU SARI
ᬸ → \uE041; # VOWEL SIGN SUKU
ᬹ → \uE042; # VOWEL SIGN SUKU ILUT
ᬺ → \uE043; # VOWEL SIGN RA REPA
ᬻ → \uE044; # VOWEL SIGN RA REPA TEDUNG
ᬼ→ \uE062; # VOWEL SIGN LA LENGA
ᬽ → \uE063; # VOWEL SIGN LA LENGA TEDUNG
ᬾ → \uE047; # VOWEL SIGN TALING
ᬿ → \uE048; # VOWEL SIGN TALING REPA
ᭀ → \uE04B; # VOWEL SIGN TALING TEDUNG
ᭁ → \uE04C; # VOWEL SIGN TALING REPA TEDUNG
ᭂ → \uE045; # VOWEL SIGN PEPET
ᭃ → \uE049; # VOWEL SIGN PEPET TEDUNG
᭄ → \uE04D; # ADEG ADEG
ᭅ → \uE058; # LETTER KAF SASAK
ᭆ → \uE059; # LETTER KHOT SASAK
ᭇ → \uE024\uE03C; # LETTER TZIR SASAK
ᭈ → \uE05E; # LETTER EF SASAK
ᭉ → \uE081; # LETTER VE SASAK
ᭊ → \uE05B; # LETTER ZAL SASAK
ᭋ → \uE038\uE03C; # LETTER ASYURA SASAK
᭐ → \uE066; # DIGIT ZERO
᭑ → \uE067; # DIGIT ONE
᭒ → \uE068; # DIGIT TWO
᭓ → \uE069; # DIGIT THREE
᭔ → \uE06A; # DIGIT FO
᭕ → \uE06B; # DIGIT FIVE
᭖ → \uE06C; # DIGIT SIX
᭗ → \uE06D; # DIGIT SEVEN
᭘ → \uE06E; # DIGIT EIGHT
᭙ → \uE06F; # DIGIT NINE
᭚ → '//'; # PANTI
᭛ → '///'; # PAMADA
᭜ → •; # WINDU
᭟᭜᭟ → '\\•\\';
᭟ ' ' ᭜ ' ' ᭟ → '\\ • \\';
᭝ → \:; # CARIK PAMUNGKAH
᭞ → \uE064; # CARIK SIKI
᭟ → \uE065; # CARIK PAREREN
᭠ → ‐; # PAMENENG
#consonants
$chandrabindu=\uE001;
$ardhachandra=\u1B00;
$anusvara=\uE002;
$visarga=\uE003;
# w←vowel→ represents the stand-alone form
$wa=\uE005;
$waa=\uE006;
$wi=\uE007;
$wii=\uE008;
$wu=\uE009;
$wuu=\uE00A;
$wr=\uE00B;
$wl=\uE00C;
$wce=\uE00D; # LETTER CANDRA E
$wse=\uE00E; # LETTER SHORT E
$we=\uE00F; # ए LETTER E
$wai=\uE010;
$wco=\uE011; # LETTER CANDRA O
$wso=\uE012; # LETTER SHORT O
$wo=\uE013; # ओ LETTER O
$wau=\uE014;
$ka=\uE015;
$kha=\uE016;
$ga=\uE017;
$gha=\uE018;
$nga=\uE019;
$ca=\uE01A;
$cha=\uE01B;
$ja=\uE01C;
$jha=\uE01D;
$nya=\uE01E;
$tta=\uE01F;
$ttha=\uE020;
$dda=\uE021;
$ddha=\uE022;
$nna=\uE023;
$ta=\uE024;
$tha=\uE025;
$da=\uE026;
$dha=\uE027;
$na=\uE028;
$ena=\uE029; #compatibility
$pa=\uE02A;
$pha=\uE02B;
$ba=\uE02C;
$bha=\uE02D;
$ma=\uE02E;
$ya=\uE02F;
$ra=\uE030;
$vva=\uE081;
$rra=\uE031;
$la=\uE032;
$lla=\uE033;
$ela=\uE034; #compatibility
$va=\uE035;
$sha=\uE036;
$ssa=\uE037;
$sa=\uE038;
$ha=\uE039;
$nukta=\uE03C;
$avagraha=\uE03D; # SIGN AVAGRAHA
# ←vowel→ represents the dependent form
$aa=\uE03E;
$i=\uE03F;
$ii=\uE040;
$u=\uE041;
$uu=\uE042;
$rh=\uE043;
$rrh=\uE044;
$ce=\uE045; #VOWEL SIGN CANDRA E
$se=\uE046; #VOWEL SIGN SHORT E
$e=\uE047;
$ai=\uE048;
$co=\uE049; # VOWEL SIGN CANDRA O
$so=\uE04A; # VOWEL SIGN SHORT O
$o=\uE04B; # ो
$au=\uE04C;
$virama=\uE04D;
$om=\uE050; # OM
\uE051→; # UNMAPPED STRESS SIGN UDATTA
\uE052→; # UNMAPPED STRESS SIGN ANUDATTA
\uE053→; # UNMAPPED GRAVE ACCENT
\uE054→; # UNMAPPED ACUTE ACCENT
$lm = \uE055;# Telugu Length Mark
$ailm=\uE056;# AI Length Mark
$aulm=\uE057;# AU Length Mark
#urdu compatibity forms
$uka=\uE058;
$ukha=\uE059;
$ugha=\uE05A;
$ujha=\uE05B;
$uddha=\uE05C;
$udha=\uE05D;
$ufa=\uE05E;
$uya=\uE05F;
$wrr=\uE060;
$wll=\uE061;
$lh=\uE062;
$llh=\uE063;
$danda=\uE064;
$doubleDanda=\uE065;
$zero=\uE066; # DIGIT ZERO
$one=\uE067; # DIGIT ONE
$two=\uE068; # DIGIT TWO
$three=\uE069; # DIGIT THREE
$four=\uE06A; # DIGIT FOUR
$five=\uE06B; # DIGIT FIVE
$six=\uE06C; # DIGIT SIX
$seven=\uE06D; # DIGIT SEVEN
$eight=\uE06E; # DIGIT EIGHT
$nine=\uE06F; # DIGIT NINE
# Glottal stop
$dgs=\uE082;
#Khanda-ta
$kta=\uE083;
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
$depVowelBelow=[\uE041-\uE044];
# $x was originally called '§'; $z was '%'
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
$z=[bcdfghjklmnpqrstvwxyz];
$vowels=[aeiour̥̄̆];
$forceIndependentMatra = [^[[:L:][̀-͌]]];
$strike=\u0336;
######################################################################
# normalize input
######################################################################
# delete zwnj
\u200C→;
# reprocess from beginning
::Null;
######################################################################
# convert from Native letters to Latin letters
######################################################################
#glottal stop
$wa$virama → k'';
#anusvara
$anusvara → ng;
#surang
ᬃ → r̀;
# Urdu compatibility
$ya$nukta}$x → y;
$ya$nukta$virama → y;
$ya$nukta → ya;
$la$nukta }$x → l;
$la$nukta$virama → l;
$la$nukta → la;
$na$nukta }$x → n;
$na$nukta$virama → n;
$na$nukta → na;
$ena }$x → n;
$ena$virama → n;
$ena → na;
$uka → qa;
$ka$nukta }$x → q;
$ka$nukta$virama → q;
$ka$nukta → qa;
$kha$nukta }$x → kh;
$kha$nukta$virama → kh;
$kha$nukta → kha;
$ukha$virama → kh;
$ukha → kha;
$ugha → gha;
$ga$nukta }$x → gh;
$ga$nukta$virama → gh;
$ga$nukta → gha;
$ujha → za;
$ja$nukta }$x → z;
$ja$nukta$virama → z;
$ja$nukta → za;
$ddha$nukta}$x → r;
$ddha$nukta$virama → r;
$ddha$nukta → ra;
$uddha}$x → r;
$uddha$virama → r;
$uddha → ra;
$udha → ra;
$dda$nukta}$x → r;
$dda$nukta$virama → r;
$dda$nukta → ra;
$pha$nukta }$x → f;
$pha$nukta$virama → f;
$pha$nukta → fa;
$ufa }$x → f;
$ufa$virama → f;
$ufa → fa;
$ra$nukta}$x → r;
$ra$nukta$virama → r;
$ra$nukta → ra;
$lla$nukta}$x → l;
$lla$nukta$virama → l;
$lla$nukta → la;
$ela}$x → l;
$ela$virama → l;
$ela → la;
$uya}$x → y;
$uya$virama → y;
$uya → ya;
# normal consonants
$ka$virama}$ha→k'';
$ka}$x→k;
$ka$virama→k;
$ka→ka;
$kha$i$u→k $strike h $strike;
$kha}$x→kh;
$kha$virama→kh;
$kha→kha;
$ga$virama}$ha→g'';
$ga}$x→g;
$ga$virama→g;
$ga→ga;
$gha$i$u→g $strike h $strike;
$gha}$x→gh;
$gha$virama→gh;
$gha→gha;
$nga$i$u→n $strike g $strike;
$nga}$x→ng;
$nga$virama→ng;
$nga→nga;
$ca$virama}$ha→c'';
$ca}$x→c;
$ca$virama→c;
$ca→ca;
$cha$i$u→c $strike h $strike;
$cha}$x→ch;
$cha$virama→ch;
$cha→cha;
$ja$virama}$ha→j'';
$ja}$x→j;
$ja$virama→j;
$ja→ja;
$jha$i$u→j $strike h $strike;
$jha}$x→jh;
$jha$virama→jh;
$jha→jha;
$nya }$x→ñ;
$nya$virama→ñ;
$nya → ña;
$tta$virama}$ha→ṭ'';
$tta}$x→ṭ;
$tta$virama→ṭ;
$tta→ṭa;
$ttha$i$u→ṭ $strike h $strike;
$ttha}$x→ṭh;
$ttha$virama→ṭh;
$ttha→ṭha;
$dda}$x$ha→ḍ'';
$dda}$x→ḍ;
$dda$virama→ḍ;
$dda→ḍa;
$ddha$i$u→ḍ $strike h $strike;
$ddha}$x→ḍh;
$ddha$virama→ḍh;
$ddha→ḍha;
$nna}$x→ṇ;
$nna$virama→ṇ;
$nna→ṇa;
$ta$virama}$ha→t'';
$ta}$x→t;
$ta$virama→t;
$ta→ta;
$tha$i$u→t $strike h $strike;
$tha}$x→th;
$tha$virama→th;
$tha→tha;
$da$virama}$ha→d'';
$da}$x→d;
$da$virama→d;
$da→da;
$dha$i$u→d $strike h $strike;
$dha}$x→dh;
$dha$virama→dh;
$dha→dha;
$na$virama}$ga→n'';
$na}$x→n;
$na$virama→n;
$na→na;
$pa$virama}$ha→p'';
$pa}$x→p;
$pa$virama→p;
$pa→pa;
$pha$i$u→p $strike h $strike;
$pha}$x→ph;
$pha$virama→ph;
$pha→pha;
$ba$virama}$ha→b'';
$ba}$x→b;
$ba$virama→b;
$ba→ba;
$bha$i$u→b $strike h $strike;
$bha}$x→bh;
$bha$virama→bh;
$bha→bha;
$ma}$x→m;
$ma$virama→m;
$ma→ma;
$ya}$x→y;
$ya$virama→y;
$ya→ya;
$ra}$x→r;
$ra$virama→r;
$ra→ra;
$vva}$x→v;
$vva$virama→v;
$vva→va;
$rra}$x→r;
$rra$virama→r;
$rra→ra;
$la}$x→l;
$la$virama→l;
$la→la;
$lla}$x→l;
$lla$virama→l;
$lla→la;
$va}$x→w;
$va$virama→w;
$va→wa;
$sa}$x→s;
$sa$virama→s;
#for gurmukhi
$sa$nukta}$x→sy;
$sa$nukta$virama→sy;
$sa$nukta→sya;
$sa→sa;
$sha}$x→ś;
$sha$virama→ś;
$sha→śa;
$ssa}$x→ṣ;
$ssa$virama→ṣ;
$ssa→ṣa;
$ha}$x→h;
$ha$virama→h;
$ha→ha;
# dependent vowels (should never occur except following consonants)
$forceIndependentMatra{$aa → ̔ā;
$forceIndependentMatra{$ai → ̔ai;
$forceIndependentMatra{$au → ̔au;
$forceIndependentMatra{$ii → ̔ī;
$forceIndependentMatra{$i → ̔i;
$forceIndependentMatra{$uu → ̔ū;
$forceIndependentMatra{$u → ̔u;
$forceIndependentMatra{$rrh → ̔r̥ö;
$forceIndependentMatra{$rh → ̔r̥ĕ;
$forceIndependentMatra{$llh → ̔l̥ö;
$forceIndependentMatra{$lh → ̔l̥ĕ;
$forceIndependentMatra{$e → ̔e;
$forceIndependentMatra{$o → ̔o;
#extra vowels
$forceIndependentMatra{$ce → ̔ĕ;
$forceIndependentMatra{$co → ̔ö;
$forceIndependentMatra{$se → ̔ĕ;
$forceIndependentMatra{$so → ̔o;
$forceIndependentMatra{$nukta →; # Nukta cannot appear independently or as first character
$forceIndependentMatra{$virama →; # Virama cannot appear independently or as first character
$i$u → $strike;
$aa → ā;
$ai → ai;
$au → au;
$ii → ī;
$i → i;
$uu → ū;
$u → u;
$rrh → r̥ö;
$rh → r̥ĕ;
$llh → l̥ö;
$lh → l̥ĕ;
$e → e;
$o → o;
#extra vowels
$ce → ĕ;
$co → ö;
$se → ĕ;
$so → o;
#dependent vowels when following independent vowels. Generally Illegal only for roundtripping
$waa} $x → ā;
$wai} $x → ai;
$wau} $x → au;
$wii} $x → ī;
$wi } $x → i;
$wuu} $x → ū;
$wu } $x → u;
$wrr} $x → r̥ö;
$wr } $x → r̥ĕ;
$wll} $x → l̥ö;
$wl } $x → l̥ĕ;
$we } $x → e;
$wo } $x → o;
$wa } $x → a;
#extra vowels
$wce} $x → ĕ;
$wco} $x → ö;
$wse} $x → ĕ;
$wso} $x → o;
$om} $x → oṁ;
# independent vowels when preceeded by vowels
$vowels{$waa → ''ā;
$vowels{$wai → ''ai;
$vowels{$wau → ''au;
$vowels{$wii → ''ī;
$vowels{$wi → ''i;
$vowels{$wuu → ''ū;
$vowels{$wu → ''u;
$vowels{$we → ''e;
$vowels{$wo → ''o;
$vowels{$wa → ''a;
#extra vowels
$vowels{$wce → ''ĕ;
$vowels{$wco → ''ö;
$vowels{$wse → ''ĕ;
$vowels{$wso → ''o;
$vowels{$om → ''oṁ;
# independent vowels (otherwise)
$waa → ā;
$wai → ai;
$wau → au;
$wii → ī;
$wi → i;
$wuu → ū;
$wu → u;
$wrr → r̥ö;
$wr → r̥ĕ;
$wll → l̥ö;
$wl → l̥ĕ;
$we → e;
$wo → o;
$wa → a;
#extra vowels
$wce → ĕ;
$wco → ö;
$wse → ĕ;
$wso → o;
$om → oṁ;
# stress marks
$avagraha → ;
$chandrabindu → ṅġ;
$ardhachandra → ṃ;
$visarga → ḥ;
# numbers
$zero → 0;
$one → 1;
$two → 2;
$three → 3;
$four → 4;
$five → 5;
$six → 6;
$seven → 7;
$eight → 8;
$nine → 9;
$lm →;
$ailm →;
$aulm →;
$dgs→'';
$kta→t;
# Balinese numbers are surrounded by dandas which can be removed
$danda } [$zero$one$two$three$four$five$six$seven$eight$nine] → ' ';
[0123456789] { $danda → ' ';
$danda→', ';
$doubleDanda→'. ';
\uE070→; # ABBREVIATION SIGN
# LETTER RA WITH MIDDLE DIAGONAL
\uE071}$x→ra;
\uE071$virama→r;
\uE071→ra;
# LETTER RA WITH LOWER DIAGONAL
\uE072}$x→ra;
\uE072$virama→r;
\uE072→ra;
\uE073→; # RUPEE MARK
\uE074→; # RUPEE SIGN
\uE075→; # CURRENCY NUMERATOR ONE
\uE076→; # CURRENCY NUMERATOR TWO
\uE077→; # CURRENCY NUMERATOR THREE
\uE078→; # CURRENCY NUMERATOR FOUR
\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\uE07A→; # CURRENCY DENOMINATOR SIXTEEN
\uE07B→; # ISSHAR
\uE07C→; # TIPPI
\uE07D→; # ADDAK
\uE07E→; # IRI
\uE07F→; # URA
\uE080→; # EK ONKAR
\uE004→; # DEVANAGARI VOWEL SIGN SHORT A
::NFC;
EOF;

		# transliteration rules following DHARMA project "strict transliteration"
		# mostly follows ISO-15919, with modifications for precision and broader coverage
		# https://hal.inria.fr/halshs-02272407/
		$rules['ban-x-dharma'] = <<<'EOF'
::NFC;
$dv_no_rerekan = [\u1B35-\u1B44];
$dv = [\u1B34$dv_no_rerekan];
$c = [\u1B13-\u1B33 \u1B45-\u1B4C];
# disambiguation from aspirates
[kgcjṭḍtdpb] { ᭄ } ᬳ → \:;
# various signs
ᬀ → ṁ\*; # ulu ricem / ardhacandra
ᬁ → m̐; # ulu candra / candrabindu
ᬂ → ṁ; # cecek / anusvara
ᬃ → r\=; # surang / repha (note, "Indonesian mode" not "Indian mode")
ᬄ → ḥ; # bisah / visarga
# akara used as glottal
ᬅ } $dv_no_rerekan → q;
# independent vowels
ᬅ → A; # LETTER AKARA
ᬆ → A\:; # LETTER AKARA TEDUNG
ᬇ → I; # LETTER IKARA
ᬈ → I\:; # LETTER IKARA TEDUNG
ᬉ → U; # LETTER UKARA
ᬊ → U\:; # LETTER UKARA TEDUNG
ᬋ → R̥; # LETTER RA REPA
ᬌ → R̥\:; # LETTER RA REPA TEDUNG
ᬍ → L̥; # LETTER LA LENGA
ᬎ → L̥̄; # LETTER LA LENGA TEDUNG
ᬏ → E; # LETTER EKARA
ᬐ → Ai; # LETTER AIKARA
ᬑ → O; # LETTER OKARA
ᬒ → O\:; # LETTER OKARA TEDUNG
# consonants
ᬓ } $dv → k; ᬓ → ka; # LETTER KA
ᬔ } $dv → kh; ᬔ → kha; # LETTER KA MAHAPRANA
ᬕ } $dv → g; ᬕ → ga; # LETTER GA
ᬖ } $dv → gh; ᬖ → gha; # LETTER GA GORA
ᬗ } $dv → ṅ; ᬗ → ṅa; # LETTER NGA
ᬘ } $dv → c; ᬘ → ca; # LETTER CA
ᬙ } $dv → ch; ᬙ → cha; # LETTER CA LACA
ᬚ } $dv → j; ᬚ → ja; # LETTER JA
ᬛ } $dv → jh; ᬛ → jha; # LETTER JA JERA
ᬜ } $dv → ñ; ᬜ → ña; # LETTER NYA
ᬝ } $dv → ṭ; ᬝ → ṭa; # LETTER TA LATIK
ᬞ } $dv → ṭh; ᬞ → ṭha; # LETTER TA MURDA MAHAPRANA
ᬟ } $dv → ḍ; ᬟ → ḍa; # LETTER DA MURDA ALPAPRANA
ᬠ } $dv → ḍh; ᬠ → ḍha; # LETTER DA MURDA MAHAPRANA
ᬡ } $dv → ṇ; ᬡ → ṇa; # LETTER NA RAMBAT
ᬢ } $dv → t; ᬢ → ta; # LETTER TA
ᬣ } $dv → th; ᬣ → tha; # LETTER TA TAWA
ᬤ } $dv → d; ᬤ → da; # LETTER DA
ᬥ } $dv → dh; ᬥ → dha; # LETTER DA MADU
ᬦ } $dv → n; ᬦ → na; # LETTER NA
ᬧ } $dv → p; ᬧ → pa; # LETTER PA
ᬨ } $dv → ph; ᬨ → pha; # LETTER PA KAPAL
ᬩ } $dv → b; ᬩ → ba; # LETTER BA
ᬪ } $dv → bh; ᬪ → bha; # LETTER BA KEMBANG
ᬫ } $dv → m; ᬫ → ma; # LETTER MA
ᬬ } $dv → y; ᬬ → ya; # LETTER YA
ᬭ } $dv → r; ᬭ → ra; # LETTER RA
ᬮ } $dv → l; ᬮ → la; # LETTER LA
ᬯ } $dv → v; ᬯ → va; # LETTER WA
ᬰ } $dv → ś; ᬰ → śa; # LETTER SA SAGA
ᬱ } $dv → ṣ; ᬱ → ṣa; # LETTER SA SAPA
ᬲ } $dv → s; ᬲ → sa; # LETTER SA
ᬳ } $dv → h; ᬳ → ha; # LETTER HA
\u1B4C } $dv → j\=ñ; \u1B4C → j\=ña; # LETTER ARCHAIC JNYA
# rerekan (not present in DHARMA, "*" used as impromptu mark)
᬴ } $dv_no_rerekan → \*; ᬴ → \* a; # SIGN REREKAN
# dependent vowels
ᬵ → ā; # VOWEL SIGN TEDUNG
ᬶ → i; # VOWEL SIGN ULU
ᬷ → ī; # VOWEL SIGN ULU SARI
ᬸ → u; # VOWEL SIGN SUKU
ᬹ → ū; # VOWEL SIGN SUKU ILUT
ᬺ → r̥; # VOWEL SIGN RA REPA
ᬻ → r̥\:; # VOWEL SIGN RA REPA TEDUNG
ᬼ→ l̥; # VOWEL SIGN LA LENGA
ᬽ → l̥\:; # VOWEL SIGN LA LENGA TEDUNG
ᬾ → e; # VOWEL SIGN TALING
ᬿ → ai; # VOWEL SIGN TALING REPA
ᭀ → o; # VOWEL SIGN TALING TEDUNG
ᭁ → au; # VOWEL SIGN TALING REPA TEDUNG
ᭂ → ə; # VOWEL SIGN PEPET
ᭃ → ə\:; # VOWEL SIGN PEPET TEDUNG
# adeg-adeg
᭄\u200C → ·; # explicit ADEG ADEG
᭄ } $c → ; # ADEG ADEG
᭄ → ·; # ADEG ADEG
# Sasak consonants (not present in DHARMA, "'" used as impromptu mark)
ᭅ } $dv → k\'; ᭅ → k\'a; # LETTER KAF SASAK
ᭆ } $dv → kh\'; ᭆ → kh\'a; # LETTER KHOT SASAK
ᭇ } $dv → t\'; ᭇ → t\'a; # LETTER TZIR SASAK
ᭈ } $dv → p\'; ᭈ → p\'a; # LETTER EF SASAK
ᭉ } $dv → v\'; ᭉ → v\'a; # LETTER VE SASAK
ᭊ } $dv → j\'; ᭊ → j\'a; # LETTER ZAL SASAK
ᭋ } $dv → s\'; ᭋ → s\'a; # LETTER ASYURA SASAK
# digits
᭐ → 0; # DIGIT ZERO
᭑ → 1; # DIGIT ONE
᭒ → 2; # DIGIT TWO
᭓ → 3; # DIGIT THREE
᭔ → 4; # DIGIT FOUR
᭕ → 5; # DIGIT FIVE
᭖ → 6; # DIGIT SIX
᭗ → 7; # DIGIT SEVEN
᭘ → 8; # DIGIT EIGHT
᭙ → 9; # DIGIT NINE
# punctuation
᭚ → ''; # PANTI
᭛ → ''; # PAMADA
᭜ → \@; # WINDU
᭝ → ''; # CARIK PAMUNGKAH
᭞ → \,; # CARIK SIKI
᭟ → \,\,; # CARIK PAREREN
᭠ → ''; # PAMENENG
\u1B7D → '';
\u1B7E → '';
EOF;

		# transliteration rules developed at Puri Kauhan Ubud and widely used in Bali
		# default Balinese to Latin transliteration variant
		$rules['ban-x-pku'] = <<<'EOF'
::NFC;
$dv_no_rerekan = [\u1B35-\u1B44];
$dv = [\u1B34$dv_no_rerekan];
$c = [\u1B13-\u1B33 \u1B45-\u1B4C];
$base = [\u1B05-\u1B33 \u1B45-\u1B60];
# ulu suku deletion mark
$base ᬶᬸ → ∅;
# disambiguation from aspirates
[kgcjṭḍtdpb] { ᭄ } ᬳ → \:;
# various signs
ᬀ → ṃ; # ulu ricem / ardhacandra
ᬁ → m̐; # ulu candra / candrabindu
ᬂ → ŋ; # cecek / anusvara
ᬃ → ŕ; # surang / repha (note, "Indonesian mode" not "Indian mode")
ᬄ → ḥ; # bisah / visarga
# akara used as glottal
ᬅ } $dv_no_rerekan → \*;
# independent vowels
ᬅ → ᵒa; # LETTER AKARA
ᬆ → ᵒā; # LETTER AKARA TEDUNG
ᬇ → ᵒi; # LETTER IKARA
ᬈ → ᵒī; # LETTER IKARA TEDUNG
ᬉ → ᵒu; # LETTER UKARA
ᬊ → ᵒū; # LETTER UKARA TEDUNG
ᬋ → r̥; # LETTER RA REPA
ᬌ → r̥̄; # LETTER RA REPA TEDUNG
ᬍ → l̥; # LETTER LA LENGA
ᬎ → l̥̄; # LETTER LA LENGA TEDUNG
ᬏ → ᵒe; # LETTER EKARA
ᬐ → ᵒai; # LETTER AIKARA
ᬑ → ᵒo; # LETTER OKARA
ᬒ → ᵒau; # LETTER OKARA TEDUNG
# consonants
ᬓ } $dv → k; ᬓ → ka; # LETTER KA
ᬔ } $dv → kh; ᬔ → kha; # LETTER KA MAHAPRANA
ᬕ } $dv → g; ᬕ → ga; # LETTER GA
ᬖ } $dv → gh; ᬖ → gha; # LETTER GA GORA
ᬗ } $dv → ṅ; ᬗ → ṅa; # LETTER NGA
ᬘ } $dv → c; ᬘ → ca; # LETTER CA
ᬙ } $dv → ch; ᬙ → cha; # LETTER CA LACA
ᬚ } $dv → j; ᬚ → ja; # LETTER JA
ᬛ } $dv → jh; ᬛ → jha; # LETTER JA JERA
ᬜ } $dv → ñ; ᬜ → ña; # LETTER NYA
ᬝ } $dv → ṭ; ᬝ → ṭa; # LETTER TA LATIK
ᬞ } $dv → ṭh; ᬞ → ṭha; # LETTER TA MURDA MAHAPRANA
ᬟ } $dv → ḍ; ᬟ → ḍa; # LETTER DA MURDA ALPAPRANA
ᬠ } $dv → ḍh; ᬠ → ḍha; # LETTER DA MURDA MAHAPRANA
ᬡ } $dv → ṇ; ᬡ → ṇa; # LETTER NA RAMBAT
ᬢ } $dv → t; ᬢ → ta; # LETTER TA
ᬣ } $dv → th; ᬣ → tha; # LETTER TA TAWA
ᬤ } $dv → d; ᬤ → da; # LETTER DA
ᬥ } $dv → dh; ᬥ → dha; # LETTER DA MADU
ᬦ } $dv → n; ᬦ → na; # LETTER NA
ᬧ } $dv → p; ᬧ → pa; # LETTER PA
ᬨ } $dv → ph; ᬨ → pha; # LETTER PA KAPAL
ᬩ } $dv → b; ᬩ → ba; # LETTER BA
ᬪ } $dv → bh; ᬪ → bha; # LETTER BA KEMBANG
ᬫ } $dv → m; ᬫ → ma; # LETTER MA
ᬬ } $dv → y; ᬬ → ya; # LETTER YA
ᬭ } $dv → r; ᬭ → ra; # LETTER RA
ᬮ } $dv → l; ᬮ → la; # LETTER LA
ᬯ } $dv → w; ᬯ → wa; # LETTER WA
ᬰ } $dv → ś; ᬰ → śa; # LETTER SA SAGA
ᬱ } $dv → ṣ; ᬱ → ṣa; # LETTER SA SAPA
ᬲ } $dv → s; ᬲ → sa; # LETTER SA
ᬳ } $dv → h; ᬳ → ha; # LETTER HA
\u1B4C } $dv → j\=ñ; \u1B4C → j\=ña; # LETTER ARCHAIC JNYA
# rerekan (not present in DHARMA, "*" used as impromptu mark)
᬴ } $dv_no_rerekan → \*; ᬴ → \* a; # SIGN REREKAN
# dependent vowels
ᬵ → ā; # VOWEL SIGN TEDUNG
ᬶ → i; # VOWEL SIGN ULU
ᬷ → ī; # VOWEL SIGN ULU SARI
ᬸ → u; # VOWEL SIGN SUKU
ᬹ → ū; # VOWEL SIGN SUKU ILUT
ᬺᭂ → r̥ĕ;
ᬺ → r̥ĕ; # VOWEL SIGN RA REPA
ᬻ → r̥ö; # VOWEL SIGN RA REPA TEDUNG
ᬼ→ lĕ; # VOWEL SIGN LA LENGA
ᬽ → lö; # VOWEL SIGN LA LENGA TEDUNG
ᬾ → e; # VOWEL SIGN TALING
ᬿ → ai; # VOWEL SIGN TALING REPA
ᭀ → o; # VOWEL SIGN TALING TEDUNG
ᭁ → au; # VOWEL SIGN TALING REPA TEDUNG
ᭂ → ĕ; # VOWEL SIGN PEPET
ᭃ → ö; # VOWEL SIGN PEPET TEDUNG
# adeg-adeg
᭄\u200C → ·; # explicit ADEG ADEG
᭄ } $c → ; # ADEG ADEG
᭄ → ·; # ADEG ADEG
# Sasak consonants (not present in DHARMA, "'" used as impromptu mark)
ᭅ } $dv → k\'; ᭅ → k\'a; # LETTER KAF SASAK
ᭆ } $dv → kh\'; ᭆ → kh\'a; # LETTER KHOT SASAK
ᭇ } $dv → t\'; ᭇ → t\'a; # LETTER TZIR SASAK
ᭈ } $dv → p\'; ᭈ → p\'a; # LETTER EF SASAK
ᭉ } $dv → w\'; ᭉ → w\'a; # LETTER VE SASAK
ᭊ } $dv → j\'; ᭊ → j\'a; # LETTER ZAL SASAK
ᭋ } $dv → s\'; ᭋ → s\'a; # LETTER ASYURA SASAK
# digits
᭐ → 0; # DIGIT ZERO
᭑ → 1; # DIGIT ONE
᭒ → 2; # DIGIT TWO
᭓ → 3; # DIGIT THREE
᭔ → 4; # DIGIT FOUR
᭕ → 5; # DIGIT FIVE
᭖ → 6; # DIGIT SIX
᭗ → 7; # DIGIT SEVEN
᭘ → 8; # DIGIT EIGHT
᭙ → 9; # DIGIT NINE
# punctuation
᭚ → '||'; # PANTI
᭛ → '//'; # PAMADA
᭜ → 0; # WINDU
᭝ → \=; # CARIK PAMUNGKAH
᭞ → \,; # CARIK SIKI
᭟ → \.; # CARIK PAREREN
᭠ → \-; # PAMENENG
\u1B7D → '|||';
\u1B7E → '///';
EOF;

		return $rules;
	}

	/**
	 * Variant codes that reuse another variant's rule set.
	 *
	 * Both 'ban' and 'ban-bali' are pointed at the 'ban-x-pku' rules.
	 *
	 * @return string[] Map of alias variant code => variant code owning the rules
	 */
	protected function getTransliteratorAliases() {
		return [
			'ban' => 'ban-x-pku',
			'ban-bali' => 'ban-x-pku',
		];
	}

	/**
	 * Guess if a text is written in Balinese or Latin.
	 *
	 * The text counts as Balinese script when it contains at least one code
	 * point in U+1B00..U+1B7F. The guess is positive when that finding agrees
	 * with the requested variant: 'ban-bali' expects Balinese script, every
	 * other variant expects none.
	 *
	 * @param string $text The text to be checked
	 * @param string $variant Language code of the variant to be checked for
	 * @return bool True if $text appears to be written in $variant
	 */
	public function guessVariant( $text, $variant ) {
		// preg_match() returns 1 on a hit, 0 on a miss and false on failure
		// (e.g. malformed UTF-8 under /u); only an actual hit counts.
		$hasBalinese = preg_match( "/[\x{1B00}-\x{1B7F}]/u", $text ) === 1;

		return ( $variant === 'ban-bali' ) === $hasBalinese;
	}
}