diff options
Diffstat (limited to 'components/shared/base')
-rw-r--r-- | components/shared/base/Cargo.toml | 2 | ||||
-rwxr-xr-x | components/shared/base/generate-unicode-block.py | 63 | ||||
-rw-r--r-- | components/shared/base/lib.rs | 6 | ||||
-rw-r--r-- | components/shared/base/text.rs | 64 | ||||
-rw-r--r-- | components/shared/base/unicode_block.rs | 679 |
5 files changed, 811 insertions, 3 deletions
diff --git a/components/shared/base/Cargo.toml b/components/shared/base/Cargo.toml index f33f90ec9a2..bdf4237bb27 100644 --- a/components/shared/base/Cargo.toml +++ b/components/shared/base/Cargo.toml @@ -9,7 +9,7 @@ publish = false [lib] name = "base" path = "lib.rs" -test = false +test = true doctest = false [dependencies] diff --git a/components/shared/base/generate-unicode-block.py b/components/shared/base/generate-unicode-block.py new file mode 100755 index 00000000000..3191d4f26f1 --- /dev/null +++ b/components/shared/base/generate-unicode-block.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at https://mozilla.org/MPL/2.0/. + +# The beginning of this script is both valid shell and valid python, +# such that the script starts with the shell and is reexecuted with +# the right python. + +import dataclasses +import re +import sys + + +@dataclasses.dataclass +class UnicodeBlock: + name: str + start: str + end: str + + +def process_line(line: str) -> UnicodeBlock: + # Split on either '..' or ';' surrounded by whitespace. + [start, end, name] = re.split(r"\W*\.\.|;\W*", line, maxsplit=3) + name = name.strip().replace("-", "").replace(" ", "") + return UnicodeBlock(name, start.zfill(6), end.zfill(6)) + + +with open(sys.argv[1]) as file: + lines_to_keep = filter( + lambda line: line.strip() and not line.startswith("#"), + file.readlines() + ) + results = list(map(process_line, lines_to_keep)) + +print("/* This Source Code Form is subject to the terms of the Mozilla Public") +print(" * License, v. 2.0. If a copy of the MPL was not distributed with this") +print(" * file, You can obtain one at https://mozilla.org/MPL/2.0/. */") +print() +print("// Do not edit:") +print("// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt.") +print("// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs") +print() +print("#[derive(Clone, Copy, Debug, PartialEq)]") +print("pub enum UnicodeBlock {") +for block in results: + print(f" {block.name},") +print("}") +print() +print("pub trait UnicodeBlockMethod {") +print(" fn block(&self) -> Option<UnicodeBlock>;") +print("}") +print() +print("impl UnicodeBlockMethod for char {") +print(" fn block(&self) -> Option<UnicodeBlock> {") +print(" match *self as u32 {") +for block in results: + print(f" 0x{block.start}..=0x{block.end} => Some(UnicodeBlock::{block.name}),") +print(" _ => None,") +print(" }") +print(" }") +print("}") diff --git a/components/shared/base/lib.rs b/components/shared/base/lib.rs index e084f142112..062ece48e21 100644 --- a/components/shared/base/lib.rs +++ b/components/shared/base/lib.rs @@ -9,11 +9,13 @@ //! You should almost never need to add a data type to this crate. Instead look for //! a more shared crate that has fewer dependents. -use serde::{Deserialize, Serialize}; - pub mod generic_channel; pub mod id; pub mod print_tree; +pub mod text; +mod unicode_block; + +use serde::{Deserialize, Serialize}; use webrender_api::Epoch as WebRenderEpoch; /// A struct for denoting the age of messages; prevents race conditions. diff --git a/components/shared/base/text.rs b/components/shared/base/text.rs new file mode 100644 index 00000000000..9a3fa5f68e6 --- /dev/null +++ b/components/shared/base/text.rs @@ -0,0 +1,64 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +pub use crate::unicode_block::{UnicodeBlock, UnicodeBlockMethod}; + +pub fn is_bidi_control(c: char) -> bool { + matches!(c, '\u{202A}'..='\u{202E}' | '\u{2066}'..='\u{2069}' | '\u{200E}' | '\u{200F}' | '\u{061C}') +} + +pub fn unicode_plane(codepoint: char) -> u32 { + (codepoint as u32) >> 16 +} + +pub fn is_cjk(codepoint: char) -> bool { + if let Some(block) = codepoint.block() { + match block { + UnicodeBlock::CJKRadicalsSupplement | + UnicodeBlock::KangxiRadicals | + UnicodeBlock::IdeographicDescriptionCharacters | + UnicodeBlock::CJKSymbolsandPunctuation | + UnicodeBlock::Hiragana | + UnicodeBlock::Katakana | + UnicodeBlock::Bopomofo | + UnicodeBlock::HangulCompatibilityJamo | + UnicodeBlock::Kanbun | + UnicodeBlock::BopomofoExtended | + UnicodeBlock::CJKStrokes | + UnicodeBlock::KatakanaPhoneticExtensions | + UnicodeBlock::EnclosedCJKLettersandMonths | + UnicodeBlock::CJKCompatibility | + UnicodeBlock::CJKUnifiedIdeographsExtensionA | + UnicodeBlock::YijingHexagramSymbols | + UnicodeBlock::CJKUnifiedIdeographs | + UnicodeBlock::CJKCompatibilityIdeographs | + UnicodeBlock::CJKCompatibilityForms | + UnicodeBlock::HalfwidthandFullwidthForms => return true, + _ => {}, + } + } + + // https://en.wikipedia.org/wiki/Plane_(Unicode)#Supplementary_Ideographic_Plane + // https://en.wikipedia.org/wiki/Plane_(Unicode)#Tertiary_Ideographic_Plane + unicode_plane(codepoint) == 2 || unicode_plane(codepoint) == 3 +} + +#[test] +fn test_is_cjk() { + // Test characters from different CJK blocks + assert_eq!(is_cjk('〇'), true); + assert_eq!(is_cjk('㐀'), true); + assert_eq!(is_cjk('あ'), true); + assert_eq!(is_cjk('ア'), true); + assert_eq!(is_cjk('㆒'), true); + assert_eq!(is_cjk('ㆣ'), true); + assert_eq!(is_cjk('龥'), true); + assert_eq!(is_cjk('𰾑'), true); + assert_eq!(is_cjk('𰻝'), true); + + // Test characters from outside CJK blocks + assert_eq!(is_cjk('a'), false); + assert_eq!(is_cjk('🙂'), false); + assert_eq!(is_cjk('©'), false); +} diff --git a/components/shared/base/unicode_block.rs b/components/shared/base/unicode_block.rs new file mode 100644 index 00000000000..3356a21fd0c --- /dev/null +++ b/components/shared/base/unicode_block.rs @@ -0,0 +1,679 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +// Do not edit: +// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt. +// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum UnicodeBlock { + BasicLatin, + Latin1Supplement, + LatinExtendedA, + LatinExtendedB, + IPAExtensions, + SpacingModifierLetters, + CombiningDiacriticalMarks, + GreekandCoptic, + Cyrillic, + CyrillicSupplement, + Armenian, + Hebrew, + Arabic, + Syriac, + ArabicSupplement, + Thaana, + NKo, + Samaritan, + Mandaic, + SyriacSupplement, + ArabicExtendedB, + ArabicExtendedA, + Devanagari, + Bengali, + Gurmukhi, + Gujarati, + Oriya, + Tamil, + Telugu, + Kannada, + Malayalam, + Sinhala, + Thai, + Lao, + Tibetan, + Myanmar, + Georgian, + HangulJamo, + Ethiopic, + EthiopicSupplement, + Cherokee, + UnifiedCanadianAboriginalSyllabics, + Ogham, + Runic, + Tagalog, + Hanunoo, + Buhid, + Tagbanwa, + Khmer, + Mongolian, + UnifiedCanadianAboriginalSyllabicsExtended, + Limbu, + TaiLe, + NewTaiLue, + KhmerSymbols, + Buginese, + TaiTham, + CombiningDiacriticalMarksExtended, + Balinese, + Sundanese, + Batak, + Lepcha, + OlChiki, + CyrillicExtendedC, + GeorgianExtended, + SundaneseSupplement, + VedicExtensions, + PhoneticExtensions, + PhoneticExtensionsSupplement, + CombiningDiacriticalMarksSupplement, + LatinExtendedAdditional, + GreekExtended, + GeneralPunctuation, + SuperscriptsandSubscripts, + CurrencySymbols, + CombiningDiacriticalMarksforSymbols, + LetterlikeSymbols, + NumberForms, + Arrows, + MathematicalOperators, + MiscellaneousTechnical, + ControlPictures, + OpticalCharacterRecognition, + EnclosedAlphanumerics, + BoxDrawing, + BlockElements, + GeometricShapes, + MiscellaneousSymbols, + Dingbats, + MiscellaneousMathematicalSymbolsA, + SupplementalArrowsA, + BraillePatterns, + SupplementalArrowsB, + MiscellaneousMathematicalSymbolsB, + SupplementalMathematicalOperators, + MiscellaneousSymbolsandArrows, + Glagolitic, + LatinExtendedC, + Coptic, + GeorgianSupplement, + Tifinagh, + EthiopicExtended, + CyrillicExtendedA, + SupplementalPunctuation, + CJKRadicalsSupplement, + KangxiRadicals, + IdeographicDescriptionCharacters, + CJKSymbolsandPunctuation, + Hiragana, + Katakana, + Bopomofo, + HangulCompatibilityJamo, + Kanbun, + BopomofoExtended, + CJKStrokes, + KatakanaPhoneticExtensions, + EnclosedCJKLettersandMonths, + CJKCompatibility, + CJKUnifiedIdeographsExtensionA, + YijingHexagramSymbols, + CJKUnifiedIdeographs, + YiSyllables, + YiRadicals, + Lisu, + Vai, + CyrillicExtendedB, + Bamum, + ModifierToneLetters, + LatinExtendedD, + SylotiNagri, + CommonIndicNumberForms, + Phagspa, + Saurashtra, + DevanagariExtended, + KayahLi, + Rejang, + HangulJamoExtendedA, + Javanese, + MyanmarExtendedB, + Cham, + MyanmarExtendedA, + TaiViet, + MeeteiMayekExtensions, + EthiopicExtendedA, + LatinExtendedE, + CherokeeSupplement, + MeeteiMayek, + HangulSyllables, + HangulJamoExtendedB, + HighSurrogates, + HighPrivateUseSurrogates, + LowSurrogates, + PrivateUseArea, + CJKCompatibilityIdeographs, + AlphabeticPresentationForms, + ArabicPresentationFormsA, + VariationSelectors, + VerticalForms, + CombiningHalfMarks, + CJKCompatibilityForms, + SmallFormVariants, + ArabicPresentationFormsB, + HalfwidthandFullwidthForms, + Specials, + LinearBSyllabary, + LinearBIdeograms, + AegeanNumbers, + AncientGreekNumbers, + AncientSymbols, + PhaistosDisc, + Lycian, + Carian, + CopticEpactNumbers, + OldItalic, + Gothic, + OldPermic, + Ugaritic, + OldPersian, + Deseret, + Shavian, + Osmanya, + Osage, + Elbasan, + CaucasianAlbanian, + Vithkuqi, + LinearA, + LatinExtendedF, + CypriotSyllabary, + ImperialAramaic, + Palmyrene, + Nabataean, + Hatran, + Phoenician, + Lydian, + MeroiticHieroglyphs, + MeroiticCursive, + Kharoshthi, + OldSouthArabian, + OldNorthArabian, + Manichaean, + Avestan, + InscriptionalParthian, + InscriptionalPahlavi, + PsalterPahlavi, + OldTurkic, + OldHungarian, + HanifiRohingya, + RumiNumeralSymbols, + Yezidi, + ArabicExtendedC, + OldSogdian, + Sogdian, + OldUyghur, + Chorasmian, + Elymaic, + Brahmi, + Kaithi, + SoraSompeng, + Chakma, + Mahajani, + Sharada, + SinhalaArchaicNumbers, + Khojki, + Multani, + Khudawadi, + Grantha, + Newa, + Tirhuta, + Siddham, + Modi, + MongolianSupplement, + Takri, + Ahom, + Dogra, + WarangCiti, + DivesAkuru, + Nandinagari, + ZanabazarSquare, + Soyombo, + UnifiedCanadianAboriginalSyllabicsExtendedA, + PauCinHau, + DevanagariExtendedA, + Bhaiksuki, + Marchen, + MasaramGondi, + GunjalaGondi, + Makasar, + Kawi, + LisuSupplement, + TamilSupplement, + Cuneiform, + CuneiformNumbersandPunctuation, + EarlyDynasticCuneiform, + CyproMinoan, + EgyptianHieroglyphs, + EgyptianHieroglyphFormatControls, + AnatolianHieroglyphs, + BamumSupplement, + Mro, + Tangsa, + BassaVah, + PahawhHmong, + Medefaidrin, + Miao, + IdeographicSymbolsandPunctuation, + Tangut, + TangutComponents, + KhitanSmallScript, + TangutSupplement, + KanaExtendedB, + KanaSupplement, + KanaExtendedA, + SmallKanaExtension, + Nushu, + Duployan, + ShorthandFormatControls, + ZnamennyMusicalNotation, + ByzantineMusicalSymbols, + MusicalSymbols, + AncientGreekMusicalNotation, + KaktovikNumerals, + MayanNumerals, + TaiXuanJingSymbols, + CountingRodNumerals, + MathematicalAlphanumericSymbols, + SuttonSignWriting, + LatinExtendedG, + GlagoliticSupplement, + CyrillicExtendedD, + NyiakengPuachueHmong, + Toto, + Wancho, + NagMundari, + EthiopicExtendedB, + MendeKikakui, + Adlam, + IndicSiyaqNumbers, + OttomanSiyaqNumbers, + ArabicMathematicalAlphabeticSymbols, + MahjongTiles, + DominoTiles, + PlayingCards, + EnclosedAlphanumericSupplement, + EnclosedIdeographicSupplement, + MiscellaneousSymbolsandPictographs, + Emoticons, + OrnamentalDingbats, + TransportandMapSymbols, + AlchemicalSymbols, + GeometricShapesExtended, + SupplementalArrowsC, + SupplementalSymbolsandPictographs, + ChessSymbols, + SymbolsandPictographsExtendedA, + SymbolsforLegacyComputing, + CJKUnifiedIdeographsExtensionB, + CJKUnifiedIdeographsExtensionC, + CJKUnifiedIdeographsExtensionD, + CJKUnifiedIdeographsExtensionE, + CJKUnifiedIdeographsExtensionF, + CJKUnifiedIdeographsExtensionI, + CJKCompatibilityIdeographsSupplement, + CJKUnifiedIdeographsExtensionG, + CJKUnifiedIdeographsExtensionH, + Tags, + VariationSelectorsSupplement, + SupplementaryPrivateUseAreaA, + SupplementaryPrivateUseAreaB, +} + +pub trait UnicodeBlockMethod { + fn block(&self) -> Option<UnicodeBlock>; +} + +impl UnicodeBlockMethod for char { + fn block(&self) -> Option<UnicodeBlock> { + match *self as u32 { + 0x000000..=0x00007F => Some(UnicodeBlock::BasicLatin), + 0x000080..=0x0000FF => Some(UnicodeBlock::Latin1Supplement), + 0x000100..=0x00017F => Some(UnicodeBlock::LatinExtendedA), + 0x000180..=0x00024F => Some(UnicodeBlock::LatinExtendedB), + 0x000250..=0x0002AF => Some(UnicodeBlock::IPAExtensions), + 0x0002B0..=0x0002FF => Some(UnicodeBlock::SpacingModifierLetters), + 0x000300..=0x00036F => Some(UnicodeBlock::CombiningDiacriticalMarks), + 0x000370..=0x0003FF => Some(UnicodeBlock::GreekandCoptic), + 0x000400..=0x0004FF => Some(UnicodeBlock::Cyrillic), + 0x000500..=0x00052F => Some(UnicodeBlock::CyrillicSupplement), + 0x000530..=0x00058F => Some(UnicodeBlock::Armenian), + 0x000590..=0x0005FF => Some(UnicodeBlock::Hebrew), + 0x000600..=0x0006FF => Some(UnicodeBlock::Arabic), + 0x000700..=0x00074F => Some(UnicodeBlock::Syriac), + 0x000750..=0x00077F => Some(UnicodeBlock::ArabicSupplement), + 0x000780..=0x0007BF => Some(UnicodeBlock::Thaana), + 0x0007C0..=0x0007FF => Some(UnicodeBlock::NKo), + 0x000800..=0x00083F => Some(UnicodeBlock::Samaritan), + 0x000840..=0x00085F => Some(UnicodeBlock::Mandaic), + 0x000860..=0x00086F => Some(UnicodeBlock::SyriacSupplement), + 0x000870..=0x00089F => Some(UnicodeBlock::ArabicExtendedB), + 0x0008A0..=0x0008FF => Some(UnicodeBlock::ArabicExtendedA), + 0x000900..=0x00097F => Some(UnicodeBlock::Devanagari), + 0x000980..=0x0009FF => Some(UnicodeBlock::Bengali), + 0x000A00..=0x000A7F => Some(UnicodeBlock::Gurmukhi), + 0x000A80..=0x000AFF => Some(UnicodeBlock::Gujarati), + 0x000B00..=0x000B7F => Some(UnicodeBlock::Oriya), + 0x000B80..=0x000BFF => Some(UnicodeBlock::Tamil), + 0x000C00..=0x000C7F => Some(UnicodeBlock::Telugu), + 0x000C80..=0x000CFF => Some(UnicodeBlock::Kannada), + 0x000D00..=0x000D7F => Some(UnicodeBlock::Malayalam), + 0x000D80..=0x000DFF => Some(UnicodeBlock::Sinhala), + 0x000E00..=0x000E7F => Some(UnicodeBlock::Thai), + 0x000E80..=0x000EFF => Some(UnicodeBlock::Lao), + 0x000F00..=0x000FFF => Some(UnicodeBlock::Tibetan), + 0x001000..=0x00109F => Some(UnicodeBlock::Myanmar), + 0x0010A0..=0x0010FF => Some(UnicodeBlock::Georgian), + 0x001100..=0x0011FF => Some(UnicodeBlock::HangulJamo), + 0x001200..=0x00137F => Some(UnicodeBlock::Ethiopic), + 0x001380..=0x00139F => Some(UnicodeBlock::EthiopicSupplement), + 0x0013A0..=0x0013FF => Some(UnicodeBlock::Cherokee), + 0x001400..=0x00167F => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabics), + 0x001680..=0x00169F => Some(UnicodeBlock::Ogham), + 0x0016A0..=0x0016FF => Some(UnicodeBlock::Runic), + 0x001700..=0x00171F => Some(UnicodeBlock::Tagalog), + 0x001720..=0x00173F => Some(UnicodeBlock::Hanunoo), + 0x001740..=0x00175F => Some(UnicodeBlock::Buhid), + 0x001760..=0x00177F => Some(UnicodeBlock::Tagbanwa), + 0x001780..=0x0017FF => Some(UnicodeBlock::Khmer), + 0x001800..=0x0018AF => Some(UnicodeBlock::Mongolian), + 0x0018B0..=0x0018FF => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabicsExtended), + 0x001900..=0x00194F => Some(UnicodeBlock::Limbu), + 0x001950..=0x00197F => Some(UnicodeBlock::TaiLe), + 0x001980..=0x0019DF => Some(UnicodeBlock::NewTaiLue), + 0x0019E0..=0x0019FF => Some(UnicodeBlock::KhmerSymbols), + 0x001A00..=0x001A1F => Some(UnicodeBlock::Buginese), + 0x001A20..=0x001AAF => Some(UnicodeBlock::TaiTham), + 0x001AB0..=0x001AFF => Some(UnicodeBlock::CombiningDiacriticalMarksExtended), + 0x001B00..=0x001B7F => Some(UnicodeBlock::Balinese), + 0x001B80..=0x001BBF => Some(UnicodeBlock::Sundanese), + 0x001BC0..=0x001BFF => Some(UnicodeBlock::Batak), + 0x001C00..=0x001C4F => Some(UnicodeBlock::Lepcha), + 0x001C50..=0x001C7F => Some(UnicodeBlock::OlChiki), + 0x001C80..=0x001C8F => Some(UnicodeBlock::CyrillicExtendedC), + 0x001C90..=0x001CBF => Some(UnicodeBlock::GeorgianExtended), + 0x001CC0..=0x001CCF => Some(UnicodeBlock::SundaneseSupplement), + 0x001CD0..=0x001CFF => Some(UnicodeBlock::VedicExtensions), + 0x001D00..=0x001D7F => Some(UnicodeBlock::PhoneticExtensions), + 0x001D80..=0x001DBF => Some(UnicodeBlock::PhoneticExtensionsSupplement), + 0x001DC0..=0x001DFF => Some(UnicodeBlock::CombiningDiacriticalMarksSupplement), + 0x001E00..=0x001EFF => Some(UnicodeBlock::LatinExtendedAdditional), + 0x001F00..=0x001FFF => Some(UnicodeBlock::GreekExtended), + 0x002000..=0x00206F => Some(UnicodeBlock::GeneralPunctuation), + 0x002070..=0x00209F => Some(UnicodeBlock::SuperscriptsandSubscripts), + 0x0020A0..=0x0020CF => Some(UnicodeBlock::CurrencySymbols), + 0x0020D0..=0x0020FF => Some(UnicodeBlock::CombiningDiacriticalMarksforSymbols), + 0x002100..=0x00214F => Some(UnicodeBlock::LetterlikeSymbols), + 0x002150..=0x00218F => Some(UnicodeBlock::NumberForms), + 0x002190..=0x0021FF => Some(UnicodeBlock::Arrows), + 0x002200..=0x0022FF => Some(UnicodeBlock::MathematicalOperators), + 0x002300..=0x0023FF => Some(UnicodeBlock::MiscellaneousTechnical), + 0x002400..=0x00243F => Some(UnicodeBlock::ControlPictures), + 0x002440..=0x00245F => Some(UnicodeBlock::OpticalCharacterRecognition), + 0x002460..=0x0024FF => Some(UnicodeBlock::EnclosedAlphanumerics), + 0x002500..=0x00257F => Some(UnicodeBlock::BoxDrawing), + 0x002580..=0x00259F => Some(UnicodeBlock::BlockElements), + 0x0025A0..=0x0025FF => Some(UnicodeBlock::GeometricShapes), + 0x002600..=0x0026FF => Some(UnicodeBlock::MiscellaneousSymbols), + 0x002700..=0x0027BF => Some(UnicodeBlock::Dingbats), + 0x0027C0..=0x0027EF => Some(UnicodeBlock::MiscellaneousMathematicalSymbolsA), + 0x0027F0..=0x0027FF => Some(UnicodeBlock::SupplementalArrowsA), + 0x002800..=0x0028FF => Some(UnicodeBlock::BraillePatterns), + 0x002900..=0x00297F => Some(UnicodeBlock::SupplementalArrowsB), + 0x002980..=0x0029FF => Some(UnicodeBlock::MiscellaneousMathematicalSymbolsB), + 0x002A00..=0x002AFF => Some(UnicodeBlock::SupplementalMathematicalOperators), + 0x002B00..=0x002BFF => Some(UnicodeBlock::MiscellaneousSymbolsandArrows), + 0x002C00..=0x002C5F => Some(UnicodeBlock::Glagolitic), + 0x002C60..=0x002C7F => Some(UnicodeBlock::LatinExtendedC), + 0x002C80..=0x002CFF => Some(UnicodeBlock::Coptic), + 0x002D00..=0x002D2F => Some(UnicodeBlock::GeorgianSupplement), + 0x002D30..=0x002D7F => Some(UnicodeBlock::Tifinagh), + 0x002D80..=0x002DDF => Some(UnicodeBlock::EthiopicExtended), + 0x002DE0..=0x002DFF => Some(UnicodeBlock::CyrillicExtendedA), + 0x002E00..=0x002E7F => Some(UnicodeBlock::SupplementalPunctuation), + 0x002E80..=0x002EFF => Some(UnicodeBlock::CJKRadicalsSupplement), + 0x002F00..=0x002FDF => Some(UnicodeBlock::KangxiRadicals), + 0x002FF0..=0x002FFF => Some(UnicodeBlock::IdeographicDescriptionCharacters), + 0x003000..=0x00303F => Some(UnicodeBlock::CJKSymbolsandPunctuation), + 0x003040..=0x00309F => Some(UnicodeBlock::Hiragana), + 0x0030A0..=0x0030FF => Some(UnicodeBlock::Katakana), + 0x003100..=0x00312F => Some(UnicodeBlock::Bopomofo), + 0x003130..=0x00318F => Some(UnicodeBlock::HangulCompatibilityJamo), + 0x003190..=0x00319F => Some(UnicodeBlock::Kanbun), + 0x0031A0..=0x0031BF => Some(UnicodeBlock::BopomofoExtended), + 0x0031C0..=0x0031EF => Some(UnicodeBlock::CJKStrokes), + 0x0031F0..=0x0031FF => Some(UnicodeBlock::KatakanaPhoneticExtensions), + 0x003200..=0x0032FF => Some(UnicodeBlock::EnclosedCJKLettersandMonths), + 0x003300..=0x0033FF => Some(UnicodeBlock::CJKCompatibility), + 0x003400..=0x004DBF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionA), + 0x004DC0..=0x004DFF => Some(UnicodeBlock::YijingHexagramSymbols), + 0x004E00..=0x009FFF => Some(UnicodeBlock::CJKUnifiedIdeographs), + 0x00A000..=0x00A48F => Some(UnicodeBlock::YiSyllables), + 0x00A490..=0x00A4CF => Some(UnicodeBlock::YiRadicals), + 0x00A4D0..=0x00A4FF => Some(UnicodeBlock::Lisu), + 0x00A500..=0x00A63F => Some(UnicodeBlock::Vai), + 0x00A640..=0x00A69F => Some(UnicodeBlock::CyrillicExtendedB), + 0x00A6A0..=0x00A6FF => Some(UnicodeBlock::Bamum), + 0x00A700..=0x00A71F => Some(UnicodeBlock::ModifierToneLetters), + 0x00A720..=0x00A7FF => Some(UnicodeBlock::LatinExtendedD), + 0x00A800..=0x00A82F => Some(UnicodeBlock::SylotiNagri), + 0x00A830..=0x00A83F => Some(UnicodeBlock::CommonIndicNumberForms), + 0x00A840..=0x00A87F => Some(UnicodeBlock::Phagspa), + 0x00A880..=0x00A8DF => Some(UnicodeBlock::Saurashtra), + 0x00A8E0..=0x00A8FF => Some(UnicodeBlock::DevanagariExtended), + 0x00A900..=0x00A92F => Some(UnicodeBlock::KayahLi), + 0x00A930..=0x00A95F => Some(UnicodeBlock::Rejang), + 0x00A960..=0x00A97F => Some(UnicodeBlock::HangulJamoExtendedA), + 0x00A980..=0x00A9DF => Some(UnicodeBlock::Javanese), + 0x00A9E0..=0x00A9FF => Some(UnicodeBlock::MyanmarExtendedB), + 0x00AA00..=0x00AA5F => Some(UnicodeBlock::Cham), + 0x00AA60..=0x00AA7F => Some(UnicodeBlock::MyanmarExtendedA), + 0x00AA80..=0x00AADF => Some(UnicodeBlock::TaiViet), + 0x00AAE0..=0x00AAFF => Some(UnicodeBlock::MeeteiMayekExtensions), + 0x00AB00..=0x00AB2F => Some(UnicodeBlock::EthiopicExtendedA), + 0x00AB30..=0x00AB6F => Some(UnicodeBlock::LatinExtendedE), + 0x00AB70..=0x00ABBF => Some(UnicodeBlock::CherokeeSupplement), + 0x00ABC0..=0x00ABFF => Some(UnicodeBlock::MeeteiMayek), + 0x00AC00..=0x00D7AF => Some(UnicodeBlock::HangulSyllables), + 0x00D7B0..=0x00D7FF => Some(UnicodeBlock::HangulJamoExtendedB), + 0x00D800..=0x00DB7F => Some(UnicodeBlock::HighSurrogates), + 0x00DB80..=0x00DBFF => Some(UnicodeBlock::HighPrivateUseSurrogates), + 0x00DC00..=0x00DFFF => Some(UnicodeBlock::LowSurrogates), + 0x00E000..=0x00F8FF => Some(UnicodeBlock::PrivateUseArea), + 0x00F900..=0x00FAFF => Some(UnicodeBlock::CJKCompatibilityIdeographs), + 0x00FB00..=0x00FB4F => Some(UnicodeBlock::AlphabeticPresentationForms), + 0x00FB50..=0x00FDFF => Some(UnicodeBlock::ArabicPresentationFormsA), + 0x00FE00..=0x00FE0F => Some(UnicodeBlock::VariationSelectors), + 0x00FE10..=0x00FE1F => Some(UnicodeBlock::VerticalForms), + 0x00FE20..=0x00FE2F => Some(UnicodeBlock::CombiningHalfMarks), + 0x00FE30..=0x00FE4F => Some(UnicodeBlock::CJKCompatibilityForms), + 0x00FE50..=0x00FE6F => Some(UnicodeBlock::SmallFormVariants), + 0x00FE70..=0x00FEFF => Some(UnicodeBlock::ArabicPresentationFormsB), + 0x00FF00..=0x00FFEF => Some(UnicodeBlock::HalfwidthandFullwidthForms), + 0x00FFF0..=0x00FFFF => Some(UnicodeBlock::Specials), + 0x010000..=0x01007F => Some(UnicodeBlock::LinearBSyllabary), + 0x010080..=0x0100FF => Some(UnicodeBlock::LinearBIdeograms), + 0x010100..=0x01013F => Some(UnicodeBlock::AegeanNumbers), + 0x010140..=0x01018F => Some(UnicodeBlock::AncientGreekNumbers), + 0x010190..=0x0101CF => Some(UnicodeBlock::AncientSymbols), + 0x0101D0..=0x0101FF => Some(UnicodeBlock::PhaistosDisc), + 0x010280..=0x01029F => Some(UnicodeBlock::Lycian), + 0x0102A0..=0x0102DF => Some(UnicodeBlock::Carian), + 0x0102E0..=0x0102FF => Some(UnicodeBlock::CopticEpactNumbers), + 0x010300..=0x01032F => Some(UnicodeBlock::OldItalic), + 0x010330..=0x01034F => Some(UnicodeBlock::Gothic), + 0x010350..=0x01037F => Some(UnicodeBlock::OldPermic), + 0x010380..=0x01039F => Some(UnicodeBlock::Ugaritic), + 0x0103A0..=0x0103DF => Some(UnicodeBlock::OldPersian), + 0x010400..=0x01044F => Some(UnicodeBlock::Deseret), + 0x010450..=0x01047F => Some(UnicodeBlock::Shavian), + 0x010480..=0x0104AF => Some(UnicodeBlock::Osmanya), + 0x0104B0..=0x0104FF => Some(UnicodeBlock::Osage), + 0x010500..=0x01052F => Some(UnicodeBlock::Elbasan), + 0x010530..=0x01056F => Some(UnicodeBlock::CaucasianAlbanian), + 0x010570..=0x0105BF => Some(UnicodeBlock::Vithkuqi), + 0x010600..=0x01077F => Some(UnicodeBlock::LinearA), + 0x010780..=0x0107BF => Some(UnicodeBlock::LatinExtendedF), + 0x010800..=0x01083F => Some(UnicodeBlock::CypriotSyllabary), + 0x010840..=0x01085F => Some(UnicodeBlock::ImperialAramaic), + 0x010860..=0x01087F => Some(UnicodeBlock::Palmyrene), + 0x010880..=0x0108AF => Some(UnicodeBlock::Nabataean), + 0x0108E0..=0x0108FF => Some(UnicodeBlock::Hatran), + 0x010900..=0x01091F => Some(UnicodeBlock::Phoenician), + 0x010920..=0x01093F => Some(UnicodeBlock::Lydian), + 0x010980..=0x01099F => Some(UnicodeBlock::MeroiticHieroglyphs), + 0x0109A0..=0x0109FF => Some(UnicodeBlock::MeroiticCursive), + 0x010A00..=0x010A5F => Some(UnicodeBlock::Kharoshthi), + 0x010A60..=0x010A7F => Some(UnicodeBlock::OldSouthArabian), + 0x010A80..=0x010A9F => Some(UnicodeBlock::OldNorthArabian), + 0x010AC0..=0x010AFF => Some(UnicodeBlock::Manichaean), + 0x010B00..=0x010B3F => Some(UnicodeBlock::Avestan), + 0x010B40..=0x010B5F => Some(UnicodeBlock::InscriptionalParthian), + 0x010B60..=0x010B7F => Some(UnicodeBlock::InscriptionalPahlavi), + 0x010B80..=0x010BAF => Some(UnicodeBlock::PsalterPahlavi), + 0x010C00..=0x010C4F => Some(UnicodeBlock::OldTurkic), + 0x010C80..=0x010CFF => Some(UnicodeBlock::OldHungarian), + 0x010D00..=0x010D3F => Some(UnicodeBlock::HanifiRohingya), + 0x010E60..=0x010E7F => Some(UnicodeBlock::RumiNumeralSymbols), + 0x010E80..=0x010EBF => Some(UnicodeBlock::Yezidi), + 0x010EC0..=0x010EFF => Some(UnicodeBlock::ArabicExtendedC), + 0x010F00..=0x010F2F => Some(UnicodeBlock::OldSogdian), + 0x010F30..=0x010F6F => Some(UnicodeBlock::Sogdian), + 0x010F70..=0x010FAF => Some(UnicodeBlock::OldUyghur), + 0x010FB0..=0x010FDF => Some(UnicodeBlock::Chorasmian), + 0x010FE0..=0x010FFF => Some(UnicodeBlock::Elymaic), + 0x011000..=0x01107F => Some(UnicodeBlock::Brahmi), + 0x011080..=0x0110CF => Some(UnicodeBlock::Kaithi), + 0x0110D0..=0x0110FF => Some(UnicodeBlock::SoraSompeng), + 0x011100..=0x01114F => Some(UnicodeBlock::Chakma), + 0x011150..=0x01117F => Some(UnicodeBlock::Mahajani), + 0x011180..=0x0111DF => Some(UnicodeBlock::Sharada), + 0x0111E0..=0x0111FF => Some(UnicodeBlock::SinhalaArchaicNumbers), + 0x011200..=0x01124F => Some(UnicodeBlock::Khojki), + 0x011280..=0x0112AF => Some(UnicodeBlock::Multani), + 0x0112B0..=0x0112FF => Some(UnicodeBlock::Khudawadi), + 0x011300..=0x01137F => Some(UnicodeBlock::Grantha), + 0x011400..=0x01147F => Some(UnicodeBlock::Newa), + 0x011480..=0x0114DF => Some(UnicodeBlock::Tirhuta), + 0x011580..=0x0115FF => Some(UnicodeBlock::Siddham), + 0x011600..=0x01165F => Some(UnicodeBlock::Modi), + 0x011660..=0x01167F => Some(UnicodeBlock::MongolianSupplement), + 0x011680..=0x0116CF => Some(UnicodeBlock::Takri), + 0x011700..=0x01174F => Some(UnicodeBlock::Ahom), + 0x011800..=0x01184F => Some(UnicodeBlock::Dogra), + 0x0118A0..=0x0118FF => Some(UnicodeBlock::WarangCiti), + 0x011900..=0x01195F => Some(UnicodeBlock::DivesAkuru), + 0x0119A0..=0x0119FF => Some(UnicodeBlock::Nandinagari), + 0x011A00..=0x011A4F => Some(UnicodeBlock::ZanabazarSquare), + 0x011A50..=0x011AAF => Some(UnicodeBlock::Soyombo), + 0x011AB0..=0x011ABF => Some(UnicodeBlock::UnifiedCanadianAboriginalSyllabicsExtendedA), + 0x011AC0..=0x011AFF => Some(UnicodeBlock::PauCinHau), + 0x011B00..=0x011B5F => Some(UnicodeBlock::DevanagariExtendedA), + 0x011C00..=0x011C6F => Some(UnicodeBlock::Bhaiksuki), + 0x011C70..=0x011CBF => Some(UnicodeBlock::Marchen), + 0x011D00..=0x011D5F => Some(UnicodeBlock::MasaramGondi), + 0x011D60..=0x011DAF => Some(UnicodeBlock::GunjalaGondi), + 0x011EE0..=0x011EFF => Some(UnicodeBlock::Makasar), + 0x011F00..=0x011F5F => Some(UnicodeBlock::Kawi), + 0x011FB0..=0x011FBF => Some(UnicodeBlock::LisuSupplement), + 0x011FC0..=0x011FFF => Some(UnicodeBlock::TamilSupplement), + 0x012000..=0x0123FF => Some(UnicodeBlock::Cuneiform), + 0x012400..=0x01247F => Some(UnicodeBlock::CuneiformNumbersandPunctuation), + 0x012480..=0x01254F => Some(UnicodeBlock::EarlyDynasticCuneiform), + 0x012F90..=0x012FFF => Some(UnicodeBlock::CyproMinoan), + 0x013000..=0x01342F => Some(UnicodeBlock::EgyptianHieroglyphs), + 0x013430..=0x01345F => Some(UnicodeBlock::EgyptianHieroglyphFormatControls), + 0x014400..=0x01467F => Some(UnicodeBlock::AnatolianHieroglyphs), + 0x016800..=0x016A3F => Some(UnicodeBlock::BamumSupplement), + 0x016A40..=0x016A6F => Some(UnicodeBlock::Mro), + 0x016A70..=0x016ACF => Some(UnicodeBlock::Tangsa), + 0x016AD0..=0x016AFF => Some(UnicodeBlock::BassaVah), + 0x016B00..=0x016B8F => Some(UnicodeBlock::PahawhHmong), + 0x016E40..=0x016E9F => Some(UnicodeBlock::Medefaidrin), + 0x016F00..=0x016F9F => Some(UnicodeBlock::Miao), + 0x016FE0..=0x016FFF => Some(UnicodeBlock::IdeographicSymbolsandPunctuation), + 0x017000..=0x0187FF => Some(UnicodeBlock::Tangut), + 0x018800..=0x018AFF => Some(UnicodeBlock::TangutComponents), + 0x018B00..=0x018CFF => Some(UnicodeBlock::KhitanSmallScript), + 0x018D00..=0x018D7F => Some(UnicodeBlock::TangutSupplement), + 0x01AFF0..=0x01AFFF => Some(UnicodeBlock::KanaExtendedB), + 0x01B000..=0x01B0FF => Some(UnicodeBlock::KanaSupplement), + 0x01B100..=0x01B12F => Some(UnicodeBlock::KanaExtendedA), + 0x01B130..=0x01B16F => Some(UnicodeBlock::SmallKanaExtension), + 0x01B170..=0x01B2FF => Some(UnicodeBlock::Nushu), + 0x01BC00..=0x01BC9F => Some(UnicodeBlock::Duployan), + 0x01BCA0..=0x01BCAF => Some(UnicodeBlock::ShorthandFormatControls), + 0x01CF00..=0x01CFCF => Some(UnicodeBlock::ZnamennyMusicalNotation), + 0x01D000..=0x01D0FF => Some(UnicodeBlock::ByzantineMusicalSymbols), + 0x01D100..=0x01D1FF => Some(UnicodeBlock::MusicalSymbols), + 0x01D200..=0x01D24F => Some(UnicodeBlock::AncientGreekMusicalNotation), + 0x01D2C0..=0x01D2DF => Some(UnicodeBlock::KaktovikNumerals), + 0x01D2E0..=0x01D2FF => Some(UnicodeBlock::MayanNumerals), + 0x01D300..=0x01D35F => Some(UnicodeBlock::TaiXuanJingSymbols), + 0x01D360..=0x01D37F => Some(UnicodeBlock::CountingRodNumerals), + 0x01D400..=0x01D7FF => Some(UnicodeBlock::MathematicalAlphanumericSymbols), + 0x01D800..=0x01DAAF => Some(UnicodeBlock::SuttonSignWriting), + 0x01DF00..=0x01DFFF => Some(UnicodeBlock::LatinExtendedG), + 0x01E000..=0x01E02F => Some(UnicodeBlock::GlagoliticSupplement), + 0x01E030..=0x01E08F => Some(UnicodeBlock::CyrillicExtendedD), + 0x01E100..=0x01E14F => Some(UnicodeBlock::NyiakengPuachueHmong), + 0x01E290..=0x01E2BF => Some(UnicodeBlock::Toto), + 0x01E2C0..=0x01E2FF => Some(UnicodeBlock::Wancho), + 0x01E4D0..=0x01E4FF => Some(UnicodeBlock::NagMundari), + 0x01E7E0..=0x01E7FF => Some(UnicodeBlock::EthiopicExtendedB), + 0x01E800..=0x01E8DF => Some(UnicodeBlock::MendeKikakui), + 0x01E900..=0x01E95F => Some(UnicodeBlock::Adlam), + 0x01EC70..=0x01ECBF => Some(UnicodeBlock::IndicSiyaqNumbers), + 0x01ED00..=0x01ED4F => Some(UnicodeBlock::OttomanSiyaqNumbers), + 0x01EE00..=0x01EEFF => Some(UnicodeBlock::ArabicMathematicalAlphabeticSymbols), + 0x01F000..=0x01F02F => Some(UnicodeBlock::MahjongTiles), + 0x01F030..=0x01F09F => Some(UnicodeBlock::DominoTiles), + 0x01F0A0..=0x01F0FF => Some(UnicodeBlock::PlayingCards), + 0x01F100..=0x01F1FF => Some(UnicodeBlock::EnclosedAlphanumericSupplement), + 0x01F200..=0x01F2FF => Some(UnicodeBlock::EnclosedIdeographicSupplement), + 0x01F300..=0x01F5FF => Some(UnicodeBlock::MiscellaneousSymbolsandPictographs), + 0x01F600..=0x01F64F => Some(UnicodeBlock::Emoticons), + 0x01F650..=0x01F67F => Some(UnicodeBlock::OrnamentalDingbats), + 0x01F680..=0x01F6FF => Some(UnicodeBlock::TransportandMapSymbols), + 0x01F700..=0x01F77F => Some(UnicodeBlock::AlchemicalSymbols), + 0x01F780..=0x01F7FF => Some(UnicodeBlock::GeometricShapesExtended), + 0x01F800..=0x01F8FF => Some(UnicodeBlock::SupplementalArrowsC), + 0x01F900..=0x01F9FF => Some(UnicodeBlock::SupplementalSymbolsandPictographs), + 0x01FA00..=0x01FA6F => Some(UnicodeBlock::ChessSymbols), + 0x01FA70..=0x01FAFF => Some(UnicodeBlock::SymbolsandPictographsExtendedA), + 0x01FB00..=0x01FBFF => Some(UnicodeBlock::SymbolsforLegacyComputing), + 0x020000..=0x02A6DF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionB), + 0x02A700..=0x02B73F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionC), + 0x02B740..=0x02B81F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionD), + 0x02B820..=0x02CEAF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionE), + 0x02CEB0..=0x02EBEF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionF), + 0x02EBF0..=0x02EE5F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionI), + 0x02F800..=0x02FA1F => Some(UnicodeBlock::CJKCompatibilityIdeographsSupplement), + 0x030000..=0x03134F => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionG), + 0x031350..=0x0323AF => Some(UnicodeBlock::CJKUnifiedIdeographsExtensionH), + 0x0E0000..=0x0E007F => Some(UnicodeBlock::Tags), + 0x0E0100..=0x0E01EF => Some(UnicodeBlock::VariationSelectorsSupplement), + 0x0F0000..=0x0FFFFF => Some(UnicodeBlock::SupplementaryPrivateUseAreaA), + 0x100000..=0x10FFFF => Some(UnicodeBlock::SupplementaryPrivateUseAreaB), + _ => None, + } + } +} |