diff options
Diffstat (limited to 'components/shared/base/generate-unicode-block.py')
-rwxr-xr-x | components/shared/base/generate-unicode-block.py | 63 |
1 files changed, 63 insertions, 0 deletions
#!/usr/bin/env python3

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

"""Generate the Rust ``UnicodeBlock`` enum and lookup from ``Blocks.txt``.

Usage:
    ./generate-unicode-block.py Blocks.txt > unicode_block.rs

The input is the Unicode Character Database ``Blocks.txt`` file, whose data
lines have the form ``Start..End; Block Name``. The generated Rust source is
written to standard output.
"""

import dataclasses
import re
import sys
from typing import Iterable, List

# One data line: "<hex>..<hex>; <name>", with optional whitespace around the
# delimiters and an optional trailing "# comment".
_BLOCK_LINE = re.compile(
    r"\s*([0-9A-Fa-f]+)\s*\.\.\s*([0-9A-Fa-f]+)\s*;([^#;]*)(?:#.*)?"
)


@dataclasses.dataclass
class UnicodeBlock:
    """A single Unicode block as emitted into the generated Rust code."""

    # Rust enum variant name: the block name with spaces and hyphens removed.
    name: str
    # First code point of the block, as hex digits zero-padded to 6 characters.
    start: str
    # Last code point of the block, as hex digits zero-padded to 6 characters.
    end: str


def process_line(line: str) -> UnicodeBlock:
    """Parse one ``Start..End; Block Name`` data line.

    Args:
        line: A data line from ``Blocks.txt``. Surrounding whitespace, the
            line terminator and a trailing ``# comment`` are ignored.

    Returns:
        The parsed block, with the name turned into an identifier by removing
        spaces and hyphens and both code points zero-padded to six digits.

    Raises:
        ValueError: If the line does not have the expected format or the
            block name is empty.
    """
    match = _BLOCK_LINE.fullmatch(line.strip())
    if match is None:
        raise ValueError(f"malformed Blocks.txt line: {line!r}")
    start, end, raw_name = match.groups()
    name = raw_name.strip().replace("-", "").replace(" ", "")
    if not name:
        raise ValueError(f"missing block name in line: {line!r}")
    return UnicodeBlock(name, start.zfill(6), end.zfill(6))


def parse_blocks(lines: Iterable[str]) -> List[UnicodeBlock]:
    """Parse every data line in *lines*, skipping blank and comment lines."""
    blocks = []
    for line in lines:
        stripped = line.strip()
        # Test the stripped text so that indented comments are skipped too.
        if not stripped or stripped.startswith("#"):
            continue
        blocks.append(process_line(stripped))
    return blocks


def render_rust(blocks: Iterable[UnicodeBlock]) -> str:
    """Return the generated Rust source for *blocks*, without a final newline."""
    blocks = list(blocks)  # Iterated twice below; accept one-shot iterables.
    lines = [
        "/* This Source Code Form is subject to the terms of the Mozilla Public",
        " * License, v. 2.0. If a copy of the MPL was not distributed with this",
        " * file, You can obtain one at https://mozilla.org/MPL/2.0/. */",
        "",
        "// Do not edit:",
        "// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt.",
        "// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs",
        "",
        "#[derive(Clone, Copy, Debug, PartialEq)]",
        "pub enum UnicodeBlock {",
    ]
    lines.extend(f"    {block.name}," for block in blocks)
    lines.extend(
        [
            "}",
            "",
            "pub trait UnicodeBlockMethod {",
            "    fn block(&self) -> Option<UnicodeBlock>;",
            "}",
            "",
            "impl UnicodeBlockMethod for char {",
            "    fn block(&self) -> Option<UnicodeBlock> {",
            "        match *self as u32 {",
        ]
    )
    lines.extend(
        f"            0x{block.start}..=0x{block.end} => "
        f"Some(UnicodeBlock::{block.name}),"
        for block in blocks
    )
    lines.extend(
        [
            "            _ => None,",
            "        }",
            "    }",
            "}",
        ]
    )
    return "\n".join(lines)


def main(argv: List[str]) -> int:
    """Read the ``Blocks.txt`` path in ``argv[1]`` and print the Rust source.

    Returns:
        The process exit status: 0 on success, 2 on a usage error.
    """
    if len(argv) != 2:
        program = argv[0] if argv else "generate-unicode-block.py"
        print(f"usage: {program} Blocks.txt > unicode_block.rs", file=sys.stderr)
        return 2
    # Explicit encoding so parsing does not depend on the locale.
    with open(argv[1], encoding="utf-8") as file:
        blocks = parse_blocks(file)
    print(render_rust(blocks))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))