aboutsummaryrefslogtreecommitdiffstats
path: root/components/shared/base/generate-unicode-block.py
blob: 3191d4f26f1b2a32db6d4bf4fcdb1bf2d4a599c8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python3

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.

# Generates Rust source for a `UnicodeBlock` enum and a `char` lookup method
# from the Unicode Character Database file Blocks.txt, writing it to stdout:
#   $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs

import dataclasses
import re
import sys


@dataclasses.dataclass
class UnicodeBlock:
    """A single named code point range taken from one Blocks.txt data line."""

    # Block name; printed verbatim as a variant of the generated Rust enum.
    name: str
    # First code point of the range, as hex digits without a "0x" prefix.
    start: str
    # Last code point of the range (inclusive in the generated Rust match
    # arm), as hex digits without a "0x" prefix.
    end: str


def process_line(line: str) -> UnicodeBlock:
    """Parse one data line of Blocks.txt into a UnicodeBlock.

    A data line has the form ``0000..007F; Basic Latin``. Both range bounds
    are left-padded with zeros to six characters. The block name has its
    surrounding whitespace stripped and every space and hyphen removed.

    Raises:
        ValueError: if the line does not split into exactly three fields.
    """
    # Separators are '..' (together with any non-word characters right before
    # it) and ';' (together with any non-word characters right after it).
    fields = re.split(r"\W*\.\.|;\W*", line, maxsplit=3)
    first, last, raw_name = fields
    # Delete spaces and hyphens in a single pass over the trimmed name.
    identifier = raw_name.strip().translate(str.maketrans("", "", "- "))
    return UnicodeBlock(
        name=identifier,
        start=first.zfill(6),
        end=last.zfill(6),
    )


# Read the Blocks.txt file named by the first command-line argument.
# Unicode Character Database files are published as UTF-8 and carry non-ASCII
# characters in their header comments, so decode explicitly instead of
# depending on the platform's default encoding.
with open(sys.argv[1], encoding="utf-8") as file:
    # Keep only data lines: drop blank lines and lines starting with '#'.
    results = [
        process_line(line)
        for line in file
        if line.strip() and not line.startswith("#")
    ]

# Everything below writes the generated Rust source to stdout.

# License header and provenance note.
print("/* This Source Code Form is subject to the terms of the Mozilla Public")
print(" * License, v. 2.0. If a copy of the MPL was not distributed with this")
print(" * file, You can obtain one at https://mozilla.org/MPL/2.0/. */")
print()
print("// Do not edit:")
print("// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt.")
print("// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs")
print()

# The enum: one variant per block, in input-file order.
print("#[derive(Clone, Copy, Debug, PartialEq)]")
print("pub enum UnicodeBlock {")
for block in results:
    print(f"    {block.name},")
print("}")
print()

# Trait declaring the lookup method.
print("pub trait UnicodeBlockMethod {")
print("    fn block(&self) -> Option<UnicodeBlock>;")
print("}")
print()

# Trait implementation for `char`: one inclusive range arm per block, with a
# catch-all arm returning None for code points outside every listed range.
print("impl UnicodeBlockMethod for char {")
print("    fn block(&self) -> Option<UnicodeBlock> {")
print("        match *self as u32 {")
for block in results:
    print(f"            0x{block.start}..=0x{block.end} => Some(UnicodeBlock::{block.name}),")
print("            _ => None,")
print("        }")
print("    }")
print("}")