aboutsummaryrefslogtreecommitdiffstats
path: root/components/shared/base/generate-unicode-block.py
diff options
context:
space:
mode:
authorMartin Robinson <mrobinson@igalia.com>2024-06-03 19:10:01 +0200
committerGitHub <noreply@github.com>2024-06-03 17:10:01 +0000
commitf8985c5521cdf72a9137a7fa847043e5a789dfe0 (patch)
tree519ab8999e6d6c32fed65f0812ce6b36dbcb8359 /components/shared/base/generate-unicode-block.py
parent48ab8d8847eadd0c94f43307860e880d4802a075 (diff)
downloadservo-f8985c5521cdf72a9137a7fa847043e5a789dfe0.tar.gz
servo-f8985c5521cdf72a9137a7fa847043e5a789dfe0.zip
base: Remove `ucd` dependency (#32424)
Remove the `ucd` dependency which has not been updated in 8 years. In addition, replace it with a generated UnicodeBlock enum which reflects the modern Unicode standard. This is generated via a Python script which is included in the repository. The generation is not part of the build process, because the Unicode database is hosted on the web and it does not change frequently. This is done instead of bringing in the more up-to-date `unicode_blocks` dependency. `unicode_blocks` defines each block as a constant, which means that they cannot be used in match statements -- which we do in Servo. Co-authored-by: Lauryn Menard <lauryn.menard@gmail.com>
Diffstat (limited to 'components/shared/base/generate-unicode-block.py')
-rwxr-xr-xcomponents/shared/base/generate-unicode-block.py63
1 files changed, 63 insertions, 0 deletions
diff --git a/components/shared/base/generate-unicode-block.py b/components/shared/base/generate-unicode-block.py
new file mode 100755
index 00000000000..3191d4f26f1
--- /dev/null
+++ b/components/shared/base/generate-unicode-block.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+# Generates Rust source for a `UnicodeBlock` enum and a `UnicodeBlockMethod`
+# trait from the Unicode Character Database's Blocks.txt file and prints it
+# to standard output:
+#   $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs
+
+import dataclasses
+import re
+import sys
+
+
+@dataclasses.dataclass
+class UnicodeBlock:
+    """A single named code point range read from Blocks.txt."""
+
+    # Block name with hyphens and spaces removed, used verbatim as the Rust
+    # enum variant name.
+    name: str
+    # First and last code point of the block as hexadecimal digit strings
+    # (no `0x` prefix), zero-padded to six characters by `process_line`.
+    start: str
+    end: str
+
+
+# One Blocks.txt data line: `START..END; Block Name`, with optional whitespace
+# around either separator and an optional trailing `# comment`.
+_BLOCK_LINE = re.compile(
+    r"\s*([0-9A-Fa-f]+)\s*\.\.\s*([0-9A-Fa-f]+)\s*;"
+    r"\s*([^#\s][^#]*?)\s*(?:#.*)?\s*"
+)
+
+
+def process_line(line: str) -> UnicodeBlock:
+    """Parse one data line of Blocks.txt into a `UnicodeBlock`.
+
+    Args:
+        line: A data line such as `0080..00FF; Latin-1 Supplement`, with or
+            without its trailing newline. Blank and comment-only lines must
+            be filtered out by the caller.
+
+    Returns:
+        The parsed block. Hyphens and spaces are removed from the name so it
+        can serve as a Rust identifier, and both code points are left-padded
+        with zeros to six hexadecimal digits.
+
+    Raises:
+        ValueError: If the line is not of the form `START..END; Name` with
+            hexadecimal code points and a non-empty name.
+    """
+    # Match the whole line so stray whitespace around `..` or `;`, or a
+    # trailing comment, can never end up inside the generated Rust source.
+    match = _BLOCK_LINE.fullmatch(line)
+    if match is None:
+        raise ValueError(f"Malformed Blocks.txt line: {line!r}")
+    start, end, name = match.groups()
+    name = name.replace("-", "").replace(" ", "")
+    return UnicodeBlock(name, start.zfill(6), end.zfill(6))
+
+
+# Fail with a usage hint instead of an IndexError traceback when the input
+# path is missing.
+if len(sys.argv) < 2:
+    sys.exit(f"usage: {sys.argv[0]} Blocks.txt > unicode_block.rs")
+
+# Parse every data line of the file named on the command line. The encoding
+# is pinned so decoding does not depend on the platform's locale default.
+with open(sys.argv[1], encoding="utf-8") as file:
+    # Skip blank lines and comment lines. The comment test is applied to the
+    # left-stripped text so that an indented `#` line is skipped as well.
+    results = [
+        process_line(line)
+        for line in file
+        if line.strip() and not line.lstrip().startswith("#")
+    ]
+
+# Assemble the generated Rust module section by section, then write it to
+# standard output in one go. Sections are separated by a single blank line.
+license_header = [
+    "/* This Source Code Form is subject to the terms of the Mozilla Public",
+    " * License, v. 2.0. If a copy of the MPL was not distributed with this",
+    " * file, You can obtain one at https://mozilla.org/MPL/2.0/. */",
+]
+provenance_note = [
+    "// Do not edit:",
+    "// Generated via: https://www.unicode.org/Public/UNIDATA/Blocks.txt.",
+    "// $ ./generate-unicode-block.py Blocks.txt > unicode_block.rs",
+]
+# One enum variant per parsed block, in file order.
+enum_definition = [
+    "#[derive(Clone, Copy, Debug, PartialEq)]",
+    "pub enum UnicodeBlock {",
+    *(f" {entry.name}," for entry in results),
+    "}",
+]
+trait_definition = [
+    "pub trait UnicodeBlockMethod {",
+    " fn block(&self) -> Option<UnicodeBlock>;",
+    "}",
+]
+# One inclusive-range match arm per parsed block, falling back to `None`.
+match_arms = [
+    f" 0x{entry.start}..=0x{entry.end} => Some(UnicodeBlock::{entry.name}),"
+    for entry in results
+]
+trait_implementation = [
+    "impl UnicodeBlockMethod for char {",
+    " fn block(&self) -> Option<UnicodeBlock> {",
+    " match *self as u32 {",
+    *match_arms,
+    " _ => None,",
+    " }",
+    " }",
+    "}",
+]
+sections = [
+    license_header,
+    provenance_note,
+    enum_definition,
+    trait_definition,
+    trait_implementation,
+]
+print("\n\n".join("\n".join(section) for section in sections))