layout: Add initial support for `text-transform` (#31396)

This adds basic support for `text-transform` in a way that is more complete than legacy layout. There are still many missing elements of proper `text-transform` support, such as:

1. Support for `full-width` and `full-size-kana`.
2. Support for grapheme-based uppercasing, lowercasing, and capitalization. These are all done per code point right now.
3. Support for the language-specific `SpecialCasing.txt` cases for case mapping, such as the ones for Irish and Turkish.

Co-authored-by: Rakhi Sharma <atbrakhi@igalia.com>
author: Martin Robinson <mrobinson@igalia.com> 2024-02-22 15:15:59 +0100
committer: GitHub <noreply@github.com> 2024-02-22 14:15:59 +0000
commit: d8b326528b3d0646ef08714b87958f701cf89c88 (patch)
tree: 5027983cff9e39f5efdef3ba3d11266dc5fe58a7 /components/layout_2020
parent: f60e5e767b5002e9a440cf5d6e63f462d3e85a8e (diff)
download: servo-d8b326528b3d0646ef08714b87958f701cf89c88.tar.gz
servo-d8b326528b3d0646ef08714b87958f701cf89c88.zip
5 files changed, 168 insertions, 14 deletions
diff --git a/components/layout_2020/Cargo.toml b/components/layout_2020/Cargo.toml
index ce1aa86787b..e5b647955b8 100644
--- a/components/layout_2020/Cargo.toml
+++ b/components/layout_2020/Cargo.toml
@@ -42,6 +42,7 @@ servo_url = { path = "../url" }
 style = { path = "../style", features = ["servo"] }
 style_traits = { workspace = true }
 unicode-script = { workspace = true }
+unicode-segmentation = { workspace = true }
 webrender_api = { workspace = true }
 xi-unicode = { workspace = true }
 
diff --git a/components/layout_2020/display_list/mod.rs b/components/layout_2020/display_list/mod.rs
index 91fd0746086..61c2a46b4dd 100644
--- a/components/layout_2020/display_list/mod.rs
+++ b/components/layout_2020/display_list/mod.rs
@@ -684,7 +684,7 @@ impl<'a> BuilderForBoxFragment<'a> {
                     );
 
                     if let Some(layer) =
-                        background::layout_layer(self, &painter, builder, index, intrinsic)
+                        background::layout_layer(self, painter, builder, index, intrinsic)
                     {
                         let image_rendering = image_rendering(style.clone_image_rendering());
                         if layer.repeat {
diff --git a/components/layout_2020/flow/inline.rs b/components/layout_2020/flow/inline.rs
index 40950089c06..d071200eeaa 100644
--- a/components/layout_2020/flow/inline.rs
+++ b/components/layout_2020/flow/inline.rs
@@ -1578,6 +1578,9 @@ impl InlineFormattingContext {
         // > (It is invisible, but retains its soft wrap opportunity, if any.)
         let mut last_inline_box_ended_with_white_space = false;
 
+        // For the purposes of `text-transform: capitalize` the start of the IFC is a word boundary.
+        let mut on_word_boundary = true;
+
         crate::context::with_thread_local_font_context(layout_context, |font_context| {
             let mut linebreaker = None;
             self.foreach(|iter_item| match iter_item {
@@ -1589,6 +1592,7 @@ impl InlineFormattingContext {
                         &mut linebreaker,
                         &mut ifc_fonts,
                         &mut last_inline_box_ended_with_white_space,
+                        &mut on_word_boundary,
                     );
                 },
                 InlineFormattingContextIterItem::Item(InlineLevelBox::InlineBox(inline_box)) => {
@@ -1601,6 +1605,7 @@ impl InlineFormattingContext {
                 },
                 InlineFormattingContextIterItem::Item(InlineLevelBox::Atomic(_)) => {
                     last_inline_box_ended_with_white_space = false;
+                    on_word_boundary = true;
                 },
                 _ => {},
             });
diff --git a/components/layout_2020/flow/text_run.rs b/components/layout_2020/flow/text_run.rs
index 26a8c4db7e9..16f03b72b84 100644
--- a/components/layout_2020/flow/text_run.rs
+++ b/components/layout_2020/flow/text_run.rs
@@ -2,8 +2,8 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
 
+use std::char::{ToLowercase, ToUppercase};
 use std::mem;
-use std::str::Chars;
 
 use app_units::Au;
 use gfx::font::{FontRef, ShapingFlags, ShapingOptions};
@@ -19,7 +19,10 @@ use style::computed_values::text_rendering::T as TextRendering;
 use style::computed_values::white_space::T as WhiteSpace;
 use style::computed_values::word_break::T as WordBreak;
 use style::properties::ComputedValues;
+use style::values::specified::text::TextTransformCase;
+use style::values::specified::TextTransform;
 use unicode_script::Script;
+use unicode_segmentation::UnicodeSegmentation;
 use xi_unicode::{linebreak_property, LineBreakLeafIter};
 
 use super::inline::{FontKeyAndMetrics, InlineFormattingContextState};
@@ -209,11 +212,13 @@ impl TextRun {
         linebreaker: &mut Option<LineBreakLeafIter>,
         font_cache: &mut Vec<FontKeyAndMetrics>,
         last_inline_box_ended_with_white_space: &mut bool,
+        on_word_boundary: &mut bool,
     ) {
         let segment_results = self.segment_text(
             font_context,
             font_cache,
             last_inline_box_ended_with_white_space,
+            on_word_boundary,
         );
         let inherited_text_style = self.parent_style.get_inherited_text().clone();
         let letter_spacing = if inherited_text_style.letter_spacing.0.px() != 0. {
@@ -278,25 +283,49 @@ impl TextRun {
         font_context: &mut FontContext<FontCacheThread>,
         font_cache: &mut Vec<FontKeyAndMetrics>,
         last_inline_box_ended_with_white_space: &mut bool,
+        on_word_boundary: &mut bool,
     ) -> Vec<(TextRunSegment, FontRef)> {
         let font_group = font_context.font_group(self.parent_style.clone_font());
         let mut current: Option<(TextRunSegment, FontRef)> = None;
         let mut results = Vec::new();
 
-        let text = std::mem::replace(&mut self.text, String::new());
+        // TODO: Eventually the text should come directly from the Cow strings of the DOM nodes.
+        let text = std::mem::take(&mut self.text);
         let collapsed = WhitespaceCollapse::new(
-            text.as_str(),
+            text.as_str().chars(),
             self.parent_style.clone_white_space(),
             *last_inline_box_ended_with_white_space,
         );
 
+        let text_transform = self.parent_style.clone_text_transform();
+        let collected_text: String;
+        let char_iterator: Box<dyn Iterator<Item = char>> =
+            if text_transform.case_ == TextTransformCase::Capitalize {
+                // `TextTransformation` doesn't support capitalization, so we must capitalize the whole
+                // string at once and make a copy. Here `on_word_boundary` indicates whether or not the
+                // inline formatting context as a whole is on a word boundary. This is different from
+                // `last_inline_box_ended_with_white_space` because the word boundaries are between
+                // atomic inlines and at the start of the IFC.
+                let collapsed_string: String = collapsed.collect();
+                collected_text = capitalize_string(&collapsed_string, *on_word_boundary);
+                Box::new(collected_text.chars())
+            } else if !text_transform.is_none() {
+                // If `text-transform` is active, wrap the `WhitespaceCollapse` iterator in
+                // a `TextTransformation` iterator.
+                Box::new(TextTransformation::new(collapsed, text_transform))
+            } else {
+                Box::new(collapsed)
+            };
+
         let mut next_byte_index = 0;
-        let text = collapsed
+        let text = char_iterator
             .map(|character| {
                 let current_byte_index = next_byte_index;
                 next_byte_index += character.len_utf8();
 
                 *last_inline_box_ended_with_white_space = character.is_whitespace();
+                *on_word_boundary = *last_inline_box_ended_with_white_space;
+
                 let prevents_soft_wrap_opportunity =
                     char_prevents_soft_wrap_opportunity_when_before_or_after_atomic(character);
                 if current_byte_index == 0 && prevents_soft_wrap_opportunity {
@@ -331,7 +360,7 @@ impl TextRun {
                 // segment in the middle of the run (ie the start should be 0).
                 let start_byte_index = match current {
                     Some(_) => ByteIndex(current_byte_index as isize),
-                    None => ByteIndex(0 as isize),
+                    None => ByteIndex(0_isize),
                 };
                 let new = (
                     TextRunSegment::new(font_index, script, start_byte_index),
@@ -491,8 +520,8 @@ fn preserve_segment_break() -> bool {
     true
 }
 
-pub struct WhitespaceCollapse<'a> {
-    char_iterator: Chars<'a>,
+pub struct WhitespaceCollapse<InputIterator> {
+    char_iterator: InputIterator,
     white_space: WhiteSpace,
 
     /// Whether or not we should collapse white space completely at the start of the string.
@@ -519,10 +548,14 @@ pub struct WhitespaceCollapse<'a> {
     character_pending_to_return: Option<char>,
 }
 
-impl<'a> WhitespaceCollapse<'a> {
-    pub fn new(input: &'a str, white_space: WhiteSpace, trim_beginning_white_space: bool) -> Self {
+impl<InputIterator> WhitespaceCollapse<InputIterator> {
+    pub fn new(
+        char_iterator: InputIterator,
+        white_space: WhiteSpace,
+        trim_beginning_white_space: bool,
+    ) -> Self {
         Self {
-            char_iterator: input.chars(),
+            char_iterator,
             white_space,
             remove_collapsible_white_space_at_start: trim_beginning_white_space,
             inside_white_space: false,
@@ -545,7 +578,10 @@ impl<'a> WhitespaceCollapse<'a> {
     }
 }
 
-impl<'a> Iterator for WhitespaceCollapse<'a> {
+impl<InputIterator> Iterator for WhitespaceCollapse<InputIterator>
+where
+    InputIterator: Iterator<Item = char>,
+{
     type Item = char;
 
     fn next(&mut self) -> Option<Self::Item> {
@@ -645,3 +681,115 @@ impl<'a> Iterator for WhitespaceCollapse<'a> {
         self.char_iterator.count()
     }
 }
+
+enum PendingCaseConversionResult {
+    Uppercase(ToUppercase), // not-yet-returned output of a `char::to_uppercase()` call
+    Lowercase(ToLowercase), // not-yet-returned output of a `char::to_lowercase()` call
+}
+
+impl PendingCaseConversionResult {
+    fn next(&mut self) -> Option<char> {
+        match self {
+            Self::Uppercase(remaining) => remaining.next(),
+            Self::Lowercase(remaining) => remaining.next(),
+        }
+    }
+}
+
+/// This is an iterator that consumes a char iterator and produces characters transformed
+/// by the given CSS `text-transform` value. It currently does not support
+/// `text-transform: capitalize` because Unicode segmentation libraries do not support
+/// streaming input one character at a time.
+pub struct TextTransformation<InputIterator> {
+    /// The input character iterator.
+    char_iterator: InputIterator,
+    /// The `text-transform` value to use.
+    text_transform: TextTransform,
+    /// If an uppercasing or lowercasing produces more than one character, this
+    /// caches them so that they can be returned in subsequent iterator calls.
+    pending_case_conversion_result: Option<PendingCaseConversionResult>,
+}
+
+impl<InputIterator> TextTransformation<InputIterator> {
+    pub fn new(char_iterator: InputIterator, text_transform: TextTransform) -> Self {
+        Self {
+            char_iterator,
+            text_transform,
+            pending_case_conversion_result: None, // nothing is buffered before the first `next()`
+        }
+    }
+}
+
+impl<InputIterator> Iterator for TextTransformation<InputIterator>
+where
+    InputIterator: Iterator<Item = char>,
+{
+    type Item = char;
+
+    /// Yields the next transformed character, or `None` once the input is exhausted.
+    fn next(&mut self) -> Option<Self::Item> {
+        // Finish handing out whatever remains of a previous case mapping before reading
+        // any further input.
+        if let Some(pending) = self.pending_case_conversion_result.as_mut() {
+            if let Some(transformed) = pending.next() {
+                return Some(transformed);
+            }
+        }
+        // Either nothing was pending or it has now been returned completely.
+        self.pending_case_conversion_result = None;
+
+        for input in self.char_iterator.by_ref() {
+            let mut mapping = match self.text_transform.case_ {
+                TextTransformCase::None => return Some(input),
+                TextTransformCase::Uppercase => {
+                    PendingCaseConversionResult::Uppercase(input.to_uppercase())
+                },
+                TextTransformCase::Lowercase => {
+                    PendingCaseConversionResult::Lowercase(input.to_lowercase())
+                },
+                // `text-transform: capitalize` currently cannot work on a per-character basis,
+                // so must be handled outside of this iterator.
+                // TODO: Add support for `full-width` and `full-size-kana`.
+                _ => return Some(input),
+            };
+
+            // Return the first character of the mapping and keep the remainder for later
+            // calls. If a mapping produced nothing, nothing is stored and the loop simply
+            // continues with the next input character.
+            if let Some(transformed) = mapping.next() {
+                self.pending_case_conversion_result = Some(mapping);
+                return Some(transformed);
+            }
+        }
+        None
+    }
+}
+
+/// Given a string and whether the start of the string represents a word boundary, create a copy of
+/// the string with letters after word boundaries capitalized.
+///
+/// Word starts come from `unicode_word_indices()`, and the first character of each word is
+/// replaced by its full `char::to_uppercase()` mapping, which may be several characters long.
+/// A word starting at byte index 0 is only capitalized when `allow_word_at_start` is true.
+fn capitalize_string(string: &str, allow_word_at_start: bool) -> String {
+    // Reserve the input's byte length up front; uppercase mappings may still grow the string.
+    let mut output_string = String::with_capacity(string.len());
+
+    // Byte offsets at which the words of `string` begin.
+    let mut word_starts = string
+        .unicode_word_indices()
+        .map(|(index, _)| index)
+        .peekable();
+
+    for (byte_index, character) in string.char_indices() {
+        // Consume the upcoming word start only when this character begins that word.
+        let starts_word = word_starts.next_if_eq(&byte_index).is_some();
+        if starts_word && (byte_index != 0 || allow_word_at_start) {
+            output_string.extend(character.to_uppercase());
+        } else {
+            output_string.push(character);
+        }
+    }
+
+    output_string
+}
diff --git a/components/layout_2020/tests/text.rs b/components/layout_2020/tests/text.rs
index 894ccff277d..6b9ee652c05 100644
--- a/components/layout_2020/tests/text.rs
+++ b/components/layout_2020/tests/text.rs
@@ -8,8 +8,8 @@ mod text {
 
     #[test]
     fn test_collapse_whitespace() {
-        let collapse = |input, white_space, trim_beginning_white_space| {
-            WhitespaceCollapse::new(input, white_space, trim_beginning_white_space)
+        let collapse = |input: &str, white_space, trim_beginning_white_space| {
+            WhitespaceCollapse::new(input.chars(), white_space, trim_beginning_white_space)
                 .collect::<String>()
         };
author	Martin Robinson <mrobinson@igalia.com>	2024-02-22 15:15:59 +0100
committer	GitHub <noreply@github.com>	2024-02-22 14:15:59 +0000
commit	d8b326528b3d0646ef08714b87958f701cf89c88 (patch)
tree	5027983cff9e39f5efdef3ba3d11266dc5fe58a7 /components/layout_2020
parent	f60e5e767b5002e9a440cf5d6e63f462d3e85a8e (diff)
download	servo-d8b326528b3d0646ef08714b87958f701cf89c88.tar.gz servo-d8b326528b3d0646ef08714b87958f701cf89c88.zip