diff options
author | Martin Robinson <mrobinson@igalia.com> | 2024-02-22 15:15:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-22 14:15:59 +0000 |
commit | d8b326528b3d0646ef08714b87958f701cf89c88 (patch) | |
tree | 5027983cff9e39f5efdef3ba3d11266dc5fe58a7 /components/layout_2020 | |
parent | f60e5e767b5002e9a440cf5d6e63f462d3e85a8e (diff) | |
download | servo-d8b326528b3d0646ef08714b87958f701cf89c88.tar.gz servo-d8b326528b3d0646ef08714b87958f701cf89c88.zip |
layout: Add initial support for `text-transform` (#31396)
This adds basic support for `text-transform` in a way that is more
complete than legacy layout. There are still many missing elements of
proper `text-transform` support such as:
1. Support for `full-width` and `full-size-kana`
2. Support for grapheme-based uppercasing, lowercasing, and
capitalization. These are all done per code point right now.
3. Support for the language-specific `SpecialCasing.txt` cases for case
mapping such as the ones for Irish and Turkish.
Co-authored-by: Rakhi Sharma <atbrakhi@igalia.com>
Diffstat (limited to 'components/layout_2020')
-rw-r--r-- | components/layout_2020/Cargo.toml | 1 | ||||
-rw-r--r-- | components/layout_2020/display_list/mod.rs | 2 | ||||
-rw-r--r-- | components/layout_2020/flow/inline.rs | 5 | ||||
-rw-r--r-- | components/layout_2020/flow/text_run.rs | 170 | ||||
-rw-r--r-- | components/layout_2020/tests/text.rs | 4 |
5 files changed, 168 insertions, 14 deletions
diff --git a/components/layout_2020/Cargo.toml b/components/layout_2020/Cargo.toml index ce1aa86787b..e5b647955b8 100644 --- a/components/layout_2020/Cargo.toml +++ b/components/layout_2020/Cargo.toml @@ -42,6 +42,7 @@ servo_url = { path = "../url" } style = { path = "../style", features = ["servo"] } style_traits = { workspace = true } unicode-script = { workspace = true } +unicode-segmentation = { workspace = true } webrender_api = { workspace = true } xi-unicode = { workspace = true } diff --git a/components/layout_2020/display_list/mod.rs b/components/layout_2020/display_list/mod.rs index 91fd0746086..61c2a46b4dd 100644 --- a/components/layout_2020/display_list/mod.rs +++ b/components/layout_2020/display_list/mod.rs @@ -684,7 +684,7 @@ impl<'a> BuilderForBoxFragment<'a> { ); if let Some(layer) = - background::layout_layer(self, &painter, builder, index, intrinsic) + background::layout_layer(self, painter, builder, index, intrinsic) { let image_rendering = image_rendering(style.clone_image_rendering()); if layer.repeat { diff --git a/components/layout_2020/flow/inline.rs b/components/layout_2020/flow/inline.rs index 40950089c06..d071200eeaa 100644 --- a/components/layout_2020/flow/inline.rs +++ b/components/layout_2020/flow/inline.rs @@ -1578,6 +1578,9 @@ impl InlineFormattingContext { // > (It is invisible, but retains its soft wrap opportunity, if any.) let mut last_inline_box_ended_with_white_space = false; + // For the purposes of `text-transform: capitalize` the start of the IFC is a word boundary. 
+ let mut on_word_boundary = true; + crate::context::with_thread_local_font_context(layout_context, |font_context| { let mut linebreaker = None; self.foreach(|iter_item| match iter_item { @@ -1589,6 +1592,7 @@ impl InlineFormattingContext { &mut linebreaker, &mut ifc_fonts, &mut last_inline_box_ended_with_white_space, + &mut on_word_boundary, ); }, InlineFormattingContextIterItem::Item(InlineLevelBox::InlineBox(inline_box)) => { @@ -1601,6 +1605,7 @@ impl InlineFormattingContext { }, InlineFormattingContextIterItem::Item(InlineLevelBox::Atomic(_)) => { last_inline_box_ended_with_white_space = false; + on_word_boundary = true; }, _ => {}, }); diff --git a/components/layout_2020/flow/text_run.rs b/components/layout_2020/flow/text_run.rs index 26a8c4db7e9..16f03b72b84 100644 --- a/components/layout_2020/flow/text_run.rs +++ b/components/layout_2020/flow/text_run.rs @@ -2,8 +2,8 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. 
*/ +use std::char::{ToLowercase, ToUppercase}; use std::mem; -use std::str::Chars; use app_units::Au; use gfx::font::{FontRef, ShapingFlags, ShapingOptions}; @@ -19,7 +19,10 @@ use style::computed_values::text_rendering::T as TextRendering; use style::computed_values::white_space::T as WhiteSpace; use style::computed_values::word_break::T as WordBreak; use style::properties::ComputedValues; +use style::values::specified::text::TextTransformCase; +use style::values::specified::TextTransform; use unicode_script::Script; +use unicode_segmentation::UnicodeSegmentation; use xi_unicode::{linebreak_property, LineBreakLeafIter}; use super::inline::{FontKeyAndMetrics, InlineFormattingContextState}; @@ -209,11 +212,13 @@ impl TextRun { linebreaker: &mut Option<LineBreakLeafIter>, font_cache: &mut Vec<FontKeyAndMetrics>, last_inline_box_ended_with_white_space: &mut bool, + on_word_boundary: &mut bool, ) { let segment_results = self.segment_text( font_context, font_cache, last_inline_box_ended_with_white_space, + on_word_boundary, ); let inherited_text_style = self.parent_style.get_inherited_text().clone(); let letter_spacing = if inherited_text_style.letter_spacing.0.px() != 0. { @@ -278,25 +283,49 @@ impl TextRun { font_context: &mut FontContext<FontCacheThread>, font_cache: &mut Vec<FontKeyAndMetrics>, last_inline_box_ended_with_white_space: &mut bool, + on_word_boundary: &mut bool, ) -> Vec<(TextRunSegment, FontRef)> { let font_group = font_context.font_group(self.parent_style.clone_font()); let mut current: Option<(TextRunSegment, FontRef)> = None; let mut results = Vec::new(); - let text = std::mem::replace(&mut self.text, String::new()); + // TODO: Eventually the text should come directly from the Cow strings of the DOM nodes. 
+ let text = std::mem::take(&mut self.text); let collapsed = WhitespaceCollapse::new( - text.as_str(), + text.as_str().chars(), self.parent_style.clone_white_space(), *last_inline_box_ended_with_white_space, ); + let text_transform = self.parent_style.clone_text_transform(); + let collected_text: String; + let char_iterator: Box<dyn Iterator<Item = char>> = + if text_transform.case_ == TextTransformCase::Capitalize { + // `TextTransformation` doesn't support capitalization, so we must capitalize the whole + // string at once and make a copy. Here `on_word_boundary` indicates whether or not the + // inline formatting context as a whole is on a word boundary. This is different from + // `last_inline_box_ended_with_white_space` because the word boundaries are between + // atomic inlines and at the start of the IFC. + let collapsed_string: String = collapsed.collect(); + collected_text = capitalize_string(&collapsed_string, *on_word_boundary); + Box::new(collected_text.chars()) + } else if !text_transform.is_none() { + // If `text-transform` is active, wrap the `WhitespaceCollapse` iterator in + // a `TextTransformation` iterator. + Box::new(TextTransformation::new(collapsed, text_transform)) + } else { + Box::new(collapsed) + }; + let mut next_byte_index = 0; - let text = collapsed + let text = char_iterator .map(|character| { let current_byte_index = next_byte_index; next_byte_index += character.len_utf8(); *last_inline_box_ended_with_white_space = character.is_whitespace(); + *on_word_boundary = *last_inline_box_ended_with_white_space; + let prevents_soft_wrap_opportunity = char_prevents_soft_wrap_opportunity_when_before_or_after_atomic(character); if current_byte_index == 0 && prevents_soft_wrap_opportunity { @@ -331,7 +360,7 @@ impl TextRun { // segment in the middle of the run (ie the start should be 0). 
let start_byte_index = match current { Some(_) => ByteIndex(current_byte_index as isize), - None => ByteIndex(0 as isize), + None => ByteIndex(0_isize), }; let new = ( TextRunSegment::new(font_index, script, start_byte_index), @@ -491,8 +520,8 @@ fn preserve_segment_break() -> bool { true } -pub struct WhitespaceCollapse<'a> { - char_iterator: Chars<'a>, +pub struct WhitespaceCollapse<InputIterator> { + char_iterator: InputIterator, white_space: WhiteSpace, /// Whether or not we should collapse white space completely at the start of the string. @@ -519,10 +548,14 @@ pub struct WhitespaceCollapse<'a> { character_pending_to_return: Option<char>, } -impl<'a> WhitespaceCollapse<'a> { - pub fn new(input: &'a str, white_space: WhiteSpace, trim_beginning_white_space: bool) -> Self { +impl<InputIterator> WhitespaceCollapse<InputIterator> { + pub fn new( + char_iterator: InputIterator, + white_space: WhiteSpace, + trim_beginning_white_space: bool, + ) -> Self { Self { - char_iterator: input.chars(), + char_iterator, white_space, remove_collapsible_white_space_at_start: trim_beginning_white_space, inside_white_space: false, @@ -545,7 +578,10 @@ impl<'a> WhitespaceCollapse<'a> { } } -impl<'a> Iterator for WhitespaceCollapse<'a> { +impl<InputIterator> Iterator for WhitespaceCollapse<InputIterator> +where + InputIterator: Iterator<Item = char>, +{ type Item = char; fn next(&mut self) -> Option<Self::Item> { @@ -645,3 +681,115 @@ impl<'a> Iterator for WhitespaceCollapse<'a> { self.char_iterator.count() } } + +enum PendingCaseConversionResult { + Uppercase(ToUppercase), + Lowercase(ToLowercase), +} + +impl PendingCaseConversionResult { + fn next(&mut self) -> Option<char> { + match self { + PendingCaseConversionResult::Uppercase(to_uppercase) => to_uppercase.next(), + PendingCaseConversionResult::Lowercase(to_lowercase) => to_lowercase.next(), + } + } +} + +/// This is an interator that consumes a char iterator and produces character transformed +/// by the given CSS 
`text-transform` value. It currently does not support +/// `text-transform: capitalize` because Unicode segmentation libraries do not support +/// streaming input one character at a time. +pub struct TextTransformation<InputIterator> { + /// The input character iterator. + char_iterator: InputIterator, + /// The `text-transform` value to use. + text_transform: TextTransform, + /// If an uppercasing or lowercasing produces more than one character, this + /// caches them so that they can be returned in subsequent iterator calls. + pending_case_conversion_result: Option<PendingCaseConversionResult>, +} + +impl<'a, InputIterator> TextTransformation<InputIterator> { + pub fn new(char_iterator: InputIterator, text_transform: TextTransform) -> Self { + Self { + char_iterator, + text_transform, + pending_case_conversion_result: None, + } + } +} + +impl<InputIterator> Iterator for TextTransformation<InputIterator> +where + InputIterator: Iterator<Item = char>, +{ + type Item = char; + + fn next(&mut self) -> Option<Self::Item> { + if let Some(character) = self + .pending_case_conversion_result + .as_mut() + .and_then(|result| result.next()) + { + return Some(character); + } + self.pending_case_conversion_result = None; + + for character in self.char_iterator.by_ref() { + match self.text_transform.case_ { + TextTransformCase::None => return Some(character), + TextTransformCase::Uppercase => { + let mut pending_result = + PendingCaseConversionResult::Uppercase(character.to_uppercase()); + if let Some(character) = pending_result.next() { + self.pending_case_conversion_result = Some(pending_result); + return Some(character); + } + }, + TextTransformCase::Lowercase => { + let mut pending_result = + PendingCaseConversionResult::Lowercase(character.to_lowercase()); + if let Some(character) = pending_result.next() { + self.pending_case_conversion_result = Some(pending_result); + return Some(character); + } + }, + // `text-transform: capitalize` currently cannot work on a 
per-character basis, + // so must be handled outside of this iterator. + // TODO: Add support for `full-width` and `full-size-kana`. + _ => return Some(character), + } + } + None + } +} + +/// Given a string and whether the start of the string represents a word boundary, create a copy of +/// the string with letters after word boundaries capitalized. +fn capitalize_string(string: &str, allow_word_at_start: bool) -> String { + let mut output_string = String::new(); + output_string.reserve(string.len()); + + let mut bounds = string.unicode_word_indices().peekable(); + let mut byte_index = 0; + for character in string.chars() { + let current_byte_index = byte_index; + byte_index += character.len_utf8(); + + if let Some((next_index, _)) = bounds.peek() { + if *next_index == current_byte_index { + bounds.next(); + + if current_byte_index != 0 || allow_word_at_start { + output_string.extend(character.to_uppercase()); + continue; + } + } + } + + output_string.push(character); + } + + output_string +} diff --git a/components/layout_2020/tests/text.rs b/components/layout_2020/tests/text.rs index 894ccff277d..6b9ee652c05 100644 --- a/components/layout_2020/tests/text.rs +++ b/components/layout_2020/tests/text.rs @@ -8,8 +8,8 @@ mod text { #[test] fn test_collapse_whitespace() { - let collapse = |input, white_space, trim_beginning_white_space| { - WhitespaceCollapse::new(input, white_space, trim_beginning_white_space) + let collapse = |input: &str, white_space, trim_beginning_white_space| { + WhitespaceCollapse::new(input.chars(), white_space, trim_beginning_white_space) .collect::<String>() }; |