diff options
author | Matt Brubeck <mbrubeck@limpet.net> | 2016-04-27 11:22:02 -0700 |
---|---|---|
committer | Matt Brubeck <mbrubeck@limpet.net> | 2016-04-28 14:32:14 -0700 |
commit | 659305fe0a8f94e950ca64fab5ccef9949abd295 (patch) | |
tree | ac836803ac3fba799a4b4f73c24b4d0d2d208d6f /components | |
parent | dba878dfb278619bf2d808c0c21758a937ec6bb7 (diff) | |
download | servo-659305fe0a8f94e950ca64fab5ccef9949abd295.tar.gz servo-659305fe0a8f94e950ca64fab5ccef9949abd295.zip |
Use byte indices instead of char indices for text runs
Replace character indices with UTF-8 byte offsets throughout the code dealing
with text shaping and breaking. This eliminates a lot of complexity when
converting from one to the other, and interoperates better with the rest of
the Rust ecosystem.
Diffstat (limited to 'components')
-rw-r--r-- | components/gfx/display_list/mod.rs | 4 | ||||
-rw-r--r-- | components/gfx/font.rs | 2 | ||||
-rw-r--r-- | components/gfx/paint_context.rs | 11 | ||||
-rw-r--r-- | components/gfx/text/glyph.rs | 107 | ||||
-rw-r--r-- | components/gfx/text/shaping/harfbuzz.rs | 149 | ||||
-rw-r--r-- | components/gfx/text/text_run.rs | 81 | ||||
-rw-r--r-- | components/layout/display_list_builder.rs | 4 | ||||
-rw-r--r-- | components/layout/fragment.rs | 112 | ||||
-rw-r--r-- | components/layout/text.rs | 81 | ||||
-rw-r--r-- | components/layout/webrender_helpers.rs | 2 | ||||
-rw-r--r-- | components/layout/wrapper.rs | 37 | ||||
-rw-r--r-- | components/script/dom/htmlinputelement.rs | 40 | ||||
-rw-r--r-- | components/script/dom/htmltextareaelement.rs | 13 | ||||
-rw-r--r-- | components/util/str.rs | 39 |
14 files changed, 258 insertions, 424 deletions
diff --git a/components/gfx/display_list/mod.rs b/components/gfx/display_list/mod.rs index 0b8d5f2dbb1..d0f117b439e 100644 --- a/components/gfx/display_list/mod.rs +++ b/components/gfx/display_list/mod.rs @@ -43,7 +43,7 @@ use style::computed_values::{border_style, filter, image_rendering, mix_blend_mo use style::properties::{ComputedValues}; use style_traits::cursor::Cursor; use text::TextRun; -use text::glyph::CharIndex; +use text::glyph::ByteIndex; use util::geometry::{self, MAX_RECT, ScreenPx}; use util::print_tree::PrintTree; use webrender_traits::{self, WebGLContextId}; @@ -989,7 +989,7 @@ pub struct TextDisplayItem { pub text_run: Arc<TextRun>, /// The range of text within the text run. - pub range: Range<CharIndex>, + pub range: Range<ByteIndex>, /// The color of the text. pub text_color: Color, diff --git a/components/gfx/font.rs b/components/gfx/font.rs index 304a201e298..5bdfa78fb64 100644 --- a/components/gfx/font.rs +++ b/components/gfx/font.rs @@ -154,7 +154,7 @@ impl Font { let start_time = time::precise_time_ns(); - let mut glyphs = GlyphStore::new(text.chars().count(), + let mut glyphs = GlyphStore::new(text.len(), options.flags.contains(IS_WHITESPACE_SHAPING_FLAG), options.flags.contains(RTL_FLAG)); shaper.as_ref().unwrap().shape_text(text, options, &mut glyphs); diff --git a/components/gfx/paint_context.rs b/components/gfx/paint_context.rs index 7b8813d907e..047cfbac733 100644 --- a/components/gfx/paint_context.rs +++ b/components/gfx/paint_context.rs @@ -33,7 +33,7 @@ use std::default::Default; use std::{f32, mem, ptr}; use style::computed_values::{border_style, filter, image_rendering, mix_blend_mode}; use text::TextRun; -use text::glyph::CharIndex; +use text::glyph::ByteIndex; use util::geometry::{self, MAX_RECT, PagePx, ScreenPx}; use util::opts; @@ -1768,7 +1768,7 @@ trait ScaledFontExtensionMethods { fn draw_text(&self, draw_target: &DrawTarget, run: &TextRun, - range: &Range<CharIndex>, + range: &Range<ByteIndex>, baseline_origin: Point2D<Au>, color: Color, antialias: bool); @@ -1779,7 +1779,7 @@ impl ScaledFontExtensionMethods for ScaledFont { fn draw_text(&self, draw_target: &DrawTarget, run: &TextRun, - range: &Range<CharIndex>, + range: &Range<ByteIndex>, baseline_origin: Point2D<Au>, color: Color, antialias: bool) { @@ -1795,11 +1795,10 @@ impl ScaledFontExtensionMethods for ScaledFont { }; let mut origin = baseline_origin.clone(); - let mut azglyphs = vec!(); - azglyphs.reserve(range.length().to_usize()); + let mut azglyphs = Vec::with_capacity(range.length().to_usize()); for slice in run.natural_word_slices_in_visual_order(range) { - for glyph in slice.glyphs.iter_glyphs_for_char_range(&slice.range) { + for glyph in slice.glyphs.iter_glyphs_for_byte_range(&slice.range) { let glyph_advance = glyph.advance(); let glyph_offset = glyph.offset().unwrap_or(Point2D::zero()); let azglyph = struct__AzGlyph { diff --git a/components/gfx/text/glyph.rs b/components/gfx/text/glyph.rs index 6841ad23b94..c5fe90e1c60 100644 --- a/components/gfx/text/glyph.rs +++ b/components/gfx/text/glyph.rs @@ -164,7 +164,7 @@ impl DetailedGlyph { #[derive(PartialEq, Clone, Eq, Debug, Copy, Deserialize, Serialize)] struct DetailedGlyphRecord { // source string offset/GlyphEntry offset in the TextRun - entry_offset: CharIndex, + entry_offset: ByteIndex, // offset into the detailed glyphs buffer detail_offset: usize, } @@ -205,7 +205,7 @@ impl<'a> DetailedGlyphStore { } } - fn add_detailed_glyphs_for_entry(&mut self, entry_offset: CharIndex, glyphs: &[DetailedGlyph]) { + fn add_detailed_glyphs_for_entry(&mut self, entry_offset: ByteIndex, glyphs: &[DetailedGlyph]) { let entry = DetailedGlyphRecord { entry_offset: entry_offset, detail_offset: self.detail_buffer.len(), @@ -229,7 +229,7 @@ impl<'a> DetailedGlyphStore { self.lookup_is_sorted = false; } - fn detailed_glyphs_for_entry(&'a self, entry_offset: CharIndex, count: u16) + fn detailed_glyphs_for_entry(&'a self, entry_offset: ByteIndex, count: u16) -> &'a [DetailedGlyph] { debug!("Requesting detailed glyphs[n={}] for entry[off={:?}]", count, entry_offset); @@ -256,7 +256,7 @@ impl<'a> DetailedGlyphStore { } fn detailed_glyph_with_index(&'a self, - entry_offset: CharIndex, + entry_offset: ByteIndex, detail_offset: u16) -> &'a DetailedGlyph { assert!((detail_offset as usize) <= self.detail_buffer.len()); @@ -336,8 +336,8 @@ impl GlyphData { // values as they are needed from the GlyphStore, using provided offsets. #[derive(Copy, Clone)] pub enum GlyphInfo<'a> { - Simple(&'a GlyphStore, CharIndex), - Detail(&'a GlyphStore, CharIndex, u16), + Simple(&'a GlyphStore, ByteIndex), + Detail(&'a GlyphStore, ByteIndex, u16), } impl<'a> GlyphInfo<'a> { @@ -413,10 +413,10 @@ pub struct GlyphStore { int_range_index! { #[derive(Deserialize, Serialize, RustcEncodable)] - #[doc = "An index that refers to a character in a text run. This could \ + #[doc = "An index that refers to a byte offset in a text run. This could \ point to the middle of a glyph."] #[derive(HeapSizeOf)] - struct CharIndex(isize) + struct ByteIndex(isize) } impl<'a> GlyphStore { @@ -436,8 +436,8 @@ impl<'a> GlyphStore { } } - pub fn char_len(&self) -> CharIndex { - CharIndex(self.entry_buffer.len() as isize) + pub fn len(&self) -> ByteIndex { + ByteIndex(self.entry_buffer.len() as isize) } pub fn is_whitespace(&self) -> bool { @@ -452,7 +452,7 @@ impl<'a> GlyphStore { #[inline(never)] fn cache_total_advance(&mut self) { let mut total_advance = Au(0); - for glyph in self.iter_glyphs_for_char_range(&Range::new(CharIndex(0), self.char_len())) { + for glyph in self.iter_glyphs_for_byte_range(&Range::new(ByteIndex(0), self.len())) { total_advance = total_advance + glyph.advance() } self.total_advance = total_advance @@ -462,10 +462,9 @@ impl<'a> GlyphStore { self.total_advance } - /// Adds a single glyph. If `character` is present, this represents a single character; - /// otherwise, this glyph represents multiple characters. - pub fn add_glyph_for_char_index(&mut self, - i: CharIndex, + /// Adds a single glyph. + pub fn add_glyph_for_byte_index(&mut self, + i: ByteIndex, character: char, data: &GlyphData) { let glyph_is_compressible = is_simple_glyph_id(data.id) && @@ -474,7 +473,7 @@ impl<'a> GlyphStore { data.cluster_start; // others are stored in detail buffer debug_assert!(data.ligature_start); // can't compress ligature continuation glyphs. - debug_assert!(i < self.char_len()); + debug_assert!(i < self.len()); let mut entry = if glyph_is_compressible { GlyphEntry::simple(data.id, data.advance) @@ -492,8 +491,8 @@ impl<'a> GlyphStore { self.entry_buffer[i.to_usize()] = entry; } - pub fn add_glyphs_for_char_index(&mut self, i: CharIndex, data_for_glyphs: &[GlyphData]) { - assert!(i < self.char_len()); + pub fn add_glyphs_for_byte_index(&mut self, i: ByteIndex, data_for_glyphs: &[GlyphData]) { + assert!(i < self.len()); assert!(data_for_glyphs.len() > 0); let glyph_count = data_for_glyphs.len(); @@ -518,48 +517,48 @@ impl<'a> GlyphStore { } #[inline] - pub fn iter_glyphs_for_char_range(&'a self, rang: &Range<CharIndex>) -> GlyphIterator<'a> { - if rang.begin() >= self.char_len() { + pub fn iter_glyphs_for_byte_range(&'a self, range: &Range<ByteIndex>) -> GlyphIterator<'a> { + if range.begin() >= self.len() { panic!("iter_glyphs_for_range: range.begin beyond length!"); } - if rang.end() > self.char_len() { + if range.end() > self.len() { panic!("iter_glyphs_for_range: range.end beyond length!"); } GlyphIterator { store: self, - char_index: if self.is_rtl { rang.end() } else { rang.begin() - CharIndex(1) }, - char_range: *rang, + byte_index: if self.is_rtl { range.end() } else { range.begin() - ByteIndex(1) }, + byte_range: *range, glyph_range: None, } } #[inline] - pub fn advance_for_char_range(&self, rang: &Range<CharIndex>) -> Au { - if rang.begin() == CharIndex(0) && rang.end() == self.char_len() { + pub fn advance_for_byte_range(&self, range: &Range<ByteIndex>) -> Au { + if range.begin() == ByteIndex(0) && range.end() == self.len() { self.total_advance } else if !self.has_detailed_glyphs { - self.advance_for_char_range_simple_glyphs(rang) + self.advance_for_byte_range_simple_glyphs(range) } else { - self.advance_for_char_range_slow_path(rang) + self.advance_for_byte_range_slow_path(range) } } #[inline] - pub fn advance_for_char_range_slow_path(&self, rang: &Range<CharIndex>) -> Au { - self.iter_glyphs_for_char_range(rang) + pub fn advance_for_byte_range_slow_path(&self, range: &Range<ByteIndex>) -> Au { + self.iter_glyphs_for_byte_range(range) .fold(Au(0), |advance, glyph| advance + glyph.advance()) } #[inline] #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - fn advance_for_char_range_simple_glyphs(&self, rang: &Range<CharIndex>) -> Au { + fn advance_for_byte_range_simple_glyphs(&self, range: &Range<ByteIndex>) -> Au { let mask = u32x4::splat(GLYPH_ADVANCE_MASK); let mut simd_advance = u32x4::splat(0); - let begin = rang.begin().to_usize(); - let len = rang.length().to_usize(); + let begin = range.begin().to_usize(); + let len = range.length().to_usize(); let num_simd_iterations = len / 4; - let leftover_entries = rang.end().to_usize() - (len - num_simd_iterations * 4); + let leftover_entries = range.end().to_usize() - (len - num_simd_iterations * 4); let buf = self.transmute_entry_buffer_to_u32_buffer(); for i in 0..num_simd_iterations { @@ -575,7 +574,7 @@ impl<'a> GlyphStore { simd_advance.extract(2) + simd_advance.extract(3)) as i32; let mut leftover = Au(0); - for i in leftover_entries..rang.end().to_usize() { + for i in leftover_entries..range.end().to_usize() { leftover = leftover + self.entry_buffer[i].advance(); } Au(advance) + leftover @@ -584,8 +583,8 @@ impl<'a> GlyphStore { /// When SIMD isn't available (non-x86_x64/aarch64), fallback to the slow path. #[inline] #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] - fn advance_for_char_range_simple_glyphs(&self, rang: &Range<CharIndex>) -> Au { - self.advance_for_char_range_slow_path(rang) + fn advance_for_byte_range_simple_glyphs(&self, range: &Range<ByteIndex>) -> Au { + self.advance_for_byte_range_slow_path(range) } /// Used for SIMD. @@ -595,12 +594,12 @@ impl<'a> GlyphStore { unsafe { mem::transmute(self.entry_buffer.as_slice()) } } - pub fn char_is_space(&self, i: CharIndex) -> bool { - assert!(i < self.char_len()); + pub fn char_is_space(&self, i: ByteIndex) -> bool { + assert!(i < self.len()); self.entry_buffer[i.to_usize()].char_is_space() } - pub fn space_count_in_range(&self, range: &Range<CharIndex>) -> u32 { + pub fn space_count_in_range(&self, range: &Range<ByteIndex>) -> u32 { let mut spaces = 0; for index in range.each_index() { if self.char_is_space(index) { @@ -610,7 +609,7 @@ impl<'a> GlyphStore { spaces } - pub fn distribute_extra_space_in_range(&mut self, range: &Range<CharIndex>, space: f64) { + pub fn distribute_extra_space_in_range(&mut self, range: &Range<ByteIndex>, space: f64) { debug_assert!(space >= 0.0); if range.is_empty() { return @@ -659,12 +658,12 @@ impl fmt::Debug for GlyphStore { } } -/// An iterator over the glyphs in a character range in a `GlyphStore`. +/// An iterator over the glyphs in a byte range in a `GlyphStore`. pub struct GlyphIterator<'a> { store: &'a GlyphStore, - char_index: CharIndex, - char_range: Range<CharIndex>, - glyph_range: Option<EachIndex<isize, CharIndex>>, + byte_index: ByteIndex, + byte_range: Range<ByteIndex>, + glyph_range: Option<EachIndex<isize, ByteIndex>>, } impl<'a> GlyphIterator<'a> { @@ -673,7 +672,7 @@ impl<'a> GlyphIterator<'a> { fn next_glyph_range(&mut self) -> Option<GlyphInfo<'a>> { match self.glyph_range.as_mut().unwrap().next() { Some(j) => { - Some(GlyphInfo::Detail(self.store, self.char_index, j.get() as u16 /* ??? */)) + Some(GlyphInfo::Detail(self.store, self.byte_index, j.get() as u16 /* ??? */)) } None => { // No more glyphs for current character. Try to get another. @@ -685,9 +684,9 @@ impl<'a> GlyphIterator<'a> { // Slow path when there is a complex glyph. #[inline(never)] - fn next_complex_glyph(&mut self, entry: &GlyphEntry, i: CharIndex) -> Option<GlyphInfo<'a>> { + fn next_complex_glyph(&mut self, entry: &GlyphEntry, i: ByteIndex) -> Option<GlyphInfo<'a>> { let glyphs = self.store.detail_store.detailed_glyphs_for_entry(i, entry.glyph_count()); - self.glyph_range = Some(range::each_index(CharIndex(0), CharIndex(glyphs.len() as isize))); + self.glyph_range = Some(range::each_index(ByteIndex(0), ByteIndex(glyphs.len() as isize))); self.next() } } @@ -708,17 +707,17 @@ impl<'a> Iterator for GlyphIterator<'a> { return self.next_glyph_range() } - // No glyph range. Look at next character. - self.char_index = self.char_index + if self.store.is_rtl { - CharIndex(-1) + // No glyph range. Look at next byte. + self.byte_index = self.byte_index + if self.store.is_rtl { + ByteIndex(-1) } else { - CharIndex(1) + ByteIndex(1) }; - let i = self.char_index; - if !self.char_range.contains(i) { + let i = self.byte_index; + if !self.byte_range.contains(i) { return None } - debug_assert!(i < self.store.char_len()); + debug_assert!(i < self.store.len()); let entry = self.store.entry_buffer[i.to_usize()]; if entry.is_simple() { Some(GlyphInfo::Simple(self.store, i)) diff --git a/components/gfx/text/shaping/harfbuzz.rs b/components/gfx/text/shaping/harfbuzz.rs index e9fc50d7f0e..e8b23e1e487 100644 --- a/components/gfx/text/shaping/harfbuzz.rs +++ b/components/gfx/text/shaping/harfbuzz.rs @@ -35,7 +35,7 @@ use harfbuzz::{hb_position_t, hb_tag_t}; use libc::{c_char, c_int, c_uint, c_void}; use platform::font::FontTable; use std::{char, cmp, ptr}; -use text::glyph::{CharIndex, GlyphData, GlyphId, GlyphStore}; +use text::glyph::{ByteIndex, GlyphData, GlyphId, GlyphStore}; use text::shaping::ShaperMethods; use text::util::{fixed_to_float, float_to_fixed, is_bidi_control}; @@ -45,8 +45,7 @@ macro_rules! hb_tag { ); } -static NO_GLYPH: i32 = -1; -static CONTINUATION_BYTE: i32 = -2; +const NO_GLYPH: i32 = -1; static KERN: u32 = hb_tag!('k', 'e', 'r', 'n'); static LIGA: u32 = hb_tag!('l', 'i', 'g', 'a'); @@ -258,44 +257,18 @@ impl Shaper { let glyph_data = ShapedGlyphData::new(buffer); let glyph_count = glyph_data.len(); let byte_max = text.len(); - let char_max = text.chars().count(); - - // GlyphStore records are indexed by character, not byte offset. - // so, we must be careful to increment this when saving glyph entries. - let (mut char_idx, char_step) = if options.flags.contains(RTL_FLAG) { - (CharIndex(char_max as isize - 1), CharIndex(-1)) - } else { - (CharIndex(0), CharIndex(1)) - }; - debug!("Shaped text[char count={}], got back {} glyph info records.", - char_max, + debug!("Shaped text[byte count={}], got back {} glyph info records.", + byte_max, glyph_count); - if char_max != glyph_count { - debug!("NOTE: Since these are not equal, we probably have been given some complex \ - glyphs."); - } - // make map of what chars have glyphs - let mut byte_to_glyph: Vec<i32>; - - // fast path: all chars are single-byte. - if byte_max == char_max { - byte_to_glyph = vec![NO_GLYPH; byte_max]; - } else { - byte_to_glyph = vec![CONTINUATION_BYTE; byte_max]; - for (i, _) in text.char_indices() { - byte_to_glyph[i] = NO_GLYPH; - } - } + let mut byte_to_glyph = vec![NO_GLYPH; byte_max]; debug!("(glyph idx) -> (text byte offset)"); for i in 0..glyph_data.len() { - // loc refers to a *byte* offset within the utf8 string. let loc = glyph_data.byte_offset_of_glyph(i) as usize; if loc < byte_max { - assert!(byte_to_glyph[loc] != CONTINUATION_BYTE); byte_to_glyph[loc] = i as i32; } else { debug!("ERROR: tried to set out of range byte_to_glyph: idx={}, glyph idx={}", @@ -312,10 +285,7 @@ impl Shaper { } let mut glyph_span = 0..0; - - // This span contains first byte of first char, to last byte of last char in range. - // So, char_byte_span.end points to first byte of last+1 char, if it's less than byte_max. - let mut char_byte_span; + let mut byte_range = 0..0; let mut y_pos = Au(0); @@ -325,106 +295,62 @@ impl Shaper { while glyph_span.start < glyph_count { debug!("Processing glyph at idx={}", glyph_span.start); glyph_span.end = glyph_span.start; + byte_range.end = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize; - let char_byte_start = glyph_data.byte_offset_of_glyph(glyph_span.start) as usize; - char_byte_span = char_byte_start..char_byte_start; - let mut glyph_spans_multiple_characters = false; - - // find a range of chars corresponding to this glyph, plus - // any trailing chars that do not have associated glyphs. - while char_byte_span.end < byte_max { - let ch = text[char_byte_span.end..].chars().next().unwrap(); - char_byte_span.end += ch.len_utf8(); - - debug!("Processing char byte span: off={}, len={} for glyph idx={}", - char_byte_span.start, char_byte_span.len(), glyph_span.start); - - while char_byte_span.end != byte_max && - byte_to_glyph[char_byte_span.end] == NO_GLYPH { - debug!("Extending char byte span to include byte offset={} with no associated \ - glyph", char_byte_span.end); - let ch = text[char_byte_span.end..].chars().next().unwrap(); - char_byte_span.end += ch.len_utf8(); - glyph_spans_multiple_characters = true; + while byte_range.end < byte_max { + byte_range.end += 1; + // Extend the byte range to include any following byte without its own glyph. + while byte_range.end < byte_max && byte_to_glyph[byte_range.end] == NO_GLYPH { + byte_range.end += 1; } - // extend glyph range to max glyph index covered by char_span, - // in cases where one char made several glyphs and left some unassociated chars. + // Extend the glyph range to include all glyphs covered by bytes processed so far. let mut max_glyph_idx = glyph_span.end; - for i in char_byte_span.clone() { - if byte_to_glyph[i] > NO_GLYPH { - max_glyph_idx = cmp::max(byte_to_glyph[i] as usize + 1, max_glyph_idx); + for glyph_idx in &byte_to_glyph[byte_range.clone()] { + if *glyph_idx != NO_GLYPH { + max_glyph_idx = cmp::max(*glyph_idx as usize + 1, max_glyph_idx); } } - if max_glyph_idx > glyph_span.end { glyph_span.end = max_glyph_idx; - debug!("Extended glyph span (off={}, len={}) to cover char byte span's max \ - glyph index", - glyph_span.start, glyph_span.len()); + debug!("Extended glyph span to {:?}", glyph_span); } - // if there's just one glyph, then we don't need further checks. if glyph_span.len() == 1 { break; } // if no glyphs were found yet, extend the char byte range more. if glyph_span.len() == 0 { continue; } - debug!("Complex (multi-glyph to multi-char) association found. This case \ - probably doesn't work."); - + // If byte_range now includes all the byte offsets found in glyph_span, then we + // have found a contiguous "cluster" and can stop extending it. let mut all_glyphs_are_within_cluster: bool = true; for j in glyph_span.clone() { let loc = glyph_data.byte_offset_of_glyph(j); - if !char_byte_span.contains(loc as usize) { + if !byte_range.contains(loc as usize) { all_glyphs_are_within_cluster = false; break } } - - debug!("All glyphs within char_byte_span cluster?: {}", - all_glyphs_are_within_cluster); - - // found a valid range; stop extending char_span. if all_glyphs_are_within_cluster { break } + + // Otherwise, the bytes we have seen so far correspond to a non-contiguous set of + // glyphs. Keep extending byte_range until we fill in all the holes in the glyph + // span or reach the end of the text. } - // character/glyph clump must contain characters. - assert!(char_byte_span.len() > 0); - // character/glyph clump must contain glyphs. + assert!(byte_range.len() > 0); assert!(glyph_span.len() > 0); - // now char_span is a ligature clump, formed by the glyphs in glyph_span. - // we need to find the chars that correspond to actual glyphs (char_extended_span), - //and set glyph info for those and empty infos for the chars that are continuations. - - // a simple example: - // chars: 'f' 't' 't' - // glyphs: 'ftt' '' '' - // cgmap: t f f - // gspan: [-] - // cspan: [-] - // covsp: [---------------] - - let mut covered_byte_span = char_byte_span.clone(); - // extend, clipping at end of text range. - while covered_byte_span.end < byte_max && - byte_to_glyph[covered_byte_span.end] == NO_GLYPH { - let ch = text[covered_byte_span.end..].chars().next().unwrap(); - covered_byte_span.end += ch.len_utf8(); - } - - if covered_byte_span.start >= byte_max { - // oops, out of range. clip and forget this clump. - glyph_span.start = glyph_span.end; - char_byte_span.start = char_byte_span.end; - } + // Now byte_range is the ligature clump formed by the glyphs in glyph_span. + // We will save these glyphs to the glyph store at the index of the first byte. + let byte_idx = ByteIndex(byte_range.start as isize); - // fast path: 1-to-1 mapping of single char and single glyph. - if glyph_span.len() == 1 && !glyph_spans_multiple_characters { + if glyph_span.len() == 1 { + // Fast path: 1-to-1 mapping of byte offset to single glyph. + // // TODO(Issue #214): cluster ranges need to be computed before // shaping, and then consulted here. // for now, just pretend that every character is a cluster start. @@ -433,7 +359,7 @@ impl Shaper { // // NB: When we acquire the ability to handle ligatures that cross word boundaries, // we'll need to do something special to handle `word-spacing` properly. - let character = text[char_byte_span.clone()].chars().next().unwrap(); + let character = text[byte_range.clone()].chars().next().unwrap(); if is_bidi_control(character) { // Don't add any glyphs for bidi control chars } else if character == '\t' { @@ -449,7 +375,7 @@ impl Shaper { Default::default(), true, true); - glyphs.add_glyph_for_char_index(char_idx, character, &data); + glyphs.add_glyph_for_byte_index(byte_idx, character, &data); } else { let shape = glyph_data.entry_for_glyph(glyph_span.start, &mut y_pos); let advance = self.advance_for_shaped_glyph(shape.advance, character, options); @@ -458,7 +384,7 @@ impl Shaper { shape.offset, true, true); - glyphs.add_glyph_for_char_index(char_idx, character, &data); + glyphs.add_glyph_for_byte_index(byte_idx, character, &data); } } else { // collect all glyphs to be assigned to the first character. @@ -473,15 +399,12 @@ impl Shaper { glyph_i > glyph_span.start)); // all but first are ligature continuations } - // now add the detailed glyph entry. - glyphs.add_glyphs_for_char_index(char_idx, &datas); + glyphs.add_glyphs_for_byte_index(byte_idx, &datas); } - // shift up our working spans past things we just handled. glyph_span.start = glyph_span.end; - char_byte_span.start = char_byte_span.end; - char_idx = char_idx + char_step; + byte_range.start = byte_range.end; } // this must be called after adding all glyph data; it sorts the diff --git a/components/gfx/text/text_run.rs b/components/gfx/text/text_run.rs index a7c28cb0a50..45215799eed 100644 --- a/components/gfx/text/text_run.rs +++ b/components/gfx/text/text_run.rs @@ -11,11 +11,11 @@ use std::cell::Cell; use std::cmp::{Ordering, max}; use std::slice::Iter; use std::sync::Arc; -use text::glyph::{CharIndex, GlyphStore}; +use text::glyph::{ByteIndex, GlyphStore}; use webrender_traits; thread_local! { - static INDEX_OF_FIRST_GLYPH_RUN_CACHE: Cell<Option<(*const TextRun, CharIndex, usize)>> = + static INDEX_OF_FIRST_GLYPH_RUN_CACHE: Cell<Option<(*const TextRun, ByteIndex, usize)>> = Cell::new(None) } @@ -51,19 +51,19 @@ impl Drop for TextRun { pub struct GlyphRun { /// The glyphs. pub glyph_store: Arc<GlyphStore>, - /// The range of characters in the containing run. - pub range: Range<CharIndex>, + /// The byte range of characters in the containing run. + pub range: Range<ByteIndex>, } pub struct NaturalWordSliceIterator<'a> { glyphs: &'a [GlyphRun], index: usize, - range: Range<CharIndex>, + range: Range<ByteIndex>, reverse: bool, } impl GlyphRun { - fn compare(&self, key: &CharIndex) -> Ordering { + fn compare(&self, key: &ByteIndex) -> Ordering { if *key < self.range.begin() { Ordering::Greater } else if *key >= self.range.end() { @@ -79,16 +79,16 @@ impl GlyphRun { pub struct TextRunSlice<'a> { /// The glyph store that the glyphs in this slice belong to. pub glyphs: &'a GlyphStore, - /// The character index that this slice begins at, relative to the start of the *text run*. - pub offset: CharIndex, + /// The byte index that this slice begins at, relative to the start of the *text run*. + pub offset: ByteIndex, /// The range that these glyphs encompass, relative to the start of the *glyph store*. - pub range: Range<CharIndex>, + pub range: Range<ByteIndex>, } impl<'a> TextRunSlice<'a> { /// Returns the range that these glyphs encompass, relative to the start of the *text run*. #[inline] - pub fn text_run_range(&self) -> Range<CharIndex> { + pub fn text_run_range(&self) -> Range<ByteIndex> { let mut range = self.range; range.shift_by(self.offset); range @@ -116,15 +116,15 @@ impl<'a> Iterator for NaturalWordSliceIterator<'a> { self.index += 1; } - let mut char_range = self.range.intersect(&slice_glyphs.range); + let mut byte_range = self.range.intersect(&slice_glyphs.range); let slice_range_begin = slice_glyphs.range.begin(); - char_range.shift_by(-slice_range_begin); + byte_range.shift_by(-slice_range_begin); - if !char_range.is_empty() { + if !byte_range.is_empty() { Some(TextRunSlice { glyphs: &*slice_glyphs.glyph_store, offset: slice_range_begin, - range: char_range, + range: byte_range, }) } else { None @@ -133,9 +133,10 @@ impl<'a> Iterator for NaturalWordSliceIterator<'a> { } pub struct CharacterSliceIterator<'a> { + text: &'a str, glyph_run: Option<&'a GlyphRun>, glyph_run_iter: Iter<'a, GlyphRun>, - range: Range<CharIndex>, + range: Range<ByteIndex>, } impl<'a> Iterator for CharacterSliceIterator<'a> { @@ -150,8 +151,13 @@ impl<'a> Iterator for CharacterSliceIterator<'a> { }; debug_assert!(!self.range.is_empty()); - let index_to_return = self.range.begin(); - self.range.adjust_by(CharIndex(1), CharIndex(-1)); + let byte_start = self.range.begin(); + let byte_len = match self.text[byte_start.to_usize()..].chars().next() { + Some(ch) => ByteIndex(ch.len_utf8() as isize), + None => unreachable!() // XXX refactor? + }; + + self.range.adjust_by(byte_len, -byte_len); if self.range.is_empty() { // We're done. self.glyph_run = None @@ -160,11 +166,11 @@ impl<'a> Iterator for CharacterSliceIterator<'a> { self.glyph_run = self.glyph_run_iter.next(); } - let index_within_glyph_run = index_to_return - glyph_run.range.begin(); + let index_within_glyph_run = byte_start - glyph_run.range.begin(); Some(TextRunSlice { glyphs: &*glyph_run.glyph_store, offset: glyph_run.range.begin(), - range: Range::new(index_within_glyph_run, CharIndex(1)), + range: Range::new(index_within_glyph_run, byte_len), }) } } @@ -187,9 +193,9 @@ impl<'a> TextRun { -> Vec<GlyphRun> { // TODO(Issue #230): do a better job. See Gecko's LineBreaker. let mut glyphs = vec!(); - let (mut byte_i, mut char_i) = (0, CharIndex(0)); + let mut byte_i = 0; let mut cur_slice_is_whitespace = false; - let (mut byte_last_boundary, mut char_last_boundary) = (0, CharIndex(0)); + let mut byte_last_boundary = 0; for ch in text.chars() { // Slices alternate between whitespace and non-whitespace, @@ -225,14 +231,13 @@ impl<'a> TextRun { glyphs.push(GlyphRun { glyph_store: font.shape_text(slice, &options), - range: Range::new(char_last_boundary, char_i - char_last_boundary), + range: Range::new(ByteIndex(byte_last_boundary as isize), + ByteIndex((byte_i - byte_last_boundary) as isize)), }); byte_last_boundary = byte_i; - char_last_boundary = char_i; } byte_i = byte_i + ch.len_utf8(); - char_i = char_i + CharIndex(1); } // Create a glyph store for the final slice if it's nonempty. @@ -248,7 +253,8 @@ impl<'a> TextRun { glyphs.push(GlyphRun { glyph_store: font.shape_text(slice, &options), - range: Range::new(char_last_boundary, char_i - char_last_boundary), + range: Range::new(ByteIndex(byte_last_boundary as isize), + ByteIndex((byte_i - byte_last_boundary) as isize)), }); } @@ -263,7 +269,7 @@ impl<'a> TextRun { self.font_metrics.descent } - pub fn advance_for_range(&self, range: &Range<CharIndex>) -> Au { + pub fn advance_for_range(&self, range: &Range<ByteIndex>) -> Au { if range.is_empty() { return Au(0) } @@ -272,24 +278,24 @@ impl<'a> TextRun { // TODO(Issue #98): using inter-char and inter-word spacing settings when measuring text self.natural_word_slices_in_range(range) .fold(Au(0), |advance, slice| { - advance + slice.glyphs.advance_for_char_range(&slice.range) + advance + slice.glyphs.advance_for_byte_range(&slice.range) }) } - pub fn metrics_for_range(&self, range: &Range<CharIndex>) -> RunMetrics { + pub fn metrics_for_range(&self, range: &Range<ByteIndex>) -> RunMetrics { RunMetrics::new(self.advance_for_range(range), self.font_metrics.ascent, self.font_metrics.descent) } - pub fn metrics_for_slice(&self, glyphs: &GlyphStore, slice_range: &Range<CharIndex>) + pub fn metrics_for_slice(&self, glyphs: &GlyphStore, slice_range: &Range<ByteIndex>) -> RunMetrics { - RunMetrics::new(glyphs.advance_for_char_range(slice_range), + RunMetrics::new(glyphs.advance_for_byte_range(slice_range), self.font_metrics.ascent, self.font_metrics.descent) } - pub fn min_width_for_range(&self, range: &Range<CharIndex>) -> Au { + pub fn min_width_for_range(&self, range: &Range<ByteIndex>) -> Au { debug!("iterating outer range {:?}", range); self.natural_word_slices_in_range(range).fold(Au(0), |max_piece_width, slice| { debug!("iterated on {:?}[{:?}]", slice.offset, slice.range); @@ -297,8 +303,8 @@ impl<'a> TextRun { }) } - /// Returns the index of the first glyph run containing the given character index. - fn index_of_first_glyph_run_containing(&self, index: CharIndex) -> Option<usize> { + /// Returns the index of the first glyph run containing the given byte index. + fn index_of_first_glyph_run_containing(&self, index: ByteIndex) -> Option<usize> { let self_ptr = self as *const TextRun; INDEX_OF_FIRST_GLYPH_RUN_CACHE.with(|index_of_first_glyph_run_cache| { if let Some((last_text_run, last_index, last_result)) = @@ -319,7 +325,7 @@ impl<'a> TextRun { /// Returns an iterator that will iterate over all slices of glyphs that represent natural /// words in the given range. - pub fn natural_word_slices_in_range(&'a self, range: &Range<CharIndex>) + pub fn natural_word_slices_in_range(&'a self, range: &Range<ByteIndex>) -> NaturalWordSliceIterator<'a> { let index = match self.index_of_first_glyph_run_containing(range.begin()) { None => self.glyphs.len(), @@ -335,13 +341,13 @@ impl<'a> TextRun { /// Returns an iterator that over natural word slices in visual order (left to right or /// right to left, depending on the bidirectional embedding level). - pub fn natural_word_slices_in_visual_order(&'a self, range: &Range<CharIndex>) + pub fn natural_word_slices_in_visual_order(&'a self, range: &Range<ByteIndex>) -> NaturalWordSliceIterator<'a> { // Iterate in reverse order if bidi level is RTL. let reverse = self.bidi_level % 2 == 1; let index = if reverse { - match self.index_of_first_glyph_run_containing(range.end() - CharIndex(1)) { + match self.index_of_first_glyph_run_containing(range.end() - ByteIndex(1)) { Some(i) => i + 1, // In reverse mode, index points one past the next element. None => 0 } @@ -361,7 +367,7 @@ impl<'a> TextRun { /// Returns an iterator that will iterate over all slices of glyphs that represent individual /// characters in the given range. - pub fn character_slices_in_range(&'a self, range: &Range<CharIndex>) + pub fn character_slices_in_range(&'a self, range: &Range<ByteIndex>) -> CharacterSliceIterator<'a> { let index = match self.index_of_first_glyph_run_containing(range.begin()) { None => self.glyphs.len(), @@ -370,6 +376,7 @@ impl<'a> TextRun { let mut glyph_run_iter = self.glyphs[index..].iter(); let first_glyph_run = glyph_run_iter.next(); CharacterSliceIterator { + text: &self.text, glyph_run: first_glyph_run, glyph_run_iter: glyph_run_iter, range: *range, diff --git a/components/layout/display_list_builder.rs b/components/layout/display_list_builder.rs index fb2847e9d77..1d9d29b4cdd 100644 --- a/components/layout/display_list_builder.rs +++ b/components/layout/display_list_builder.rs @@ -31,7 +31,7 @@ use gfx::display_list::{LineDisplayItem, OpaqueNode, SolidColorDisplayItem}; use gfx::display_list::{StackingContext, StackingContextId, StackingContextType}; use gfx::display_list::{TextDisplayItem, TextOrientation, WebRenderImageInfo}; use gfx::paint_thread::THREAD_TINT_COLORS; -use gfx::text::glyph::CharIndex; +use gfx::text::glyph::ByteIndex; use gfx_traits::{color, ScrollPolicy}; use inline::{FIRST_FRAGMENT_OF_ELEMENT, InlineFlow, LAST_FRAGMENT_OF_ELEMENT}; use ipc_channel::ipc::{self, IpcSharedMemory}; @@ -965,7 +965,7 @@ impl FragmentDisplayListBuilding for Fragment { Some(insertion_point_index) => insertion_point_index, None => return, }; - let range = Range::new(CharIndex(0), insertion_point_index); + let range = Range::new(ByteIndex(0), insertion_point_index); let advance = scanned_text_fragment_info.run.advance_for_range(&range); let insertion_point_bounds; diff --git a/components/layout/fragment.rs b/components/layout/fragment.rs index 5612a322b95..ce9e55496c7 100644 --- a/components/layout/fragment.rs +++ b/components/layout/fragment.rs @@ -15,7 +15,7 @@ use flow::{self, Flow}; use flow_ref::{self, FlowRef}; use gfx; use gfx::display_list::{BLUR_INFLATION_FACTOR, FragmentType, OpaqueNode, StackingContextId}; -use gfx::text::glyph::CharIndex; +use gfx::text::glyph::ByteIndex; use gfx::text::text_run::{TextRun, TextRunSlice}; use gfx_traits::{LayerId, LayerType}; use incremental::{RECONSTRUCT_FLOW, RestyleDamage}; @@ -48,7 +48,6 @@ use text; use text::TextRunScanner; use url::Url; use util; -use util::str::slice_chars; use wrapper::{PseudoElementType, ThreadSafeLayoutElement, ThreadSafeLayoutNode}; /// Fragments (`struct Fragment`) are the leaves of the layout tree. They cannot position @@ -227,13 +226,8 @@ impl SpecificFragmentInfo { impl fmt::Debug for SpecificFragmentInfo { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match *self { - SpecificFragmentInfo::ScannedText(ref info) => { - write!(f, "{:?}", slice_chars(&*info.run.text, info.range.begin().get() as usize, - info.range.end().get() as usize)) - } - SpecificFragmentInfo::UnscannedText(ref info) => { - write!(f, "{:?}", info.text) - } + SpecificFragmentInfo::ScannedText(ref info) => write!(f, "{:?}", info.text()), + SpecificFragmentInfo::UnscannedText(ref info) => write!(f, "{:?}", info.text), _ => Ok(()) } } @@ -657,16 +651,16 @@ pub struct ScannedTextFragmentInfo { /// The intrinsic size of the text fragment. pub content_size: LogicalSize<Au>, - /// The position of the insertion point in characters, if any. - pub insertion_point: Option<CharIndex>, + /// The byte offset of the insertion point, if any. + pub insertion_point: Option<ByteIndex>, /// The range within the above text run that this represents. - pub range: Range<CharIndex>, + pub range: Range<ByteIndex>, /// The endpoint of the above range, including whitespace that was stripped out. This exists /// so that we can restore the range to its original value (before line breaking occurred) when /// performing incremental reflow. - pub range_end_including_stripped_whitespace: CharIndex, + pub range_end_including_stripped_whitespace: ByteIndex, pub flags: ScannedTextFlags, } @@ -685,9 +679,9 @@ bitflags! { impl ScannedTextFragmentInfo { /// Creates the information specific to a scanned text fragment from a range and a text run. pub fn new(run: Arc<TextRun>, - range: Range<CharIndex>, + range: Range<ByteIndex>, content_size: LogicalSize<Au>, - insertion_point: Option<CharIndex>, + insertion_point: Option<ByteIndex>, flags: ScannedTextFlags) -> ScannedTextFragmentInfo { ScannedTextFragmentInfo { @@ -700,6 +694,10 @@ impl ScannedTextFragmentInfo { } } + pub fn text(&self) -> &str { + &self.run.text[self.range.begin().to_usize() .. self.range.end().to_usize()] + } + pub fn requires_line_break_afterward_if_wrapping_on_newlines(&self) -> bool { self.flags.contains(REQUIRES_LINE_BREAK_AFTERWARD_IF_WRAPPING_ON_NEWLINES) } @@ -715,12 +713,12 @@ impl ScannedTextFragmentInfo { pub struct SplitInfo { // TODO(bjz): this should only need to be a single character index, but both values are // currently needed for splitting in the `inline::try_append_*` functions. - pub range: Range<CharIndex>, + pub range: Range<ByteIndex>, pub inline_size: Au, } impl SplitInfo { - fn new(range: Range<CharIndex>, info: &ScannedTextFragmentInfo) -> SplitInfo { + fn new(range: Range<ByteIndex>, info: &ScannedTextFragmentInfo) -> SplitInfo { let inline_size = info.run.advance_for_range(&range); SplitInfo { range: range, @@ -755,13 +753,13 @@ pub struct UnscannedTextFragmentInfo { pub text: Box<str>, /// The selected text range. An empty range represents the insertion point. - pub selection: Option<Range<CharIndex>>, + pub selection: Option<Range<ByteIndex>>, } impl UnscannedTextFragmentInfo { /// Creates a new instance of `UnscannedTextFragmentInfo` from the given text. #[inline] - pub fn new(text: String, selection: Option<Range<CharIndex>>) -> UnscannedTextFragmentInfo { + pub fn new(text: String, selection: Option<Range<ByteIndex>>) -> UnscannedTextFragmentInfo { UnscannedTextFragmentInfo { text: text.into_boxed_str(), selection: selection, @@ -1611,7 +1609,7 @@ impl Fragment { }; let mut remaining_inline_size = max_inline_size; - let mut inline_start_range = Range::new(text_fragment_info.range.begin(), CharIndex(0)); + let mut inline_start_range = Range::new(text_fragment_info.range.begin(), ByteIndex(0)); let mut inline_end_range = None; let mut overflowing = false; @@ -1651,7 +1649,7 @@ impl Fragment { // We're going to overflow the line. overflowing = true; inline_start_range = slice.text_run_range(); - remaining_range = Range::new(slice.text_run_range().end(), CharIndex(0)); + remaining_range = Range::new(slice.text_run_range().end(), ByteIndex(0)); remaining_range.extend_to(text_fragment_info.range.end()); } @@ -2322,32 +2320,20 @@ impl Fragment { match self.specific { SpecificFragmentInfo::ScannedText(ref mut scanned_text_fragment_info) => { - let mut leading_whitespace_character_count = 0; - { - let text = slice_chars( - &*scanned_text_fragment_info.run.text, - scanned_text_fragment_info.range.begin().to_usize(), - scanned_text_fragment_info.range.end().to_usize()); - for character in text.chars() { - if util::str::char_is_whitespace(character) { - leading_whitespace_character_count += 1 - } else { - break - } - } - } + let leading_whitespace_byte_count = scanned_text_fragment_info.text() + .find(|c| !util::str::char_is_whitespace(c)) + .unwrap_or(scanned_text_fragment_info.text().len()); + let whitespace_len = ByteIndex(leading_whitespace_byte_count as isize); let whitespace_range = Range::new(scanned_text_fragment_info.range.begin(), - CharIndex(leading_whitespace_character_count)); + whitespace_len); let text_bounds = scanned_text_fragment_info.run.metrics_for_range(&whitespace_range).bounding_box; self.border_box.size.inline = self.border_box.size.inline - text_bounds.size.width; scanned_text_fragment_info.content_size.inline = scanned_text_fragment_info.content_size.inline - text_bounds.size.width; - scanned_text_fragment_info.range.adjust_by( - CharIndex(leading_whitespace_character_count), - -CharIndex(leading_whitespace_character_count)); + scanned_text_fragment_info.range.adjust_by(whitespace_len, -whitespace_len); WhitespaceStrippingResult::RetainFragment } @@ -2388,43 +2374,29 @@ impl Fragment { match self.specific { SpecificFragmentInfo::ScannedText(ref mut scanned_text_fragment_info) => { - // FIXME(pcwalton): Is there a more clever (i.e. faster) way to do this? - debug!("stripping trailing whitespace: range={:?}, len={}", - scanned_text_fragment_info.range, - scanned_text_fragment_info.run.text.chars().count()); - let mut trailing_whitespace_character_count = 0; - let text_bounds; - { - let text = slice_chars(&*scanned_text_fragment_info.run.text, - scanned_text_fragment_info.range.begin().to_usize(), - scanned_text_fragment_info.range.end().to_usize()); - for ch in text.chars().rev() { - if util::str::char_is_whitespace(ch) { - trailing_whitespace_character_count += 1 - } else { - break - } + let mut trailing_whitespace_start_byte = 0; + for (i, c) in scanned_text_fragment_info.text().char_indices().rev() { + if !util::str::char_is_whitespace(c) { + trailing_whitespace_start_byte = i + c.len_utf8(); + break; } - - let whitespace_range = - Range::new(scanned_text_fragment_info.range.end() - - CharIndex(trailing_whitespace_character_count), - CharIndex(trailing_whitespace_character_count)); - text_bounds = scanned_text_fragment_info.run - .metrics_for_range(&whitespace_range) - .bounding_box; - self.border_box.size.inline = self.border_box.size.inline - - text_bounds.size.width; } + let whitespace_start = ByteIndex(trailing_whitespace_start_byte as isize); + let whitespace_len = scanned_text_fragment_info.range.length() - whitespace_start; + let whitespace_range = Range::new(whitespace_start, whitespace_len); + + // FIXME: This may be unnecessary because these metrics will be recomputed in + // LineBreaker::strip_trailing_whitespace_from_pending_line_if_necessary + let text_bounds = scanned_text_fragment_info.run + .metrics_for_range(&whitespace_range) + .bounding_box; + self.border_box.size.inline = self.border_box.size.inline - + text_bounds.size.width; scanned_text_fragment_info.content_size.inline = scanned_text_fragment_info.content_size.inline - text_bounds.size.width; - if trailing_whitespace_character_count != 0 { - scanned_text_fragment_info.range.extend_by( - CharIndex(-trailing_whitespace_character_count)); - } - + scanned_text_fragment_info.range.extend_by(-whitespace_len); WhitespaceStrippingResult::RetainFragment } SpecificFragmentInfo::UnscannedText(ref mut unscanned_text_fragment_info) => { diff --git a/components/layout/text.rs b/components/layout/text.rs index 22fbeb9ad1a..b7729608ce7 100644 --- a/components/layout/text.rs +++ b/components/layout/text.rs @@ -12,7 +12,7 @@ use fragment::{ScannedTextFragmentInfo, SELECTED, SpecificFragmentInfo, Unscanne use gfx::font::{DISABLE_KERNING_SHAPING_FLAG, FontMetrics, IGNORE_LIGATURES_SHAPING_FLAG}; use gfx::font::{RTL_FLAG, RunMetrics, ShapingFlags, ShapingOptions}; use gfx::font_context::FontContext; -use gfx::text::glyph::CharIndex; +use gfx::text::glyph::ByteIndex; use gfx::text::text_run::TextRun; use gfx::text::util::{self, CompressionMode}; use inline::{FIRST_FRAGMENT_OF_ELEMENT, InlineFragments, LAST_FRAGMENT_OF_ELEMENT}; @@ -174,7 +174,7 @@ impl TextRunScanner { for (fragment_index, in_fragment) in self.clump.iter().enumerate() { debug!(" flushing {:?}", in_fragment); - let mut mapping = RunMapping::new(&run_info_list[..], &run_info, fragment_index); + let mut mapping = RunMapping::new(&run_info_list[..], fragment_index); let text; let selection; match in_fragment.specific { @@ -188,13 +188,13 @@ impl TextRunScanner { Some(range) if range.is_empty() => { // `range` is the range within the current fragment. To get the range // within the text run, offset it by the length of the preceding fragments. - Some(range.begin() + CharIndex(run_info.character_length as isize)) + Some(range.begin() + ByteIndex(run_info.text.len() as isize)) } _ => None }; let (mut start_position, mut end_position) = (0, 0); - for (char_index, character) in text.chars().enumerate() { + for (byte_index, character) in text.char_indices() { // Search for the first font in this font group that contains a glyph for this // character. let mut font_index = 0; @@ -226,7 +226,7 @@ impl TextRunScanner { } let selected = match selection { - Some(range) => range.contains(CharIndex(char_index as isize)), + Some(range) => range.contains(ByteIndex(byte_index as isize)), None => false }; @@ -251,7 +251,6 @@ impl TextRunScanner { run_info = RunInfo::new(); } mapping = RunMapping::new(&run_info_list[..], - &run_info, fragment_index); } run_info.font_index = font_index; @@ -343,11 +342,14 @@ impl TextRunScanner { let mut mapping = mappings.next().unwrap(); let scanned_run = runs[mapping.text_run_index].clone(); + let mut byte_range = Range::new(ByteIndex(mapping.byte_range.begin() as isize), + ByteIndex(mapping.byte_range.length() as isize)); + let requires_line_break_afterward_if_wrapping_on_newlines = !mapping.byte_range.is_empty() && scanned_run.run.text.char_at_reverse(mapping.byte_range.end()) == '\n'; if requires_line_break_afterward_if_wrapping_on_newlines { - mapping.char_range.extend_by(CharIndex(-1)); + byte_range.extend_by(ByteIndex(-1)); // Trim the '\n' } let text_size = old_fragment.border_box.size; @@ -368,12 +370,12 @@ impl TextRunScanner { let mut new_text_fragment_info = box ScannedTextFragmentInfo::new( scanned_run.run, - mapping.char_range, + byte_range, text_size, insertion_point, flags); - let new_metrics = new_text_fragment_info.run.metrics_for_range(&mapping.char_range); + let new_metrics = new_text_fragment_info.run.metrics_for_range(&byte_range); let writing_mode = old_fragment.style.writing_mode; let bounding_box_size = bounding_box_for_run_metrics(&new_metrics, writing_mode); new_text_fragment_info.content_size = bounding_box_size; @@ -490,7 +492,7 @@ fn split_first_fragment_at_newline_if_necessary(fragments: &mut LinkedList<Fragm unscanned_text_fragment_info.text[..(position + 1)].to_owned(); unscanned_text_fragment_info.text = unscanned_text_fragment_info.text[(position + 1)..].to_owned().into_boxed_str(); - let offset = CharIndex(string_before.char_indices().count() as isize); + let offset = ByteIndex(string_before.len() as isize); match unscanned_text_fragment_info.selection { Some(ref mut selection) if selection.begin() >= offset => { // Selection is entirely in the second fragment. @@ -500,7 +502,7 @@ fn split_first_fragment_at_newline_if_necessary(fragments: &mut LinkedList<Fragm Some(ref mut selection) if selection.end() > offset => { // Selection is split across two fragments. selection_before = Some(Range::new(selection.begin(), offset)); - *selection = Range::new(CharIndex(0), selection.end() - offset); + *selection = Range::new(ByteIndex(0), selection.end() - offset); } _ => { // Selection is entirely in the first fragment. @@ -523,11 +525,9 @@ struct RunInfo { /// The text that will go in this text run. text: String, /// The insertion point in this text run, if applicable. - insertion_point: Option<CharIndex>, + insertion_point: Option<ByteIndex>, /// The index of the applicable font in the font group. font_index: usize, - /// A cached copy of the number of Unicode characters in the text run. - character_length: usize, /// The bidirection embedding level of this text run. bidi_level: u8, /// The Unicode script property of this text run. @@ -540,7 +540,6 @@ impl RunInfo { text: String::new(), insertion_point: None, font_index: 0, - character_length: 0, bidi_level: 0, script: Script::Common, } @@ -552,9 +551,9 @@ impl RunInfo { /// of this text run. fn flush(mut self, list: &mut Vec<RunInfo>, - insertion_point: &mut Option<CharIndex>) { + insertion_point: &mut Option<ByteIndex>) { if let Some(idx) = *insertion_point { - let char_len = CharIndex(self.character_length as isize); + let char_len = ByteIndex(self.text.len() as isize); if idx <= char_len { // The insertion point is in this text run. self.insertion_point = insertion_point.take() @@ -571,8 +570,6 @@ impl RunInfo { /// for it. #[derive(Copy, Clone, Debug)] struct RunMapping { - /// The range of characters within the text fragment. - char_range: Range<CharIndex>, /// The range of byte indices within the text fragment. byte_range: Range<usize>, /// The index of the unscanned text fragment that this mapping corresponds to. @@ -585,13 +582,10 @@ struct RunMapping { impl RunMapping { /// Given the current set of text runs, creates a run mapping for the next fragment. - /// `run_info_list` describes the set of runs we've seen already, and `current_run_info` - /// describes the run we just finished processing. - fn new(run_info_list: &[RunInfo], current_run_info: &RunInfo, fragment_index: usize) + /// `run_info_list` describes the set of runs we've seen already. + fn new(run_info_list: &[RunInfo], fragment_index: usize) -> RunMapping { RunMapping { - char_range: Range::new(CharIndex(current_run_info.character_length as isize), - CharIndex(0)), byte_range: Range::new(0, 0), old_fragment_index: fragment_index, text_run_index: run_info_list.len(), @@ -620,26 +614,21 @@ impl RunMapping { // Account for `text-transform`. (Confusingly, this is not handled in "text // transformation" above, but we follow Gecko in the naming.) let is_first_run = *start_position == 0; - let character_count = apply_style_transform_if_necessary(&mut run_info.text, - old_byte_length, - text_transform, - *last_whitespace, - is_first_run); - - run_info.character_length = run_info.character_length + character_count; + apply_style_transform_if_necessary(&mut run_info.text, old_byte_length, text_transform, + *last_whitespace, is_first_run); *start_position = end_position; + let new_byte_length = run_info.text.len(); + let is_empty = new_byte_length == old_byte_length; + // Don't save mappings that contain only discarded characters. // (But keep ones that contained no characters to begin with, since they might have been // generated by an empty flow to draw its borders/padding/insertion point.) - let is_empty = character_count == 0; if is_empty && !was_empty { return; } - let new_byte_length = run_info.text.len(); self.byte_range = Range::new(old_byte_length, new_byte_length - old_byte_length); - self.char_range.extend_by(CharIndex(character_count as isize)); mappings.push(self) } @@ -648,10 +637,10 @@ impl RunMapping { /// NOTE: We treat the range as inclusive at both ends, since the insertion point can lie /// before the first character *or* after the last character, and should be drawn even if the /// text is empty. - fn contains_insertion_point(&self, insertion_point: Option<CharIndex>) -> bool { - match insertion_point { + fn contains_insertion_point(&self, insertion_point: Option<ByteIndex>) -> bool { + match insertion_point.map(ByteIndex::to_usize) { None => false, - Some(idx) => self.char_range.begin() <= idx && idx <= self.char_range.end() + Some(idx) => self.byte_range.begin() <= idx && idx <= self.byte_range.end() } } } @@ -666,39 +655,29 @@ fn apply_style_transform_if_necessary(string: &mut String, first_character_position: usize, text_transform: text_transform::T, last_whitespace: bool, - is_first_run: bool) - -> usize { + is_first_run: bool) { match text_transform { - text_transform::T::none => string[first_character_position..].chars().count(), + text_transform::T::none => {} text_transform::T::uppercase => { let original = string[first_character_position..].to_owned(); string.truncate(first_character_position); - let mut count = 0; for ch in original.chars().flat_map(|ch| ch.to_uppercase()) { string.push(ch); - count += 1; } - count } text_transform::T::lowercase => { let original = string[first_character_position..].to_owned(); string.truncate(first_character_position); - let mut count = 0; for ch in original.chars().flat_map(|ch| ch.to_lowercase()) { string.push(ch); - count += 1; } - count } text_transform::T::capitalize => { let original = string[first_character_position..].to_owned(); string.truncate(first_character_position); let mut capitalize_next_letter = is_first_run || last_whitespace; - let mut count = 0; for character in original.chars() { - count += 1; - // FIXME(#4311, pcwalton): Should be the CSS/Unicode notion of a *typographic // letter unit*, not an *alphabetic* character: // @@ -716,8 +695,6 @@ fn apply_style_transform_if_necessary(string: &mut String, capitalize_next_letter = true } } - - count } } } @@ -725,7 +702,7 @@ fn apply_style_transform_if_necessary(string: &mut String, #[derive(Clone)] struct ScannedTextRun { run: Arc<TextRun>, - insertion_point: Option<CharIndex>, + insertion_point: Option<ByteIndex>, } /// Can a character with script `b` continue a text run with script `a`? diff --git a/components/layout/webrender_helpers.rs b/components/layout/webrender_helpers.rs index c3b28f8ecc1..053ecf87f50 100644 --- a/components/layout/webrender_helpers.rs +++ b/components/layout/webrender_helpers.rs @@ -392,7 +392,7 @@ impl WebRenderDisplayItemConverter for DisplayItem { let mut glyphs = vec!(); for slice in item.text_run.natural_word_slices_in_visual_order(&item.range) { - for glyph in slice.glyphs.iter_glyphs_for_char_range(&slice.range) { + for glyph in slice.glyphs.iter_glyphs_for_byte_range(&slice.range) { let glyph_advance = glyph.advance(); let glyph_offset = glyph.offset().unwrap_or(Point2D::zero()); let glyph = webrender_traits::GlyphInstance { diff --git a/components/layout/wrapper.rs b/components/layout/wrapper.rs index 00c77631707..2e22234e42b 100644 --- a/components/layout/wrapper.rs +++ b/components/layout/wrapper.rs @@ -33,7 +33,7 @@ use core::nonzero::NonZero; use data::{LayoutDataFlags, PrivateLayoutData}; use gfx::display_list::OpaqueNode; -use gfx::text::glyph::CharIndex; +use gfx::text::glyph::ByteIndex; use gfx_traits::{LayerId, LayerType}; use incremental::RestyleDamage; use msg::constellation_msg::PipelineId; @@ -74,7 +74,7 @@ use style::restyle_hints::ElementSnapshot; use style::selector_impl::{NonTSPseudoClass, PseudoElement, ServoSelectorImpl}; use style::servo::PrivateStyleData; use url::Url; -use util::str::{is_whitespace, search_index}; +use util::str::is_whitespace; pub type NonOpaqueStyleAndLayoutData = *mut RefCell<PrivateLayoutData>; @@ -838,7 +838,7 @@ pub trait ThreadSafeLayoutNode : Clone + Copy + Sized + PartialEq { fn text_content(&self) -> TextContent; /// If the insertion point is within this node, returns it. Otherwise, returns `None`. - fn selection(&self) -> Option<Range<CharIndex>>; + fn selection(&self) -> Option<Range<ByteIndex>>; /// If this is an image element, returns its URL. If this is not an image element, fails. /// @@ -1077,27 +1077,18 @@ impl<'ln> ThreadSafeLayoutNode for ServoThreadSafeLayoutNode<'ln> { panic!("not text!") } - fn selection(&self) -> Option<Range<CharIndex>> { - let this = unsafe { - self.get_jsmanaged() - }; + fn selection(&self) -> Option<Range<ByteIndex>> { + let this = unsafe { self.get_jsmanaged() }; - if let Some(area) = this.downcast::<HTMLTextAreaElement>() { - if let Some(selection) = unsafe { area.get_absolute_selection_for_layout() } { - let text = unsafe { area.get_value_for_layout() }; - let begin_byte = selection.begin(); - let begin = search_index(begin_byte, text.char_indices()); - let length = search_index(selection.length(), text[begin_byte..].char_indices()); - return Some(Range::new(CharIndex(begin), CharIndex(length))); - } - } - if let Some(input) = this.downcast::<HTMLInputElement>() { - if let Some(selection) = unsafe { input.selection_for_layout() } { - return Some(Range::new(CharIndex(selection.begin()), - CharIndex(selection.length()))); - } - } - None + let selection = if let Some(area) = this.downcast::<HTMLTextAreaElement>() { + unsafe { area.selection_for_layout() } + } else if let Some(input) = this.downcast::<HTMLInputElement>() { + unsafe { input.selection_for_layout() } + } else { + return None; + }; + selection.map(|range| Range::new(ByteIndex(range.begin() as isize), + ByteIndex(range.length() as isize))) } fn image_url(&self) -> Option<Url> { diff --git a/components/script/dom/htmlinputelement.rs b/components/script/dom/htmlinputelement.rs index ab753053b8f..a5bdacf44a4 100644 --- a/components/script/dom/htmlinputelement.rs +++ b/components/script/dom/htmlinputelement.rs @@ -43,10 +43,11 @@ use style::element_state::*; use textinput::KeyReaction::{DispatchInput, Nothing, RedrawSelection, TriggerDefaultAction}; use textinput::Lines::Single; use textinput::{TextInput, SelectionDirection}; -use util::str::{DOMString, search_index}; +use util::str::{DOMString}; const DEFAULT_SUBMIT_VALUE: &'static str = "Submit"; const DEFAULT_RESET_VALUE: &'static str = "Reset"; +const PASSWORD_REPLACEMENT_CHAR: char = '●'; #[derive(JSTraceable, PartialEq, Copy, Clone)] #[allow(dead_code)] @@ -174,7 +175,7 @@ pub trait LayoutHTMLInputElementHelpers { #[allow(unsafe_code)] unsafe fn size_for_layout(self) -> u32; #[allow(unsafe_code)] - unsafe fn selection_for_layout(self) -> Option<Range<isize>>; + unsafe fn selection_for_layout(self) -> Option<Range<usize>>; #[allow(unsafe_code)] unsafe fn checked_state_for_layout(self) -> bool; #[allow(unsafe_code)] @@ -207,8 +208,7 @@ impl LayoutHTMLInputElementHelpers for LayoutJS<HTMLInputElement> { InputType::InputPassword => { let text = get_raw_textinput_value(self); if !text.is_empty() { - // The implementation of selection_for_layout expects a 1:1 mapping of chars. - text.chars().map(|_| '●').collect() + text.chars().map(|_| PASSWORD_REPLACEMENT_CHAR).collect() } else { String::from((*self.unsafe_get()).placeholder.borrow_for_layout().clone()) } @@ -216,7 +216,6 @@ impl LayoutHTMLInputElementHelpers for LayoutJS<HTMLInputElement> { _ => { let text = get_raw_textinput_value(self); if !text.is_empty() { - // The implementation of selection_for_layout expects a 1:1 mapping of chars. String::from(text) } else { String::from((*self.unsafe_get()).placeholder.borrow_for_layout().clone()) @@ -233,24 +232,29 @@ impl LayoutHTMLInputElementHelpers for LayoutJS<HTMLInputElement> { #[allow(unrooted_must_root)] #[allow(unsafe_code)] - unsafe fn selection_for_layout(self) -> Option<Range<isize>> { + unsafe fn selection_for_layout(self) -> Option<Range<usize>> { if !(*self.unsafe_get()).upcast::<Element>().focus_state() { return None; } - // Use the raw textinput to get the index as long as we use a 1:1 char mapping - // in value_for_layout. - let raw = match (*self.unsafe_get()).input_type.get() { - InputType::InputText | - InputType::InputPassword => get_raw_textinput_value(self), - _ => return None - }; let textinput = (*self.unsafe_get()).textinput.borrow_for_layout(); - let selection = textinput.get_absolute_selection_range(); - let begin_byte = selection.begin(); - let begin = search_index(begin_byte, raw.char_indices()); - let length = search_index(selection.length(), raw[begin_byte..].char_indices()); - Some(Range::new(begin, length)) + + match (*self.unsafe_get()).input_type.get() { + InputType::InputPassword => { + let text = get_raw_textinput_value(self); + let sel = textinput.get_absolute_selection_range(); + + // Translate indices from the raw value to indices in the replacement value. + let char_start = text[.. sel.begin()].chars().count(); + let char_count = text[sel.begin() .. sel.end()].chars().count(); + + let bytes_per_char = PASSWORD_REPLACEMENT_CHAR.len_utf8(); + Some(Range::new(char_start * bytes_per_char, + char_count * bytes_per_char)) + } + InputType::InputText => Some(textinput.get_absolute_selection_range()), + _ => None + } } #[allow(unrooted_must_root)] diff --git a/components/script/dom/htmltextareaelement.rs b/components/script/dom/htmltextareaelement.rs index d3eb7827031..739cb04eb15 100644 --- a/components/script/dom/htmltextareaelement.rs +++ b/components/script/dom/htmltextareaelement.rs @@ -47,7 +47,7 @@ pub trait LayoutHTMLTextAreaElementHelpers { #[allow(unsafe_code)] unsafe fn get_value_for_layout(self) -> String; #[allow(unsafe_code)] - unsafe fn get_absolute_selection_for_layout(self) -> Option<Range<usize>>; + unsafe fn selection_for_layout(self) -> Option<Range<usize>>; #[allow(unsafe_code)] fn get_cols(self) -> u32; #[allow(unsafe_code)] @@ -63,13 +63,12 @@ impl LayoutHTMLTextAreaElementHelpers for LayoutJS<HTMLTextAreaElement> { #[allow(unrooted_must_root)] #[allow(unsafe_code)] - unsafe fn get_absolute_selection_for_layout(self) -> Option<Range<usize>> { - if (*self.unsafe_get()).upcast::<Element>().focus_state() { - Some((*self.unsafe_get()).textinput.borrow_for_layout() - .get_absolute_selection_range()) - } else { - None + unsafe fn selection_for_layout(self) -> Option<Range<usize>> { + if !(*self.unsafe_get()).upcast::<Element>().focus_state() { + return None; } + let textinput = (*self.unsafe_get()).textinput.borrow_for_layout(); + Some(textinput.get_absolute_selection_range()) } #[allow(unsafe_code)] diff --git a/components/util/str.rs b/components/util/str.rs index 8d5a5e74bef..997aee8f53c 100644 --- a/components/util/str.rs +++ b/components/util/str.rs @@ -11,7 +11,7 @@ use std::ffi::CStr; use std::fmt; use std::iter::{Filter, Peekable}; use std::ops::{Deref, DerefMut}; -use std::str::{Bytes, CharIndices, Split, from_utf8}; +use std::str::{Bytes, Split, from_utf8}; use string_cache::Atom; #[derive(Clone, Debug, Deserialize, Eq, Hash, HeapSizeOf, Ord, PartialEq, PartialOrd, Serialize)] @@ -271,40 +271,3 @@ pub fn str_join<I, T>(strs: I, join: &str) -> String acc }) } - -// Lifted from Rust's StrExt implementation, which is being removed. -pub fn slice_chars(s: &str, begin: usize, end: usize) -> &str { - assert!(begin <= end); - let mut count = 0; - let mut begin_byte = None; - let mut end_byte = None; - - // This could be even more efficient by not decoding, - // only finding the char boundaries - for (idx, _) in s.char_indices() { - if count == begin { begin_byte = Some(idx); } - if count == end { end_byte = Some(idx); break; } - count += 1; - } - if begin_byte.is_none() && count == begin { begin_byte = Some(s.len()) } - if end_byte.is_none() && count == end { end_byte = Some(s.len()) } - - match (begin_byte, end_byte) { - (None, _) => panic!("slice_chars: `begin` is beyond end of string"), - (_, None) => panic!("slice_chars: `end` is beyond end of string"), - (Some(a), Some(b)) => unsafe { s.slice_unchecked(a, b) } - } -} - -// searches a character index in CharIndices -// returns indices.count if not found -pub fn search_index(index: usize, indices: CharIndices) -> isize { - let mut character_count = 0; - for (character_index, _) in indices { - if character_index == index { - return character_count; - } - character_count += 1 - } - character_count -} |