diff options
Diffstat (limited to 'components/gfx/text')
-rw-r--r-- | components/gfx/text/glyph.rs | 752 | ||||
-rw-r--r-- | components/gfx/text/mod.rs | 18 | ||||
-rw-r--r-- | components/gfx/text/shaping/harfbuzz.rs | 541 | ||||
-rw-r--r-- | components/gfx/text/shaping/mod.rs | 19 | ||||
-rw-r--r-- | components/gfx/text/text_run.rs | 271 | ||||
-rw-r--r-- | components/gfx/text/util.rs | 285 |
6 files changed, 1886 insertions, 0 deletions
diff --git a/components/gfx/text/glyph.rs b/components/gfx/text/glyph.rs new file mode 100644 index 00000000000..2ea2d7c5d2e --- /dev/null +++ b/components/gfx/text/glyph.rs @@ -0,0 +1,752 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use servo_util::vec::*; +use servo_util::range; +use servo_util::range::{Range, RangeIndex, IntRangeIndex, EachIndex}; +use servo_util::geometry::Au; + +use std::cmp::{PartialOrd, PartialEq}; +use std::num::{NumCast, Zero}; +use std::mem; +use std::u16; +use std::vec::Vec; +use geom::point::Point2D; + +/// GlyphEntry is a port of Gecko's CompressedGlyph scheme for storing glyph data compactly. +/// +/// In the common case (reasonable glyph advances, no offsets from the font em-box, and one glyph +/// per character), we pack glyph advance, glyph id, and some flags into a single u32. +/// +/// In the uncommon case (multiple glyphs per unicode character, large glyph index/advance, or +/// glyph offsets), we pack the glyph count into GlyphEntry, and store the other glyph information +/// in DetailedGlyphStore. 
+#[deriving(Clone)] +struct GlyphEntry { + value: u32, +} + +impl GlyphEntry { + fn new(value: u32) -> GlyphEntry { + GlyphEntry { + value: value, + } + } + + fn initial() -> GlyphEntry { + GlyphEntry::new(0) + } + + // Creates a GlyphEntry for the common case + fn simple(id: GlyphId, advance: Au) -> GlyphEntry { + assert!(is_simple_glyph_id(id)); + assert!(is_simple_advance(advance)); + + let id_mask = id as u32; + let Au(advance) = advance; + let advance_mask = (advance as u32) << GLYPH_ADVANCE_SHIFT as uint; + + GlyphEntry::new(id_mask | advance_mask | FLAG_IS_SIMPLE_GLYPH) + } + + // Create a GlyphEntry for uncommon case; should be accompanied by + // initialization of the actual DetailedGlyph data in DetailedGlyphStore + fn complex(starts_cluster: bool, starts_ligature: bool, glyph_count: int) -> GlyphEntry { + assert!(glyph_count <= u16::MAX as int); + + debug!("creating complex glyph entry: starts_cluster={}, starts_ligature={}, \ + glyph_count={}", + starts_cluster, + starts_ligature, + glyph_count); + + let mut val = FLAG_NOT_MISSING; + + if !starts_cluster { + val |= FLAG_NOT_CLUSTER_START; + } + if !starts_ligature { + val |= FLAG_NOT_LIGATURE_GROUP_START; + } + val |= (glyph_count as u32) << GLYPH_COUNT_SHIFT as uint; + + GlyphEntry::new(val) + } + + /// Create a GlyphEntry for the case where glyphs couldn't be found for the specified + /// character. + fn missing(glyph_count: int) -> GlyphEntry { + assert!(glyph_count <= u16::MAX as int); + + GlyphEntry::new((glyph_count as u32) << GLYPH_COUNT_SHIFT as uint) + } +} + +/// The id of a particular glyph within a font +pub type GlyphId = u32; + +// TODO: unify with bit flags? 
+#[deriving(PartialEq)]
+pub enum BreakType {
+    BreakTypeNone,
+    BreakTypeNormal,
+    BreakTypeHyphen,
+}
+
+// Bit values used to store a BreakType inside a GlyphEntry.
+static BREAK_TYPE_NONE: u8 = 0x0;
+static BREAK_TYPE_NORMAL: u8 = 0x1;
+static BREAK_TYPE_HYPHEN: u8 = 0x2;
+
+// Decodes the two break bits. The "normal" bit wins if both bits are set.
+fn break_flag_to_enum(flag: u8) -> BreakType {
+    if (flag & BREAK_TYPE_NORMAL) != 0 {
+        return BreakTypeNormal;
+    }
+    if (flag & BREAK_TYPE_HYPHEN) != 0 {
+        return BreakTypeHyphen;
+    }
+    BreakTypeNone
+}
+
+// Encodes a BreakType as its BREAK_TYPE_* bit value.
+fn break_enum_to_flag(e: BreakType) -> u8 {
+    match e {
+        BreakTypeHyphen => BREAK_TYPE_HYPHEN,
+        BreakTypeNormal => BREAK_TYPE_NORMAL,
+        BreakTypeNone => BREAK_TYPE_NONE,
+    }
+}
+
+// TODO: make this more type-safe.
+
+// Bits that are valid for both simple and non-simple entries.
+static FLAG_CHAR_IS_SPACE: u32 = 0x10000000;
+// These two bits (29 and 30) store one of the BREAK_TYPE_* values.
+static FLAG_CAN_BREAK_MASK: u32 = 0x60000000;
+static FLAG_CAN_BREAK_SHIFT: u32 = 29;
+static FLAG_IS_SIMPLE_GLYPH: u32 = 0x80000000;
+
+// Simple entries only: glyph advance (in Au) and glyph id.
+static GLYPH_ADVANCE_MASK: u32 = 0x0FFF0000;
+static GLYPH_ADVANCE_SHIFT: u32 = 16;
+static GLYPH_ID_MASK: u32 = 0x0000FFFF;
+
+// Non-simple glyphs (more than one glyph per char; missing glyph,
+// newline, tab, large advance, or nonzero x/y offsets) may have one
+// or more detailed glyphs associated with them. They are stored in a
+// side array so that there is a 1:1 mapping of GlyphEntry to
+// unicode char.
+
+// The number of detailed glyphs for this char. The upstream design
+// stores the code point here instead when the char couldn't be mapped
+// to a glyph (!FLAG_NOT_MISSING).
+// NOTE(review): GlyphEntry::missing() in this file stores a glyph
+// count in this field, not a code point -- confirm which is intended.
+static GLYPH_COUNT_MASK: u32 = 0x00FFFF00;
+static GLYPH_COUNT_SHIFT: u32 = 8;
+// N.B. following Gecko, these are all inverted so that a lot of
+// missing chars can be memset with zeros in one fell swoop.
+static FLAG_NOT_MISSING: u32 = 0x00000001;
+static FLAG_NOT_CLUSTER_START: u32 = 0x00000002;
+static FLAG_NOT_LIGATURE_GROUP_START: u32 = 0x00000004;
+
+static FLAG_CHAR_IS_TAB: u32 = 0x00000008;
+static FLAG_CHAR_IS_NEWLINE: u32 = 0x00000010;
+//static FLAG_CHAR_IS_LOW_SURROGATE: u32 = 0x00000020;
+//static CHAR_IDENTITY_FLAGS_MASK: u32 = 0x00000038;
+
+// True if the glyph id fits in the id bits of a simple GlyphEntry.
+fn is_simple_glyph_id(id: GlyphId) -> bool {
+    ((id as u32) & GLYPH_ID_MASK) == id
+}
+
+// True if the advance fits in the advance bits of a simple GlyphEntry.
+//
+// An advance that cannot be converted to a u32 at all (presumably a negative
+// one) is reported as not simple instead of failing the task, so that callers
+// fall back to the detailed representation.
+fn is_simple_advance(advance: Au) -> bool {
+    match advance.to_u32() {
+        Some(unsigned_au) => {
+            (unsigned_au & (GLYPH_ADVANCE_MASK >> GLYPH_ADVANCE_SHIFT as uint)) == unsigned_au
+        }
+        None => false,
+    }
+}
+
+type DetailedGlyphCount = u16;
+
+// Getters and setters for GlyphEntry. Setter methods are functional,
+// because GlyphEntry is immutable and only a u32 in size.
+impl GlyphEntry {
+    // getter methods
+
+    // Advance stored in a simple entry. Only meaningful when is_simple().
+    #[inline(always)]
+    fn advance(&self) -> Au {
+        NumCast::from((self.value & GLYPH_ADVANCE_MASK) >> GLYPH_ADVANCE_SHIFT as uint).unwrap()
+    }
+
+    // Glyph id stored in a simple entry. Only meaningful when is_simple().
+    fn id(&self) -> GlyphId {
+        self.value & GLYPH_ID_MASK
+    }
+
+    // The "not ligature start" bit shares its position with the glyph id of a
+    // simple entry, so it may only be consulted for non-simple entries. A simple
+    // entry always starts its own ligature group (this mirrors Gecko's
+    // CompressedGlyph). The flag is stored inverted: set means NOT a start.
+    fn is_ligature_start(&self) -> bool {
+        self.is_simple() || !self.has_flag(FLAG_NOT_LIGATURE_GROUP_START)
+    }
+
+    // Same reasoning as is_ligature_start(), for the cluster-start bit.
+    fn is_cluster_start(&self) -> bool {
+        self.is_simple() || !self.has_flag(FLAG_NOT_CLUSTER_START)
+    }
+
+    // True if original char was normal (U+0020) space. Other chars may
+    // map to space glyph, but this does not account for them.
+    fn char_is_space(&self) -> bool {
+        self.has_flag(FLAG_CHAR_IS_SPACE)
+    }
+
+    // The tab and newline bits overlap the glyph id of a simple entry, hence
+    // the is_simple() guard in the next two getters.
+    fn char_is_tab(&self) -> bool {
+        !self.is_simple() && self.has_flag(FLAG_CHAR_IS_TAB)
+    }
+
+    fn char_is_newline(&self) -> bool {
+        !self.is_simple() && self.has_flag(FLAG_CHAR_IS_NEWLINE)
+    }
+
+    fn can_break_before(&self) -> BreakType {
+        let flag = ((self.value & FLAG_CAN_BREAK_MASK) >> FLAG_CAN_BREAK_SHIFT as uint) as u8;
+        break_flag_to_enum(flag)
+    }
+
+    // setter methods
+    #[inline(always)]
+    fn set_char_is_space(&self) -> GlyphEntry {
+        GlyphEntry::new(self.value | FLAG_CHAR_IS_SPACE)
+    }
+
+    // Fails on a simple entry, where the tab bit would corrupt the glyph id.
+    #[inline(always)]
+    fn set_char_is_tab(&self) -> GlyphEntry {
+        assert!(!self.is_simple());
+        GlyphEntry::new(self.value | FLAG_CHAR_IS_TAB)
+    }
+
+    // Fails on a simple entry, where the newline bit would corrupt the glyph id.
+    #[inline(always)]
+    fn set_char_is_newline(&self) -> GlyphEntry {
+        assert!(!self.is_simple());
+        GlyphEntry::new(self.value | FLAG_CHAR_IS_NEWLINE)
+    }
+
+    // Replaces the stored break type. The old bits are cleared first so that a
+    // previously stored value cannot leak into the new one (for example,
+    // setting BreakTypeNone really does reset the field).
+    #[inline(always)]
+    fn set_can_break_before(&self, e: BreakType) -> GlyphEntry {
+        let flag = (break_enum_to_flag(e) as u32) << FLAG_CAN_BREAK_SHIFT as uint;
+        GlyphEntry::new((self.value & !FLAG_CAN_BREAK_MASK) | flag)
+    }
+
+    // helper methods
+
+    // Number of detailed glyphs recorded for a non-simple entry.
+    fn glyph_count(&self) -> u16 {
+        assert!(!self.is_simple());
+        ((self.value & GLYPH_COUNT_MASK) >> GLYPH_COUNT_SHIFT as uint) as u16
+    }
+
+    #[inline(always)]
+    fn is_simple(&self) -> bool {
+        self.has_flag(FLAG_IS_SIMPLE_GLYPH)
+    }
+
+    // True if any bit of `flag` is set in this entry.
+    #[inline(always)]
+    fn has_flag(&self, flag: u32) -> bool {
+        (self.value & flag) != 0
+    }
+
+    // Merges in every bit of `other`, not just its character flags.
+    // NOTE(review): this is only safe while `other` carries nothing but
+    // character flags that do not overlap this entry's payload -- confirm
+    // against callers (e.g. a tab flag merged into a simple entry lands in the
+    // glyph id bits).
+    #[inline(always)]
+    fn adapt_character_flags_of_entry(&self, other: GlyphEntry) -> GlyphEntry {
+        GlyphEntry { value: self.value | other.value }
+    }
+}
+
+// Stores data for a detailed glyph, in the case that several glyphs
+// correspond to one character, or the glyph's data couldn't be packed.
+#[deriving(Clone)] +struct DetailedGlyph { + id: GlyphId, + // glyph's advance, in the text's direction (RTL or RTL) + advance: Au, + // glyph's offset from the font's em-box (from top-left) + offset: Point2D<Au>, +} + +impl DetailedGlyph { + fn new(id: GlyphId, advance: Au, offset: Point2D<Au>) -> DetailedGlyph { + DetailedGlyph { + id: id, + advance: advance, + offset: offset, + } + } +} + +#[deriving(PartialEq, Clone, Eq)] +struct DetailedGlyphRecord { + // source string offset/GlyphEntry offset in the TextRun + entry_offset: CharIndex, + // offset into the detailed glyphs buffer + detail_offset: int, +} + +impl PartialOrd for DetailedGlyphRecord { + fn partial_cmp(&self, other: &DetailedGlyphRecord) -> Option<Ordering> { + self.entry_offset.partial_cmp(&other.entry_offset) + } +} + +impl Ord for DetailedGlyphRecord { + fn cmp(&self, other: &DetailedGlyphRecord) -> Ordering { + self.entry_offset.cmp(&other.entry_offset) + } +} + +// Manages the lookup table for detailed glyphs. Sorting is deferred +// until a lookup is actually performed; this matches the expected +// usage pattern of setting/appending all the detailed glyphs, and +// then querying without setting. +struct DetailedGlyphStore { + // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector + // optimization. + detail_buffer: Vec<DetailedGlyph>, + // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector + // optimization. + detail_lookup: Vec<DetailedGlyphRecord>, + lookup_is_sorted: bool, +} + +impl<'a> DetailedGlyphStore { + fn new() -> DetailedGlyphStore { + DetailedGlyphStore { + detail_buffer: vec!(), // TODO: default size? 
+ detail_lookup: vec!(), + lookup_is_sorted: false, + } + } + + fn add_detailed_glyphs_for_entry(&mut self, entry_offset: CharIndex, glyphs: &[DetailedGlyph]) { + let entry = DetailedGlyphRecord { + entry_offset: entry_offset, + detail_offset: self.detail_buffer.len() as int, + }; + + debug!("Adding entry[off={}] for detailed glyphs: {:?}", entry_offset, glyphs); + + /* TODO: don't actually assert this until asserts are compiled + in/out based on severity, debug/release, etc. This assertion + would wreck the complexity of the lookup. + + See Rust Issue #3647, #2228, #3627 for related information. + + do self.detail_lookup.borrow |arr| { + assert !arr.contains(entry) + } + */ + + self.detail_lookup.push(entry); + self.detail_buffer.push_all(glyphs); + self.lookup_is_sorted = false; + } + + fn get_detailed_glyphs_for_entry(&'a self, entry_offset: CharIndex, count: u16) + -> &'a [DetailedGlyph] { + debug!("Requesting detailed glyphs[n={}] for entry[off={}]", count, entry_offset); + + // FIXME: Is this right? 
--pcwalton + // TODO: should fix this somewhere else + if count == 0 { + return self.detail_buffer.slice(0, 0); + } + + assert!((count as uint) <= self.detail_buffer.len()); + assert!(self.lookup_is_sorted); + + let key = DetailedGlyphRecord { + entry_offset: entry_offset, + detail_offset: 0, // unused + }; + + let i = self.detail_lookup.as_slice().binary_search_index(&key) + .expect("Invalid index not found in detailed glyph lookup table!"); + + assert!(i + (count as uint) <= self.detail_buffer.len()); + // return a slice into the buffer + self.detail_buffer.slice(i, i + count as uint) + } + + fn get_detailed_glyph_with_index(&'a self, + entry_offset: CharIndex, + detail_offset: u16) + -> &'a DetailedGlyph { + assert!((detail_offset as uint) <= self.detail_buffer.len()); + assert!(self.lookup_is_sorted); + + let key = DetailedGlyphRecord { + entry_offset: entry_offset, + detail_offset: 0, // unused + }; + + let i = self.detail_lookup.as_slice().binary_search_index(&key) + .expect("Invalid index not found in detailed glyph lookup table!"); + + assert!(i + (detail_offset as uint) < self.detail_buffer.len()); + &self.detail_buffer[i + (detail_offset as uint)] + } + + fn ensure_sorted(&mut self) { + if self.lookup_is_sorted { + return; + } + + // Sorting a unique vector is surprisingly hard. The follwing + // code is a good argument for using DVecs, but they require + // immutable locations thus don't play well with freezing. + + // Thar be dragons here. You have been warned. (Tips accepted.) 
+ let mut unsorted_records: Vec<DetailedGlyphRecord> = vec!(); + mem::swap(&mut self.detail_lookup, &mut unsorted_records); + let mut mut_records : Vec<DetailedGlyphRecord> = unsorted_records; + mut_records.sort_by(|a, b| { + if a < b { + Less + } else { + Greater + } + }); + let mut sorted_records = mut_records; + mem::swap(&mut self.detail_lookup, &mut sorted_records); + + self.lookup_is_sorted = true; + } +} + +// This struct is used by GlyphStore clients to provide new glyph data. +// It should be allocated on the stack and passed by reference to GlyphStore. +pub struct GlyphData { + id: GlyphId, + advance: Au, + offset: Point2D<Au>, + is_missing: bool, + cluster_start: bool, + ligature_start: bool, +} + +impl GlyphData { + pub fn new(id: GlyphId, + advance: Au, + offset: Option<Point2D<Au>>, + is_missing: bool, + cluster_start: bool, + ligature_start: bool) + -> GlyphData { + GlyphData { + id: id, + advance: advance, + offset: offset.unwrap_or(Zero::zero()), + is_missing: is_missing, + cluster_start: cluster_start, + ligature_start: ligature_start, + } + } +} + +// This enum is a proxy that's provided to GlyphStore clients when iterating +// through glyphs (either for a particular TextRun offset, or all glyphs). +// Rather than eagerly assembling and copying glyph data, it only retrieves +// values as they are needed from the GlyphStore, using provided offsets. 
+pub enum GlyphInfo<'a> {
+    // A glyph packed directly in the store's entry at the given index.
+    SimpleGlyphInfo(&'a GlyphStore, CharIndex),
+    // The n-th (third field) detailed glyph of the entry at the given index.
+    DetailGlyphInfo(&'a GlyphStore, CharIndex, u16),
+}
+
+impl<'a> GlyphInfo<'a> {
+    /// Returns the glyph id, read from the packed entry for a simple glyph or from the
+    /// detail store otherwise.
+    pub fn id(self) -> GlyphId {
+        match self {
+            SimpleGlyphInfo(store, entry_i) => store.entry_buffer[entry_i.to_uint()].id(),
+            DetailGlyphInfo(store, entry_i, detail_j) => {
+                store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j).id
+            }
+        }
+    }
+
+    /// Returns the glyph advance, read from the packed entry for a simple glyph or from
+    /// the detail store otherwise.
+    #[inline(always)]
+    // FIXME: Resolution conflicts with IteratorUtil trait so adding trailing _
+    pub fn advance(self) -> Au {
+        match self {
+            SimpleGlyphInfo(store, entry_i) => store.entry_buffer[entry_i.to_uint()].advance(),
+            DetailGlyphInfo(store, entry_i, detail_j) => {
+                store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j).advance
+            }
+        }
+    }
+
+    /// Returns the glyph offset; always `None` for a simple glyph.
+    pub fn offset(self) -> Option<Point2D<Au>> {
+        match self {
+            SimpleGlyphInfo(_, _) => None,
+            DetailGlyphInfo(store, entry_i, detail_j) => {
+                Some(store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j).offset)
+            }
+        }
+    }
+}
+
+/// Stores the glyph data belonging to a text run.
+///
+/// Simple glyphs are stored inline in the `entry_buffer`, detailed glyphs are
+/// stored as pointers into the `detail_store`.
+///
+/// ~~~
+/// +- GlyphStore --------------------------------+
+/// |               +---+---+---+---+---+---+---+ |
+/// | entry_buffer: |   | s |   | s |   | s | s | |  d = detailed
+/// |               +-|-+---+-|-+---+-|-+---+---+ |  s = simple
+/// |                 |       |       |           |
+/// |                 |   +---+-------+           |
+/// |                 |   |                       |
+/// |               +-V-+-V-+                     |
+/// | detail_store: | d | d |                     |
+/// |               +---+---+                     |
+/// +---------------------------------------------+
+/// ~~~
+pub struct GlyphStore {
+    // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
+    // optimization.
+    /// A buffer of glyphs within the text run, in the order in which they
+    /// appear in the input text
+    entry_buffer: Vec<GlyphEntry>,
+    /// A store of the detailed glyph data. Detailed glyphs contained in the
+    /// `entry_buffer` point to locations in this data structure.
+    detail_store: DetailedGlyphStore,
+
+    /// Value passed to `new()`; this file only stores and returns it.
+    is_whitespace: bool,
+}
+
+int_range_index! {
+    #[deriving(Encodable)]
+    #[doc = "An index that refers to a character in a text run. This could \
+             point to the middle of a glyph."]
+    struct CharIndex(int)
+}
+
+impl<'a> GlyphStore {
+    // Initializes the glyph store, but doesn't actually shape anything.
+    // Use the set_glyph, set_glyphs() methods to store glyph data.
+    // NOTE(review): the adders defined below are named add_glyph_for_char_index /
+    // add_glyphs_for_char_index; the names above look stale.
+    //
+    // Fails if `length` is not positive. The entry buffer is filled with
+    // `length` copies of GlyphEntry::initial().
+    pub fn new(length: int, is_whitespace: bool) -> GlyphStore {
+        assert!(length > 0);
+
+        GlyphStore {
+            entry_buffer: Vec::from_elem(length as uint, GlyphEntry::initial()),
+            detail_store: DetailedGlyphStore::new(),
+            is_whitespace: is_whitespace,
+        }
+    }
+
+    /// Number of entries in the store, as a `CharIndex`.
+    pub fn char_len(&self) -> CharIndex {
+        CharIndex(self.entry_buffer.len() as int)
+    }
+
+    pub fn is_whitespace(&self) -> bool {
+        self.is_whitespace
+    }
+
+    /// Sorts the detailed-glyph lookup table. Detailed-glyph lookups assert that the table
+    /// is sorted, so call this after the last `add_*` call and before iterating.
+    pub fn finalize_changes(&mut self) {
+        self.detail_store.ensure_sorted();
+    }
+
+    /// Stores a single glyph for the character at index `i`, packing it into the entry
+    /// when possible and spilling to the detail store otherwise. Bits already present in
+    /// the entry at `i` are OR-ed into the new entry.
+    ///
+    /// Fails if `data.ligature_start` is false or `i` is out of range.
+    // NOTE(review): the missing-glyph arm records a count of 1 but adds nothing to the
+    // detail store, so a later detailed lookup for this index looks like it would fail
+    // its `expect` -- confirm.
+    pub fn add_glyph_for_char_index(&mut self, i: CharIndex, data: &GlyphData) {
+        // True when the glyph can be packed into a simple entry.
+        fn glyph_is_compressible(data: &GlyphData) -> bool {
+            is_simple_glyph_id(data.id)
+                && is_simple_advance(data.advance)
+                && data.offset.is_zero()
+                && data.cluster_start // others are stored in detail buffer
+        }
+
+        assert!(data.ligature_start); // can't compress ligature continuation glyphs.
+        assert!(i < self.char_len());
+
+        let entry = match (data.is_missing, glyph_is_compressible(data)) {
+            (true, _) => GlyphEntry::missing(1),
+            (false, true) => GlyphEntry::simple(data.id, data.advance),
+            (false, false) => {
+                let glyph = [DetailedGlyph::new(data.id, data.advance, data.offset)];
+                self.detail_store.add_detailed_glyphs_for_entry(i, glyph);
+                GlyphEntry::complex(data.cluster_start, data.ligature_start, 1)
+            }
+        }.adapt_character_flags_of_entry(self.entry_buffer[i.to_uint()]);
+
+        *self.entry_buffer.get_mut(i.to_uint()) = entry;
+    }
+
+    /// Stores several glyphs for the character at index `i`. Unless the first glyph is
+    /// marked missing, all of them go to the detail store; the cluster/ligature flags of
+    /// the first glyph are used for the entry. Bits already present in the entry at `i`
+    /// are OR-ed into the new entry.
+    ///
+    /// Fails if `i` is out of range or `data_for_glyphs` is empty.
+    pub fn add_glyphs_for_char_index(&mut self, i: CharIndex, data_for_glyphs: &[GlyphData]) {
+        assert!(i < self.char_len());
+        assert!(data_for_glyphs.len() > 0);
+
+        let glyph_count = data_for_glyphs.len() as int;
+
+        let first_glyph_data = data_for_glyphs[0];
+        let entry = match first_glyph_data.is_missing {
+            true => GlyphEntry::missing(glyph_count),
+            false => {
+                let glyphs_vec = Vec::from_fn(glyph_count as uint, |i| {
+                    DetailedGlyph::new(data_for_glyphs[i].id,
+                                       data_for_glyphs[i].advance,
+                                       data_for_glyphs[i].offset)
+                });
+
+                self.detail_store.add_detailed_glyphs_for_entry(i, glyphs_vec.as_slice());
+                GlyphEntry::complex(first_glyph_data.cluster_start,
+                                    first_glyph_data.ligature_start,
+                                    glyph_count)
+            }
+        }.adapt_character_flags_of_entry(self.entry_buffer[i.to_uint()]);
+
+        debug!("Adding multiple glyphs[idx={}, count={}]: {:?}", i, glyph_count, entry);
+
+        *self.entry_buffer.get_mut(i.to_uint()) = entry;
+    }
+
+    // used when a character index has no associated glyph---for example, a ligature continuation.
+    // Unlike the two adders above, this overwrites the entry outright: bits already stored
+    // at `i` are not merged in.
+    pub fn add_nonglyph_for_char_index(&mut self, i: CharIndex, cluster_start: bool, ligature_start: bool) {
+        assert!(i < self.char_len());
+
+        let entry = GlyphEntry::complex(cluster_start, ligature_start, 0);
+        debug!("adding spacer for chracter without associated glyph[idx={}]", i);
+
+        *self.entry_buffer.get_mut(i.to_uint()) = entry;
+    }
+
+    /// Iterates over the glyphs of the single character at index `i`.
+    pub fn iter_glyphs_for_char_index(&'a self, i: CharIndex) -> GlyphIterator<'a> {
+        self.iter_glyphs_for_char_range(&Range::new(i, CharIndex(1)))
+    }
+
+    /// Iterates over the glyphs of every character in `rang`.
+    ///
+    /// Fails if the range begins at or beyond `char_len()`, or ends beyond it.
+    #[inline]
+    pub fn iter_glyphs_for_char_range(&'a self, rang: &Range<CharIndex>) -> GlyphIterator<'a> {
+        if rang.begin() >= self.char_len() {
+            fail!("iter_glyphs_for_range: range.begin beyond length!");
+        }
+        if rang.end() > self.char_len() {
+            fail!("iter_glyphs_for_range: range.end beyond length!");
+        }
+
+        GlyphIterator {
+            store: self,
+            char_index: rang.begin(),
+            char_range: rang.each_index(),
+            glyph_range: None,
+        }
+    }
+
+    /// Sums the advances of all glyphs in `rang`.
+    #[inline]
+    pub fn advance_for_char_range(&self, rang: &Range<CharIndex>) -> Au {
+        self.iter_glyphs_for_char_range(rang)
+            .fold(Au(0), |advance, (_, glyph)| advance + glyph.advance())
+    }
+
+    // getter methods
+    // Each one fails if `i` is out of range and otherwise forwards to the
+    // GlyphEntry method of the same name.
+    pub fn char_is_space(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        self.entry_buffer[i.to_uint()].char_is_space()
+    }
+
+    pub fn char_is_tab(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        self.entry_buffer[i.to_uint()].char_is_tab()
+    }
+
+    pub fn char_is_newline(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        self.entry_buffer[i.to_uint()].char_is_newline()
+    }
+
+    pub fn is_ligature_start(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        self.entry_buffer[i.to_uint()].is_ligature_start()
+    }
+
+    pub fn is_cluster_start(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        self.entry_buffer[i.to_uint()].is_cluster_start()
+    }
+
+    pub fn can_break_before(&self, i: CharIndex) -> BreakType {
+        assert!(i < self.char_len());
+        self.entry_buffer[i.to_uint()].can_break_before()
+    }
+
+    // setter methods
+    // Each one fails if `i` is out of range, then replaces the entry at `i`
+    // with the result of the functional GlyphEntry setter of the same name.
+    pub fn set_char_is_space(&mut self, i: CharIndex) {
+        assert!(i < self.char_len());
+        let entry = self.entry_buffer[i.to_uint()];
+        *self.entry_buffer.get_mut(i.to_uint()) = entry.set_char_is_space();
+    }
+
+    pub fn set_char_is_tab(&mut self, i: CharIndex) {
+        assert!(i < self.char_len());
+        let entry = self.entry_buffer[i.to_uint()];
+        *self.entry_buffer.get_mut(i.to_uint()) = entry.set_char_is_tab();
+    }
+
+    pub fn set_char_is_newline(&mut self, i: CharIndex) {
+        assert!(i < self.char_len());
+        let entry = self.entry_buffer[i.to_uint()];
+        *self.entry_buffer.get_mut(i.to_uint()) = entry.set_char_is_newline();
+    }
+
+    pub fn set_can_break_before(&mut self, i: CharIndex, t: BreakType) {
+        assert!(i < self.char_len());
+        let entry = self.entry_buffer[i.to_uint()];
+        *self.entry_buffer.get_mut(i.to_uint()) = entry.set_can_break_before(t);
+    }
+}
+
+/// An iterator over the glyphs in a character range in a `GlyphStore`.
+pub struct GlyphIterator<'a> {
+    store: &'a GlyphStore,
+    // Index of the character whose glyphs are currently being yielded.
+    char_index: CharIndex,
+    // Remaining character indices to visit.
+    char_range: EachIndex<int, CharIndex>,
+    // Remaining detailed-glyph indices of the current character; `None` while
+    // on the fast path.
+    glyph_range: Option<EachIndex<int, CharIndex>>,
+}
+
+impl<'a> GlyphIterator<'a> {
+    // Slow path when there is a glyph range.
+    // Yields the next detailed glyph of the current character, or clears the
+    // range and resumes with the next character once it is exhausted.
+    #[inline(never)]
+    fn next_glyph_range(&mut self) -> Option<(CharIndex, GlyphInfo<'a>)> {
+        match self.glyph_range.get_mut_ref().next() {
+            Some(j) => Some((self.char_index,
+                             DetailGlyphInfo(self.store, self.char_index, j.get() as u16 /* ??? */))),
+            None => {
+                // No more glyphs for current character. Try to get another.
+                self.glyph_range = None;
+                self.next()
+            }
+        }
+    }
+
+    // Slow path when there is a complex glyph.
+    // Installs a glyph range covering the entry's detailed glyphs, then
+    // re-enters next() so that next_glyph_range() yields them.
+    #[inline(never)]
+    fn next_complex_glyph(&mut self, entry: &GlyphEntry, i: CharIndex)
+                          -> Option<(CharIndex, GlyphInfo<'a>)> {
+        let glyphs = self.store.detail_store.get_detailed_glyphs_for_entry(i, entry.glyph_count());
+        self.glyph_range = Some(range::each_index(CharIndex(0), CharIndex(glyphs.len() as int)));
+        self.next()
+    }
+}
+
+impl<'a> Iterator<(CharIndex, GlyphInfo<'a>)> for GlyphIterator<'a> {
+    // I tried to start with something simpler and apply FlatMap, but the
+    // inability to store free variables in the FlatMap struct was problematic.
+    //
+    // This function consists of the fast path and is designed to be inlined into its caller. The
+    // slow paths, which should not be inlined, are `next_glyph_range()` and
+    // `next_complex_glyph()`.
+    #[inline(always)]
+    fn next(&mut self) -> Option<(CharIndex, GlyphInfo<'a>)> {
+        // Would use 'match' here but it borrows contents in a way that
+        // interferes with mutation.
+        if self.glyph_range.is_some() {
+            self.next_glyph_range()
+        } else {
+            // No glyph range. Look at next character.
+            self.char_range.next().and_then(|i| {
+                self.char_index = i;
+                assert!(i < self.store.char_len());
+                let entry = self.store.entry_buffer[i.to_uint()];
+                if entry.is_simple() {
+                    Some((self.char_index, SimpleGlyphInfo(self.store, i)))
+                } else {
+                    // Fall back to the slow path.
+                    self.next_complex_glyph(&entry, i)
+                }
+            })
+        }
+    }
+}
diff --git a/components/gfx/text/mod.rs b/components/gfx/text/mod.rs
new file mode 100644
index 00000000000..f705347c441
--- /dev/null
+++ b/components/gfx/text/mod.rs
@@ -0,0 +1,18 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file exists just to make it easier to import things inside of
+   ./text/ without specifying the file they came out of imports.
+ +Note that you still must define each of the files as a module in +servo.rc. This is not ideal and may be changed in the future. */ + +pub use text::shaping::Shaper; +pub use text::text_run::TextRun; + +pub mod glyph; +#[path="shaping/mod.rs"] pub mod shaping; +pub mod text_run; +pub mod util; + diff --git a/components/gfx/text/shaping/harfbuzz.rs b/components/gfx/text/shaping/harfbuzz.rs new file mode 100644 index 00000000000..789126e767d --- /dev/null +++ b/components/gfx/text/shaping/harfbuzz.rs @@ -0,0 +1,541 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +extern crate harfbuzz; + +use font::{Font, FontHandleMethods, FontTableMethods, FontTableTag}; +use platform::font::FontTable; +use text::glyph::{CharIndex, GlyphStore, GlyphId, GlyphData}; +use text::shaping::ShaperMethods; +use text::util::{float_to_fixed, fixed_to_float}; + +use geom::Point2D; +use harfbuzz::{HB_MEMORY_MODE_READONLY, HB_DIRECTION_LTR}; +use harfbuzz::{hb_blob_create, hb_face_create_for_tables}; +use harfbuzz::{hb_blob_t}; +use harfbuzz::{hb_bool_t}; +use harfbuzz::{hb_buffer_add_utf8}; +use harfbuzz::{hb_buffer_destroy}; +use harfbuzz::{hb_buffer_get_glyph_positions}; +use harfbuzz::{hb_buffer_set_direction}; +use harfbuzz::{hb_face_destroy}; +use harfbuzz::{hb_face_t, hb_font_t}; +use harfbuzz::{hb_font_create}; +use harfbuzz::{hb_font_destroy, hb_buffer_create}; +use harfbuzz::{hb_font_funcs_create}; +use harfbuzz::{hb_font_funcs_destroy}; +use harfbuzz::{hb_font_funcs_set_glyph_func}; +use harfbuzz::{hb_font_funcs_set_glyph_h_advance_func}; +use harfbuzz::{hb_font_funcs_set_glyph_h_kerning_func}; +use harfbuzz::{hb_font_funcs_t, hb_buffer_t, hb_codepoint_t}; +use harfbuzz::{hb_font_set_funcs}; +use harfbuzz::{hb_font_set_ppem}; +use harfbuzz::{hb_font_set_scale}; +use harfbuzz::{hb_glyph_info_t}; +use 
harfbuzz::{hb_glyph_position_t}; +use harfbuzz::{hb_position_t, hb_tag_t}; +use harfbuzz::{hb_shape, hb_buffer_get_glyph_infos}; +use libc::{c_uint, c_int, c_void, c_char}; +use servo_util::geometry::Au; +use servo_util::range::Range; +use std::mem; +use std::char; +use std::cmp; +use std::ptr; + +static NO_GLYPH: i32 = -1; +static CONTINUATION_BYTE: i32 = -2; + +pub struct ShapedGlyphData { + count: int, + glyph_infos: *mut hb_glyph_info_t, + pos_infos: *mut hb_glyph_position_t, +} + +pub struct ShapedGlyphEntry { + codepoint: GlyphId, + advance: Au, + offset: Option<Point2D<Au>>, +} + +impl ShapedGlyphData { + pub fn new(buffer: *mut hb_buffer_t) -> ShapedGlyphData { + unsafe { + let mut glyph_count = 0; + let glyph_infos = hb_buffer_get_glyph_infos(buffer, &mut glyph_count); + let glyph_count = glyph_count as int; + assert!(glyph_infos.is_not_null()); + let mut pos_count = 0; + let pos_infos = hb_buffer_get_glyph_positions(buffer, &mut pos_count); + let pos_count = pos_count as int; + assert!(pos_infos.is_not_null()); + assert!(glyph_count == pos_count); + + ShapedGlyphData { + count: glyph_count, + glyph_infos: glyph_infos, + pos_infos: pos_infos, + } + } + } + + #[inline(always)] + fn byte_offset_of_glyph(&self, i: int) -> int { + assert!(i < self.count); + + unsafe { + let glyph_info_i = self.glyph_infos.offset(i); + (*glyph_info_i).cluster as int + } + } + + pub fn len(&self) -> int { + self.count + } + + /// Returns shaped glyph data for one glyph, and updates the y-position of the pen. 
+ pub fn get_entry_for_glyph(&self, i: int, y_pos: &mut Au) -> ShapedGlyphEntry { + assert!(i < self.count); + + unsafe { + let glyph_info_i = self.glyph_infos.offset(i); + let pos_info_i = self.pos_infos.offset(i); + let x_offset = Shaper::fixed_to_float((*pos_info_i).x_offset); + let y_offset = Shaper::fixed_to_float((*pos_info_i).y_offset); + let x_advance = Shaper::fixed_to_float((*pos_info_i).x_advance); + let y_advance = Shaper::fixed_to_float((*pos_info_i).y_advance); + + let x_offset = Au::from_frac_px(x_offset); + let y_offset = Au::from_frac_px(y_offset); + let x_advance = Au::from_frac_px(x_advance); + let y_advance = Au::from_frac_px(y_advance); + + let offset = if x_offset == Au(0) && y_offset == Au(0) && y_advance == Au(0) { + None + } else { + // adjust the pen.. + if y_advance > Au(0) { + *y_pos = *y_pos - y_advance; + } + + Some(Point2D(x_offset, *y_pos - y_offset)) + }; + + ShapedGlyphEntry { + codepoint: (*glyph_info_i).codepoint as GlyphId, + advance: x_advance, + offset: offset, + } + } + } +} + +pub struct Shaper { + hb_face: *mut hb_face_t, + hb_font: *mut hb_font_t, + hb_funcs: *mut hb_font_funcs_t, +} + +#[unsafe_destructor] +impl Drop for Shaper { + fn drop(&mut self) { + unsafe { + assert!(self.hb_face.is_not_null()); + hb_face_destroy(self.hb_face); + + assert!(self.hb_font.is_not_null()); + hb_font_destroy(self.hb_font); + + assert!(self.hb_funcs.is_not_null()); + hb_font_funcs_destroy(self.hb_funcs); + } + } +} + +impl Shaper { + pub fn new(font: &mut Font) -> Shaper { + unsafe { + // Indirection for Rust Issue #6248, dynamic freeze scope artifically extended + let font_ptr = font as *mut Font; + let hb_face: *mut hb_face_t = hb_face_create_for_tables(get_font_table_func, + font_ptr as *mut c_void, + None); + let hb_font: *mut hb_font_t = hb_font_create(hb_face); + + // Set points-per-em. if zero, performs no hinting in that direction. 
+ let pt_size = font.pt_size; + hb_font_set_ppem(hb_font, pt_size as c_uint, pt_size as c_uint); + + // Set scaling. Note that this takes 16.16 fixed point. + hb_font_set_scale(hb_font, + Shaper::float_to_fixed(pt_size) as c_int, + Shaper::float_to_fixed(pt_size) as c_int); + + // configure static function callbacks. + // NB. This funcs structure could be reused globally, as it never changes. + let hb_funcs: *mut hb_font_funcs_t = hb_font_funcs_create(); + hb_font_funcs_set_glyph_func(hb_funcs, glyph_func, ptr::mut_null(), None); + hb_font_funcs_set_glyph_h_advance_func(hb_funcs, glyph_h_advance_func, ptr::mut_null(), None); + hb_font_funcs_set_glyph_h_kerning_func(hb_funcs, glyph_h_kerning_func, ptr::mut_null(), ptr::mut_null()); + hb_font_set_funcs(hb_font, hb_funcs, font_ptr as *mut c_void, None); + + Shaper { + hb_face: hb_face, + hb_font: hb_font, + hb_funcs: hb_funcs, + } + } + } + + fn float_to_fixed(f: f64) -> i32 { + float_to_fixed(16, f) + } + + fn fixed_to_float(i: hb_position_t) -> f64 { + fixed_to_float(16, i) + } +} + +impl ShaperMethods for Shaper { + /// Calculate the layout metrics associated with the given text when rendered in a specific + /// font. + fn shape_text(&self, text: &str, glyphs: &mut GlyphStore) { + unsafe { + let hb_buffer: *mut hb_buffer_t = hb_buffer_create(); + hb_buffer_set_direction(hb_buffer, HB_DIRECTION_LTR); + + hb_buffer_add_utf8(hb_buffer, + text.as_ptr() as *const c_char, + text.len() as c_int, + 0, + text.len() as c_int); + + hb_shape(self.hb_font, hb_buffer, ptr::mut_null(), 0); + self.save_glyph_results(text, glyphs, hb_buffer); + hb_buffer_destroy(hb_buffer); + } + } +} + +impl Shaper { + fn save_glyph_results(&self, text: &str, glyphs: &mut GlyphStore, buffer: *mut hb_buffer_t) { + let glyph_data = ShapedGlyphData::new(buffer); + let glyph_count = glyph_data.len(); + let byte_max = text.len() as int; + let char_max = text.char_len() as int; + + // GlyphStore records are indexed by character, not byte offset. 
+ // so, we must be careful to increment this when saving glyph entries. + let mut char_idx = CharIndex(0); + + assert!(glyph_count <= char_max); + + debug!("Shaped text[char count={}], got back {} glyph info records.", + char_max, + glyph_count); + + if char_max != glyph_count { + debug!("NOTE: Since these are not equal, we probably have been given some complex \ + glyphs."); + } + + // make map of what chars have glyphs + let mut byteToGlyph: Vec<i32>; + + // fast path: all chars are single-byte. + if byte_max == char_max { + byteToGlyph = Vec::from_elem(byte_max as uint, NO_GLYPH); + } else { + byteToGlyph = Vec::from_elem(byte_max as uint, CONTINUATION_BYTE); + for (i, _) in text.char_indices() { + *byteToGlyph.get_mut(i) = NO_GLYPH; + } + } + + debug!("(glyph idx) -> (text byte offset)"); + for i in range(0, glyph_data.len()) { + // loc refers to a *byte* offset within the utf8 string. + let loc = glyph_data.byte_offset_of_glyph(i); + if loc < byte_max { + assert!(*byteToGlyph.get(loc as uint) != CONTINUATION_BYTE); + *byteToGlyph.get_mut(loc as uint) = i as i32; + } else { + debug!("ERROR: tried to set out of range byteToGlyph: idx={}, glyph idx={}", + loc, + i); + } + debug!("{} -> {}", i, loc); + } + + debug!("text: {:s}", text); + debug!("(char idx): char->(glyph index):"); + for (i, ch) in text.char_indices() { + debug!("{}: {} --> {:d}", i, ch, *byteToGlyph.get(i) as int); + } + + // some helpers + let mut glyph_span: Range<int> = Range::empty(); + // this span contains first byte of first char, to last byte of last char in range. + // so, end() points to first byte of last+1 char, if it's less than byte_max. + let mut char_byte_span: Range<int> = Range::empty(); + let mut y_pos = Au(0); + + // main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars. + // in cases with complex glyph-character assocations, 2+ glyphs and 1+ chars can be + // processed. 
+ while glyph_span.begin() < glyph_count { + // start by looking at just one glyph. + glyph_span.extend_by(1); + debug!("Processing glyph at idx={}", glyph_span.begin()); + + let char_byte_start = glyph_data.byte_offset_of_glyph(glyph_span.begin()); + char_byte_span.reset(char_byte_start, 0); + + // find a range of chars corresponding to this glyph, plus + // any trailing chars that do not have associated glyphs. + while char_byte_span.end() < byte_max { + let range = text.char_range_at(char_byte_span.end() as uint); + drop(range.ch); + char_byte_span.extend_to(range.next as int); + + debug!("Processing char byte span: off={}, len={} for glyph idx={}", + char_byte_span.begin(), char_byte_span.length(), glyph_span.begin()); + + while char_byte_span.end() != byte_max && + byteToGlyph[char_byte_span.end() as uint] == NO_GLYPH { + debug!("Extending char byte span to include byte offset={} with no associated \ + glyph", char_byte_span.end()); + let range = text.char_range_at(char_byte_span.end() as uint); + drop(range.ch); + char_byte_span.extend_to(range.next as int); + } + + // extend glyph range to max glyph index covered by char_span, + // in cases where one char made several glyphs and left some unassociated chars. + let mut max_glyph_idx = glyph_span.end(); + for i in char_byte_span.each_index() { + if byteToGlyph[i as uint] > NO_GLYPH { + max_glyph_idx = cmp::max(byteToGlyph[i as uint] as int + 1, max_glyph_idx); + } + } + + if max_glyph_idx > glyph_span.end() { + glyph_span.extend_to(max_glyph_idx); + debug!("Extended glyph span (off={}, len={}) to cover char byte span's max \ + glyph index", + glyph_span.begin(), glyph_span.length()); + } + + + // if there's just one glyph, then we don't need further checks. + if glyph_span.length() == 1 { break; } + + // if no glyphs were found yet, extend the char byte range more. + if glyph_span.length() == 0 { continue; } + + debug!("Complex (multi-glyph to multi-char) association found. 
This case \ + probably doesn't work."); + + let mut all_glyphs_are_within_cluster: bool = true; + for j in glyph_span.each_index() { + let loc = glyph_data.byte_offset_of_glyph(j); + if !char_byte_span.contains(loc) { + all_glyphs_are_within_cluster = false; + break + } + } + + debug!("All glyphs within char_byte_span cluster?: {}", + all_glyphs_are_within_cluster); + + // found a valid range; stop extending char_span. + if all_glyphs_are_within_cluster { + break + } + } + + // character/glyph clump must contain characters. + assert!(char_byte_span.length() > 0); + // character/glyph clump must contain glyphs. + assert!(glyph_span.length() > 0); + + // now char_span is a ligature clump, formed by the glyphs in glyph_span. + // we need to find the chars that correspond to actual glyphs (char_extended_span), + //and set glyph info for those and empty infos for the chars that are continuations. + + // a simple example: + // chars: 'f' 't' 't' + // glyphs: 'ftt' '' '' + // cgmap: t f f + // gspan: [-] + // cspan: [-] + // covsp: [---------------] + + let mut covered_byte_span = char_byte_span.clone(); + // extend, clipping at end of text range. + while covered_byte_span.end() < byte_max + && byteToGlyph[covered_byte_span.end() as uint] == NO_GLYPH { + let range = text.char_range_at(covered_byte_span.end() as uint); + drop(range.ch); + covered_byte_span.extend_to(range.next as int); + } + + if covered_byte_span.begin() >= byte_max { + // oops, out of range. clip and forget this clump. + let end = glyph_span.end(); // FIXME: borrow checker workaround + glyph_span.reset(end, 0); + let end = char_byte_span.end(); // FIXME: borrow checker workaround + char_byte_span.reset(end, 0); + } + + // clamp to end of text. (I don't think this will be necessary, but..) + let end = covered_byte_span.end(); // FIXME: borrow checker workaround + covered_byte_span.extend_to(cmp::min(end, byte_max)); + + // fast path: 1-to-1 mapping of single char and single glyph. 
+ if glyph_span.length() == 1 { + // TODO(Issue #214): cluster ranges need to be computed before + // shaping, and then consulted here. + // for now, just pretend that every character is a cluster start. + // (i.e., pretend there are no combining character sequences). + // 1-to-1 mapping of character to glyph also treated as ligature start. + let shape = glyph_data.get_entry_for_glyph(glyph_span.begin(), &mut y_pos); + let data = GlyphData::new(shape.codepoint, + shape.advance, + shape.offset, + false, + true, + true); + glyphs.add_glyph_for_char_index(char_idx, &data); + } else { + // collect all glyphs to be assigned to the first character. + let mut datas = vec!(); + + for glyph_i in glyph_span.each_index() { + let shape = glyph_data.get_entry_for_glyph(glyph_i, &mut y_pos); + datas.push(GlyphData::new(shape.codepoint, + shape.advance, + shape.offset, + false, // not missing + true, // treat as cluster start + glyph_i > glyph_span.begin())); + // all but first are ligature continuations + } + + // now add the detailed glyph entry. + glyphs.add_glyphs_for_char_index(char_idx, datas.as_slice()); + + // set the other chars, who have no glyphs + let mut i = covered_byte_span.begin(); + loop { + let range = text.char_range_at(i as uint); + drop(range.ch); + i = range.next as int; + if i >= covered_byte_span.end() { break; } + char_idx = char_idx + CharIndex(1); + glyphs.add_nonglyph_for_char_index(char_idx, false, false); + } + } + + // shift up our working spans past things we just handled. + let end = glyph_span.end(); // FIXME: borrow checker workaround + glyph_span.reset(end, 0); + let end = char_byte_span.end();; // FIXME: borrow checker workaround + char_byte_span.reset(end, 0); + char_idx = char_idx + CharIndex(1); + } + + // this must be called after adding all glyph data; it sorts the + // lookup table for finding detailed glyphs by associated char index. 
+ glyphs.finalize_changes(); + } +} + +/// Callbacks from Harfbuzz when font map and glyph advance lookup needed. +extern fn glyph_func(_: *mut hb_font_t, + font_data: *mut c_void, + unicode: hb_codepoint_t, + _: hb_codepoint_t, + glyph: *mut hb_codepoint_t, + _: *mut c_void) + -> hb_bool_t { + let font: *const Font = font_data as *const Font; + assert!(font.is_not_null()); + + unsafe { + match (*font).glyph_index(char::from_u32(unicode).unwrap()) { + Some(g) => { + *glyph = g as hb_codepoint_t; + true as hb_bool_t + } + None => false as hb_bool_t + } + } +} + +extern fn glyph_h_advance_func(_: *mut hb_font_t, + font_data: *mut c_void, + glyph: hb_codepoint_t, + _: *mut c_void) + -> hb_position_t { + let font: *mut Font = font_data as *mut Font; + assert!(font.is_not_null()); + + unsafe { + let advance = (*font).glyph_h_advance(glyph as GlyphId); + Shaper::float_to_fixed(advance) + } +} + +extern fn glyph_h_kerning_func(_: *mut hb_font_t, + font_data: *mut c_void, + first_glyph: hb_codepoint_t, + second_glyph: hb_codepoint_t, + _: *mut c_void) + -> hb_position_t { + let font: *mut Font = font_data as *mut Font; + assert!(font.is_not_null()); + + unsafe { + let advance = (*font).glyph_h_kerning(first_glyph as GlyphId, second_glyph as GlyphId); + Shaper::float_to_fixed(advance) + } +} + +// Callback to get a font table out of a font. +extern fn get_font_table_func(_: *mut hb_face_t, tag: hb_tag_t, user_data: *mut c_void) -> *mut hb_blob_t { + unsafe { + let font: *const Font = user_data as *const Font; + assert!(font.is_not_null()); + + // TODO(Issue #197): reuse font table data, which will change the unsound trickery here. 
+ match (*font).get_table_for_tag(tag as FontTableTag) { + None => ptr::mut_null(), + Some(ref font_table) => { + let skinny_font_table_ptr: *const FontTable = font_table; // private context + + let mut blob: *mut hb_blob_t = ptr::mut_null(); + (*skinny_font_table_ptr).with_buffer(|buf: *const u8, len: uint| { + // HarfBuzz calls `destroy_blob_func` when the buffer is no longer needed. + blob = hb_blob_create(buf as *const c_char, + len as c_uint, + HB_MEMORY_MODE_READONLY, + mem::transmute(skinny_font_table_ptr), + destroy_blob_func); + }); + + assert!(blob.is_not_null()); + blob + } + } + } +} + +// TODO(Issue #197): reuse font table data, which will change the unsound trickery here. +// In particular, we'll need to cast to a boxed, rather than owned, FontTable. + +// even better, should cache the harfbuzz blobs directly instead of recreating a lot. +extern fn destroy_blob_func(_: *mut c_void) { + // TODO: Previous code here was broken. Rewrite. +} diff --git a/components/gfx/text/shaping/mod.rs b/components/gfx/text/shaping/mod.rs new file mode 100644 index 00000000000..ef4bc2088f0 --- /dev/null +++ b/components/gfx/text/shaping/mod.rs @@ -0,0 +1,19 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Shaper encapsulates a specific shaper, such as Harfbuzz, +//! Uniscribe, Pango, or Coretext. +//! +//! Currently, only harfbuzz bindings are implemented. 
+ +use text::glyph::GlyphStore; + +pub use Shaper = text::shaping::harfbuzz::Shaper; + +pub mod harfbuzz; + +pub trait ShaperMethods { + fn shape_text(&self, text: &str, glyphs: &mut GlyphStore); +} + diff --git a/components/gfx/text/text_run.rs b/components/gfx/text/text_run.rs new file mode 100644 index 00000000000..70c10f1c64c --- /dev/null +++ b/components/gfx/text/text_run.rs @@ -0,0 +1,271 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use font::{Font, RunMetrics, FontMetrics}; +use servo_util::geometry::Au; +use servo_util::range::Range; +use servo_util::vec::{Comparator, FullBinarySearchMethods}; +use std::slice::Items; +use sync::Arc; +use text::glyph::{CharIndex, GlyphStore}; +use font::FontHandleMethods; +use platform::font_template::FontTemplateData; + +/// A single "paragraph" of text in one font size and style. +#[deriving(Clone)] +pub struct TextRun { + pub text: Arc<String>, + pub font_template: Arc<FontTemplateData>, + pub pt_size: f64, + pub font_metrics: FontMetrics, + /// The glyph runs that make up this text run. + pub glyphs: Arc<Vec<GlyphRun>>, +} + +/// A single series of glyphs within a text run. +#[deriving(Clone)] +pub struct GlyphRun { + /// The glyphs. + glyph_store: Arc<GlyphStore>, + /// The range of characters in the containing run. 
+ range: Range<CharIndex>, +} + +pub struct SliceIterator<'a> { + glyph_iter: Items<'a, GlyphRun>, + range: Range<CharIndex>, +} + +struct CharIndexComparator; + +impl Comparator<CharIndex,GlyphRun> for CharIndexComparator { + fn compare(&self, key: &CharIndex, value: &GlyphRun) -> Ordering { + if *key < value.range.begin() { + Less + } else if *key >= value.range.end() { + Greater + } else { + Equal + } + } +} + +impl<'a> Iterator<(&'a GlyphStore, CharIndex, Range<CharIndex>)> for SliceIterator<'a> { + // inline(always) due to the inefficient rt failures messing up inline heuristics, I think. + #[inline(always)] + fn next(&mut self) -> Option<(&'a GlyphStore, CharIndex, Range<CharIndex>)> { + let slice_glyphs = self.glyph_iter.next(); + if slice_glyphs.is_none() { + return None; + } + let slice_glyphs = slice_glyphs.unwrap(); + + let mut char_range = self.range.intersect(&slice_glyphs.range); + let slice_range_begin = slice_glyphs.range.begin(); + char_range.shift_by(-slice_range_begin); + if !char_range.is_empty() { + return Some((&*slice_glyphs.glyph_store, slice_range_begin, char_range)) + } + + return None; + } +} + +pub struct LineIterator<'a> { + range: Range<CharIndex>, + clump: Option<Range<CharIndex>>, + slices: SliceIterator<'a>, +} + +impl<'a> Iterator<Range<CharIndex>> for LineIterator<'a> { + fn next(&mut self) -> Option<Range<CharIndex>> { + // Loop until we hit whitespace and are in a clump. + loop { + match self.slices.next() { + Some((glyphs, offset, slice_range)) => { + match (glyphs.is_whitespace(), self.clump) { + (false, Some(ref mut c)) => { + c.extend_by(slice_range.length()); + } + (false, None) => { + let mut c = slice_range; + c.shift_by(offset); + self.clump = Some(c); + } + (true, None) => { /* chomp whitespace */ } + (true, Some(c)) => { + self.clump = None; + // The final whitespace clump is not included. 
+ return Some(c); + } + } + }, + None => { + // flush any remaining chars as a line + if self.clump.is_some() { + let mut c = self.clump.take_unwrap(); + c.extend_to(self.range.end()); + return Some(c); + } else { + return None; + } + } + } + } + } +} + +impl<'a> TextRun { + pub fn new(font: &mut Font, text: String) -> TextRun { + let glyphs = TextRun::break_and_shape(font, text.as_slice()); + let run = TextRun { + text: Arc::new(text), + font_metrics: font.metrics.clone(), + font_template: font.handle.get_template(), + pt_size: font.pt_size, + glyphs: Arc::new(glyphs), + }; + return run; + } + + pub fn break_and_shape(font: &mut Font, text: &str) -> Vec<GlyphRun> { + // TODO(Issue #230): do a better job. See Gecko's LineBreaker. + let mut glyphs = vec!(); + let (mut byte_i, mut char_i) = (0u, CharIndex(0)); + let mut cur_slice_is_whitespace = false; + let (mut byte_last_boundary, mut char_last_boundary) = (0, CharIndex(0)); + while byte_i < text.len() { + let range = text.char_range_at(byte_i); + let ch = range.ch; + let next = range.next; + + // Slices alternate between whitespace and non-whitespace, + // representing line break opportunities. + let can_break_before = if cur_slice_is_whitespace { + match ch { + ' ' | '\t' | '\n' => false, + _ => { + cur_slice_is_whitespace = false; + true + } + } + } else { + match ch { + ' ' | '\t' | '\n' => { + cur_slice_is_whitespace = true; + true + }, + _ => false + } + }; + + // Create a glyph store for this slice if it's nonempty. + if can_break_before && byte_i > byte_last_boundary { + let slice = text.slice(byte_last_boundary, byte_i).to_string(); + debug!("creating glyph store for slice {} (ws? 
{}), {} - {} in run {}", + slice, !cur_slice_is_whitespace, byte_last_boundary, byte_i, text); + glyphs.push(GlyphRun { + glyph_store: font.shape_text(slice, !cur_slice_is_whitespace), + range: Range::new(char_last_boundary, char_i - char_last_boundary), + }); + byte_last_boundary = byte_i; + char_last_boundary = char_i; + } + + byte_i = next; + char_i = char_i + CharIndex(1); + } + + // Create a glyph store for the final slice if it's nonempty. + if byte_i > byte_last_boundary { + let slice = text.slice_from(byte_last_boundary).to_string(); + debug!("creating glyph store for final slice {} (ws? {}), {} - {} in run {}", + slice, cur_slice_is_whitespace, byte_last_boundary, text.len(), text); + glyphs.push(GlyphRun { + glyph_store: font.shape_text(slice, cur_slice_is_whitespace), + range: Range::new(char_last_boundary, char_i - char_last_boundary), + }); + } + + glyphs + } + + pub fn char_len(&self) -> CharIndex { + match self.glyphs.last() { + None => CharIndex(0), + Some(ref glyph_run) => glyph_run.range.end(), + } + } + + pub fn glyphs(&'a self) -> &'a Vec<GlyphRun> { + &*self.glyphs + } + + pub fn range_is_trimmable_whitespace(&self, range: &Range<CharIndex>) -> bool { + self.iter_slices_for_range(range).all(|(slice_glyphs, _, _)| { + slice_glyphs.is_whitespace() + }) + } + + pub fn ascent(&self) -> Au { + self.font_metrics.ascent + } + + pub fn descent(&self) -> Au { + self.font_metrics.descent + } + + pub fn advance_for_range(&self, range: &Range<CharIndex>) -> Au { + // TODO(Issue #199): alter advance direction for RTL + // TODO(Issue #98): using inter-char and inter-word spacing settings when measuring text + self.iter_slices_for_range(range) + .fold(Au(0), |advance, (glyphs, _, slice_range)| { + advance + glyphs.advance_for_char_range(&slice_range) + }) + } + + pub fn metrics_for_range(&self, range: &Range<CharIndex>) -> RunMetrics { + RunMetrics::new(self.advance_for_range(range), + self.font_metrics.ascent, + self.font_metrics.descent) + } + + pub fn 
metrics_for_slice(&self, glyphs: &GlyphStore, slice_range: &Range<CharIndex>) -> RunMetrics { + RunMetrics::new(glyphs.advance_for_char_range(slice_range), + self.font_metrics.ascent, + self.font_metrics.descent) + } + + pub fn min_width_for_range(&self, range: &Range<CharIndex>) -> Au { + debug!("iterating outer range {:?}", range); + self.iter_slices_for_range(range).fold(Au(0), |max_piece_width, (_, offset, slice_range)| { + debug!("iterated on {:?}[{:?}]", offset, slice_range); + Au::max(max_piece_width, self.advance_for_range(&slice_range)) + }) + } + + /// Returns the index of the first glyph run containing the given character index. + fn index_of_first_glyph_run_containing(&self, index: CharIndex) -> Option<uint> { + self.glyphs.as_slice().binary_search_index_by(&index, CharIndexComparator) + } + + pub fn iter_slices_for_range(&'a self, range: &Range<CharIndex>) -> SliceIterator<'a> { + let index = match self.index_of_first_glyph_run_containing(range.begin()) { + None => self.glyphs.len(), + Some(index) => index, + }; + SliceIterator { + glyph_iter: self.glyphs.slice_from(index).iter(), + range: *range, + } + } + + pub fn iter_natural_lines_for_range(&'a self, range: &Range<CharIndex>) -> LineIterator<'a> { + LineIterator { + range: *range, + clump: None, + slices: self.iter_slices_for_range(range), + } + } +} diff --git a/components/gfx/text/util.rs b/components/gfx/text/util.rs new file mode 100644 index 00000000000..c5059bbff10 --- /dev/null +++ b/components/gfx/text/util.rs @@ -0,0 +1,285 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +use text::glyph::CharIndex; + +#[deriving(PartialEq)] +pub enum CompressionMode { + CompressNone, + CompressWhitespace, + CompressWhitespaceNewline, + DiscardNewline +} + +// ported from Gecko's nsTextFrameUtils::TransformText. 
+// +// High level TODOs: +// +// * Issue #113: consider incoming text state (arabic, etc) +// and propogate outgoing text state (dual of above) +// +// * Issue #114: record skipped and kept chars for mapping original to new text +// +// * Untracked: various edge cases for bidi, CJK, etc. +pub fn transform_text(text: &str, mode: CompressionMode, + incoming_whitespace: bool, + new_line_pos: &mut Vec<CharIndex>) -> (String, bool) { + let mut out_str = String::new(); + let out_whitespace = match mode { + CompressNone | DiscardNewline => { + let mut new_line_index = CharIndex(0); + for ch in text.chars() { + if is_discardable_char(ch, mode) { + // TODO: record skipped char + } else { + // TODO: record kept char + if ch == '\t' { + // TODO: set "has tab" flag + } else if ch == '\n' { + // Save new-line's position for line-break + // This value is relative(not absolute) + new_line_pos.push(new_line_index); + new_line_index = CharIndex(0); + } + + if ch != '\n' { + new_line_index = new_line_index + CharIndex(1); + } + out_str.push_char(ch); + } + } + text.len() > 0 && is_in_whitespace(text.char_at_reverse(0), mode) + }, + + CompressWhitespace | CompressWhitespaceNewline => { + let mut in_whitespace: bool = incoming_whitespace; + for ch in text.chars() { + // TODO: discard newlines between CJK chars + let mut next_in_whitespace: bool = is_in_whitespace(ch, mode); + + if !next_in_whitespace { + if is_always_discardable_char(ch) { + // revert whitespace setting, since this char was discarded + next_in_whitespace = in_whitespace; + // TODO: record skipped char + } else { + // TODO: record kept char + out_str.push_char(ch); + } + } else { /* next_in_whitespace; possibly add a space char */ + if in_whitespace { + // TODO: record skipped char + } else { + // TODO: record kept char + out_str.push_char(' '); + } + } + // save whitespace context for next char + in_whitespace = next_in_whitespace; + } /* /for str::each_char */ + in_whitespace + } + }; + + return 
(out_str.into_string(), out_whitespace); + + fn is_in_whitespace(ch: char, mode: CompressionMode) -> bool { + match (ch, mode) { + (' ', _) => true, + ('\t', _) => true, + ('\n', CompressWhitespaceNewline) => true, + (_, _) => false + } + } + + fn is_discardable_char(ch: char, mode: CompressionMode) -> bool { + if is_always_discardable_char(ch) { + return true; + } + match mode { + DiscardNewline | CompressWhitespaceNewline => ch == '\n', + _ => false + } + } + + fn is_always_discardable_char(_ch: char) -> bool { + // TODO: check for bidi control chars, soft hyphens. + false + } +} + +pub fn float_to_fixed(before: int, f: f64) -> i32 { + (1i32 << before as uint) * (f as i32) +} + +pub fn fixed_to_float(before: int, f: i32) -> f64 { + f as f64 * 1.0f64 / ((1i32 << before as uint) as f64) +} + +pub fn fixed_to_rounded_int(before: int, f: i32) -> int { + let half = 1i32 << (before-1) as uint; + if f > 0i32 { + ((half + f) >> before as uint) as int + } else { + -((half - f) >> before as uint) as int + } +} + +/* Generate a 32-bit TrueType tag from its 4 characters */ +pub fn true_type_tag(a: char, b: char, c: char, d: char) -> u32 { + let a = a as u32; + let b = b as u32; + let c = c as u32; + let d = d as u32; + (a << 24 | b << 16 | c << 8 | d) as u32 +} + +#[test] +fn test_true_type_tag() { + assert_eq!(true_type_tag('c', 'm', 'a', 'p'), 0x_63_6D_61_70_u32); +} + +#[test] +fn test_transform_compress_none() { + let test_strs = vec!( + " foo bar", + "foo bar ", + "foo\n bar", + "foo \nbar", + " foo bar \nbaz", + "foo bar baz", + "foobarbaz\n\n" + ); + let mode = CompressNone; + + for test in test_strs.iter() { + let mut new_line_pos = vec!(); + let (trimmed_str, _out) = transform_text(*test, mode, true, &mut new_line_pos); + assert_eq!(trimmed_str.as_slice(), *test) + } +} + +#[test] +fn test_transform_discard_newline() { + let test_strs = vec!( + " foo bar", + "foo bar ", + "foo\n bar", + "foo \nbar", + " foo bar \nbaz", + "foo bar baz", + "foobarbaz\n\n" + ); + + let 
oracle_strs = vec!( + " foo bar", + "foo bar ", + "foo bar", + "foo bar", + " foo bar baz", + "foo bar baz", + "foobarbaz" + ); + + assert_eq!(test_strs.len(), oracle_strs.len()); + let mode = DiscardNewline; + + for (test, oracle) in test_strs.iter().zip(oracle_strs.iter()) { + let mut new_line_pos = vec!(); + let (trimmed_str, _out) = transform_text(*test, mode, true, &mut new_line_pos); + assert_eq!(trimmed_str.as_slice(), *oracle) + } +} + +/* FIXME: Fix and re-enable +#[test] +fn test_transform_compress_whitespace() { + let test_strs : ~[String] = ~[" foo bar".to_string(), + "foo bar ".to_string(), + "foo\n bar".to_string(), + "foo \nbar".to_string(), + " foo bar \nbaz".to_string(), + "foo bar baz".to_string(), + "foobarbaz\n\n".to_string()]; + + let oracle_strs : ~[String] = ~[" foo bar".to_string(), + "foo bar ".to_string(), + "foo\n bar".to_string(), + "foo \nbar".to_string(), + " foo bar \nbaz".to_string(), + "foo bar baz".to_string(), + "foobarbaz\n\n".to_string()]; + + assert_eq!(test_strs.len(), oracle_strs.len()); + let mode = CompressWhitespace; + + for i in range(0, test_strs.len()) { + let mut new_line_pos = ~[]; + let (trimmed_str, _out) = transform_text(test_strs[i], mode, true, &mut new_line_pos); + assert_eq!(&trimmed_str, &oracle_strs[i]) + } +} + +#[test] +fn test_transform_compress_whitespace_newline() { + let test_strs : ~[String] = ~[" foo bar".to_string(), + "foo bar ".to_string(), + "foo\n bar".to_string(), + "foo \nbar".to_string(), + " foo bar \nbaz".to_string(), + "foo bar baz".to_string(), + "foobarbaz\n\n".to_string()]; + + let oracle_strs : ~[String] = ~["foo bar".to_string(), + "foo bar ".to_string(), + "foo bar".to_string(), + "foo bar".to_string(), + " foo bar baz".to_string(), + "foo bar baz".to_string(), + "foobarbaz ".to_string()]; + + assert_eq!(test_strs.len(), oracle_strs.len()); + let mode = CompressWhitespaceNewline; + + for i in range(0, test_strs.len()) { + let mut new_line_pos = ~[]; + let (trimmed_str, _out) = 
transform_text(test_strs[i], mode, true, &mut new_line_pos); + assert_eq!(&trimmed_str, &oracle_strs[i]) + } +} +*/ + +#[test] +fn test_transform_compress_whitespace_newline_no_incoming() { + let test_strs = vec!( + " foo bar", + "\nfoo bar", + "foo bar ", + "foo\n bar", + "foo \nbar", + " foo bar \nbaz", + "foo bar baz", + "foobarbaz\n\n" + ); + + let oracle_strs = vec!( + " foo bar", + " foo bar", + "foo bar ", + "foo bar", + "foo bar", + " foo bar baz", + "foo bar baz", + "foobarbaz " + ); + + assert_eq!(test_strs.len(), oracle_strs.len()); + let mode = CompressWhitespaceNewline; + + for (test, oracle) in test_strs.iter().zip(oracle_strs.iter()) { + let mut new_line_pos = vec!(); + let (trimmed_str, _out) = transform_text(*test, mode, false, &mut new_line_pos); + assert_eq!(trimmed_str.as_slice(), *oracle) + } +} |