aboutsummaryrefslogtreecommitdiffstats
path: root/components/gfx/text
diff options
context:
space:
mode:
Diffstat (limited to 'components/gfx/text')
-rw-r--r--components/gfx/text/glyph.rs752
-rw-r--r--components/gfx/text/mod.rs18
-rw-r--r--components/gfx/text/shaping/harfbuzz.rs541
-rw-r--r--components/gfx/text/shaping/mod.rs19
-rw-r--r--components/gfx/text/text_run.rs271
-rw-r--r--components/gfx/text/util.rs285
6 files changed, 1886 insertions, 0 deletions
diff --git a/components/gfx/text/glyph.rs b/components/gfx/text/glyph.rs
new file mode 100644
index 00000000000..2ea2d7c5d2e
--- /dev/null
+++ b/components/gfx/text/glyph.rs
@@ -0,0 +1,752 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use servo_util::vec::*;
+use servo_util::range;
+use servo_util::range::{Range, RangeIndex, IntRangeIndex, EachIndex};
+use servo_util::geometry::Au;
+
+use std::cmp::{PartialOrd, PartialEq};
+use std::num::{NumCast, Zero};
+use std::mem;
+use std::u16;
+use std::vec::Vec;
+use geom::point::Point2D;
+
+/// GlyphEntry is a port of Gecko's CompressedGlyph scheme for storing glyph data compactly.
+///
+/// In the common case (reasonable glyph advances, no offsets from the font em-box, and one glyph
+/// per character), we pack glyph advance, glyph id, and some flags into a single u32.
+///
+/// In the uncommon case (multiple glyphs per unicode character, large glyph index/advance, or
+/// glyph offsets), we pack the glyph count into GlyphEntry, and store the other glyph information
+/// in DetailedGlyphStore.
+#[deriving(Clone)]
+struct GlyphEntry {
+ // The packed 32-bit word. Its bit layout is described by the FLAG_* and GLYPH_* constants
+ // declared further down in this file.
+ value: u32,
+}
+
+impl GlyphEntry {
+    /// Wraps an already-packed 32-bit value. No validation is performed.
+    fn new(value: u32) -> GlyphEntry {
+        GlyphEntry {
+            value: value,
+        }
+    }
+
+    /// Returns the all-zero entry.
+    fn initial() -> GlyphEntry {
+        GlyphEntry::new(0)
+    }
+
+    /// Creates a GlyphEntry for the common case: the glyph id and advance both fit in their
+    /// bit-fields, so everything is packed inline and `FLAG_IS_SIMPLE_GLYPH` is set.
+    ///
+    /// Panics if `id` or `advance` does not fit (see `is_simple_glyph_id` and
+    /// `is_simple_advance`).
+    fn simple(id: GlyphId, advance: Au) -> GlyphEntry {
+        assert!(is_simple_glyph_id(id));
+        assert!(is_simple_advance(advance));
+
+        let id_mask = id as u32;
+        let Au(advance) = advance;
+        let advance_mask = (advance as u32) << GLYPH_ADVANCE_SHIFT as uint;
+
+        GlyphEntry::new(id_mask | advance_mask | FLAG_IS_SIMPLE_GLYPH)
+    }
+
+    /// Creates a GlyphEntry for the uncommon case; should be accompanied by
+    /// initialization of the actual DetailedGlyph data in DetailedGlyphStore.
+    ///
+    /// Panics if `glyph_count` is negative or larger than `u16::MAX`.
+    fn complex(starts_cluster: bool, starts_ligature: bool, glyph_count: int) -> GlyphEntry {
+        // A negative count would sign-extend when cast to u32 and spill into the flag bits,
+        // so the lower bound has to be checked as well as the upper one.
+        assert!(glyph_count >= 0 && glyph_count <= u16::MAX as int);
+
+        debug!("creating complex glyph entry: starts_cluster={}, starts_ligature={}, \
+                glyph_count={}",
+               starts_cluster,
+               starts_ligature,
+               glyph_count);
+
+        let mut val = FLAG_NOT_MISSING;
+
+        // The cluster/ligature flags are stored inverted.
+        if !starts_cluster {
+            val |= FLAG_NOT_CLUSTER_START;
+        }
+        if !starts_ligature {
+            val |= FLAG_NOT_LIGATURE_GROUP_START;
+        }
+        val |= (glyph_count as u32) << GLYPH_COUNT_SHIFT as uint;
+
+        GlyphEntry::new(val)
+    }
+
+    /// Create a GlyphEntry for the case where glyphs couldn't be found for the specified
+    /// character.
+    ///
+    /// Panics if `glyph_count` is negative or larger than `u16::MAX`.
+    fn missing(glyph_count: int) -> GlyphEntry {
+        // Same reasoning as in `complex`: reject negative counts before the cast.
+        assert!(glyph_count >= 0 && glyph_count <= u16::MAX as int);
+
+        GlyphEntry::new((glyph_count as u32) << GLYPH_COUNT_SHIFT as uint)
+    }
+}
+
+/// The id of a particular glyph within a font
+pub type GlyphId = u32;
+
+// TODO: unify with bit flags?
+// Break opportunity stored per entry; converted to and from the BREAK_TYPE_* bit values by
+// break_flag_to_enum() and break_enum_to_flag().
+#[deriving(PartialEq)]
+pub enum BreakType {
+ BreakTypeNone,
+ BreakTypeNormal,
+ BreakTypeHyphen,
+}
+
+// Raw bit values for each BreakType. They are shifted by FLAG_CAN_BREAK_SHIFT when stored in a
+// GlyphEntry.
+static BREAK_TYPE_NONE: u8 = 0x0;
+static BREAK_TYPE_NORMAL: u8 = 0x1;
+static BREAK_TYPE_HYPHEN: u8 = 0x2;
+
+/// Decodes the two break bits of an entry into a `BreakType`.
+///
+/// The "normal" bit takes precedence when both bits are set.
+fn break_flag_to_enum(flag: u8) -> BreakType {
+    let has_normal = (flag & BREAK_TYPE_NORMAL) != 0;
+    let has_hyphen = (flag & BREAK_TYPE_HYPHEN) != 0;
+
+    match (has_normal, has_hyphen) {
+        (true, _) => BreakTypeNormal,
+        (false, true) => BreakTypeHyphen,
+        (false, false) => BreakTypeNone,
+    }
+}
+
+/// Encodes a `BreakType` as its (unshifted) BREAK_TYPE_* bit value.
+fn break_enum_to_flag(e: BreakType) -> u8 {
+    let bits = match e {
+        BreakTypeHyphen => BREAK_TYPE_HYPHEN,
+        BreakTypeNormal => BREAK_TYPE_NORMAL,
+        BreakTypeNone => BREAK_TYPE_NONE,
+    };
+    bits
+}
+
+// TODO: make this more type-safe.
+
+// Bit layout of GlyphEntry::value. The top four bits have a single meaning; the lower 28 bits
+// are read through two alternative layouts (simple vs. non-simple), whose masks overlap.
+
+static FLAG_CHAR_IS_SPACE: u32 = 0x10000000;
+// These two bits store some BREAK_TYPE_* flags
+static FLAG_CAN_BREAK_MASK: u32 = 0x60000000;
+static FLAG_CAN_BREAK_SHIFT: u32 = 29;
+static FLAG_IS_SIMPLE_GLYPH: u32 = 0x80000000;
+
+// Simple layout: 12 bits of advance above 16 bits of glyph id.
+// glyph advance; in Au's.
+static GLYPH_ADVANCE_MASK: u32 = 0x0FFF0000;
+static GLYPH_ADVANCE_SHIFT: u32 = 16;
+static GLYPH_ID_MASK: u32 = 0x0000FFFF;
+
+// Non-simple glyphs (more than one glyph per char; missing glyph,
+// newline, tab, large advance, or nonzero x/y offsets) may have one
+// or more detailed glyphs associated with them. They are stored in a
+// side array so that there is a 1:1 mapping of GlyphEntry to
+// unicode char.
+
+// The number of detailed glyphs for this char. If the char couldn't
+// be mapped to a glyph (!FLAG_NOT_MISSING), then this actually holds
+// the UTF8 code point instead.
+static GLYPH_COUNT_MASK: u32 = 0x00FFFF00;
+static GLYPH_COUNT_SHIFT: u32 = 8;
+// N.B. following Gecko, these are all inverted so that a lot of
+// missing chars can be memset with zeros in one fell swoop.
+// These low-byte flags share bits with GLYPH_ID_MASK, so they are only meaningful when
+// FLAG_IS_SIMPLE_GLYPH is clear.
+static FLAG_NOT_MISSING: u32 = 0x00000001;
+static FLAG_NOT_CLUSTER_START: u32 = 0x00000002;
+static FLAG_NOT_LIGATURE_GROUP_START: u32 = 0x00000004;
+
+static FLAG_CHAR_IS_TAB: u32 = 0x00000008;
+static FLAG_CHAR_IS_NEWLINE: u32 = 0x00000010;
+//static FLAG_CHAR_IS_LOW_SURROGATE: u32 = 0x00000020;
+//static CHAR_IDENTITY_FLAGS_MASK: u32 = 0x00000038;
+
+/// Returns true when `id` survives masking with `GLYPH_ID_MASK` unchanged, i.e. it fits in
+/// the glyph-id bit-field of a simple entry.
+fn is_simple_glyph_id(id: GlyphId) -> bool {
+    let packable_bits = (id as u32) & GLYPH_ID_MASK;
+    packable_bits == id
+}
+
+/// Returns true when `advance` fits in the advance bit-field of a simple entry.
+///
+/// A negative advance cannot be packed, so it is reported as not simple. The previous
+/// `to_u32().unwrap()` aborted on that input instead of answering the question.
+fn is_simple_advance(advance: Au) -> bool {
+    let Au(raw_advance) = advance;
+    if raw_advance < 0 {
+        return false;
+    }
+
+    let unsigned_au = raw_advance as u32;
+    (unsigned_au & (GLYPH_ADVANCE_MASK >> GLYPH_ADVANCE_SHIFT as uint)) == unsigned_au
+}
+
+type DetailedGlyphCount = u16;
+
+// Getters and setters for GlyphEntry. Setter methods are functional,
+// because GlyphEntry is immutable and only a u32 in size.
+impl GlyphEntry {
+    // getter methods
+
+    /// Advance stored inline in the advance bit-field (simple layout).
+    #[inline(always)]
+    fn advance(&self) -> Au {
+        NumCast::from((self.value & GLYPH_ADVANCE_MASK) >> GLYPH_ADVANCE_SHIFT as uint).unwrap()
+    }
+
+    /// Glyph id stored inline in the glyph-id bit-field (simple layout).
+    fn id(&self) -> GlyphId {
+        self.value & GLYPH_ID_MASK
+    }
+
+    /// True unless the entry is explicitly marked as not starting a ligature group.
+    ///
+    /// In a simple entry the low bits hold the glyph id rather than flags, so simple entries
+    /// always count as a ligature start. The flag is stored inverted, so the result is the
+    /// negation of the flag test, not a test against the negated mask.
+    fn is_ligature_start(&self) -> bool {
+        self.is_simple() || !self.has_flag(FLAG_NOT_LIGATURE_GROUP_START)
+    }
+
+    /// True unless the entry is explicitly marked as not starting a cluster.
+    ///
+    /// Same reasoning as `is_ligature_start`: simple entries are always cluster starts, and
+    /// the inverted flag must be tested and then negated.
+    fn is_cluster_start(&self) -> bool {
+        self.is_simple() || !self.has_flag(FLAG_NOT_CLUSTER_START)
+    }
+
+    // True if original char was normal (U+0020) space. Other chars may
+    // map to space glyph, but this does not account for them.
+    fn char_is_space(&self) -> bool {
+        self.has_flag(FLAG_CHAR_IS_SPACE)
+    }
+
+    // The tab/newline bits overlap the glyph id of a simple entry, hence the guard.
+    fn char_is_tab(&self) -> bool {
+        !self.is_simple() && self.has_flag(FLAG_CHAR_IS_TAB)
+    }
+
+    fn char_is_newline(&self) -> bool {
+        !self.is_simple() && self.has_flag(FLAG_CHAR_IS_NEWLINE)
+    }
+
+    /// Decodes the break opportunity stored in the two break bits.
+    fn can_break_before(&self) -> BreakType {
+        let flag = ((self.value & FLAG_CAN_BREAK_MASK) >> FLAG_CAN_BREAK_SHIFT as uint) as u8;
+        break_flag_to_enum(flag)
+    }
+
+    // setter methods (functional: each returns a new entry)
+    #[inline(always)]
+    fn set_char_is_space(&self) -> GlyphEntry {
+        GlyphEntry::new(self.value | FLAG_CHAR_IS_SPACE)
+    }
+
+    #[inline(always)]
+    fn set_char_is_tab(&self) -> GlyphEntry {
+        assert!(!self.is_simple());
+        GlyphEntry::new(self.value | FLAG_CHAR_IS_TAB)
+    }
+
+    #[inline(always)]
+    fn set_char_is_newline(&self) -> GlyphEntry {
+        assert!(!self.is_simple());
+        GlyphEntry::new(self.value | FLAG_CHAR_IS_NEWLINE)
+    }
+
+    /// Returns a copy of this entry whose break bits encode `e`.
+    ///
+    /// The old break bits are cleared first; merely OR-ing the new value in would make it
+    /// impossible to lower a previously stored break type (e.g. back to `BreakTypeNone`).
+    #[inline(always)]
+    fn set_can_break_before(&self, e: BreakType) -> GlyphEntry {
+        let flag = (break_enum_to_flag(e) as u32) << FLAG_CAN_BREAK_SHIFT as uint;
+        GlyphEntry::new((self.value & !FLAG_CAN_BREAK_MASK) | flag)
+    }
+
+    // helper methods
+
+    /// Number stored in the glyph-count bit-field. Only valid for non-simple entries.
+    fn glyph_count(&self) -> u16 {
+        assert!(!self.is_simple());
+        ((self.value & GLYPH_COUNT_MASK) >> GLYPH_COUNT_SHIFT as uint) as u16
+    }
+
+    #[inline(always)]
+    fn is_simple(&self) -> bool {
+        self.has_flag(FLAG_IS_SIMPLE_GLYPH)
+    }
+
+    /// True if any bit of `flag` is set in this entry.
+    #[inline(always)]
+    fn has_flag(&self, flag: u32) -> bool {
+        (self.value & flag) != 0
+    }
+
+    /// Returns this entry with every bit of `other` OR-ed in.
+    #[inline(always)]
+    fn adapt_character_flags_of_entry(&self, other: GlyphEntry) -> GlyphEntry {
+        GlyphEntry { value: self.value | other.value }
+    }
+}
+
+/// Data for a single detailed glyph. Used when several glyphs correspond to one character,
+/// or when a glyph's data could not be packed into a `GlyphEntry`.
+#[deriving(Clone)]
+struct DetailedGlyph {
+    /// The glyph's id within the font.
+    id: GlyphId,
+    /// The glyph's advance, in the text's direction (LTR or RTL).
+    advance: Au,
+    /// The glyph's offset from the font's em-box (from top-left).
+    offset: Point2D<Au>,
+}
+
+impl DetailedGlyph {
+    /// Bundles the three values into a `DetailedGlyph`.
+    fn new(id: GlyphId, advance: Au, offset: Point2D<Au>) -> DetailedGlyph {
+        let glyph = DetailedGlyph {
+            offset: offset,
+            advance: advance,
+            id: id,
+        };
+        glyph
+    }
+}
+
+/// One row of the detailed-glyph lookup table: maps a character offset to the position of
+/// its glyphs in the detail buffer.
+#[deriving(PartialEq, Clone, Eq)]
+struct DetailedGlyphRecord {
+    /// Source string offset / GlyphEntry offset in the TextRun.
+    entry_offset: CharIndex,
+    /// Offset into the detailed glyphs buffer.
+    detail_offset: int,
+}
+
+// Records are ordered by `entry_offset` alone; `detail_offset` takes no part in the ordering.
+impl Ord for DetailedGlyphRecord {
+    fn cmp(&self, rhs: &DetailedGlyphRecord) -> Ordering {
+        let (lhs_key, rhs_key) = (&self.entry_offset, &rhs.entry_offset);
+        lhs_key.cmp(rhs_key)
+    }
+}
+
+impl PartialOrd for DetailedGlyphRecord {
+    fn partial_cmp(&self, rhs: &DetailedGlyphRecord) -> Option<Ordering> {
+        let (lhs_key, rhs_key) = (&self.entry_offset, &rhs.entry_offset);
+        lhs_key.partial_cmp(rhs_key)
+    }
+}
+
+// Manages the lookup table for detailed glyphs. Sorting is deferred
+// until a lookup is actually performed; this matches the expected
+// usage pattern of setting/appending all the detailed glyphs, and
+// then querying without setting.
+struct DetailedGlyphStore {
+ // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
+ // optimization.
+ // All detailed glyphs, appended in the order their entries were added.
+ detail_buffer: Vec<DetailedGlyph>,
+ // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
+ // optimization.
+ // One record per added entry, giving the position of its glyphs in `detail_buffer`.
+ detail_lookup: Vec<DetailedGlyphRecord>,
+ // Cleared whenever an entry is added; set again by ensure_sorted().
+ lookup_is_sorted: bool,
+}
+
+impl<'a> DetailedGlyphStore {
+    /// Creates an empty store.
+    fn new() -> DetailedGlyphStore {
+        DetailedGlyphStore {
+            detail_buffer: vec!(), // TODO: default size?
+            detail_lookup: vec!(),
+            lookup_is_sorted: false,
+        }
+    }
+
+    /// Appends `glyphs` to the detail buffer and records where they start for `entry_offset`.
+    /// The lookup table is marked unsorted; call `ensure_sorted` before querying.
+    fn add_detailed_glyphs_for_entry(&mut self, entry_offset: CharIndex, glyphs: &[DetailedGlyph]) {
+        let entry = DetailedGlyphRecord {
+            entry_offset: entry_offset,
+            detail_offset: self.detail_buffer.len() as int,
+        };
+
+        debug!("Adding entry[off={}] for detailed glyphs: {:?}", entry_offset, glyphs);
+
+        // TODO: assert that `entry` is not already present in `detail_lookup` once asserts
+        // can be compiled in/out based on severity, debug/release, etc. Doing it
+        // unconditionally would wreck the complexity of the lookup.
+        // See Rust Issue #3647, #2228, #3627 for related information.
+
+        self.detail_lookup.push(entry);
+        self.detail_buffer.push_all(glyphs);
+        self.lookup_is_sorted = false;
+    }
+
+    /// Looks up the record for `entry_offset` and returns the index in `detail_buffer` at
+    /// which that entry's glyphs begin.
+    ///
+    /// Panics if the lookup table has not been sorted or holds no record for `entry_offset`.
+    fn detail_start_for_entry(&self, entry_offset: CharIndex) -> uint {
+        assert!(self.lookup_is_sorted);
+
+        let key = DetailedGlyphRecord {
+            entry_offset: entry_offset,
+            detail_offset: 0, // unused
+        };
+
+        let i = self.detail_lookup.as_slice().binary_search_index(&key)
+            .expect("Invalid index not found in detailed glyph lookup table!");
+
+        // `i` is a position in the lookup table, not in the detail buffer; the buffer
+        // position is the one stored in the record that was found.
+        self.detail_lookup[i].detail_offset as uint
+    }
+
+    /// Returns the `count` detailed glyphs that were stored for `entry_offset`.
+    fn get_detailed_glyphs_for_entry(&'a self, entry_offset: CharIndex, count: u16)
+                                     -> &'a [DetailedGlyph] {
+        debug!("Requesting detailed glyphs[n={}] for entry[off={}]", count, entry_offset);
+
+        // FIXME: Is this right? --pcwalton
+        // TODO: should fix this somewhere else
+        if count == 0 {
+            return self.detail_buffer.slice(0, 0);
+        }
+
+        assert!((count as uint) <= self.detail_buffer.len());
+
+        let start = self.detail_start_for_entry(entry_offset);
+
+        assert!(start + (count as uint) <= self.detail_buffer.len());
+        // return a slice into the buffer
+        self.detail_buffer.slice(start, start + count as uint)
+    }
+
+    /// Returns the `detail_offset`-th detailed glyph stored for `entry_offset`.
+    fn get_detailed_glyph_with_index(&'a self,
+                                     entry_offset: CharIndex,
+                                     detail_offset: u16)
+                                     -> &'a DetailedGlyph {
+        assert!((detail_offset as uint) <= self.detail_buffer.len());
+
+        let start = self.detail_start_for_entry(entry_offset);
+
+        assert!(start + (detail_offset as uint) < self.detail_buffer.len());
+        &self.detail_buffer[start + (detail_offset as uint)]
+    }
+
+    /// Sorts the lookup table by entry offset so that it can be binary-searched.
+    /// Does nothing if the table is already sorted.
+    fn ensure_sorted(&mut self) {
+        if self.lookup_is_sorted {
+            return;
+        }
+
+        // Use the record's `Ord` impl so that the comparator is a proper total order. The
+        // previous hand-written closure never returned `Equal`.
+        self.detail_lookup.sort_by(|a, b| a.cmp(b));
+
+        self.lookup_is_sorted = true;
+    }
+}
+
+/// Input record used by GlyphStore clients to provide new glyph data.
+/// It should be allocated on the stack and passed by reference to GlyphStore.
+pub struct GlyphData {
+    id: GlyphId,
+    advance: Au,
+    offset: Point2D<Au>,
+    is_missing: bool,
+    cluster_start: bool,
+    ligature_start: bool,
+}
+
+impl GlyphData {
+    /// Builds a `GlyphData`. When `offset` is `None` the zero value is stored instead.
+    pub fn new(id: GlyphId,
+               advance: Au,
+               offset: Option<Point2D<Au>>,
+               is_missing: bool,
+               cluster_start: bool,
+               ligature_start: bool)
+               -> GlyphData {
+        let resolved_offset = match offset {
+            Some(given) => given,
+            None => Zero::zero(),
+        };
+
+        GlyphData {
+            id: id,
+            advance: advance,
+            offset: resolved_offset,
+            is_missing: is_missing,
+            cluster_start: cluster_start,
+            ligature_start: ligature_start,
+        }
+    }
+}
+
+/// Proxy handed to GlyphStore clients while they iterate over glyphs (either for a
+/// particular TextRun offset, or all glyphs).
+///
+/// It holds only a reference to the store plus the offsets needed to find the glyph, and
+/// fetches each value on demand instead of copying glyph data up front.
+pub enum GlyphInfo<'a> {
+    SimpleGlyphInfo(&'a GlyphStore, CharIndex),
+    DetailGlyphInfo(&'a GlyphStore, CharIndex, u16),
+}
+
+impl<'a> GlyphInfo<'a> {
+    /// The glyph id, read from the packed entry or from the detail store.
+    pub fn id(self) -> GlyphId {
+        match self {
+            DetailGlyphInfo(store, entry_i, detail_j) => {
+                let detail = store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j);
+                detail.id
+            }
+            SimpleGlyphInfo(store, entry_i) => {
+                let entry = store.entry_buffer[entry_i.to_uint()];
+                entry.id()
+            }
+        }
+    }
+
+    /// The glyph advance, read from the packed entry or from the detail store.
+    #[inline(always)]
+    pub fn advance(self) -> Au {
+        match self {
+            DetailGlyphInfo(store, entry_i, detail_j) => {
+                let detail = store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j);
+                detail.advance
+            }
+            SimpleGlyphInfo(store, entry_i) => {
+                let entry = store.entry_buffer[entry_i.to_uint()];
+                entry.advance()
+            }
+        }
+    }
+
+    /// The glyph offset. Simple glyphs carry no offset, so they yield `None`.
+    pub fn offset(self) -> Option<Point2D<Au>> {
+        match self {
+            DetailGlyphInfo(store, entry_i, detail_j) => {
+                let detail = store.detail_store.get_detailed_glyph_with_index(entry_i, detail_j);
+                Some(detail.offset)
+            }
+            SimpleGlyphInfo(..) => None,
+        }
+    }
+}
+
+/// Stores the glyph data belonging to a text run.
+///
+/// Simple glyphs are stored inline in the `entry_buffer`, detailed glyphs are
+/// stored as pointers into the `detail_store`.
+///
+/// ~~~
+/// +- GlyphStore --------------------------------+
+/// |               +---+---+---+---+---+---+---+ |
+/// | entry_buffer: |   | s |   | s |   | s | s | |  d = detailed
+/// |               +-|-+---+-|-+---+-|-+---+---+ |  s = simple
+/// |                 |       |       |           |
+/// |                 |   +---+-------+           |
+/// |                 |   |                       |
+/// |               +-V-+-V-+                     |
+/// | detail_store: | d | d |                     |
+/// |               +---+---+                     |
+/// +---------------------------------------------+
+/// ~~~
+pub struct GlyphStore {
+ // TODO(pcwalton): Allocation of this buffer is expensive. Consider a small-vector
+ // optimization.
+ /// A buffer of glyphs within the text run, in the order in which they
+ /// appear in the input text
+ entry_buffer: Vec<GlyphEntry>,
+ /// A store of the detailed glyph data. Detailed glyphs contained in the
+ /// `entry_buffer` point to locations in this data structure.
+ detail_store: DetailedGlyphStore,
+
+ /// Value supplied to `GlyphStore::new` and returned unchanged by `is_whitespace()`.
+ is_whitespace: bool,
+}
+
+// Declares the `CharIndex` newtype over `int` through the `int_range_index!` macro.
+// NOTE(review): the traits and helpers the macro generates (`to_uint`, `get`, ordering) are
+// defined where the macro is, not here; confirm there.
+int_range_index! {
+ #[deriving(Encodable)]
+ #[doc = "An index that refers to a character in a text run. This could \
+ point to the middle of a glyph."]
+ struct CharIndex(int)
+}
+
+impl<'a> GlyphStore {
+    /// Initializes the glyph store with `length` blank entries, but doesn't actually shape
+    /// anything. Use the add_glyph*/add_nonglyph* methods to store glyph data.
+    ///
+    /// Panics if `length` is not positive.
+    pub fn new(length: int, is_whitespace: bool) -> GlyphStore {
+        assert!(length > 0);
+
+        let blank_entries = Vec::from_elem(length as uint, GlyphEntry::initial());
+        GlyphStore {
+            entry_buffer: blank_entries,
+            detail_store: DetailedGlyphStore::new(),
+            is_whitespace: is_whitespace,
+        }
+    }
+
+    /// Number of character slots in the store.
+    pub fn char_len(&self) -> CharIndex {
+        let slot_count = self.entry_buffer.len();
+        CharIndex(slot_count as int)
+    }
+
+    /// The whitespace flag given at construction time.
+    pub fn is_whitespace(&self) -> bool {
+        self.is_whitespace
+    }
+
+    /// Sorts the detail lookup table; must be called after adding glyphs and before
+    /// querying detailed glyphs.
+    pub fn finalize_changes(&mut self) {
+        self.detail_store.ensure_sorted();
+    }
+
+    /// Stores a single glyph for character `i`, packing it inline when possible and
+    /// falling back to the detail store otherwise. Bits already set on the slot are kept.
+    pub fn add_glyph_for_char_index(&mut self, i: CharIndex, data: &GlyphData) {
+        fn glyph_is_compressible(data: &GlyphData) -> bool {
+            is_simple_glyph_id(data.id)
+                && is_simple_advance(data.advance)
+                && data.offset.is_zero()
+                && data.cluster_start // others are stored in detail buffer
+        }
+
+        assert!(data.ligature_start); // can't compress ligature continuation glyphs.
+        assert!(i < self.char_len());
+
+        // Evaluated unconditionally, exactly as the tuple match it replaces did.
+        let compressible = glyph_is_compressible(data);
+        let slot = i.to_uint();
+
+        let fresh_entry = if data.is_missing {
+            GlyphEntry::missing(1)
+        } else if compressible {
+            GlyphEntry::simple(data.id, data.advance)
+        } else {
+            let glyph = [DetailedGlyph::new(data.id, data.advance, data.offset)];
+            self.detail_store.add_detailed_glyphs_for_entry(i, glyph);
+            GlyphEntry::complex(data.cluster_start, data.ligature_start, 1)
+        };
+
+        let merged = fresh_entry.adapt_character_flags_of_entry(self.entry_buffer[slot]);
+        *self.entry_buffer.get_mut(slot) = merged;
+    }
+
+    /// Stores several glyphs for character `i`. The first glyph decides whether the slot is
+    /// recorded as missing. Bits already set on the slot are kept.
+    pub fn add_glyphs_for_char_index(&mut self, i: CharIndex, data_for_glyphs: &[GlyphData]) {
+        assert!(i < self.char_len());
+        assert!(data_for_glyphs.len() > 0);
+
+        let glyph_count = data_for_glyphs.len() as int;
+        let slot = i.to_uint();
+
+        let first_glyph_data = data_for_glyphs[0];
+        let fresh_entry = if first_glyph_data.is_missing {
+            GlyphEntry::missing(glyph_count)
+        } else {
+            let glyphs_vec: Vec<DetailedGlyph> = data_for_glyphs.iter().map(|glyph| {
+                DetailedGlyph::new(glyph.id, glyph.advance, glyph.offset)
+            }).collect();
+
+            self.detail_store.add_detailed_glyphs_for_entry(i, glyphs_vec.as_slice());
+            GlyphEntry::complex(first_glyph_data.cluster_start,
+                                first_glyph_data.ligature_start,
+                                glyph_count)
+        };
+        let entry = fresh_entry.adapt_character_flags_of_entry(self.entry_buffer[slot]);
+
+        debug!("Adding multiple glyphs[idx={}, count={}]: {:?}", i, glyph_count, entry);
+
+        *self.entry_buffer.get_mut(slot) = entry;
+    }
+
+    /// Used when a character index has no associated glyph---for example, a ligature
+    /// continuation. The slot is overwritten with a zero-glyph complex entry.
+    pub fn add_nonglyph_for_char_index(&mut self, i: CharIndex, cluster_start: bool, ligature_start: bool) {
+        assert!(i < self.char_len());
+
+        let spacer = GlyphEntry::complex(cluster_start, ligature_start, 0);
+        debug!("adding spacer for chracter without associated glyph[idx={}]", i);
+
+        let slot = i.to_uint();
+        *self.entry_buffer.get_mut(slot) = spacer;
+    }
+
+    /// Iterates over the glyphs of the single character at `i`.
+    pub fn iter_glyphs_for_char_index(&'a self, i: CharIndex) -> GlyphIterator<'a> {
+        let single_char = Range::new(i, CharIndex(1));
+        self.iter_glyphs_for_char_range(&single_char)
+    }
+
+    /// Iterates over the glyphs of every character in `rang`.
+    ///
+    /// Fails if the range starts at or beyond the end of the store, or ends beyond it.
+    #[inline]
+    pub fn iter_glyphs_for_char_range(&'a self, rang: &Range<CharIndex>) -> GlyphIterator<'a> {
+        let limit = self.char_len();
+        if rang.begin() >= limit {
+            fail!("iter_glyphs_for_range: range.begin beyond length!");
+        }
+        if rang.end() > limit {
+            fail!("iter_glyphs_for_range: range.end beyond length!");
+        }
+
+        GlyphIterator {
+            store: self,
+            char_index: rang.begin(),
+            char_range: rang.each_index(),
+            glyph_range: None,
+        }
+    }
+
+    /// Sum of the advances of all glyphs in `rang`.
+    #[inline]
+    pub fn advance_for_char_range(&self, rang: &Range<CharIndex>) -> Au {
+        let mut total = Au(0);
+        for (_, glyph) in self.iter_glyphs_for_char_range(rang) {
+            total = total + glyph.advance();
+        }
+        total
+    }
+
+    // getter methods: each checks the index and forwards to the entry.
+    pub fn char_is_space(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        self.entry_buffer[slot].char_is_space()
+    }
+
+    pub fn char_is_tab(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        self.entry_buffer[slot].char_is_tab()
+    }
+
+    pub fn char_is_newline(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        self.entry_buffer[slot].char_is_newline()
+    }
+
+    pub fn is_ligature_start(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        self.entry_buffer[slot].is_ligature_start()
+    }
+
+    pub fn is_cluster_start(&self, i: CharIndex) -> bool {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        self.entry_buffer[slot].is_cluster_start()
+    }
+
+    pub fn can_break_before(&self, i: CharIndex) -> BreakType {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        self.entry_buffer[slot].can_break_before()
+    }
+
+    // setter methods: each checks the index and writes back the updated entry.
+    pub fn set_char_is_space(&mut self, i: CharIndex) {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        let updated = self.entry_buffer[slot].set_char_is_space();
+        *self.entry_buffer.get_mut(slot) = updated;
+    }
+
+    pub fn set_char_is_tab(&mut self, i: CharIndex) {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        let updated = self.entry_buffer[slot].set_char_is_tab();
+        *self.entry_buffer.get_mut(slot) = updated;
+    }
+
+    pub fn set_char_is_newline(&mut self, i: CharIndex) {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        let updated = self.entry_buffer[slot].set_char_is_newline();
+        *self.entry_buffer.get_mut(slot) = updated;
+    }
+
+    pub fn set_can_break_before(&mut self, i: CharIndex, t: BreakType) {
+        assert!(i < self.char_len());
+        let slot = i.to_uint();
+        let updated = self.entry_buffer[slot].set_can_break_before(t);
+        *self.entry_buffer.get_mut(slot) = updated;
+    }
+}
+
+/// An iterator over the glyphs in a character range in a `GlyphStore`.
+pub struct GlyphIterator<'a> {
+ // The store being iterated.
+ store: &'a GlyphStore,
+ // The character whose glyphs are currently being yielded.
+ char_index: CharIndex,
+ // Remaining characters of the requested range.
+ char_range: EachIndex<int, CharIndex>,
+ // Remaining detailed-glyph indices of the current character; `None` while on the fast path.
+ glyph_range: Option<EachIndex<int, CharIndex>>,
+}
+
+impl<'a> GlyphIterator<'a> {
+ // Slow path when there is a glyph range.
+ // Yields the next detailed glyph of the current character; once they are exhausted it
+ // clears `glyph_range` and re-enters `next()` to move on to the following character.
+ #[inline(never)]
+ fn next_glyph_range(&mut self) -> Option<(CharIndex, GlyphInfo<'a>)> {
+ match self.glyph_range.get_mut_ref().next() {
+ Some(j) => Some((self.char_index,
+ DetailGlyphInfo(self.store, self.char_index, j.get() as u16 /* ??? */))),
+ None => {
+ // No more glyphs for current character. Try to get another.
+ self.glyph_range = None;
+ self.next()
+ }
+ }
+ }
+
+ // Slow path when there is a complex glyph.
+ // Sets `glyph_range` to cover the detailed glyphs of entry `i`, then re-enters `next()`,
+ // which will take the `next_glyph_range()` branch.
+ #[inline(never)]
+ fn next_complex_glyph(&mut self, entry: &GlyphEntry, i: CharIndex)
+ -> Option<(CharIndex, GlyphInfo<'a>)> {
+ let glyphs = self.store.detail_store.get_detailed_glyphs_for_entry(i, entry.glyph_count());
+ self.glyph_range = Some(range::each_index(CharIndex(0), CharIndex(glyphs.len() as int)));
+ self.next()
+ }
+}
+
+impl<'a> Iterator<(CharIndex, GlyphInfo<'a>)> for GlyphIterator<'a> {
+ // I tried to start with something simpler and apply FlatMap, but the
+ // inability to store free variables in the FlatMap struct was problematic.
+ //
+ // This function consists of the fast path and is designed to be inlined into its caller. The
+ // slow paths, which should not be inlined, are `next_glyph_range()` and
+ // `next_complex_glyph()`.
+ #[inline(always)]
+ fn next(&mut self) -> Option<(CharIndex, GlyphInfo<'a>)> {
+ // Would use 'match' here but it borrows contents in a way that
+ // interferes with mutation.
+ if self.glyph_range.is_some() {
+ self.next_glyph_range()
+ } else {
+ // No glyph range. Look at next character.
+ self.char_range.next().and_then(|i| {
+ self.char_index = i;
+ assert!(i < self.store.char_len());
+ let entry = self.store.entry_buffer[i.to_uint()];
+ if entry.is_simple() {
+ // Fast path: a simple entry yields exactly one glyph, read straight from the entry.
+ Some((self.char_index, SimpleGlyphInfo(self.store, i)))
+ } else {
+ // Fall back to the slow path.
+ self.next_complex_glyph(&entry, i)
+ }
+ })
+ }
+ }
+}
diff --git a/components/gfx/text/mod.rs b/components/gfx/text/mod.rs
new file mode 100644
index 00000000000..f705347c441
--- /dev/null
+++ b/components/gfx/text/mod.rs
@@ -0,0 +1,18 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file exists just to make it easier to import things inside of
+ ./text/ without specifying the file they came from.
+
+Note that you still must define each of the files as a module in
+servo.rc. This is not ideal and may be changed in the future. */
+
+pub use text::shaping::Shaper;
+pub use text::text_run::TextRun;
+
+pub mod glyph;
+#[path="shaping/mod.rs"] pub mod shaping;
+pub mod text_run;
+pub mod util;
+
diff --git a/components/gfx/text/shaping/harfbuzz.rs b/components/gfx/text/shaping/harfbuzz.rs
new file mode 100644
index 00000000000..789126e767d
--- /dev/null
+++ b/components/gfx/text/shaping/harfbuzz.rs
@@ -0,0 +1,541 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+extern crate harfbuzz;
+
+use font::{Font, FontHandleMethods, FontTableMethods, FontTableTag};
+use platform::font::FontTable;
+use text::glyph::{CharIndex, GlyphStore, GlyphId, GlyphData};
+use text::shaping::ShaperMethods;
+use text::util::{float_to_fixed, fixed_to_float};
+
+use geom::Point2D;
+use harfbuzz::{HB_MEMORY_MODE_READONLY, HB_DIRECTION_LTR};
+use harfbuzz::{hb_blob_create, hb_face_create_for_tables};
+use harfbuzz::{hb_blob_t};
+use harfbuzz::{hb_bool_t};
+use harfbuzz::{hb_buffer_add_utf8};
+use harfbuzz::{hb_buffer_destroy};
+use harfbuzz::{hb_buffer_get_glyph_positions};
+use harfbuzz::{hb_buffer_set_direction};
+use harfbuzz::{hb_face_destroy};
+use harfbuzz::{hb_face_t, hb_font_t};
+use harfbuzz::{hb_font_create};
+use harfbuzz::{hb_font_destroy, hb_buffer_create};
+use harfbuzz::{hb_font_funcs_create};
+use harfbuzz::{hb_font_funcs_destroy};
+use harfbuzz::{hb_font_funcs_set_glyph_func};
+use harfbuzz::{hb_font_funcs_set_glyph_h_advance_func};
+use harfbuzz::{hb_font_funcs_set_glyph_h_kerning_func};
+use harfbuzz::{hb_font_funcs_t, hb_buffer_t, hb_codepoint_t};
+use harfbuzz::{hb_font_set_funcs};
+use harfbuzz::{hb_font_set_ppem};
+use harfbuzz::{hb_font_set_scale};
+use harfbuzz::{hb_glyph_info_t};
+use harfbuzz::{hb_glyph_position_t};
+use harfbuzz::{hb_position_t, hb_tag_t};
+use harfbuzz::{hb_shape, hb_buffer_get_glyph_infos};
+use libc::{c_uint, c_int, c_void, c_char};
+use servo_util::geometry::Au;
+use servo_util::range::Range;
+use std::mem;
+use std::char;
+use std::cmp;
+use std::ptr;
+
+// Sentinel values stored in the byte-to-glyph map built by save_glyph_results():
+// NO_GLYPH marks a byte offset that starts a character but has no glyph assigned,
+// CONTINUATION_BYTE marks a byte offset that does not start a character.
+static NO_GLYPH: i32 = -1;
+static CONTINUATION_BYTE: i32 = -2;
+
+// View over the glyph info and glyph position arrays of a shaped HarfBuzz buffer.
+// NOTE(review): the pointers are obtained from the buffer passed to new() and are not owned
+// here; presumably they are only valid while that buffer is alive -- confirm against callers.
+pub struct ShapedGlyphData {
+ // Number of glyphs; new() asserts that both arrays have this length.
+ count: int,
+ glyph_infos: *mut hb_glyph_info_t,
+ pos_infos: *mut hb_glyph_position_t,
+}
+
+// Per-glyph result produced by ShapedGlyphData::get_entry_for_glyph().
+pub struct ShapedGlyphEntry {
+ // Taken from the `codepoint` field of the HarfBuzz glyph info and stored as a GlyphId.
+ codepoint: GlyphId,
+ // Horizontal advance.
+ advance: Au,
+ // None when the x/y offsets and the y advance are all zero.
+ offset: Option<Point2D<Au>>,
+}
+
+impl ShapedGlyphData {
+    /// Captures the glyph info and glyph position arrays of `buffer`.
+    ///
+    /// Panics if either array pointer is null or if the two arrays differ in length.
+    pub fn new(buffer: *mut hb_buffer_t) -> ShapedGlyphData {
+        unsafe {
+            let mut glyph_count = 0;
+            let glyph_infos = hb_buffer_get_glyph_infos(buffer, &mut glyph_count);
+            let glyph_count = glyph_count as int;
+            assert!(glyph_infos.is_not_null());
+            let mut pos_count = 0;
+            let pos_infos = hb_buffer_get_glyph_positions(buffer, &mut pos_count);
+            let pos_count = pos_count as int;
+            assert!(pos_infos.is_not_null());
+            assert!(glyph_count == pos_count);
+
+            ShapedGlyphData {
+                count: glyph_count,
+                glyph_infos: glyph_infos,
+                pos_infos: pos_infos,
+            }
+        }
+    }
+
+    /// Returns the `cluster` value of glyph `i`, which callers treat as a byte offset into
+    /// the shaped text.
+    ///
+    /// Panics if `i` is outside `0..len()`.
+    #[inline(always)]
+    fn byte_offset_of_glyph(&self, i: int) -> int {
+        // `i` is signed and is fed to a raw pointer offset, so the lower bound must be
+        // checked too; a negative index would read before the start of the array.
+        assert!(i >= 0 && i < self.count);
+
+        unsafe {
+            let glyph_info_i = self.glyph_infos.offset(i);
+            (*glyph_info_i).cluster as int
+        }
+    }
+
+    /// Number of shaped glyphs.
+    pub fn len(&self) -> int {
+        self.count
+    }
+
+    /// Returns shaped glyph data for one glyph, and updates the y-position of the pen.
+    ///
+    /// Panics if `i` is outside `0..len()`.
+    pub fn get_entry_for_glyph(&self, i: int, y_pos: &mut Au) -> ShapedGlyphEntry {
+        // See byte_offset_of_glyph(): both bounds guard the raw pointer reads below.
+        assert!(i >= 0 && i < self.count);
+
+        unsafe {
+            let glyph_info_i = self.glyph_infos.offset(i);
+            let pos_info_i = self.pos_infos.offset(i);
+            let x_offset = Shaper::fixed_to_float((*pos_info_i).x_offset);
+            let y_offset = Shaper::fixed_to_float((*pos_info_i).y_offset);
+            let x_advance = Shaper::fixed_to_float((*pos_info_i).x_advance);
+            let y_advance = Shaper::fixed_to_float((*pos_info_i).y_advance);
+
+            let x_offset = Au::from_frac_px(x_offset);
+            let y_offset = Au::from_frac_px(y_offset);
+            let x_advance = Au::from_frac_px(x_advance);
+            let y_advance = Au::from_frac_px(y_advance);
+
+            // An offset is only reported when the glyph is displaced or moves the pen
+            // vertically.
+            let offset = if x_offset == Au(0) && y_offset == Au(0) && y_advance == Au(0) {
+                None
+            } else {
+                // adjust the pen..
+                if y_advance > Au(0) {
+                    *y_pos = *y_pos - y_advance;
+                }
+
+                Some(Point2D(x_offset, *y_pos - y_offset))
+            };
+
+            ShapedGlyphEntry {
+                codepoint: (*glyph_info_i).codepoint as GlyphId,
+                advance: x_advance,
+                offset: offset,
+            }
+        }
+    }
+}
+
+// Holds the HarfBuzz face, font and font-funcs objects created in Shaper::new(); they are
+// destroyed again when the Shaper is dropped.
+pub struct Shaper {
+ hb_face: *mut hb_face_t,
+ hb_font: *mut hb_font_t,
+ hb_funcs: *mut hb_font_funcs_t,
+}
+
+#[unsafe_destructor]
+impl Drop for Shaper {
+ // Destroys the three HarfBuzz objects, asserting first that each pointer is non-null.
+ fn drop(&mut self) {
+ unsafe {
+ assert!(self.hb_face.is_not_null());
+ hb_face_destroy(self.hb_face);
+
+ assert!(self.hb_font.is_not_null());
+ hb_font_destroy(self.hb_font);
+
+ assert!(self.hb_funcs.is_not_null());
+ hb_font_funcs_destroy(self.hb_funcs);
+ }
+ }
+}
+
+impl Shaper {
+ // Creates the HarfBuzz face, font and callback table for `font`.
+ // NOTE(review): a raw pointer to `font` is handed to HarfBuzz as callback user data, both
+ // for the table lookup and for the font funcs. Presumably the Font must stay alive and at
+ // the same address for as long as this Shaper exists -- confirm against callers.
+ pub fn new(font: &mut Font) -> Shaper {
+ unsafe {
+ // Indirection for Rust Issue #6248, dynamic freeze scope artificially extended
+ let font_ptr = font as *mut Font;
+ let hb_face: *mut hb_face_t = hb_face_create_for_tables(get_font_table_func,
+ font_ptr as *mut c_void,
+ None);
+ let hb_font: *mut hb_font_t = hb_font_create(hb_face);
+
+ // Set points-per-em. if zero, performs no hinting in that direction.
+ let pt_size = font.pt_size;
+ hb_font_set_ppem(hb_font, pt_size as c_uint, pt_size as c_uint);
+
+ // Set scaling. Note that this takes 16.16 fixed point.
+ hb_font_set_scale(hb_font,
+ Shaper::float_to_fixed(pt_size) as c_int,
+ Shaper::float_to_fixed(pt_size) as c_int);
+
+ // configure static function callbacks.
+ // NB. This funcs structure could be reused globally, as it never changes.
+ let hb_funcs: *mut hb_font_funcs_t = hb_font_funcs_create();
+ hb_font_funcs_set_glyph_func(hb_funcs, glyph_func, ptr::mut_null(), None);
+ hb_font_funcs_set_glyph_h_advance_func(hb_funcs, glyph_h_advance_func, ptr::mut_null(), None);
+ hb_font_funcs_set_glyph_h_kerning_func(hb_funcs, glyph_h_kerning_func, ptr::mut_null(), ptr::mut_null());
+ hb_font_set_funcs(hb_font, hb_funcs, font_ptr as *mut c_void, None);
+
+ Shaper {
+ hb_face: hb_face,
+ hb_font: hb_font,
+ hb_funcs: hb_funcs,
+ }
+ }
+ }
+
+ // Converts a float to fixed point with 16 fractional bits (delegates to text::util).
+ fn float_to_fixed(f: f64) -> i32 {
+ float_to_fixed(16, f)
+ }
+
+ // Converts a HarfBuzz position with 16 fractional bits to a float (delegates to text::util).
+ fn fixed_to_float(i: hb_position_t) -> f64 {
+ fixed_to_float(16, i)
+ }
+}
+
+impl ShaperMethods for Shaper {
+ /// Calculate the layout metrics associated with the given text when rendered in a specific
+ /// font.
+ ///
+ /// The text is always shaped left-to-right. The whole of `text` is added to a fresh
+ /// HarfBuzz buffer, shaped, and the results are written into `glyphs`.
+ fn shape_text(&self, text: &str, glyphs: &mut GlyphStore) {
+ unsafe {
+ let hb_buffer: *mut hb_buffer_t = hb_buffer_create();
+ hb_buffer_set_direction(hb_buffer, HB_DIRECTION_LTR);
+
+ // Arguments after the text pointer: text length, item offset, item length (in bytes).
+ hb_buffer_add_utf8(hb_buffer,
+ text.as_ptr() as *const c_char,
+ text.len() as c_int,
+ 0,
+ text.len() as c_int);
+
+ hb_shape(self.hb_font, hb_buffer, ptr::mut_null(), 0);
+ self.save_glyph_results(text, glyphs, hb_buffer);
+ // NOTE(review): if save_glyph_results() fails, this destroy is not reached and the
+ // buffer would be leaked -- confirm whether that matters here.
+ hb_buffer_destroy(hb_buffer);
+ }
+ }
+}
+
+impl Shaper {
+ /// Copies the shaped glyphs held in `buffer` into `glyphs`, grouping glyphs and characters
+ /// into clumps along the way.
+ ///
+ /// `text` is the UTF-8 string that was shaped. Glyph records report *byte* offsets into
+ /// `text`, while `glyphs` is indexed by *character*, so the loop tracks a byte span
+ /// (`char_byte_span`), a glyph span (`glyph_span`) and a running character index
+ /// (`char_idx`) side by side.
+ ///
+ /// Fails (via `assert!`) if the shaper returned more glyphs than there are characters, if a
+ /// glyph points at a UTF-8 continuation byte, or if a clump ends up without characters or
+ /// without glyphs. `glyphs.finalize_changes()` is called once all entries are stored.
+ fn save_glyph_results(&self, text: &str, glyphs: &mut GlyphStore, buffer: *mut hb_buffer_t) {
+ let glyph_data = ShapedGlyphData::new(buffer);
+ let glyph_count = glyph_data.len();
+ let byte_max = text.len() as int;
+ let char_max = text.char_len() as int;
+
+ // GlyphStore records are indexed by character, not byte offset.
+ // so, we must be careful to increment this when saving glyph entries.
+ let mut char_idx = CharIndex(0);
+
+ // The algorithm below assumes there is never more than one glyph per character overall.
+ assert!(glyph_count <= char_max);
+
+ debug!("Shaped text[char count={}], got back {} glyph info records.",
+ char_max,
+ glyph_count);
+
+ if char_max != glyph_count {
+ debug!("NOTE: Since these are not equal, we probably have been given some complex \
+ glyphs.");
+ }
+
+ // make map of what chars have glyphs
+ // Indexed by byte offset into `text`. Each slot holds a glyph index, NO_GLYPH, or
+ // CONTINUATION_BYTE. NOTE(review): both sentinels are defined outside this view; the
+ // `> NO_GLYPH` test further down presumably relies on them being negative -- confirm.
+ let mut byteToGlyph: Vec<i32>;
+
+ // fast path: all chars are single-byte.
+ if byte_max == char_max {
+ byteToGlyph = Vec::from_elem(byte_max as uint, NO_GLYPH);
+ } else {
+ // Mark every byte as a continuation byte, then reset the first byte of each char.
+ byteToGlyph = Vec::from_elem(byte_max as uint, CONTINUATION_BYTE);
+ for (i, _) in text.char_indices() {
+ *byteToGlyph.get_mut(i) = NO_GLYPH;
+ }
+ }
+
+ debug!("(glyph idx) -> (text byte offset)");
+ for i in range(0, glyph_data.len()) {
+ // loc refers to a *byte* offset within the utf8 string.
+ let loc = glyph_data.byte_offset_of_glyph(i);
+ if loc < byte_max {
+ // A glyph must start at the first byte of a character.
+ assert!(*byteToGlyph.get(loc as uint) != CONTINUATION_BYTE);
+ // If several glyphs share one byte offset, the last one wins here.
+ *byteToGlyph.get_mut(loc as uint) = i as i32;
+ } else {
+ debug!("ERROR: tried to set out of range byteToGlyph: idx={}, glyph idx={}",
+ loc,
+ i);
+ }
+ debug!("{} -> {}", i, loc);
+ }
+
+ debug!("text: {:s}", text);
+ debug!("(char idx): char->(glyph index):");
+ for (i, ch) in text.char_indices() {
+ debug!("{}: {} --> {:d}", i, ch, *byteToGlyph.get(i) as int);
+ }
+
+ // some helpers
+ let mut glyph_span: Range<int> = Range::empty();
+ // this span contains first byte of first char, to last byte of last char in range.
+ // so, end() points to first byte of last+1 char, if it's less than byte_max.
+ let mut char_byte_span: Range<int> = Range::empty();
+ // Passed by mutable reference to every `get_entry_for_glyph` call below.
+ let mut y_pos = Au(0);
+
+ // main loop over each glyph. each iteration usually processes 1 glyph and 1+ chars.
+ // in cases with complex glyph-character associations, 2+ glyphs and 1+ chars can be
+ // processed.
+ while glyph_span.begin() < glyph_count {
+ // start by looking at just one glyph.
+ glyph_span.extend_by(1);
+ debug!("Processing glyph at idx={}", glyph_span.begin());
+
+ let char_byte_start = glyph_data.byte_offset_of_glyph(glyph_span.begin());
+ char_byte_span.reset(char_byte_start, 0);
+
+ // find a range of chars corresponding to this glyph, plus
+ // any trailing chars that do not have associated glyphs.
+ while char_byte_span.end() < byte_max {
+ // Step over one whole character (possibly several bytes).
+ let range = text.char_range_at(char_byte_span.end() as uint);
+ drop(range.ch);
+ char_byte_span.extend_to(range.next as int);
+
+ debug!("Processing char byte span: off={}, len={} for glyph idx={}",
+ char_byte_span.begin(), char_byte_span.length(), glyph_span.begin());
+
+ while char_byte_span.end() != byte_max &&
+ byteToGlyph[char_byte_span.end() as uint] == NO_GLYPH {
+ debug!("Extending char byte span to include byte offset={} with no associated \
+ glyph", char_byte_span.end());
+ let range = text.char_range_at(char_byte_span.end() as uint);
+ drop(range.ch);
+ char_byte_span.extend_to(range.next as int);
+ }
+
+ // extend glyph range to max glyph index covered by char_span,
+ // in cases where one char made several glyphs and left some unassociated chars.
+ let mut max_glyph_idx = glyph_span.end();
+ for i in char_byte_span.each_index() {
+ if byteToGlyph[i as uint] > NO_GLYPH {
+ max_glyph_idx = cmp::max(byteToGlyph[i as uint] as int + 1, max_glyph_idx);
+ }
+ }
+
+ if max_glyph_idx > glyph_span.end() {
+ glyph_span.extend_to(max_glyph_idx);
+ debug!("Extended glyph span (off={}, len={}) to cover char byte span's max \
+ glyph index",
+ glyph_span.begin(), glyph_span.length());
+ }
+
+
+ // if there's just one glyph, then we don't need further checks.
+ if glyph_span.length() == 1 { break; }
+
+ // if no glyphs were found yet, extend the char byte range more.
+ if glyph_span.length() == 0 { continue; }
+
+ debug!("Complex (multi-glyph to multi-char) association found. This case \
+ probably doesn't work.");
+
+ // The clump is complete only once every glyph in the span maps back into the
+ // byte span; otherwise keep pulling in characters.
+ let mut all_glyphs_are_within_cluster: bool = true;
+ for j in glyph_span.each_index() {
+ let loc = glyph_data.byte_offset_of_glyph(j);
+ if !char_byte_span.contains(loc) {
+ all_glyphs_are_within_cluster = false;
+ break
+ }
+ }
+
+ debug!("All glyphs within char_byte_span cluster?: {}",
+ all_glyphs_are_within_cluster);
+
+ // found a valid range; stop extending char_span.
+ if all_glyphs_are_within_cluster {
+ break
+ }
+ }
+
+ // character/glyph clump must contain characters.
+ assert!(char_byte_span.length() > 0);
+ // character/glyph clump must contain glyphs.
+ assert!(glyph_span.length() > 0);
+
+ // now char_span is a ligature clump, formed by the glyphs in glyph_span.
+ // we need to find the chars that correspond to actual glyphs (char_extended_span),
+ // and set glyph info for those and empty infos for the chars that are continuations.
+
+ // a simple example:
+ // chars: 'f' 't' 't'
+ // glyphs: 'ftt' '' ''
+ // cgmap: t f f
+ // gspan: [-]
+ // cspan: [-]
+ // covsp: [---------------]
+
+ let mut covered_byte_span = char_byte_span.clone();
+ // extend, clipping at end of text range.
+ while covered_byte_span.end() < byte_max
+ && byteToGlyph[covered_byte_span.end() as uint] == NO_GLYPH {
+ let range = text.char_range_at(covered_byte_span.end() as uint);
+ drop(range.ch);
+ covered_byte_span.extend_to(range.next as int);
+ }
+
+ // NOTE(review): this branch only empties the two spans; execution still falls
+ // through to the glyph-storing code below. Confirm that is intended.
+ if covered_byte_span.begin() >= byte_max {
+ // oops, out of range. clip and forget this clump.
+ let end = glyph_span.end(); // FIXME: borrow checker workaround
+ glyph_span.reset(end, 0);
+ let end = char_byte_span.end(); // FIXME: borrow checker workaround
+ char_byte_span.reset(end, 0);
+ }
+
+ // clamp to end of text. (I don't think this will be necessary, but..)
+ let end = covered_byte_span.end(); // FIXME: borrow checker workaround
+ covered_byte_span.extend_to(cmp::min(end, byte_max));
+
+ // fast path: 1-to-1 mapping of single char and single glyph.
+ if glyph_span.length() == 1 {
+ // TODO(Issue #214): cluster ranges need to be computed before
+ // shaping, and then consulted here.
+ // for now, just pretend that every character is a cluster start.
+ // (i.e., pretend there are no combining character sequences).
+ // 1-to-1 mapping of character to glyph also treated as ligature start.
+ let shape = glyph_data.get_entry_for_glyph(glyph_span.begin(), &mut y_pos);
+ let data = GlyphData::new(shape.codepoint,
+ shape.advance,
+ shape.offset,
+ false,
+ true,
+ true);
+ glyphs.add_glyph_for_char_index(char_idx, &data);
+ } else {
+ // collect all glyphs to be assigned to the first character.
+ let mut datas = vec!();
+
+ for glyph_i in glyph_span.each_index() {
+ let shape = glyph_data.get_entry_for_glyph(glyph_i, &mut y_pos);
+ datas.push(GlyphData::new(shape.codepoint,
+ shape.advance,
+ shape.offset,
+ false, // not missing
+ true, // treat as cluster start
+ glyph_i > glyph_span.begin()));
+ // all but first are ligature continuations
+ }
+
+ // now add the detailed glyph entry.
+ glyphs.add_glyphs_for_char_index(char_idx, datas.as_slice());
+
+ // set the other chars, who have no glyphs
+ // The first char of the span already received the glyphs above, so each later
+ // char start inside the covered span gets an empty (non-glyph) entry.
+ let mut i = covered_byte_span.begin();
+ loop {
+ let range = text.char_range_at(i as uint);
+ drop(range.ch);
+ i = range.next as int;
+ if i >= covered_byte_span.end() { break; }
+ char_idx = char_idx + CharIndex(1);
+ glyphs.add_nonglyph_for_char_index(char_idx, false, false);
+ }
+ }
+
+ // shift up our working spans past things we just handled.
+ let end = glyph_span.end(); // FIXME: borrow checker workaround
+ glyph_span.reset(end, 0);
+ let end = char_byte_span.end();; // FIXME: borrow checker workaround
+ char_byte_span.reset(end, 0);
+ char_idx = char_idx + CharIndex(1);
+ }
+
+ // this must be called after adding all glyph data; it sorts the
+ // lookup table for finding detailed glyphs by associated char index.
+ glyphs.finalize_changes();
+ }
+}
+
+/// Callbacks from Harfbuzz when font map and glyph advance lookup needed.
+///
+/// `glyph_func` maps the codepoint `unicode` to a glyph id using the `Font` that was
+/// registered as `font_data`. On success the id is written through `glyph` and a true
+/// `hb_bool_t` is returned; a false `hb_bool_t` is returned when the font has no glyph for
+/// the codepoint or when `unicode` is not a valid Unicode scalar value.
+extern fn glyph_func(_: *mut hb_font_t,
+                     font_data: *mut c_void,
+                     unicode: hb_codepoint_t,
+                     _: hb_codepoint_t,
+                     glyph: *mut hb_codepoint_t,
+                     _: *mut c_void)
+                     -> hb_bool_t {
+    let font: *const Font = font_data as *const Font;
+    assert!(font.is_not_null());
+
+    unsafe {
+        // This function is called from C. An invalid scalar value (for example a surrogate)
+        // must be reported as "no glyph" rather than failing in the middle of a callback.
+        let ch = match char::from_u32(unicode) {
+            Some(ch) => ch,
+            None => return false as hb_bool_t,
+        };
+
+        match (*font).glyph_index(ch) {
+            Some(g) => {
+                *glyph = g as hb_codepoint_t;
+                true as hb_bool_t
+            }
+            None => false as hb_bool_t
+        }
+    }
+}
+
+/// HarfBuzz callback: returns the horizontal advance of `glyph`, looked up on the `Font`
+/// registered as `font_data` and converted with `Shaper::float_to_fixed`.
+extern fn glyph_h_advance_func(_: *mut hb_font_t,
+                               font_data: *mut c_void,
+                               glyph: hb_codepoint_t,
+                               _: *mut c_void)
+                               -> hb_position_t {
+    let font = font_data as *mut Font;
+    assert!(font.is_not_null());
+
+    unsafe {
+        Shaper::float_to_fixed((*font).glyph_h_advance(glyph as GlyphId))
+    }
+}
+
+/// HarfBuzz callback: returns the horizontal kerning between `first_glyph` and
+/// `second_glyph`, looked up on the `Font` registered as `font_data` and converted with
+/// `Shaper::float_to_fixed`.
+extern fn glyph_h_kerning_func(_: *mut hb_font_t,
+                               font_data: *mut c_void,
+                               first_glyph: hb_codepoint_t,
+                               second_glyph: hb_codepoint_t,
+                               _: *mut c_void)
+                               -> hb_position_t {
+    let font = font_data as *mut Font;
+    assert!(font.is_not_null());
+
+    unsafe {
+        let first = first_glyph as GlyphId;
+        let second = second_glyph as GlyphId;
+        Shaper::float_to_fixed((*font).glyph_h_kerning(first, second))
+    }
+}
+
+// Callback to get a font table out of a font.
+//
+// `user_data` is cast to the `Font` whose table for `tag` is requested. Returns a null blob
+// when the font has no such table; otherwise returns a read-only HarfBuzz blob created over
+// the table's buffer, with `destroy_blob_func` registered as the destroy callback.
+extern fn get_font_table_func(_: *mut hb_face_t, tag: hb_tag_t, user_data: *mut c_void) -> *mut hb_blob_t {
+ unsafe {
+ let font: *const Font = user_data as *const Font;
+ assert!(font.is_not_null());
+
+ // TODO(Issue #197): reuse font table data, which will change the unsound trickery here.
+ match (*font).get_table_for_tag(tag as FontTableTag) {
+ None => ptr::mut_null(),
+ Some(ref font_table) => {
+ // NOTE(review): `font_table` borrows from the value returned by
+ // `get_table_for_tag`, which appears to live only for this match, yet the raw
+ // pointer below is handed to HarfBuzz as the blob's user data. This looks like
+ // the "unsound trickery" the TODO refers to -- confirm the table's lifetime.
+ let skinny_font_table_ptr: *const FontTable = font_table; // private context
+
+ let mut blob: *mut hb_blob_t = ptr::mut_null();
+ (*skinny_font_table_ptr).with_buffer(|buf: *const u8, len: uint| {
+ // HarfBuzz calls `destroy_blob_func` when the buffer is no longer needed.
+ blob = hb_blob_create(buf as *const c_char,
+ len as c_uint,
+ HB_MEMORY_MODE_READONLY,
+ mem::transmute(skinny_font_table_ptr),
+ destroy_blob_func);
+ });
+
+ // `with_buffer` is expected to have run the closure and set `blob`.
+ assert!(blob.is_not_null());
+ blob
+ }
+ }
+ }
+}
+
+// TODO(Issue #197): reuse font table data, which will change the unsound trickery here.
+// In particular, we'll need to cast to a boxed, rather than owned, FontTable.
+
+// even better, should cache the harfbuzz blobs directly instead of recreating a lot.
+//
+// Destroy callback registered with every blob created in `get_font_table_func`. It is
+// currently a no-op: the pointer it receives is ignored and nothing is released.
+extern fn destroy_blob_func(_: *mut c_void) {
+ // TODO: Previous code here was broken. Rewrite.
+}
diff --git a/components/gfx/text/shaping/mod.rs b/components/gfx/text/shaping/mod.rs
new file mode 100644
index 00000000000..ef4bc2088f0
--- /dev/null
+++ b/components/gfx/text/shaping/mod.rs
@@ -0,0 +1,19 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+//! Shaper encapsulates a specific shaper, such as Harfbuzz,
+//! Uniscribe, Pango, or Coretext.
+//!
+//! Currently, only harfbuzz bindings are implemented.
+
+use text::glyph::GlyphStore;
+
+pub use Shaper = text::shaping::harfbuzz::Shaper;
+
+pub mod harfbuzz;
+
+/// Interface implemented by a text shaper.
+pub trait ShaperMethods {
+ /// Shapes `text` and records the resulting glyphs in `glyphs`.
+ fn shape_text(&self, text: &str, glyphs: &mut GlyphStore);
+}
+
diff --git a/components/gfx/text/text_run.rs b/components/gfx/text/text_run.rs
new file mode 100644
index 00000000000..70c10f1c64c
--- /dev/null
+++ b/components/gfx/text/text_run.rs
@@ -0,0 +1,271 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use font::{Font, RunMetrics, FontMetrics};
+use servo_util::geometry::Au;
+use servo_util::range::Range;
+use servo_util::vec::{Comparator, FullBinarySearchMethods};
+use std::slice::Items;
+use sync::Arc;
+use text::glyph::{CharIndex, GlyphStore};
+use font::FontHandleMethods;
+use platform::font_template::FontTemplateData;
+
+/// A single "paragraph" of text in one font size and style.
+#[deriving(Clone)]
+pub struct TextRun {
+ /// The text this run was built from.
+ pub text: Arc<String>,
+ /// Template of the font used for the run (taken from the font's handle in `new`).
+ pub font_template: Arc<FontTemplateData>,
+ /// The font's `pt_size` at the time the run was created.
+ pub pt_size: f64,
+ /// A copy of the font's metrics at the time the run was created.
+ pub font_metrics: FontMetrics,
+ /// The glyph runs that make up this text run.
+ pub glyphs: Arc<Vec<GlyphRun>>,
+}
+
+/// A single series of glyphs within a text run.
+///
+/// `TextRun::break_and_shape` creates one of these per whitespace or non-whitespace slice.
+#[deriving(Clone)]
+pub struct GlyphRun {
+ /// The glyphs.
+ glyph_store: Arc<GlyphStore>,
+ /// The range of characters in the containing run.
+ range: Range<CharIndex>,
+}
+
+/// Iterator over the glyph runs of a `TextRun` that overlap a requested character range.
+///
+/// Each item is the run's glyph store, the run's starting character index, and the overlap
+/// expressed relative to that start.
+pub struct SliceIterator<'a> {
+ /// The remaining glyph runs to visit.
+ glyph_iter: Items<'a, GlyphRun>,
+ /// The requested character range, in text-run coordinates.
+ range: Range<CharIndex>,
+}
+
+/// Comparator used to binary-search a list of `GlyphRun`s for the one containing a
+/// character index.
+struct CharIndexComparator;
+
+impl Comparator<CharIndex,GlyphRun> for CharIndexComparator {
+    /// Orders `key` relative to the character range of `value`: `Less` when the key lies
+    /// before the range, `Greater` when it lies at or past the range's end, and `Equal`
+    /// when the range contains it.
+    fn compare(&self, key: &CharIndex, value: &GlyphRun) -> Ordering {
+        let run_range = &value.range;
+        if *key < run_range.begin() {
+            return Less;
+        }
+        if *key >= run_range.end() {
+            return Greater;
+        }
+        Equal
+    }
+}
+
+impl<'a> Iterator<(&'a GlyphStore, CharIndex, Range<CharIndex>)> for SliceIterator<'a> {
+    /// Advances to the next glyph run and yields its glyph store, its starting character
+    /// index, and the part of the requested range it covers (relative to that start).
+    ///
+    /// Iteration stops when the runs are exhausted or as soon as a run does not overlap the
+    /// requested range.
+    // inline(always) due to the inefficient rt failures messing up inline heuristics, I think.
+    #[inline(always)]
+    fn next(&mut self) -> Option<(&'a GlyphStore, CharIndex, Range<CharIndex>)> {
+        let run = match self.glyph_iter.next() {
+            Some(run) => run,
+            None => return None,
+        };
+
+        // Clip the requested range to this run, then rebase it onto the run's own start.
+        let run_begin = run.range.begin();
+        let mut overlap = self.range.intersect(&run.range);
+        overlap.shift_by(-run_begin);
+
+        if overlap.is_empty() {
+            None
+        } else {
+            Some((&*run.glyph_store, run_begin, overlap))
+        }
+    }
+}
+
+/// Iterator that yields the non-whitespace clumps ("natural lines") found in a character
+/// range of a `TextRun`.
+pub struct LineIterator<'a> {
+ /// The requested character range; its end bounds the final clump.
+ range: Range<CharIndex>,
+ /// The clump currently being accumulated, if any.
+ clump: Option<Range<CharIndex>>,
+ /// The underlying iterator over glyph-run slices.
+ slices: SliceIterator<'a>,
+}
+
+impl<'a> Iterator<Range<CharIndex>> for LineIterator<'a> {
+    /// Returns the next clump of consecutive non-whitespace slices, in text-run coordinates.
+    ///
+    /// Whitespace slices before a clump are skipped, and the whitespace slice that ends a
+    /// clump is not included in it. When the slices run out, a pending clump is extended to
+    /// the end of the requested range and returned; after that the iterator yields `None`.
+    fn next(&mut self) -> Option<Range<CharIndex>> {
+        // Loop until we hit whitespace and are in a clump.
+        loop {
+            match self.slices.next() {
+                Some((glyphs, offset, slice_range)) => {
+                    match (glyphs.is_whitespace(), self.clump) {
+                        (false, Some(mut c)) => {
+                            // `c` is a copy of the stored clump, so the grown range has to
+                            // be written back; mutating the copy alone would be lost.
+                            c.extend_by(slice_range.length());
+                            self.clump = Some(c);
+                        }
+                        (false, None) => {
+                            // Start a new clump; `slice_range` is relative to the slice, so
+                            // shift it back into text-run coordinates.
+                            let mut c = slice_range;
+                            c.shift_by(offset);
+                            self.clump = Some(c);
+                        }
+                        (true, None) => { /* chomp whitespace */ }
+                        (true, Some(c)) => {
+                            self.clump = None;
+                            // The final whitespace clump is not included.
+                            return Some(c);
+                        }
+                    }
+                },
+                None => {
+                    // flush any remaining chars as a line
+                    if self.clump.is_some() {
+                        let mut c = self.clump.take_unwrap();
+                        c.extend_to(self.range.end());
+                        return Some(c);
+                    } else {
+                        return None;
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl<'a> TextRun {
+    /// Builds a text run for `text`, shaped with `font`.
+    ///
+    /// The text is split into alternating whitespace / non-whitespace slices, each shaped
+    /// separately; the font's metrics, template and point size are captured alongside.
+    pub fn new(font: &mut Font, text: String) -> TextRun {
+        // Shape first: this borrows `text`, which is moved into the `Arc` below.
+        let glyphs = TextRun::break_and_shape(font, text.as_slice());
+        TextRun {
+            text: Arc::new(text),
+            font_metrics: font.metrics.clone(),
+            font_template: font.handle.get_template(),
+            pt_size: font.pt_size,
+            glyphs: Arc::new(glyphs),
+        }
+    }
+
+    /// Splits `text` at every transition between whitespace (`' '`, `'\t'`, `'\n'`) and
+    /// non-whitespace and shapes each slice with `font`.
+    ///
+    /// Returns one `GlyphRun` per non-empty slice, in text order, each tagged with the
+    /// character range it covers.
+    pub fn break_and_shape(font: &mut Font, text: &str) -> Vec<GlyphRun> {
+        // TODO(Issue #230): do a better job. See Gecko's LineBreaker.
+        let mut glyphs = vec!();
+        let (mut byte_i, mut char_i) = (0u, CharIndex(0));
+        let mut cur_slice_is_whitespace = false;
+        let (mut byte_last_boundary, mut char_last_boundary) = (0, CharIndex(0));
+        while byte_i < text.len() {
+            let range = text.char_range_at(byte_i);
+            let ch = range.ch;
+            let next = range.next;
+
+            // Slices alternate between whitespace and non-whitespace,
+            // representing line break opportunities.
+            let can_break_before = if cur_slice_is_whitespace {
+                match ch {
+                    ' ' | '\t' | '\n' => false,
+                    _ => {
+                        cur_slice_is_whitespace = false;
+                        true
+                    }
+                }
+            } else {
+                match ch {
+                    ' ' | '\t' | '\n' => {
+                        cur_slice_is_whitespace = true;
+                        true
+                    },
+                    _ => false
+                }
+            };
+
+            // Create a glyph store for this slice if it's nonempty.
+            // `cur_slice_is_whitespace` already describes the slice that starts at `ch`, so
+            // the slice being flushed here has the opposite whitespace-ness.
+            if can_break_before && byte_i > byte_last_boundary {
+                let slice = text.slice(byte_last_boundary, byte_i).to_string();
+                debug!("creating glyph store for slice {} (ws? {}), {} - {} in run {}",
+                        slice, !cur_slice_is_whitespace, byte_last_boundary, byte_i, text);
+                glyphs.push(GlyphRun {
+                    glyph_store: font.shape_text(slice, !cur_slice_is_whitespace),
+                    range: Range::new(char_last_boundary, char_i - char_last_boundary),
+                });
+                byte_last_boundary = byte_i;
+                char_last_boundary = char_i;
+            }
+
+            byte_i = next;
+            char_i = char_i + CharIndex(1);
+        }
+
+        // Create a glyph store for the final slice if it's nonempty.
+        if byte_i > byte_last_boundary {
+            let slice = text.slice_from(byte_last_boundary).to_string();
+            debug!("creating glyph store for final slice {} (ws? {}), {} - {} in run {}",
+                slice, cur_slice_is_whitespace, byte_last_boundary, text.len(), text);
+            glyphs.push(GlyphRun {
+                glyph_store: font.shape_text(slice, cur_slice_is_whitespace),
+                range: Range::new(char_last_boundary, char_i - char_last_boundary),
+            });
+        }
+
+        glyphs
+    }
+
+    /// Number of characters in the run: the end of the last glyph run's range, or zero when
+    /// there are no glyph runs.
+    pub fn char_len(&self) -> CharIndex {
+        match self.glyphs.last() {
+            None => CharIndex(0),
+            Some(ref glyph_run) => glyph_run.range.end(),
+        }
+    }
+
+    /// The glyph runs that make up this text run.
+    pub fn glyphs(&'a self) -> &'a Vec<GlyphRun> {
+        &*self.glyphs
+    }
+
+    /// True when every slice overlapping `range` is whitespace (also true when no slice
+    /// overlaps it).
+    pub fn range_is_trimmable_whitespace(&self, range: &Range<CharIndex>) -> bool {
+        self.iter_slices_for_range(range).all(|(slice_glyphs, _, _)| {
+            slice_glyphs.is_whitespace()
+        })
+    }
+
+    /// Ascent taken from the captured font metrics.
+    pub fn ascent(&self) -> Au {
+        self.font_metrics.ascent
+    }
+
+    /// Descent taken from the captured font metrics.
+    pub fn descent(&self) -> Au {
+        self.font_metrics.descent
+    }
+
+    /// Total advance of the characters in `range` (text-run coordinates), summed over the
+    /// slices that overlap it.
+    pub fn advance_for_range(&self, range: &Range<CharIndex>) -> Au {
+        // TODO(Issue #199): alter advance direction for RTL
+        // TODO(Issue #98): using inter-char and inter-word spacing settings when measuring text
+        self.iter_slices_for_range(range)
+            .fold(Au(0), |advance, (glyphs, _, slice_range)| {
+                advance + glyphs.advance_for_char_range(&slice_range)
+            })
+    }
+
+    /// Run metrics (advance, ascent, descent) for `range`, given in text-run coordinates.
+    pub fn metrics_for_range(&self, range: &Range<CharIndex>) -> RunMetrics {
+        RunMetrics::new(self.advance_for_range(range),
+                        self.font_metrics.ascent,
+                        self.font_metrics.descent)
+    }
+
+    /// Run metrics for `slice_range`, a range relative to the glyph store `glyphs`.
+    pub fn metrics_for_slice(&self, glyphs: &GlyphStore, slice_range: &Range<CharIndex>) -> RunMetrics {
+        RunMetrics::new(glyphs.advance_for_char_range(slice_range),
+                        self.font_metrics.ascent,
+                        self.font_metrics.descent)
+    }
+
+    /// Width of the widest single slice overlapping `range`, i.e. the narrowest width the
+    /// range can be laid out in when breaking at every slice boundary.
+    pub fn min_width_for_range(&self, range: &Range<CharIndex>) -> Au {
+        debug!("iterating outer range {:?}", range);
+        self.iter_slices_for_range(range).fold(Au(0), |max_piece_width, (glyphs, offset, slice_range)| {
+            debug!("iterated on {:?}[{:?}]", offset, slice_range);
+            // `slice_range` is relative to the slice's own glyph store, so it must be
+            // measured against that store. `advance_for_range` expects text-run
+            // coordinates and would measure the wrong characters.
+            Au::max(max_piece_width, glyphs.advance_for_char_range(&slice_range))
+        })
+    }
+
+    /// Returns the index of the first glyph run containing the given character index.
+    fn index_of_first_glyph_run_containing(&self, index: CharIndex) -> Option<uint> {
+        self.glyphs.as_slice().binary_search_index_by(&index, CharIndexComparator)
+    }
+
+    /// Iterates over the slices overlapping `range`, starting at the glyph run that contains
+    /// `range.begin()`. The iterator is empty when no run contains that index.
+    pub fn iter_slices_for_range(&'a self, range: &Range<CharIndex>) -> SliceIterator<'a> {
+        let index = match self.index_of_first_glyph_run_containing(range.begin()) {
+            None => self.glyphs.len(),
+            Some(index) => index,
+        };
+        SliceIterator {
+            glyph_iter: self.glyphs.slice_from(index).iter(),
+            range: *range,
+        }
+    }
+
+    /// Iterates over the non-whitespace clumps ("natural lines") within `range`.
+    pub fn iter_natural_lines_for_range(&'a self, range: &Range<CharIndex>) -> LineIterator<'a> {
+        LineIterator {
+            range: *range,
+            clump: None,
+            slices: self.iter_slices_for_range(range),
+        }
+    }
+}
diff --git a/components/gfx/text/util.rs b/components/gfx/text/util.rs
new file mode 100644
index 00000000000..c5059bbff10
--- /dev/null
+++ b/components/gfx/text/util.rs
@@ -0,0 +1,285 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use text::glyph::CharIndex;
+
+/// Whitespace handling applied by `transform_text`.
+#[deriving(PartialEq)]
+pub enum CompressionMode {
+ /// Characters are copied through; the positions of newlines are recorded.
+ CompressNone,
+ /// Runs of spaces and tabs are collapsed to a single space; newlines are kept as-is.
+ CompressWhitespace,
+ /// Like `CompressWhitespace`, but newlines also count as whitespace.
+ CompressWhitespaceNewline,
+ /// Newlines are dropped; other characters are copied through.
+ DiscardNewline
+}
+
+// ported from Gecko's nsTextFrameUtils::TransformText.
+//
+// High level TODOs:
+//
+// * Issue #113: consider incoming text state (arabic, etc)
+//               and propagate outgoing text state (dual of above)
+//
+// * Issue #114: record skipped and kept chars for mapping original to new text
+//
+// * Untracked: various edge cases for bidi, CJK, etc.
+//
+// Transforms `text` according to `mode` and returns the transformed string together with a
+// flag telling whether the text ended in whitespace (the "outgoing whitespace" state).
+//
+// * `incoming_whitespace`: whether the text preceding `text` ended in whitespace. It is only
+//   consulted by the two compressing modes, where it suppresses a leading space.
+// * `new_line_pos`: in `CompressNone` mode, receives one entry per newline: the number of
+//   non-newline characters kept since the previous newline (i.e. a relative position).
+pub fn transform_text(text: &str, mode: CompressionMode,
+                      incoming_whitespace: bool,
+                      new_line_pos: &mut Vec<CharIndex>) -> (String, bool) {
+    let mut out_str = String::new();
+    let out_whitespace = match mode {
+        CompressNone | DiscardNewline => {
+            let mut new_line_index = CharIndex(0);
+            for ch in text.chars() {
+                if is_discardable_char(ch, mode) {
+                    // TODO: record skipped char
+                } else {
+                    // TODO: record kept char
+                    if ch == '\t' {
+                        // TODO: set "has tab" flag
+                    } else if ch == '\n' {
+                        // Save new-line's position for line-break
+                        // This value is relative(not absolute)
+                        new_line_pos.push(new_line_index);
+                        new_line_index = CharIndex(0);
+                    }
+
+                    if ch != '\n' {
+                        new_line_index = new_line_index + CharIndex(1);
+                    }
+                    out_str.push_char(ch);
+                }
+            }
+            // The outgoing state depends on the *last* character of the text.
+            // `char_at_reverse(i)` yields the character that ends at byte offset `i`, so the
+            // offset to pass is the length of the text, not 0.
+            text.len() > 0 && is_in_whitespace(text.char_at_reverse(text.len()), mode)
+        },
+
+        CompressWhitespace | CompressWhitespaceNewline => {
+            let mut in_whitespace: bool = incoming_whitespace;
+            for ch in text.chars() {
+                // TODO: discard newlines between CJK chars
+                let mut next_in_whitespace: bool = is_in_whitespace(ch, mode);
+
+                if !next_in_whitespace {
+                    if is_always_discardable_char(ch) {
+                        // revert whitespace setting, since this char was discarded
+                        next_in_whitespace = in_whitespace;
+                        // TODO: record skipped char
+                    } else {
+                        // TODO: record kept char
+                        out_str.push_char(ch);
+                    }
+                } else { /* next_in_whitespace; possibly add a space char */
+                    if in_whitespace {
+                        // TODO: record skipped char
+                    } else {
+                        // TODO: record kept char
+                        out_str.push_char(' ');
+                    }
+                }
+                // save whitespace context for next char
+                in_whitespace = next_in_whitespace;
+            } /* /for str::each_char */
+            in_whitespace
+        }
+    };
+
+    return (out_str.into_string(), out_whitespace);
+
+    // Spaces and tabs are whitespace in every mode; a newline only counts as whitespace in
+    // `CompressWhitespaceNewline` mode.
+    fn is_in_whitespace(ch: char, mode: CompressionMode) -> bool {
+        match (ch, mode) {
+            (' ', _)  => true,
+            ('\t', _) => true,
+            ('\n', CompressWhitespaceNewline) => true,
+            (_, _)    => false
+        }
+    }
+
+    // A character is dropped when it is always discardable, or when it is a newline and the
+    // mode discards newlines.
+    fn is_discardable_char(ch: char, mode: CompressionMode) -> bool {
+        if is_always_discardable_char(ch) {
+            return true;
+        }
+        match mode {
+            DiscardNewline | CompressWhitespaceNewline => ch == '\n',
+            _ => false
+        }
+    }
+
+    // Placeholder: no character is unconditionally discarded yet.
+    fn is_always_discardable_char(_ch: char) -> bool {
+        // TODO: check for bidi control chars, soft hyphens.
+        false
+    }
+}
+
+/// Converts `f` to a fixed-point integer with `before` fractional bits, i.e. computes
+/// `f * 2^before` truncated towards zero.
+///
+/// The multiplication is done in floating point so the fractional part of `f` contributes to
+/// the result; converting `f` to an integer first would throw it away.
+pub fn float_to_fixed(before: int, f: f64) -> i32 {
+    ((1i32 << before as uint) as f64 * f) as i32
+}
+
+/// Converts the fixed-point integer `f`, which has `before` fractional bits, to a float by
+/// dividing it by `2^before`.
+pub fn fixed_to_float(before: int, f: i32) -> f64 {
+    let scale = (1i32 << before as uint) as f64;
+    f as f64 / scale
+}
+
+/// Rounds the fixed-point integer `f`, which has `before` fractional bits, to the nearest
+/// integer. Halves are rounded away from zero.
+pub fn fixed_to_rounded_int(before: int, f: i32) -> int {
+    let shift = before as uint;
+    // One half, expressed in the fixed-point format.
+    let half = 1i32 << (before - 1) as uint;
+    let rounded = if f > 0i32 {
+        (half + f) >> shift
+    } else {
+        // Round the magnitude, then restore the sign.
+        -((half - f) >> shift)
+    };
+    rounded as int
+}
+
+/* Generate a 32-bit TrueType tag from its 4 characters */
+pub fn true_type_tag(a: char, b: char, c: char, d: char) -> u32 {
+    // `a` lands in the most significant byte and `d` in the least significant one.
+    let high = (a as u32) << 24 | (b as u32) << 16;
+    let low = (c as u32) << 8 | (d as u32);
+    high | low
+}
+
+// Checks that the four characters are packed most-significant-byte first.
+#[test]
+fn test_true_type_tag() {
+ assert_eq!(true_type_tag('c', 'm', 'a', 'p'), 0x_63_6D_61_70_u32);
+}
+
+// In `CompressNone` mode the transformed text must be identical to the input.
+#[test]
+fn test_transform_compress_none() {
+ let test_strs = vec!(
+ " foo bar",
+ "foo bar ",
+ "foo\n bar",
+ "foo \nbar",
+ " foo bar \nbaz",
+ "foo bar baz",
+ "foobarbaz\n\n"
+ );
+ let mode = CompressNone;
+
+ for test in test_strs.iter() {
+ let mut new_line_pos = vec!();
+ let (trimmed_str, _out) = transform_text(*test, mode, true, &mut new_line_pos);
+ assert_eq!(trimmed_str.as_slice(), *test)
+ }
+}
+
+// In `DiscardNewline` mode newlines are removed and everything else is kept; each input in
+// `test_strs` is compared against the entry at the same position in `oracle_strs`.
+#[test]
+fn test_transform_discard_newline() {
+ let test_strs = vec!(
+ " foo bar",
+ "foo bar ",
+ "foo\n bar",
+ "foo \nbar",
+ " foo bar \nbaz",
+ "foo bar baz",
+ "foobarbaz\n\n"
+ );
+
+ let oracle_strs = vec!(
+ " foo bar",
+ "foo bar ",
+ "foo bar",
+ "foo bar",
+ " foo bar baz",
+ "foo bar baz",
+ "foobarbaz"
+ );
+
+ // Guard against the two tables drifting apart.
+ assert_eq!(test_strs.len(), oracle_strs.len());
+ let mode = DiscardNewline;
+
+ for (test, oracle) in test_strs.iter().zip(oracle_strs.iter()) {
+ let mut new_line_pos = vec!();
+ let (trimmed_str, _out) = transform_text(*test, mode, true, &mut new_line_pos);
+ assert_eq!(trimmed_str.as_slice(), *oracle)
+ }
+}
+
+/* FIXME: Fix and re-enable
+#[test]
+fn test_transform_compress_whitespace() {
+ let test_strs : ~[String] = ~[" foo bar".to_string(),
+ "foo bar ".to_string(),
+ "foo\n bar".to_string(),
+ "foo \nbar".to_string(),
+ " foo bar \nbaz".to_string(),
+ "foo bar baz".to_string(),
+ "foobarbaz\n\n".to_string()];
+
+ let oracle_strs : ~[String] = ~[" foo bar".to_string(),
+ "foo bar ".to_string(),
+ "foo\n bar".to_string(),
+ "foo \nbar".to_string(),
+ " foo bar \nbaz".to_string(),
+ "foo bar baz".to_string(),
+ "foobarbaz\n\n".to_string()];
+
+ assert_eq!(test_strs.len(), oracle_strs.len());
+ let mode = CompressWhitespace;
+
+ for i in range(0, test_strs.len()) {
+ let mut new_line_pos = ~[];
+ let (trimmed_str, _out) = transform_text(test_strs[i], mode, true, &mut new_line_pos);
+ assert_eq!(&trimmed_str, &oracle_strs[i])
+ }
+}
+
+#[test]
+fn test_transform_compress_whitespace_newline() {
+ let test_strs : ~[String] = ~[" foo bar".to_string(),
+ "foo bar ".to_string(),
+ "foo\n bar".to_string(),
+ "foo \nbar".to_string(),
+ " foo bar \nbaz".to_string(),
+ "foo bar baz".to_string(),
+ "foobarbaz\n\n".to_string()];
+
+ let oracle_strs : ~[String] = ~["foo bar".to_string(),
+ "foo bar ".to_string(),
+ "foo bar".to_string(),
+ "foo bar".to_string(),
+ " foo bar baz".to_string(),
+ "foo bar baz".to_string(),
+ "foobarbaz ".to_string()];
+
+ assert_eq!(test_strs.len(), oracle_strs.len());
+ let mode = CompressWhitespaceNewline;
+
+ for i in range(0, test_strs.len()) {
+ let mut new_line_pos = ~[];
+ let (trimmed_str, _out) = transform_text(test_strs[i], mode, true, &mut new_line_pos);
+ assert_eq!(&trimmed_str, &oracle_strs[i])
+ }
+}
+*/
+
+// `CompressWhitespaceNewline` mode with no incoming whitespace: runs of spaces, tabs and
+// newlines collapse to one space, and a leading run is kept as a single space because
+// `incoming_whitespace` is false.
+#[test]
+fn test_transform_compress_whitespace_newline_no_incoming() {
+ let test_strs = vec!(
+ " foo bar",
+ "\nfoo bar",
+ "foo bar ",
+ "foo\n bar",
+ "foo \nbar",
+ " foo bar \nbaz",
+ "foo bar baz",
+ "foobarbaz\n\n"
+ );
+
+ let oracle_strs = vec!(
+ " foo bar",
+ " foo bar",
+ "foo bar ",
+ "foo bar",
+ "foo bar",
+ " foo bar baz",
+ "foo bar baz",
+ "foobarbaz "
+ );
+
+ // Guard against the two tables drifting apart.
+ assert_eq!(test_strs.len(), oracle_strs.len());
+ let mode = CompressWhitespaceNewline;
+
+ for (test, oracle) in test_strs.iter().zip(oracle_strs.iter()) {
+ let mut new_line_pos = vec!();
+ let (trimmed_str, _out) = transform_text(*test, mode, false, &mut new_line_pos);
+ assert_eq!(trimmed_str.as_slice(), *oracle)
+ }
+}