diff options
author | bors-servo <lbergstrom+bors@mozilla.com> | 2016-04-28 20:22:09 -0700 |
---|---|---|
committer | bors-servo <lbergstrom+bors@mozilla.com> | 2016-04-28 20:22:09 -0700 |
commit | cf121ad8dff90b8fa55558ca9bdcbfe29512a617 (patch) | |
tree | 8f8ff7f5e68b18eb228acbc1448a7c4f451e1e69 /components/util | |
parent | 1177ef5869e02b5129ebde6fa9780c93d362e16c (diff) | |
parent | c4872d95445636ef4dec45cbfc5c2d643c4b9441 (diff) | |
download | servo-cf121ad8dff90b8fa55558ca9bdcbfe29512a617.tar.gz servo-cf121ad8dff90b8fa55558ca9bdcbfe29512a617.zip |
Auto merge of #10895 - mbrubeck:byteindex, r=pcwalton
Use byte indices instead of char indices for text runs
Replace character indices with UTF-8 byte offsets throughout all code dealing with text runs. This eliminates a lot of complexity when converting from one to the other, and interoperates better with the rest of the Rust ecosystem.
For most code this is just a simple replacement of char indices with byte indices. In a few places like glyph storage and text fragment scanning, it also lets us get rid of code that existed only to map between bytes and chars.
Also includes some related fixes to text shaping, discovered while working on this conversion. See the commit messages for details.
r? @pcwalton
<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="35" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/10895)
<!-- Reviewable:end -->
Diffstat (limited to 'components/util')
-rw-r--r-- | components/util/str.rs | 39 |
1 file changed, 1 insertion, 38 deletions
```diff
diff --git a/components/util/str.rs b/components/util/str.rs
index 8d5a5e74bef..997aee8f53c 100644
--- a/components/util/str.rs
+++ b/components/util/str.rs
@@ -11,7 +11,7 @@ use std::ffi::CStr;
 use std::fmt;
 use std::iter::{Filter, Peekable};
 use std::ops::{Deref, DerefMut};
-use std::str::{Bytes, CharIndices, Split, from_utf8};
+use std::str::{Bytes, Split, from_utf8};
 use string_cache::Atom;
 
 #[derive(Clone, Debug, Deserialize, Eq, Hash, HeapSizeOf, Ord, PartialEq, PartialOrd, Serialize)]
@@ -271,40 +271,3 @@ pub fn str_join<I, T>(strs: I, join: &str) -> String
         acc
     })
 }
-
-// Lifted from Rust's StrExt implementation, which is being removed.
-pub fn slice_chars(s: &str, begin: usize, end: usize) -> &str {
-    assert!(begin <= end);
-    let mut count = 0;
-    let mut begin_byte = None;
-    let mut end_byte = None;
-
-    // This could be even more efficient by not decoding,
-    // only finding the char boundaries
-    for (idx, _) in s.char_indices() {
-        if count == begin { begin_byte = Some(idx); }
-        if count == end { end_byte = Some(idx); break; }
-        count += 1;
-    }
-    if begin_byte.is_none() && count == begin { begin_byte = Some(s.len()) }
-    if end_byte.is_none() && count == end { end_byte = Some(s.len()) }
-
-    match (begin_byte, end_byte) {
-        (None, _) => panic!("slice_chars: `begin` is beyond end of string"),
-        (_, None) => panic!("slice_chars: `end` is beyond end of string"),
-        (Some(a), Some(b)) => unsafe { s.slice_unchecked(a, b) }
-    }
-}
-
-// searches a character index in CharIndices
-// returns indices.count if not found
-pub fn search_index(index: usize, indices: CharIndices) -> isize {
-    let mut character_count = 0;
-    for (character_index, _) in indices {
-        if character_index == index {
-            return character_count;
-        }
-        character_count += 1
-    }
-    character_count
-}
```