diff options
author | bors-servo <metajack+bors@gmail.com> | 2015-08-28 05:16:03 -0600 |
---|---|---|
committer | bors-servo <metajack+bors@gmail.com> | 2015-08-28 05:16:03 -0600 |
commit | 2ca48ca4047e83e69abf1fad6978de46ef11c3a7 (patch) | |
tree | 11f8f3097a3593dbbd22b96e8b78d14325b58442 /components/script/dom/characterdata.rs | |
parent | 18de1f2357144d86ea83cd0cb66922e8a2157597 (diff) | |
parent | dcc8f63d52e9b1a91fda9af50b43533eb12e9568 (diff) | |
download | servo-2ca48ca4047e83e69abf1fad6978de46ef11c3a7.tar.gz servo-2ca48ca4047e83e69abf1fad6978de46ef11c3a7.zip |
Auto merge of #6854 - servo:slice_chars, r=jdm+Ms2ger
Remove usage of slice_chars in script
`slice_chars` is deprecated as of the Rust upgrade ("rustup") in #6850.
The first commit changes some behavior that was previously incorrect: the spec says offsets and lengths in DOM strings are counted in UTF-16 code units, not in Unicode scalar values (Rust `char`s).
The second commit should not change behavior, unless I made a mistake.
r? @jdm
<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/6854)
<!-- Reviewable:end -->
Diffstat (limited to 'components/script/dom/characterdata.rs')
-rw-r--r-- | components/script/dom/characterdata.rs | 93 |
1 files changed, 65 insertions, 28 deletions
diff --git a/components/script/dom/characterdata.rs b/components/script/dom/characterdata.rs index 47427eb4444..d9c558ee2e8 100644 --- a/components/script/dom/characterdata.rs +++ b/components/script/dom/characterdata.rs @@ -17,7 +17,7 @@ use dom::element::Element; use dom::eventtarget::{EventTarget, EventTargetTypeId}; use dom::node::{Node, NodeTypeId}; -use util::str::{DOMString, slice_chars}; +use util::str::DOMString; use std::borrow::ToOwned; use std::cell::Ref; @@ -60,21 +60,25 @@ impl CharacterDataMethods for CharacterData { // https://dom.spec.whatwg.org/#dom-characterdata-length fn Length(&self) -> u32 { - self.data.borrow().chars().count() as u32 + self.data.borrow().chars().map(|c| c.len_utf16()).sum::<usize>() as u32 } - // https://dom.spec.whatwg.org/#dom-characterdata-substringdataoffset-count + // https://dom.spec.whatwg.org/#dom-characterdata-substringdata fn SubstringData(&self, offset: u32, count: u32) -> Fallible<DOMString> { let data = self.data.borrow(); // Step 1. - let length = data.chars().count() as u32; - if offset > length { + let data_from_offset = match find_utf16_code_unit_offset(&data, offset) { + Some(offset_bytes) => &data[offset_bytes..], // Step 2. - return Err(IndexSize); - } - // Steps 3-4. - let end = if length - offset < count { length } else { offset + count }; - Ok(slice_chars(&*data, offset as usize, end as usize).to_owned()) + None => return Err(IndexSize) + }; + let substring = match find_utf16_code_unit_offset(data_from_offset, count) { + // Steps 3. + None => data_from_offset, + // Steps 4. 
+ Some(count_bytes) => &data_from_offset[..count_bytes], + }; + Ok(substring.to_owned()) } // https://dom.spec.whatwg.org/#dom-characterdata-appenddatadata @@ -92,26 +96,30 @@ impl CharacterDataMethods for CharacterData { self.ReplaceData(offset, count, "".to_owned()) } - // https://dom.spec.whatwg.org/#dom-characterdata-replacedataoffset-count-data + // https://dom.spec.whatwg.org/#dom-characterdata-replacedata fn ReplaceData(&self, offset: u32, count: u32, arg: DOMString) -> ErrorResult { - // Step 1. - let length = self.data.borrow().chars().count() as u32; - if offset > length { - // Step 2. - return Err(IndexSize); - } - // Step 3. - let count = match length - offset { - diff if diff < count => diff, - _ => count, + let new_data = { + let data = self.data.borrow(); + let (prefix, data_from_offset) = match find_utf16_code_unit_offset(&data, offset) { + Some(offset_bytes) => data.split_at(offset_bytes), + // Step 2. + None => return Err(IndexSize) + }; + let suffix = match find_utf16_code_unit_offset(data_from_offset, count) { + // Steps 3. + None => "", + Some(count_bytes) => &data_from_offset[count_bytes..], + }; + // Step 4: Mutation observers. + // Step 5 to 7. + let mut new_data = String::with_capacity(prefix.len() + arg.len() + suffix.len()); + new_data.push_str(prefix); + new_data.push_str(&arg); + new_data.push_str(suffix); + new_data }; - // Step 4: Mutation observers. - // Step 5. 
- let mut data = slice_chars(&*self.data.borrow(), 0, offset as usize).to_owned(); - data.push_str(&arg); - data.push_str(slice_chars(&*self.data.borrow(), (offset + count) as usize, length as usize)); - *self.data.borrow_mut() = data; - // FIXME: Once we have `Range`, we should implement step7 to step11 + *self.data.borrow_mut() = new_data; + // FIXME: Once we have `Range`, we should implement step 8 to step 11 Ok(()) } @@ -181,3 +189,32 @@ impl LayoutCharacterDataHelpers for LayoutJS<CharacterData> { &(*self.unsafe_get()).data.borrow_for_layout() } } + +/// Given a number of UTF-16 code units from the start of the given string, +/// return the corresponding number of UTF-8 bytes. +/// +/// s[find_utf16_code_unit_offset(s, o).unwrap()..] == s.to_utf16()[o..].to_utf8() +fn find_utf16_code_unit_offset(s: &str, offset: u32) -> Option<usize> { + let mut code_units = 0; + for (i, c) in s.char_indices() { + if code_units == offset { + return Some(i) + } + code_units += 1; + if c > '\u{FFFF}' { + if code_units == offset { + panic!("\n\n\ + Would split a surrogate pair in CharacterData API.\n\ + If you see this in real content, please comment with the URL\n\ + on https://github.com/servo/servo/issues/6873\n\ + \n"); + } + code_units += 1; + } + } + if code_units == offset { + Some(s.len()) + } else { + None + } +} |