aboutsummaryrefslogtreecommitdiffstats
path: root/components/script/dom/characterdata.rs
diff options
context:
space:
mode:
authorbors-servo <metajack+bors@gmail.com>2015-08-28 05:16:03 -0600
committerbors-servo <metajack+bors@gmail.com>2015-08-28 05:16:03 -0600
commit2ca48ca4047e83e69abf1fad6978de46ef11c3a7 (patch)
tree11f8f3097a3593dbbd22b96e8b78d14325b58442 /components/script/dom/characterdata.rs
parent18de1f2357144d86ea83cd0cb66922e8a2157597 (diff)
parentdcc8f63d52e9b1a91fda9af50b43533eb12e9568 (diff)
downloadservo-2ca48ca4047e83e69abf1fad6978de46ef11c3a7.tar.gz
servo-2ca48ca4047e83e69abf1fad6978de46ef11c3a7.zip
Auto merge of #6854 - servo:slice_chars, r=jdm+Ms2ger
Remove usage of slice_chars in script. It’s deprecated as of the #6850 rustup. The first commit changes some behavior that was previously incorrect: the spec says indices in DOM strings count UTF-16 code units, not `char` code points. The second commit should not change behavior, unless I made a mistake. r? @jdm <!-- Reviewable:start --> [<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/servo/servo/6854) <!-- Reviewable:end -->
Diffstat (limited to 'components/script/dom/characterdata.rs')
-rw-r--r--components/script/dom/characterdata.rs93
1 files changed, 65 insertions, 28 deletions
diff --git a/components/script/dom/characterdata.rs b/components/script/dom/characterdata.rs
index 47427eb4444..d9c558ee2e8 100644
--- a/components/script/dom/characterdata.rs
+++ b/components/script/dom/characterdata.rs
@@ -17,7 +17,7 @@ use dom::element::Element;
use dom::eventtarget::{EventTarget, EventTargetTypeId};
use dom::node::{Node, NodeTypeId};
-use util::str::{DOMString, slice_chars};
+use util::str::DOMString;
use std::borrow::ToOwned;
use std::cell::Ref;
@@ -60,21 +60,25 @@ impl CharacterDataMethods for CharacterData {
// https://dom.spec.whatwg.org/#dom-characterdata-length
fn Length(&self) -> u32 {
- self.data.borrow().chars().count() as u32
+ self.data.borrow().chars().map(|c| c.len_utf16()).sum::<usize>() as u32
}
- // https://dom.spec.whatwg.org/#dom-characterdata-substringdataoffset-count
+ // https://dom.spec.whatwg.org/#dom-characterdata-substringdata
fn SubstringData(&self, offset: u32, count: u32) -> Fallible<DOMString> {
let data = self.data.borrow();
// Step 1.
- let length = data.chars().count() as u32;
- if offset > length {
+ let data_from_offset = match find_utf16_code_unit_offset(&data, offset) {
+ Some(offset_bytes) => &data[offset_bytes..],
// Step 2.
- return Err(IndexSize);
- }
- // Steps 3-4.
- let end = if length - offset < count { length } else { offset + count };
- Ok(slice_chars(&*data, offset as usize, end as usize).to_owned())
+ None => return Err(IndexSize)
+ };
+ let substring = match find_utf16_code_unit_offset(data_from_offset, count) {
+ // Step 3.
+ None => data_from_offset,
+ // Step 4.
+ Some(count_bytes) => &data_from_offset[..count_bytes],
+ };
+ Ok(substring.to_owned())
}
// https://dom.spec.whatwg.org/#dom-characterdata-appenddatadata
@@ -92,26 +96,30 @@ impl CharacterDataMethods for CharacterData {
self.ReplaceData(offset, count, "".to_owned())
}
- // https://dom.spec.whatwg.org/#dom-characterdata-replacedataoffset-count-data
+ // https://dom.spec.whatwg.org/#dom-characterdata-replacedata
fn ReplaceData(&self, offset: u32, count: u32, arg: DOMString) -> ErrorResult {
- // Step 1.
- let length = self.data.borrow().chars().count() as u32;
- if offset > length {
- // Step 2.
- return Err(IndexSize);
- }
- // Step 3.
- let count = match length - offset {
- diff if diff < count => diff,
- _ => count,
+ let new_data = {
+ let data = self.data.borrow();
+ let (prefix, data_from_offset) = match find_utf16_code_unit_offset(&data, offset) {
+ Some(offset_bytes) => data.split_at(offset_bytes),
+ // Step 2.
+ None => return Err(IndexSize)
+ };
+ let suffix = match find_utf16_code_unit_offset(data_from_offset, count) {
+ // Step 3.
+ None => "",
+ Some(count_bytes) => &data_from_offset[count_bytes..],
+ };
+ // Step 4: Mutation observers.
+ // Steps 5 to 7.
+ let mut new_data = String::with_capacity(prefix.len() + arg.len() + suffix.len());
+ new_data.push_str(prefix);
+ new_data.push_str(&arg);
+ new_data.push_str(suffix);
+ new_data
};
- // Step 4: Mutation observers.
- // Step 5.
- let mut data = slice_chars(&*self.data.borrow(), 0, offset as usize).to_owned();
- data.push_str(&arg);
- data.push_str(slice_chars(&*self.data.borrow(), (offset + count) as usize, length as usize));
- *self.data.borrow_mut() = data;
- // FIXME: Once we have `Range`, we should implement step7 to step11
+ *self.data.borrow_mut() = new_data;
+ // FIXME: Once we have `Range`, we should implement steps 8 to 11
Ok(())
}
@@ -181,3 +189,32 @@ impl LayoutCharacterDataHelpers for LayoutJS<CharacterData> {
&(*self.unsafe_get()).data.borrow_for_layout()
}
}
+
+/// Given a number of UTF-16 code units from the start of the given string,
+/// return the corresponding number of UTF-8 bytes (`None` if `offset` is past the end of the string).
+///
+/// s[find_utf16_code_unit_offset(s, o).unwrap()..] == s.to_utf16()[o..].to_utf8()
+fn find_utf16_code_unit_offset(s: &str, offset: u32) -> Option<usize> {
+ let mut code_units = 0;
+ for (i, c) in s.char_indices() {
+ if code_units == offset {
+ return Some(i)
+ }
+ code_units += 1;
+ if c > '\u{FFFF}' {
+ if code_units == offset {
+ panic!("\n\n\
+ Would split a surrogate pair in CharacterData API.\n\
+ If you see this in real content, please comment with the URL\n\
+ on https://github.com/servo/servo/issues/6873\n\
+ \n");
+ }
+ code_units += 1;
+ }
+ }
+ if code_units == offset {
+ Some(s.len())
+ } else {
+ None
+ }
+}