diff options
author | Emilio Cobos Álvarez <emilio@crisal.io> | 2018-11-13 12:47:40 +0000 |
---|---|---|
committer | Emilio Cobos Álvarez <emilio@crisal.io> | 2018-11-17 09:56:01 +0100 |
commit | f486ef7e47b7262a7ab0146f510aa7a3de602213 (patch) | |
tree | e7d6bb26bedbe2d8cd0faf5e08e5de5a18c3cbc8 /components/style/gecko_string_cache | |
parent | d8bd29292e32f8398d90ce942d23c363f9adef43 (diff) | |
download | servo-f486ef7e47b7262a7ab0146f510aa7a3de602213.tar.gz servo-f486ef7e47b7262a7ab0146f510aa7a3de602213.zip |
style: Add an atom bit to know whether we're ascii lowercase.
And thus massively speed up ascii-case-insensitive atom comparisons when both
atoms are lowercase (which is the common case by far).
This removes almost all the slow selector-matching in this page, and it seems
an easier fix than storing the lowercased version of all class-names in quirks
mode in elements and selectors...
Differential Revision: https://phabricator.services.mozilla.com/D10945
Diffstat (limited to 'components/style/gecko_string_cache')
-rw-r--r-- | components/style/gecko_string_cache/mod.rs | 76 |
1 file changed, 44 insertions, 32 deletions
diff --git a/components/style/gecko_string_cache/mod.rs b/components/style/gecko_string_cache/mod.rs index 6d9e5c60f80..ed0cc19dae2 100644 --- a/components/style/gecko_string_cache/mod.rs +++ b/components/style/gecko_string_cache/mod.rs @@ -175,13 +175,19 @@ impl WeakAtom { /// Returns whether this atom is static. #[inline] pub fn is_static(&self) -> bool { - unsafe { (*self.as_ptr()).mIsStatic() != 0 } + self.0.mIsStatic() != 0 + } + + /// Returns whether this atom is ascii lowercase. + #[inline] + fn is_ascii_lowercase(&self) -> bool { + self.0.mIsAsciiLowercase() != 0 } /// Returns the length of the atom string. #[inline] pub fn len(&self) -> u32 { - unsafe { (*self.as_ptr()).mLength() } + self.0.mLength() } /// Returns whether this atom is the empty string. @@ -199,41 +205,54 @@ impl WeakAtom { /// Convert this atom to ASCII lower-case pub fn to_ascii_lowercase(&self) -> Atom { + if self.is_ascii_lowercase() { + return self.clone(); + } + let slice = self.as_slice(); - match slice - .iter() - .position(|&char16| (b'A' as u16) <= char16 && char16 <= (b'Z' as u16)) - { - None => self.clone(), - Some(i) => { - let mut buffer: [u16; 64] = unsafe { mem::uninitialized() }; - let mut vec; - let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) { - buffer_prefix.copy_from_slice(slice); - buffer_prefix - } else { - vec = slice.to_vec(); - &mut vec - }; - for char16 in &mut mutable_slice[i..] 
{ - if *char16 <= 0x7F { - *char16 = (*char16 as u8).to_ascii_lowercase() as u16 - } - } - Atom::from(&*mutable_slice) - }, + let mut buffer: [u16; 64] = unsafe { mem::uninitialized() }; + let mut vec; + let mutable_slice = if let Some(buffer_prefix) = buffer.get_mut(..slice.len()) { + buffer_prefix.copy_from_slice(slice); + buffer_prefix + } else { + vec = slice.to_vec(); + &mut vec + }; + for char16 in &mut *mutable_slice { + if *char16 <= 0x7F { + *char16 = (*char16 as u8).to_ascii_lowercase() as u16 + } } + Atom::from(&*mutable_slice) } /// Return whether two atoms are ASCII-case-insensitive matches + #[inline] pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { if self == other { return true; } + // If we know both atoms are ascii-lowercase, then we can stick with + // pointer equality. + if self.is_ascii_lowercase() && other.is_ascii_lowercase() { + debug_assert!(!self.eq_ignore_ascii_case_slow(other)); + return false; + } + + self.eq_ignore_ascii_case_slow(other) + } + + fn eq_ignore_ascii_case_slow(&self, other: &Self) -> bool { let a = self.as_slice(); let b = other.as_slice(); - a.len() == b.len() && a.iter().zip(b).all(|(&a16, &b16)| { + + if a.len() != b.len() { + return false; + } + + a.iter().zip(b).all(|(&a16, &b16)| { if a16 <= 0x7F && b16 <= 0x7F { (a16 as u8).eq_ignore_ascii_case(&(b16 as u8)) } else { @@ -241,13 +260,6 @@ impl WeakAtom { } }) } - - /// Return whether this atom is an ASCII-case-insensitive match for the given string - pub fn eq_str_ignore_ascii_case(&self, other: &str) -> bool { - self.chars() - .map(|r| r.map(|c: char| c.to_ascii_lowercase())) - .eq(other.chars().map(|c: char| Ok(c.to_ascii_lowercase()))) - } } impl fmt::Debug for WeakAtom { |