diff options
-rw-r--r-- | components/selectors/builder.rs | 212 | ||||
-rw-r--r-- | components/selectors/matching.rs | 35 | ||||
-rw-r--r-- | components/selectors/parser.rs | 112 |
3 files changed, 281 insertions, 78 deletions
diff --git a/components/selectors/builder.rs b/components/selectors/builder.rs index 9fd751b9bb8..6e626239aea 100644 --- a/components/selectors/builder.rs +++ b/components/selectors/builder.rs @@ -2,15 +2,208 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use parser::{Component, SelectorImpl, SelectorIter}; +//! Helper module to build up a selector safely and efficiently. +//! +//! Our selector representation is designed to optimize matching, and has +//! several requirements: +//! * All simple selectors and combinators are stored inline in the same buffer +//! as Component instances. +//! * We store the top-level compound selectors from right to left, i.e. in +//! matching order. +//! * We store the simple selectors for each combinator from left to right, so +//! that we match the cheaper simple selectors first. +//! +//! Meeting all these constraints without extra memmove traffic during parsing +//! is non-trivial. This module encapsulates those details and presents an +//! easy-to-use API for the parser. + +use parser::{Combinator, Component, SelectorImpl}; +use servo_arc::{Arc, HeaderWithLength, ThinArc}; +use sink::Push; +use smallvec::SmallVec; use std::cmp; use std::ops::Add; +use std::ptr; +use std::slice; -#[derive(Copy, Clone, Debug, Eq, PartialEq)] -pub struct SpecificityAndFlags(pub u32); +/// Top-level SelectorBuilder struct. This should be stack-allocated by the +/// consumer and never moved (because it contains a lot of inline data that +/// would be slow to memmov). +/// +/// After instantation, callers may call the push_simple_selector() and +/// push_combinator() methods to append selector data as it is encountered +/// (from left to right). Once the process is complete, callers should invoke +/// build(), which transforms the contents of the SelectorBuilder into a heap- +/// allocated Selector and leaves the builder in a drained state. +pub struct SelectorBuilder<Impl: SelectorImpl> { + /// The entire sequence of simple selectors, from left to right, without combinators. + /// + /// We make this large because the result of parsing a selector is fed into a new + /// Arc-ed allocation, so any spilled vec would be a wasted allocation. Also, + /// Components are large enough that we don't have much cache locality benefit + /// from reserving stack space for fewer of them. + simple_selectors: SmallVec<[Component<Impl>; 32]>, + /// The combinators, and the length of the compound selector to their left. + combinators: SmallVec<[(Combinator, usize); 16]>, + /// The length of the current compount selector. + current_len: usize, +} + +impl<Impl: SelectorImpl> Default for SelectorBuilder<Impl> { + #[inline(always)] + fn default() -> Self { + SelectorBuilder { + simple_selectors: SmallVec::new(), + combinators: SmallVec::new(), + current_len: 0, + } + } +} + +impl<Impl: SelectorImpl> Push<Component<Impl>> for SelectorBuilder<Impl> { + fn push(&mut self, value: Component<Impl>) { + self.push_simple_selector(value); + } +} + +impl<Impl: SelectorImpl> SelectorBuilder<Impl> { + /// Pushes a simple selector onto the current compound selector. + #[inline(always)] + pub fn push_simple_selector(&mut self, ss: Component<Impl>) { + debug_assert!(!ss.is_combinator()); + self.simple_selectors.push(ss); + self.current_len += 1; + } + + /// Completes the current compound selector and starts a new one, delimited + /// by the given combinator. + #[inline(always)] + pub fn push_combinator(&mut self, c: Combinator) { + self.combinators.push((c, self.current_len)); + self.current_len = 0; + } + + /// Returns true if no simple selectors have ever been pushed to this builder. + #[inline(always)] + pub fn is_empty(&self) -> bool { + self.simple_selectors.is_empty() + } + + /// Consumes the builder, producing a Selector. + #[inline(always)] + pub fn build(&mut self, parsed_pseudo: bool) -> ThinArc<SpecificityAndFlags, Component<Impl>> { + // Compute the specificity and flags. + let mut spec = SpecificityAndFlags(specificity(self.simple_selectors.iter())); + if parsed_pseudo { + spec.0 |= HAS_PSEUDO_BIT; + } + + self.build_with_specificity_and_flags(spec) + } + + + /// Builds with an explicit SpecificityAndFlags. This is separated from build() so + /// that unit tests can pass an explicit specificity. + #[inline(always)] + pub fn build_with_specificity_and_flags(&mut self, spec: SpecificityAndFlags) + -> ThinArc<SpecificityAndFlags, Component<Impl>> { + // First, compute the total number of Components we'll need to allocate + // space for. + let full_len = self.simple_selectors.len() + self.combinators.len(); + + // Create the header. + let header = HeaderWithLength::new(spec, full_len); + + // Create the Arc using an iterator that drains our buffers. + let iter = SelectorBuilderIter::new(self, full_len); + Arc::into_thin(Arc::from_header_and_iter(header, iter)) + } +} + +struct SelectorBuilderIter<'a, Impl: SelectorImpl> { + builder: &'a mut SelectorBuilder<Impl>, + end: *const Component<Impl>, + base: *const Component<Impl>, + ptr: *const Component<Impl>, + full_len: usize, +} + +impl<'a, Impl: SelectorImpl> SelectorBuilderIter<'a, Impl> { + fn new(builder: &'a mut SelectorBuilder<Impl>, full_len: usize) -> Self { + // Store a pointer to the end of the array of simple selectors, + // and set ourselves up to iterate the rightmost compound selector. + let sequence_base = &*builder.simple_selectors as *const [Component<Impl>] as *const Component<Impl>; + let end = unsafe { sequence_base.offset(builder.simple_selectors.len() as isize) }; + let base = unsafe { end.offset(-(builder.current_len as isize)) }; + let ptr = base; + + // Next, tell the SmallVec to forget about its entries so that they + // won't be dropped when it frees its buffer. We're transferring + // ownership into the selector. + unsafe { builder.simple_selectors.set_len(0); } + + SelectorBuilderIter { + builder: builder, + end: end, + base: base, + ptr: ptr, + full_len: full_len, + } + } +} + +impl<'a, Impl: SelectorImpl> ExactSizeIterator for SelectorBuilderIter<'a, Impl> { + fn len(&self) -> usize { + self.full_len + } +} + +impl<'a, Impl: SelectorImpl> Iterator for SelectorBuilderIter<'a, Impl> { + type Item = Component<Impl>; + #[inline(always)] + fn next(&mut self) -> Option<Self::Item> { + // If ptr is below end, continue walking this compound selector. + if self.ptr != self.end { + debug_assert!(self.ptr < self.end); + let result = unsafe { Some(ptr::read(self.ptr)) }; + self.ptr = unsafe { self.ptr.offset(1) }; + return result; + } + + if let Some((combinator, len)) = self.builder.combinators.pop() { + // There's another compound selector. Reset the pointers to iterate it, + // and then return the combinator. + self.end = self.base; + self.base = unsafe { self.end.offset(-(len as isize)) }; + self.ptr = self.base; + Some(Component::Combinator(combinator)) + } else { + // All done. + None + } + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.len(), Some(self.len())) + } +} + +#[cfg(debug_assertions)] +impl<'a, Impl: SelectorImpl> Drop for SelectorBuilderIter<'a, Impl> { + fn drop(&mut self) { + // Failing to iterate the entire builder would cause us to leak (but not + // crash). Verify that this doesn't happen. + debug_assert!(self.builder.simple_selectors.len() == 0); + debug_assert!(self.builder.combinators.len() == 0); + debug_assert!(self.ptr == self.end); + } +} pub const HAS_PSEUDO_BIT: u32 = 1 << 30; +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct SpecificityAndFlags(pub u32); + impl SpecificityAndFlags { pub fn specificity(&self) -> u32 { self.0 & !HAS_PSEUDO_BIT @@ -73,13 +266,13 @@ impl From<Specificity> for u32 { } } -pub fn specificity<Impl>(iter: SelectorIter<Impl>) -> u32 +fn specificity<Impl>(iter: slice::Iter<Component<Impl>>) -> u32 where Impl: SelectorImpl { complex_selector_specificity(iter).into() } -fn complex_selector_specificity<Impl>(mut iter: SelectorIter<Impl>) +fn complex_selector_specificity<Impl>(mut iter: slice::Iter<Component<Impl>>) -> Specificity where Impl: SelectorImpl { @@ -129,13 +322,8 @@ fn complex_selector_specificity<Impl>(mut iter: SelectorIter<Impl>) } let mut specificity = Default::default(); - loop { - for simple_selector in &mut iter { - simple_selector_specificity(&simple_selector, &mut specificity); - } - if iter.next_sequence().is_none() { - break; - } + for simple_selector in &mut iter { + simple_selector_specificity(&simple_selector, &mut specificity); } specificity } diff --git a/components/selectors/matching.rs b/components/selectors/matching.rs index 135f7b2d24a..73dd6c9a945 100644 --- a/components/selectors/matching.rs +++ b/components/selectors/matching.rs @@ -460,24 +460,27 @@ pub fn matches_complex_selector<E, F>(mut iter: SelectorIter<E::Impl>, } } + // If this is the special pseudo-element mode, consume the ::pseudo-element + // before proceeding, since the caller has already handled that part. if context.shared.matching_mode == MatchingMode::ForStatelessPseudoElement { - match *iter.next().unwrap() { - // Stateful pseudo, just don't match. - Component::NonTSPseudoClass(..) => return false, - Component::PseudoElement(..) => { - // Pseudo, just eat the whole sequence. - let next = iter.next(); - debug_assert!(next.is_none(), - "Someone messed up pseudo-element parsing?"); - - if iter.next_sequence().is_none() { - return true; - } - // Inform the context that the we've advanced to the next compound selector. - context.note_next_sequence(&mut iter); - } - _ => panic!("Used MatchingMode::ForStatelessPseudoElement in a non-pseudo selector"), + // Consume the pseudo. + let pseudo = iter.next().unwrap(); + debug_assert!(matches!(*pseudo, Component::PseudoElement(..)), + "Used MatchingMode::ForStatelessPseudoElement in a non-pseudo selector"); + + // The only other parser-allowed Component in this sequence is a state + // class. We just don't match in that case. + if let Some(s) = iter.next() { + debug_assert!(matches!(*s, Component::NonTSPseudoClass(..)), + "Someone messed up pseudo-element parsing"); + return false; + } + + // Advance to the non-pseudo-element part of the selector, and inform the context. + if iter.next_sequence().is_none() { + return true; } + context.note_next_sequence(&mut iter); } match matches_complex_selector_internal(iter, diff --git a/components/selectors/parser.rs b/components/selectors/parser.rs index f61a42c51d8..c73ac10f442 100644 --- a/components/selectors/parser.rs +++ b/components/selectors/parser.rs @@ -5,12 +5,11 @@ use attr::{AttrSelectorWithNamespace, ParsedAttrSelectorOperation, AttrSelectorOperator}; use attr::{ParsedCaseSensitivity, SELECTOR_WHITESPACE, NamespaceConstraint}; use bloom::BLOOM_HASH_MASK; -use builder::{HAS_PSEUDO_BIT, SpecificityAndFlags}; -use builder::specificity; +use builder::{SelectorBuilder, SpecificityAndFlags}; use cssparser::{ParseError, BasicParseError, CompactCowStr}; use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter}; use precomputed_hash::PrecomputedHash; -use servo_arc::{Arc, HeaderWithLength, ThinArc}; +use servo_arc::ThinArc; use sink::Push; use smallvec::SmallVec; use std::ascii::AsciiExt; @@ -374,6 +373,14 @@ pub fn namespace_empty_string<Impl: SelectorImpl>() -> Impl::NamespaceUrl { /// callers want the higher-level iterator. /// /// We store compound selectors internally right-to-left (in matching order). +/// Additionally, we invert the order of top-level compound selectors so that +/// each one matches left-to-right. This is because matching namespace, local name, +/// id, and class are all relatively cheap, whereas matching pseudo-classes might +/// be expensive (depending on the pseudo-class). Since authors tend to put the +/// pseudo-classes on the right, it's faster to start matching on the left. +/// +/// This reordering doesn't change the semantics of selector matching, and we +/// handle it in to_css to make it invisible to serialization. #[derive(Clone, Eq, PartialEq)] pub struct Selector<Impl: SelectorImpl>(ThinArc<SpecificityAndFlags, Component<Impl>>); @@ -454,12 +461,6 @@ impl<Impl: SelectorImpl> Selector<Impl> { self.0.slice.iter() } - /// Returns an iterator over the entire sequence of simple selectors and - /// combinators, in parse order (from left to right). - pub fn iter_raw_parse_order(&self) -> Rev<slice::Iter<Component<Impl>>> { - self.0.slice.iter().rev() - } - /// Returns an iterator over the sequence of simple selectors and /// combinators, in parse order (from left to right), _starting_ /// 'offset_from_right' entries from the past-the-end sentinel on @@ -472,11 +473,18 @@ impl<Impl: SelectorImpl> Selector<Impl> { self.0.slice[..offset_from_right].iter().rev() } - /// Creates a Selector from a vec of Components. Used in tests. - pub fn from_vec(mut vec: Vec<Component<Impl>>, specificity_and_flags: u32) -> Self { - vec.reverse(); // FIXME(bholley): This goes away in the next patch. - let header = HeaderWithLength::new(SpecificityAndFlags(specificity_and_flags), vec.len()); - Selector(Arc::into_thin(Arc::from_header_and_iter(header, vec.into_iter()))) + /// Creates a Selector from a vec of Components, specified in parse order. Used in tests. + pub fn from_vec(vec: Vec<Component<Impl>>, specificity_and_flags: u32) -> Self { + let mut builder = SelectorBuilder::default(); + for component in vec.into_iter() { + if let Some(combinator) = component.as_combinator() { + builder.push_combinator(combinator); + } else { + builder.push_simple_selector(component); + } + } + let spec = SpecificityAndFlags(specificity_and_flags); + Selector(builder.build_with_specificity_and_flags(spec)) } /// Returns count of simple selectors and combinators in the Selector. @@ -751,9 +759,31 @@ impl<Impl: SelectorImpl> ToCss for SelectorList<Impl> { impl<Impl: SelectorImpl> ToCss for Selector<Impl> { fn to_css<W>(&self, dest: &mut W) -> fmt::Result where W: fmt::Write { - for item in self.iter_raw_parse_order() { - item.to_css(dest)?; - } + // Compound selectors invert the order of their contents, so we need to + // undo that during serialization. + // + // This two-iterator strategy involves walking over the selector twice. + // We could do something more clever, but selector serialization probably + // isn't hot enough to justify it, and the stringification likely + // dominates anyway. + // + // NB: A parse-order iterator is a Rev<>, which doesn't expose as_slice(), + // which we need for |split|. So we split by combinators on a match-order + // sequence and then reverse. + let mut combinators = self.iter_raw_match_order().rev().filter(|x| x.is_combinator()); + let compound_selectors = self.iter_raw_match_order().as_slice().split(|x| x.is_combinator()).rev(); + + let mut combinators_exhausted = false; + for compound in compound_selectors { + debug_assert!(!combinators_exhausted); + for item in compound.iter() { + item.to_css(dest)?; + } + match combinators.next() { + Some(c) => c.to_css(dest)?, + None => combinators_exhausted = true, + }; + } Ok(()) } @@ -905,12 +935,6 @@ fn display_to_css_identifier<T: Display, W: fmt::Write>(x: &T, dest: &mut W) -> serialize_identifier(&string, dest) } -/// We make this large because the result of parsing a selector is fed into a new -/// Arc-ed allocation, so any spilled vec would be a wasted allocation. Also, -/// Components are large enough that we don't have much cache locality benefit -/// from reserving stack space for fewer of them. -type ParseVec<Impl> = SmallVec<[Component<Impl>; 32]>; - /// Build up a Selector. /// selector : simple_selector_sequence [ combinator simple_selector_sequence ]* ; /// @@ -921,12 +945,12 @@ fn parse_selector<'i, 't, P, E, Impl>( -> Result<Selector<Impl>, ParseError<'i, SelectorParseError<'i, E>>> where P: Parser<'i, Impl=Impl, Error=E>, Impl: SelectorImpl { - let mut sequence = ParseVec::new(); + let mut builder = SelectorBuilder::default(); + let mut parsed_pseudo_element; 'outer_loop: loop { // Parse a sequence of simple selectors. - parsed_pseudo_element = - parse_compound_selector(parser, input, &mut sequence)?; + parsed_pseudo_element = parse_compound_selector(parser, input, &mut builder)?; if parsed_pseudo_element { break; } @@ -962,23 +986,10 @@ fn parse_selector<'i, 't, P, E, Impl>( } } } - sequence.push(Component::Combinator(combinator)); + builder.push_combinator(combinator); } - let mut spec = SpecificityAndFlags(specificity(SelectorIter { - iter: sequence.iter(), - next_combinator: None, - })); - if parsed_pseudo_element { - spec.0 |= HAS_PSEUDO_BIT; - } - - // FIXME(bholley): This is just temporary to maintain the semantics of this patch. - sequence.reverse(); - - let header = HeaderWithLength::new(spec, sequence.len()); - let complex = Selector(Arc::into_thin(Arc::from_header_and_iter(header, sequence.into_iter()))); - Ok(complex) + Ok(Selector(builder.build(parsed_pseudo_element))) } impl<Impl: SelectorImpl> Selector<Impl> { @@ -1342,7 +1353,7 @@ fn parse_negation<'i, 't, P, E, Impl>(parser: &P, fn parse_compound_selector<'i, 't, P, E, Impl>( parser: &P, input: &mut CssParser<'i, 't>, - mut sequence: &mut ParseVec<Impl>) + mut builder: &mut SelectorBuilder<Impl>) -> Result<bool, ParseError<'i, SelectorParseError<'i, E>>> where P: Parser<'i, Impl=Impl, Error=E>, Impl: SelectorImpl { @@ -1355,12 +1366,12 @@ fn parse_compound_selector<'i, 't, P, E, Impl>( } } let mut empty = true; - if !parse_type_selector(parser, input, sequence)? { + if !parse_type_selector(parser, input, builder)? { if let Some(url) = parser.default_namespace() { // If there was no explicit type selector, but there is a // default namespace, there is an implicit "<defaultns>|*" type // selector. - sequence.push(Component::DefaultNamespace(url)) + builder.push_simple_selector(Component::DefaultNamespace(url)) } } else { empty = false; @@ -1371,7 +1382,7 @@ fn parse_compound_selector<'i, 't, P, E, Impl>( match parse_one_simple_selector(parser, input, /* inside_negation = */ false)? { None => break, Some(SimpleSelectorParseResult::SimpleSelector(s)) => { - sequence.push(s); + builder.push_simple_selector(s); empty = false } Some(SimpleSelectorParseResult::PseudoElement(p)) => { @@ -1401,13 +1412,13 @@ fn parse_compound_selector<'i, 't, P, E, Impl>( state_selectors.push(Component::NonTSPseudoClass(pseudo_class)); } - if !sequence.is_empty() { - sequence.push(Component::Combinator(Combinator::PseudoElement)); + if !builder.is_empty() { + builder.push_combinator(Combinator::PseudoElement); } - sequence.push(Component::PseudoElement(p)); - for state_selector in state_selectors { - sequence.push(state_selector); + builder.push_simple_selector(Component::PseudoElement(p)); + for state_selector in state_selectors.into_iter() { + builder.push_simple_selector(state_selector); } pseudo = true; @@ -1558,6 +1569,7 @@ fn parse_simple_pseudo_class<'i, P, E, Impl>(parser: &P, name: CompactCowStr<'i> // NB: pub module in order to access the DummyParser #[cfg(test)] pub mod tests { + use builder::HAS_PSEUDO_BIT; use cssparser::{Parser as CssParser, ToCss, serialize_identifier, ParserInput}; use parser; use std::borrow::Cow; |