aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--components/selectors/builder.rs212
-rw-r--r--components/selectors/matching.rs35
-rw-r--r--components/selectors/parser.rs112
3 files changed, 281 insertions, 78 deletions
diff --git a/components/selectors/builder.rs b/components/selectors/builder.rs
index 9fd751b9bb8..6e626239aea 100644
--- a/components/selectors/builder.rs
+++ b/components/selectors/builder.rs
@@ -2,15 +2,208 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use parser::{Component, SelectorImpl, SelectorIter};
+//! Helper module to build up a selector safely and efficiently.
+//!
+//! Our selector representation is designed to optimize matching, and has
+//! several requirements:
+//! * All simple selectors and combinators are stored inline in the same buffer
+//! as Component instances.
+//! * We store the top-level compound selectors from right to left, i.e. in
+//! matching order.
+//! * We store the simple selectors for each combinator from left to right, so
+//! that we match the cheaper simple selectors first.
+//!
+//! Meeting all these constraints without extra memmove traffic during parsing
+//! is non-trivial. This module encapsulates those details and presents an
+//! easy-to-use API for the parser.
+
+use parser::{Combinator, Component, SelectorImpl};
+use servo_arc::{Arc, HeaderWithLength, ThinArc};
+use sink::Push;
+use smallvec::SmallVec;
use std::cmp;
use std::ops::Add;
+use std::ptr;
+use std::slice;
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-pub struct SpecificityAndFlags(pub u32);
+/// Top-level SelectorBuilder struct. This should be stack-allocated by the
+/// consumer and never moved (because it contains a lot of inline data that
+/// would be slow to memmov).
+///
+/// After instantation, callers may call the push_simple_selector() and
+/// push_combinator() methods to append selector data as it is encountered
+/// (from left to right). Once the process is complete, callers should invoke
+/// build(), which transforms the contents of the SelectorBuilder into a heap-
+/// allocated Selector and leaves the builder in a drained state.
+pub struct SelectorBuilder<Impl: SelectorImpl> {
+ /// The entire sequence of simple selectors, from left to right, without combinators.
+ ///
+ /// We make this large because the result of parsing a selector is fed into a new
+ /// Arc-ed allocation, so any spilled vec would be a wasted allocation. Also,
+ /// Components are large enough that we don't have much cache locality benefit
+ /// from reserving stack space for fewer of them.
+ simple_selectors: SmallVec<[Component<Impl>; 32]>,
+ /// The combinators, and the length of the compound selector to their left.
+ combinators: SmallVec<[(Combinator, usize); 16]>,
+ /// The length of the current compount selector.
+ current_len: usize,
+}
+
+impl<Impl: SelectorImpl> Default for SelectorBuilder<Impl> {
+ #[inline(always)]
+ fn default() -> Self {
+ SelectorBuilder {
+ simple_selectors: SmallVec::new(),
+ combinators: SmallVec::new(),
+ current_len: 0,
+ }
+ }
+}
+
+impl<Impl: SelectorImpl> Push<Component<Impl>> for SelectorBuilder<Impl> {
+ fn push(&mut self, value: Component<Impl>) {
+ self.push_simple_selector(value);
+ }
+}
+
+impl<Impl: SelectorImpl> SelectorBuilder<Impl> {
+ /// Pushes a simple selector onto the current compound selector.
+ #[inline(always)]
+ pub fn push_simple_selector(&mut self, ss: Component<Impl>) {
+ debug_assert!(!ss.is_combinator());
+ self.simple_selectors.push(ss);
+ self.current_len += 1;
+ }
+
+ /// Completes the current compound selector and starts a new one, delimited
+ /// by the given combinator.
+ #[inline(always)]
+ pub fn push_combinator(&mut self, c: Combinator) {
+ self.combinators.push((c, self.current_len));
+ self.current_len = 0;
+ }
+
+ /// Returns true if no simple selectors have ever been pushed to this builder.
+ #[inline(always)]
+ pub fn is_empty(&self) -> bool {
+ self.simple_selectors.is_empty()
+ }
+
+ /// Consumes the builder, producing a Selector.
+ #[inline(always)]
+ pub fn build(&mut self, parsed_pseudo: bool) -> ThinArc<SpecificityAndFlags, Component<Impl>> {
+ // Compute the specificity and flags.
+ let mut spec = SpecificityAndFlags(specificity(self.simple_selectors.iter()));
+ if parsed_pseudo {
+ spec.0 |= HAS_PSEUDO_BIT;
+ }
+
+ self.build_with_specificity_and_flags(spec)
+ }
+
+
+ /// Builds with an explicit SpecificityAndFlags. This is separated from build() so
+ /// that unit tests can pass an explicit specificity.
+ #[inline(always)]
+ pub fn build_with_specificity_and_flags(&mut self, spec: SpecificityAndFlags)
+ -> ThinArc<SpecificityAndFlags, Component<Impl>> {
+ // First, compute the total number of Components we'll need to allocate
+ // space for.
+ let full_len = self.simple_selectors.len() + self.combinators.len();
+
+ // Create the header.
+ let header = HeaderWithLength::new(spec, full_len);
+
+ // Create the Arc using an iterator that drains our buffers.
+ let iter = SelectorBuilderIter::new(self, full_len);
+ Arc::into_thin(Arc::from_header_and_iter(header, iter))
+ }
+}
+
+struct SelectorBuilderIter<'a, Impl: SelectorImpl> {
+ builder: &'a mut SelectorBuilder<Impl>,
+ end: *const Component<Impl>,
+ base: *const Component<Impl>,
+ ptr: *const Component<Impl>,
+ full_len: usize,
+}
+
+impl<'a, Impl: SelectorImpl> SelectorBuilderIter<'a, Impl> {
+ fn new(builder: &'a mut SelectorBuilder<Impl>, full_len: usize) -> Self {
+ // Store a pointer to the end of the array of simple selectors,
+ // and set ourselves up to iterate the rightmost compound selector.
+ let sequence_base = &*builder.simple_selectors as *const [Component<Impl>] as *const Component<Impl>;
+ let end = unsafe { sequence_base.offset(builder.simple_selectors.len() as isize) };
+ let base = unsafe { end.offset(-(builder.current_len as isize)) };
+ let ptr = base;
+
+ // Next, tell the SmallVec to forget about its entries so that they
+ // won't be dropped when it frees its buffer. We're transferring
+ // ownership into the selector.
+ unsafe { builder.simple_selectors.set_len(0); }
+
+ SelectorBuilderIter {
+ builder: builder,
+ end: end,
+ base: base,
+ ptr: ptr,
+ full_len: full_len,
+ }
+ }
+}
+
+impl<'a, Impl: SelectorImpl> ExactSizeIterator for SelectorBuilderIter<'a, Impl> {
+ fn len(&self) -> usize {
+ self.full_len
+ }
+}
+
+impl<'a, Impl: SelectorImpl> Iterator for SelectorBuilderIter<'a, Impl> {
+ type Item = Component<Impl>;
+ #[inline(always)]
+ fn next(&mut self) -> Option<Self::Item> {
+ // If ptr is below end, continue walking this compound selector.
+ if self.ptr != self.end {
+ debug_assert!(self.ptr < self.end);
+ let result = unsafe { Some(ptr::read(self.ptr)) };
+ self.ptr = unsafe { self.ptr.offset(1) };
+ return result;
+ }
+
+ if let Some((combinator, len)) = self.builder.combinators.pop() {
+ // There's another compound selector. Reset the pointers to iterate it,
+ // and then return the combinator.
+ self.end = self.base;
+ self.base = unsafe { self.end.offset(-(len as isize)) };
+ self.ptr = self.base;
+ Some(Component::Combinator(combinator))
+ } else {
+ // All done.
+ None
+ }
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ (self.len(), Some(self.len()))
+ }
+}
+
+#[cfg(debug_assertions)]
+impl<'a, Impl: SelectorImpl> Drop for SelectorBuilderIter<'a, Impl> {
+ fn drop(&mut self) {
+ // Failing to iterate the entire builder would cause us to leak (but not
+ // crash). Verify that this doesn't happen.
+ debug_assert!(self.builder.simple_selectors.len() == 0);
+ debug_assert!(self.builder.combinators.len() == 0);
+ debug_assert!(self.ptr == self.end);
+ }
+}
pub const HAS_PSEUDO_BIT: u32 = 1 << 30;
+#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+pub struct SpecificityAndFlags(pub u32);
+
impl SpecificityAndFlags {
pub fn specificity(&self) -> u32 {
self.0 & !HAS_PSEUDO_BIT
@@ -73,13 +266,13 @@ impl From<Specificity> for u32 {
}
}
-pub fn specificity<Impl>(iter: SelectorIter<Impl>) -> u32
+fn specificity<Impl>(iter: slice::Iter<Component<Impl>>) -> u32
where Impl: SelectorImpl
{
complex_selector_specificity(iter).into()
}
-fn complex_selector_specificity<Impl>(mut iter: SelectorIter<Impl>)
+fn complex_selector_specificity<Impl>(mut iter: slice::Iter<Component<Impl>>)
-> Specificity
where Impl: SelectorImpl
{
@@ -129,13 +322,8 @@ fn complex_selector_specificity<Impl>(mut iter: SelectorIter<Impl>)
}
let mut specificity = Default::default();
- loop {
- for simple_selector in &mut iter {
- simple_selector_specificity(&simple_selector, &mut specificity);
- }
- if iter.next_sequence().is_none() {
- break;
- }
+ for simple_selector in &mut iter {
+ simple_selector_specificity(&simple_selector, &mut specificity);
}
specificity
}
diff --git a/components/selectors/matching.rs b/components/selectors/matching.rs
index 135f7b2d24a..73dd6c9a945 100644
--- a/components/selectors/matching.rs
+++ b/components/selectors/matching.rs
@@ -460,24 +460,27 @@ pub fn matches_complex_selector<E, F>(mut iter: SelectorIter<E::Impl>,
}
}
+ // If this is the special pseudo-element mode, consume the ::pseudo-element
+ // before proceeding, since the caller has already handled that part.
if context.shared.matching_mode == MatchingMode::ForStatelessPseudoElement {
- match *iter.next().unwrap() {
- // Stateful pseudo, just don't match.
- Component::NonTSPseudoClass(..) => return false,
- Component::PseudoElement(..) => {
- // Pseudo, just eat the whole sequence.
- let next = iter.next();
- debug_assert!(next.is_none(),
- "Someone messed up pseudo-element parsing?");
-
- if iter.next_sequence().is_none() {
- return true;
- }
- // Inform the context that the we've advanced to the next compound selector.
- context.note_next_sequence(&mut iter);
- }
- _ => panic!("Used MatchingMode::ForStatelessPseudoElement in a non-pseudo selector"),
+ // Consume the pseudo.
+ let pseudo = iter.next().unwrap();
+ debug_assert!(matches!(*pseudo, Component::PseudoElement(..)),
+ "Used MatchingMode::ForStatelessPseudoElement in a non-pseudo selector");
+
+ // The only other parser-allowed Component in this sequence is a state
+ // class. We just don't match in that case.
+ if let Some(s) = iter.next() {
+ debug_assert!(matches!(*s, Component::NonTSPseudoClass(..)),
+ "Someone messed up pseudo-element parsing");
+ return false;
+ }
+
+ // Advance to the non-pseudo-element part of the selector, and inform the context.
+ if iter.next_sequence().is_none() {
+ return true;
}
+ context.note_next_sequence(&mut iter);
}
match matches_complex_selector_internal(iter,
diff --git a/components/selectors/parser.rs b/components/selectors/parser.rs
index f61a42c51d8..c73ac10f442 100644
--- a/components/selectors/parser.rs
+++ b/components/selectors/parser.rs
@@ -5,12 +5,11 @@
use attr::{AttrSelectorWithNamespace, ParsedAttrSelectorOperation, AttrSelectorOperator};
use attr::{ParsedCaseSensitivity, SELECTOR_WHITESPACE, NamespaceConstraint};
use bloom::BLOOM_HASH_MASK;
-use builder::{HAS_PSEUDO_BIT, SpecificityAndFlags};
-use builder::specificity;
+use builder::{SelectorBuilder, SpecificityAndFlags};
use cssparser::{ParseError, BasicParseError, CompactCowStr};
use cssparser::{Token, Parser as CssParser, parse_nth, ToCss, serialize_identifier, CssStringWriter};
use precomputed_hash::PrecomputedHash;
-use servo_arc::{Arc, HeaderWithLength, ThinArc};
+use servo_arc::ThinArc;
use sink::Push;
use smallvec::SmallVec;
use std::ascii::AsciiExt;
@@ -374,6 +373,14 @@ pub fn namespace_empty_string<Impl: SelectorImpl>() -> Impl::NamespaceUrl {
/// callers want the higher-level iterator.
///
/// We store compound selectors internally right-to-left (in matching order).
+/// Additionally, we invert the order of top-level compound selectors so that
+/// each one matches left-to-right. This is because matching namespace, local name,
+/// id, and class are all relatively cheap, whereas matching pseudo-classes might
+/// be expensive (depending on the pseudo-class). Since authors tend to put the
+/// pseudo-classes on the right, it's faster to start matching on the left.
+///
+/// This reordering doesn't change the semantics of selector matching, and we
+/// handle it in to_css to make it invisible to serialization.
#[derive(Clone, Eq, PartialEq)]
pub struct Selector<Impl: SelectorImpl>(ThinArc<SpecificityAndFlags, Component<Impl>>);
@@ -454,12 +461,6 @@ impl<Impl: SelectorImpl> Selector<Impl> {
self.0.slice.iter()
}
- /// Returns an iterator over the entire sequence of simple selectors and
- /// combinators, in parse order (from left to right).
- pub fn iter_raw_parse_order(&self) -> Rev<slice::Iter<Component<Impl>>> {
- self.0.slice.iter().rev()
- }
-
/// Returns an iterator over the sequence of simple selectors and
/// combinators, in parse order (from left to right), _starting_
/// 'offset_from_right' entries from the past-the-end sentinel on
@@ -472,11 +473,18 @@ impl<Impl: SelectorImpl> Selector<Impl> {
self.0.slice[..offset_from_right].iter().rev()
}
- /// Creates a Selector from a vec of Components. Used in tests.
- pub fn from_vec(mut vec: Vec<Component<Impl>>, specificity_and_flags: u32) -> Self {
- vec.reverse(); // FIXME(bholley): This goes away in the next patch.
- let header = HeaderWithLength::new(SpecificityAndFlags(specificity_and_flags), vec.len());
- Selector(Arc::into_thin(Arc::from_header_and_iter(header, vec.into_iter())))
+ /// Creates a Selector from a vec of Components, specified in parse order. Used in tests.
+ pub fn from_vec(vec: Vec<Component<Impl>>, specificity_and_flags: u32) -> Self {
+ let mut builder = SelectorBuilder::default();
+ for component in vec.into_iter() {
+ if let Some(combinator) = component.as_combinator() {
+ builder.push_combinator(combinator);
+ } else {
+ builder.push_simple_selector(component);
+ }
+ }
+ let spec = SpecificityAndFlags(specificity_and_flags);
+ Selector(builder.build_with_specificity_and_flags(spec))
}
/// Returns count of simple selectors and combinators in the Selector.
@@ -751,9 +759,31 @@ impl<Impl: SelectorImpl> ToCss for SelectorList<Impl> {
impl<Impl: SelectorImpl> ToCss for Selector<Impl> {
fn to_css<W>(&self, dest: &mut W) -> fmt::Result where W: fmt::Write {
- for item in self.iter_raw_parse_order() {
- item.to_css(dest)?;
- }
+ // Compound selectors invert the order of their contents, so we need to
+ // undo that during serialization.
+ //
+ // This two-iterator strategy involves walking over the selector twice.
+ // We could do something more clever, but selector serialization probably
+ // isn't hot enough to justify it, and the stringification likely
+ // dominates anyway.
+ //
+ // NB: A parse-order iterator is a Rev<>, which doesn't expose as_slice(),
+ // which we need for |split|. So we split by combinators on a match-order
+ // sequence and then reverse.
+ let mut combinators = self.iter_raw_match_order().rev().filter(|x| x.is_combinator());
+ let compound_selectors = self.iter_raw_match_order().as_slice().split(|x| x.is_combinator()).rev();
+
+ let mut combinators_exhausted = false;
+ for compound in compound_selectors {
+ debug_assert!(!combinators_exhausted);
+ for item in compound.iter() {
+ item.to_css(dest)?;
+ }
+ match combinators.next() {
+ Some(c) => c.to_css(dest)?,
+ None => combinators_exhausted = true,
+ };
+ }
Ok(())
}
@@ -905,12 +935,6 @@ fn display_to_css_identifier<T: Display, W: fmt::Write>(x: &T, dest: &mut W) ->
serialize_identifier(&string, dest)
}
-/// We make this large because the result of parsing a selector is fed into a new
-/// Arc-ed allocation, so any spilled vec would be a wasted allocation. Also,
-/// Components are large enough that we don't have much cache locality benefit
-/// from reserving stack space for fewer of them.
-type ParseVec<Impl> = SmallVec<[Component<Impl>; 32]>;
-
/// Build up a Selector.
/// selector : simple_selector_sequence [ combinator simple_selector_sequence ]* ;
///
@@ -921,12 +945,12 @@ fn parse_selector<'i, 't, P, E, Impl>(
-> Result<Selector<Impl>, ParseError<'i, SelectorParseError<'i, E>>>
where P: Parser<'i, Impl=Impl, Error=E>, Impl: SelectorImpl
{
- let mut sequence = ParseVec::new();
+ let mut builder = SelectorBuilder::default();
+
let mut parsed_pseudo_element;
'outer_loop: loop {
// Parse a sequence of simple selectors.
- parsed_pseudo_element =
- parse_compound_selector(parser, input, &mut sequence)?;
+ parsed_pseudo_element = parse_compound_selector(parser, input, &mut builder)?;
if parsed_pseudo_element {
break;
}
@@ -962,23 +986,10 @@ fn parse_selector<'i, 't, P, E, Impl>(
}
}
}
- sequence.push(Component::Combinator(combinator));
+ builder.push_combinator(combinator);
}
- let mut spec = SpecificityAndFlags(specificity(SelectorIter {
- iter: sequence.iter(),
- next_combinator: None,
- }));
- if parsed_pseudo_element {
- spec.0 |= HAS_PSEUDO_BIT;
- }
-
- // FIXME(bholley): This is just temporary to maintain the semantics of this patch.
- sequence.reverse();
-
- let header = HeaderWithLength::new(spec, sequence.len());
- let complex = Selector(Arc::into_thin(Arc::from_header_and_iter(header, sequence.into_iter())));
- Ok(complex)
+ Ok(Selector(builder.build(parsed_pseudo_element)))
}
impl<Impl: SelectorImpl> Selector<Impl> {
@@ -1342,7 +1353,7 @@ fn parse_negation<'i, 't, P, E, Impl>(parser: &P,
fn parse_compound_selector<'i, 't, P, E, Impl>(
parser: &P,
input: &mut CssParser<'i, 't>,
- mut sequence: &mut ParseVec<Impl>)
+ mut builder: &mut SelectorBuilder<Impl>)
-> Result<bool, ParseError<'i, SelectorParseError<'i, E>>>
where P: Parser<'i, Impl=Impl, Error=E>, Impl: SelectorImpl
{
@@ -1355,12 +1366,12 @@ fn parse_compound_selector<'i, 't, P, E, Impl>(
}
}
let mut empty = true;
- if !parse_type_selector(parser, input, sequence)? {
+ if !parse_type_selector(parser, input, builder)? {
if let Some(url) = parser.default_namespace() {
// If there was no explicit type selector, but there is a
// default namespace, there is an implicit "<defaultns>|*" type
// selector.
- sequence.push(Component::DefaultNamespace(url))
+ builder.push_simple_selector(Component::DefaultNamespace(url))
}
} else {
empty = false;
@@ -1371,7 +1382,7 @@ fn parse_compound_selector<'i, 't, P, E, Impl>(
match parse_one_simple_selector(parser, input, /* inside_negation = */ false)? {
None => break,
Some(SimpleSelectorParseResult::SimpleSelector(s)) => {
- sequence.push(s);
+ builder.push_simple_selector(s);
empty = false
}
Some(SimpleSelectorParseResult::PseudoElement(p)) => {
@@ -1401,13 +1412,13 @@ fn parse_compound_selector<'i, 't, P, E, Impl>(
state_selectors.push(Component::NonTSPseudoClass(pseudo_class));
}
- if !sequence.is_empty() {
- sequence.push(Component::Combinator(Combinator::PseudoElement));
+ if !builder.is_empty() {
+ builder.push_combinator(Combinator::PseudoElement);
}
- sequence.push(Component::PseudoElement(p));
- for state_selector in state_selectors {
- sequence.push(state_selector);
+ builder.push_simple_selector(Component::PseudoElement(p));
+ for state_selector in state_selectors.into_iter() {
+ builder.push_simple_selector(state_selector);
}
pseudo = true;
@@ -1558,6 +1569,7 @@ fn parse_simple_pseudo_class<'i, P, E, Impl>(parser: &P, name: CompactCowStr<'i>
// NB: pub module in order to access the DummyParser
#[cfg(test)]
pub mod tests {
+ use builder::HAS_PSEUDO_BIT;
use cssparser::{Parser as CssParser, ToCss, serialize_identifier, ParserInput};
use parser;
use std::borrow::Cow;