Use Gecko's simpler Bloom filter instead of one based on hash stretching.

This preserves the usage of the Bloom filter throughout style recalc, but the implementation is rewritten. Provides a 15% improvement on Guardians of the Galaxy.
author: Patrick Walton <pcwalton@mimiga.net> 2014-09-16 22:58:52 -0700
committer: Patrick Walton <pcwalton@mimiga.net> 2014-10-10 17:02:27 -0700
commit: 2a790d06dd74b1de0c47d433c7fa3a9d8af03efc (patch)
tree: 83346e183c3bf7ef3d8d4edf554667bc263e73c4
parent: 878ece58da7f60b45e9230356ac7a5bbf7351e5b (diff)
download: servo-2a790d06dd74b1de0c47d433c7fa3a9d8af03efc.tar.gz
servo-2a790d06dd74b1de0c47d433c7fa3a9d8af03efc.zip
10 files changed, 311 insertions, 333 deletions
diff --git a/components/layout/css/matching.rs b/components/layout/css/matching.rs
index d618851a175..2c5207e7fd9 100644
--- a/components/layout/css/matching.rs
+++ b/components/layout/css/matching.rs
@@ -19,7 +19,6 @@ use servo_util::str::DOMString;
 use std::mem;
 use std::hash::{Hash, sip};
 use std::slice::Items;
-use style;
 use style::{After, Before, ComputedValues, DeclarationBlock, Stylist, TElement, TNode};
 use style::cascade;
 use sync::Arc;
@@ -299,13 +298,13 @@ pub trait MatchMethods {
     fn recalc_style_for_subtree(&self,
                                 stylist: &Stylist,
                                 layout_context: &LayoutContext,
-                                parent_bf: &mut Option<BloomFilter>,
+                                parent_bf: &mut Option<Box<BloomFilter>>,
                                 applicable_declarations: &mut ApplicableDeclarations,
                                 parent: Option<LayoutNode>);
 
     fn match_node(&self,
                   stylist: &Stylist,
-                  parent_bf: &Option<BloomFilter>,
+                  parent_bf: &Option<Box<BloomFilter>>,
                   applicable_declarations: &mut ApplicableDeclarations,
                   shareable: &mut bool);
 
@@ -421,7 +420,7 @@ impl<'ln> PrivateMatchMethods for LayoutNode<'ln> {
 impl<'ln> MatchMethods for LayoutNode<'ln> {
     fn match_node(&self,
                   stylist: &Stylist,
-                  parent_bf: &Option<BloomFilter>,
+                  parent_bf: &Option<Box<BloomFilter>>,
                   applicable_declarations: &mut ApplicableDeclarations,
                   shareable: &mut bool) {
         let style_attribute = self.as_element().style_attribute().as_ref();
@@ -506,13 +505,7 @@ impl<'ln> MatchMethods for LayoutNode<'ln> {
         element.get_id().map(|id| bf.insert(&id));
 
         // TODO: case-sensitivity depends on the document type and quirks mode
-        element
-            .get_attr(&ns!(""), "class")
-            .map(|attr| {
-                for c in attr.split(style::SELECTOR_WHITESPACE) {
-                    bf.insert(&c);
-                }
-            });
+        element.each_class(|class| bf.insert(class));
     }
 
     fn remove_from_bloom_filter(&self, bf: &mut BloomFilter) {
@@ -525,19 +518,13 @@ impl<'ln> MatchMethods for LayoutNode<'ln> {
         element.get_id().map(|id| bf.remove(&id));
 
         // TODO: case-sensitivity depends on the document type and quirks mode
-        element
-            .get_attr(&ns!(""), "class")
-            .map(|attr| {
-                for c in attr.split(style::SELECTOR_WHITESPACE) {
-                    bf.remove(&c);
-                }
-            });
+        element.each_class(|class| bf.remove(class));
     }
 
     fn recalc_style_for_subtree(&self,
                                 stylist: &Stylist,
                                 layout_context: &LayoutContext,
-                                parent_bf: &mut Option<BloomFilter>,
+                                parent_bf: &mut Option<Box<BloomFilter>>,
                                 applicable_declarations: &mut ApplicableDeclarations,
                                 parent: Option<LayoutNode>) {
         self.initialize_layout_data(layout_context.shared.layout_chan.clone());
@@ -573,7 +560,7 @@ impl<'ln> MatchMethods for LayoutNode<'ln> {
 
         match *parent_bf {
             None => {},
-            Some(ref mut pbf) => self.insert_into_bloom_filter(pbf),
+            Some(ref mut pbf) => self.insert_into_bloom_filter(&mut **pbf),
         }
 
         for kid in self.children() {
@@ -586,7 +573,7 @@ impl<'ln> MatchMethods for LayoutNode<'ln> {
 
         match *parent_bf {
             None => {},
-            Some(ref mut pbf) => self.remove_from_bloom_filter(pbf),
+            Some(ref mut pbf) => self.remove_from_bloom_filter(&mut **pbf),
         }
 
         // Construct flows.
diff --git a/components/layout/layout_task.rs b/components/layout/layout_task.rs
index d252b856f06..22068ae460b 100644
--- a/components/layout/layout_task.rs
+++ b/components/layout/layout_task.rs
@@ -63,9 +63,7 @@ use std::cell::Cell;
 use std::comm::{channel, Sender, Receiver, Select};
 use std::mem;
 use std::ptr;
-use style;
-use style::{TNode, AuthorOrigin, Stylesheet, Stylist};
-use style::iter_font_face_rules;
+use style::{AuthorOrigin, Stylesheet, Stylist, TNode, iter_font_face_rules};
 use sync::{Arc, Mutex, MutexGuard};
 use url::Url;
 
@@ -647,8 +645,7 @@ impl LayoutTask {
                 None => {
                     let layout_ctx = LayoutContext::new(&shared_layout_ctx);
                     let mut applicable_declarations = ApplicableDeclarations::new();
-                    let mut parent_bf = Some(BloomFilter::new(
-                        style::RECOMMENDED_SELECTOR_BLOOM_FILTER_SIZE));
+                    let mut parent_bf = Some(box BloomFilter::new());
                     node.recalc_style_for_subtree(&*rw_data.stylist,
                                                    &layout_ctx,
                                                    &mut parent_bf,
diff --git a/components/layout/parallel.rs b/components/layout/parallel.rs
index 3a85ee9a629..90e19f6db76 100644
--- a/components/layout/parallel.rs
+++ b/components/layout/parallel.rs
@@ -16,7 +16,7 @@ use url::Url;
 use util::{LayoutDataAccess, LayoutDataWrapper};
 use wrapper::{layout_node_to_unsafe_layout_node, layout_node_from_unsafe_layout_node, LayoutNode};
 use wrapper::{PostorderNodeMutTraversal, UnsafeLayoutNode};
-use wrapper::{PreorderDOMTraversal, PostorderDOMTraversal};
+use wrapper::{PreorderDomTraversal, PostorderDomTraversal};
 
 use servo_util::time::{TimeProfilerChan, profile};
 use servo_util::time;
@@ -78,7 +78,7 @@ impl DomParallelInfo {
 }
 
 /// A parallel top-down DOM traversal.
-pub trait ParallelPreorderDOMTraversal : PreorderDOMTraversal {
+pub trait ParallelPreorderDomTraversal : PreorderDomTraversal {
     fn run_parallel(&mut self,
                     node: UnsafeLayoutNode,
                     proxy: &mut WorkerProxy<*const SharedLayoutContext,UnsafeLayoutNode>);
@@ -127,7 +127,7 @@ pub trait ParallelPreorderDOMTraversal : PreorderDOMTraversal {
 }
 
 /// A parallel bottom-up DOM traversal.
-trait ParallelPostorderDOMTraversal : PostorderDOMTraversal {
+trait ParallelPostorderDomTraversal : PostorderDomTraversal {
     /// Process current node and potentially traverse its ancestors.
     ///
     /// If we are the last child that finished processing, recursively process
@@ -319,9 +319,9 @@ impl<'a> ParallelPreorderFlowTraversal for AssignISizes<'a> {
 
 impl<'a> ParallelPostorderFlowTraversal for AssignBSizesAndStoreOverflow<'a> {}
 
-impl<'a> ParallelPostorderDOMTraversal for ConstructFlows<'a> {}
+impl<'a> ParallelPostorderDomTraversal for ConstructFlows<'a> {}
 
-impl <'a> ParallelPreorderDOMTraversal for RecalcStyleForNode<'a> {
+impl <'a> ParallelPreorderDomTraversal for RecalcStyleForNode<'a> {
     fn run_parallel(&mut self,
                     unsafe_node: UnsafeLayoutNode,
                     proxy: &mut WorkerProxy<*const SharedLayoutContext, UnsafeLayoutNode>) {
diff --git a/components/layout/traversal.rs b/components/layout/traversal.rs
index a4417b55d45..d217e785321 100644
--- a/components/layout/traversal.rs
+++ b/components/layout/traversal.rs
@@ -13,11 +13,10 @@ use flow;
 use incremental::RestyleDamage;
 use wrapper::{layout_node_to_unsafe_layout_node, LayoutNode};
 use wrapper::{PostorderNodeMutTraversal, ThreadSafeLayoutNode, UnsafeLayoutNode};
-use wrapper::{PreorderDOMTraversal, PostorderDOMTraversal};
+use wrapper::{PreorderDomTraversal, PostorderDomTraversal};
 
 use servo_util::bloom::BloomFilter;
 use servo_util::tid::tid;
-use style;
 use style::TNode;
 
 /// Every time we do another layout, the old bloom filters are invalid. This is
@@ -44,48 +43,47 @@ type Generation = uint;
 /// Since a work-stealing queue is used for styling, sometimes, the bloom filter
 /// will no longer be the for the parent of the node we're currently on. When
 /// this happens, the task local bloom filter will be thrown away and rebuilt.
-local_data_key!(style_bloom: (BloomFilter, UnsafeLayoutNode, Generation))
+local_data_key!(style_bloom: (Box<BloomFilter>, UnsafeLayoutNode, Generation))
 
 /// Returns the task local bloom filter.
 ///
 /// If one does not exist, a new one will be made for you. If it is out of date,
 /// it will be thrown out and a new one will be made for you.
-fn take_task_local_bloom_filter(
-  parent_node: Option<LayoutNode>,
-  layout_context: &LayoutContext)
-      -> BloomFilter {
-
-    let new_bloom =
-        |p: Option<LayoutNode>| -> BloomFilter {
-            let mut bf = BloomFilter::new(style::RECOMMENDED_SELECTOR_BLOOM_FILTER_SIZE);
-            p.map(|p| insert_ancestors_into_bloom_filter(&mut bf, p, layout_context));
-            if p.is_none() {
-                debug!("[{}] No parent, but new bloom filter!", tid());
-            }
-            bf
-        };
-
+fn take_task_local_bloom_filter(parent_node: Option<LayoutNode>, layout_context: &LayoutContext)
+                                -> Box<BloomFilter> {
     match (parent_node, style_bloom.replace(None)) {
         // Root node. Needs new bloom filter.
-        (None,     _  ) => new_bloom(None),
+        (None,     _  ) => {
+            debug!("[{}] No parent, but new bloom filter!", tid());
+            box BloomFilter::new()
+        }
         // No bloom filter for this thread yet.
-        (Some(p), None) => new_bloom(Some(p)),
+        (Some(parent), None) => {
+            let mut bloom_filter = box BloomFilter::new();
+            insert_ancestors_into_bloom_filter(&mut bloom_filter, parent, layout_context);
+            bloom_filter
+        }
         // Found cached bloom filter.
-        (Some(p), Some((bf, old_node, old_generation))) => {
+        (Some(parent), Some((mut bloom_filter, old_node, old_generation))) => {
             // Hey, the cached parent is our parent! We can reuse the bloom filter.
-            if old_node == layout_node_to_unsafe_layout_node(&p) &&
+            if old_node == layout_node_to_unsafe_layout_node(&parent) &&
                 old_generation == layout_context.shared.generation {
                 debug!("[{}] Parent matches (={}). Reusing bloom filter.", tid(), old_node.val0());
-                bf
-            // Oh no. the cached parent is stale. I guess we need a new one...
+                bloom_filter
             } else {
-                new_bloom(Some(p))
+                // Oh no. the cached parent is stale. I guess we need a new one. Reuse the existing
+                // allocation to avoid malloc churn.
+                *bloom_filter = BloomFilter::new();
+                insert_ancestors_into_bloom_filter(&mut bloom_filter, parent, layout_context);
+                bloom_filter
             }
         },
     }
 }
 
-fn put_task_local_bloom_filter(bf: BloomFilter, unsafe_node: &UnsafeLayoutNode, layout_context: &LayoutContext) {
+fn put_task_local_bloom_filter(bf: Box<BloomFilter>,
+                               unsafe_node: &UnsafeLayoutNode,
+                               layout_context: &LayoutContext) {
     match style_bloom.replace(Some((bf, *unsafe_node, layout_context.shared.generation))) {
         None => {},
         Some(_) => fail!("Putting into a never-taken task-local bloom filter"),
@@ -93,14 +91,15 @@ fn put_task_local_bloom_filter(bf: BloomFilter, unsafe_node: &UnsafeLayoutNode,
 }
 
 /// "Ancestors" in this context is inclusive of ourselves.
-fn insert_ancestors_into_bloom_filter(
-  bf: &mut BloomFilter, mut n: LayoutNode, layout_context: &LayoutContext) {
+fn insert_ancestors_into_bloom_filter(bf: &mut Box<BloomFilter>,
+                                      mut n: LayoutNode,
+                                      layout_context: &LayoutContext) {
     debug!("[{}] Inserting ancestors.", tid());
     let mut ancestors = 0u;
     loop {
         ancestors += 1;
 
-        n.insert_into_bloom_filter(bf);
+        n.insert_into_bloom_filter(&mut **bf);
         n = match n.layout_parent_node(layout_context.shared) {
             None => break,
             Some(p) => p,
@@ -115,7 +114,7 @@ pub struct RecalcStyleForNode<'a> {
     pub layout_context: &'a LayoutContext<'a>,
 }
 
-impl<'a> PreorderDOMTraversal for RecalcStyleForNode<'a> {
+impl<'a> PreorderDomTraversal for RecalcStyleForNode<'a> {
     #[inline]
     fn process(&self, node: LayoutNode) {
         // Initialize layout data.
@@ -135,7 +134,8 @@ impl<'a> PreorderDOMTraversal for RecalcStyleForNode<'a> {
 
         if node.is_dirty() {
             // First, check to see whether we can share a style with someone.
-            let style_sharing_candidate_cache = self.layout_context.style_sharing_candidate_cache();
+            let style_sharing_candidate_cache =
+                self.layout_context.style_sharing_candidate_cache();
             let sharing_result = unsafe {
                 node.share_style_if_possible(style_sharing_candidate_cache,
                                              parent_opt.clone())
@@ -148,8 +148,11 @@ impl<'a> PreorderDOMTraversal for RecalcStyleForNode<'a> {
                     if node.is_element() {
                         // Perform the CSS selector matching.
                         let stylist = unsafe { &*self.layout_context.shared.stylist };
-                        node.match_node(stylist, &some_bf, &mut applicable_declarations, &mut shareable);
-                    }
+                        node.match_node(stylist,
+                                        &some_bf,
+                                        &mut applicable_declarations,
+                                        &mut shareable);
+                   }
 
                     // Perform the CSS cascade.
                     unsafe {
@@ -174,7 +177,7 @@ impl<'a> PreorderDOMTraversal for RecalcStyleForNode<'a> {
         // Before running the children, we need to insert our nodes into the bloom
         // filter.
         debug!("[{}] + {:X}", tid(), unsafe_layout_node.val0());
-        node.insert_into_bloom_filter(&mut bf);
+        node.insert_into_bloom_filter(&mut *bf);
 
         // NB: flow construction updates the bloom filter on the way up.
         put_task_local_bloom_filter(bf, &unsafe_layout_node, self.layout_context);
@@ -186,7 +189,7 @@ pub struct ConstructFlows<'a> {
     pub layout_context: &'a LayoutContext<'a>,
 }
 
-impl<'a> PostorderDOMTraversal for ConstructFlows<'a> {
+impl<'a> PostorderDomTraversal for ConstructFlows<'a> {
     #[inline]
     fn process(&self, node: LayoutNode) {
         // Construct flows for this node.
@@ -222,7 +225,7 @@ impl<'a> PostorderDOMTraversal for ConstructFlows<'a> {
             }
             Some(parent) => {
                 // Otherwise, put it back, but remove this node.
-                node.remove_from_bloom_filter(&mut bf);
+                node.remove_from_bloom_filter(&mut *bf);
                 let unsafe_parent = layout_node_to_unsafe_layout_node(&parent);
                 put_task_local_bloom_filter(bf, &unsafe_parent, self.layout_context);
             },
@@ -248,8 +251,8 @@ impl PreorderFlow for FlowTreeVerification {
     }
 }
 
-/// The bubble-inline-sizes traversal, the first part of layout computation. This computes preferred
-/// and intrinsic inline-sizes and bubbles them up the tree.
+/// The bubble-inline-sizes traversal, the first part of layout computation. This computes
+/// preferred and intrinsic inline-sizes and bubbles them up the tree.
 pub struct BubbleISizes<'a> {
     pub layout_context: &'a LayoutContext<'a>,
 }
@@ -283,9 +286,10 @@ impl<'a> PreorderFlowTraversal for AssignISizes<'a> {
     }
 }
 
-/// The assign-block-sizes-and-store-overflow traversal, the last (and most expensive) part of layout
-/// computation. Determines the final block-sizes for all layout objects, computes positions, and
-/// computes overflow regions. In Gecko this corresponds to `FinishAndStoreOverflow`.
+/// The assign-block-sizes-and-store-overflow traversal, the last (and most expensive) part of
+/// layout computation. Determines the final block-sizes for all layout objects, computes
+/// positions, and computes overflow regions. In Gecko this corresponds to `Reflow` and
+/// `FinishAndStoreOverflow`.
 pub struct AssignBSizesAndStoreOverflow<'a> {
     pub layout_context: &'a LayoutContext<'a>,
 }
diff --git a/components/layout/wrapper.rs b/components/layout/wrapper.rs
index 0a10bf0315d..7cde321f058 100644
--- a/components/layout/wrapper.rs
+++ b/components/layout/wrapper.rs
@@ -496,6 +496,20 @@ impl<'le> TElement<'le> for LayoutElement<'le> {
             self.element.has_class_for_layout(name)
         }
     }
+
+    #[inline(always)]
+    fn each_class(self, callback: |&Atom|) {
+        unsafe {
+            match self.element.get_classes_for_layout() {
+                None => {}
+                Some(mut classes) => {
+                    for class in classes {
+                        callback(class)
+                    }
+                }
+            }
+        }
+    }
 }
 
 fn get_content(content_list: &content::T) -> String {
@@ -890,13 +904,13 @@ pub unsafe fn layout_node_from_unsafe_layout_node(node: &UnsafeLayoutNode) -> La
 }
 
 /// A top-down traversal.
-pub trait PreorderDOMTraversal {
+pub trait PreorderDomTraversal {
     /// The operation to perform. Return true to continue or false to stop.
-    fn process(&self, _node: LayoutNode);
+    fn process(&self, node: LayoutNode);
 }
 
 /// A bottom-up traversal, with a optional in-order pass.
-pub trait PostorderDOMTraversal {
+pub trait PostorderDomTraversal {
     /// The operation to perform. Return true to continue or false to stop.
-    fn process(&self, _node: LayoutNode);
+    fn process(&self, node: LayoutNode);
 }
diff --git a/components/script/dom/element.rs b/components/script/dom/element.rs
index ce1371f74ef..ef5ec630670 100644
--- a/components/script/dom/element.rs
+++ b/components/script/dom/element.rs
@@ -38,6 +38,7 @@ use std::ascii::StrAsciiExt;
 use std::cell::RefCell;
 use std::default::Default;
 use std::mem;
+use std::slice::Items;
 use string_cache::{Atom, Namespace};
 use url::UrlParser;
 
@@ -172,6 +173,7 @@ pub trait RawLayoutElementHelpers {
     unsafe fn get_attr_vals_for_layout<'a>(&'a self, name: &str) -> Vec<&'a str>;
     unsafe fn get_attr_atom_for_layout(&self, namespace: &Namespace, name: &str) -> Option<Atom>;
     unsafe fn has_class_for_layout(&self, name: &str) -> bool;
+    unsafe fn get_classes_for_layout<'a>(&'a self) -> Option<Items<'a,Atom>>;
 }
 
 impl RawLayoutElementHelpers for Element {
@@ -234,6 +236,19 @@ impl RawLayoutElementHelpers for Element {
             (*attr).value_tokens_forever().map(|mut tokens| { tokens.any(|atom| atom.as_slice() == name) })
         }.take().unwrap())
     }
+
+    #[inline]
+    #[allow(unrooted_must_root)]
+    unsafe fn get_classes_for_layout<'a>(&'a self) -> Option<Items<'a,Atom>> {
+        let attrs: *const Vec<JS<Attr>> = mem::transmute(&self.attrs);
+        (*attrs).iter().find(|attr: & &JS<Attr>| {
+            let attr = attr.unsafe_get();
+            (*attr).local_name_atom_forever().as_slice() == "class"
+        }).and_then(|attr| {
+            let attr = attr.unsafe_get();
+            (*attr).value_tokens_forever()
+        })
+    }
 }
 
 pub trait LayoutElementHelpers {
@@ -1052,4 +1067,19 @@ impl<'a> style::TElement<'a> for JSRef<'a, Element> {
 
         has_class(self, name)
     }
+    fn each_class(self, callback: |&Atom|) {
+        match self.get_attribute(ns!(""), "class").root() {
+            None => {}
+            Some(attr) => {
+                match attr.deref().value().tokens() {
+                    None => {}
+                    Some(mut tokens) => {
+                        for token in tokens {
+                            callback(token)
+                        }
+                    }
+                }
+            }
+        }
+    }
 }
diff --git a/components/style/node.rs b/components/style/node.rs
index 8a8930fdc81..8fc74d5738c 100644
--- a/components/style/node.rs
+++ b/components/style/node.rs
@@ -38,4 +38,5 @@ pub trait TElement<'a> : Copy {
     fn get_disabled_state(self) -> bool;
     fn get_enabled_state(self) -> bool;
     fn has_class(self, name: &str) -> bool;
+    fn each_class(self, callback: |&Atom|);
 }
diff --git a/components/style/selector_matching.rs b/components/style/selector_matching.rs
index fc37688cea0..6c5ed1e10f5 100644
--- a/components/style/selector_matching.rs
+++ b/components/style/selector_matching.rs
@@ -85,7 +85,7 @@ impl SelectorMap {
                               V:VecLike<DeclarationBlock>>(
                               &self,
                               node: &N,
-                              parent_bf: &Option<BloomFilter>,
+                              parent_bf: &Option<Box<BloomFilter>>,
                               matching_rules_list: &mut V,
                               shareable: &mut bool) {
         if self.empty {
@@ -153,7 +153,7 @@ impl SelectorMap {
                                     N:TNode<'a, E>,
                                     V:VecLike<DeclarationBlock>>(
                                     node: &N,
-                                    parent_bf: &Option<BloomFilter>,
+                                    parent_bf: &Option<Box<BloomFilter>>,
                                     hash: &HashMap<Atom, Vec<Rule>>,
                                     key: &Atom,
                                     matching_rules: &mut V,
@@ -172,7 +172,7 @@ impl SelectorMap {
                           N:TNode<'a, E>,
                           V:VecLike<DeclarationBlock>>(
                           node: &N,
-                          parent_bf: &Option<BloomFilter>,
+                          parent_bf: &Option<Box<BloomFilter>>,
                           rules: &[Rule],
                           matching_rules: &mut V,
                           shareable: &mut bool) {
@@ -353,7 +353,7 @@ impl Stylist {
                                         V:VecLike<DeclarationBlock>>(
                                         &self,
                                         element: &N,
-                                        parent_bf: &Option<BloomFilter>,
+                                        parent_bf: &Option<Box<BloomFilter>>,
                                         style_attribute: Option<&PropertyDeclarationBlock>,
                                         pseudo_element: Option<PseudoElement>,
                                         applicable_declarations: &mut V)
@@ -471,7 +471,12 @@ impl DeclarationBlock {
     }
 }
 
-pub fn matches<'a, E:TElement<'a>, N:TNode<'a, E>>(selector_list: &SelectorList, element: &N, parent_bf: &Option<BloomFilter>) -> bool {
+pub fn matches<'a,E,N>(
+               selector_list: &SelectorList,
+               element: &N,
+               parent_bf: &Option<Box<BloomFilter>>)
+               -> bool
+               where E: TElement<'a>, N: TNode<'a,E> {
     get_selector_list_selectors(selector_list).iter().any(|selector|
         selector.pseudo_element.is_none() &&
         matches_compound_selector(&*selector.compound_selectors, element, parent_bf, &mut false))
@@ -488,7 +493,7 @@ fn matches_compound_selector<'a,
                              N:TNode<'a, E>>(
                              selector: &CompoundSelector,
                              element: &N,
-                             parent_bf: &Option<BloomFilter>,
+                             parent_bf: &Option<Box<BloomFilter>>,
                              shareable: &mut bool)
                              -> bool {
     match matches_compound_selector_internal(selector, element, parent_bf, shareable) {
@@ -549,21 +554,22 @@ enum SelectorMatchingResult {
 /// Quickly figures out whether or not the compound selector is worth doing more
 /// work on. If the simple selectors don't match, or there's a child selector
 /// that does not appear in the bloom parent bloom filter, we can exit early.
-fn can_fast_reject<'a, E: TElement<'a>, N: TNode<'a, E>>(
-  mut selector: &CompoundSelector,
-  element: &N,
-  parent_bf: &Option<BloomFilter>,
-  shareable: &mut bool) -> Option<SelectorMatchingResult> {
+fn can_fast_reject<'a,E,N>(
+                   mut selector: &CompoundSelector,
+                   element: &N,
+                   parent_bf: &Option<Box<BloomFilter>>,
+                   shareable: &mut bool)
+                   -> Option<SelectorMatchingResult>
+                   where E: TElement<'a>, N: TNode<'a,E> {
     if !selector.simple_selectors.iter().all(|simple_selector| {
       matches_simple_selector(simple_selector, element, shareable) }) {
         return Some(NotMatchedAndRestartFromClosestLaterSibling);
     }
 
-    let bf: &BloomFilter =
-        match *parent_bf {
-            None => return None,
-            Some(ref bf) => bf,
-        };
+    let bf: &BloomFilter = match *parent_bf {
+        None => return None,
+        Some(ref bf) => &**bf,
+    };
 
     // See if the bloom filter can exclude any of the descendant selectors, and
     // reject if we can.
@@ -580,23 +586,23 @@ fn can_fast_reject<'a, E: TElement<'a>, N: TNode<'a, E>>(
         for ss in selector.simple_selectors.iter() {
             match *ss {
                 LocalNameSelector(LocalName { ref name, ref lower_name })  => {
-                    if bf.definitely_excludes(name)
-                    && bf.definitely_excludes(lower_name) {
+                    if !bf.might_contain(name)
+                    && !bf.might_contain(lower_name) {
                         return Some(NotMatchedGlobally);
                     }
                 },
                 NamespaceSelector(ref namespace) => {
-                    if bf.definitely_excludes(namespace) {
+                    if !bf.might_contain(namespace) {
                         return Some(NotMatchedGlobally);
                     }
                 },
                 IDSelector(ref id) => {
-                    if bf.definitely_excludes(id) {
+                    if !bf.might_contain(id) {
                         return Some(NotMatchedGlobally);
                     }
                 },
                 ClassSelector(ref class) => {
-                    if bf.definitely_excludes(&class.as_slice()) {
+                    if !bf.might_contain(class) {
                         return Some(NotMatchedGlobally);
                     }
                 },
@@ -615,7 +621,7 @@ fn matches_compound_selector_internal<'a,
                                       N:TNode<'a, E>>(
                                       selector: &CompoundSelector,
                                       element: &N,
-                                      parent_bf: &Option<BloomFilter>,
+                                      parent_bf: &Option<Box<BloomFilter>>,
                                       shareable: &mut bool)
                                       -> SelectorMatchingResult {
     match can_fast_reject(selector, element, parent_bf, shareable) {
@@ -994,7 +1000,6 @@ impl<K: Eq + Hash, V> FindPush<K, V> for HashMap<K, Vec<V>> {
     }
 }
 
-
 #[cfg(test)]
 mod tests {
     use sync::Arc;
diff --git a/components/util/bloom.rs b/components/util/bloom.rs
index 4621697fa50..6795cb889e8 100644
--- a/components/util/bloom.rs
+++ b/components/util/bloom.rs
@@ -4,288 +4,230 @@
 
 //! Simple counting bloom filters.
 
-extern crate rand;
+use string_cache::{Atom, Namespace};
 
-use fnv::{FnvState, hash};
-use rand::Rng;
-use std::hash::Hash;
-use std::iter;
-use std::num;
-use std::uint;
+static KEY_SIZE: uint = 12;
+static ARRAY_SIZE: uint = 1 << KEY_SIZE;
+static KEY_MASK: u32 = (1 << KEY_SIZE) - 1;
+static KEY_SHIFT: uint = 16;
 
-// Just a quick and dirty xxhash embedding.
-
-/// A counting bloom filter.
+/// A counting Bloom filter with 8-bit counters.  For now we assume
+/// that having two hash functions is enough, but we may revisit that
+/// decision later.
+///
+/// The filter uses an array with 2**KEY_SIZE entries.
+///
+/// Assuming a well-distributed hash function, a Bloom filter with
+/// array size M containing N elements and
+/// using k hash functions has an expected false positive rate of exactly
+///
+/// $  (1 - (1 - 1/M)^{kN})^k  $
+///
+/// because each array slot has a
+///
+/// $  (1 - 1/M)^{kN}  $
+///
+/// chance of being 0, and the expected false positive rate is the
+/// probability that all of the k hash functions will hit a nonzero
+/// slot.
+///
+/// For reasonable assumptions (M large, kN large, which should both
+/// hold if we're worried about false positives) about M and kN this
+/// becomes approximately
 ///
-/// A bloom filter is a probabilistic data structure which allows you to add and
-/// remove elements from a set, query the set for whether it may contain an
-/// element or definitely exclude it, and uses much less ram than an equivalent
-/// hashtable.
-#[deriving(Clone)]
+/// $$  (1 - \exp(-kN/M))^k   $$
+///
+/// For our special case of k == 2, that's $(1 - \exp(-2N/M))^2$,
+/// or in other words
+///
+/// $$    N/M = -0.5 * \ln(1 - \sqrt{r})   $$
+///
+/// where r is the false positive rate.  This can be used to compute
+/// the desired KEY_SIZE for a given load N and false positive rate r.
+///
+/// If N/M is assumed small, then the false positive rate can
+/// further be approximated as 4*N^2/M^2.  So increasing KEY_SIZE by
+/// 1, which doubles M, reduces the false positive rate by about a
+/// factor of 4, and a false positive rate of 1% corresponds to
+/// about M/N == 20.
+///
+/// What this means in practice is that for a few hundred keys, using a
+/// KEY_SIZE of 12 gives false positive rates on the order of 0.25-4%.
+///
+/// Similarly, using a KEY_SIZE of 10 would lead to a 4% false
+/// positive rate for N == 100 and to quite bad false positive
+/// rates for larger N.
 pub struct BloomFilter {
-    buf: Vec<uint>,
-    number_of_insertions: uint,
-}
-
-// Here's where some of the magic numbers came from:
-//
-// m = number of elements in the filter
-// n = size of the filter
-// k = number of hash functions
-//
-// p = Pr[false positive] = 0.01 false positive rate
-//
-// if we have an estimation of the number of elements in the bloom filter, we
-// know m.
-//
-// p = (1 - exp(-kn/m))^k
-// k = (m/n)ln2
-// lnp = -(m/n)(ln2)^2
-// m = -nlnp/(ln2)^2
-// => n = -m(ln2)^2/lnp
-//     ~= 10*m
-//
-// k = (m/n)ln2 = 10ln2 ~= 7
-
-static NUMBER_OF_HASHES: uint = 7;
-
-static BITS_PER_BUCKET: uint = 4;
-static BUCKETS_PER_WORD: uint = uint::BITS / BITS_PER_BUCKET;
-
-/// Returns a tuple of (array index, lsr shift amount) to get to the bits you
-/// need. Don't forget to mask with 0xF!
-fn bucket_index_to_array_index(bucket_index: uint) -> (uint, uint) {
-    let arr_index = bucket_index / BUCKETS_PER_WORD;
-    let shift_amount = (bucket_index % BUCKETS_PER_WORD) * BITS_PER_BUCKET;
-    (arr_index, shift_amount)
-}
-
-// Key Stretching
-// ==============
-//
-// Siphash is expensive. Instead of running it `NUMBER_OF_HASHES`, which would
-// be a pretty big hit on performance, we just use it to see a non-cryptographic
-// random number generator. This stretches the hash to get us our
-// `NUMBER_OF_HASHES` array indicies.
-//
-// A hash is a `u64` and comes from SipHash.
-// A shash is a `uint` stretched hash which comes from the XorShiftRng.
-
-fn to_rng(hash: u64) -> rand::XorShiftRng {
-    let bottom = (hash & 0xFFFFFFFF) as u32;
-    let top    = ((hash >> 32) & 0xFFFFFFFF) as u32;
-    rand::SeedableRng::from_seed([ 0x97830e05, 0x113ba7bb, bottom, top ])
+    counters: [u8, ..ARRAY_SIZE],
 }
 
-fn stretch<'a>(r: &'a mut rand::XorShiftRng)
-  -> iter::Take<rand::Generator<'a, uint, rand::XorShiftRng>> {
-    r.gen_iter().take(NUMBER_OF_HASHES)
+impl Clone for BloomFilter {
+    #[inline]
+    fn clone(&self) -> BloomFilter {
+        BloomFilter {
+            counters: self.counters,
+        }
+    }
 }
 
 impl BloomFilter {
-    /// This bloom filter is tuned to have ~1% false positive rate. In exchange
-    /// for this guarantee, you need to have a reasonable upper bound on the
-    /// number of elements that will ever be inserted into it. If you guess too
-    /// low, your false positive rate will suffer. If you guess too high, you'll
-    /// use more memory than is really necessary.
-    pub fn new(expected_number_of_insertions: uint) -> BloomFilter {
-        let size_in_buckets = 10 * expected_number_of_insertions;
-
-        let size_in_words = size_in_buckets / BUCKETS_PER_WORD;
-
-        let nonzero_size = if size_in_words == 0 { 1 } else { size_in_words };
-
-        let num_words =
-            num::checked_next_power_of_two(nonzero_size)
-            .unwrap();
-
+    /// Creates a new bloom filter.
+    #[inline]
+    pub fn new() -> BloomFilter {
         BloomFilter {
-            buf: Vec::from_elem(num_words, 0),
-            number_of_insertions: 0,
+            counters: [0, ..ARRAY_SIZE],
         }
     }
 
-    /// Since the array length must be a power of two, this will return a
-    /// bitmask that can be `&`ed with a number to bring it into the range of
-    /// the array.
-    fn mask(&self) -> uint {
-        (self.buf.len()*BUCKETS_PER_WORD) - 1 // guaranteed to be a power of two
+    #[inline]
+    fn first_slot(&self, hash: u32) -> &u8 {
+        &self.counters[hash1(hash) as uint]
     }
 
-    /// Converts a stretched hash into a bucket index.
-    fn shash_to_bucket_index(&self, shash: uint) -> uint {
-        shash & self.mask()
+    #[inline]
+    fn first_mut_slot(&mut self, hash: u32) -> &mut u8 {
+        &mut self.counters[hash1(hash) as uint]
     }
 
-    /// Converts a stretched hash into an array and bit index. See the comment
-    /// on `bucket_index_to_array_index` for details about the return value.
-    fn shash_to_array_index(&self, shash: uint) -> (uint, uint) {
-        bucket_index_to_array_index(self.shash_to_bucket_index(shash))
+    #[inline]
+    fn second_slot(&self, hash: u32) -> &u8 {
+        &self.counters[hash2(hash) as uint]
     }
 
-    /// Gets the value at a given bucket.
-    fn bucket_get(&self, a_idx: uint, shift_amount: uint) -> uint {
-        let array_val = self.buf[a_idx];
-        (array_val >> shift_amount) & 0xF
+    #[inline]
+    fn second_mut_slot(&mut self, hash: u32) -> &mut u8 {
+        &mut self.counters[hash2(hash) as uint]
     }
 
-    /// Sets the value at a given bucket. This will not bounds check, but that's
-    /// ok because you've called `bucket_get` first, anyhow.
-    fn bucket_set(&mut self, a_idx: uint, shift_amount: uint, new_val: uint) {
-        // We can avoid bounds checking here since in order to do a bucket_set
-        // we have to had done a `bucket_get` at the same index for it to make
-        // sense.
-        let old_val = self.buf.as_mut_slice().get_mut(a_idx).unwrap();
-        let mask = (1 << BITS_PER_BUCKET) - 1;                // selects the right-most bucket
-        let select_in_bucket = mask << shift_amount;          // selects the correct bucket
-        let select_out_of_bucket = !select_in_bucket;         // selects everything except the correct bucket
-        let new_array_val = (new_val << shift_amount)         // move the new_val into the right spot
-                          | (*old_val & select_out_of_bucket); // mask out the old value, and or it with the new one
-        *old_val = new_array_val;
+    #[inline]
+    pub fn clear(&mut self) {
+        self.counters = [0, ..ARRAY_SIZE]
     }
 
-    /// Insert a stretched hash into the bloom filter, remembering to saturate
-    /// the counter instead of overflowing.
-    fn insert_shash(&mut self, shash: uint) {
-        let (a_idx, shift_amount) = self.shash_to_array_index(shash);
-        let b_val = self.bucket_get(a_idx, shift_amount);
-
-
-        // saturate the count.
-        if b_val == 0xF {
-            return;
+    #[inline]
+    fn insert_hash(&mut self, hash: u32) {
+        {
+            let slot1 = self.first_mut_slot(hash);
+            if !full(slot1) {
+                *slot1 += 1
+            }
         }
-
-        let new_val = b_val + 1;
-
-        self.bucket_set(a_idx, shift_amount, new_val);
-    }
-
-    /// Insert a hashed value into the bloom filter.
-    fn insert_hashed(&mut self, hash: u64) {
-        self.number_of_insertions += 1;
-        for h in stretch(&mut to_rng(hash)) {
-            self.insert_shash(h);
+        {
+            let slot2 = self.second_mut_slot(hash);
+            if !full(slot2) {
+                *slot2 += 1
+            }
         }
     }
 
-    /// Inserts a value into the bloom filter. Note that the bloom filter isn't
-    /// parameterized over the values it holds. That's because it can hold
-    /// values of different types, as long as it can get a hash out of them.
-    pub fn insert<H: Hash<FnvState>>(&mut self, h: &H) {
-        self.insert_hashed(hash(h))
-    }
-
-    /// Removes a stretched hash from the bloom filter, taking care not to
-    /// decrememnt saturated counters.
-    ///
-    /// It is an error to remove never-inserted elements.
-    fn remove_shash(&mut self, shash: uint) {
-        let (a_idx, shift_amount) = self.shash_to_array_index(shash);
-        let b_val = self.bucket_get(a_idx, shift_amount);
-        assert!(b_val != 0, "Removing an element that was never inserted.");
+    /// Inserts an item into the bloom filter.
+    #[inline]
+    pub fn insert<T:BloomHash>(&mut self, elem: &T) {
+        self.insert_hash(elem.bloom_hash())
 
-        // can't do anything if the counter saturated.
-        if b_val == 0xF { return; }
-
-        self.bucket_set(a_idx, shift_amount, b_val - 1);
     }
 
-    /// Removes a hashed value from the bloom filter.
-    fn remove_hashed(&mut self, hash: u64) {
-        self.number_of_insertions -= 1;
-        for h in stretch(&mut to_rng(hash)) {
-            self.remove_shash(h);
+    #[inline]
+    fn remove_hash(&mut self, hash: u32) {
+        {
+            let slot1 = self.first_mut_slot(hash);
+            if !full(slot1) {
+                *slot1 -= 1
+            }
+        }
+        {
+            let slot2 = self.second_mut_slot(hash);
+            if !full(slot2) {
+                *slot2 -= 1
+            }
         }
     }
 
-    /// Removes a value from the bloom filter.
-    ///
-    /// Be careful of adding and removing lots of elements, especially for
-    /// long-lived bloom filters. The counters in each bucket will saturate if
-    /// 16 or more elements hash to it, and then stick there. This will hurt
-    /// your false positive rate. To fix this, you might consider refreshing the
-    /// bloom filter by `clear`ing it, and then reinserting elements at regular,
-    /// long intervals.
-    ///
-    /// It is an error to remove never-inserted elements.
-    pub fn remove<H: Hash<FnvState>>(&mut self, h: &H) {
-        self.remove_hashed(hash(h))
+    /// Removes an item from the bloom filter.
+    #[inline]
+    pub fn remove<T:BloomHash>(&mut self, elem: &T) {
+        self.remove_hash(elem.bloom_hash())
     }
 
-    /// Returns `true` if the bloom filter cannot possibly contain the given
-    /// stretched hash.
-    fn definitely_excludes_shash(&self, shash: uint) -> bool {
-        let (a_idx, shift_amount) = self.shash_to_array_index(shash);
-        self.bucket_get(a_idx, shift_amount) == 0
+    #[inline]
+    fn might_contain_hash(&self, hash: u32) -> bool {
+        *self.first_slot(hash) != 0 && *self.second_slot(hash) != 0
     }
 
-    /// A hash is definitely excluded iff none of the stretched hashes are in
-    /// the bloom filter.
-    fn definitely_excludes_hashed(&self, hash: u64) -> bool {
-        let mut ret = false;
+    /// Check whether the filter might contain an item.  This can
+    /// sometimes return true even if the item is not in the filter,
+    /// but will never return false for items that are actually in the
+    /// filter.
+    #[inline]
+    pub fn might_contain<T:BloomHash>(&self, elem: &T) -> bool {
+        self.might_contain_hash(elem.bloom_hash())
+    }
+}
 
-        // Doing `.any` is slower than this branch-free version.
-        for shash in stretch(&mut to_rng(hash)) {
-            ret |= self.definitely_excludes_shash(shash);
-        }
+pub trait BloomHash {
+    fn bloom_hash(&self) -> u32;
+}
 
-        ret
+impl BloomHash for int {
+    #[inline]
+    fn bloom_hash(&self) -> u32 {
+        ((*self >> 32) ^ *self) as u32
     }
+}
 
-    /// A bloom filter can tell you whether or not a value has definitely never
-    /// been inserted. Note that bloom filters can give false positives.
-    pub fn definitely_excludes<H: Hash<FnvState>>(&self, h: &H) -> bool {
-        self.definitely_excludes_hashed(hash(h))
+impl BloomHash for uint {
+    #[inline]
+    fn bloom_hash(&self) -> u32 {
+        ((*self >> 32) ^ *self) as u32
     }
+}
 
-    /// A bloom filter can tell you if an element /may/ be in it. It cannot be
-    /// certain. But, assuming correct usage, this query will have a low false
-    /// positive rate.
-    pub fn may_include<H: Hash<FnvState>>(&self, h: &H) -> bool {
-        !self.definitely_excludes(h)
+impl BloomHash for Atom {
+    #[inline]
+    fn bloom_hash(&self) -> u32 {
+        ((self.data >> 32) ^ self.data) as u32
     }
+}
 
-    /// Returns the number of elements ever inserted into the bloom filter - the
-    /// number of elements removed.
-    pub fn number_of_insertions(&self) -> uint {
-        self.number_of_insertions
+impl BloomHash for Namespace {
+    #[inline]
+    fn bloom_hash(&self) -> u32 {
+        let Namespace(ref atom) = *self;
+        atom.bloom_hash()
     }
+}
 
-    /// Returns the number of bytes of memory the bloom filter uses.
-    pub fn size(&self) -> uint {
-        self.buf.len() * uint::BYTES
-    }
+#[inline]
+fn full(slot: &u8) -> bool {
+    *slot == 0xff
+}
 
-    /// Removes all elements from the bloom filter. This is both more efficient
-    /// and has better false-positive properties than repeatedly calling `remove`
-    /// on every element.
-    pub fn clear(&mut self) {
-        self.number_of_insertions = 0;
-        for x in self.buf.as_mut_slice().iter_mut() {
-            *x = 0u;
-        }
-    }
+#[inline]
+fn hash1(hash: u32) -> u32 {
+    hash & KEY_MASK
+}
+
+#[inline]
+fn hash2(hash: u32) -> u32 {
+    (hash >> KEY_SHIFT) & KEY_MASK
 }
 
 #[test]
 fn create_and_insert_some_stuff() {
     use std::iter::range;
 
-    let mut bf = BloomFilter::new(1000);
+    let mut bf = BloomFilter::new();
 
     for i in range(0u, 1000) {
         bf.insert(&i);
     }
 
-    assert_eq!(bf.number_of_insertions(), 1000);
-
     for i in range(0u, 1000) {
-        assert!(bf.may_include(&i));
+        assert!(bf.might_contain(&i));
     }
 
     let false_positives =
-        range(1001u, 2000).filter(|i| bf.may_include(&i)).count();
+        range(1001u, 2000).filter(|i| bf.might_contain(i)).count();
 
     assert!(false_positives < 10) // 1%.
 
@@ -293,22 +235,18 @@ fn create_and_insert_some_stuff() {
         bf.remove(&i);
     }
 
-    assert_eq!(bf.number_of_insertions(), 900);
-
     for i in range(100u, 1000) {
-        assert!(bf.may_include(&i));
+        assert!(bf.might_contain(&i));
     }
 
-    let false_positives = range(0u, 100).filter(|i| bf.may_include(&i)).count();
+    let false_positives = range(0u, 100).filter(|i| bf.might_contain(i)).count();
 
     assert!(false_positives < 2); // 2%.
 
     bf.clear();
 
-    assert_eq!(bf.number_of_insertions(), 0);
-
     for i in range(0u, 2000) {
-        assert!(bf.definitely_excludes(&i));
+        assert!(!bf.might_contain(&i));
     }
 }
 
@@ -323,7 +261,7 @@ mod bench {
     #[bench]
     fn create_insert_1000_remove_100_lookup_100(b: &mut test::Bencher) {
         b.iter(|| {
-            let mut bf = BloomFilter::new(1000);
+            let mut bf = BloomFilter::new();
             for i in iter::range(0u, 1000) {
                 bf.insert(&i);
             }
@@ -331,14 +269,14 @@ mod bench {
                 bf.remove(&i);
             }
             for i in iter::range(100u, 200) {
-                test::black_box(bf.may_include(&i));
+                test::black_box(bf.might_contain(&i));
             }
         });
     }
 
     #[bench]
-    fn may_include(b: &mut test::Bencher) {
-        let mut bf = BloomFilter::new(1000);
+    fn might_contain(b: &mut test::Bencher) {
+        let mut bf = BloomFilter::new();
 
         for i in iter::range(0u, 1000) {
             bf.insert(&i);
@@ -348,7 +286,7 @@ mod bench {
 
         b.bench_n(1000, |b| {
             b.iter(|| {
-                test::black_box(bf.may_include(&i));
+                test::black_box(bf.might_contain(&i));
                 i += 1;
             });
         });
@@ -356,7 +294,7 @@ mod bench {
 
     #[bench]
     fn insert(b: &mut test::Bencher) {
-        let mut bf = BloomFilter::new(1000);
+        let mut bf = BloomFilter::new();
 
         b.bench_n(1000, |b| {
             let mut i = 0u;
@@ -370,7 +308,7 @@ mod bench {
 
     #[bench]
     fn remove(b: &mut test::Bencher) {
-        let mut bf = BloomFilter::new(1000);
+        let mut bf = BloomFilter::new();
         for i in range(0u, 1000) {
             bf.insert(&i);
         }
@@ -384,7 +322,7 @@ mod bench {
             });
         });
 
-        test::black_box(bf.may_include(&0u));
+        test::black_box(bf.might_contain(&0u));
     }
 
     #[bench]
@@ -396,3 +334,4 @@ mod bench {
         })
     }
 }
+
diff --git a/components/util/namespace.rs b/components/util/namespace.rs
index 810ac7c4456..c138a29706a 100644
--- a/components/util/namespace.rs
+++ b/components/util/namespace.rs
@@ -11,3 +11,4 @@ pub fn from_domstring(url: Option<DOMString>) -> Namespace {
         Some(ref s) => Namespace(Atom::from_slice(s.as_slice())),
     }
 }
+
author	Patrick Walton <pcwalton@mimiga.net>	2014-09-16 22:58:52 -0700
committer	Patrick Walton <pcwalton@mimiga.net>	2014-10-10 17:02:27 -0700
commit	2a790d06dd74b1de0c47d433c7fa3a9d8af03efc (patch)
tree	83346e183c3bf7ef3d8d4edf554667bc263e73c4
parent	878ece58da7f60b45e9230356ac7a5bbf7351e5b (diff)
download	servo-2a790d06dd74b1de0c47d433c7fa3a9d8af03efc.tar.gz servo-2a790d06dd74b1de0c47d433c7fa3a9d8af03efc.zip