Auto merge of #17192 - mbrubeck:layout, r=pcwalton

Parallel layout optimizations

This takes some of the optimizations made to parallel styling in #16971 and applies them to parallel layout. Specifically:

* Reduce the chunk size, to increase chances for parallelism on trees with small fan-out.
* Reduce allocations by using SmallVec.
* Reduce task switching by processing up to one chunk of children within the same rayon task as the parent.

This cuts the "Primary Layout Pass" time in **half** on the MySpace page from [tp5n], and on my other real-world test pages it is a small improvement or close to no change.

[tp5n]: https://wiki.mozilla.org/Buildbot/Talos/Tests#tp5n_pages_set

---

- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes do not require tests because they affect performance only

---

This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/17192)
author: bors-servo <lbergstrom+bors@mozilla.com> 2017-06-07 19:57:57 -0700
committer: GitHub <noreply@github.com> 2017-06-07 19:57:57 -0700
commit: c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2 (patch)
tree: 35c89008254a588e87a45bf9cb9dd80778ea3b09
parent: ad47d33511c318fe208158bb16deb5086979e0c7 (diff)
parent: 7fd1626901af324f26208ceaba66e75c4d1c2154 (diff)
download: servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.tar.gz
servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.zip
2 files changed, 34 insertions, 13 deletions
diff --git a/components/layout/parallel.rs b/components/layout/parallel.rs
index ada231b8119..713fdadcf38 100644
--- a/components/layout/parallel.rs
+++ b/components/layout/parallel.rs
@@ -14,6 +14,7 @@ use flow_ref::FlowRef;
 use profile_traits::time::{self, TimerMetadata, profile};
 use rayon;
 use servo_config::opts;
+use smallvec::SmallVec;
 use std::mem;
 use std::sync::atomic::{AtomicIsize, Ordering};
 use style::dom::UnsafeNode;
@@ -23,10 +24,9 @@ use traversal::AssignBSizes;
 pub use style::parallel::traverse_dom;
 
 /// Traversal chunk size.
-///
-/// FIXME(bholley): This is all likely very inefficient and should probably be
-/// reworked to mirror the style system's parallel.rs.
-pub const CHUNK_SIZE: usize = 64;
+const CHUNK_SIZE: usize = 16;
+
+pub type FlowList = SmallVec<[UnsafeNode; CHUNK_SIZE]>;
 
 #[allow(dead_code)]
 fn static_assertion(node: UnsafeNode) {
@@ -131,7 +131,7 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow],
                          assign_isize_traversal: &'scope AssignISizes,
                          assign_bsize_traversal: &'scope AssignBSizes)
 {
-    let mut discovered_child_flows = vec![];
+    let mut discovered_child_flows = FlowList::new();
 
     for unsafe_flow in unsafe_flows {
         let mut had_children = false;
@@ -164,12 +164,29 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow],
         }
     }
 
-    for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
-        let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice();
+    if discovered_child_flows.is_empty() {
+        return
+    }
 
-        scope.spawn(move |scope| {
-            top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal);
-        });
+    if discovered_child_flows.len() <= CHUNK_SIZE {
+        // We can handle all the children in this work unit.
+        top_down_flow(&discovered_child_flows,
+                      scope,
+                      &assign_isize_traversal,
+                      &assign_bsize_traversal);
+    } else {
+        // Spawn a new work unit for each chunk after the first.
+        let mut chunks = discovered_child_flows.chunks(CHUNK_SIZE);
+        let first_chunk = chunks.next();
+        for chunk in chunks {
+            let nodes = chunk.iter().cloned().collect::<FlowList>();
+            scope.spawn(move |scope| {
+                top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal);
+            });
+        }
+        if let Some(chunk) = first_chunk {
+            top_down_flow(chunk, scope, &assign_isize_traversal, &assign_bsize_traversal);
+        }
     }
 }
 
@@ -186,7 +203,7 @@ pub fn traverse_flow_tree_preorder(
 
     let assign_isize_traversal = &AssignISizes { layout_context: &context };
     let assign_bsize_traversal = &AssignBSizes { layout_context: &context };
-    let nodes = vec![borrowed_flow_to_unsafe_flow(root)].into_boxed_slice();
+    let nodes = [borrowed_flow_to_unsafe_flow(root)];
 
     queue.install(move || {
         rayon::scope(move |scope| {
diff --git a/components/layout_thread/lib.rs b/components/layout_thread/lib.rs
index 4e91da113a6..364ce2745ac 100644
--- a/components/layout_thread/lib.rs
+++ b/components/layout_thread/lib.rs
@@ -443,7 +443,11 @@ impl LayoutThread {
 
         let configuration =
             rayon::Configuration::new().num_threads(layout_threads);
-        let parallel_traversal = rayon::ThreadPool::new(configuration).ok();
+        let parallel_traversal = if layout_threads > 1 {
+            Some(rayon::ThreadPool::new(configuration).expect("ThreadPool creation failed"))
+        } else {
+            None
+        };
         debug!("Possible layout Threads: {}", layout_threads);
 
         // Create the channel on which new animations can be sent.
@@ -1074,7 +1078,7 @@ impl LayoutThread {
 
         debug!("layout: processing reflow request for: {:?} ({}) (query={:?})",
                element, self.url, data.query_type);
-        debug!("{:?}", ShowSubtree(element.as_node()));
+        trace!("{:?}", ShowSubtree(element.as_node()));
 
         let initial_viewport = data.window_size.initial_viewport;
         let old_viewport_size = self.viewport_size;
author	bors-servo <lbergstrom+bors@mozilla.com>	2017-06-07 19:57:57 -0700
committer	GitHub <noreply@github.com>	2017-06-07 19:57:57 -0700
commit	c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2 (patch)
tree	35c89008254a588e87a45bf9cb9dd80778ea3b09
parent	ad47d33511c318fe208158bb16deb5086979e0c7 (diff)
parent	7fd1626901af324f26208ceaba66e75c4d1c2154 (diff)
download	servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.tar.gz servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.zip