about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author bors-servo <lbergstrom+bors@mozilla.com> 2017-06-07 19:57:57 -0700
committer GitHub <noreply@github.com> 2017-06-07 19:57:57 -0700
commit c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2 (patch)
tree 35c89008254a588e87a45bf9cb9dd80778ea3b09
parent ad47d33511c318fe208158bb16deb5086979e0c7 (diff)
parent 7fd1626901af324f26208ceaba66e75c4d1c2154 (diff)
download servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.tar.gz
servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.zip
Auto merge of #17192 - mbrubeck:layout, r=pcwalton
Parallel layout optimizations

This takes some of the optimizations made to parallel styling in #16971 and applies them to parallel layout. Specifically:

* Reduce the chunk size, to increase chances for parallelism on trees with small fan-out.
* Reduce allocations by using SmallVec.
* Reduce task switching by processing up to one chunk of children within the same rayon task as the parent.

This cuts the "Primary Layout Pass" time in **half** on the MySpace page from [tp5n], and on my other real-world test pages it is a small improvement or close to no change.

[tp5n]: https://wiki.mozilla.org/Buildbot/Talos/Tests#tp5n_pages_set

---
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes do not require tests because they affect performance only

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/17192)
<!-- Reviewable:end -->
-rw-r--r-- components/layout/parallel.rs 39
-rw-r--r-- components/layout_thread/lib.rs 8
2 files changed, 34 insertions, 13 deletions
diff --git a/components/layout/parallel.rs b/components/layout/parallel.rs
index ada231b8119..713fdadcf38 100644
--- a/components/layout/parallel.rs
+++ b/components/layout/parallel.rs
@@ -14,6 +14,7 @@ use flow_ref::FlowRef;
use profile_traits::time::{self, TimerMetadata, profile};
use rayon;
use servo_config::opts;
+use smallvec::SmallVec;
use std::mem;
use std::sync::atomic::{AtomicIsize, Ordering};
use style::dom::UnsafeNode;
@@ -23,10 +24,9 @@ use traversal::AssignBSizes;
pub use style::parallel::traverse_dom;
/// Traversal chunk size.
-///
-/// FIXME(bholley): This is all likely very inefficient and should probably be
-/// reworked to mirror the style system's parallel.rs.
-pub const CHUNK_SIZE: usize = 64;
+const CHUNK_SIZE: usize = 16;
+
+pub type FlowList = SmallVec<[UnsafeNode; CHUNK_SIZE]>;
#[allow(dead_code)]
fn static_assertion(node: UnsafeNode) {
@@ -131,7 +131,7 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow],
assign_isize_traversal: &'scope AssignISizes,
assign_bsize_traversal: &'scope AssignBSizes)
{
- let mut discovered_child_flows = vec![];
+ let mut discovered_child_flows = FlowList::new();
for unsafe_flow in unsafe_flows {
let mut had_children = false;
@@ -164,12 +164,29 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow],
}
}
- for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
- let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice();
+ if discovered_child_flows.is_empty() {
+ return
+ }
- scope.spawn(move |scope| {
- top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal);
- });
+ if discovered_child_flows.len() <= CHUNK_SIZE {
+ // We can handle all the children in this work unit.
+ top_down_flow(&discovered_child_flows,
+ scope,
+ &assign_isize_traversal,
+ &assign_bsize_traversal);
+ } else {
+ // Spawn a new work unit for each chunk after the first.
+ let mut chunks = discovered_child_flows.chunks(CHUNK_SIZE);
+ let first_chunk = chunks.next();
+ for chunk in chunks {
+ let nodes = chunk.iter().cloned().collect::<FlowList>();
+ scope.spawn(move |scope| {
+ top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal);
+ });
+ }
+ if let Some(chunk) = first_chunk {
+ top_down_flow(chunk, scope, &assign_isize_traversal, &assign_bsize_traversal);
+ }
}
}
@@ -186,7 +203,7 @@ pub fn traverse_flow_tree_preorder(
let assign_isize_traversal = &AssignISizes { layout_context: &context };
let assign_bsize_traversal = &AssignBSizes { layout_context: &context };
- let nodes = vec![borrowed_flow_to_unsafe_flow(root)].into_boxed_slice();
+ let nodes = [borrowed_flow_to_unsafe_flow(root)];
queue.install(move || {
rayon::scope(move |scope| {
diff --git a/components/layout_thread/lib.rs b/components/layout_thread/lib.rs
index 4e91da113a6..364ce2745ac 100644
--- a/components/layout_thread/lib.rs
+++ b/components/layout_thread/lib.rs
@@ -443,7 +443,11 @@ impl LayoutThread {
let configuration =
rayon::Configuration::new().num_threads(layout_threads);
- let parallel_traversal = rayon::ThreadPool::new(configuration).ok();
+ let parallel_traversal = if layout_threads > 1 {
+ Some(rayon::ThreadPool::new(configuration).expect("ThreadPool creation failed"))
+ } else {
+ None
+ };
debug!("Possible layout Threads: {}", layout_threads);
// Create the channel on which new animations can be sent.
@@ -1074,7 +1078,7 @@ impl LayoutThread {
debug!("layout: processing reflow request for: {:?} ({}) (query={:?})",
element, self.url, data.query_type);
- debug!("{:?}", ShowSubtree(element.as_node()));
+ trace!("{:?}", ShowSubtree(element.as_node()));
let initial_viewport = data.window_size.initial_viewport;
let old_viewport_size = self.viewport_size;