diff options
author | bors-servo <lbergstrom+bors@mozilla.com> | 2017-06-07 19:57:57 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-07 19:57:57 -0700 |
commit | c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2 (patch) | |
tree | 35c89008254a588e87a45bf9cb9dd80778ea3b09 /components/layout | |
parent | ad47d33511c318fe208158bb16deb5086979e0c7 (diff) | |
parent | 7fd1626901af324f26208ceaba66e75c4d1c2154 (diff) | |
download | servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.tar.gz servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.zip |
Auto merge of #17192 - mbrubeck:layout, r=pcwalton
Parallel layout optimizations
This takes some of the optimizations made to parallel styling in #16971 and applies them to parallel layout. Specifically:
* Reduce the chunk size, to increase chances for parallelism on trees with small fan-out.
* Reduce allocations by using SmallVec.
* Reduce task switching by processing up to one chunk of children within the same rayon task as the parent.
This cuts the "Primary Layout Pass" time in **half** on the MySpace page from [tp5n]; on my other real-world test pages the result ranges from a small improvement to essentially no change.
[tp5n]: https://wiki.mozilla.org/Buildbot/Talos/Tests#tp5n_pages_set
---
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes do not require tests because they affect performance only
<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/17192)
<!-- Reviewable:end -->
Diffstat (limited to 'components/layout')
-rw-r--r-- | components/layout/parallel.rs | 39 |
1 file changed, 28 insertions, 11 deletions
diff --git a/components/layout/parallel.rs b/components/layout/parallel.rs index ada231b8119..713fdadcf38 100644 --- a/components/layout/parallel.rs +++ b/components/layout/parallel.rs @@ -14,6 +14,7 @@ use flow_ref::FlowRef; use profile_traits::time::{self, TimerMetadata, profile}; use rayon; use servo_config::opts; +use smallvec::SmallVec; use std::mem; use std::sync::atomic::{AtomicIsize, Ordering}; use style::dom::UnsafeNode; @@ -23,10 +24,9 @@ use traversal::AssignBSizes; pub use style::parallel::traverse_dom; /// Traversal chunk size. -/// -/// FIXME(bholley): This is all likely very inefficient and should probably be -/// reworked to mirror the style system's parallel.rs. -pub const CHUNK_SIZE: usize = 64; +const CHUNK_SIZE: usize = 16; + +pub type FlowList = SmallVec<[UnsafeNode; CHUNK_SIZE]>; #[allow(dead_code)] fn static_assertion(node: UnsafeNode) { @@ -131,7 +131,7 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow], assign_isize_traversal: &'scope AssignISizes, assign_bsize_traversal: &'scope AssignBSizes) { - let mut discovered_child_flows = vec![]; + let mut discovered_child_flows = FlowList::new(); for unsafe_flow in unsafe_flows { let mut had_children = false; @@ -164,12 +164,29 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow], } } - for chunk in discovered_child_flows.chunks(CHUNK_SIZE) { - let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice(); + if discovered_child_flows.is_empty() { + return + } - scope.spawn(move |scope| { - top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal); - }); + if discovered_child_flows.len() <= CHUNK_SIZE { + // We can handle all the children in this work unit. + top_down_flow(&discovered_child_flows, + scope, + &assign_isize_traversal, + &assign_bsize_traversal); + } else { + // Spawn a new work unit for each chunk after the first. 
+ let mut chunks = discovered_child_flows.chunks(CHUNK_SIZE); + let first_chunk = chunks.next(); + for chunk in chunks { + let nodes = chunk.iter().cloned().collect::<FlowList>(); + scope.spawn(move |scope| { + top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal); + }); + } + if let Some(chunk) = first_chunk { + top_down_flow(chunk, scope, &assign_isize_traversal, &assign_bsize_traversal); + } } } @@ -186,7 +203,7 @@ pub fn traverse_flow_tree_preorder( let assign_isize_traversal = &AssignISizes { layout_context: &context }; let assign_bsize_traversal = &AssignBSizes { layout_context: &context }; - let nodes = vec![borrowed_flow_to_unsafe_flow(root)].into_boxed_slice(); + let nodes = [borrowed_flow_to_unsafe_flow(root)]; queue.install(move || { rayon::scope(move |scope| { |