aboutsummaryrefslogtreecommitdiffstats
path: root/components/layout
diff options
context:
space:
mode:
author: bors-servo <lbergstrom+bors@mozilla.com> 2017-06-07 19:57:57 -0700
committer: GitHub <noreply@github.com> 2017-06-07 19:57:57 -0700
commit: c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2 (patch)
tree: 35c89008254a588e87a45bf9cb9dd80778ea3b09 /components/layout
parent: ad47d33511c318fe208158bb16deb5086979e0c7 (diff)
parent: 7fd1626901af324f26208ceaba66e75c4d1c2154 (diff)
download: servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.tar.gz
download: servo-c0f3ec87806a0d718d7f9ef1ccb912c78fc482d2.zip
Auto merge of #17192 - mbrubeck:layout, r=pcwalton
Parallel layout optimizations

This takes some of the optimizations made to parallel styling in #16971 and applies them to parallel layout. Specifically:

* Reduce the chunk size, to increase chances for parallelism on trees with small fan-out.
* Reduce allocations by using SmallVec.
* Reduce task switching by processing up to one chunk of children within the same rayon task as the parent.

This cuts the "Primary Layout Pass" time in **half** on the MySpace page from [tp5n], and on my other real-world test pages it is a small improvement or close to no change.

[tp5n]: https://wiki.mozilla.org/Buildbot/Talos/Tests#tp5n_pages_set

---

- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes do not require tests because they affect performance only

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/17192)
<!-- Reviewable:end -->
Diffstat (limited to 'components/layout')
-rw-r--r-- components/layout/parallel.rs | 39
1 file changed, 28 insertions, 11 deletions
diff --git a/components/layout/parallel.rs b/components/layout/parallel.rs
index ada231b8119..713fdadcf38 100644
--- a/components/layout/parallel.rs
+++ b/components/layout/parallel.rs
@@ -14,6 +14,7 @@ use flow_ref::FlowRef;
use profile_traits::time::{self, TimerMetadata, profile};
use rayon;
use servo_config::opts;
+use smallvec::SmallVec;
use std::mem;
use std::sync::atomic::{AtomicIsize, Ordering};
use style::dom::UnsafeNode;
@@ -23,10 +24,9 @@ use traversal::AssignBSizes;
pub use style::parallel::traverse_dom;
/// Traversal chunk size.
-///
-/// FIXME(bholley): This is all likely very inefficient and should probably be
-/// reworked to mirror the style system's parallel.rs.
-pub const CHUNK_SIZE: usize = 64;
+const CHUNK_SIZE: usize = 16;
+
+pub type FlowList = SmallVec<[UnsafeNode; CHUNK_SIZE]>;
#[allow(dead_code)]
fn static_assertion(node: UnsafeNode) {
@@ -131,7 +131,7 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow],
assign_isize_traversal: &'scope AssignISizes,
assign_bsize_traversal: &'scope AssignBSizes)
{
- let mut discovered_child_flows = vec![];
+ let mut discovered_child_flows = FlowList::new();
for unsafe_flow in unsafe_flows {
let mut had_children = false;
@@ -164,12 +164,29 @@ fn top_down_flow<'scope>(unsafe_flows: &[UnsafeFlow],
}
}
- for chunk in discovered_child_flows.chunks(CHUNK_SIZE) {
- let nodes = chunk.iter().cloned().collect::<Vec<_>>().into_boxed_slice();
+ if discovered_child_flows.is_empty() {
+ return
+ }
- scope.spawn(move |scope| {
- top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal);
- });
+ if discovered_child_flows.len() <= CHUNK_SIZE {
+ // We can handle all the children in this work unit.
+ top_down_flow(&discovered_child_flows,
+ scope,
+ &assign_isize_traversal,
+ &assign_bsize_traversal);
+ } else {
+ // Spawn a new work unit for each chunk after the first.
+ let mut chunks = discovered_child_flows.chunks(CHUNK_SIZE);
+ let first_chunk = chunks.next();
+ for chunk in chunks {
+ let nodes = chunk.iter().cloned().collect::<FlowList>();
+ scope.spawn(move |scope| {
+ top_down_flow(&nodes, scope, &assign_isize_traversal, &assign_bsize_traversal);
+ });
+ }
+ if let Some(chunk) = first_chunk {
+ top_down_flow(chunk, scope, &assign_isize_traversal, &assign_bsize_traversal);
+ }
}
}
@@ -186,7 +203,7 @@ pub fn traverse_flow_tree_preorder(
let assign_isize_traversal = &AssignISizes { layout_context: &context };
let assign_bsize_traversal = &AssignBSizes { layout_context: &context };
- let nodes = vec![borrowed_flow_to_unsafe_flow(root)].into_boxed_slice();
+ let nodes = [borrowed_flow_to_unsafe_flow(root)];
queue.install(move || {
rayon::scope(move |scope| {