diff options
-rw-r--r-- | components/style/context.rs | 70 | ||||
-rw-r--r-- | components/style/gecko/global_style_data.rs | 5 | ||||
-rw-r--r-- | components/style/parallel.rs | 41 |
3 files changed, 96 insertions, 20 deletions
diff --git a/components/style/context.rs b/components/style/context.rs index c7c5d2ae354..3183cd05747 100644 --- a/components/style/context.rs +++ b/components/style/context.rs @@ -16,6 +16,7 @@ use euclid::Size2D; use fnv::FnvHashMap; use font_metrics::FontMetricsProvider; #[cfg(feature = "gecko")] use gecko_bindings::structs; +use parallel::STYLE_THREAD_STACK_SIZE_KB; #[cfg(feature = "servo")] use parking_lot::RwLock; use properties::ComputedValues; #[cfg(feature = "servo")] use properties::PropertyId; @@ -605,6 +606,61 @@ where } } + +/// A helper type for stack limit checking. This assumes that stacks grow +/// down, which is true for all non-ancient CPU architectures. +pub struct StackLimitChecker { + lower_limit: usize +} + +impl StackLimitChecker { + /// Create a new limit checker, for this thread, allowing further use + /// of up to |stack_size| bytes beyond (below) the current stack pointer. + #[inline(never)] + pub fn new(stack_size_limit: usize) -> Self { + StackLimitChecker { + lower_limit: StackLimitChecker::get_sp() - stack_size_limit + } + } + + /// Checks whether the previously stored stack limit has now been exceeded. + #[inline(never)] + pub fn limit_exceeded(&self) -> bool { + let curr_sp = StackLimitChecker::get_sp(); + + // Try to assert if we're called from a different thread than the + // one that originally created this object. This is a bit subtle + // and relies on wraparound behaviour of unsigned integers. + // + // * If we're called from a thread whose stack has a higher address + // than the one that created this object, then + // |curr_sp - self.lower_limit| will (almost certainly) be larger + // than the thread stack size, so the check will fail. + // + // * If we're called from a thread whose stack has a lower address + // than the one that created this object, then + // |curr_sp - self.lower_limit| will be negative, which will look + // like a very large unsigned value, so the check will also fail. + // + // The correctness of depends on the assumption that no stack wraps + // around the end of the address space. + debug_assert!(curr_sp - self.lower_limit + <= STYLE_THREAD_STACK_SIZE_KB * 1024); + + // The actual bounds check. + curr_sp <= self.lower_limit + } + + // Technically, rustc can optimize this away, but shouldn't for now. + // We should fix this once black_box is stable. + #[inline(always)] + fn get_sp() -> usize { + let mut foo: usize = 42; + (&mut foo as *mut usize) as usize + } +} + + /// A thread-local style context. /// /// This context contains data that needs to be used during restyling, but is @@ -639,6 +695,9 @@ pub struct ThreadLocalStyleContext<E: TElement> { /// The struct used to compute and cache font metrics from style /// for evaluation of the font-relative em/ch units and font-size pub font_metrics_provider: E::FontMetricsProvider, + /// A checker used to ensure that parallel.rs does not recurse indefinitely + /// even on arbitrarily deep trees. See Gecko bug 1376883. + pub stack_limit_checker: StackLimitChecker, } impl<E: TElement> ThreadLocalStyleContext<E> { @@ -654,6 +713,8 @@ impl<E: TElement> ThreadLocalStyleContext<E> { statistics: TraversalStatistics::default(), current_element_info: None, font_metrics_provider: E::FontMetricsProvider::create_from(shared), + stack_limit_checker: StackLimitChecker::new( + (STYLE_THREAD_STACK_SIZE_KB - 40) * 1024), } } @@ -668,6 +729,15 @@ impl<E: TElement> ThreadLocalStyleContext<E> { statistics: TraversalStatistics::default(), current_element_info: None, font_metrics_provider: E::FontMetricsProvider::create_from(shared), + // Threads in the styling pool have small stacks, and we have to + // be careful not to run out of stack during recursion in + // parallel.rs. Therefore set up a stack limit checker, in + // which we reserve 40KB of stack as a safety buffer. Currently + // the stack size is 128KB, so this allows 88KB for recursive + // DOM traversal, which encompasses 53 levels of recursion before + // the limiter kicks in, on x86_64-Linux. See Gecko bug 1376883. + stack_limit_checker: StackLimitChecker::new( + (STYLE_THREAD_STACK_SIZE_KB - 40) * 1024), } } diff --git a/components/style/gecko/global_style_data.rs b/components/style/gecko/global_style_data.rs index b5698e02193..8ba55191c03 100644 --- a/components/style/gecko/global_style_data.rs +++ b/components/style/gecko/global_style_data.rs @@ -9,6 +9,7 @@ use gecko_bindings::bindings; use gecko_bindings::bindings::{Gecko_RegisterProfilerThread, Gecko_UnregisterProfilerThread}; use gecko_bindings::bindings::Gecko_SetJemallocThreadLocalArena; use num_cpus; +use parallel::STYLE_THREAD_STACK_SIZE_KB; use rayon; use shared_lock::SharedRwLock; use std::cmp; @@ -92,7 +93,9 @@ lazy_static! { .breadth_first() .thread_name(thread_name) .start_handler(thread_startup) - .exit_handler(thread_shutdown); + .exit_handler(thread_shutdown) + // Set thread stack size to 128KB. See Gecko bug 1376883. + .stack_size(STYLE_THREAD_STACK_SIZE_KB * 1024); let pool = rayon::ThreadPool::new(configuration).ok(); pool }; diff --git a/components/style/parallel.rs b/components/style/parallel.rs index db5466e12a3..7e9e8154122 100644 --- a/components/style/parallel.rs +++ b/components/style/parallel.rs @@ -32,6 +32,9 @@ use std::borrow::Borrow; use time; use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken}; +/// The minimum stack size for a thread in the styling pool, in kilobytes. +pub const STYLE_THREAD_STACK_SIZE_KB: usize = 128; + /// The maximum number of child nodes that we will process as a single unit. /// /// Larger values will increase style sharing cache hits and general DOM @@ -77,7 +80,7 @@ pub fn traverse_dom<E, D>(traversal: &D, let root_opaque = root.opaque(); traverse_nodes(&[root], DispatchMode::TailCall, - 0, + true, root_opaque, traversal_data, scope, @@ -132,7 +135,6 @@ fn create_thread_local_context<'scope, E, D>( #[inline(always)] #[allow(unsafe_code)] fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>], - recursion_depth: usize, root: OpaqueNode, mut traversal_data: PerLevelTraversalData, scope: &'a rayon::Scope<'scope>, @@ -144,6 +146,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>], { debug_assert!(nodes.len() <= WORK_UNIT_MAX); + // We set this below, when we have a borrow of the thread-local-context + // available. + let recursion_ok; + // Collect all the children of the elements in our work unit. This will // contain the combined children of up to WORK_UNIT_MAX nodes, which may // be numerous. As such, we store it in a large SmallVec to minimize heap- @@ -154,6 +160,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>], // a potential recursive call when we pass TailCall. let mut tlc = tls.ensure( |slot: &mut Option<ThreadLocalStyleContext<E>>| create_thread_local_context(traversal, slot)); + + // Check that we're not in danger of running out of stack. + recursion_ok = !tlc.stack_limit_checker.limit_exceeded(); + let mut context = StyleContext { shared: traversal.shared_context(), thread_local: &mut *tlc, @@ -202,7 +212,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>], traversal_data_copy.current_dom_depth += 1; traverse_nodes(&*discovered_child_nodes, DispatchMode::NotTailCall, - recursion_depth, + recursion_ok, root, traversal_data_copy, scope, @@ -232,7 +242,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>], traversal_data.current_dom_depth += 1; traverse_nodes(&discovered_child_nodes, DispatchMode::TailCall, - recursion_depth, + recursion_ok, root, traversal_data, scope, @@ -254,16 +264,10 @@ impl DispatchMode { fn is_tail_call(&self) -> bool { matches!(*self, DispatchMode::TailCall) } } -// On x86_64-linux, a recursive cycle requires 3472 bytes of stack. Limiting -// the depth to 150 therefore should keep the stack use by the recursion to -// 520800 bytes, which would give a generously conservative margin should we -// decide to reduce the thread stack size from its default of 2MB down to 1MB. -const RECURSION_DEPTH_LIMIT: usize = 150; - #[inline] fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>], mode: DispatchMode, - recursion_depth: usize, + recursion_ok: bool, root: OpaqueNode, traversal_data: PerLevelTraversalData, scope: &'a rayon::Scope<'scope>, @@ -279,12 +283,11 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>], // want to actually dispatch the job as a tail call if there's nothing left // in our local queue. Otherwise we need to return to it to maintain proper // breadth-first ordering. We also need to take care to avoid stack - // overflow due to excessive tail recursion. The stack overflow isn't - // observable to content -- we're still completely correct, just not - // using tail recursion any more. See bug 1368302. - debug_assert!(recursion_depth <= RECURSION_DEPTH_LIMIT); + // overflow due to excessive tail recursion. The stack overflow avoidance + // isn't observable to content -- we're still completely correct, just not + // using tail recursion any more. See Gecko bugs 1368302 and 1376883. let may_dispatch_tail = mode.is_tail_call() && - recursion_depth != RECURSION_DEPTH_LIMIT && + recursion_ok && !pool.current_thread_has_pending_tasks().unwrap(); // In the common case, our children fit within a single work unit, in which @@ -292,12 +295,12 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>], if nodes.len() <= WORK_UNIT_MAX { let work = nodes.iter().cloned().collect::<WorkUnit<E::ConcreteNode>>(); if may_dispatch_tail { - top_down_dom(&work, recursion_depth + 1, root, + top_down_dom(&work, root, traversal_data, scope, pool, traversal, tls); } else { scope.spawn(move |scope| { let work = work; - top_down_dom(&work, 0, root, + top_down_dom(&work, root, traversal_data, scope, pool, traversal, tls); }); } @@ -307,7 +310,7 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>], let traversal_data_copy = traversal_data.clone(); scope.spawn(move |scope| { let n = nodes; - top_down_dom(&*n, 0, root, + top_down_dom(&*n, root, traversal_data_copy, scope, pool, traversal, tls) }); } |