aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--components/style/context.rs70
-rw-r--r--components/style/gecko/global_style_data.rs5
-rw-r--r--components/style/parallel.rs41
3 files changed, 96 insertions, 20 deletions
diff --git a/components/style/context.rs b/components/style/context.rs
index c7c5d2ae354..3183cd05747 100644
--- a/components/style/context.rs
+++ b/components/style/context.rs
@@ -16,6 +16,7 @@ use euclid::Size2D;
use fnv::FnvHashMap;
use font_metrics::FontMetricsProvider;
#[cfg(feature = "gecko")] use gecko_bindings::structs;
+use parallel::STYLE_THREAD_STACK_SIZE_KB;
#[cfg(feature = "servo")] use parking_lot::RwLock;
use properties::ComputedValues;
#[cfg(feature = "servo")] use properties::PropertyId;
@@ -605,6 +606,61 @@ where
}
}
+
+/// A helper type for stack limit checking. This assumes that stacks grow
+/// down, which is true for all non-ancient CPU architectures.
+pub struct StackLimitChecker {
+ lower_limit: usize
+}
+
+impl StackLimitChecker {
+ /// Create a new limit checker, for this thread, allowing further use
+ /// of up to |stack_size_limit| bytes beyond (below) the current stack pointer.
+ #[inline(never)]
+ pub fn new(stack_size_limit: usize) -> Self {
+ StackLimitChecker {
+ lower_limit: StackLimitChecker::get_sp() - stack_size_limit
+ }
+ }
+
+ /// Checks whether the previously stored stack limit has now been exceeded.
+ #[inline(never)]
+ pub fn limit_exceeded(&self) -> bool {
+ let curr_sp = StackLimitChecker::get_sp();
+
+ // Try to assert if we're called from a different thread than the
+ // one that originally created this object. This is a bit subtle
+ // and relies on wraparound behaviour of unsigned integers.
+ //
+ // * If we're called from a thread whose stack has a higher address
+ // than the one that created this object, then
+ // |curr_sp - self.lower_limit| will (almost certainly) be larger
+ // than the thread stack size, so the check will fail.
+ //
+ // * If we're called from a thread whose stack has a lower address
+ // than the one that created this object, then
+ // |curr_sp - self.lower_limit| will be negative, which will look
+ // like a very large unsigned value, so the check will also fail.
+ //
+ // The correctness of this check depends on the assumption that no stack wraps
+ // around the end of the address space.
+ debug_assert!(curr_sp - self.lower_limit
+ <= STYLE_THREAD_STACK_SIZE_KB * 1024);
+
+ // The actual bounds check.
+ curr_sp <= self.lower_limit
+ }
+
+ // Technically, rustc can optimize this away, but shouldn't for now.
+ // We should fix this once black_box is stable.
+ #[inline(always)]
+ fn get_sp() -> usize {
+ let mut foo: usize = 42;
+ (&mut foo as *mut usize) as usize
+ }
+}
+
+
/// A thread-local style context.
///
/// This context contains data that needs to be used during restyling, but is
@@ -639,6 +695,9 @@ pub struct ThreadLocalStyleContext<E: TElement> {
/// The struct used to compute and cache font metrics from style
/// for evaluation of the font-relative em/ch units and font-size
pub font_metrics_provider: E::FontMetricsProvider,
+ /// A checker used to ensure that parallel.rs does not recurse indefinitely
+ /// even on arbitrarily deep trees. See Gecko bug 1376883.
+ pub stack_limit_checker: StackLimitChecker,
}
impl<E: TElement> ThreadLocalStyleContext<E> {
@@ -654,6 +713,8 @@ impl<E: TElement> ThreadLocalStyleContext<E> {
statistics: TraversalStatistics::default(),
current_element_info: None,
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
+ stack_limit_checker: StackLimitChecker::new(
+ (STYLE_THREAD_STACK_SIZE_KB - 40) * 1024),
}
}
@@ -668,6 +729,15 @@ impl<E: TElement> ThreadLocalStyleContext<E> {
statistics: TraversalStatistics::default(),
current_element_info: None,
font_metrics_provider: E::FontMetricsProvider::create_from(shared),
+ // Threads in the styling pool have small stacks, and we have to
+ // be careful not to run out of stack during recursion in
+ // parallel.rs. Therefore set up a stack limit checker, in
+ // which we reserve 40KB of stack as a safety buffer. Currently
+ // the stack size is 128KB, so this allows 88KB for recursive
+ // DOM traversal, which encompasses 53 levels of recursion before
+ // the limiter kicks in, on x86_64-Linux. See Gecko bug 1376883.
+ stack_limit_checker: StackLimitChecker::new(
+ (STYLE_THREAD_STACK_SIZE_KB - 40) * 1024),
}
}
diff --git a/components/style/gecko/global_style_data.rs b/components/style/gecko/global_style_data.rs
index b5698e02193..8ba55191c03 100644
--- a/components/style/gecko/global_style_data.rs
+++ b/components/style/gecko/global_style_data.rs
@@ -9,6 +9,7 @@ use gecko_bindings::bindings;
use gecko_bindings::bindings::{Gecko_RegisterProfilerThread, Gecko_UnregisterProfilerThread};
use gecko_bindings::bindings::Gecko_SetJemallocThreadLocalArena;
use num_cpus;
+use parallel::STYLE_THREAD_STACK_SIZE_KB;
use rayon;
use shared_lock::SharedRwLock;
use std::cmp;
@@ -92,7 +93,9 @@ lazy_static! {
.breadth_first()
.thread_name(thread_name)
.start_handler(thread_startup)
- .exit_handler(thread_shutdown);
+ .exit_handler(thread_shutdown)
+ // Set thread stack size to 128KB. See Gecko bug 1376883.
+ .stack_size(STYLE_THREAD_STACK_SIZE_KB * 1024);
let pool = rayon::ThreadPool::new(configuration).ok();
pool
};
diff --git a/components/style/parallel.rs b/components/style/parallel.rs
index db5466e12a3..7e9e8154122 100644
--- a/components/style/parallel.rs
+++ b/components/style/parallel.rs
@@ -32,6 +32,9 @@ use std::borrow::Borrow;
use time;
use traversal::{DomTraversal, PerLevelTraversalData, PreTraverseToken};
+/// The minimum stack size for a thread in the styling pool, in kilobytes.
+pub const STYLE_THREAD_STACK_SIZE_KB: usize = 128;
+
/// The maximum number of child nodes that we will process as a single unit.
///
/// Larger values will increase style sharing cache hits and general DOM
@@ -77,7 +80,7 @@ pub fn traverse_dom<E, D>(traversal: &D,
let root_opaque = root.opaque();
traverse_nodes(&[root],
DispatchMode::TailCall,
- 0,
+ true,
root_opaque,
traversal_data,
scope,
@@ -132,7 +135,6 @@ fn create_thread_local_context<'scope, E, D>(
#[inline(always)]
#[allow(unsafe_code)]
fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
- recursion_depth: usize,
root: OpaqueNode,
mut traversal_data: PerLevelTraversalData,
scope: &'a rayon::Scope<'scope>,
@@ -144,6 +146,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
{
debug_assert!(nodes.len() <= WORK_UNIT_MAX);
+ // We set this below, when we have a borrow of the thread-local-context
+ // available.
+ let recursion_ok;
+
// Collect all the children of the elements in our work unit. This will
// contain the combined children of up to WORK_UNIT_MAX nodes, which may
// be numerous. As such, we store it in a large SmallVec to minimize heap-
@@ -154,6 +160,10 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
// a potential recursive call when we pass TailCall.
let mut tlc = tls.ensure(
|slot: &mut Option<ThreadLocalStyleContext<E>>| create_thread_local_context(traversal, slot));
+
+ // Check that we're not in danger of running out of stack.
+ recursion_ok = !tlc.stack_limit_checker.limit_exceeded();
+
let mut context = StyleContext {
shared: traversal.shared_context(),
thread_local: &mut *tlc,
@@ -202,7 +212,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
traversal_data_copy.current_dom_depth += 1;
traverse_nodes(&*discovered_child_nodes,
DispatchMode::NotTailCall,
- recursion_depth,
+ recursion_ok,
root,
traversal_data_copy,
scope,
@@ -232,7 +242,7 @@ fn top_down_dom<'a, 'scope, E, D>(nodes: &'a [SendNode<E::ConcreteNode>],
traversal_data.current_dom_depth += 1;
traverse_nodes(&discovered_child_nodes,
DispatchMode::TailCall,
- recursion_depth,
+ recursion_ok,
root,
traversal_data,
scope,
@@ -254,16 +264,10 @@ impl DispatchMode {
fn is_tail_call(&self) -> bool { matches!(*self, DispatchMode::TailCall) }
}
-// On x86_64-linux, a recursive cycle requires 3472 bytes of stack. Limiting
-// the depth to 150 therefore should keep the stack use by the recursion to
-// 520800 bytes, which would give a generously conservative margin should we
-// decide to reduce the thread stack size from its default of 2MB down to 1MB.
-const RECURSION_DEPTH_LIMIT: usize = 150;
-
#[inline]
fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
mode: DispatchMode,
- recursion_depth: usize,
+ recursion_ok: bool,
root: OpaqueNode,
traversal_data: PerLevelTraversalData,
scope: &'a rayon::Scope<'scope>,
@@ -279,12 +283,11 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
// want to actually dispatch the job as a tail call if there's nothing left
// in our local queue. Otherwise we need to return to it to maintain proper
// breadth-first ordering. We also need to take care to avoid stack
- // overflow due to excessive tail recursion. The stack overflow isn't
- // observable to content -- we're still completely correct, just not
- // using tail recursion any more. See bug 1368302.
- debug_assert!(recursion_depth <= RECURSION_DEPTH_LIMIT);
+ // overflow due to excessive tail recursion. The stack overflow avoidance
+ // isn't observable to content -- we're still completely correct, just not
+ // using tail recursion any more. See Gecko bugs 1368302 and 1376883.
let may_dispatch_tail = mode.is_tail_call() &&
- recursion_depth != RECURSION_DEPTH_LIMIT &&
+ recursion_ok &&
!pool.current_thread_has_pending_tasks().unwrap();
// In the common case, our children fit within a single work unit, in which
@@ -292,12 +295,12 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
if nodes.len() <= WORK_UNIT_MAX {
let work = nodes.iter().cloned().collect::<WorkUnit<E::ConcreteNode>>();
if may_dispatch_tail {
- top_down_dom(&work, recursion_depth + 1, root,
+ top_down_dom(&work, root,
traversal_data, scope, pool, traversal, tls);
} else {
scope.spawn(move |scope| {
let work = work;
- top_down_dom(&work, 0, root,
+ top_down_dom(&work, root,
traversal_data, scope, pool, traversal, tls);
});
}
@@ -307,7 +310,7 @@ fn traverse_nodes<'a, 'scope, E, D>(nodes: &[SendNode<E::ConcreteNode>],
let traversal_data_copy = traversal_data.clone();
scope.spawn(move |scope| {
let n = nodes;
- top_down_dom(&*n, 0, root,
+ top_down_dom(&*n, root,
traversal_data_copy, scope, pool, traversal, tls)
});
}