Diffstat (limited to 'third_party/webrender/webrender/src')
-rw-r--r--  third_party/webrender/webrender/src/api_resources.rs | 299
-rw-r--r--  third_party/webrender/webrender/src/batch.rs | 2566
-rw-r--r--  third_party/webrender/webrender/src/border.rs | 5
-rw-r--r--  third_party/webrender/webrender/src/box_shadow.rs | 16
-rw-r--r--  third_party/webrender/webrender/src/capture.rs | 10
-rw-r--r--  third_party/webrender/webrender/src/clip.rs | 539
-rw-r--r--  third_party/webrender/webrender/src/composite.rs | 1148
-rw-r--r--  third_party/webrender/webrender/src/compositor/mod.rs | 6
-rw-r--r--  third_party/webrender/webrender/src/compositor/sw_compositor.rs | 1484
-rw-r--r--  third_party/webrender/webrender/src/debug_item.rs | 20
-rw-r--r--  third_party/webrender/webrender/src/debug_render.rs (renamed from third_party/webrender/webrender/src/renderer/debug.rs) | 68
-rw-r--r--  third_party/webrender/webrender/src/debug_server.rs | 402
-rw-r--r--  third_party/webrender/webrender/src/device/gl.rs | 1946
-rw-r--r--  third_party/webrender/webrender/src/device/query_gl.rs | 55
-rw-r--r--  third_party/webrender/webrender/src/filterdata.rs | 7
-rw-r--r--  third_party/webrender/webrender/src/frame_builder.rs | 1230
-rw-r--r--  third_party/webrender/webrender/src/freelist.rs | 5
-rw-r--r--  third_party/webrender/webrender/src/glyph_cache.rs | 141
-rw-r--r--  third_party/webrender/webrender/src/glyph_rasterizer/mod.rs | 305
-rw-r--r--  third_party/webrender/webrender/src/gpu_cache.rs | 60
-rw-r--r--  third_party/webrender/webrender/src/gpu_types.rs | 213
-rw-r--r--  third_party/webrender/webrender/src/hit_test.rs | 452
-rw-r--r--  third_party/webrender/webrender/src/host_utils.rs | 26
-rw-r--r--  third_party/webrender/webrender/src/image_source.rs | 94
-rw-r--r--  third_party/webrender/webrender/src/image_tiling.rs | 816
-rw-r--r--  third_party/webrender/webrender/src/intern.rs | 175
-rw-r--r--  third_party/webrender/webrender/src/internal_types.rs | 137
-rw-r--r--  third_party/webrender/webrender/src/lib.rs | 68
-rw-r--r--  third_party/webrender/webrender/src/lru_cache.rs | 242
-rw-r--r--  third_party/webrender/webrender/src/picture.rs | 3552
-rw-r--r--  third_party/webrender/webrender/src/platform/macos/font.rs | 310
-rw-r--r--  third_party/webrender/webrender/src/platform/unix/font.rs | 23
-rw-r--r--  third_party/webrender/webrender/src/prepare.rs | 1606
-rw-r--r--  third_party/webrender/webrender/src/prim_store/backdrop.rs | 1
-rw-r--r--  third_party/webrender/webrender/src/prim_store/borders.rs | 72
-rw-r--r--  third_party/webrender/webrender/src/prim_store/gradient.rs | 1007
-rw-r--r--  third_party/webrender/webrender/src/prim_store/gradient/conic.rs | 363
-rw-r--r--  third_party/webrender/webrender/src/prim_store/gradient/linear.rs | 723
-rw-r--r--  third_party/webrender/webrender/src/prim_store/gradient/mod.rs | 388
-rw-r--r--  third_party/webrender/webrender/src/prim_store/gradient/radial.rs | 536
-rw-r--r--  third_party/webrender/webrender/src/prim_store/image.rs | 414
-rw-r--r--  third_party/webrender/webrender/src/prim_store/line_dec.rs | 82
-rw-r--r--  third_party/webrender/webrender/src/prim_store/mod.rs | 3420
-rw-r--r--  third_party/webrender/webrender/src/prim_store/picture.rs | 11
-rw-r--r--  third_party/webrender/webrender/src/prim_store/text_run.rs | 112
-rw-r--r--  third_party/webrender/webrender/src/profiler.rs | 3005
-rw-r--r--  third_party/webrender/webrender/src/rectangle_occlusion.rs | 208
-rw-r--r--  third_party/webrender/webrender/src/render_api.rs | 1447
-rw-r--r--  third_party/webrender/webrender/src/render_backend.rs | 777
-rw-r--r--  third_party/webrender/webrender/src/render_target.rs | 562
-rw-r--r--  third_party/webrender/webrender/src/render_task.rs | 1485
-rw-r--r--  third_party/webrender/webrender/src/render_task_cache.rs | 176
-rw-r--r--  third_party/webrender/webrender/src/render_task_graph.rs | 1467
-rw-r--r--  third_party/webrender/webrender/src/renderer.rs (renamed from third_party/webrender/webrender/src/renderer/mod.rs) | 5729
-rw-r--r--  third_party/webrender/webrender/src/renderer/gpu_cache.rs | 525
-rw-r--r--  third_party/webrender/webrender/src/renderer/upload.rs | 792
-rw-r--r--  third_party/webrender/webrender/src/renderer/vertex.rs | 1095
-rw-r--r--  third_party/webrender/webrender/src/resource_cache.rs | 395
-rw-r--r--  third_party/webrender/webrender/src/scene.rs | 40
-rw-r--r--  third_party/webrender/webrender/src/scene_builder_thread.rs | 393
-rw-r--r--  third_party/webrender/webrender/src/scene_building.rs | 2657
-rw-r--r--  third_party/webrender/webrender/src/screen_capture.rs | 16
-rw-r--r--  third_party/webrender/webrender/src/segment.rs | 36
-rw-r--r--  third_party/webrender/webrender/src/shade.rs (renamed from third_party/webrender/webrender/src/renderer/shade.rs) | 522
-rw-r--r--  third_party/webrender/webrender/src/space.rs | 254
-rw-r--r--  third_party/webrender/webrender/src/spatial_node.rs | 94
-rw-r--r--  third_party/webrender/webrender/src/spatial_tree.rs | 587
-rw-r--r--  third_party/webrender/webrender/src/storage.rs (renamed from third_party/webrender/webrender/src/prim_store/storage.rs) | 0
-rw-r--r--  third_party/webrender/webrender/src/texture_allocator.rs (renamed from third_party/webrender/webrender/src/texture_pack/guillotine.rs) | 29
-rw-r--r--  third_party/webrender/webrender/src/texture_cache.rs | 1943
-rw-r--r--  third_party/webrender/webrender/src/texture_pack/mod.rs | 329
-rw-r--r--  third_party/webrender/webrender/src/texture_pack/slab.rs | 356
-rw-r--r--  third_party/webrender/webrender/src/tile_cache.rs | 743
-rw-r--r--  third_party/webrender/webrender/src/util.rs | 539
-rw-r--r--  third_party/webrender/webrender/src/visibility.rs | 713
75 files changed, 20840 insertions(+), 31229 deletions(-)
diff --git a/third_party/webrender/webrender/src/api_resources.rs b/third_party/webrender/webrender/src/api_resources.rs
deleted file mode 100644
index 0a48858fc42..00000000000
--- a/third_party/webrender/webrender/src/api_resources.rs
+++ /dev/null
@@ -1,299 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use crate::api::{BlobImageKey, ImageDescriptor, DirtyRect, TileSize};
-use crate::api::{BlobImageHandler, AsyncBlobImageRasterizer, BlobImageData, BlobImageParams};
-use crate::api::{BlobImageRequest, BlobImageDescriptor, BlobImageResources};
-use crate::api::{FontKey, FontTemplate, FontInstanceData, FontInstanceKey};
-use crate::api::SharedFontInstanceMap;
-use crate::api::units::*;
-use crate::render_api::{ResourceUpdate, TransactionMsg, AddFont};
-use crate::image_tiling::*;
-use crate::profiler;
-
-use std::collections::HashMap;
-use std::sync::Arc;
-
-/// We use this to generate the async blob rendering requests.
-struct BlobImageTemplate {
- descriptor: ImageDescriptor,
- tile_size: TileSize,
- dirty_rect: BlobDirtyRect,
- /// See ImageResource::visible_rect.
- visible_rect: DeviceIntRect,
- // If the active rect of the blob changes, this represents the
- // range of tiles that remain valid. This must be taken into
- // account in addition to the valid rect when submitting blob
- // rasterization requests.
- // `None` means the bounds have not changed (tiles are still valid).
- // `Some(TileRange::zero())` means all of the tiles are invalid.
- valid_tiles_after_bounds_change: Option<TileRange>,
-}
-
-struct FontResources {
- templates: HashMap<FontKey, FontTemplate>,
- instances: SharedFontInstanceMap,
-}
-
-pub struct ApiResources {
- blob_image_templates: HashMap<BlobImageKey, BlobImageTemplate>,
- pub blob_image_handler: Option<Box<dyn BlobImageHandler>>,
- fonts: FontResources,
-}
-
-impl BlobImageResources for FontResources {
- fn get_font_data(&self, key: FontKey) -> &FontTemplate {
- self.templates.get(&key).unwrap()
- }
- fn get_font_instance_data(&self, key: FontInstanceKey) -> Option<FontInstanceData> {
- self.instances.get_font_instance_data(key)
- }
-}
-
-impl ApiResources {
- pub fn new(
- blob_image_handler: Option<Box<dyn BlobImageHandler>>,
- instances: SharedFontInstanceMap,
- ) -> Self {
- ApiResources {
- blob_image_templates: HashMap::new(),
- blob_image_handler,
- fonts: FontResources {
- templates: HashMap::new(),
- instances,
- }
- }
- }
-
- pub fn get_shared_font_instances(&self) -> SharedFontInstanceMap {
- self.fonts.instances.clone()
- }
-
- pub fn update(&mut self, transaction: &mut TransactionMsg) {
- let mut blobs_to_rasterize = Vec::new();
- for update in &transaction.resource_updates {
- match *update {
- ResourceUpdate::AddBlobImage(ref img) => {
- self.blob_image_handler
- .as_mut()
- .unwrap()
- .add(img.key, Arc::clone(&img.data), &img.visible_rect, img.tile_size);
-
- self.blob_image_templates.insert(
- img.key,
- BlobImageTemplate {
- descriptor: img.descriptor,
- tile_size: img.tile_size,
- dirty_rect: DirtyRect::All,
- valid_tiles_after_bounds_change: None,
- visible_rect: img.visible_rect,
- },
- );
- blobs_to_rasterize.push(img.key);
- }
- ResourceUpdate::UpdateBlobImage(ref img) => {
- debug_assert_eq!(img.visible_rect.size, img.descriptor.size);
- self.update_blob_image(
- img.key,
- Some(&img.descriptor),
- Some(&img.dirty_rect),
- Some(Arc::clone(&img.data)),
- &img.visible_rect,
- );
- blobs_to_rasterize.push(img.key);
- }
- ResourceUpdate::DeleteBlobImage(key) => {
- transaction.use_scene_builder_thread = true;
- self.blob_image_templates.remove(&key);
- if let Some(ref mut handler) = self.blob_image_handler {
- handler.delete(key);
- }
- }
- ResourceUpdate::SetBlobImageVisibleArea(ref key, ref area) => {
- self.update_blob_image(*key, None, None, None, area);
- blobs_to_rasterize.push(*key);
- }
- ResourceUpdate::AddFont(ref font) => {
- match font {
- AddFont::Raw(key, bytes, index) => {
- self.fonts.templates.insert(
- *key,
- FontTemplate::Raw(Arc::clone(bytes), *index),
- );
- }
- AddFont::Native(key, native_font_handle) => {
- self.fonts.templates.insert(
- *key,
- FontTemplate::Native(native_font_handle.clone()),
- );
- }
- }
- }
- ResourceUpdate::AddFontInstance(ref instance) => {
- // TODO(nical): Don't clone these.
- self.fonts.instances.add_font_instance(
- instance.key,
- instance.font_key,
- instance.glyph_size,
- instance.options.clone(),
- instance.platform_options.clone(),
- instance.variations.clone(),
- );
- }
- ResourceUpdate::DeleteFont(key) => {
- transaction.use_scene_builder_thread = true;
- self.fonts.templates.remove(&key);
- if let Some(ref mut handler) = self.blob_image_handler {
- handler.delete_font(key);
- }
- }
- ResourceUpdate::DeleteFontInstance(key) => {
- transaction.use_scene_builder_thread = true;
- // We will delete from the shared font instance map in the resource cache
- // after scene swap.
-
- if let Some(ref mut r) = self.blob_image_handler {
- r.delete_font_instance(key);
- }
- }
- ResourceUpdate::DeleteImage(..) => {
- transaction.use_scene_builder_thread = true;
- }
- _ => {}
- }
- }
-
- let (rasterizer, requests) = self.create_blob_scene_builder_requests(&blobs_to_rasterize);
- transaction.profile.set(profiler::RASTERIZED_BLOBS, blobs_to_rasterize.len());
- transaction.profile.set(profiler::RASTERIZED_BLOB_TILES, requests.len());
- transaction.use_scene_builder_thread |= !requests.is_empty();
- transaction.use_scene_builder_thread |= !transaction.scene_ops.is_empty();
- transaction.blob_rasterizer = rasterizer;
- transaction.blob_requests = requests;
- }
-
- pub fn enable_multithreading(&mut self, enable: bool) {
- if let Some(ref mut handler) = self.blob_image_handler {
- handler.enable_multithreading(enable);
- }
- }
-
- fn update_blob_image(
- &mut self,
- key: BlobImageKey,
- descriptor: Option<&ImageDescriptor>,
- dirty_rect: Option<&BlobDirtyRect>,
- data: Option<Arc<BlobImageData>>,
- visible_rect: &DeviceIntRect,
- ) {
- if let Some(data) = data {
- let dirty_rect = dirty_rect.unwrap();
- self.blob_image_handler.as_mut().unwrap().update(key, data, visible_rect, dirty_rect);
- }
-
- let image = self.blob_image_templates
- .get_mut(&key)
- .expect("Attempt to update non-existent blob image");
-
- let mut valid_tiles_after_bounds_change = compute_valid_tiles_if_bounds_change(
- &image.visible_rect,
- visible_rect,
- image.tile_size,
- );
-
- match (image.valid_tiles_after_bounds_change, valid_tiles_after_bounds_change) {
- (Some(old), Some(ref mut new)) => {
- *new = new.intersection(&old).unwrap_or_else(TileRange::zero);
- }
- (Some(old), None) => {
- valid_tiles_after_bounds_change = Some(old);
- }
- _ => {}
- }
-
- let blob_size = visible_rect.size;
-
- if let Some(descriptor) = descriptor {
- image.descriptor = *descriptor;
- } else {
- // make sure the descriptor size matches the visible rect.
- // This might not be necessary but let's stay on the safe side.
- image.descriptor.size = blob_size;
- }
-
- if let Some(dirty_rect) = dirty_rect {
- image.dirty_rect = image.dirty_rect.union(dirty_rect);
- }
-
- image.valid_tiles_after_bounds_change = valid_tiles_after_bounds_change;
- image.visible_rect = *visible_rect;
- }
-
- pub fn create_blob_scene_builder_requests(
- &mut self,
- keys: &[BlobImageKey]
- ) -> (Option<Box<dyn AsyncBlobImageRasterizer>>, Vec<BlobImageParams>) {
- if self.blob_image_handler.is_none() || keys.is_empty() {
- return (None, Vec::new());
- }
-
- let mut blob_request_params = Vec::new();
- for key in keys {
- let template = self.blob_image_templates.get_mut(key).unwrap();
-
- // If we know that only a portion of the blob image is in the viewport,
- // only request these visible tiles since blob images can be huge.
- let tiles = compute_tile_range(
- &template.visible_rect,
- template.tile_size,
- );
-
- // Don't request tiles that weren't invalidated.
- let dirty_tiles = match template.dirty_rect {
- DirtyRect::Partial(dirty_rect) => {
- compute_tile_range(
- &dirty_rect.cast_unit(),
- template.tile_size,
- )
- }
- DirtyRect::All => tiles,
- };
-
- for_each_tile_in_range(&tiles, |tile| {
- let still_valid = template.valid_tiles_after_bounds_change
- .map(|valid_tiles| valid_tiles.contains(tile))
- .unwrap_or(true);
-
- if still_valid && !dirty_tiles.contains(tile) {
- return;
- }
-
- let descriptor = BlobImageDescriptor {
- rect: compute_tile_rect(
- &template.visible_rect,
- template.tile_size,
- tile,
- ).cast_unit(),
- format: template.descriptor.format,
- };
-
- assert!(descriptor.rect.size.width > 0 && descriptor.rect.size.height > 0);
- blob_request_params.push(
- BlobImageParams {
- request: BlobImageRequest { key: *key, tile },
- descriptor,
- dirty_rect: DirtyRect::All,
- }
- );
- });
-
- template.dirty_rect = DirtyRect::empty();
- template.valid_tiles_after_bounds_change = None;
- }
-
- let handler = self.blob_image_handler.as_mut().unwrap();
- handler.prepare_resources(&self.fonts, &blob_request_params);
- (Some(handler.create_blob_rasterizer()), blob_request_params)
- }
-}
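The deleted file's trickiest logic is how `update_blob_image` folds successive visible-rect changes into `valid_tiles_after_bounds_change` (`None` means the bounds never changed and all tiles stay valid; `Some(TileRange::zero())` means every tile is invalid), and how `create_blob_scene_builder_requests` then re-requests a tile only if it is dirty or no longer valid. A minimal standalone sketch of that folding rule, using a simplified stand-in for WebRender's `TileRange`:

```rust
// Sketch only: this TileRange is a simplified stand-in for the euclid-based
// tile range WebRender uses; the folding rule mirrors the match statement
// in the deleted update_blob_image above.
#[derive(Copy, Clone, Debug, PartialEq)]
struct TileRange { x0: i32, y0: i32, x1: i32, y1: i32 } // half-open tile bounds

impl TileRange {
    fn zero() -> Self { TileRange { x0: 0, y0: 0, x1: 0, y1: 0 } }

    fn intersection(&self, other: &TileRange) -> Option<TileRange> {
        let r = TileRange {
            x0: self.x0.max(other.x0), y0: self.y0.max(other.y0),
            x1: self.x1.min(other.x1), y1: self.y1.min(other.y1),
        };
        if r.x0 < r.x1 && r.y0 < r.y1 { Some(r) } else { None }
    }

    fn contains(&self, x: i32, y: i32) -> bool {
        x >= self.x0 && x < self.x1 && y >= self.y0 && y < self.y1
    }
}

// None = bounds unchanged (all tiles still valid);
// Some(TileRange::zero()) = all tiles invalidated by a bounds change.
fn fold_valid_tiles(old: Option<TileRange>, new: Option<TileRange>) -> Option<TileRange> {
    match (old, new) {
        // Two bounds changes before rasterization: only tiles that are
        // valid after *both* changes survive.
        (Some(a), Some(b)) => Some(a.intersection(&b).unwrap_or_else(TileRange::zero)),
        // The latest update kept the bounds; keep the earlier restriction.
        (Some(a), None) => Some(a),
        (None, b) => b,
    }
}

fn main() {
    let first = Some(TileRange { x0: 0, y0: 0, x1: 4, y1: 4 });
    let second = Some(TileRange { x0: 2, y0: 2, x1: 6, y1: 6 });
    let folded = fold_valid_tiles(first, second).unwrap();
    assert_eq!(folded, TileRange { x0: 2, y0: 2, x1: 4, y1: 4 });
    // A tile outside the folded range must be re-rasterized even if it was
    // never marked dirty; a tile inside it is skipped unless dirty.
    assert!(!folded.contains(5, 5));
    assert!(folded.contains(3, 3));
}
```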
diff --git a/third_party/webrender/webrender/src/batch.rs b/third_party/webrender/webrender/src/batch.rs
index 588afdfea81..1ee371c6531 100644
--- a/third_party/webrender/webrender/src/batch.rs
+++ b/third_party/webrender/webrender/src/batch.rs
@@ -2,38 +2,35 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{AlphaType, ClipMode, ImageRendering, ImageBufferKind};
-use api::{FontInstanceFlags, YuvColorSpace, YuvFormat, ColorDepth, ColorRange, PremultipliedColorF};
+use api::{AlphaType, ClipMode, ExternalImageType, ImageRendering, EdgeAaSegmentMask};
+use api::{YuvColorSpace, YuvFormat, ColorDepth, ColorRange, PremultipliedColorF};
use api::units::*;
use crate::clip::{ClipDataStore, ClipNodeFlags, ClipNodeRange, ClipItemKind, ClipStore};
use crate::spatial_tree::{SpatialTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex, CoordinateSystemId};
use crate::composite::{CompositeState};
-use crate::glyph_rasterizer::{GlyphFormat, SubpixelDirection};
-use crate::gpu_cache::{GpuBlockData, GpuCache, GpuCacheAddress};
+use crate::glyph_rasterizer::GlyphFormat;
+use crate::gpu_cache::{GpuBlockData, GpuCache, GpuCacheHandle, GpuCacheAddress};
use crate::gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders, ZBufferId, ZBufferIdGenerator};
-use crate::gpu_types::{SplitCompositeInstance};
+use crate::gpu_types::{ClipMaskInstance, SplitCompositeInstance, BrushShaderKind};
use crate::gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
use crate::gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
-use crate::gpu_types::{ImageBrushData, get_shader_opacity, BoxShadowData};
-use crate::gpu_types::{ClipMaskInstanceCommon, ClipMaskInstanceImage, ClipMaskInstanceRect, ClipMaskInstanceBoxShadow};
-use crate::internal_types::{FastHashMap, Swizzle, TextureSource, Filter};
-use crate::picture::{ClusterFlags, Picture3DContext, PictureCompositeMode, PicturePrimitive, SubSliceIndex};
-use crate::prim_store::{DeferredResolve, PrimitiveInstanceKind, ClipData};
-use crate::prim_store::{PrimitiveInstance, PrimitiveOpacity, SegmentInstanceIndex};
-use crate::prim_store::{BrushSegment, ClipMaskKind, ClipTaskIndex};
-use crate::prim_store::VECS_PER_SEGMENT;
+use crate::gpu_types::{ImageBrushData, get_shader_opacity};
+use crate::internal_types::{FastHashMap, SavedTargetIndex, Swizzle, TextureSource, Filter};
+use crate::picture::{Picture3DContext, PictureCompositeMode, PicturePrimitive};
+use crate::prim_store::{DeferredResolve, PrimitiveInstanceKind, PrimitiveVisibilityIndex, PrimitiveVisibilityMask};
+use crate::prim_store::{VisibleGradientTile, PrimitiveInstance, PrimitiveOpacity, SegmentInstanceIndex};
+use crate::prim_store::{BrushSegment, ClipMaskKind, ClipTaskIndex, PrimitiveVisibility, PrimitiveVisibilityFlags};
+use crate::prim_store::{VECS_PER_SEGMENT, SpaceMapper};
+use crate::prim_store::image::ImageSource;
use crate::render_target::RenderTargetContext;
use crate::render_task_graph::{RenderTaskId, RenderTaskGraph};
use crate::render_task::RenderTaskAddress;
-use crate::renderer::{BlendMode, ShaderColorMode};
-use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
-use crate::resource_cache::{GlyphFetchResult, ImageProperties, ImageRequest, ResourceCache};
-use crate::space::SpaceMapper;
-use crate::visibility::{PrimitiveVisibilityFlags, VisibilityState};
+use crate::renderer::{BlendMode, ImageBufferKind, ShaderColorMode};
+use crate::renderer::{BLOCKS_PER_UV_RECT, MAX_VERTEX_TEXTURE_WIDTH};
+use crate::resource_cache::{CacheItem, GlyphFetchResult, ImageRequest, ResourceCache};
use smallvec::SmallVec;
use std::{f32, i32, usize};
-use crate::util::{project_rect, MaxRect, MatrixHelpers, TransformedRectKind};
-use crate::segment::EdgeAaSegmentMask;
+use crate::util::{project_rect, TransformedRectKind};
// Special sentinel value recognized by the shader. It is considered to be
// a dummy task that doesn't mask out anything.
@@ -46,22 +43,7 @@ const INVALID_SEGMENT_INDEX: i32 = 0xffff;
const CLIP_RECTANGLE_TILE_SIZE: i32 = 128;
/// The minimum size of a clip mask before trying to draw in tiles.
-const CLIP_RECTANGLE_AREA_THRESHOLD: f32 = (CLIP_RECTANGLE_TILE_SIZE * CLIP_RECTANGLE_TILE_SIZE * 4) as f32;
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Copy, Clone, Debug)]
-pub struct BatchFilter {
- pub rect_in_pic_space: PictureRect,
- pub sub_slice_index: SubSliceIndex,
-}
-
-impl BatchFilter {
- pub fn matches(&self, other: &BatchFilter) -> bool {
- self.sub_slice_index == other.sub_slice_index &&
- self.rect_in_pic_space.intersects(&other.rect_in_pic_space)
- }
-}
+const CLIP_RECTANGLE_AREA_THRESHOLD: i32 = CLIP_RECTANGLE_TILE_SIZE * CLIP_RECTANGLE_TILE_SIZE * 4;
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -72,9 +54,12 @@ pub enum BrushBatchKind {
Blend,
MixBlend {
task_id: RenderTaskId,
+ source_id: RenderTaskId,
backdrop_id: RenderTaskId,
},
YuvImage(ImageBufferKind, YuvFormat, ColorDepth, YuvColorSpace, ColorRange),
+ ConicGradient,
+ RadialGradient,
LinearGradient,
Opacity,
}
@@ -88,49 +73,20 @@ pub enum BatchKind {
Brush(BrushBatchKind),
}
-/// Input textures for a primitive, without consideration of clip mask
-#[derive(Copy, Clone, Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct TextureSet {
- pub colors: [TextureSource; 3],
-}
-
-impl TextureSet {
- const UNTEXTURED: TextureSet = TextureSet {
- colors: [
- TextureSource::Invalid,
- TextureSource::Invalid,
- TextureSource::Invalid,
- ],
- };
-
- /// A textured primitive
- fn prim_textured(
- color: TextureSource,
- ) -> Self {
- TextureSet {
- colors: [
- color,
- TextureSource::Invalid,
- TextureSource::Invalid,
- ],
- }
- }
-
- fn is_compatible_with(&self, other: &TextureSet) -> bool {
- self.colors[0].is_compatible(&other.colors[0]) &&
- self.colors[1].is_compatible(&other.colors[1]) &&
- self.colors[2].is_compatible(&other.colors[2])
- }
-}
-
-impl TextureSource {
- fn combine(&self, other: TextureSource) -> TextureSource {
- if other == TextureSource::Invalid {
- *self
- } else {
- other
+impl BatchKind {
+ fn shader_kind(&self) -> BrushShaderKind {
+ match self {
+ BatchKind::Brush(BrushBatchKind::Solid) => BrushShaderKind::Solid,
+ BatchKind::Brush(BrushBatchKind::Image(..)) => BrushShaderKind::Image,
+ BatchKind::Brush(BrushBatchKind::LinearGradient) => BrushShaderKind::LinearGradient,
+ BatchKind::Brush(BrushBatchKind::RadialGradient) => BrushShaderKind::RadialGradient,
+ BatchKind::Brush(BrushBatchKind::ConicGradient) => BrushShaderKind::ConicGradient,
+ BatchKind::Brush(BrushBatchKind::Blend) => BrushShaderKind::Blend,
+ BatchKind::Brush(BrushBatchKind::MixBlend { .. }) => BrushShaderKind::MixBlend,
+ BatchKind::Brush(BrushBatchKind::YuvImage(..)) => BrushShaderKind::Yuv,
+ BatchKind::Brush(BrushBatchKind::Opacity) => BrushShaderKind::Opacity,
+ BatchKind::TextRun(..) => BrushShaderKind::Text,
+ _ => BrushShaderKind::None,
}
}
}
@@ -141,76 +97,34 @@ impl TextureSource {
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct BatchTextures {
- pub input: TextureSet,
- pub clip_mask: TextureSource,
+ pub colors: [TextureSource; 3],
}
impl BatchTextures {
- /// An empty batch textures (no binding slots set)
- pub fn empty() -> BatchTextures {
+ pub fn no_texture() -> Self {
BatchTextures {
- input: TextureSet::UNTEXTURED,
- clip_mask: TextureSource::Invalid,
+ colors: [TextureSource::Invalid; 3],
}
}
- /// A textured primitive with optional clip mask
- pub fn prim_textured(
- color: TextureSource,
- clip_mask: TextureSource,
- ) -> BatchTextures {
+ pub fn render_target_cache() -> Self {
BatchTextures {
- input: TextureSet::prim_textured(color),
- clip_mask,
- }
- }
-
- /// An untextured primitive with optional clip mask
- pub fn prim_untextured(
- clip_mask: TextureSource,
- ) -> BatchTextures {
- BatchTextures {
- input: TextureSet::UNTEXTURED,
- clip_mask,
- }
- }
-
- /// A composite style effect with single input texture
- pub fn composite_rgb(
- texture: TextureSource,
- ) -> BatchTextures {
- BatchTextures {
- input: TextureSet {
- colors: [
- texture,
- TextureSource::Invalid,
- TextureSource::Invalid,
- ],
- },
- clip_mask: TextureSource::Invalid,
+ colors: [
+ TextureSource::PrevPassColor,
+ TextureSource::PrevPassAlpha,
+ TextureSource::Invalid,
+ ],
}
}
- /// A composite style effect with up to 3 input textures
- pub fn composite_yuv(
- color0: TextureSource,
- color1: TextureSource,
- color2: TextureSource,
- ) -> BatchTextures {
+ pub fn color(texture: TextureSource) -> Self {
BatchTextures {
- input: TextureSet {
- colors: [color0, color1, color2],
- },
- clip_mask: TextureSource::Invalid,
+ colors: [texture, texture, TextureSource::Invalid],
}
}
pub fn is_compatible_with(&self, other: &BatchTextures) -> bool {
- if !self.clip_mask.is_compatible(&other.clip_mask) {
- return false;
- }
-
- self.input.is_compatible_with(&other.input)
+ self.colors.iter().zip(other.colors.iter()).all(|(t1, t2)| textures_compatible(*t1, *t2))
}
pub fn combine_textures(&self, other: BatchTextures) -> Option<BatchTextures> {
@@ -218,24 +132,17 @@ impl BatchTextures {
return None;
}
- let mut new_textures = BatchTextures::empty();
-
- new_textures.clip_mask = self.clip_mask.combine(other.clip_mask);
-
- for i in 0 .. 3 {
- new_textures.input.colors[i] = self.input.colors[i].combine(other.input.colors[i]);
+ let mut new_textures = BatchTextures::no_texture();
+ for (i, (color, other_color)) in self.colors.iter().zip(other.colors.iter()).enumerate() {
+ // If these textures are compatible, for each source either both sources are invalid or only one is not invalid.
+ new_textures.colors[i] = if *color == TextureSource::Invalid {
+ *other_color
+ } else {
+ *color
+ };
}
-
Some(new_textures)
}
-
- fn merge(&mut self, other: &BatchTextures) {
- self.clip_mask = self.clip_mask.combine(other.clip_mask);
-
- for (s, o) in self.input.colors.iter_mut().zip(other.input.colors.iter()) {
- *s = s.combine(*o);
- }
- }
}
#[derive(Copy, Clone, Debug)]
@@ -261,76 +168,29 @@ impl BatchKey {
}
}
-pub struct BatchRects {
- /// Union of all of the batch's item rects.
- ///
- /// Very often we can skip iterating over item rects by testing against
- /// this one first.
- batch: PictureRect,
- /// When the batch rectangle above isn't a good enough approximation, we
- /// store per item rects.
- items: Option<Vec<PictureRect>>,
+#[inline]
+fn textures_compatible(t1: TextureSource, t2: TextureSource) -> bool {
+ t1 == TextureSource::Invalid || t2 == TextureSource::Invalid || t1 == t2
}
-impl BatchRects {
- fn new() -> Self {
- BatchRects {
- batch: PictureRect::zero(),
- items: None,
- }
- }
-
- #[inline]
- fn add_rect(&mut self, rect: &PictureRect) {
- let union = self.batch.union(rect);
- // If we have already started storing per-item rects, continue doing so.
- // Otherwise, check whether only storing the batch rect is a good enough
- // approximation.
- if let Some(items) = &mut self.items {
- items.push(*rect);
- } else if self.batch.area() + rect.area() < union.area() {
- let mut items = Vec::with_capacity(16);
- items.push(self.batch);
- items.push(*rect);
- self.items = Some(items);
- }
-
- self.batch = union;
- }
-
- #[inline]
- fn intersects(&mut self, rect: &PictureRect) -> bool {
- if !self.batch.intersects(rect) {
- return false;
- }
-
- if let Some(items) = &self.items {
- items.iter().any(|item| item.intersects(rect))
- } else {
- // If we don't have per-item rects it means the batch rect is a good
- // enough approximation and we didn't bother storing per-rect items.
- true
- }
- }
-}
-
-
pub struct AlphaBatchList {
pub batches: Vec<PrimitiveBatch>,
- pub batch_rects: Vec<BatchRects>,
+ pub item_rects: Vec<Vec<PictureRect>>,
current_batch_index: usize,
current_z_id: ZBufferId,
break_advanced_blend_batches: bool,
+ lookback_count: usize,
}
impl AlphaBatchList {
- fn new(break_advanced_blend_batches: bool, preallocate: usize) -> Self {
+ fn new(break_advanced_blend_batches: bool, lookback_count: usize) -> Self {
AlphaBatchList {
- batches: Vec::with_capacity(preallocate),
- batch_rects: Vec::with_capacity(preallocate),
+ batches: Vec::new(),
+ item_rects: Vec::new(),
current_z_id: ZBufferId::invalid(),
current_batch_index: usize::MAX,
break_advanced_blend_batches,
+ lookback_count,
}
}
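The batching rule restored by `textures_compatible` and `combine_textures` above is worth spelling out: two texture sets can share a batch when, slot by slot, at most one side has a bound texture (or both bind the same one), and the merge keeps whichever source is valid. A self-contained sketch of that rule with a simplified `TextureSource`:

```rust
// Sketch only: a simplified stand-in for WebRender's TextureSource, which
// also carries texture ids, swizzles, and render-task sources.
#[derive(Copy, Clone, Debug, PartialEq)]
enum TextureSource { Invalid, Texture(u32) }

// A slot is compatible if either side is unbound, or both bind the same texture.
fn textures_compatible(t1: TextureSource, t2: TextureSource) -> bool {
    t1 == TextureSource::Invalid || t2 == TextureSource::Invalid || t1 == t2
}

// Merge two compatible color sets: a bound source always wins over Invalid.
fn combine(a: [TextureSource; 3], b: [TextureSource; 3]) -> Option<[TextureSource; 3]> {
    if !a.iter().zip(b.iter()).all(|(x, y)| textures_compatible(*x, *y)) {
        return None; // a real texture conflict forces a batch break
    }
    let mut out = [TextureSource::Invalid; 3];
    for i in 0..3 {
        out[i] = if a[i] == TextureSource::Invalid { b[i] } else { a[i] };
    }
    Some(out)
}

fn main() {
    use TextureSource::*;
    // An untextured batch can absorb a textured one.
    assert_eq!(combine([Invalid; 3], [Texture(7), Invalid, Invalid]),
               Some([Texture(7), Invalid, Invalid]));
    // Two different textures in the same slot cannot share a batch.
    assert_eq!(combine([Texture(1), Invalid, Invalid],
                       [Texture(2), Invalid, Invalid]),
               None);
}
```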
@@ -341,7 +201,7 @@ impl AlphaBatchList {
self.current_batch_index = usize::MAX;
self.current_z_id = ZBufferId::invalid();
self.batches.clear();
- self.batch_rects.clear();
+ self.item_rects.clear();
}
pub fn set_params_and_get_batch(
@@ -361,12 +221,14 @@ impl AlphaBatchList {
match key.blend_mode {
BlendMode::SubpixelWithBgColor => {
- for (batch_index, batch) in self.batches.iter().enumerate().rev() {
+ 'outer_multipass: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(self.lookback_count) {
// Some subpixel batches are drawn in two passes. Because of this, we need
// to check for overlaps with every batch (which is a bit different
// than the normal batching below).
- if self.batch_rects[batch_index].intersects(z_bounding_rect) {
- break;
+ for item_rect in &self.item_rects[batch_index] {
+ if item_rect.intersects(z_bounding_rect) {
+ break 'outer_multipass;
+ }
}
if batch.key.is_compatible_with(&key) {
@@ -379,7 +241,7 @@ impl AlphaBatchList {
// don't try to find a batch
}
_ => {
- for (batch_index, batch) in self.batches.iter().enumerate().rev() {
+ 'outer_default: for (batch_index, batch) in self.batches.iter().enumerate().rev().take(self.lookback_count) {
// For normal batches, we only need to check for overlaps for batches
// other than the first batch we consider. If the first batch
// is compatible, then we know there isn't any potential overlap
@@ -390,40 +252,33 @@ impl AlphaBatchList {
}
// check for intersections
- if self.batch_rects[batch_index].intersects(z_bounding_rect) {
- break;
+ for item_rect in &self.item_rects[batch_index] {
+ if item_rect.intersects(z_bounding_rect) {
+ break 'outer_default;
+ }
}
}
}
}
if selected_batch_index.is_none() {
- // Text runs tend to have a lot of instances per batch, causing a lot of reallocation
- // churn as items are added one by one, so we give it a head start. Ideally we'd start
- // with a larger number, closer to 1k but in some bad cases with lots of batch break
- // we would be wasting a lot of memory.
- // Generally it is safe to preallocate small-ish values for other batch kinds because
- // the items are small and there are no zero-sized batches so there will always be
- // at least one allocation.
- let prealloc = match key.kind {
- BatchKind::TextRun(..) => 128,
- _ => 16,
- };
- let mut new_batch = PrimitiveBatch::new(key);
- new_batch.instances.reserve(prealloc);
+ let new_batch = PrimitiveBatch::new(key);
selected_batch_index = Some(self.batches.len());
self.batches.push(new_batch);
- self.batch_rects.push(BatchRects::new());
+ self.item_rects.push(Vec::new());
}
self.current_batch_index = selected_batch_index.unwrap();
- self.batch_rects[self.current_batch_index].add_rect(z_bounding_rect);
+ self.item_rects[self.current_batch_index].push(*z_bounding_rect);
self.current_z_id = z_id;
+ } else if cfg!(debug_assertions) {
+ // If it's a different segment of the same (larger) primitive, we expect the bounding box
+ // to be the same - coming from the primitive itself, not the segment.
+ assert_eq!(self.item_rects[self.current_batch_index].last(), Some(z_bounding_rect));
}
let batch = &mut self.batches[self.current_batch_index];
batch.features |= features;
- batch.key.textures.merge(&key.textures);
&mut batch.instances
}
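The lookback loops above implement a simple but easy-to-misread invariant: walking batches from newest to oldest (bounded by `lookback_count`), a primitive may merge into the first compatible batch it meets, but an incompatible batch whose items overlap the new primitive stops the search, since merging past it would reorder overlapping pixels. A reduced sketch of that search, with invented `Key`/`Rect` types standing in for `BatchKey` and `PictureRect`:

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
struct Key(u32);

#[derive(Clone, Copy, Debug)]
struct Rect { x0: f32, y0: f32, x1: f32, y1: f32 }

impl Rect {
    fn intersects(&self, o: &Rect) -> bool {
        self.x0 < o.x1 && o.x0 < self.x1 && self.y0 < o.y1 && o.y0 < self.y1
    }
}

struct Batch { key: Key, item_rects: Vec<Rect> }

fn find_batch(batches: &[Batch], key: Key, rect: &Rect, lookback: usize) -> Option<usize> {
    for (index, batch) in batches.iter().enumerate().rev().take(lookback) {
        if batch.key == key {
            return Some(index); // compatible: reuse this batch
        }
        // An overlapping, incompatible batch blocks further lookback:
        // drawing into an older batch would reorder overlapping pixels.
        if batch.item_rects.iter().any(|r| r.intersects(rect)) {
            break;
        }
    }
    None // caller pushes a new batch
}

fn main() {
    let batches = vec![
        Batch { key: Key(1), item_rects: vec![Rect { x0: 0.0, y0: 0.0, x1: 10.0, y1: 10.0 }] },
        Batch { key: Key(2), item_rects: vec![Rect { x0: 20.0, y0: 0.0, x1: 30.0, y1: 10.0 }] },
    ];
    // A non-overlapping rect can look back past batch 1 and merge into batch 0.
    let r = Rect { x0: 40.0, y0: 0.0, x1: 50.0, y1: 10.0 };
    assert_eq!(find_batch(&batches, Key(1), &r, 10), Some(0));
    // A rect overlapping batch 1 cannot be merged into batch 0 behind it.
    let r = Rect { x0: 25.0, y0: 5.0, x1: 45.0, y1: 15.0 };
    assert_eq!(find_batch(&batches, Key(1), &r, 10), None);
}
```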
@@ -499,7 +354,6 @@ impl OpaqueBatchList {
let batch = &mut self.batches[self.current_batch_index];
batch.features |= features;
- batch.key.textures.merge(&key.textures);
&mut batch.instances
}
@@ -540,8 +394,6 @@ bitflags! {
const ALPHA_PASS = 1 << 0;
const ANTIALIASING = 1 << 1;
const REPETITION = 1 << 2;
- /// Indicates a primitive in this batch may use a clip mask.
- const CLIP_MASK = 1 << 3;
}
}
@@ -557,7 +409,6 @@ impl PrimitiveBatch {
fn merge(&mut self, other: PrimitiveBatch) {
self.instances.extend(other.instances);
self.features |= other.features;
- self.key.textures.merge(&other.key.textures);
}
}
@@ -635,7 +486,7 @@ impl AlphaBatchContainer {
/// texture set and one user data field.
#[derive(Debug, Copy, Clone)]
struct SegmentInstanceData {
- textures: TextureSet,
+ textures: BatchTextures,
specific_resource_address: i32,
}
@@ -645,7 +496,7 @@ pub struct AlphaBatchBuilder {
pub opaque_batch_list: OpaqueBatchList,
pub render_task_id: RenderTaskId,
render_task_address: RenderTaskAddress,
- pub batch_filter: Option<BatchFilter>,
+ pub vis_mask: PrimitiveVisibilityMask,
}
impl AlphaBatchBuilder {
@@ -655,19 +506,18 @@ impl AlphaBatchBuilder {
lookback_count: usize,
render_task_id: RenderTaskId,
render_task_address: RenderTaskAddress,
- batch_filter: Option<BatchFilter>,
- preallocate: usize,
+ vis_mask: PrimitiveVisibilityMask,
) -> Self {
// The threshold for creating a new batch is
// one quarter the screen size.
let batch_area_threshold = (screen_size.width * screen_size.height) as f32 / 4.0;
AlphaBatchBuilder {
- alpha_batch_list: AlphaBatchList::new(break_advanced_blend_batches, preallocate),
+ alpha_batch_list: AlphaBatchList::new(break_advanced_blend_batches, lookback_count),
opaque_batch_list: OpaqueBatchList::new(batch_area_threshold, lookback_count),
render_task_id,
render_task_address,
- batch_filter,
+ vis_mask,
}
}
@@ -679,15 +529,6 @@ impl AlphaBatchBuilder {
self.opaque_batch_list.clear();
}
- /// Return true if a primitive occupying `rect_in_pic_space` should be
- /// added this batcher.
- fn should_draw(
- &self,
- prim_filter: &BatchFilter,
- ) -> bool {
- self.batch_filter.map_or(true, |f| f.matches(prim_filter))
- }
-
pub fn build(
mut self,
batch_containers: &mut Vec<AlphaBatchContainer>,
@@ -739,10 +580,7 @@ impl AlphaBatchBuilder {
BlendMode::SubpixelConstantTextColor(..) |
BlendMode::SubpixelWithBgColor |
BlendMode::SubpixelDualSource |
- BlendMode::Advanced(_) |
- BlendMode::MultiplyDualSource |
- BlendMode::Screen |
- BlendMode::Exclusion => {
+ BlendMode::Advanced(_) => {
self.alpha_batch_list
.set_params_and_get_batch(key, features, bounding_rect, z_id)
}
@@ -785,10 +623,10 @@ impl BatchBuilder {
brush_flags: BrushFlags,
prim_header_index: PrimitiveHeaderIndex,
resource_address: i32,
- batch_filter: &BatchFilter,
+ prim_vis_mask: PrimitiveVisibilityMask,
) {
for batcher in &mut self.batchers {
- if batcher.should_draw(batch_filter) {
+ if batcher.vis_mask.intersects(prim_vis_mask) {
let render_task_address = batcher.render_task_address;
let instance = BrushInstance {
@@ -799,6 +637,7 @@ impl BatchBuilder {
brush_flags,
prim_header_index,
resource_address,
+ brush_kind: batch_key.kind.shader_kind(),
};
batcher.push_single_instance(
@@ -815,20 +654,19 @@ impl BatchBuilder {
fn add_split_composite_instance_to_batches(
&mut self,
batch_key: BatchKey,
- features: BatchFeatures,
bounding_rect: &PictureRect,
z_id: ZBufferId,
prim_header_index: PrimitiveHeaderIndex,
polygons_address: GpuCacheAddress,
- batch_filter: &BatchFilter,
+ prim_vis_mask: PrimitiveVisibilityMask,
) {
for batcher in &mut self.batchers {
- if batcher.should_draw(batch_filter) {
+ if batcher.vis_mask.intersects(prim_vis_mask) {
let render_task_address = batcher.render_task_address;
batcher.push_single_instance(
batch_key,
- features,
+ BatchFeatures::empty(),
bounding_rect,
z_id,
PrimitiveInstanceData::from(SplitCompositeInstance {
@@ -866,11 +704,9 @@ impl BatchBuilder {
composite_state: &mut CompositeState,
) {
for cluster in &pic.prim_list.clusters {
- if !cluster.flags.contains(ClusterFlags::IS_VISIBLE) {
- continue;
- }
- for prim_instance in &pic.prim_list.prim_instances[cluster.prim_range()] {
- // Add each run in this picture to the batch.
+ profile_scope!("cluster");
+ // Add each run in this picture to the batch.
+ for prim_instance in &cluster.prim_instances {
self.add_prim_to_batch(
prim_instance,
cluster.spatial_node_index,
@@ -889,6 +725,73 @@ impl BatchBuilder {
}
}
+ // If an image is being drawn as a compositor surface, we don't want
+ // to draw the surface itself into the tile. Instead, we draw a transparent
+ // rectangle that writes to the z-buffer where this compositor surface is.
+ // That ensures we 'cut out' the part of the tile that has the compositor
+ // surface on it, allowing us to draw this tile as an overlay on top of
+ // the compositor surface.
+ // TODO(gw): There's a slight performance cost to doing this cutout rectangle
+ // if we end up not needing to use overlay mode. Consider skipping
+ // the cutout completely in this path.
+ fn emit_placeholder(
+ &mut self,
+ prim_rect: LayoutRect,
+ prim_info: &PrimitiveVisibility,
+ z_id: ZBufferId,
+ transform_id: TransformPaletteId,
+ batch_features: BatchFeatures,
+ ctx: &RenderTargetContext,
+ gpu_cache: &mut GpuCache,
+ render_tasks: &RenderTaskGraph,
+ prim_headers: &mut PrimitiveHeaders,
+ ) {
+ let batch_params = BrushBatchParameters::shared(
+ BrushBatchKind::Solid,
+ BatchTextures::no_texture(),
+ [get_shader_opacity(0.0), 0, 0, 0],
+ 0,
+ );
+
+ let prim_cache_address = gpu_cache.get_address(
+ &ctx.globals.default_transparent_rect_handle,
+ );
+
+ let prim_header = PrimitiveHeader {
+ local_rect: prim_rect,
+ local_clip_rect: prim_info.combined_local_clip_rect,
+ specific_prim_address: prim_cache_address,
+ transform_id,
+ };
+
+ let prim_header_index = prim_headers.push(
+ &prim_header,
+ z_id,
+ batch_params.prim_user_data,
+ );
+
+ let bounding_rect = &prim_info.clip_chain.pic_clip_rect;
+ let transform_kind = transform_id.transform_kind();
+ let prim_vis_mask = prim_info.visibility_mask;
+
+ self.add_segmented_prim_to_batch(
+ None,
+ PrimitiveOpacity::translucent(),
+ &batch_params,
+ BlendMode::None,
+ BlendMode::None,
+ batch_features,
+ prim_header_index,
+ bounding_rect,
+ transform_kind,
+ render_tasks,
+ z_id,
+ prim_info.clip_task_index,
+ prim_vis_mask,
+ ctx,
+ );
+ }
+
// Adds a primitive to a batch.
// It can recursively call itself in some situations, for
// example if it encounters a picture where the items
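The cutout described at the top of `emit_placeholder` relies on the placeholder being depth-only: it is fully transparent (`get_shader_opacity(0.0)` with `BlendMode::None`) yet still writes Z, so tile content sorted behind the compositor surface fails the depth test and leaves a hole through which the surface shows. A toy software model of that effect (types invented for the sketch, unrelated to WebRender's actual renderer):

```rust
#[derive(Clone, Copy)]
struct Pixel { color: u32, z: i32 }

// Draw an axis-aligned rect with a depth test. `color: None` models the
// placeholder: it updates the z-buffer but writes no color.
fn draw_rect(fb: &mut [Pixel], w: usize, rect: (usize, usize, usize, usize),
             color: Option<u32>, z: i32) {
    let (x0, y0, x1, y1) = rect;
    for y in y0..y1 {
        for x in x0..x1 {
            let p = &mut fb[y * w + x];
            if z >= p.z {
                p.z = z;
                if let Some(c) = color { p.color = c; }
            }
        }
    }
}

fn main() {
    let (w, h) = (8, 8);
    let mut fb = vec![Pixel { color: 0, z: i32::MIN }; w * h];
    // Depth-only placeholder over the compositor surface region.
    draw_rect(&mut fb, w, (2, 2, 6, 6), None, 10);
    // Tile content behind the surface: rejected inside the cutout only.
    draw_rect(&mut fb, w, (0, 0, 8, 8), Some(0xff00ff), 5);
    assert_eq!(fb[3 * w + 3].color, 0);  // hole where the surface shows through
    assert_eq!(fb[0].color, 0xff00ff);   // overlay content everywhere else
}
```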
@@ -908,137 +811,15 @@ impl BatchBuilder {
z_generator: &mut ZBufferIdGenerator,
composite_state: &mut CompositeState,
) {
- let (batch_filter, vis_flags) = match prim_instance.vis.state {
- VisibilityState::Culled => {
- return;
- }
- VisibilityState::Unset | VisibilityState::Coarse { .. } => {
- panic!("bug: invalid visibility state");
- }
- VisibilityState::Detailed { ref filter, vis_flags } => {
- (filter, vis_flags)
- }
- VisibilityState::PassThrough => {
- let pic_index = match prim_instance.kind {
- PrimitiveInstanceKind::Picture { pic_index, .. } => pic_index,
- _ => unreachable!("Only picture prims can be pass through"),
- };
- let picture = &ctx.prim_store.pictures[pic_index.0];
-
- match picture.context_3d {
- // Convert all children of the 3D hierarchy root into batches.
- Picture3DContext::In { root_data: Some(ref list), .. } => {
- for child in list {
- let child_prim_instance = &picture.prim_list.prim_instances[child.anchor.instance_index];
- let child_prim_info = &child_prim_instance.vis;
-
- let child_pic_index = match child_prim_instance.kind {
- PrimitiveInstanceKind::Picture { pic_index, .. } => pic_index,
- _ => unreachable!(),
- };
- let pic = &ctx.prim_store.pictures[child_pic_index.0];
-
- let child_batch_filter = match child_prim_info.state {
- VisibilityState::Detailed { ref filter, .. } => filter,
- _ => panic!("bug: culled prim should not be in child list"),
- };
-
- // Get clip task, if set, for the picture primitive.
- let (child_clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- child_prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
- let prim_header = PrimitiveHeader {
- local_rect: pic.precise_local_rect,
- local_clip_rect: child_prim_info.combined_local_clip_rect,
- specific_prim_address: GpuCacheAddress::INVALID,
- transform_id: transforms
- .get_id(
- child.spatial_node_index,
- root_spatial_node_index,
- ctx.spatial_tree,
- ),
- };
-
- let raster_config = pic
- .raster_config
- .as_ref()
- .expect("BUG: 3d primitive was not assigned a surface");
-
- let child_pic_task_id = pic
- .primary_render_task_id
- .unwrap();
-
- let (uv_rect_address, texture) = render_tasks.resolve_location(
- child_pic_task_id,
- gpu_cache,
- ).unwrap();
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
- );
-
- // Need a new z-id for each child preserve-3d context added
- // by this inner loop.
- let z_id = z_generator.next();
-
- let prim_header_index = prim_headers.push(&prim_header, z_id, [
- uv_rect_address.as_int(),
- if raster_config.establishes_raster_root { 1 } else { 0 },
- 0,
- child_clip_task_address.0 as i32,
- ]);
-
- let key = BatchKey::new(
- BatchKind::SplitComposite,
- BlendMode::PremultipliedAlpha,
- textures,
- );
-
- self.add_split_composite_instance_to_batches(
- key,
- BatchFeatures::CLIP_MASK,
- &child_prim_info.clip_chain.pic_clip_rect,
- z_id,
- prim_header_index,
- child.gpu_address,
- child_batch_filter,
- );
- }
- }
- // Ignore the 3D pictures that are not in the root of preserve-3D
- // hierarchy, since we process them with the root.
- Picture3DContext::In { root_data: None, .. } => {
- unreachable!();
- }
- // Proceed for non-3D pictures.
- Picture3DContext::Out => {
- // If this picture is being drawn into an existing target (i.e. with
- // no composition operation), recurse and add to the current batch list.
- self.add_pic_to_batch(
- picture,
- ctx,
- gpu_cache,
- render_tasks,
- deferred_resolves,
- prim_headers,
- transforms,
- root_spatial_node_index,
- surface_spatial_node_index,
- z_generator,
- composite_state,
- );
- }
- }
-
- return;
- }
- };
+ if prim_instance.visibility_info == PrimitiveVisibilityIndex::INVALID {
+ return;
+ }
#[cfg(debug_assertions)] //TODO: why is this needed?
debug_assert_eq!(prim_instance.prepared_frame_id, render_tasks.frame_id());
+ let is_chased = prim_instance.is_chased();
+
let transform_id = transforms
.get_id(
prim_spatial_node_index,
@@ -1050,7 +831,7 @@ impl BatchBuilder {
// wasteful. We should probably cache this in
// the scroll node...
let transform_kind = transform_id.transform_kind();
- let prim_info = &prim_instance.vis;
+ let prim_info = &ctx.scratch.prim_info[prim_instance.visibility_info.0 as usize];
let bounding_rect = &prim_info.clip_chain.pic_clip_rect;
// If this primitive is a backdrop, that means that it is known to cover
@@ -1058,7 +839,7 @@ impl BatchBuilder {
// use the backdrop color as a clear color, and so we can drop this
// primitive and any prior primitives from the batch lists for this
// picture cache slice.
- if vis_flags.contains(PrimitiveVisibilityFlags::IS_BACKDROP) {
+ if prim_info.flags.contains(PrimitiveVisibilityFlags::IS_BACKDROP) {
self.clear_batches();
return;
}
@@ -1079,9 +860,14 @@ impl BatchBuilder {
batch_features |= BatchFeatures::ANTIALIASING;
}
- // Check if the primitive might require a clip mask.
- if prim_info.clip_task_index != ClipTaskIndex::INVALID {
- batch_features |= BatchFeatures::CLIP_MASK;
+ let prim_vis_mask = prim_info.visibility_mask;
+ let clip_task_address = ctx.get_prim_clip_task_address(
+ prim_info.clip_task_index,
+ render_tasks,
+ );
+
+ if is_chased {
+ println!("\tbatch {:?} with bound {:?} and clip task {:?}", prim_rect, bounding_rect, clip_task_address);
}
if !bounding_rect.is_empty() {
@@ -1094,11 +880,6 @@ impl BatchBuilder {
let prim_data = &ctx.data_stores.prim[data_handle];
let prim_cache_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
// TODO(gw): We can abstract some of the common code below into
// helper methods, as we port more primitives to make
// use of interning.
@@ -1119,7 +900,7 @@ impl BatchBuilder {
let batch_key = BatchKey {
blend_mode: BlendMode::PremultipliedDestOut,
kind: BatchKind::Brush(BrushBatchKind::Solid),
- textures: BatchTextures::prim_untextured(clip_mask_texture_id),
+ textures: BatchTextures::no_texture(),
};
self.add_brush_instance_to_batches(
@@ -1129,38 +910,37 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::all(),
- clip_task_address,
+ clip_task_address.unwrap(),
BrushFlags::PERSPECTIVE_INTERPOLATION,
prim_header_index,
0,
- &batch_filter,
+ prim_vis_mask,
);
}
- PrimitiveInstanceKind::NormalBorder { data_handle, ref render_task_ids, .. } => {
+ PrimitiveInstanceKind::NormalBorder { data_handle, ref cache_handles, .. } => {
let prim_data = &ctx.data_stores.normal_border[data_handle];
let common_data = &prim_data.common;
let prim_cache_address = gpu_cache.get_address(&common_data.gpu_cache_handle);
- let task_ids = &ctx.scratch.border_cache_handles[*render_task_ids];
+ let cache_handles = &ctx.scratch.border_cache_handles[*cache_handles];
let specified_blend_mode = BlendMode::PremultipliedAlpha;
let mut segment_data: SmallVec<[SegmentInstanceData; 8]> = SmallVec::new();
// Collect the segment instance data from each render
// task for each valid edge / corner of the border.
- for task_id in task_ids {
- if let Some((uv_rect_address, texture)) = render_tasks.resolve_location(*task_id, gpu_cache) {
- segment_data.push(
- SegmentInstanceData {
- textures: TextureSet::prim_textured(texture),
- specific_resource_address: uv_rect_address.as_int(),
- }
- );
- }
+ for handle in cache_handles {
+ let rt_cache_entry = ctx.resource_cache
+ .get_cached_render_task(handle);
+ let cache_item = ctx.resource_cache
+ .get_texture_cache_item(&rt_cache_entry.handle);
+ segment_data.push(
+ SegmentInstanceData {
+ textures: BatchTextures::color(cache_item.texture_id),
+ specific_resource_address: cache_item.uv_rect_handle.as_int(gpu_cache),
+ }
+ );
}
- // TODO: it would be less error-prone to get this info from the texture cache.
- let image_buffer_kind = ImageBufferKind::Texture2D;
-
let non_segmented_blend_mode = if !common_data.opacity.is_opaque ||
prim_info.clip_task_index != ClipTaskIndex::INVALID ||
transform_kind == TransformedRectKind::Complex
@@ -1178,7 +958,7 @@ impl BatchBuilder {
};
let batch_params = BrushBatchParameters::instanced(
- BrushBatchKind::Image(image_buffer_kind),
+ BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
ImageBrushData {
color_mode: ShaderColorMode::Image,
alpha_type: AlphaType::PremultipliedAlpha,
@@ -1205,11 +985,11 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
}
PrimitiveInstanceKind::TextRun { data_handle, run_index, .. } => {
@@ -1242,11 +1022,12 @@ impl BatchBuilder {
};
let glyph_keys = &ctx.scratch.glyph_keys[run.glyph_keys_range];
+ let raster_scale = run.raster_space.local_scale().unwrap_or(1.0).max(0.001);
let prim_header_index = prim_headers.push(
&prim_header,
z_id,
[
- (run.raster_scale * 65535.0).round() as i32,
+ (raster_scale * 65535.0).round() as i32,
0,
0,
0,
@@ -1257,29 +1038,28 @@ impl BatchBuilder {
);
let batchers = &mut self.batchers;
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
- // The run.used_font.clone() is here instead of instead of inline in the `fetch_glyph`
- // function call to work around a miscompilation.
- // https://github.com/rust-lang/rust/issues/80111
- let font = run.used_font.clone();
ctx.resource_cache.fetch_glyphs(
- font,
+ run.used_font.clone(),
&glyph_keys,
&mut self.glyph_fetch_buffer,
gpu_cache,
- |texture_id, glyph_format, glyphs| {
+ |texture_id, mut glyph_format, glyphs| {
debug_assert_ne!(texture_id, TextureSource::Invalid);
+ // Ignore color and only sample alpha when shadowing.
+ if run.shadow {
+ glyph_format = glyph_format.ignore_color();
+ }
+
let subpx_dir = subpx_dir.limit_by(glyph_format);
- let textures = BatchTextures::prim_textured(
- texture_id,
- clip_mask_texture_id,
- );
+ let textures = BatchTextures {
+ colors: [
+ texture_id,
+ TextureSource::Invalid,
+ TextureSource::Invalid,
+ ],
+ };
let kind = BatchKind::TextRun(glyph_format);
@@ -1304,136 +1084,42 @@ impl BatchBuilder {
}
}
GlyphFormat::Alpha |
- GlyphFormat::TransformedAlpha |
- GlyphFormat::Bitmap => {
+ GlyphFormat::TransformedAlpha => {
(
BlendMode::PremultipliedAlpha,
ShaderColorMode::Alpha,
)
}
+ GlyphFormat::Bitmap => {
+ (
+ BlendMode::PremultipliedAlpha,
+ ShaderColorMode::Bitmap,
+ )
+ }
GlyphFormat::ColorBitmap => {
(
BlendMode::PremultipliedAlpha,
- if run.shadow {
- // Ignore color and only sample alpha when shadowing.
- ShaderColorMode::BitmapShadow
- } else {
- ShaderColorMode::ColorBitmap
- },
+ ShaderColorMode::ColorBitmap,
)
}
};
- // Calculate a tighter bounding rect of just the glyphs passed to this
- // callback from request_glyphs(), rather than using the bounds of the
- // entire text run. This improves batching when glyphs are fragmented
- // over multiple textures in the texture cache.
- // This code is taken from the ps_text_run shader.
- let tight_bounding_rect = {
- let snap_bias = match subpx_dir {
- SubpixelDirection::None => DeviceVector2D::new(0.5, 0.5),
- SubpixelDirection::Horizontal => DeviceVector2D::new(0.125, 0.5),
- SubpixelDirection::Vertical => DeviceVector2D::new(0.5, 0.125),
- SubpixelDirection::Mixed => DeviceVector2D::new(0.125, 0.125),
- };
- let text_offset = prim_header.local_rect.size.to_vector();
-
- let pic_bounding_rect = if run.used_font.flags.contains(FontInstanceFlags::TRANSFORM_GLYPHS) {
- let mut device_bounding_rect = DeviceRect::default();
-
- let glyph_transform = ctx.spatial_tree.get_relative_transform(
- prim_spatial_node_index,
- root_spatial_node_index,
- ).into_transform()
- .with_destination::<WorldPixel>()
- .then(&euclid::Transform3D::from_scale(ctx.global_device_pixel_scale));
-
- let glyph_translation = DeviceVector2D::new(glyph_transform.m41, glyph_transform.m42);
-
- for glyph in glyphs {
- let glyph_offset = prim_data.glyphs[glyph.index_in_text_run as usize].point + prim_header.local_rect.origin.to_vector();
-
- let raster_glyph_offset = (glyph_transform.transform_point2d(glyph_offset).unwrap() + snap_bias).floor();
- let raster_text_offset = (
- glyph_transform.transform_vector2d(text_offset) +
- glyph_translation +
- DeviceVector2D::new(0.5, 0.5)
- ).floor() - glyph_translation;
-
- let device_glyph_rect = DeviceRect::new(
- glyph.offset + raster_glyph_offset.to_vector() + raster_text_offset,
- glyph.size.to_f32(),
- );
-
- device_bounding_rect = device_bounding_rect.union(&device_glyph_rect);
- }
-
- let map_device_to_surface: SpaceMapper<PicturePixel, DevicePixel> = SpaceMapper::new_with_target(
- root_spatial_node_index,
- surface_spatial_node_index,
- device_bounding_rect,
- ctx.spatial_tree,
- );
-
- match map_device_to_surface.unmap(&device_bounding_rect) {
- Some(r) => r.intersection(&bounding_rect),
- None => Some(*bounding_rect),
- }
- } else {
- let mut local_bounding_rect = LayoutRect::default();
-
- let glyph_raster_scale = run.raster_scale * ctx.global_device_pixel_scale.get();
-
- for glyph in glyphs {
- let glyph_offset = prim_data.glyphs[glyph.index_in_text_run as usize].point + prim_header.local_rect.origin.to_vector();
- let glyph_scale = LayoutToDeviceScale::new(glyph_raster_scale / glyph.scale);
- let raster_glyph_offset = (glyph_offset * LayoutToDeviceScale::new(glyph_raster_scale) + snap_bias).floor() / glyph.scale;
- let local_glyph_rect = LayoutRect::new(
- (glyph.offset + raster_glyph_offset.to_vector()) / glyph_scale + text_offset,
- glyph.size.to_f32() / glyph_scale,
- );
-
- local_bounding_rect = local_bounding_rect.union(&local_glyph_rect);
- }
-
- let map_prim_to_surface: SpaceMapper<LayoutPixel, PicturePixel> = SpaceMapper::new_with_target(
- surface_spatial_node_index,
- prim_spatial_node_index,
- *bounding_rect,
- ctx.spatial_tree,
- );
- map_prim_to_surface.map(&local_bounding_rect)
- };
-
- let intersected = match pic_bounding_rect {
- // The text run may have been clipped, for example if part of it is offscreen.
- // So intersect our result with the original bounding rect.
- Some(rect) => rect.intersection(bounding_rect).unwrap_or_else(PictureRect::zero),
- // If space mapping went off the rails, fall back to the old behavior.
- //TODO: consider skipping the glyph run completely in this case.
- None => *bounding_rect,
- };
-
- intersected
- };
-
let key = BatchKey::new(kind, blend_mode, textures);
for batcher in batchers.iter_mut() {
- if batcher.should_draw(&batch_filter) {
+ if batcher.vis_mask.intersects(prim_vis_mask) {
let render_task_address = batcher.render_task_address;
let batch = batcher.alpha_batch_list.set_params_and_get_batch(
key,
- batch_features,
- &tight_bounding_rect,
+ BatchFeatures::empty(),
+ bounding_rect,
z_id,
);
- batch.reserve(glyphs.len());
for glyph in glyphs {
batch.push(base_instance.build(
render_task_address,
- clip_task_address,
+ clip_task_address.unwrap(),
subpx_dir,
glyph.index_in_text_run,
glyph.uv_rect_address,
@@ -1445,26 +1131,23 @@ impl BatchBuilder {
},
);
}
- PrimitiveInstanceKind::LineDecoration { data_handle, ref render_task, .. } => {
+ PrimitiveInstanceKind::LineDecoration { data_handle, ref cache_handle, .. } => {
// The GPU cache data is stored in the template and reused across
// frames and display lists.
let common_data = &ctx.data_stores.line_decoration[data_handle].common;
let prim_cache_address = gpu_cache.get_address(&common_data.gpu_cache_handle);
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
- let (batch_kind, textures, prim_user_data, specific_resource_address) = match render_task {
- Some(task_id) => {
- let (uv_rect_address, texture) = render_tasks.resolve_location(*task_id, gpu_cache).unwrap();
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
- );
+ let (batch_kind, textures, prim_user_data, specific_resource_address) = match cache_handle {
+ Some(cache_handle) => {
+ let rt_cache_entry = ctx
+ .resource_cache
+ .get_cached_render_task(cache_handle);
+ let cache_item = ctx
+ .resource_cache
+ .get_texture_cache_item(&rt_cache_entry.handle);
+ let textures = BatchTextures::color(cache_item.texture_id);
(
- BrushBatchKind::Image(texture.image_buffer_kind()),
+ BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
textures,
ImageBrushData {
color_mode: ShaderColorMode::Image,
@@ -1472,13 +1155,13 @@ impl BatchBuilder {
raster_space: RasterizationSpace::Local,
opacity: 1.0,
}.encode(),
- uv_rect_address.as_int(),
+ cache_item.uv_rect_handle.as_int(gpu_cache),
)
}
None => {
(
BrushBatchKind::Solid,
- BatchTextures::prim_untextured(clip_mask_texture_id),
+ BatchTextures::no_texture(),
[get_shader_opacity(1.0), 0, 0, 0],
0,
)
@@ -1523,11 +1206,11 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::all(),
- clip_task_address,
+ clip_task_address.unwrap(),
BrushFlags::PERSPECTIVE_INTERPOLATION,
prim_header_index,
specific_resource_address,
- &batch_filter,
+ prim_vis_mask,
);
}
PrimitiveInstanceKind::Picture { pic_index, segment_instance_index, .. } => {
@@ -1544,8 +1227,74 @@ impl BatchBuilder {
match picture.context_3d {
// Convert all children of the 3D hierarchy root into batches.
- Picture3DContext::In { root_data: Some(_), .. } => {
- unreachable!("bug: handled above");
+ Picture3DContext::In { root_data: Some(ref list), .. } => {
+ for child in list {
+ let cluster = &picture.prim_list.clusters[child.anchor.cluster_index];
+ let child_prim_instance = &cluster.prim_instances[child.anchor.instance_index];
+ let child_prim_info = &ctx.scratch.prim_info[child_prim_instance.visibility_info.0 as usize];
+
+ let child_pic_index = match child_prim_instance.kind {
+ PrimitiveInstanceKind::Picture { pic_index, .. } => pic_index,
+ _ => unreachable!(),
+ };
+ let pic = &ctx.prim_store.pictures[child_pic_index.0];
+
+ // Get clip task, if set, for the picture primitive.
+ let child_clip_task_address = ctx.get_prim_clip_task_address(
+ child_prim_info.clip_task_index,
+ render_tasks,
+ );
+
+ let prim_header = PrimitiveHeader {
+ local_rect: pic.precise_local_rect,
+ local_clip_rect: child_prim_info.combined_local_clip_rect,
+ specific_prim_address: GpuCacheAddress::INVALID,
+ transform_id: transforms
+ .get_id(
+ child.spatial_node_index,
+ root_spatial_node_index,
+ ctx.spatial_tree,
+ ),
+ };
+
+ let raster_config = pic
+ .raster_config
+ .as_ref()
+ .expect("BUG: 3d primitive was not assigned a surface");
+ let (uv_rect_address, _) = render_tasks.resolve_surface(
+ ctx.surfaces[raster_config.surface_index.0]
+ .render_tasks
+ .expect("BUG: no surface")
+ .root,
+ gpu_cache,
+ );
+
+ // Need a new z-id for each child preserve-3d context added
+ // by this inner loop.
+ let z_id = z_generator.next();
+
+ let prim_header_index = prim_headers.push(&prim_header, z_id, [
+ uv_rect_address.as_int(),
+ if raster_config.establishes_raster_root { 1 } else { 0 },
+ 0,
+ child_clip_task_address.unwrap().0 as i32,
+ ]);
+
+ let key = BatchKey::new(
+ BatchKind::SplitComposite,
+ BlendMode::PremultipliedAlpha,
+ BatchTextures::no_texture(),
+ );
+
+ self.add_split_composite_instance_to_batches(
+ key,
+ &child_prim_info.clip_chain.pic_clip_rect,
+ z_id,
+ prim_header_index,
+ child.gpu_address,
+ child_prim_info.visibility_mask,
+ );
+ }
}
// Ignore the 3D pictures that are not in the root of preserve-3D
// hierarchy, since we process them with the root.
@@ -1565,42 +1314,49 @@ impl BatchBuilder {
};
let surface = &ctx.surfaces[raster_config.surface_index.0];
-
- let mut is_opaque = prim_info.clip_task_index == ClipTaskIndex::INVALID
- && surface.opaque_rect.contains_rect(&surface.rect)
- && transform_kind == TransformedRectKind::AxisAligned;
-
- let pic_task_id = picture.primary_render_task_id.unwrap();
+ let surface_task = surface.render_tasks.map(|s| s.root);
match raster_config.composite_mode {
PictureCompositeMode::TileCache { .. } => {
- // TODO(gw): For now, TileCache is still a composite mode, even though
- // it will only exist as a top level primitive and never
- // be encountered during batching. Consider making TileCache
- // a standalone type, not a picture.
+ // Tile cache instances are added to the composite config, rather than
+ // directly added to batches. This allows them to be drawn with various
+ // present modes during render, such as partial present.
+ let tile_cache = picture.tile_cache.as_ref().unwrap();
+ let map_local_to_world = SpaceMapper::new_with_target(
+ ROOT_SPATIAL_NODE_INDEX,
+ tile_cache.spatial_node_index,
+ ctx.screen_world_rect,
+ ctx.spatial_tree,
+ );
+ // TODO(gw): As a follow up to the valid_rect work, see why we use
+ // prim_info.combined_local_clip_rect here instead of the
+ // local_clip_rect built in the TileCacheInstance. Perhaps
+ // these can be unified or are different for a good reason?
+ let world_clip_rect = map_local_to_world
+ .map(&prim_info.combined_local_clip_rect)
+ .expect("bug: unable to map clip rect");
+ let device_clip_rect = (world_clip_rect * ctx.global_device_pixel_scale).round();
+
+ composite_state.push_surface(
+ tile_cache,
+ device_clip_rect,
+ ctx.global_device_pixel_scale,
+ ctx.resource_cache,
+ gpu_cache,
+ deferred_resolves,
+ );
}
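The tile-cache arm above maps the primitive's local clip rect into world space, then scales it into device pixels before handing the surface to the compositor. A toy illustration of just the scale-and-round step (Rect and the numbers are hypothetical, not WebRender types, and rounding each field independently is a simplification):

    #[derive(Debug)]
    struct Rect { x: f32, y: f32, w: f32, h: f32 }

    // Scale a world-space rect into device pixels and snap to whole pixels,
    // mirroring the `(world_clip_rect * ctx.global_device_pixel_scale).round()` step.
    fn to_device(world: &Rect, scale: f32) -> Rect {
        Rect {
            x: (world.x * scale).round(),
            y: (world.y * scale).round(),
            w: (world.w * scale).round(),
            h: (world.h * scale).round(),
        }
    }

    fn main() {
        let world_clip = Rect { x: 10.25, y: 4.6, w: 300.0, h: 150.5 };
        println!("{:?}", to_device(&world_clip, 2.0)); // e.g. a 2x HiDPI factor
    }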
PictureCompositeMode::Filter(ref filter) => {
assert!(filter.is_visible());
match filter {
Filter::Blur(..) => {
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
let kind = BatchKind::Brush(
- BrushBatchKind::Image(ImageBufferKind::Texture2D)
+ BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
);
-
- let (uv_rect_address, texture) = render_tasks.resolve_location(
- pic_task_id,
+ let (uv_rect_address, textures) = render_tasks.resolve_surface(
+ surface_task.expect("bug: surface must be allocated by now"),
gpu_cache,
- ).unwrap();
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
);
-
let key = BatchKey::new(
kind,
non_segmented_blend_mode,
@@ -1624,24 +1380,19 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::empty(),
- clip_task_address,
+ clip_task_address.unwrap(),
brush_flags,
prim_header_index,
uv_rect_address.as_int(),
- &batch_filter,
+ prim_vis_mask,
);
}
Filter::DropShadows(shadows) => {
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
// Draw an instance per shadow first, followed by the content.
// The shadows and the content get drawn as a brush image.
let kind = BatchKind::Brush(
- BrushBatchKind::Image(ImageBufferKind::Texture2D),
+ BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
);
// Gets the saved render task ID of the content, which is
@@ -1649,37 +1400,35 @@ impl BatchBuilder {
let secondary_id = picture.secondary_render_task_id.expect("no secondary!?");
let content_source = {
let secondary_task = &render_tasks[secondary_id];
- let texture_id = secondary_task.get_target_texture();
- TextureSource::TextureCache(
- texture_id,
- Swizzle::default(),
- )
+ let saved_index = secondary_task.saved_index.expect("no saved index!?");
+ debug_assert_ne!(saved_index, SavedTargetIndex::PENDING);
+ TextureSource::RenderTaskCache(saved_index, Swizzle::default())
};
- // Retrieve the UV rect addresses for shadow/content.
- let (shadow_uv_rect_address, shadow_texture) = render_tasks.resolve_location(
- pic_task_id,
- gpu_cache,
- ).unwrap();
- let shadow_textures = BatchTextures::prim_textured(
- shadow_texture,
- clip_mask_texture_id,
- );
-
- let content_uv_rect_address = render_tasks[secondary_id]
- .get_texture_address(gpu_cache)
- .as_int();
-
// Build BatchTextures for shadow/content
- let content_textures = BatchTextures::prim_textured(
- content_source,
- clip_mask_texture_id,
- );
+ let shadow_textures = BatchTextures::render_target_cache();
+ let content_textures = BatchTextures {
+ colors: [
+ content_source,
+ TextureSource::Invalid,
+ TextureSource::Invalid,
+ ],
+ };
// Build batch keys for shadow/content
let shadow_key = BatchKey::new(kind, non_segmented_blend_mode, shadow_textures);
let content_key = BatchKey::new(kind, non_segmented_blend_mode, content_textures);
+ // Retrieve the UV rect addresses for shadow/content.
+ let cache_task_id = surface_task
+ .expect("bug: surface must be allocated by now");
+ let shadow_uv_rect_address = render_tasks[cache_task_id]
+ .get_texture_address(gpu_cache)
+ .as_int();
+ let content_uv_rect_address = render_tasks[secondary_id]
+ .get_texture_address(gpu_cache)
+ .as_int();
+
for (shadow, shadow_gpu_data) in shadows.iter().zip(picture.extra_gpu_data_handles.iter()) {
// Get the GPU cache address of the extra data handle.
let shadow_prim_address = gpu_cache.get_address(shadow_gpu_data);
@@ -1710,11 +1459,11 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::empty(),
- clip_task_address,
+ clip_task_address.unwrap(),
brush_flags,
shadow_prim_header_index,
- shadow_uv_rect_address.as_int(),
- &batch_filter,
+ shadow_uv_rect_address,
+ prim_vis_mask,
);
}
let z_id_content = z_generator.next();
@@ -1737,31 +1486,21 @@ impl BatchBuilder {
z_id_content,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::empty(),
- clip_task_address,
+ clip_task_address.unwrap(),
brush_flags,
content_prim_header_index,
content_uv_rect_address,
- &batch_filter,
+ prim_vis_mask,
);
}
Filter::Opacity(_, amount) => {
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
let amount = (amount * 65536.0) as i32;
- let (uv_rect_address, texture) = render_tasks.resolve_location(
- pic_task_id,
+ let (uv_rect_address, textures) = render_tasks.resolve_surface(
+ surface_task.expect("bug: surface must be allocated by now"),
gpu_cache,
- ).unwrap();
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
);
-
let key = BatchKey::new(
BatchKind::Brush(BrushBatchKind::Opacity),
BlendMode::PremultipliedAlpha,
@@ -1782,19 +1521,14 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::empty(),
- clip_task_address,
+ clip_task_address.unwrap(),
brush_flags,
prim_header_index,
0,
- &batch_filter,
+ prim_vis_mask,
);
}
_ => {
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
// Must be kept in sync with brush_blend.glsl
let filter_mode = filter.as_int();
@@ -1826,30 +1560,14 @@ impl BatchBuilder {
Filter::Opacity(..) => unreachable!(),
};
- // Other filters that may introduce opacity are handled via different
- // paths.
- if let Filter::ColorMatrix(..) = filter {
- is_opaque = false;
- }
-
- let (uv_rect_address, texture) = render_tasks.resolve_location(
- pic_task_id,
+ let (uv_rect_address, textures) = render_tasks.resolve_surface(
+ surface_task.expect("bug: surface must be allocated by now"),
gpu_cache,
- ).unwrap();
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
);
- let blend_mode = if is_opaque {
- BlendMode::None
- } else {
- BlendMode::PremultipliedAlpha
- };
-
let key = BatchKey::new(
BatchKind::Brush(BrushBatchKind::Blend),
- blend_mode,
+ BlendMode::PremultipliedAlpha,
textures,
);
@@ -1867,11 +1585,11 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::empty(),
- clip_task_address,
+ clip_task_address.unwrap(),
brush_flags,
prim_header_index,
0,
- &batch_filter,
+ prim_vis_mask,
);
}
}
@@ -1889,18 +1607,9 @@ impl BatchBuilder {
let user_data = filter_data.gpu_cache_handle.as_int(gpu_cache);
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
- let (uv_rect_address, texture) = render_tasks.resolve_location(
- pic_task_id,
+ let (uv_rect_address, textures) = render_tasks.resolve_surface(
+ surface_task.expect("bug: surface must be allocated by now"),
gpu_cache,
- ).unwrap();
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
);
let key = BatchKey::new(
@@ -1923,56 +1632,32 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::empty(),
- clip_task_address,
+ clip_task_address.unwrap(),
brush_flags,
prim_header_index,
0,
- &batch_filter,
+ prim_vis_mask,
);
}
- PictureCompositeMode::MixBlend(mode) if BlendMode::from_mix_blend_mode(
- mode,
- ctx.use_advanced_blending,
- !ctx.break_advanced_blend_batches,
- ctx.use_dual_source_blending,
- ).is_some() => {
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
- let (uv_rect_address, texture) = render_tasks.resolve_location(
- pic_task_id,
+ PictureCompositeMode::MixBlend(mode) if ctx.use_advanced_blending => {
+ let (uv_rect_address, textures) = render_tasks.resolve_surface(
+ surface_task.expect("bug: surface must be allocated by now"),
gpu_cache,
- ).unwrap();
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
);
-
-
let key = BatchKey::new(
BatchKind::Brush(
- BrushBatchKind::Image(ImageBufferKind::Texture2D),
+ BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
),
- BlendMode::from_mix_blend_mode(
- mode,
- ctx.use_advanced_blending,
- !ctx.break_advanced_blend_batches,
- ctx.use_dual_source_blending,
- ).unwrap(),
+ BlendMode::Advanced(mode),
textures,
);
let prim_header_index = prim_headers.push(
&prim_header,
z_id,
ImageBrushData {
- color_mode: match key.blend_mode {
- BlendMode::MultiplyDualSource => ShaderColorMode::MultiplyDualSource,
- _ => ShaderColorMode::Image,
- },
+ color_mode: ShaderColorMode::Image,
alpha_type: AlphaType::PremultipliedAlpha,
- raster_space: RasterizationSpace::Screen,
+ raster_space: RasterizationSpace::Local,
opacity: 1.0,
}.encode(),
);
@@ -1984,107 +1669,73 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::empty(),
- clip_task_address,
+ clip_task_address.unwrap(),
brush_flags,
prim_header_index,
uv_rect_address.as_int(),
- &batch_filter,
+ prim_vis_mask,
);
}
PictureCompositeMode::MixBlend(mode) => {
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
+ let cache_task_id = surface_task.expect("bug: surface must be allocated by now");
let backdrop_id = picture.secondary_render_task_id.expect("no backdrop!?");
- let color0 = render_tasks[backdrop_id].get_target_texture();
- let color1 = render_tasks[pic_task_id].get_target_texture();
-
- // Create a separate brush instance for each batcher. For most cases,
- // there is only one batcher. However, in the case of drawing onto
- // a picture cache, there is one batcher per tile. Although not
- // currently used, the implementation of mix-blend-mode now supports
- // doing partial readbacks per-tile. In future, this will be enabled
- // and allow mix-blends to operate on picture cache surfaces without
- // a separate isolated intermediate surface.
-
- for batcher in &mut self.batchers {
- if batcher.should_draw(&batch_filter) {
- let render_task_address = batcher.render_task_address;
-
- let batch_key = BatchKey::new(
- BatchKind::Brush(
- BrushBatchKind::MixBlend {
- task_id: batcher.render_task_id,
- backdrop_id,
- },
- ),
- BlendMode::PremultipliedAlpha,
- BatchTextures {
- input: TextureSet {
- colors: [
- TextureSource::TextureCache(
- color0,
- Swizzle::default(),
- ),
- TextureSource::TextureCache(
- color1,
- Swizzle::default(),
- ),
- TextureSource::Invalid,
- ],
- },
- clip_mask: clip_mask_texture_id,
- },
- );
- let src_uv_address = render_tasks[pic_task_id].get_texture_address(gpu_cache);
- let readback_uv_address = render_tasks[backdrop_id].get_texture_address(gpu_cache);
- let prim_header_index = prim_headers.push(&prim_header, z_id, [
- mode as u32 as i32,
- readback_uv_address.as_int(),
- src_uv_address.as_int(),
- 0,
- ]);
+ // TODO(gw): For now, mix-blend is not supported as a picture
+ // caching root, so we can safely assume there is
+ // only a single batcher present.
+ assert_eq!(self.batchers.len(), 1);
- let instance = BrushInstance {
- segment_index: INVALID_SEGMENT_INDEX,
- edge_flags: EdgeAaSegmentMask::empty(),
- clip_task_address,
- render_task_address,
- brush_flags,
- prim_header_index,
- resource_address: 0,
- };
+ let key = BatchKey::new(
+ BatchKind::Brush(
+ BrushBatchKind::MixBlend {
+ task_id: self.batchers[0].render_task_id,
+ source_id: cache_task_id,
+ backdrop_id,
+ },
+ ),
+ BlendMode::PremultipliedAlpha,
+ BatchTextures::no_texture(),
+ );
+ let backdrop_task_address = render_tasks.get_task_address(backdrop_id);
+ let source_task_address = render_tasks.get_task_address(cache_task_id);
+ let prim_header_index = prim_headers.push(&prim_header, z_id, [
+ mode as u32 as i32,
+ backdrop_task_address.0 as i32,
+ source_task_address.0 as i32,
+ 0,
+ ]);
- batcher.push_single_instance(
- batch_key,
- batch_features,
- bounding_rect,
- z_id,
- PrimitiveInstanceData::from(instance),
- );
- }
- }
+ self.add_brush_instance_to_batches(
+ key,
+ batch_features,
+ bounding_rect,
+ z_id,
+ INVALID_SEGMENT_INDEX,
+ EdgeAaSegmentMask::empty(),
+ clip_task_address.unwrap(),
+ brush_flags,
+ prim_header_index,
+ 0,
+ prim_vis_mask,
+ );
}
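In the mix-blend arm above, the shader receives the blend mode plus the backdrop and source task addresses through the four i32 user-data slots pushed with the primitive header. A self-contained sketch of that packing (the function name and values are invented for illustration):

    // Pack a mix-blend mode and two render-task addresses into the
    // [i32; 4] user-data layout pushed alongside the prim header.
    fn pack_mix_blend(mode: u32, backdrop_addr: u16, source_addr: u16) -> [i32; 4] {
        [mode as i32, backdrop_addr as i32, source_addr as i32, 0]
    }

    fn main() {
        // mode 3 stands in for some MixBlendMode variant.
        assert_eq!(pack_mix_blend(3, 17, 18), [3, 17, 18, 0]);
    }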
PictureCompositeMode::Blit(_) => {
- let uv_rect_address = render_tasks[pic_task_id]
+ let cache_task_id = surface_task.expect("bug: surface must be allocated by now");
+ let uv_rect_address = render_tasks[cache_task_id]
.get_texture_address(gpu_cache)
.as_int();
- let cache_render_task = &render_tasks[pic_task_id];
- let texture_id = cache_render_task.get_target_texture();
- let textures = TextureSet {
- colors: [
- TextureSource::TextureCache(
- texture_id,
- Swizzle::default(),
- ),
- TextureSource::Invalid,
- TextureSource::Invalid,
- ],
+ let textures = match render_tasks[cache_task_id].saved_index {
+ Some(saved_index) => BatchTextures {
+ colors: [
+ TextureSource::RenderTaskCache(saved_index, Swizzle::default()),
+ TextureSource::PrevPassAlpha,
+ TextureSource::Invalid,
+ ]
+ },
+ None => BatchTextures::render_target_cache(),
};
let batch_params = BrushBatchParameters::shared(
- BrushBatchKind::Image(ImageBufferKind::Texture2D),
+ BrushBatchKind::Image(ImageBufferKind::Texture2DArray),
textures,
ImageBrushData {
color_mode: ShaderColorMode::Image,
@@ -2120,11 +1771,14 @@ impl BatchBuilder {
batch_params.prim_user_data,
);
- let (opacity, specified_blend_mode) = if is_opaque {
- (PrimitiveOpacity::opaque(), BlendMode::None)
- } else {
- (PrimitiveOpacity::translucent(), BlendMode::PremultipliedAlpha)
- };
+ // TODO(gw): As before, all pictures that get blitted are assumed
+ // to have alpha. However, we could determine (at least for
+ // simple, common cases) if the picture content is opaque.
+ // That would allow inner segments of pictures to be drawn
+ // with blend disabled, which is a big performance win on
+ // integrated GPUs.
+ let opacity = PrimitiveOpacity::translucent();
+ let specified_blend_mode = BlendMode::PremultipliedAlpha;
self.add_segmented_prim_to_batch(
segments,
@@ -2136,29 +1790,20 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
}
PictureCompositeMode::SvgFilter(..) => {
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
let kind = BatchKind::Brush(
- BrushBatchKind::Image(ImageBufferKind::Texture2D)
+ BrushBatchKind::Image(ImageBufferKind::Texture2DArray)
);
- let (uv_rect_address, texture) = render_tasks.resolve_location(
- pic_task_id,
+ let (uv_rect_address, textures) = render_tasks.resolve_surface(
+ surface_task.expect("bug: surface must be allocated by now"),
gpu_cache,
- ).unwrap();
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
);
let key = BatchKey::new(
kind,
@@ -2183,17 +1828,31 @@ impl BatchBuilder {
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::empty(),
- clip_task_address,
+ clip_task_address.unwrap(),
brush_flags,
prim_header_index,
uv_rect_address.as_int(),
- &batch_filter,
+ prim_vis_mask,
);
}
}
}
None => {
- unreachable!();
+ // If this picture is being drawn into an existing target (i.e. with
+ // no composition operation), recurse and add to the current batch list.
+ self.add_pic_to_batch(
+ picture,
+ ctx,
+ gpu_cache,
+ render_tasks,
+ deferred_resolves,
+ prim_headers,
+ transforms,
+ root_spatial_node_index,
+ surface_spatial_node_index,
+ z_generator,
+ composite_state,
+ );
}
}
}
@@ -2202,14 +1861,17 @@ impl BatchBuilder {
let common_data = &prim_data.common;
let border_data = &prim_data.kind;
- let (uv_rect_address, texture) = match render_tasks.resolve_location(border_data.src_color, gpu_cache) {
- Some(src) => src,
- None => {
- return;
- }
- };
+ let cache_item = resolve_image(
+ border_data.request,
+ ctx.resource_cache,
+ gpu_cache,
+ deferred_resolves,
+ );
+ if cache_item.texture_id == TextureSource::Invalid {
+ return;
+ }
- let textures = TextureSet::prim_textured(texture);
+ let textures = BatchTextures::color(cache_item.texture_id);
let prim_cache_address = gpu_cache.get_address(&common_data.gpu_cache_handle);
let specified_blend_mode = BlendMode::PremultipliedAlpha;
let non_segmented_blend_mode = if !common_data.opacity.is_opaque ||
@@ -2229,7 +1891,7 @@ impl BatchBuilder {
};
let batch_params = BrushBatchParameters::shared(
- BrushBatchKind::Image(texture.image_buffer_kind()),
+ BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
textures,
ImageBrushData {
color_mode: ShaderColorMode::Image,
@@ -2237,7 +1899,7 @@ impl BatchBuilder {
raster_space: RasterizationSpace::Local,
opacity: 1.0,
}.encode(),
- uv_rect_address.as_int(),
+ cache_item.uv_rect_handle.as_int(gpu_cache),
);
let prim_header_index = prim_headers.push(
@@ -2256,18 +1918,22 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
}
- PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, .. } => {
+ PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, opacity_binding_index, .. } => {
let prim_data = &ctx.data_stores.prim[data_handle];
let specified_blend_mode = BlendMode::PremultipliedAlpha;
+ let opacity_binding = ctx.prim_store.get_opacity_binding(opacity_binding_index);
- let non_segmented_blend_mode = if !prim_data.opacity.is_opaque ||
+ let opacity = PrimitiveOpacity::from_alpha(opacity_binding);
+ let opacity = opacity.combine(prim_data.opacity);
+
+ let non_segmented_blend_mode = if !opacity.is_opaque ||
prim_info.clip_task_index != ClipTaskIndex::INVALID ||
transform_kind == TransformedRectKind::Complex
{
@@ -2278,8 +1944,8 @@ impl BatchBuilder {
let batch_params = BrushBatchParameters::shared(
BrushBatchKind::Solid,
- TextureSet::UNTEXTURED,
- [get_shader_opacity(1.0), 0, 0, 0],
+ BatchTextures::no_texture(),
+ [get_shader_opacity(opacity_binding), 0, 0, 0],
0,
);
@@ -2306,7 +1972,7 @@ impl BatchBuilder {
self.add_segmented_prim_to_batch(
segments,
- prim_data.opacity,
+ opacity,
&batch_params,
specified_blend_mode,
non_segmented_blend_mode,
@@ -2314,45 +1980,63 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
}
PrimitiveInstanceKind::YuvImage { data_handle, segment_instance_index, is_compositor_surface, .. } => {
- debug_assert!(!is_compositor_surface);
+ if is_compositor_surface {
+ self.emit_placeholder(prim_rect,
+ prim_info,
+ z_id,
+ transform_id,
+ batch_features,
+ ctx,
+ gpu_cache,
+ render_tasks,
+ prim_headers);
+ return;
+ }
let yuv_image_data = &ctx.data_stores.yuv_image[data_handle].kind;
- let mut textures = TextureSet::UNTEXTURED;
+ let mut textures = BatchTextures::no_texture();
let mut uv_rect_addresses = [0; 3];
// yuv channel
let channel_count = yuv_image_data.format.get_plane_num();
debug_assert!(channel_count <= 3);
for channel in 0 .. channel_count {
+ let image_key = yuv_image_data.yuv_key[channel];
- let src_channel = render_tasks.resolve_location(yuv_image_data.src_yuv[channel], gpu_cache);
+ let cache_item = resolve_image(
+ ImageRequest {
+ key: image_key,
+ rendering: yuv_image_data.image_rendering,
+ tile: None,
+ },
+ ctx.resource_cache,
+ gpu_cache,
+ deferred_resolves,
+ );
- let (uv_rect_address, texture_source) = match src_channel {
- Some(src) => src,
- None => {
- warn!("Warnings: skip a PrimitiveKind::YuvImage");
- return;
- }
- };
+ if cache_item.texture_id == TextureSource::Invalid {
+ warn!("Warning: skipping a PrimitiveKind::YuvImage");
+ return;
+ }
- textures.colors[channel] = texture_source;
- uv_rect_addresses[channel] = uv_rect_address.as_int();
+ textures.colors[channel] = cache_item.texture_id;
+ uv_rect_addresses[channel] = cache_item.uv_rect_handle.as_int(gpu_cache);
}
// All yuv textures should be the same type.
- let buffer_kind = textures.colors[0].image_buffer_kind();
+ let buffer_kind = get_buffer_kind(textures.colors[0]);
assert!(
textures.colors[1 .. yuv_image_data.format.get_plane_num()]
.iter()
- .all(|&tid| buffer_kind == tid.image_buffer_kind())
+ .all(|&tid| buffer_kind == get_buffer_kind(tid))
);
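The channel loop above collects one texture and one UV-rect address per YUV plane, then asserts that every plane lives in the same kind of texture buffer. The same shape in miniature, with string stand-ins for texture sources:

    fn main() {
        let planes = [("y", "2d"), ("u", "2d"), ("v", "2d")];
        // All planes must share one buffer kind, as the assert above requires.
        let kind = planes[0].1;
        assert!(planes.iter().all(|&(_, k)| k == kind));

        let mut uv_rect_addresses = [0; 3];
        for (i, _) in planes.iter().enumerate() {
            uv_rect_addresses[i] = (i as i32 + 1) * 8; // fake GPU-cache ints
        }
        println!("{kind}: {:?}", uv_rect_addresses);
    }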
let kind = BrushBatchKind::YuvImage(
@@ -2419,48 +2103,76 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
}
PrimitiveInstanceKind::Image { data_handle, image_instance_index, is_compositor_surface, .. } => {
- debug_assert!(!is_compositor_surface);
-
+ if is_compositor_surface {
+ self.emit_placeholder(prim_rect,
+ prim_info,
+ z_id,
+ transform_id,
+ batch_features,
+ ctx,
+ gpu_cache,
+ render_tasks,
+ prim_headers);
+ return;
+ }
let image_data = &ctx.data_stores.image[data_handle].kind;
let common_data = &ctx.data_stores.image[data_handle].common;
let image_instance = &ctx.prim_store.images[image_instance_index];
+ let opacity_binding = ctx.prim_store.get_opacity_binding(image_instance.opacity_binding_index);
let specified_blend_mode = match image_data.alpha_type {
AlphaType::PremultipliedAlpha => BlendMode::PremultipliedAlpha,
AlphaType::Alpha => BlendMode::Alpha,
};
+ let request = ImageRequest {
+ key: image_data.key,
+ rendering: image_data.image_rendering,
+ tile: None,
+ };
let prim_user_data = ImageBrushData {
color_mode: ShaderColorMode::Image,
alpha_type: image_data.alpha_type,
raster_space: RasterizationSpace::Local,
- opacity: 1.0,
+ opacity: opacity_binding,
}.encode();
if image_instance.visible_tiles.is_empty() {
- if cfg!(debug_assertions) {
- match ctx.resource_cache.get_image_properties(image_data.key) {
- Some(ImageProperties { tiling: None, .. }) | None => (),
- other => panic!("Non-tiled image with no visible images detected! Properties {:?}", other),
+ let cache_item = match image_data.source {
+ ImageSource::Default => {
+ resolve_image(
+ request,
+ ctx.resource_cache,
+ gpu_cache,
+ deferred_resolves,
+ )
}
+ ImageSource::Cache { ref handle, .. } => {
+ let rt_handle = handle
+ .as_ref()
+ .expect("bug: render task handle not allocated");
+ let rt_cache_entry = ctx.resource_cache
+ .get_cached_render_task(rt_handle);
+ ctx.resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
+ }
+ };
+
+ if cache_item.texture_id == TextureSource::Invalid {
+ return;
}
- let src_color = render_tasks.resolve_location(image_instance.src_color, gpu_cache);
+ let textures = BatchTextures::color(cache_item.texture_id);
- let (uv_rect_address, texture_source) = match src_color {
- Some(src) => src,
- None => {
- return;
- }
- };
+ let opacity = PrimitiveOpacity::from_alpha(opacity_binding);
+ let opacity = opacity.combine(common_data.opacity);
- let non_segmented_blend_mode = if !common_data.opacity.is_opaque ||
+ let non_segmented_blend_mode = if !opacity.is_opaque ||
prim_info.clip_task_index != ClipTaskIndex::INVALID ||
transform_kind == TransformedRectKind::Complex
{
@@ -2470,10 +2182,10 @@ impl BatchBuilder {
};
let batch_params = BrushBatchParameters::shared(
- BrushBatchKind::Image(texture_source.image_buffer_kind()),
- TextureSet::prim_textured(texture_source),
+ BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
+ textures,
prim_user_data,
- uv_rect_address.as_int(),
+ cache_item.uv_rect_handle.as_int(gpu_cache),
);
debug_assert_ne!(image_instance.segment_instance_index, SegmentInstanceIndex::INVALID);
@@ -2500,7 +2212,7 @@ impl BatchBuilder {
self.add_segmented_prim_to_batch(
segments,
- common_data.opacity,
+ opacity,
&batch_params,
specified_blend_mode,
non_segmented_blend_mode,
@@ -2508,23 +2220,18 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
} else {
const VECS_PER_SPECIFIC_BRUSH: usize = 3;
let max_tiles_per_header = (MAX_VERTEX_TEXTURE_WIDTH - VECS_PER_SPECIFIC_BRUSH) / VECS_PER_SEGMENT;
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
// use temporary block storage since we don't know the number of visible tiles beforehand
- let mut gpu_blocks = Vec::<GpuBlockData>::with_capacity(3 + max_tiles_per_header * 2);
+ let mut gpu_blocks = Vec::<GpuBlockData>::new();
for chunk in image_instance.visible_tiles.chunks(max_tiles_per_header) {
gpu_blocks.clear();
gpu_blocks.push(PremultipliedColorF::WHITE.into()); //color
@@ -2547,42 +2254,37 @@ impl BatchBuilder {
let prim_header_index = prim_headers.push(&prim_header, z_id, prim_user_data);
for (i, tile) in chunk.iter().enumerate() {
- let (uv_rect_address, texture) = match render_tasks.resolve_location(tile.src_color, gpu_cache) {
- Some(result) => result,
- None => {
- return;
- }
- };
-
- let textures = BatchTextures::prim_textured(
- texture,
- clip_mask_texture_id,
- );
-
- let batch_key = BatchKey {
- blend_mode: specified_blend_mode,
- kind: BatchKind::Brush(BrushBatchKind::Image(texture.image_buffer_kind())),
- textures,
- };
-
- self.add_brush_instance_to_batches(
- batch_key,
- batch_features,
- bounding_rect,
- z_id,
- i as i32,
- tile.edge_flags,
- clip_task_address,
- BrushFlags::SEGMENT_RELATIVE | BrushFlags::PERSPECTIVE_INTERPOLATION,
- prim_header_index,
- uv_rect_address.as_int(),
- &batch_filter,
- );
+ if let Some((batch_kind, textures, uv_rect_address)) = get_image_tile_params(
+ ctx.resource_cache,
+ gpu_cache,
+ deferred_resolves,
+ request.with_tile(tile.tile_offset),
+ ) {
+ let batch_key = BatchKey {
+ blend_mode: specified_blend_mode,
+ kind: BatchKind::Brush(batch_kind),
+ textures,
+ };
+ self.add_brush_instance_to_batches(
+ batch_key,
+ batch_features,
+ bounding_rect,
+ z_id,
+ i as i32,
+ tile.edge_flags,
+ clip_task_address.unwrap(),
+ BrushFlags::SEGMENT_RELATIVE | BrushFlags::PERSPECTIVE_INTERPOLATION,
+ prim_header_index,
+ uv_rect_address.as_int(),
+ prim_vis_mask,
+ );
+ }
}
}
}
}
- PrimitiveInstanceKind::LinearGradient { data_handle, ref visible_tiles_range, .. } => {
+ PrimitiveInstanceKind::LinearGradient { data_handle, gradient_index, .. } => {
+ let gradient = &ctx.prim_store.linear_gradients[gradient_index];
let prim_data = &ctx.data_stores.linear_grad[data_handle];
let specified_blend_mode = BlendMode::PremultipliedAlpha;
@@ -2602,135 +2304,83 @@ impl BatchBuilder {
BlendMode::None
};
- let user_data = [prim_data.stops_handle.as_int(gpu_cache), 0, 0, 0];
-
- if visible_tiles_range.is_empty() {
- let batch_params = BrushBatchParameters::shared(
- BrushBatchKind::LinearGradient,
- TextureSet::UNTEXTURED,
- user_data,
- 0,
- );
-
- prim_header.specific_prim_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
+ if !gradient.cache_segments.is_empty() {
- let prim_header_index = prim_headers.push(&prim_header, z_id, user_data);
+ for segment in &gradient.cache_segments {
+ let cache_handle = &segment.handle;
+ let rt_cache_entry = ctx.resource_cache
+ .get_cached_render_task(cache_handle);
+ let cache_item = ctx.resource_cache
+ .get_texture_cache_item(&rt_cache_entry.handle);
- let segments = if prim_data.brush_segments.is_empty() {
- None
- } else {
- Some(prim_data.brush_segments.as_slice())
- };
+ if cache_item.texture_id == TextureSource::Invalid {
+ return;
+ }
- self.add_segmented_prim_to_batch(
- segments,
- prim_data.opacity,
- &batch_params,
- specified_blend_mode,
- non_segmented_blend_mode,
- batch_features,
- prim_header_index,
- bounding_rect,
- transform_kind,
- z_id,
- prim_info.clip_task_index,
- &batch_filter,
- ctx,
- render_tasks,
- );
- } else {
- let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
+ let textures = BatchTextures::color(cache_item.texture_id);
+ let batch_kind = BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id));
+ let prim_user_data = ImageBrushData {
+ color_mode: ShaderColorMode::Image,
+ alpha_type: AlphaType::PremultipliedAlpha,
+ raster_space: RasterizationSpace::Local,
+ opacity: 1.0,
+ }.encode();
+
+ let specific_resource_address = cache_item.uv_rect_handle.as_int(gpu_cache);
+ prim_header.specific_prim_address = gpu_cache.get_address(&ctx.globals.default_image_handle);
+
+ let segment_local_clip_rect = match prim_header.local_clip_rect.intersection(&segment.local_rect) {
+ Some(rect) => rect,
+ None => { continue; }
+ };
- let (clip_task_address, clip_mask_texture_id) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
+ let segment_prim_header = PrimitiveHeader {
+ local_rect: segment.local_rect,
+ local_clip_rect: segment_local_clip_rect,
+ specific_prim_address: prim_header.specific_prim_address,
+ transform_id: prim_header.transform_id,
+ };
- let key = BatchKey {
- blend_mode: specified_blend_mode,
- kind: BatchKind::Brush(BrushBatchKind::LinearGradient),
- textures: BatchTextures::prim_untextured(clip_mask_texture_id),
- };
+ let prim_header_index = prim_headers.push(
+ &segment_prim_header,
+ z_id,
+ prim_user_data,
+ );
- for tile in visible_tiles {
- let tile_prim_header = PrimitiveHeader {
- specific_prim_address: gpu_cache.get_address(&tile.handle),
- local_rect: tile.local_rect,
- local_clip_rect: tile.local_clip_rect,
- ..prim_header
+ let batch_key = BatchKey {
+ blend_mode: non_segmented_blend_mode,
+ kind: BatchKind::Brush(batch_kind),
+ textures,
};
- let prim_header_index = prim_headers.push(&tile_prim_header, z_id, user_data);
self.add_brush_instance_to_batches(
- key,
+ batch_key,
batch_features,
bounding_rect,
z_id,
INVALID_SEGMENT_INDEX,
EdgeAaSegmentMask::all(),
- clip_task_address,
+ clip_task_address.unwrap(),
BrushFlags::PERSPECTIVE_INTERPOLATION,
prim_header_index,
- 0,
- &batch_filter,
+ specific_resource_address,
+ prim_vis_mask,
);
}
- }
- }
- PrimitiveInstanceKind::CachedLinearGradient { data_handle, ref visible_tiles_range, .. } => {
- let prim_data = &ctx.data_stores.linear_grad[data_handle];
- let common_data = &prim_data.common;
- let specified_blend_mode = BlendMode::PremultipliedAlpha;
-
- let src_color = render_tasks.resolve_location(prim_data.src_color, gpu_cache);
-
- let (uv_rect_address, texture_source) = match src_color {
- Some(src) => src,
- None => {
- return;
- }
- };
-
- let textures = TextureSet::prim_textured(texture_source);
-
- let prim_header = PrimitiveHeader {
- local_rect: prim_rect,
- local_clip_rect: prim_info.combined_local_clip_rect,
- specific_prim_address: gpu_cache.get_address(&common_data.gpu_cache_handle),
- transform_id,
- };
-
- let prim_user_data = ImageBrushData {
- color_mode: ShaderColorMode::Image,
- alpha_type: AlphaType::PremultipliedAlpha,
- raster_space: RasterizationSpace::Local,
- opacity: 1.0,
- }.encode();
-
- let non_segmented_blend_mode = if !common_data.opacity.is_opaque ||
- prim_info.clip_task_index != ClipTaskIndex::INVALID ||
- transform_kind == TransformedRectKind::Complex
- {
- specified_blend_mode
- } else {
- BlendMode::None
- };
-
- let batch_kind = BrushBatchKind::Image(texture_source.image_buffer_kind());
-
- if visible_tiles_range.is_empty() {
+ } else if gradient.visible_tiles_range.is_empty() {
let batch_params = BrushBatchParameters::shared(
- batch_kind,
- textures,
- prim_user_data,
- uv_rect_address.as_int(),
+ BrushBatchKind::LinearGradient,
+ BatchTextures::no_texture(),
+ [
+ prim_data.stops_handle.as_int(gpu_cache),
+ 0,
+ 0,
+ 0,
+ ],
+ 0,
);
- let segments = if prim_data.brush_segments.is_empty() {
- None
- } else {
- Some(&prim_data.brush_segments[..])
- };
+ prim_header.specific_prim_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
let prim_header_index = prim_headers.push(
&prim_header,
@@ -2738,9 +2388,15 @@ impl BatchBuilder {
batch_params.prim_user_data,
);
+ let segments = if prim_data.brush_segments.is_empty() {
+ None
+ } else {
+ Some(prim_data.brush_segments.as_slice())
+ };
+
self.add_segmented_prim_to_batch(
segments,
- common_data.opacity,
+ prim_data.opacity,
&batch_params,
specified_blend_mode,
non_segmented_blend_mode,
@@ -2748,108 +2404,64 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
} else {
- let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
-
- let (clip_task_address, clip_mask) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
- let batch_key = BatchKey {
- blend_mode: non_segmented_blend_mode,
- kind: BatchKind::Brush(batch_kind),
- textures: BatchTextures {
- input: textures,
- clip_mask,
- },
- };
-
- for tile in visible_tiles {
- let tile_prim_header = PrimitiveHeader {
- local_rect: tile.local_rect,
- local_clip_rect: tile.local_clip_rect,
- ..prim_header
- };
- let prim_header_index = prim_headers.push(&tile_prim_header, z_id, prim_user_data);
+ let visible_tiles = &ctx.scratch.gradient_tiles[gradient.visible_tiles_range];
- self.add_brush_instance_to_batches(
- batch_key,
- batch_features,
- bounding_rect,
- z_id,
- INVALID_SEGMENT_INDEX,
- EdgeAaSegmentMask::all(),
- clip_task_address,
- BrushFlags::PERSPECTIVE_INTERPOLATION,
- prim_header_index,
- uv_rect_address.as_int(),
- &batch_filter,
- );
- }
+ self.add_gradient_tiles(
+ visible_tiles,
+ &prim_data.stops_handle,
+ BrushBatchKind::LinearGradient,
+ specified_blend_mode,
+ bounding_rect,
+ clip_task_address.unwrap(),
+ gpu_cache,
+ &prim_header,
+ prim_headers,
+ z_id,
+ prim_vis_mask,
+ );
}
}
PrimitiveInstanceKind::RadialGradient { data_handle, ref visible_tiles_range, .. } => {
let prim_data = &ctx.data_stores.radial_grad[data_handle];
- let common_data = &prim_data.common;
let specified_blend_mode = BlendMode::PremultipliedAlpha;
- let src_color = render_tasks.resolve_location(prim_data.src_color, gpu_cache);
-
- let (uv_rect_address, texture_source) = match src_color {
- Some(src) => src,
- None => {
- return;
- }
- };
-
- let textures = TextureSet::prim_textured(texture_source);
-
- let prim_header = PrimitiveHeader {
+ let mut prim_header = PrimitiveHeader {
local_rect: prim_rect,
local_clip_rect: prim_info.combined_local_clip_rect,
- specific_prim_address: gpu_cache.get_address(&common_data.gpu_cache_handle),
+ specific_prim_address: GpuCacheAddress::INVALID,
transform_id,
};
- let prim_user_data = ImageBrushData {
- color_mode: ShaderColorMode::Image,
- alpha_type: AlphaType::PremultipliedAlpha,
- raster_space: RasterizationSpace::Local,
- opacity: 1.0,
- }.encode();
-
-
- let non_segmented_blend_mode = if !common_data.opacity.is_opaque ||
- prim_info.clip_task_index != ClipTaskIndex::INVALID ||
- transform_kind == TransformedRectKind::Complex
- {
- specified_blend_mode
- } else {
- BlendMode::None
- };
-
- let batch_kind = BrushBatchKind::Image(texture_source.image_buffer_kind());
-
if visible_tiles_range.is_empty() {
+ let non_segmented_blend_mode = if !prim_data.opacity.is_opaque ||
+ prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+ transform_kind == TransformedRectKind::Complex
+ {
+ specified_blend_mode
+ } else {
+ BlendMode::None
+ };
+
let batch_params = BrushBatchParameters::shared(
- batch_kind,
- textures,
- prim_user_data,
- uv_rect_address.as_int(),
+ BrushBatchKind::RadialGradient,
+ BatchTextures::no_texture(),
+ [
+ prim_data.stops_handle.as_int(gpu_cache),
+ 0,
+ 0,
+ 0,
+ ],
+ 0,
);
- let segments = if prim_data.brush_segments.is_empty() {
- None
- } else {
- Some(&prim_data.brush_segments[..])
- };
+ prim_header.specific_prim_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
let prim_header_index = prim_headers.push(
&prim_header,
@@ -2857,9 +2469,15 @@ impl BatchBuilder {
batch_params.prim_user_data,
);
+ let segments = if prim_data.brush_segments.is_empty() {
+ None
+ } else {
+ Some(prim_data.brush_segments.as_slice())
+ };
+
self.add_segmented_prim_to_batch(
segments,
- common_data.opacity,
+ prim_data.opacity,
&batch_params,
specified_blend_mode,
non_segmented_blend_mode,
@@ -2867,109 +2485,64 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
} else {
let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
- let (clip_task_address, clip_mask) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
- let batch_key = BatchKey {
- blend_mode: non_segmented_blend_mode,
- kind: BatchKind::Brush(batch_kind),
- textures: BatchTextures {
- input: textures,
- clip_mask,
- },
- };
-
- for tile in visible_tiles {
- let tile_prim_header = PrimitiveHeader {
- local_rect: tile.local_rect,
- local_clip_rect: tile.local_clip_rect,
- ..prim_header
- };
- let prim_header_index = prim_headers.push(&tile_prim_header, z_id, prim_user_data);
-
- self.add_brush_instance_to_batches(
- batch_key,
- batch_features,
- bounding_rect,
- z_id,
- INVALID_SEGMENT_INDEX,
- EdgeAaSegmentMask::all(),
- clip_task_address,
- BrushFlags::PERSPECTIVE_INTERPOLATION,
- prim_header_index,
- uv_rect_address.as_int(),
- &batch_filter,
- );
- }
+ self.add_gradient_tiles(
+ visible_tiles,
+ &prim_data.stops_handle,
+ BrushBatchKind::RadialGradient,
+ specified_blend_mode,
+ bounding_rect,
+ clip_task_address.unwrap(),
+ gpu_cache,
+ &prim_header,
+ prim_headers,
+ z_id,
+ prim_vis_mask,
+ );
}
-
}
PrimitiveInstanceKind::ConicGradient { data_handle, ref visible_tiles_range, .. } => {
let prim_data = &ctx.data_stores.conic_grad[data_handle];
- let common_data = &prim_data.common;
let specified_blend_mode = BlendMode::PremultipliedAlpha;
- let src_color = render_tasks.resolve_location(prim_data.src_color, gpu_cache);
-
- let (uv_rect_address, texture_source) = match src_color {
- Some(src) => src,
- None => {
- return;
- }
- };
-
- let textures = TextureSet::prim_textured(texture_source);
-
- let prim_header = PrimitiveHeader {
+ let mut prim_header = PrimitiveHeader {
local_rect: prim_rect,
local_clip_rect: prim_info.combined_local_clip_rect,
- specific_prim_address: gpu_cache.get_address(&common_data.gpu_cache_handle),
+ specific_prim_address: GpuCacheAddress::INVALID,
transform_id,
};
- let prim_user_data = ImageBrushData {
- color_mode: ShaderColorMode::Image,
- alpha_type: AlphaType::PremultipliedAlpha,
- raster_space: RasterizationSpace::Local,
- opacity: 1.0,
- }.encode();
-
-
- let non_segmented_blend_mode = if !common_data.opacity.is_opaque ||
- prim_info.clip_task_index != ClipTaskIndex::INVALID ||
- transform_kind == TransformedRectKind::Complex
- {
- specified_blend_mode
- } else {
- BlendMode::None
- };
-
- let batch_kind = BrushBatchKind::Image(texture_source.image_buffer_kind());
-
if visible_tiles_range.is_empty() {
+ let non_segmented_blend_mode = if !prim_data.opacity.is_opaque ||
+ prim_info.clip_task_index != ClipTaskIndex::INVALID ||
+ transform_kind == TransformedRectKind::Complex
+ {
+ specified_blend_mode
+ } else {
+ BlendMode::None
+ };
+
let batch_params = BrushBatchParameters::shared(
- batch_kind,
- textures,
- prim_user_data,
- uv_rect_address.as_int(),
+ BrushBatchKind::ConicGradient,
+ BatchTextures::no_texture(),
+ [
+ prim_data.stops_handle.as_int(gpu_cache),
+ 0,
+ 0,
+ 0,
+ ],
+ 0,
);
- let segments = if prim_data.brush_segments.is_empty() {
- None
- } else {
- Some(&prim_data.brush_segments[..])
- };
+ prim_header.specific_prim_address = gpu_cache.get_address(&prim_data.gpu_cache_handle);
let prim_header_index = prim_headers.push(
&prim_header,
@@ -2977,9 +2550,15 @@ impl BatchBuilder {
batch_params.prim_user_data,
);
+ let segments = if prim_data.brush_segments.is_empty() {
+ None
+ } else {
+ Some(prim_data.brush_segments.as_slice())
+ };
+
self.add_segmented_prim_to_batch(
segments,
- common_data.opacity,
+ prim_data.opacity,
&batch_params,
specified_blend_mode,
non_segmented_blend_mode,
@@ -2987,70 +2566,52 @@ impl BatchBuilder {
prim_header_index,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_info.clip_task_index,
- &batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
} else {
let visible_tiles = &ctx.scratch.gradient_tiles[*visible_tiles_range];
- let (clip_task_address, clip_mask) = ctx.get_prim_clip_task_and_texture(
- prim_info.clip_task_index,
- render_tasks,
- ).unwrap();
-
- let batch_key = BatchKey {
- blend_mode: non_segmented_blend_mode,
- kind: BatchKind::Brush(batch_kind),
- textures: BatchTextures {
- input: textures,
- clip_mask,
- },
- };
-
- for tile in visible_tiles {
- let tile_prim_header = PrimitiveHeader {
- local_rect: tile.local_rect,
- local_clip_rect: tile.local_clip_rect,
- ..prim_header
- };
- let prim_header_index = prim_headers.push(&tile_prim_header, z_id, prim_user_data);
-
- self.add_brush_instance_to_batches(
- batch_key,
- batch_features,
- bounding_rect,
- z_id,
- INVALID_SEGMENT_INDEX,
- EdgeAaSegmentMask::all(),
- clip_task_address,
- BrushFlags::PERSPECTIVE_INTERPOLATION,
- prim_header_index,
- uv_rect_address.as_int(),
- &batch_filter,
- );
- }
+ self.add_gradient_tiles(
+ visible_tiles,
+ &prim_data.stops_handle,
+ BrushBatchKind::ConicGradient,
+ specified_blend_mode,
+ bounding_rect,
+ clip_task_address.unwrap(),
+ gpu_cache,
+ &prim_header,
+ prim_headers,
+ z_id,
+ prim_vis_mask,
+ );
}
}
PrimitiveInstanceKind::Backdrop { data_handle } => {
let prim_data = &ctx.data_stores.backdrop[data_handle];
let backdrop_pic_index = prim_data.kind.pic_index;
-
- let backdrop_task_id = ctx.prim_store
- .pictures[backdrop_pic_index.0]
- .primary_render_task_id
- .expect("backdrop surface should be resolved by now");
-
- let (backdrop_uv_rect_address, texture) = render_tasks.resolve_location(
- backdrop_task_id,
- gpu_cache,
- ).unwrap();
- let textures = BatchTextures::prim_textured(texture, TextureSource::Invalid);
-
+ let backdrop_surface_index = ctx.prim_store.pictures[backdrop_pic_index.0]
+ .raster_config
+ .as_ref()
+ .expect("backdrop surface should be alloc by now")
+ .surface_index;
+
+ let backdrop_task_id = ctx.surfaces[backdrop_surface_index.0]
+ .render_tasks
+ .as_ref()
+ .expect("backdrop task not available")
+ .root;
+
+ let backdrop_uv_rect_address = render_tasks[backdrop_task_id]
+ .get_texture_address(gpu_cache)
+ .as_int();
+
+ let textures = BatchTextures::render_target_cache();
let batch_key = BatchKey::new(
- BatchKind::Brush(BrushBatchKind::Image(ImageBufferKind::Texture2D)),
+ BatchKind::Brush(BrushBatchKind::Image(ImageBufferKind::Texture2DArray)),
BlendMode::PremultipliedAlpha,
textures,
);
@@ -3085,8 +2646,8 @@ impl BatchBuilder {
OPAQUE_TASK_ADDRESS,
BrushFlags::empty(),
prim_header_index,
- backdrop_uv_rect_address.as_int(),
- &batch_filter,
+ backdrop_uv_rect_address,
+ prim_vis_mask,
);
}
}
@@ -3104,53 +2665,51 @@ impl BatchBuilder {
features: BatchFeatures,
bounding_rect: &PictureRect,
transform_kind: TransformedRectKind,
+ render_tasks: &RenderTaskGraph,
z_id: ZBufferId,
prim_opacity: PrimitiveOpacity,
clip_task_index: ClipTaskIndex,
- batch_filter: &BatchFilter,
+ prim_vis_mask: PrimitiveVisibilityMask,
ctx: &RenderTargetContext,
- render_tasks: &RenderTaskGraph,
) {
debug_assert!(clip_task_index != ClipTaskIndex::INVALID);
// Get GPU address of clip task for this segment, or None if
// the entire segment is clipped out.
- if let Some((clip_task_address, clip_mask)) = ctx.get_clip_task_and_texture(
+ let clip_task_address = match ctx.get_clip_task_address(
clip_task_index,
segment_index,
render_tasks,
) {
- // If a got a valid (or OPAQUE) clip task address, add the segment.
- let is_inner = segment.edge_flags.is_empty();
- let needs_blending = !prim_opacity.is_opaque ||
- clip_task_address != OPAQUE_TASK_ADDRESS ||
- (!is_inner && transform_kind == TransformedRectKind::Complex);
-
- let textures = BatchTextures {
- input: segment_data.textures,
- clip_mask,
- };
+ Some(clip_task_address) => clip_task_address,
+ None => return,
+ };
- let batch_key = BatchKey {
- blend_mode: if needs_blending { alpha_blend_mode } else { BlendMode::None },
- kind: BatchKind::Brush(batch_kind),
- textures,
- };
+ // If we got a valid (or OPAQUE) clip task address, add the segment.
+ let is_inner = segment.edge_flags.is_empty();
+ let needs_blending = !prim_opacity.is_opaque ||
+ clip_task_address != OPAQUE_TASK_ADDRESS ||
+ (!is_inner && transform_kind == TransformedRectKind::Complex);
- self.add_brush_instance_to_batches(
- batch_key,
- features,
- bounding_rect,
- z_id,
- segment_index,
- segment.edge_flags,
- clip_task_address,
- BrushFlags::PERSPECTIVE_INTERPOLATION | segment.brush_flags,
- prim_header_index,
- segment_data.specific_resource_address,
- batch_filter,
- );
- }
+ let batch_key = BatchKey {
+ blend_mode: if needs_blending { alpha_blend_mode } else { BlendMode::None },
+ kind: BatchKind::Brush(batch_kind),
+ textures: segment_data.textures,
+ };
+
+ self.add_brush_instance_to_batches(
+ batch_key,
+ features,
+ bounding_rect,
+ z_id,
+ segment_index,
+ segment.edge_flags,
+ clip_task_address,
+ BrushFlags::PERSPECTIVE_INTERPOLATION | segment.brush_flags,
+ prim_header_index,
+ segment_data.specific_resource_address,
+ prim_vis_mask,
+ );
}
/// Add any segment(s) from a brush to batches.
@@ -3165,11 +2724,11 @@ impl BatchBuilder {
prim_header_index: PrimitiveHeaderIndex,
bounding_rect: &PictureRect,
transform_kind: TransformedRectKind,
+ render_tasks: &RenderTaskGraph,
z_id: ZBufferId,
clip_task_index: ClipTaskIndex,
- batch_filter: &BatchFilter,
+ prim_vis_mask: PrimitiveVisibilityMask,
ctx: &RenderTargetContext,
- render_tasks: &RenderTaskGraph,
) {
match (brush_segments, &params.segment_data) {
(Some(ref brush_segments), SegmentDataKind::Instanced(ref segment_data)) => {
@@ -3191,12 +2750,12 @@ impl BatchBuilder {
features,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_opacity,
clip_task_index,
- batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
}
}
@@ -3217,35 +2776,27 @@ impl BatchBuilder {
features,
bounding_rect,
transform_kind,
+ render_tasks,
z_id,
prim_opacity,
clip_task_index,
- batch_filter,
+ prim_vis_mask,
ctx,
- render_tasks,
);
}
}
(None, SegmentDataKind::Shared(ref segment_data)) => {
// No segments, and thus no per-segment instance data.
// Note: the blend mode already takes opacity into account
-
- let (clip_task_address, clip_mask) = ctx.get_prim_clip_task_and_texture(
- clip_task_index,
- render_tasks,
- ).unwrap();
-
- let textures = BatchTextures {
- input: segment_data.textures,
- clip_mask,
- };
-
let batch_key = BatchKey {
blend_mode: non_segmented_blend_mode,
kind: BatchKind::Brush(params.batch_kind),
- textures,
+ textures: segment_data.textures,
};
-
+ let clip_task_address = ctx.get_prim_clip_task_address(
+ clip_task_index,
+ render_tasks,
+ ).unwrap();
self.add_brush_instance_to_batches(
batch_key,
features,
@@ -3257,7 +2808,7 @@ impl BatchBuilder {
BrushFlags::PERSPECTIVE_INTERPOLATION,
prim_header_index,
segment_data.specific_resource_address,
- batch_filter,
+ prim_vis_mask,
);
}
(None, SegmentDataKind::Instanced(..)) => {
@@ -3267,6 +2818,79 @@ impl BatchBuilder {
}
}
}
+
+ fn add_gradient_tiles(
+ &mut self,
+ visible_tiles: &[VisibleGradientTile],
+ stops_handle: &GpuCacheHandle,
+ kind: BrushBatchKind,
+ blend_mode: BlendMode,
+ bounding_rect: &PictureRect,
+ clip_task_address: RenderTaskAddress,
+ gpu_cache: &GpuCache,
+ base_prim_header: &PrimitiveHeader,
+ prim_headers: &mut PrimitiveHeaders,
+ z_id: ZBufferId,
+ prim_vis_mask: PrimitiveVisibilityMask,
+ ) {
+ let key = BatchKey {
+ blend_mode,
+ kind: BatchKind::Brush(kind),
+ textures: BatchTextures::no_texture(),
+ };
+
+ let user_data = [stops_handle.as_int(gpu_cache), 0, 0, 0];
+
+ for tile in visible_tiles {
+ let prim_header = PrimitiveHeader {
+ specific_prim_address: gpu_cache.get_address(&tile.handle),
+ local_rect: tile.local_rect,
+ local_clip_rect: tile.local_clip_rect,
+ ..*base_prim_header
+ };
+ let prim_header_index = prim_headers.push(&prim_header, z_id, user_data);
+
+ self.add_brush_instance_to_batches(
+ key,
+ BatchFeatures::empty(),
+ bounding_rect,
+ z_id,
+ INVALID_SEGMENT_INDEX,
+ EdgeAaSegmentMask::all(),
+ clip_task_address,
+ BrushFlags::PERSPECTIVE_INTERPOLATION,
+ prim_header_index,
+ 0,
+ prim_vis_mask,
+ );
+ }
+ }
+}
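add_gradient_tiles above reuses one base PrimitiveHeader and overrides only the per-tile fields via struct-update syntax (`..*base_prim_header`). A tiny self-contained example of that pattern with a made-up Header type:

    #[derive(Clone, Copy, Debug)]
    struct Header { address: i32, rect: (f32, f32), transform: u32 }

    fn main() {
        let base = Header { address: -1, rect: (0.0, 0.0), transform: 42 };
        for (address, rect) in [(10, (64.0, 64.0)), (11, (64.0, 32.0))] {
            // `..base` keeps the shared transform; each tile swaps in its own
            // GPU-cache address and local rect.
            let header = Header { address, rect, ..base };
            println!("{:?}", header);
        }
    }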
+
+fn get_image_tile_params(
+ resource_cache: &ResourceCache,
+ gpu_cache: &mut GpuCache,
+ deferred_resolves: &mut Vec<DeferredResolve>,
+ request: ImageRequest,
+) -> Option<(BrushBatchKind, BatchTextures, GpuCacheAddress)> {
+
+ let cache_item = resolve_image(
+ request,
+ resource_cache,
+ gpu_cache,
+ deferred_resolves,
+ );
+
+ if cache_item.texture_id == TextureSource::Invalid {
+ None
+ } else {
+ let textures = BatchTextures::color(cache_item.texture_id);
+ Some((
+ BrushBatchKind::Image(get_buffer_kind(cache_item.texture_id)),
+ textures,
+ gpu_cache.get_address(&cache_item.uv_rect_handle),
+ ))
+ }
}
/// Either a single texture / user data for all segments,
@@ -3303,7 +2927,7 @@ impl BrushBatchParameters {
/// across all segments.
fn shared(
batch_kind: BrushBatchKind,
- textures: TextureSet,
+ textures: BatchTextures,
prim_user_data: [i32; 4],
specific_resource_address: i32,
) -> Self {
@@ -3320,17 +2944,80 @@ impl BrushBatchParameters {
}
}
+impl RenderTaskGraph {
+ fn resolve_surface(
+ &self,
+ task_id: RenderTaskId,
+ gpu_cache: &GpuCache,
+ ) -> (GpuCacheAddress, BatchTextures) {
+ (
+ self[task_id].get_texture_address(gpu_cache),
+ BatchTextures::render_target_cache(),
+ )
+ }
+}
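The small resolve_surface helper added above bundles the two things every caller needs, the UV-rect address and the batch texture set, into one tuple so call sites can destructure it in a single line. A toy version of that shape (Graph and the return values are stand-ins, not WebRender's types):

    struct Graph;
    impl Graph {
        // Real code would look up the task's UV rect in the GPU cache;
        // here we fabricate an address from the task id.
        fn resolve_surface(&self, task_id: usize) -> (i32, &'static str) {
            (task_id as i32 * 16, "render_target_cache")
        }
    }

    fn main() {
        let graph = Graph;
        let (uv_rect_address, textures) = graph.resolve_surface(3);
        println!("addr={uv_rect_address}, textures={textures}");
    }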
+
+pub fn resolve_image(
+ request: ImageRequest,
+ resource_cache: &ResourceCache,
+ gpu_cache: &mut GpuCache,
+ deferred_resolves: &mut Vec<DeferredResolve>,
+) -> CacheItem {
+ match resource_cache.get_image_properties(request.key) {
+ Some(image_properties) => {
+ // Check if this is an external image that needs to be resolved
+ // by the render thread.
+ match image_properties.external_image {
+ Some(external_image) => {
+ // This is an external texture - we will add it to
+ // the deferred resolves list to be patched by
+ // the render thread...
+ let cache_handle = gpu_cache.push_deferred_per_frame_blocks(BLOCKS_PER_UV_RECT);
+ let cache_item = CacheItem {
+ texture_id: TextureSource::External(external_image),
+ uv_rect_handle: cache_handle,
+ uv_rect: DeviceIntRect::new(
+ DeviceIntPoint::zero(),
+ image_properties.descriptor.size,
+ ),
+ texture_layer: 0,
+ };
+
+ deferred_resolves.push(DeferredResolve {
+ image_properties,
+ address: gpu_cache.get_address(&cache_handle),
+ rendering: request.rendering,
+ });
+
+ cache_item
+ }
+ None => {
+ if let Ok(cache_item) = resource_cache.get_cached_image(request) {
+ cache_item
+ } else {
+ // There is no usable texture entry for the image key. Just return an invalid texture here.
+ CacheItem::invalid()
+ }
+ }
+ }
+ }
+ None => {
+ CacheItem::invalid()
+ }
+ }
+}
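resolve_image above has two fundamentally different outcomes: external images get a placeholder GPU-cache entry now plus a DeferredResolve record that the render thread patches later, while regular images resolve straight from the texture cache (or come back invalid). The control flow, reduced to toy types:

    enum Source { External(u32), Cached }

    fn resolve(source: Source, deferred: &mut Vec<u32>) -> &'static str {
        match source {
            Source::External(id) => {
                deferred.push(id); // patched later on the render thread
                "placeholder"
            }
            Source::Cached => "texture_cache_entry",
        }
    }

    fn main() {
        let mut deferred = Vec::new();
        assert_eq!(resolve(Source::External(7), &mut deferred), "placeholder");
        assert_eq!(resolve(Source::Cached, &mut deferred), "texture_cache_entry");
        assert_eq!(deferred, vec![7]);
    }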
+
/// A list of clip instances to be drawn into a target.
#[derive(Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct ClipBatchList {
/// Rectangle draws fill rectangles, optionally with rounded corners.
- pub slow_rectangles: Vec<ClipMaskInstanceRect>,
- pub fast_rectangles: Vec<ClipMaskInstanceRect>,
+ pub slow_rectangles: Vec<ClipMaskInstance>,
+ pub fast_rectangles: Vec<ClipMaskInstance>,
/// Image draws apply the image masking.
- pub images: FastHashMap<(TextureSource, Option<DeviceIntRect>), Vec<ClipMaskInstanceImage>>,
- pub box_shadows: FastHashMap<TextureSource, Vec<ClipMaskInstanceBoxShadow>>,
+ pub images: FastHashMap<TextureSource, Vec<ClipMaskInstance>>,
+ pub box_shadows: FastHashMap<TextureSource, Vec<ClipMaskInstance>>,
}
impl ClipBatchList {
@@ -3373,24 +3060,24 @@ impl ClipBatcher {
pub fn add_clip_region(
&mut self,
+ clip_data_address: GpuCacheAddress,
local_pos: LayoutPoint,
sub_rect: DeviceRect,
- clip_data: ClipData,
task_origin: DevicePoint,
screen_origin: DevicePoint,
device_pixel_scale: f32,
) {
- let instance = ClipMaskInstanceRect {
- common: ClipMaskInstanceCommon {
- clip_transform_id: TransformPaletteId::IDENTITY,
- prim_transform_id: TransformPaletteId::IDENTITY,
- sub_rect,
- task_origin,
- screen_origin,
- device_pixel_scale,
- },
+ let instance = ClipMaskInstance {
+ clip_transform_id: TransformPaletteId::IDENTITY,
+ prim_transform_id: TransformPaletteId::IDENTITY,
+ clip_data_address,
+ resource_address: GpuCacheAddress::INVALID,
local_pos,
- clip_data,
+ tile_rect: LayoutRect::zero(),
+ sub_rect,
+ task_origin,
+ screen_origin,
+ device_pixel_scale,
};
self.primary_clips.slow_rectangles.push(instance);
@@ -3400,13 +3087,14 @@ impl ClipBatcher {
/// instead of one large rectangle.
fn add_tiled_clip_mask(
&mut self,
- mask_screen_rect: DeviceRect,
+ mask_screen_rect: DeviceIntRect,
local_clip_rect: LayoutRect,
clip_spatial_node_index: SpatialNodeIndex,
spatial_tree: &SpatialTree,
world_rect: &WorldRect,
- global_device_pixel_scale: DevicePixelScale,
- common: &ClipMaskInstanceCommon,
+ device_pixel_scale: DevicePixelScale,
+ gpu_address: GpuCacheAddress,
+ instance: &ClipMaskInstance,
is_first_clip: bool,
) -> bool {
// Only try to draw in tiles if the clip mask is big enough.
@@ -3414,7 +3102,6 @@ impl ClipBatcher {
return false;
}
- let mask_screen_rect_size = mask_screen_rect.size.to_i32();
let clip_spatial_node = &spatial_tree
.spatial_nodes[clip_spatial_node_index.0 as usize];
@@ -3441,14 +3128,14 @@ impl ClipBatcher {
// Work out how many tiles to draw this clip mask in, stretched across the
// device rect of the primitive clip mask.
- let world_device_rect = world_clip_rect * global_device_pixel_scale;
- let x_tiles = (mask_screen_rect_size.width + CLIP_RECTANGLE_TILE_SIZE-1) / CLIP_RECTANGLE_TILE_SIZE;
- let y_tiles = (mask_screen_rect_size.height + CLIP_RECTANGLE_TILE_SIZE-1) / CLIP_RECTANGLE_TILE_SIZE;
+ let world_device_rect = world_clip_rect * device_pixel_scale;
+ let x_tiles = (mask_screen_rect.size.width + CLIP_RECTANGLE_TILE_SIZE-1) / CLIP_RECTANGLE_TILE_SIZE;
+ let y_tiles = (mask_screen_rect.size.height + CLIP_RECTANGLE_TILE_SIZE-1) / CLIP_RECTANGLE_TILE_SIZE;
// Because we only run this code path for axis-aligned rects (the root coord system check above),
// and only for rectangles (not rounded etc), the world_device_rect is not conservative - we know
// that there is no inner_rect, and the world_device_rect should be the real, axis-aligned clip rect.
- let mask_origin = mask_screen_rect.origin.to_vector();
+ let mask_origin = mask_screen_rect.origin.to_f32().to_vector();
let clip_list = self.get_batch_list(is_first_clip);
for y in 0 .. y_tiles {
@@ -3458,8 +3145,8 @@ impl ClipBatcher {
y * CLIP_RECTANGLE_TILE_SIZE,
);
let p1 = DeviceIntPoint::new(
- (p0.x + CLIP_RECTANGLE_TILE_SIZE).min(mask_screen_rect_size.width),
- (p0.y + CLIP_RECTANGLE_TILE_SIZE).min(mask_screen_rect_size.height),
+ (p0.x + CLIP_RECTANGLE_TILE_SIZE).min(mask_screen_rect.size.width),
+ (p0.y + CLIP_RECTANGLE_TILE_SIZE).min(mask_screen_rect.size.height),
);
let normalized_sub_rect = DeviceIntRect::new(
p0,
@@ -3474,13 +3161,11 @@ impl ClipBatcher {
// these pixels would be redundant - since this clip can't possibly
// affect the pixels in this tile, skip them!
if !world_device_rect.contains_rect(&world_sub_rect) {
- clip_list.slow_rectangles.push(ClipMaskInstanceRect {
- common: ClipMaskInstanceCommon {
- sub_rect: normalized_sub_rect,
- ..*common
- },
+ clip_list.slow_rectangles.push(ClipMaskInstance {
+ clip_data_address: gpu_address,
+ sub_rect: normalized_sub_rect,
local_pos: local_clip_rect.origin,
- clip_data: ClipData::uniform(local_clip_rect.size, 0.0, ClipMode::Clip),
+ ..*instance
});
}
}
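The tile loop above rests on two small pieces of integer arithmetic: a ceiling division to get the tile counts and a min() to clamp the last tile to the mask bounds. Spelled out with a hypothetical 16-pixel tile size:

    const TILE: i32 = 16; // stand-in for CLIP_RECTANGLE_TILE_SIZE

    fn main() {
        let (w, h) = (40, 20);
        // (n + TILE - 1) / TILE is integer ceil-division.
        let (x_tiles, y_tiles) = ((w + TILE - 1) / TILE, (h + TILE - 1) / TILE);
        assert_eq!((x_tiles, y_tiles), (3, 2));
        for y in 0..y_tiles {
            for x in 0..x_tiles {
                let p0 = (x * TILE, y * TILE);
                // Clamp the far edge so the last tile never overruns the mask.
                let p1 = ((p0.0 + TILE).min(w), (p0.1 + TILE).min(h));
                println!("tile ({x},{y}): {:?} .. {:?}", p0, p1);
            }
        }
    }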
@@ -3506,22 +3191,19 @@ impl ClipBatcher {
&mut self,
clip_node_range: ClipNodeRange,
root_spatial_node_index: SpatialNodeIndex,
- render_tasks: &RenderTaskGraph,
resource_cache: &ResourceCache,
gpu_cache: &GpuCache,
clip_store: &ClipStore,
spatial_tree: &SpatialTree,
transforms: &mut TransformPalette,
clip_data_store: &ClipDataStore,
- actual_rect: DeviceRect,
+ actual_rect: DeviceIntRect,
world_rect: &WorldRect,
- surface_device_pixel_scale: DevicePixelScale,
- global_device_pixel_scale: DevicePixelScale,
+ device_pixel_scale: DevicePixelScale,
task_origin: DevicePoint,
screen_origin: DevicePoint,
- ) -> bool {
+ ) {
let mut is_first_clip = true;
- let mut clear_to_one = false;
for i in 0 .. clip_node_range.count {
let clip_instance = clip_store.get_instance_from_range(&clip_node_range, i);
@@ -3533,37 +3215,26 @@ impl ClipBatcher {
spatial_tree,
);
- // For clip mask images, we need to map from the primitive's layout space to
- // the target space, as the cs_clip_image shader needs to forward transform
- // the local image bounds, rather than backwards transform the target bounds
- // as is done in write_clip_tile_vertex.
- let prim_transform_id = match clip_node.item.kind {
- ClipItemKind::Image { .. } => {
- transforms.get_id(
- clip_instance.spatial_node_index,
- root_spatial_node_index,
- spatial_tree,
- )
- }
- _ => {
- transforms.get_id(
- root_spatial_node_index,
- ROOT_SPATIAL_NODE_INDEX,
- spatial_tree,
- )
- }
- };
+ let prim_transform_id = transforms.get_id(
+ root_spatial_node_index,
+ ROOT_SPATIAL_NODE_INDEX,
+ spatial_tree,
+ );
- let common = ClipMaskInstanceCommon {
+ let instance = ClipMaskInstance {
+ clip_transform_id,
+ prim_transform_id,
+ clip_data_address: GpuCacheAddress::INVALID,
+ resource_address: GpuCacheAddress::INVALID,
+ local_pos: LayoutPoint::zero(),
+ tile_rect: LayoutRect::zero(),
sub_rect: DeviceRect::new(
DevicePoint::zero(),
- actual_rect.size,
+ actual_rect.size.to_f32(),
),
task_origin,
screen_origin,
- device_pixel_scale: surface_device_pixel_scale.0,
- clip_transform_id,
- prim_transform_id,
+ device_pixel_scale: device_pixel_scale.0,
};
let added_clip = match clip_node.item.kind {
@@ -3574,14 +3245,10 @@ impl ClipBatcher {
tile: None,
};
- let map_local_to_world = SpaceMapper::new_with_target(
- ROOT_SPATIAL_NODE_INDEX,
- clip_instance.spatial_node_index,
- WorldRect::max_rect(),
- spatial_tree,
- );
+ let clip_data_address =
+ gpu_cache.get_address(&clip_node.gpu_cache_handle);
- let mut add_image = |request: ImageRequest, tile_rect: LayoutRect, sub_rect: DeviceRect| {
+ let mut add_image = |request: ImageRequest, local_tile_rect: LayoutRect| {
let cache_item = match resource_cache.get_cached_image(request) {
Ok(item) => item,
Err(..) => {
@@ -3591,126 +3258,69 @@ impl ClipBatcher {
}
};
- // If the clip transform is axis-aligned, we can skip any need for scissoring
- // by clipping the local clip rect with the backwards transformed target bounds.
- // If it is not axis-aligned, then we pass the local clip rect through unmodified
- // to the shader and also set up a scissor rect for the overall target bounds to
- // ensure nothing is drawn outside the target. If for some reason we can't map the
- // rect back to local space, we also fall back to just using a scissor rectangle.
- let world_rect =
- sub_rect.translate(actual_rect.origin.to_vector()) / surface_device_pixel_scale;
- let (clip_transform_id, local_rect, scissor) = match map_local_to_world.unmap(&world_rect) {
- Some(local_rect)
- if clip_transform_id.transform_kind() == TransformedRectKind::AxisAligned &&
- !map_local_to_world.get_transform().has_perspective_component() => {
- match local_rect.intersection(&rect) {
- Some(local_rect) => (clip_transform_id, local_rect, None),
- None => return,
- }
- }
- _ => {
- // If for some reason inverting the transform failed, don't treat
- // the transform as axis-aligned, even if it actually was.
- (clip_transform_id.override_transform_kind(TransformedRectKind::Complex),
- rect,
- Some(common.sub_rect
- .translate(task_origin.to_vector())
- .round_out()
- .to_i32()))
- }
- };
-
self.get_batch_list(is_first_clip)
.images
- .entry((cache_item.texture_id, scissor))
+ .entry(cache_item.texture_id)
.or_insert_with(Vec::new)
- .push(ClipMaskInstanceImage {
- common: ClipMaskInstanceCommon {
- sub_rect,
- clip_transform_id,
- ..common
- },
+ .push(ClipMaskInstance {
+ clip_data_address,
resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
- tile_rect,
- local_rect,
+ tile_rect: local_tile_rect,
+ local_pos: rect.origin,
+ ..instance
});
};
- let clip_spatial_node = &spatial_tree.spatial_nodes[clip_instance.spatial_node_index.0 as usize];
- let clip_is_axis_aligned = clip_spatial_node.coordinate_system_id == CoordinateSystemId::root();
-
- if clip_instance.has_visible_tiles() {
- let sub_rect_bounds = actual_rect.size.into();
-
- for tile in clip_store.visible_mask_tiles(&clip_instance) {
- let tile_sub_rect = if clip_is_axis_aligned {
- let tile_world_rect = map_local_to_world
- .map(&tile.tile_rect)
- .expect("bug: should always map as axis-aligned");
- let tile_device_rect = tile_world_rect * surface_device_pixel_scale;
- tile_device_rect
- .translate(-actual_rect.origin.to_vector())
- .round_out()
- .intersection(&sub_rect_bounds)
- } else {
- Some(common.sub_rect)
- };
-
- if let Some(tile_sub_rect) = tile_sub_rect {
- assert!(sub_rect_bounds.contains_rect(&tile_sub_rect));
+ match clip_instance.visible_tiles {
+ Some(ref tiles) => {
+ for tile in tiles {
add_image(
request.with_tile(tile.tile_offset),
tile.tile_rect,
- tile_sub_rect,
)
}
}
- } else {
- add_image(request, rect, common.sub_rect)
+ None => {
+ add_image(request, rect)
+ }
}
- // If this is the first clip and either there is a transform or the image rect
- // doesn't cover the entire task, then request a clear so that pixels outside
- // the image boundaries will be properly initialized.
- if is_first_clip &&
- (!clip_is_axis_aligned ||
- !(map_local_to_world.map(&rect).expect("bug: should always map as axis-aligned")
- * surface_device_pixel_scale).contains_rect(&actual_rect)) {
- clear_to_one = true;
- }
true
}
ClipItemKind::BoxShadow { ref source } => {
- let task_id = source
- .render_task
+ let gpu_address =
+ gpu_cache.get_address(&clip_node.gpu_cache_handle);
+ let rt_handle = source
+ .cache_handle
+ .as_ref()
.expect("bug: render task handle not allocated");
- let (uv_rect_address, texture) = render_tasks.resolve_location(task_id, gpu_cache).unwrap();
+ let rt_cache_entry = resource_cache
+ .get_cached_render_task(rt_handle);
+ let cache_item = resource_cache
+ .get_texture_cache_item(&rt_cache_entry.handle);
+ debug_assert_ne!(cache_item.texture_id, TextureSource::Invalid);
self.get_batch_list(is_first_clip)
.box_shadows
- .entry(texture)
+ .entry(cache_item.texture_id)
.or_insert_with(Vec::new)
- .push(ClipMaskInstanceBoxShadow {
- common,
- resource_address: uv_rect_address,
- shadow_data: BoxShadowData {
- src_rect_size: source.original_alloc_size,
- clip_mode: source.clip_mode as i32,
- stretch_mode_x: source.stretch_mode_x as i32,
- stretch_mode_y: source.stretch_mode_y as i32,
- dest_rect: source.prim_shadow_rect,
- },
+ .push(ClipMaskInstance {
+ clip_data_address: gpu_address,
+ resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
+ ..instance
});
true
}
ClipItemKind::Rectangle { rect, mode: ClipMode::ClipOut } => {
+ let gpu_address =
+ gpu_cache.get_address(&clip_node.gpu_cache_handle);
self.get_batch_list(is_first_clip)
.slow_rectangles
- .push(ClipMaskInstanceRect {
- common,
+ .push(ClipMaskInstance {
local_pos: rect.origin,
- clip_data: ClipData::uniform(rect.size, 0.0, ClipMode::ClipOut),
+ clip_data_address: gpu_address,
+ ..instance
});
true
@@ -3719,36 +3329,39 @@ impl ClipBatcher {
if clip_instance.flags.contains(ClipNodeFlags::SAME_COORD_SYSTEM) {
false
} else {
- if self.add_tiled_clip_mask(
+ let gpu_address = gpu_cache.get_address(&clip_node.gpu_cache_handle);
+
+ if !self.add_tiled_clip_mask(
actual_rect,
rect,
clip_instance.spatial_node_index,
spatial_tree,
world_rect,
- global_device_pixel_scale,
- &common,
+ device_pixel_scale,
+ gpu_address,
+ &instance,
is_first_clip,
) {
- clear_to_one |= is_first_clip;
- } else {
self.get_batch_list(is_first_clip)
.slow_rectangles
- .push(ClipMaskInstanceRect {
- common,
+ .push(ClipMaskInstance {
+ clip_data_address: gpu_address,
local_pos: rect.origin,
- clip_data: ClipData::uniform(rect.size, 0.0, ClipMode::Clip),
+ ..instance
});
}
true
}
}
- ClipItemKind::RoundedRectangle { rect, ref radius, mode, .. } => {
+ ClipItemKind::RoundedRectangle { rect, .. } => {
+ let gpu_address =
+ gpu_cache.get_address(&clip_node.gpu_cache_handle);
let batch_list = self.get_batch_list(is_first_clip);
- let instance = ClipMaskInstanceRect {
- common,
+ let instance = ClipMaskInstance {
+ clip_data_address: gpu_address,
local_pos: rect.origin,
- clip_data: ClipData::rounded_rect(rect.size, radius, mode),
+ ..instance
};
if clip_instance.flags.contains(ClipNodeFlags::USE_FAST_PATH) {
batch_list.fast_rectangles.push(instance);
@@ -3762,8 +3375,25 @@ impl ClipBatcher {
is_first_clip &= !added_clip;
}
+ }
+}
- clear_to_one
+// TODO(gw): This should probably be a method on TextureSource
+pub fn get_buffer_kind(texture: TextureSource) -> ImageBufferKind {
+ match texture {
+ TextureSource::External(ext_image) => {
+ match ext_image.image_type {
+ ExternalImageType::TextureHandle(target) => {
+ target.into()
+ }
+ ExternalImageType::Buffer => {
+ // The ExternalImageType::Buffer should be handled by resource_cache.
+ // It should go through the non-external case.
+ panic!("Unexpected non-texture handle type");
+ }
+ }
+ }
+ _ => ImageBufferKind::Texture2DArray,
}
}
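A sketch of the refactor the TODO above suggests, moving the same match onto TextureSource (the method shape is an assumption; not part of this patch):

impl TextureSource {
    // Same logic as the free function get_buffer_kind above.
    pub fn buffer_kind(&self) -> ImageBufferKind {
        match *self {
            TextureSource::External(ref ext_image) => {
                match ext_image.image_type {
                    ExternalImageType::TextureHandle(target) => target.into(),
                    // Buffer-backed external images are resolved by the
                    // resource cache and take the non-external path.
                    ExternalImageType::Buffer => {
                        panic!("Unexpected non-texture handle type")
                    }
                }
            }
            _ => ImageBufferKind::Texture2DArray,
        }
    }
}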
@@ -3772,39 +3402,35 @@ impl<'a, 'rc> RenderTargetContext<'a, 'rc> {
/// Returns None if the segment was completely clipped out.
/// Returns Some(OPAQUE_TASK_ADDRESS) if no clip mask is needed.
/// Returns Some(task_address) if there was a valid clip mask.
- fn get_clip_task_and_texture(
+ fn get_clip_task_address(
&self,
clip_task_index: ClipTaskIndex,
offset: i32,
render_tasks: &RenderTaskGraph,
- ) -> Option<(RenderTaskAddress, TextureSource)> {
- match self.scratch.clip_mask_instances[clip_task_index.0 as usize + offset as usize] {
+ ) -> Option<RenderTaskAddress> {
+ let address = match self.scratch.clip_mask_instances[clip_task_index.0 as usize + offset as usize] {
ClipMaskKind::Mask(task_id) => {
- Some((
- task_id.into(),
- TextureSource::TextureCache(
- render_tasks[task_id].get_target_texture(),
- Swizzle::default(),
- )
- ))
+ render_tasks.get_task_address(task_id)
}
ClipMaskKind::None => {
- Some((OPAQUE_TASK_ADDRESS, TextureSource::Invalid))
+ OPAQUE_TASK_ADDRESS
}
ClipMaskKind::Clipped => {
- None
+ return None;
}
- }
+ };
+
+ Some(address)
}
/// Helper function to get the clip task address for a
/// non-segmented primitive.
- fn get_prim_clip_task_and_texture(
+ fn get_prim_clip_task_address(
&self,
clip_task_index: ClipTaskIndex,
render_tasks: &RenderTaskGraph,
- ) -> Option<(RenderTaskAddress, TextureSource)> {
- self.get_clip_task_and_texture(
+ ) -> Option<RenderTaskAddress> {
+ self.get_clip_task_address(
clip_task_index,
0,
render_tasks,
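For reference, a caller-side sketch of the tri-state contract documented above (variable names are illustrative):

// None       -> segment is fully clipped out; emit nothing for it.
// Some(addr) -> addr is OPAQUE_TASK_ADDRESS when no mask is needed,
//               otherwise the address of the clip mask render task.
let clip_task_address = match ctx.get_prim_clip_task_address(
    prim_info.clip_task_index,
    render_tasks,
) {
    Some(address) => address,
    None => return,
};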
diff --git a/third_party/webrender/webrender/src/border.rs b/third_party/webrender/webrender/src/border.rs
index d37b5e6f3ee..3185acd42c5 100644
--- a/third_party/webrender/webrender/src/border.rs
+++ b/third_party/webrender/webrender/src/border.rs
@@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use api::{BorderRadius, BorderSide, BorderStyle, ColorF, ColorU};
-use api::{NormalBorder as ApiNormalBorder, RepeatMode};
+use api::{NormalBorder as ApiNormalBorder, RepeatMode, EdgeAaSegmentMask};
use api::units::*;
use crate::clip::ClipChainId;
use crate::ellipse::Ellipse;
@@ -15,7 +15,6 @@ use crate::prim_store::{BorderSegmentInfo, BrushSegment, NinePatchDescriptor};
use crate::prim_store::borders::{NormalBorderPrim, NormalBorderData};
use crate::util::{lerp, RectHelpers};
use crate::internal_types::LayoutPrimitiveInfo;
-use crate::segment::EdgeAaSegmentMask;
// The choice of 2048 as the maximum radius in device space, beyond which
// we start stretching, is up for debate.
@@ -35,7 +34,7 @@ pub const MAX_DASH_COUNT: u32 = 2048;
// all the border structs with hashable
// variants...
-#[derive(Copy, Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[derive(Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct BorderRadiusAu {
diff --git a/third_party/webrender/webrender/src/box_shadow.rs b/third_party/webrender/webrender/src/box_shadow.rs
index 6c6e9135741..49d6f884ffb 100644
--- a/third_party/webrender/webrender/src/box_shadow.rs
+++ b/third_party/webrender/webrender/src/box_shadow.rs
@@ -8,8 +8,10 @@ use api::units::*;
use crate::clip::{ClipItemKey, ClipItemKeyKind, ClipChainId};
use crate::scene_building::SceneBuilder;
use crate::spatial_tree::SpatialNodeIndex;
+use crate::gpu_cache::GpuCacheHandle;
use crate::gpu_types::BoxShadowStretchMode;
-use crate::render_task_graph::RenderTaskId;
+use crate::render_task_cache::RenderTaskCacheEntryHandle;
+use crate::util::RectHelpers;
use crate::internal_types::LayoutPrimitiveInfo;
#[derive(Debug, Clone, MallocSizeOf)]
@@ -26,7 +28,8 @@ pub struct BoxShadowClipSource {
// The current cache key (in device-pixels), and handles
// to the cached clip region and blurred texture.
pub cache_key: Option<(DeviceIntSize, BoxShadowCacheKey)>,
- pub render_task: Option<RenderTaskId>,
+ pub cache_handle: Option<RenderTaskCacheEntryHandle>,
+ pub clip_data_handle: GpuCacheHandle,
// Local-space size of the required render task.
pub shadow_rect_alloc_size: LayoutSize,
@@ -67,7 +70,6 @@ pub struct BoxShadowCacheKey {
pub br_top_right: DeviceIntSize,
pub br_bottom_right: DeviceIntSize,
pub br_bottom_left: DeviceIntSize,
- pub device_pixel_scale: Au,
}
impl<'a> SceneBuilder<'a> {
@@ -120,7 +122,7 @@ impl<'a> SceneBuilder<'a> {
let mut clips = Vec::with_capacity(2);
let (final_prim_rect, clip_radius) = match clip_mode {
BoxShadowClipMode::Outset => {
- if shadow_rect.is_empty() {
+ if !shadow_rect.is_well_formed_and_nonempty() {
return;
}
@@ -136,7 +138,7 @@ impl<'a> SceneBuilder<'a> {
(shadow_rect, shadow_radius)
}
BoxShadowClipMode::Inset => {
- if !shadow_rect.is_empty() {
+ if shadow_rect.is_well_formed_and_nonempty() {
clips.push(ClipItemKey {
kind: ClipItemKeyKind::rounded_rect(
shadow_rect,
@@ -206,7 +208,7 @@ impl<'a> SceneBuilder<'a> {
let prim_info = match clip_mode {
BoxShadowClipMode::Outset => {
// Certain spread-radii make the shadow invalid.
- if shadow_rect.is_empty() {
+ if !shadow_rect.is_well_formed_and_nonempty() {
return;
}
@@ -230,7 +232,7 @@ impl<'a> SceneBuilder<'a> {
// Inset shadows are still visible, even if the
// inset shadow rect becomes invalid (they will
// just look like a solid rectangle).
- if !shadow_rect.is_empty() {
+ if shadow_rect.is_well_formed_and_nonempty() {
extra_clips.push(shadow_clip_source);
}
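The switch from is_empty() to is_well_formed_and_nonempty() above matters because large spread radii can drive the shadow rect's size negative, and a negative-sized rect is malformed rather than merely empty. A plausible reading of the helper (an assumption; the real predicate lives in util::RectHelpers):

// Sketch: true only for rects with strictly positive width and height.
fn is_well_formed_and_nonempty(rect: &LayoutRect) -> bool {
    rect.size.width > 0.0 && rect.size.height > 0.0
}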
diff --git a/third_party/webrender/webrender/src/capture.rs b/third_party/webrender/webrender/src/capture.rs
index 0414fcb25e4..d6952b7f82d 100644
--- a/third_party/webrender/webrender/src/capture.rs
+++ b/third_party/webrender/webrender/src/capture.rs
@@ -5,7 +5,7 @@
use std::fs::File;
use std::path::{Path, PathBuf};
-use api::{ExternalImageData, ImageDescriptor};
+use api::{CaptureBits, ExternalImageData, ImageDescriptor};
#[cfg(feature = "png")]
use api::ImageFormat;
use api::units::TexelRect;
@@ -13,7 +13,6 @@ use api::units::TexelRect;
use api::units::DeviceIntSize;
#[cfg(feature = "capture")]
use crate::print_tree::{PrintableTree, PrintTree};
-use crate::render_api::CaptureBits;
use ron;
use serde;
@@ -42,9 +41,10 @@ impl CaptureConfig {
frame_id: 0,
resource_id: 0,
#[cfg(feature = "capture")]
- pretty: ron::ser::PrettyConfig::new()
- .with_enumerate_arrays(true)
- .with_indentor(" ".to_string()),
+ pretty: ron::ser::PrettyConfig {
+ enumerate_arrays: true,
+ .. ron::ser::PrettyConfig::default()
+ },
}
}
diff --git a/third_party/webrender/webrender/src/clip.rs b/third_party/webrender/webrender/src/clip.rs
index 7a839a68fc9..391170f0b08 100644
--- a/third_party/webrender/webrender/src/clip.rs
+++ b/third_party/webrender/webrender/src/clip.rs
@@ -92,40 +92,35 @@
//! [segment.rs]: ../segment/index.html
//!
-use api::{BorderRadius, ClipMode, ComplexClipRegion, ImageMask};
-use api::{BoxShadowClipMode, ClipId, FillRule, ImageKey, ImageRendering, PipelineId};
+use api::{BorderRadius, ClipIntern, ClipMode, ComplexClipRegion, ImageMask};
+use api::{BoxShadowClipMode, ClipId, ImageKey, ImageRendering, PipelineId};
use api::units::*;
-use crate::image_tiling::{self, Repetition};
+use api::image_tiling::{self, Repetition};
use crate::border::{ensure_no_corner_overlap, BorderRadiusAu};
use crate::box_shadow::{BLUR_SAMPLE_SCALE, BoxShadowClipSource, BoxShadowCacheKey};
use crate::spatial_tree::{ROOT_SPATIAL_NODE_INDEX, SpatialTree, SpatialNodeIndex, CoordinateSystemId};
use crate::ellipse::Ellipse;
-use crate::gpu_cache::GpuCache;
+use crate::gpu_cache::{GpuCache, GpuCacheHandle, ToGpuBlocks};
use crate::gpu_types::{BoxShadowStretchMode};
use crate::intern::{self, ItemUid};
use crate::internal_types::{FastHashMap, FastHashSet};
-use crate::prim_store::{VisibleMaskImageTile};
-use crate::prim_store::{PointKey, SizeKey, RectangleKey, PolygonKey};
+use crate::prim_store::{ClipData, ImageMaskData, SpaceMapper, VisibleMaskImageTile};
+use crate::prim_store::{PointKey, SizeKey, RectangleKey};
use crate::render_task_cache::to_cache_size;
use crate::resource_cache::{ImageRequest, ResourceCache};
-use crate::space::SpaceMapper;
-use crate::util::{clamp_to_scale_factor, MaxRect, extract_inner_rect_safe, project_rect, ScaleOffset, VecHelper};
+use crate::util::{extract_inner_rect_safe, project_rect, ScaleOffset};
use euclid::approxeq::ApproxEq;
-use std::{iter, ops, u32, mem};
+use std::{iter, ops, u32};
+use smallvec::SmallVec;
// Type definitions for interning clip nodes.
-#[derive(Copy, Clone, Debug, MallocSizeOf, PartialEq)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub enum ClipIntern {}
-
pub type ClipDataStore = intern::DataStore<ClipIntern>;
pub type ClipDataHandle = intern::Handle<ClipIntern>;
/// Defines a clip that is positioned by a specific spatial node
#[cfg_attr(feature = "capture", derive(Serialize))]
-#[derive(Copy, Clone, PartialEq)]
-#[derive(MallocSizeOf)]
+#[derive(Copy, Clone)]
pub struct ClipInstance {
/// Handle to the interned clip
pub handle: ClipDataHandle,
@@ -146,19 +141,6 @@ impl ClipInstance {
}
}
-/// Defines a clip instance with some extra information that is available
-/// during scene building (since interned clips cannot retrieve the underlying
-/// data from the scene building thread).
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[derive(MallocSizeOf)]
-#[derive(Copy, Clone)]
-pub struct SceneClipInstance {
- /// The interned clip + positioning information that is used during frame building.
- pub clip: ClipInstance,
- /// The definition of the clip, used during scene building to optimize clip-chains.
- pub key: ClipItemKey,
-}
-
/// A clip template defines clips in terms of the public API. Specifically,
/// this is a parent `ClipId` and some number of clip instances. See the
/// CLIPPING_AND_POSITIONING.md document in doc/ for more information.
@@ -166,8 +148,8 @@ pub struct SceneClipInstance {
pub struct ClipTemplate {
/// Parent of this clip, in terms of the public clip API
pub parent: ClipId,
- /// Range of instances that define this clip template
- pub clips: ops::Range<u32>,
+ /// List of instances that define this clip template
+ pub instances: SmallVec<[ClipInstance; 2]>,
}
/// A helper used during scene building to construct (internal) clip chains from
@@ -198,7 +180,6 @@ impl ClipChainBuilder {
clip_id: Option<ClipId>,
clip_chain_nodes: &mut Vec<ClipChainNode>,
templates: &FastHashMap<ClipId, ClipTemplate>,
- instances: &[SceneClipInstance],
) -> Self {
let mut parent_clips = FastHashSet::default();
@@ -219,13 +200,10 @@ impl ClipChainBuilder {
&mut parent_clips,
clip_chain_nodes,
templates,
- instances,
)
}
None => {
- // Even if the clip id is None, it's possible that there were parent clips in the builder
- // that need to be applied and set as the root of this clip-chain builder.
- parent_clip_chain_id
+ ClipChainId::NONE
}
};
@@ -246,14 +224,12 @@ impl ClipChainBuilder {
existing_clips: &mut FastHashSet<(ItemUid, SpatialNodeIndex)>,
clip_chain_nodes: &mut Vec<ClipChainNode>,
templates: &FastHashMap<ClipId, ClipTemplate>,
- clip_instances: &[SceneClipInstance],
) -> ClipChainId {
let template = &templates[&clip_id];
- let instances = &clip_instances[template.clips.start as usize .. template.clips.end as usize];
let mut clip_chain_id = parent_clip_chain_id;
- for clip in instances {
- let key = (clip.clip.handle.uid(), clip.clip.spatial_node_index);
+ for clip in &template.instances {
+ let key = (clip.handle.uid(), clip.spatial_node_index);
// If this clip chain already has this clip instance, skip it
if existing_clips.contains(&key) {
@@ -264,8 +240,8 @@ impl ClipChainBuilder {
let new_clip_chain_id = ClipChainId(clip_chain_nodes.len() as u32);
existing_clips.insert(key);
clip_chain_nodes.push(ClipChainNode {
- handle: clip.clip.handle,
- spatial_node_index: clip.clip.spatial_node_index,
+ handle: clip.handle,
+ spatial_node_index: clip.spatial_node_index,
parent_clip_chain_id: clip_chain_id,
});
clip_chain_id = new_clip_chain_id;
@@ -282,40 +258,6 @@ impl ClipChainBuilder {
existing_clips,
clip_chain_nodes,
templates,
- clip_instances,
- )
- }
-
- /// Return true if any of the clips in the hierarchy from clip_id to the
- /// root clip are complex.
- // TODO(gw): This method should only be required until the shared_clip
- // optimization patches are complete, and can then be removed.
- fn has_complex_clips(
- &self,
- clip_id: ClipId,
- templates: &FastHashMap<ClipId, ClipTemplate>,
- instances: &[SceneClipInstance],
- ) -> bool {
- let template = &templates[&clip_id];
-
- // Check if any of the clips in this template are complex
- let clips = &instances[template.clips.start as usize .. template.clips.end as usize];
- for clip in clips {
- if let ClipNodeKind::Complex = clip.key.kind.node_kind() {
- return true;
- }
- }
-
- // The ClipId parenting is terminated when we reach the root ClipId
- if clip_id == template.parent {
- return false;
- }
-
- // Recurse into parent clip template to also check those
- self.has_complex_clips(
- template.parent,
- templates,
- instances,
)
}
@@ -327,7 +269,6 @@ impl ClipChainBuilder {
clip_id: ClipId,
clip_chain_nodes: &mut Vec<ClipChainNode>,
templates: &FastHashMap<ClipId, ClipTemplate>,
- instances: &[SceneClipInstance],
) -> ClipChainId {
if self.prev_clip_id == clip_id {
return self.prev_clip_chain_id;
@@ -348,7 +289,6 @@ impl ClipChainBuilder {
&mut self.existing_clips_cache,
clip_chain_nodes,
templates,
- instances,
);
self.prev_clip_id = clip_id;
@@ -392,6 +332,7 @@ enum ClipResult {
#[derive(MallocSizeOf)]
pub struct ClipNode {
pub item: ClipItem,
+ pub gpu_cache_handle: GpuCacheHandle,
}
// Convert from an interning key for a clip item
@@ -409,12 +350,11 @@ impl From<ClipItemKey> for ClipNode {
mode,
}
}
- ClipItemKeyKind::ImageMask(rect, image, repeat, polygon_handle) => {
+ ClipItemKeyKind::ImageMask(rect, image, repeat) => {
ClipItemKind::Image {
image,
rect: rect.into(),
repeat,
- polygon_handle,
}
}
ClipItemKeyKind::BoxShadow(shadow_rect_fract_offset, shadow_rect_size, shadow_radius, prim_shadow_rect, blur_radius, clip_mode) => {
@@ -433,6 +373,7 @@ impl From<ClipItemKey> for ClipNode {
item: ClipItem {
kind,
},
+ gpu_cache_handle: GpuCacheHandle::new(),
}
}
}
@@ -475,16 +416,6 @@ pub struct ClipChainNode {
pub parent_clip_chain_id: ClipChainId,
}
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-pub struct ClipSet {
- /// Local space clip rect
- pub local_clip_rect: LayoutRect,
-
- /// ID of the clip chain that this set is clipped by.
- pub clip_chain_id: ClipChainId,
-}
-
// When a clip node is found to be valid for a
// clip chain instance, it's stored in an index
// buffer style structure. This struct contains
@@ -498,13 +429,7 @@ pub struct ClipNodeInstance {
pub handle: ClipDataHandle,
pub spatial_node_index: SpatialNodeIndex,
pub flags: ClipNodeFlags,
- pub visible_tiles: Option<ops::Range<usize>>,
-}
-
-impl ClipNodeInstance {
- pub fn has_visible_tiles(&self) -> bool {
- self.visible_tiles.is_some()
- }
+ pub visible_tiles: Option<Vec<VisibleMaskImageTile>>,
}
// A range of clip node instances that were found by
@@ -606,7 +531,6 @@ impl ClipNodeInfo {
clipped_rect: &LayoutRect,
gpu_cache: &mut GpuCache,
resource_cache: &mut ResourceCache,
- mask_tiles: &mut Vec<VisibleMaskImageTile>,
spatial_tree: &SpatialTree,
request_resources: bool,
) -> Option<ClipNodeInstance> {
@@ -628,7 +552,7 @@ impl ClipNodeInfo {
let mut visible_tiles = None;
- if let ClipItemKind::Image { rect, image, repeat, .. } = node.item.kind {
+ if let ClipItemKind::Image { rect, image, repeat } = node.item.kind {
let request = ImageRequest {
key: image,
rendering: ImageRendering::Auto,
@@ -637,16 +561,12 @@ impl ClipNodeInfo {
if let Some(props) = resource_cache.get_image_properties(image) {
if let Some(tile_size) = props.tiling {
- let tile_range_start = mask_tiles.len();
+ let mut mask_tiles = Vec::new();
let visible_rect = if repeat {
*clipped_rect
} else {
- // Bug 1648323 - It is unclear why on rare occasions we get
- // a clipped_rect that does not intersect the clip's mask rect.
- // Defaulting to clipped_rect here results in zero repetitions,
- // which clips the primitive entirely.
- clipped_rect.intersection(&rect).unwrap_or(*clipped_rect)
+ clipped_rect.intersection(&rect).unwrap()
};
let repetitions = image_tiling::repetitions(
@@ -679,7 +599,7 @@ impl ClipNodeInfo {
});
}
}
- visible_tiles = Some(tile_range_start..mask_tiles.len());
+ visible_tiles = Some(mask_tiles);
} else if request_resources {
resource_cache.request_image(request, gpu_cache);
}
@@ -703,26 +623,45 @@ impl ClipNodeInfo {
impl ClipNode {
pub fn update(
&mut self,
+ gpu_cache: &mut GpuCache,
device_pixel_scale: DevicePixelScale,
) {
match self.item.kind {
- ClipItemKind::Image { .. } |
- ClipItemKind::Rectangle { .. } |
- ClipItemKind::RoundedRectangle { .. } => {}
-
+ ClipItemKind::Image { rect, .. } => {
+ if let Some(request) = gpu_cache.request(&mut self.gpu_cache_handle) {
+ let data = ImageMaskData {
+ local_mask_size: rect.size,
+ };
+ data.write_gpu_blocks(request);
+ }
+ }
ClipItemKind::BoxShadow { ref mut source } => {
+ if let Some(mut request) = gpu_cache.request(&mut self.gpu_cache_handle) {
+ request.push([
+ source.original_alloc_size.width,
+ source.original_alloc_size.height,
+ source.clip_mode as i32 as f32,
+ 0.0,
+ ]);
+ request.push([
+ source.stretch_mode_x as i32 as f32,
+ source.stretch_mode_y as i32 as f32,
+ 0.0,
+ 0.0,
+ ]);
+ request.push(source.prim_shadow_rect);
+ }
+
// Quote from https://drafts.csswg.org/css-backgrounds-3/#shadow-blur
// "the image that would be generated by applying to the shadow a
// Gaussian blur with a standard deviation equal to half the blur radius."
let blur_radius_dp = source.blur_radius * 0.5;
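// Worked example of the spec rule quoted above (illustrative numbers):
// a CSS blur radius of 10px gives a Gaussian standard deviation of
// 10.0 * 0.5 = 5.0, and at a content scale of 2.0 the cache key below
// stores (5.0 * 2.0).round() = 10 device pixels.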
// Create scaling from requested size to cache size.
- let mut content_scale = LayoutToWorldScale::new(1.0) * device_pixel_scale;
- content_scale.0 = clamp_to_scale_factor(content_scale.0, false);
+ let content_scale = LayoutToWorldScale::new(1.0) * device_pixel_scale;
// Create the cache key for this box-shadow render task.
- let cache_size = to_cache_size(source.shadow_rect_alloc_size, &mut content_scale);
-
+ let cache_size = to_cache_size(source.shadow_rect_alloc_size * content_scale);
let bs_cache_key = BoxShadowCacheKey {
blur_radius_dp: (blur_radius_dp * content_scale.0).round() as i32,
clip_mode: source.clip_mode,
@@ -731,54 +670,52 @@ impl ClipNode {
br_top_right: (source.shadow_radius.top_right * content_scale).round().to_i32(),
br_bottom_right: (source.shadow_radius.bottom_right * content_scale).round().to_i32(),
br_bottom_left: (source.shadow_radius.bottom_left * content_scale).round().to_i32(),
- device_pixel_scale: Au::from_f32_px(content_scale.0),
};
source.cache_key = Some((cache_size, bs_cache_key));
- }
- }
- }
-}
-pub struct ClipStoreStats {
- templates_capacity: usize,
- instances_capacity: usize,
-}
+ if let Some(mut request) = gpu_cache.request(&mut source.clip_data_handle) {
+ let data = ClipData::rounded_rect(
+ source.minimal_shadow_rect.size,
+ &source.shadow_radius,
+ ClipMode::Clip,
+ );
-impl ClipStoreStats {
- pub fn empty() -> Self {
- ClipStoreStats {
- templates_capacity: 0,
- instances_capacity: 0,
+ data.write(&mut request);
+ }
+ }
+ ClipItemKind::Rectangle { rect, mode } => {
+ if let Some(mut request) = gpu_cache.request(&mut self.gpu_cache_handle) {
+ let data = ClipData::uniform(rect.size, 0.0, mode);
+ data.write(&mut request);
+ }
+ }
+ ClipItemKind::RoundedRectangle { rect, ref radius, mode } => {
+ if let Some(mut request) = gpu_cache.request(&mut self.gpu_cache_handle) {
+ let data = ClipData::rounded_rect(rect.size, radius, mode);
+ data.write(&mut request);
+ }
+ }
}
}
}
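The three request.push calls in the box-shadow arm above define the GPU-cache layout that the clip shader reads back, one 4-float block each. Grouped as a struct for clarity (struct and field names are illustrative only):

// Block 0: original_alloc_size.width/.height, clip_mode as f32, padding
// Block 1: stretch_mode_x as f32, stretch_mode_y as f32, padding, padding
// Block 2: prim_shadow_rect as (origin.x, origin.y, size.width, size.height)
struct BoxShadowGpuBlocks {
    alloc_size_and_mode: [f32; 4],
    stretch_modes: [f32; 4],
    dest_rect: [f32; 4],
}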
-#[derive(Default)]
-pub struct ClipStoreScratchBuffer {
- clip_node_instances: Vec<ClipNodeInstance>,
- mask_tiles: Vec<VisibleMaskImageTile>,
-}
-
/// The main clipping public interface that other modules access.
#[derive(MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
pub struct ClipStore {
pub clip_chain_nodes: Vec<ClipChainNode>,
pub clip_node_instances: Vec<ClipNodeInstance>,
- mask_tiles: Vec<VisibleMaskImageTile>,
active_clip_node_info: Vec<ClipNodeInfo>,
active_local_clip_rect: Option<LayoutRect>,
- active_pic_clip_rect: PictureRect,
// No malloc sizeof since it's not implemented for ops::Range, but these
// allocations are tiny anyway.
/// Map from clip ids defined by the public API to their clip templates
#[ignore_malloc_size_of = "range missing"]
- pub templates: FastHashMap<ClipId, ClipTemplate>,
- pub instances: Vec<SceneClipInstance>,
+ templates: FastHashMap<ClipId, ClipTemplate>,
/// A stack of current clip-chain builders. A new clip-chain builder is
/// typically created each time a clip root (such as an iframe or stacking
@@ -896,25 +833,6 @@ impl ClipChainStack {
}
}
- pub fn clear(&mut self) {
- self.clips.clear();
- self.clip_counts.clear();
- self.levels.clear();
- self.levels.push(ClipChainLevel {
- shared_clips: Vec::new(),
- first_clip_index: 0,
- initial_clip_counts_len: 0,
- });
- }
-
- pub fn take(&mut self) -> Self {
- ClipChainStack {
- levels: self.levels.take(),
- clips: self.clips.take(),
- clip_counts: self.clip_counts.take(),
- }
- }
-
/// Push a clip chain root onto the currently active list.
pub fn push_clip(
&mut self,
@@ -969,7 +887,7 @@ impl ClipChainStack {
maybe_shared_clips: &[ClipInstance],
spatial_tree: &SpatialTree,
) {
- let mut shared_clips = Vec::with_capacity(maybe_shared_clips.len());
+ let mut shared_clips = Vec::new();
// If there are clips in the shared list for a picture cache, only include
// them if they are simple, axis-aligned clips (i.e. in the root coordinate
@@ -988,7 +906,7 @@ impl ClipChainStack {
}
let level = ClipChainLevel {
- shared_clips,
+ shared_clips: shared_clips.to_vec(),
first_clip_index: self.clips.len(),
initial_clip_counts_len: self.clip_counts.len(),
};
@@ -1011,50 +929,27 @@ impl ClipChainStack {
}
impl ClipStore {
- pub fn new(stats: &ClipStoreStats) -> Self {
- let mut templates = FastHashMap::default();
- templates.reserve(stats.templates_capacity);
-
+ pub fn new() -> Self {
ClipStore {
clip_chain_nodes: Vec::new(),
clip_node_instances: Vec::new(),
- mask_tiles: Vec::new(),
active_clip_node_info: Vec::new(),
active_local_clip_rect: None,
- active_pic_clip_rect: PictureRect::max_rect(),
- templates,
- instances: Vec::with_capacity(stats.instances_capacity),
+ templates: FastHashMap::default(),
chain_builder_stack: Vec::new(),
}
}
- pub fn get_stats(&self) -> ClipStoreStats {
- // Selecting the smaller of the current capacity and 2*len ensures we don't
- // retain a huge hashmap alloc after navigating away from a page with a large
- // number of clip templates.
- let templates_capacity = self.templates.capacity().min(self.templates.len() * 2);
- let instances_capacity = self.instances.capacity().min(self.instances.len() * 2);
-
- ClipStoreStats {
- templates_capacity,
- instances_capacity,
- }
- }
-
/// Register a new clip template for the clip_id defined in the display list.
pub fn register_clip_template(
&mut self,
clip_id: ClipId,
parent: ClipId,
- clips: &[SceneClipInstance],
+ instances: &[ClipInstance],
) {
- let start = self.instances.len() as u32;
- self.instances.extend_from_slice(clips);
- let end = self.instances.len() as u32;
-
self.templates.insert(clip_id, ClipTemplate {
parent,
- clips: start..end,
+ instances: instances.into(),
});
}
@@ -1081,25 +976,6 @@ impl ClipStore {
clip_id,
&mut self.clip_chain_nodes,
&self.templates,
- &self.instances,
- )
- }
-
- /// Return true if any of the clips in the hierarchy from clip_id to the
- /// root clip are complex.
- // TODO(gw): This method should only be required until the shared_clip
- // optimization patches are complete, and can then be removed.
- pub fn has_complex_clips(
- &self,
- clip_id: ClipId,
- ) -> bool {
- self.chain_builder_stack
- .last()
- .unwrap()
- .has_complex_clips(
- clip_id,
- &self.templates,
- &self.instances,
)
}
@@ -1123,7 +999,6 @@ impl ClipStore {
clip_id,
&mut self.clip_chain_nodes,
&self.templates,
- &self.instances,
);
self.chain_builder_stack.push(builder);
@@ -1167,15 +1042,13 @@ impl ClipStore {
pub fn set_active_clips(
&mut self,
local_prim_clip_rect: LayoutRect,
- prim_spatial_node_index: SpatialNodeIndex,
- pic_spatial_node_index: SpatialNodeIndex,
+ spatial_node_index: SpatialNodeIndex,
clip_chains: &[ClipChainId],
spatial_tree: &SpatialTree,
clip_data_store: &ClipDataStore,
) {
self.active_clip_node_info.clear();
self.active_local_clip_rect = None;
- self.active_pic_clip_rect = PictureRect::max_rect();
let mut local_clip_rect = local_prim_clip_rect;
@@ -1184,11 +1057,9 @@ impl ClipStore {
if !add_clip_node_to_current_chain(
clip_chain_node,
- prim_spatial_node_index,
- pic_spatial_node_index,
+ spatial_node_index,
&mut local_clip_rect,
&mut self.active_clip_node_info,
- &mut self.active_pic_clip_rect,
clip_data_store,
spatial_tree,
) {
@@ -1212,7 +1083,6 @@ impl ClipStore {
self.active_clip_node_info.clear();
self.active_local_clip_rect = Some(prim_clip_chain.local_clip_rect);
- self.active_pic_clip_rect = prim_clip_chain.pic_clip_rect;
let clip_instances = &self
.clip_node_instances[prim_clip_chain.clips_range.to_range()];
@@ -1256,7 +1126,7 @@ impl ClipStore {
}
let local_bounding_rect = local_prim_rect.intersection(&local_clip_rect)?;
- let mut pic_clip_rect = prim_to_pic_mapper.map(&local_bounding_rect)?;
+ let pic_clip_rect = prim_to_pic_mapper.map(&local_bounding_rect)?;
let world_clip_rect = pic_to_world_mapper.map(&pic_clip_rect)?;
// Now, we've collected all the clip nodes that *potentially* affect this
@@ -1308,7 +1178,10 @@ impl ClipStore {
// Needs a mask -> add to clip node indices
// TODO(gw): Ensure this only runs once on each node per frame?
- node.update(device_pixel_scale);
+ node.update(
+ gpu_cache,
+ device_pixel_scale,
+ );
// Create the clip node instance for this clip node
if let Some(instance) = node_info.create_instance(
@@ -1316,7 +1189,6 @@ impl ClipStore {
&local_bounding_rect,
gpu_cache,
resource_cache,
- &mut self.mask_tiles,
spatial_tree,
request_resources,
) {
@@ -1352,16 +1224,6 @@ impl ClipStore {
count: self.clip_node_instances.len() as u32 - first_clip_node_index,
};
- // If this clip chain needs a mask, reduce the size of the mask allocation
- // by any clips that were in the same space as the picture. This can result
- // in much smaller clip mask allocations in some cases. Note that the ordering
- // here is important - the reduction must occur *after* the clip item accept
- // reject checks above, so that we don't eliminate masks accidentally (since
- // we currently only support a local clip rect in the vertex shader).
- if needs_mask {
- pic_clip_rect = pic_clip_rect.intersection(&self.active_pic_clip_rect)?;
- }
-
// Return a valid clip chain instance
Some(ClipChainInstance {
clips_range,
@@ -1373,24 +1235,8 @@ impl ClipStore {
})
}
- pub fn begin_frame(&mut self, scratch: &mut ClipStoreScratchBuffer) {
- mem::swap(&mut self.clip_node_instances, &mut scratch.clip_node_instances);
- mem::swap(&mut self.mask_tiles, &mut scratch.mask_tiles);
+ pub fn clear_old_instances(&mut self) {
self.clip_node_instances.clear();
- self.mask_tiles.clear();
- }
-
- pub fn end_frame(&mut self, scratch: &mut ClipStoreScratchBuffer) {
- mem::swap(&mut self.clip_node_instances, &mut scratch.clip_node_instances);
- mem::swap(&mut self.mask_tiles, &mut scratch.mask_tiles);
- }
-
- pub fn visible_mask_tiles(&self, instance: &ClipNodeInstance) -> &[VisibleMaskImageTile] {
- if let Some(range) = &instance.visible_tiles {
- &self.mask_tiles[range.clone()]
- } else {
- &[]
- }
}
}
@@ -1436,6 +1282,18 @@ impl<J> ClipRegion<ComplexTranslateIter<J>> {
}
}
+impl ClipRegion<Option<ComplexClipRegion>> {
+ pub fn create_for_clip_node_with_local_clip(
+ local_clip: &LayoutRect,
+ reference_frame_relative_offset: &LayoutVector2D
+ ) -> Self {
+ ClipRegion {
+ main: local_clip.translate(*reference_frame_relative_offset),
+ complex_clips: None,
+ }
+ }
+}
+
// The ClipItemKey is a hashable representation of the contents
// of a clip item. It is used during interning to de-duplicate
// clip nodes between frames and display lists. This allows quick
@@ -1443,13 +1301,13 @@ impl<J> ClipRegion<ComplexTranslateIter<J>> {
// the uploaded GPU cache handle to be retained between display lists.
// TODO(gw): Maybe we should consider constructing these directly
// in the DL builder?
-#[derive(Copy, Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum ClipItemKeyKind {
Rectangle(RectangleKey, ClipMode),
RoundedRectangle(RectangleKey, BorderRadiusAu, ClipMode),
- ImageMask(RectangleKey, ImageKey, bool, Option<PolygonDataHandle>),
+ ImageMask(RectangleKey, ImageKey, bool),
BoxShadow(PointKey, SizeKey, BorderRadiusAu, RectangleKey, Au, BoxShadowClipMode),
}
@@ -1471,13 +1329,11 @@ impl ClipItemKeyKind {
}
}
- pub fn image_mask(image_mask: &ImageMask, mask_rect: LayoutRect,
- polygon_handle: Option<PolygonDataHandle>) -> Self {
+ pub fn image_mask(image_mask: &ImageMask, mask_rect: LayoutRect) -> Self {
ClipItemKeyKind::ImageMask(
mask_rect.into(),
image_mask.image,
image_mask.repeat,
- polygon_handle,
)
}
@@ -1517,7 +1373,7 @@ impl ClipItemKeyKind {
}
}
-#[derive(Debug, Copy, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
+#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct ClipItemKey {
@@ -1539,7 +1395,6 @@ impl intern::Internable for ClipIntern {
type Key = ClipItemKey;
type StoreData = ClipNode;
type InternData = ClipInternData;
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_CLIPS;
}
#[derive(Debug, MallocSizeOf)]
@@ -1559,7 +1414,6 @@ pub enum ClipItemKind {
image: ImageKey,
rect: LayoutRect,
repeat: bool,
- polygon_handle: Option<PolygonDataHandle>,
},
BoxShadow {
source: BoxShadowClipSource,
@@ -1660,8 +1514,9 @@ fn compute_box_shadow_parameters(
clip_mode,
stretch_mode_x,
stretch_mode_y,
- render_task: None,
+ cache_handle: None,
cache_key: None,
+ clip_data_handle: GpuCacheHandle::new(),
minimal_shadow_rect,
}
}
@@ -1866,8 +1721,13 @@ impl ClipItemKind {
ClipItemKind::RoundedRectangle { rect, ref radius, mode: ClipMode::Clip } => {
// TODO(gw): Consider caching this in the ClipNode
// if it ever shows in profiles.
- if rounded_rectangle_contains_rect_quick(&rect, radius, &prim_rect) {
- return ClipResult::Accept;
+ // TODO(gw): extract_inner_rect_safe is overly
+ // conservative for this code!
+ let inner_clip_rect = extract_inner_rect_safe(&rect, radius);
+ if let Some(inner_clip_rect) = inner_clip_rect {
+ if inner_clip_rect.contains_rect(prim_rect) {
+ return ClipResult::Accept;
+ }
}
match rect.intersection(prim_rect) {
@@ -1882,8 +1742,13 @@ impl ClipItemKind {
ClipItemKind::RoundedRectangle { rect, ref radius, mode: ClipMode::ClipOut } => {
// TODO(gw): Consider caching this in the ClipNode
// if it ever shows in profiles.
- if rounded_rectangle_contains_rect_quick(&rect, radius, &prim_rect) {
- return ClipResult::Reject;
+ // TODO(gw): extract_inner_rect_safe is overly
+ // conservative for this code!
+ let inner_clip_rect = extract_inner_rect_safe(&rect, radius);
+ if let Some(inner_clip_rect) = inner_clip_rect {
+ if inner_clip_rect.contains_rect(prim_rect) {
+ return ClipResult::Reject;
+ }
}
match rect.intersection(prim_rect) {
@@ -1971,103 +1836,6 @@ pub fn rounded_rectangle_contains_point(
true
}
-/// Return true if the rounded rectangle described by `container` and `radii`
-/// definitely contains `containee`. May return false negatives, but never false
-/// positives.
-fn rounded_rectangle_contains_rect_quick(
- container: &LayoutRect,
- radii: &BorderRadius,
- containee: &LayoutRect,
-) -> bool {
- if !container.contains_rect(containee) {
- return false;
- }
-
- /// Return true if `point` falls within `corner`. This only covers the
- /// upper-left case; we transform the other corners into that form.
- fn foul(point: LayoutPoint, corner: LayoutPoint) -> bool {
- point.x < corner.x && point.y < corner.y
- }
-
- /// Flip `pt` about the y axis (i.e. negate `x`).
- fn flip_x(pt: LayoutPoint) -> LayoutPoint {
- LayoutPoint { x: -pt.x, .. pt }
- }
-
- /// Flip `pt` about the x axis (i.e. negate `y`).
- fn flip_y(pt: LayoutPoint) -> LayoutPoint {
- LayoutPoint { y: -pt.y, .. pt }
- }
-
- if foul(containee.top_left(), container.top_left() + radii.top_left) ||
- foul(flip_x(containee.top_right()), flip_x(container.top_right()) + radii.top_right) ||
- foul(flip_y(containee.bottom_left()), flip_y(container.bottom_left()) + radii.bottom_left) ||
- foul(-containee.bottom_right(), -container.bottom_right() + radii.bottom_right)
- {
- return false;
- }
-
- true
-}
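Both the removed fast path above and the extract_inner_rect_safe check that replaces it answer the same question conservatively: is the primitive provably inside the rounded clip, so that no mask is needed? A worked example with illustrative numbers:

// A 100x100 rounded clip with uniform radius 20: an axis-aligned inner
// rect inset by the radius, roughly (20,20)..(80,80), lies entirely inside
// the rounded shape, so a primitive contained in it is accepted mask-free.
let clip = LayoutRect::new(LayoutPoint::zero(), LayoutSize::new(100.0, 100.0));
let prim = LayoutRect::new(LayoutPoint::new(25.0, 25.0), LayoutSize::new(50.0, 50.0));
// inner rect contains prim => ClipResult::Accept (no mask allocated)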
-
-/// Test where point p is relative to the infinite line that passes through the segment
-/// defined by p0 and p1. Point p is on the "left" of the line if the triangle (p0, p1, p)
-/// forms a counter-clockwise triangle.
-/// > 0 is left of the line
-/// < 0 is right of the line
-/// == 0 is on the line
-pub fn is_left_of_line(
- p_x: f32,
- p_y: f32,
- p0_x: f32,
- p0_y: f32,
- p1_x: f32,
- p1_y: f32,
-) -> f32 {
- (p1_x - p0_x) * (p_y - p0_y) - (p_x - p0_x) * (p1_y - p0_y)
-}
-
-pub fn polygon_contains_point(
- point: &LayoutPoint,
- rect: &LayoutRect,
- polygon: &PolygonKey,
-) -> bool {
- if !rect.contains(*point) {
- return false;
- }
-
- // p is a LayoutPoint that we'll be comparing to dimensionless PointKeys,
- // which were created from LayoutPoints, so it all works out.
- let p = LayoutPoint::new(point.x - rect.origin.x, point.y - rect.origin.y);
-
- // Calculate a winding number for this point.
- let mut winding_number: i32 = 0;
-
- let count = polygon.point_count as usize;
-
- for i in 0..count {
- let p0 = polygon.points[i];
- let p1 = polygon.points[(i + 1) % count];
-
- if p0.y <= p.y {
- if p1.y > p.y {
- if is_left_of_line(p.x, p.y, p0.x, p0.y, p1.x, p1.y) > 0.0 {
- winding_number = winding_number + 1;
- }
- }
- } else if p1.y <= p.y {
- if is_left_of_line(p.x, p.y, p0.x, p0.y, p1.x, p1.y) < 0.0 {
- winding_number = winding_number - 1;
- }
- }
- }
-
- match polygon.fill_rule {
- FillRule::Nonzero => winding_number != 0,
- FillRule::Evenodd => winding_number.abs() % 2 == 1,
- }
-}
-
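The removed winding-number walk above depends on the sign convention of is_left_of_line; a quick check with concrete points:

// Segment p0=(0,0) -> p1=(1,0); formula (p1x-p0x)*(py-p0y) - (px-p0x)*(p1y-p0y):
//   p = (0.0,  1.0):  1.0 *  1.0 - 0.0 * 0.0 =  1.0  (> 0, left of the line)
//   p = (0.0, -1.0):  1.0 * -1.0 - 0.0 * 0.0 = -1.0  (< 0, right of the line)
//   p = (0.5,  0.0):  1.0 *  0.0 - 0.5 * 0.0 =  0.0  (on the line)
assert!(is_left_of_line(0.0, 1.0, 0.0, 0.0, 1.0, 0.0) > 0.0);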
pub fn projected_rect_contains(
source_rect: &LayoutRect,
transform: &LayoutToWorldTransform,
@@ -2109,11 +1877,9 @@ pub fn projected_rect_contains(
// results in the entire primitive being culled out.
fn add_clip_node_to_current_chain(
node: &ClipChainNode,
- prim_spatial_node_index: SpatialNodeIndex,
- pic_spatial_node_index: SpatialNodeIndex,
+ spatial_node_index: SpatialNodeIndex,
local_clip_rect: &mut LayoutRect,
clip_node_info: &mut Vec<ClipNodeInfo>,
- current_pic_clip_rect: &mut PictureRect,
clip_data_store: &ClipDataStore,
spatial_tree: &SpatialTree,
) -> bool {
@@ -2122,7 +1888,7 @@ fn add_clip_node_to_current_chain(
// Determine the most efficient way to convert between coordinate
// systems of the primitive and clip node.
let conversion = ClipSpaceConversion::new(
- prim_spatial_node_index,
+ spatial_node_index,
node.spatial_node_index,
spatial_tree,
);
@@ -2145,37 +1911,15 @@ fn add_clip_node_to_current_chain(
};
}
ClipSpaceConversion::Transform(..) => {
- // Map the local clip rect directly into the same space as the picture
- // surface. This will often be the same space as the clip itself, which
- // results in a reduction in allocated clip mask size.
-
- // For simplicity, only apply this optimization if the clip is in the
- // same coord system as the picture. There are some 'advanced' perspective
- // clip tests in wrench that break without this check. Those cases are
- // never used in Gecko, and we aim to remove WR support for them in
- // the future to simplify the clipping pipeline.
- let pic_coord_system = spatial_tree
- .spatial_nodes[pic_spatial_node_index.0 as usize]
- .coordinate_system_id;
-
- let clip_coord_system = spatial_tree
- .spatial_nodes[node.spatial_node_index.0 as usize]
- .coordinate_system_id;
-
- if pic_coord_system == clip_coord_system {
- let mapper = SpaceMapper::new_with_target(
- pic_spatial_node_index,
- node.spatial_node_index,
- PictureRect::max_rect(),
- spatial_tree,
- );
-
- if let Some(pic_clip_rect) = mapper.map(&clip_rect) {
- *current_pic_clip_rect = pic_clip_rect
- .intersection(current_pic_clip_rect)
- .unwrap_or(PictureRect::zero());
- }
- }
+ // TODO(gw): In the future, we can reduce the size
+ // of the pic_clip_rect here. To do this,
+ // we can use project_rect or the
+ // inverse_rect_footprint method, depending
+ // on the relationship of the clip, pic
+ // and primitive spatial nodes.
+ // I have left this for now until we
+ // find some good test cases where this
+ // would be a worthwhile perf win.
}
}
}
@@ -2207,24 +1951,3 @@ mod tests {
);
}
}
-
-/// PolygonKeys get interned, because it's a convenient way to move the data
-/// for the polygons out of the ClipItemKind and ClipItemKeyKind enums. The
-/// polygon data is both interned and retrieved by the scene builder, and not
-/// accessed at all by the frame builder. Another oddity is that the
-/// PolygonKey contains the totality of the information about the polygon, so
-/// the InternData and StoreData types are both PolygonKey.
-#[derive(Copy, Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub enum PolygonIntern {}
-
-pub type PolygonDataHandle = intern::Handle<PolygonIntern>;
-
-impl intern::InternDebug for PolygonKey {}
-
-impl intern::Internable for PolygonIntern {
- type Key = PolygonKey;
- type StoreData = PolygonKey;
- type InternData = PolygonKey;
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_POLYGONS;
-}
diff --git a/third_party/webrender/webrender/src/composite.rs b/third_party/webrender/webrender/src/composite.rs
index d07119268be..bdc11097689 100644
--- a/third_party/webrender/webrender/src/composite.rs
+++ b/third_party/webrender/webrender/src/composite.rs
@@ -2,20 +2,18 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{ColorF, YuvColorSpace, YuvFormat, ImageRendering, ExternalImageId, ImageBufferKind};
-use api::units::*;
-use api::ColorDepth;
-use crate::image_source::resolve_image;
-use euclid::Transform3D;
+use api::{ColorF, YuvColorSpace, YuvFormat, ImageRendering};
+use api::units::{DeviceRect, DeviceIntSize, DeviceIntRect, DeviceIntPoint, WorldRect};
+use api::units::{DevicePixelScale, DevicePoint, PictureRect, TexelRect};
+use crate::batch::{resolve_image, get_buffer_kind};
use crate::gpu_cache::GpuCache;
use crate::gpu_types::{ZBufferId, ZBufferIdGenerator};
use crate::internal_types::TextureSource;
use crate::picture::{ImageDependency, ResolvedSurfaceTexture, TileCacheInstance, TileId, TileSurface};
use crate::prim_store::DeferredResolve;
+use crate::renderer::ImageBufferKind;
use crate::resource_cache::{ImageRequest, ResourceCache};
-use crate::util::Preallocator;
-use crate::tile_cache::PictureCacheDebugInfo;
-use std::{ops, u64, os::raw::c_void};
+use std::{ops, u64};
/*
Types and definitions related to compositing picture cache tiles
@@ -33,10 +31,6 @@ pub enum NativeSurfaceOperationDetails {
tile_size: DeviceIntSize,
is_opaque: bool,
},
- CreateExternalSurface {
- id: NativeSurfaceId,
- is_opaque: bool,
- },
DestroySurface {
id: NativeSurfaceId,
},
@@ -45,10 +39,6 @@ pub enum NativeSurfaceOperationDetails {
},
DestroyTile {
id: NativeTileId,
- },
- AttachExternalImage {
- id: NativeSurfaceId,
- external_image: ExternalImageId,
}
}
@@ -85,27 +75,6 @@ pub enum CompositeSurfaceFormat {
Yuv,
}
-bitflags! {
- /// Optional features that can be opted-out of when compositing,
- /// possibly allowing a fast path to be selected.
- pub struct CompositeFeatures: u8 {
- // UV coordinates do not require clamping, for example because the
- // entire texture is being composited.
- const NO_UV_CLAMP = 1 << 0;
- // The texture sample should not be modulated by a specified color.
- const NO_COLOR_MODULATION = 1 << 1;
- }
-}
-
-#[derive(Copy, Clone, Debug, PartialEq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum TileKind {
- Opaque,
- Alpha,
- Clear,
-}
-
/// Describes the geometry and surface of a tile to be composited
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -115,29 +84,7 @@ pub struct CompositeTile {
pub clip_rect: DeviceRect,
pub dirty_rect: DeviceRect,
pub valid_rect: DeviceRect,
- pub transform: Option<CompositorSurfaceTransform>,
pub z_id: ZBufferId,
- pub kind: TileKind,
-}
-
-fn tile_kind(surface: &CompositeTileSurface, is_opaque: bool) -> TileKind {
- match surface {
- // Color tiles are, by definition, opaque. We might support non-opaque color
- // tiles if we ever find pages that have a lot of these.
- CompositeTileSurface::Color { .. } => TileKind::Opaque,
- // Clear tiles have a special bucket
- CompositeTileSurface::Clear => TileKind::Clear,
- CompositeTileSurface::Texture { .. }
- | CompositeTileSurface::ExternalSurface { .. } => {
- // Texture surfaces get bucketed by opaque/alpha, for z-rejection
- // on the Draw compositor mode.
- if is_opaque {
- TileKind::Opaque
- } else {
- TileKind::Alpha
- }
- }
- }
}
pub enum ExternalSurfaceDependency {
@@ -157,16 +104,11 @@ pub enum ExternalSurfaceDependency {
/// For now, we support only YUV images as compositor surfaces, but in future
/// this will also support RGBA images.
pub struct ExternalSurfaceDescriptor {
- // Rectangle of this surface in owning picture's coordinate space
pub local_rect: PictureRect,
- // Rectangle of this surface in the compositor local space
- // TODO(gw): Switch this to CompositorSurfaceRect (CompositorSurfacePixel) in compositor trait.
- pub surface_rect: DeviceRect,
- // Rectangle of this surface in true device pixels
+ pub world_rect: WorldRect,
pub device_rect: DeviceRect,
pub local_clip_rect: PictureRect,
pub clip_rect: DeviceRect,
- pub transform: CompositorSurfaceTransform,
pub image_rendering: ImageRendering,
pub z_id: ZBufferId,
pub dependency: ExternalSurfaceDependency,
@@ -184,6 +126,7 @@ pub struct ExternalSurfaceDescriptor {
#[derive(Debug, Copy, Clone)]
pub struct ExternalPlaneDescriptor {
pub texture: TextureSource,
+ pub texture_layer: i32,
pub uv_rect: TexelRect,
}
@@ -191,6 +134,7 @@ impl ExternalPlaneDescriptor {
fn invalid() -> Self {
ExternalPlaneDescriptor {
texture: TextureSource::Invalid,
+ texture_layer: 0,
uv_rect: TexelRect::invalid(),
}
}
@@ -198,13 +142,9 @@ impl ExternalPlaneDescriptor {
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Copy, Clone, PartialEq)]
+#[derive(Debug, Copy, Clone)]
pub struct ResolvedExternalSurfaceIndex(pub usize);
-impl ResolvedExternalSurfaceIndex {
- pub const INVALID: ResolvedExternalSurfaceIndex = ResolvedExternalSurfaceIndex(usize::MAX);
-}
-
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum ResolvedExternalSurfaceColorData {
@@ -246,13 +186,10 @@ pub enum CompositorConfig {
/// then the operating system supports a form of 'partial present' where
/// only dirty regions of the framebuffer need to be updated.
max_partial_present_rects: usize,
- /// If this is true, WR must draw the previous frames' dirty regions when
+ /// If this is true, WR would draw the previous frame's dirty region when
/// doing a partial present. This is used for EGL which requires the front
/// buffer to always be fully consistent.
draw_previous_partial_present_regions: bool,
- /// A client provided interface to a compositor handling partial present.
- /// Required if webrender must query the backbuffer's age.
- partial_present: Option<Box<dyn PartialPresentCompositor>>,
},
/// Use a native OS compositor to draw tiles. This requires clients to implement
/// the Compositor trait, but can be significantly more power efficient on operating
@@ -278,18 +215,6 @@ impl CompositorConfig {
}
}
}
-
- pub fn partial_present(&mut self) -> Option<&mut Box<dyn PartialPresentCompositor>> {
- match self {
- CompositorConfig::Native { .. } => {
- None
- }
- CompositorConfig::Draw { ref mut partial_present, .. } => {
- partial_present.as_mut()
- }
- }
- }
-
}
impl Default for CompositorConfig {
@@ -298,7 +223,6 @@ impl Default for CompositorConfig {
CompositorConfig::Draw {
max_partial_present_rects: 0,
draw_previous_partial_present_regions: false,
- partial_present: None,
}
}
}
@@ -321,8 +245,8 @@ pub enum CompositorKind {
Native {
/// Maximum dirty rects per compositor surface.
max_update_rects: usize,
- /// The capabilities of the underlying platform.
- capabilities: CompositorCapabilities,
+ /// The virtual surface size used by underlying platform.
+ virtual_surface_size: i32,
},
}
@@ -340,28 +264,17 @@ impl CompositorKind {
pub fn get_virtual_surface_size(&self) -> i32 {
match self {
CompositorKind::Draw { .. } => 0,
- CompositorKind::Native { capabilities, .. } => capabilities.virtual_surface_size,
- }
- }
-
- // We currently only support transforms for Native compositors,
- // bug 1655639 is filed for adding support to Draw.
- pub fn supports_transforms(&self) -> bool {
- match self {
- CompositorKind::Draw { .. } => false,
- CompositorKind::Native { .. } => true,
+ CompositorKind::Native { virtual_surface_size, .. } => *virtual_surface_size,
}
}
+}
- pub fn should_redraw_on_invalidation(&self) -> bool {
- match self {
- CompositorKind::Draw { max_partial_present_rects, .. } => {
- // When partial present is enabled, we need to force redraw.
- *max_partial_present_rects > 0
- }
- CompositorKind::Native { capabilities, .. } => capabilities.redraw_on_invalidation,
- }
- }
+/// Information about an opaque surface used to occlude tiles.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct Occluder {
+ z_id: ZBufferId,
+ device_rect: DeviceIntRect,
}
/// The backing surface kind for a tile. Same as `TileSurface`, minus
@@ -403,14 +316,13 @@ pub struct CompositeTileDescriptor {
#[derive(PartialEq, Clone)]
pub struct CompositeSurfaceDescriptor {
pub surface_id: Option<NativeSurfaceId>,
+ pub offset: DevicePoint,
pub clip_rect: DeviceRect,
- pub transform: CompositorSurfaceTransform,
// A list of image keys and generations that this compositor surface
// depends on. This avoids composites being skipped when the only
// thing that has changed is the generation of a compositor surface
// image dependency.
pub image_dependencies: [ImageDependency; 3],
- pub image_rendering: ImageRendering,
// List of the surface information for each tile added to this virtual surface
pub tile_descriptors: Vec<CompositeTileDescriptor>,
}
@@ -433,48 +345,6 @@ impl CompositeDescriptor {
}
}
-pub struct CompositeStatePreallocator {
- tiles: Preallocator,
- external_surfaces: Preallocator,
- occluders: Preallocator,
- occluders_events: Preallocator,
- occluders_active: Preallocator,
- descriptor_surfaces: Preallocator,
-}
-
-impl CompositeStatePreallocator {
- pub fn record(&mut self, state: &CompositeState) {
- self.tiles.record_vec(&state.tiles);
- self.external_surfaces.record_vec(&state.external_surfaces);
- self.occluders.record_vec(&state.occluders.occluders);
- self.occluders_events.record_vec(&state.occluders.events);
- self.occluders_active.record_vec(&state.occluders.active);
- self.descriptor_surfaces.record_vec(&state.descriptor.surfaces);
- }
-
- pub fn preallocate(&self, state: &mut CompositeState) {
- self.tiles.preallocate_vec(&mut state.tiles);
- self.external_surfaces.preallocate_vec(&mut state.external_surfaces);
- self.occluders.preallocate_vec(&mut state.occluders.occluders);
- self.occluders_events.preallocate_vec(&mut state.occluders.events);
- self.occluders_active.preallocate_vec(&mut state.occluders.active);
- self.descriptor_surfaces.preallocate_vec(&mut state.descriptor.surfaces);
- }
-}
-
-impl Default for CompositeStatePreallocator {
- fn default() -> Self {
- CompositeStatePreallocator {
- tiles: Preallocator::new(56),
- external_surfaces: Preallocator::new(0),
- occluders: Preallocator::new(16),
- occluders_events: Preallocator::new(32),
- occluders_active: Preallocator::new(16),
- descriptor_surfaces: Preallocator::new(8),
- }
- }
-}
-
/// The list of tiles to be drawn this frame
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -482,10 +352,12 @@ pub struct CompositeState {
// TODO(gw): Consider splitting up CompositeState into separate struct types depending
// on the selected compositing mode. Many of the fields in this state struct
// are only applicable to either Native or Draw compositing mode.
- /// List of tiles to be drawn by the Draw compositor.
- /// Tiles are accumulated in this vector and sorted from front to back at the end of the
- /// frame.
- pub tiles: Vec<CompositeTile>,
+ /// List of opaque tiles to be drawn by the Draw compositor.
+ pub opaque_tiles: Vec<CompositeTile>,
+ /// List of alpha tiles to be drawn by the Draw compositor.
+ pub alpha_tiles: Vec<CompositeTile>,
+ /// List of clear tiles to be drawn by the Draw compositor.
+ pub clear_tiles: Vec<CompositeTile>,
/// List of primitives that were promoted to be compositor surfaces.
pub external_surfaces: Vec<ResolvedExternalSurface>,
/// Used to generate z-id values for tiles in the Draw compositor mode.
@@ -499,14 +371,14 @@ pub struct CompositeState {
pub dirty_rects_are_valid: bool,
/// The kind of compositor for picture cache tiles (e.g. drawn by WR, or OS compositor)
pub compositor_kind: CompositorKind,
+    /// Picture caching may be disabled dynamically, based on debug flags, pinch zoom, etc.
+ pub picture_caching_is_enabled: bool,
/// The overall device pixel scale, used for tile occlusion conversions.
global_device_pixel_scale: DevicePixelScale,
/// List of registered occluders
- pub occluders: Occluders,
+ occluders: Vec<Occluder>,
/// Description of the surfaces and properties that are being composited.
pub descriptor: CompositeDescriptor,
- /// Debugging information about the state of the pictures cached for regression testing.
- pub picture_cache_debug: PictureCacheDebugInfo,
}
impl CompositeState {
@@ -514,20 +386,31 @@ impl CompositeState {
/// during each frame construction and passed to the renderer.
pub fn new(
compositor_kind: CompositorKind,
+ mut picture_caching_is_enabled: bool,
global_device_pixel_scale: DevicePixelScale,
max_depth_ids: i32,
- dirty_rects_are_valid: bool,
) -> Self {
+ // The native compositor interface requires picture caching to work, so
+ // force it here and warn if it was disabled.
+ if let CompositorKind::Native { .. } = compositor_kind {
+ if !picture_caching_is_enabled {
+ warn!("Picture caching cannot be disabled in native compositor config");
+ }
+ picture_caching_is_enabled = true;
+ }
+
CompositeState {
- tiles: Vec::new(),
- z_generator: ZBufferIdGenerator::new(max_depth_ids),
- dirty_rects_are_valid,
+ opaque_tiles: Vec::new(),
+ alpha_tiles: Vec::new(),
+ clear_tiles: Vec::new(),
+ z_generator: ZBufferIdGenerator::new(0, max_depth_ids),
+ dirty_rects_are_valid: true,
compositor_kind,
+ picture_caching_is_enabled,
global_device_pixel_scale,
- occluders: Occluders::new(),
+ occluders: Vec::new(),
descriptor: CompositeDescriptor::empty(),
external_surfaces: Vec::new(),
- picture_cache_debug: PictureCacheDebugInfo::new(),
}
}
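
For example, constructing a CompositeState with a CompositorKind::Native kind while passing picture_caching_is_enabled = false takes the branch above: the warning is logged and the state is still created with picture caching force-enabled.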
@@ -540,7 +423,41 @@ impl CompositeState {
) {
let device_rect = (rect * self.global_device_pixel_scale).round().to_i32();
- self.occluders.push(device_rect, z_id);
+ self.occluders.push(Occluder {
+ device_rect,
+ z_id,
+ });
+ }
+
+ /// Returns true if a tile with the specified rectangle and z_id
+ /// is occluded by an opaque surface in front of it.
+ pub fn is_tile_occluded(
+ &self,
+ z_id: ZBufferId,
+ device_rect: DeviceRect,
+ ) -> bool {
+ // It's often the case that a tile is only occluded by considering multiple
+ // picture caches in front of it (for example, the background tiles are
+ // often occluded by a combination of the content slice + the scrollbar slices).
+
+ // The basic algorithm is:
+ // For every occluder:
+ // If this occluder is in front of the tile we are querying:
+ // Clip the occluder rectangle to the query rectangle.
+ // Calculate the total non-overlapping area of those clipped occluders.
+ // If the cumulative area of those occluders is the same as the area of the query tile,
+ // Then the entire tile must be occluded and can be skipped during rasterization and compositing.
+
+ // Get the reference area we will compare against.
+ let device_rect = device_rect.round().to_i32();
+ let ref_area = device_rect.size.width * device_rect.size.height;
+
+ // Calculate the non-overlapping area of the valid occluders.
+ let cover_area = area_of_occluders(&self.occluders, z_id, &device_rect);
+ debug_assert!(cover_area <= ref_area);
+
+ // Check if the tile area is completely covered
+ ref_area == cover_area
}
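
As a worked example of the test above: a 100x100 device-space tile has ref_area = 10000. If two opaque occluders in front of it span x ranges 0..60 and 40..100 at full tile height, their clipped, non-overlapping union covers the whole tile, so cover_area = 10000 = ref_area and the tile is treated as occluded, even though neither occluder covers it alone.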
/// Add a picture cache to be composited
@@ -553,318 +470,276 @@ impl CompositeState {
gpu_cache: &mut GpuCache,
deferred_resolves: &mut Vec<DeferredResolve>,
) {
- for sub_slice in &tile_cache.sub_slices {
- let mut visible_opaque_tile_count = 0;
- let mut visible_alpha_tile_count = 0;
- let mut opaque_tile_descriptors = Vec::new();
- let mut alpha_tile_descriptors = Vec::new();
- let mut surface_device_rect = DeviceRect::zero();
-
- for tile in sub_slice.tiles.values() {
- if !tile.is_visible {
- // This can occur when a tile is found to be occluded during frame building.
- continue;
- }
+ let mut visible_opaque_tile_count = 0;
+ let mut visible_alpha_tile_count = 0;
+ let mut opaque_tile_descriptors = Vec::new();
+ let mut alpha_tile_descriptors = Vec::new();
+
+ for tile in tile_cache.tiles.values() {
+ if !tile.is_visible {
+ // This can occur when a tile is found to be occluded during frame building.
+ continue;
+ }
- let device_rect = (tile.world_tile_rect * global_device_pixel_scale).round();
- let surface = tile.surface.as_ref().expect("no tile surface set!");
-
- // Accumulate this tile into the overall surface bounds. This is used below
- // to clamp the size of the supplied clip rect to a reasonable value.
- // NOTE: This clip rect must include the device_valid_rect rather than
- // the tile device rect. This ensures that in the case of a picture
- // cache slice that is smaller than a single tile, the clip rect in
- // the composite descriptor will change if the position of that slice
- // is changed. Otherwise, WR may conclude that no composite is needed
- // if the tile itself was not invalidated due to changing content.
- // See bug #1675414 for more detail.
- surface_device_rect = surface_device_rect.union(&tile.device_valid_rect);
-
- let descriptor = CompositeTileDescriptor {
- surface_kind: surface.into(),
- tile_id: tile.id,
- };
+ let device_rect = (tile.world_tile_rect * global_device_pixel_scale).round();
+ let surface = tile.surface.as_ref().expect("no tile surface set!");
- let (surface, is_opaque) = match surface {
- TileSurface::Color { color } => {
- (CompositeTileSurface::Color { color: *color }, true)
- }
- TileSurface::Clear => {
- // Clear tiles are rendered with blend mode pre-multiply-dest-out.
- (CompositeTileSurface::Clear, false)
- }
- TileSurface::Texture { descriptor, .. } => {
- let surface = descriptor.resolve(resource_cache, tile_cache.current_tile_size);
- (
- CompositeTileSurface::Texture { surface },
- tile.is_opaque
- )
- }
- };
+ let descriptor = CompositeTileDescriptor {
+ surface_kind: surface.into(),
+ tile_id: tile.id,
+ };
- if is_opaque {
- opaque_tile_descriptors.push(descriptor);
- visible_opaque_tile_count += 1;
- } else {
- alpha_tile_descriptors.push(descriptor);
- visible_alpha_tile_count += 1;
+ let (surface, is_opaque) = match surface {
+ TileSurface::Color { color } => {
+ (CompositeTileSurface::Color { color: *color }, true)
}
+ TileSurface::Clear => {
+ (CompositeTileSurface::Clear, false)
+ }
+ TileSurface::Texture { descriptor, .. } => {
+ let surface = descriptor.resolve(resource_cache, tile_cache.current_tile_size);
+ (
+ CompositeTileSurface::Texture { surface },
+                        // If a tile has a compositor surface intersecting with it, we need to
+ // respect the tile.is_opaque property even if the overall tile cache
+ // is opaque. In this case, the tile.is_opaque property is required
+ // in order to ensure correct draw order with compositor surfaces.
+ tile.is_opaque || (!tile.has_compositor_surface && tile_cache.is_opaque()),
+ )
+ }
+ };
- let tile = CompositeTile {
- kind: tile_kind(&surface, is_opaque),
- surface,
- rect: device_rect,
- valid_rect: tile.device_valid_rect.translate(-device_rect.origin.to_vector()),
- dirty_rect: tile.device_dirty_rect.translate(-device_rect.origin.to_vector()),
- clip_rect: device_clip_rect,
- transform: None,
- z_id: tile.z_id,
- };
-
- self.tiles.push(tile);
- }
-
- // Sort the tile descriptor lists, since iterating values in the tile_cache.tiles
- // hashmap doesn't provide any ordering guarantees, but we want to detect the
- // composite descriptor as equal if the tiles list is the same, regardless of
- // ordering.
- opaque_tile_descriptors.sort_by_key(|desc| desc.tile_id);
- alpha_tile_descriptors.sort_by_key(|desc| desc.tile_id);
-
- // If the clip rect is too large, it can cause accuracy and correctness problems
- // for some native compositors (specifically, CoreAnimation in this case). To
- // work around that, intersect the supplied clip rect with the current bounds
- // of the native surface, which ensures it is a reasonable size.
- let surface_clip_rect = device_clip_rect
- .intersection(&surface_device_rect)
- .unwrap_or(DeviceRect::zero());
-
- // Add opaque surface before any compositor surfaces
- if visible_opaque_tile_count > 0 {
- self.descriptor.surfaces.push(
- CompositeSurfaceDescriptor {
- surface_id: sub_slice.native_surface.as_ref().map(|s| s.opaque),
- clip_rect: surface_clip_rect,
- transform: CompositorSurfaceTransform::translation(
- tile_cache.device_position.x,
- tile_cache.device_position.y,
- 0.0,
- ),
- image_dependencies: [ImageDependency::INVALID; 3],
- image_rendering: ImageRendering::CrispEdges,
- tile_descriptors: opaque_tile_descriptors,
- }
- );
- }
-
- // Add alpha tiles after opaque surfaces
- if visible_alpha_tile_count > 0 {
- self.descriptor.surfaces.push(
- CompositeSurfaceDescriptor {
- surface_id: sub_slice.native_surface.as_ref().map(|s| s.alpha),
- clip_rect: surface_clip_rect,
- transform: CompositorSurfaceTransform::translation(
- tile_cache.device_position.x,
- tile_cache.device_position.y,
- 0.0,
- ),
- image_dependencies: [ImageDependency::INVALID; 3],
- image_rendering: ImageRendering::CrispEdges,
- tile_descriptors: alpha_tile_descriptors,
- }
- );
+ if is_opaque {
+ opaque_tile_descriptors.push(descriptor);
+ visible_opaque_tile_count += 1;
+ } else {
+ alpha_tile_descriptors.push(descriptor);
+ visible_alpha_tile_count += 1;
}
- // For each compositor surface that was promoted, build the
- // information required for the compositor to draw it
- for compositor_surface in &sub_slice.compositor_surfaces {
- let external_surface = &compositor_surface.descriptor;
-
- let clip_rect = external_surface
- .clip_rect
- .intersection(&device_clip_rect)
- .unwrap_or_else(DeviceRect::zero);
-
- let required_plane_count =
- match external_surface.dependency {
- ExternalSurfaceDependency::Yuv { format, .. } => {
- format.get_plane_num()
- },
- ExternalSurfaceDependency::Rgb { .. } => {
- 1
- }
- };
+ let tile = CompositeTile {
+ surface,
+ rect: device_rect,
+ valid_rect: tile.device_valid_rect.translate(-device_rect.origin.to_vector()),
+ dirty_rect: tile.device_dirty_rect.translate(-device_rect.origin.to_vector()),
+ clip_rect: device_clip_rect,
+ z_id: tile.z_id,
+ };
- let mut image_dependencies = [ImageDependency::INVALID; 3];
+ self.push_tile(tile, is_opaque);
+ }
- for i in 0 .. required_plane_count {
- let dependency = match external_surface.dependency {
- ExternalSurfaceDependency::Yuv { image_dependencies, .. } => {
- image_dependencies[i]
- },
- ExternalSurfaceDependency::Rgb { image_dependency, .. } => {
- image_dependency
- }
- };
- image_dependencies[i] = dependency;
+ // Sort the tile descriptor lists, since iterating values in the tile_cache.tiles
+ // hashmap doesn't provide any ordering guarantees, but we want to detect the
+ // composite descriptor as equal if the tiles list is the same, regardless of
+ // ordering.
+ opaque_tile_descriptors.sort_by_key(|desc| desc.tile_id);
+ alpha_tile_descriptors.sort_by_key(|desc| desc.tile_id);
+
+ // Add opaque surface before any compositor surfaces
+ if visible_opaque_tile_count > 0 {
+ self.descriptor.surfaces.push(
+ CompositeSurfaceDescriptor {
+ surface_id: tile_cache.native_surface.as_ref().map(|s| s.opaque),
+ offset: tile_cache.device_position,
+ clip_rect: device_clip_rect,
+ image_dependencies: [ImageDependency::INVALID; 3],
+ tile_descriptors: opaque_tile_descriptors,
}
+ );
+ }
- // Get a new z_id for each compositor surface, to ensure correct ordering
- // when drawing with the simple (Draw) compositor, and to schedule compositing
- // of any required updates into the surfaces.
- let needs_external_surface_update = match self.compositor_kind {
- CompositorKind::Draw { .. } => true,
- _ => external_surface.update_params.is_some(),
+ // For each compositor surface that was promoted, build the
+ // information required for the compositor to draw it
+ for external_surface in &tile_cache.external_surfaces {
+
+ let mut planes = [
+ ExternalPlaneDescriptor::invalid(),
+ ExternalPlaneDescriptor::invalid(),
+ ExternalPlaneDescriptor::invalid(),
+ ];
+
+            // Step through the image keys and build a plane descriptor for each plane.
+ let required_plane_count =
+ match external_surface.dependency {
+ ExternalSurfaceDependency::Yuv { format, .. } => {
+ format.get_plane_num()
+ },
+ ExternalSurfaceDependency::Rgb { .. } => {
+ 1
+ }
};
- let external_surface_index = if needs_external_surface_update {
- let external_surface_index = self.compute_external_surface_dependencies(
- &external_surface,
- &image_dependencies,
- required_plane_count,
- resource_cache,
- gpu_cache,
- deferred_resolves,
- );
- if external_surface_index == ResolvedExternalSurfaceIndex::INVALID {
- continue;
+ let mut valid_plane_count = 0;
+
+ let mut image_dependencies = [ImageDependency::INVALID; 3];
+
+ for i in 0 .. required_plane_count {
+ let dependency = match external_surface.dependency {
+ ExternalSurfaceDependency::Yuv { image_dependencies, .. } => {
+ image_dependencies[i]
+ },
+ ExternalSurfaceDependency::Rgb { image_dependency, .. } => {
+ image_dependency
}
- external_surface_index
- } else {
- ResolvedExternalSurfaceIndex::INVALID
};
+ image_dependencies[i] = dependency;
- let surface = CompositeTileSurface::ExternalSurface { external_surface_index };
- let tile = CompositeTile {
- kind: tile_kind(&surface, compositor_surface.is_opaque),
- surface,
- rect: external_surface.surface_rect,
- valid_rect: external_surface.surface_rect.translate(-external_surface.surface_rect.origin.to_vector()),
- dirty_rect: external_surface.surface_rect.translate(-external_surface.surface_rect.origin.to_vector()),
- clip_rect,
- transform: Some(external_surface.transform),
- z_id: external_surface.z_id,
+ let request = ImageRequest {
+ key: dependency.key,
+ rendering: external_surface.image_rendering,
+ tile: None,
};
- // Add a surface descriptor for each compositor surface. For the Draw
- // compositor, this is used to avoid composites being skipped by adding
- // a dependency on the compositor surface external image keys / generations.
- self.descriptor.surfaces.push(
- CompositeSurfaceDescriptor {
- surface_id: external_surface.native_surface_id,
- clip_rect,
- transform: external_surface.transform,
- image_dependencies: image_dependencies,
- image_rendering: external_surface.image_rendering,
- tile_descriptors: Vec::new(),
- }
+ let cache_item = resolve_image(
+ request,
+ resource_cache,
+ gpu_cache,
+ deferred_resolves,
);
- self.tiles.push(tile);
+ if cache_item.texture_id != TextureSource::Invalid {
+ valid_plane_count += 1;
+ let plane = &mut planes[i];
+ *plane = ExternalPlaneDescriptor {
+ texture: cache_item.texture_id,
+ texture_layer: cache_item.texture_layer,
+ uv_rect: cache_item.uv_rect.into(),
+ };
+ }
}
- }
- }
- fn compute_external_surface_dependencies(
- &mut self,
- external_surface: &ExternalSurfaceDescriptor,
- image_dependencies: &[ImageDependency; 3],
- required_plane_count: usize,
- resource_cache: &ResourceCache,
- gpu_cache: &mut GpuCache,
- deferred_resolves: &mut Vec<DeferredResolve>,
- ) -> ResolvedExternalSurfaceIndex {
- let mut planes = [
- ExternalPlaneDescriptor::invalid(),
- ExternalPlaneDescriptor::invalid(),
- ExternalPlaneDescriptor::invalid(),
- ];
-
- let mut valid_plane_count = 0;
- for i in 0 .. required_plane_count {
- let request = ImageRequest {
- key: image_dependencies[i].key,
- rendering: external_surface.image_rendering,
- tile: None,
- };
+ // Check if there are valid images added for each YUV plane
+ if valid_plane_count < required_plane_count {
+ warn!("Warnings: skip a YUV/RGB compositor surface, found {}/{} valid images",
+ valid_plane_count,
+ required_plane_count,
+ );
+ continue;
+ }
- let cache_item = resolve_image(
- request,
- resource_cache,
- gpu_cache,
- deferred_resolves,
- );
+ let clip_rect = external_surface
+ .clip_rect
+ .intersection(&device_clip_rect)
+ .unwrap_or_else(DeviceRect::zero);
- if cache_item.texture_id != TextureSource::Invalid {
- valid_plane_count += 1;
- let plane = &mut planes[i];
- *plane = ExternalPlaneDescriptor {
- texture: cache_item.texture_id,
- uv_rect: cache_item.uv_rect.into(),
- };
- }
- }
+ // Get a new z_id for each compositor surface, to ensure correct ordering
+ // when drawing with the simple (Draw) compositor.
- // Check if there are valid images added for each YUV plane
- if valid_plane_count < required_plane_count {
- warn!("Warnings: skip a YUV/RGB compositor surface, found {}/{} valid images",
- valid_plane_count,
- required_plane_count,
- );
- return ResolvedExternalSurfaceIndex::INVALID;
- }
+ let surface = CompositeTileSurface::ExternalSurface {
+ external_surface_index: ResolvedExternalSurfaceIndex(self.external_surfaces.len()),
+ };
- let external_surface_index = ResolvedExternalSurfaceIndex(self.external_surfaces.len());
+ // If the external surface descriptor reports that the native surface
+ // needs to be updated, create an update params tuple for the renderer
+ // to use.
+ let update_params = external_surface.update_params.map(|surface_size| {
+ (
+ external_surface.native_surface_id.expect("bug: no native surface!"),
+ surface_size
+ )
+ });
+
+ match external_surface.dependency {
+ ExternalSurfaceDependency::Yuv{ color_space, format, rescale, .. } => {
+
+ let image_buffer_kind = get_buffer_kind(planes[0].texture);
+
+ self.external_surfaces.push(ResolvedExternalSurface {
+ color_data: ResolvedExternalSurfaceColorData::Yuv {
+ image_dependencies,
+ planes,
+ color_space,
+ format,
+ rescale,
+ },
+ image_buffer_kind,
+ update_params,
+ });
+ },
+ ExternalSurfaceDependency::Rgb{ flip_y, .. } => {
+
+ let image_buffer_kind = get_buffer_kind(planes[0].texture);
+
+ self.external_surfaces.push(ResolvedExternalSurface {
+ color_data: ResolvedExternalSurfaceColorData::Rgb {
+ image_dependency: image_dependencies[0],
+ plane: planes[0],
+ flip_y,
+ },
+ image_buffer_kind,
+ update_params,
+ });
+ },
+ }
- // If the external surface descriptor reports that the native surface
- // needs to be updated, create an update params tuple for the renderer
- // to use.
- let update_params = external_surface.update_params.map(|surface_size| {
- (
- external_surface.native_surface_id.expect("bug: no native surface!"),
- surface_size
- )
- });
+ let tile = CompositeTile {
+ surface,
+ rect: external_surface.device_rect,
+ valid_rect: external_surface.device_rect.translate(-external_surface.device_rect.origin.to_vector()),
+ dirty_rect: external_surface.device_rect.translate(-external_surface.device_rect.origin.to_vector()),
+ clip_rect,
+ z_id: external_surface.z_id,
+ };
- match external_surface.dependency {
- ExternalSurfaceDependency::Yuv{ color_space, format, rescale, .. } => {
+ // Add a surface descriptor for each compositor surface. For the Draw
+ // compositor, this is used to avoid composites being skipped by adding
+ // a dependency on the compositor surface external image keys / generations.
+ self.descriptor.surfaces.push(
+ CompositeSurfaceDescriptor {
+ surface_id: external_surface.native_surface_id,
+ offset: tile.rect.origin,
+ clip_rect: tile.clip_rect,
+ image_dependencies: image_dependencies,
+ tile_descriptors: Vec::new(),
+ }
+ );
- let image_buffer_kind = planes[0].texture.image_buffer_kind();
+ self.push_tile(tile, true);
+ }
- self.external_surfaces.push(ResolvedExternalSurface {
- color_data: ResolvedExternalSurfaceColorData::Yuv {
- image_dependencies: *image_dependencies,
- planes,
- color_space,
- format,
- rescale,
- },
- image_buffer_kind,
- update_params,
- });
- },
- ExternalSurfaceDependency::Rgb{ flip_y, .. } => {
-
- let image_buffer_kind = planes[0].texture.image_buffer_kind();
-
- // Only propagate flip_y if the compositor doesn't support transforms,
- // since otherwise it'll be handled as part of the transform.
- self.external_surfaces.push(ResolvedExternalSurface {
- color_data: ResolvedExternalSurfaceColorData::Rgb {
- image_dependency: image_dependencies[0],
- plane: planes[0],
- flip_y: flip_y && !self.compositor_kind.supports_transforms(),
- },
- image_buffer_kind,
- update_params,
- });
- },
+ // Add alpha / overlay tiles after compositor surfaces
+ if visible_alpha_tile_count > 0 {
+ self.descriptor.surfaces.push(
+ CompositeSurfaceDescriptor {
+ surface_id: tile_cache.native_surface.as_ref().map(|s| s.alpha),
+ offset: tile_cache.device_position,
+ clip_rect: device_clip_rect,
+ image_dependencies: [ImageDependency::INVALID; 3],
+ tile_descriptors: alpha_tile_descriptors,
+ }
+ );
}
- external_surface_index
}
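
For reference on the plane-count check above: an Rgb dependency always requires a single plane, while the Yuv plane count comes from the format (per the SWGLCompositeSurfaceInfo notes further down, interleaved YUV uses 1 plane, NV12 uses 2, and planar YUV uses 3). If any required plane fails to resolve to a valid texture, the whole compositor surface is skipped with the warning shown.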
- pub fn end_frame(&mut self) {
- // Sort tiles from front to back.
- self.tiles.sort_by_key(|tile| tile.z_id.0);
+ /// Add a tile to the appropriate array, depending on tile properties and compositor mode.
+ fn push_tile(
+ &mut self,
+ tile: CompositeTile,
+ is_opaque: bool,
+ ) {
+ match tile.surface {
+ CompositeTileSurface::Color { .. } => {
+ // Color tiles are, by definition, opaque. We might support non-opaque color
+ // tiles if we ever find pages that have a lot of these.
+ self.opaque_tiles.push(tile);
+ }
+ CompositeTileSurface::Clear => {
+ // Clear tiles have a special bucket
+ self.clear_tiles.push(tile);
+ }
+ CompositeTileSurface::Texture { .. } => {
+ // Texture surfaces get bucketed by opaque/alpha, for z-rejection
+ // on the Draw compositor mode.
+ if is_opaque {
+ self.opaque_tiles.push(tile);
+ } else {
+ self.alpha_tiles.push(tile);
+ }
+ }
+ CompositeTileSurface::ExternalSurface { .. } => {
+ self.opaque_tiles.push(tile);
+ }
+ }
}
}
@@ -921,38 +796,10 @@ pub struct NativeSurfaceInfo {
}
#[repr(C)]
-#[derive(Debug, Copy, Clone, PartialEq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct CompositorCapabilities {
- /// The virtual surface size used by the underlying platform.
pub virtual_surface_size: i32,
- /// Whether the compositor requires redrawing on invalidation.
- pub redraw_on_invalidation: bool,
-}
-
-impl Default for CompositorCapabilities {
- fn default() -> Self {
- // The default set of compositor capabilities for a given platform.
- // These should only be modified if a compositor diverges specifically
-        // from the default behavior so that compositors don't have to track
-        // changes to this structure unless necessary.
- CompositorCapabilities {
- virtual_surface_size: 0,
- redraw_on_invalidation: false,
- }
- }
}
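
After this change, CompositorCapabilities is reduced to the single field above, and get_capabilities (see the Compositor trait below) simply returns it. A sketch, with 8192 as a purely illustrative value:

    // Illustrative: report the platform's virtual surface size (0 if unused).
    let caps = CompositorCapabilities { virtual_surface_size: 8192 };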
-/// The transform type to apply to Compositor surfaces.
-// TODO: Should transform from CompositorSurfacePixel instead, but this requires a cleanup of the
-// Compositor API to use CompositorSurface-space geometry instead of Device-space where necessary
-// to avoid a bunch of noisy cast_unit calls and make it actually type-safe. May be difficult due
-// to pervasive use of Device-space nomenclature inside WR.
-// pub struct CompositorSurfacePixel;
-// pub type CompositorSurfaceTransform = Transform3D<f32, CompositorSurfacePixel, DevicePixel>;
-pub type CompositorSurfaceTransform = Transform3D<f32, DevicePixel, DevicePixel>;
-
/// Defines an interface to a native (OS level) compositor. If supplied
/// by the client application, then picture cache slices will be
/// composited by the OS compositor, rather than drawn via WR batches.
@@ -966,16 +813,6 @@ pub trait Compositor {
is_opaque: bool,
);
- /// Create a new OS compositor surface that can be used with an
- /// existing ExternalImageId, instead of being drawn to by WebRender.
- /// Surfaces created by this can only be used with attach_external_image,
- /// and not create_tile/destroy_tile/bind/unbind.
- fn create_external_surface(
- &mut self,
- id: NativeSurfaceId,
- is_opaque: bool,
- );
-
/// Destroy the surface with the specified id. WR may call this
/// at any time the surface is no longer required (including during
/// renderer deinit). It's the responsibility of the embedder
@@ -999,26 +836,6 @@ pub trait Compositor {
id: NativeTileId,
);
- /// Attaches an ExternalImageId to an OS compositor surface created
- /// by create_external_surface, and uses that as the contents of
- /// the surface. It is expected that a single surface will have
- /// many different images attached (like one for each video frame).
- fn attach_external_image(
- &mut self,
- id: NativeSurfaceId,
- external_image: ExternalImageId
- );
-
- /// Mark a tile as invalid before any surfaces are queued for
- /// composition and before it is updated with bind. This is useful
- /// for early composition, allowing for dependency tracking of which
- /// surfaces can be composited early while others are still updating.
- fn invalidate_tile(
- &mut self,
- _id: NativeTileId,
- _valid_rect: DeviceIntRect
- ) {}
-
/// Bind this surface such that WR can issue OpenGL commands
/// that will target the surface. Returns an (x, y) offset
/// where WR should draw into the surface. This can be set
@@ -1055,26 +872,14 @@ pub trait Compositor {
// We might need to change the interface to maintain a visual
// tree that can be mutated?
    // TODO(gw): We might need to add a concept of a hierarchy in future.
+ // TODO(gw): In future, expand to support a more complete transform matrix.
fn add_surface(
&mut self,
id: NativeSurfaceId,
- transform: CompositorSurfaceTransform,
+ position: DeviceIntPoint,
clip_rect: DeviceIntRect,
- image_rendering: ImageRendering,
);
- /// Notify the compositor that all tiles have been invalidated and all
- /// native surfaces have been added, thus it is safe to start compositing
- /// valid surfaces. The dirty rects array allows native compositors that
- /// support partial present to skip copying unchanged areas.
- /// Optionally provides a set of rectangles for the areas known to be
- /// opaque, this is currently only computed if the caller is SwCompositor.
- fn start_compositing(
- &mut self,
- _dirty_rects: &[DeviceIntRect],
- _opaque_rects: &[DeviceIntRect],
- ) {}
-
/// Commit any changes in the compositor tree for this frame. WR calls
/// this once when all surface and visual updates are complete, to signal
/// that the OS composite transaction should be applied.
@@ -1092,244 +897,111 @@ pub trait Compositor {
fn get_capabilities(&self) -> CompositorCapabilities;
}
-/// Information about the underlying data buffer of a mapped tile.
-#[repr(C)]
-#[derive(Copy, Clone)]
-pub struct MappedTileInfo {
- pub data: *mut c_void,
- pub stride: i32,
-}
-
-/// Descriptor for a locked surface that will be directly composited by SWGL.
-#[repr(C)]
-pub struct SWGLCompositeSurfaceInfo {
- /// The number of YUV planes in the surface. 0 indicates non-YUV BGRA.
- /// 1 is interleaved YUV. 2 is NV12. 3 is planar YUV.
- pub yuv_planes: u32,
- /// Textures for planes of the surface, or 0 if not applicable.
- pub textures: [u32; 3],
- /// Color space of surface if using a YUV format.
- pub color_space: YuvColorSpace,
- /// Color depth of surface if using a YUV format.
- pub color_depth: ColorDepth,
- /// The actual source surface size before transformation.
- pub size: DeviceIntSize,
-}
-
-/// A Compositor variant that supports mapping tiles into CPU memory.
-pub trait MappableCompositor: Compositor {
- /// Map a tile's underlying buffer so it can be used as the backing for
- /// a SWGL framebuffer. This is intended to be a replacement for 'bind'
- /// in any compositors that intend to directly interoperate with SWGL
- /// while supporting some form of native layers.
- fn map_tile(
- &mut self,
- id: NativeTileId,
- dirty_rect: DeviceIntRect,
- valid_rect: DeviceIntRect,
- ) -> Option<MappedTileInfo>;
-
-    /// Unmap a tile that was previously mapped via map_tile to signal
- /// that SWGL is done rendering to the buffer.
- fn unmap_tile(&mut self);
-
- fn lock_composite_surface(
- &mut self,
- ctx: *mut c_void,
- external_image_id: ExternalImageId,
- composite_info: *mut SWGLCompositeSurfaceInfo,
- ) -> bool;
- fn unlock_composite_surface(&mut self, ctx: *mut c_void, external_image_id: ExternalImageId);
-}
-
-/// Defines an interface to a non-native (application-level) Compositor which handles
-/// partial present. This is required if webrender must query the backbuffer's age.
-/// TODO: Use the Compositor trait for native and non-native compositors, and integrate
-/// this functionality there.
-pub trait PartialPresentCompositor {
- /// Allows webrender to specify the total region that will be rendered to this frame,
- /// ie the frame's dirty region and some previous frames' dirty regions, if applicable
- /// (calculated using the buffer age). Must be called before anything has been rendered
- /// to the main framebuffer.
- fn set_buffer_damage_region(&mut self, rects: &[DeviceIntRect]);
-}
-
-/// Information about an opaque surface used to occlude tiles.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct Occluder {
+/// Return the total area covered by a set of occluders, accounting for
+/// overlapping areas between those rectangles.
+fn area_of_occluders(
+ occluders: &[Occluder],
z_id: ZBufferId,
- device_rect: DeviceIntRect,
-}
-
-// Whether this event is the start or end of a rectangle
-#[derive(Debug)]
-enum OcclusionEventKind {
- Begin,
- End,
-}
+ clip_rect: &DeviceIntRect,
+) -> i32 {
+    // This implementation is based on the article https://leetcode.com/articles/rectangle-area-ii/.
+    // It is not a particularly efficient implementation (it skips building segment trees); however,
+    // we typically use it where the length of the rectangles array is < 10, so simplicity is more important.
+
+ let mut area = 0;
+
+ // Whether this event is the start or end of a rectangle
+ #[derive(Debug)]
+ enum EventKind {
+ Begin,
+ End,
+ }
-// A list of events on the y-axis, with the rectangle range that it affects on the x-axis
-#[derive(Debug)]
-struct OcclusionEvent {
- y: i32,
- x_range: ops::Range<i32>,
- kind: OcclusionEventKind,
-}
+ // A list of events on the y-axis, with the rectangle range that it affects on the x-axis
+ #[derive(Debug)]
+ struct Event {
+ y: i32,
+ x_range: ops::Range<i32>,
+ kind: EventKind,
+ }
-impl OcclusionEvent {
- fn new(y: i32, kind: OcclusionEventKind, x0: i32, x1: i32) -> Self {
- OcclusionEvent {
- y,
- x_range: ops::Range {
- start: x0,
- end: x1,
- },
- kind,
+ impl Event {
+ fn new(y: i32, kind: EventKind, x0: i32, x1: i32) -> Self {
+ Event {
+ y,
+ x_range: ops::Range {
+ start: x0,
+ end: x1,
+ },
+ kind,
+ }
}
}
-}
-
-/// List of registered occluders.
-///
-/// Also store a couple of vectors for reuse.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct Occluders {
- occluders: Vec<Occluder>,
-
- // The two vectors below are kept to avoid unnecessary reallocations in area().
-
- #[cfg_attr(feature = "serde", serde(skip))]
- events: Vec<OcclusionEvent>,
-
- #[cfg_attr(feature = "serde", serde(skip))]
- active: Vec<ops::Range<i32>>,
-}
-impl Occluders {
- fn new() -> Self {
- Occluders {
- occluders: Vec::new(),
- events: Vec::new(),
- active: Vec::new(),
+ // Step through each rectangle and build the y-axis event list
+ let mut events = Vec::with_capacity(occluders.len() * 2);
+ for occluder in occluders {
+ // Only consider occluders in front of this rect
+ if occluder.z_id.0 > z_id.0 {
+ // Clip the source rect to the rectangle we care about, since we only
+ // want to record area for the tile we are comparing to.
+ if let Some(rect) = occluder.device_rect.intersection(clip_rect) {
+ let x0 = rect.origin.x;
+ let x1 = x0 + rect.size.width;
+ events.push(Event::new(rect.origin.y, EventKind::Begin, x0, x1));
+ events.push(Event::new(rect.origin.y + rect.size.height, EventKind::End, x0, x1));
+ }
}
}
- fn push(&mut self, device_rect: DeviceIntRect, z_id: ZBufferId) {
- self.occluders.push(Occluder { device_rect, z_id });
+ // If we didn't end up with any valid events, the area must be 0
+ if events.is_empty() {
+ return 0;
}
- /// Returns true if a tile with the specified rectangle and z_id
- /// is occluded by an opaque surface in front of it.
- pub fn is_tile_occluded(
- &mut self,
- z_id: ZBufferId,
- device_rect: DeviceRect,
- ) -> bool {
- // It's often the case that a tile is only occluded by considering multiple
- // picture caches in front of it (for example, the background tiles are
- // often occluded by a combination of the content slice + the scrollbar slices).
-
- // The basic algorithm is:
- // For every occluder:
- // If this occluder is in front of the tile we are querying:
- // Clip the occluder rectangle to the query rectangle.
- // Calculate the total non-overlapping area of those clipped occluders.
- // If the cumulative area of those occluders is the same as the area of the query tile,
- // Then the entire tile must be occluded and can be skipped during rasterization and compositing.
-
- // Get the reference area we will compare against.
- let device_rect = device_rect.round().to_i32();
- let ref_area = device_rect.size.width * device_rect.size.height;
-
- // Calculate the non-overlapping area of the valid occluders.
- let cover_area = self.area(z_id, &device_rect);
- debug_assert!(cover_area <= ref_area);
-
- // Check if the tile area is completely covered
- ref_area == cover_area
- }
-
- /// Return the total area covered by a set of occluders, accounting for
- /// overlapping areas between those rectangles.
- fn area(
- &mut self,
- z_id: ZBufferId,
- clip_rect: &DeviceIntRect,
- ) -> i32 {
-        // This implementation is based on the article https://leetcode.com/articles/rectangle-area-ii/.
-        // It is not a particularly efficient implementation (it skips building segment trees); however,
-        // we typically use it where the length of the rectangles array is < 10, so simplicity is more important.
-
- self.events.clear();
- self.active.clear();
-
- let mut area = 0;
-
- // Step through each rectangle and build the y-axis event list
- for occluder in &self.occluders {
- // Only consider occluders in front of this rect
- if occluder.z_id.0 < z_id.0 {
- // Clip the source rect to the rectangle we care about, since we only
- // want to record area for the tile we are comparing to.
- if let Some(rect) = occluder.device_rect.intersection(clip_rect) {
- let x0 = rect.origin.x;
- let x1 = x0 + rect.size.width;
- self.events.push(OcclusionEvent::new(rect.origin.y, OcclusionEventKind::Begin, x0, x1));
- self.events.push(OcclusionEvent::new(rect.origin.y + rect.size.height, OcclusionEventKind::End, x0, x1));
- }
+ // Sort the events by y-value
+ events.sort_by_key(|e| e.y);
+ let mut active: Vec<ops::Range<i32>> = Vec::new();
+ let mut cur_y = events[0].y;
+
+ // Step through each y interval
+ for event in &events {
+ // This is the dimension of the y-axis we are accumulating areas for
+ let dy = event.y - cur_y;
+
+ // If we have active events covering x-ranges in this y-interval, process them
+ if dy != 0 && !active.is_empty() {
+ assert!(dy > 0);
+
+ // Step through the x-ranges, ordered by x0 of each event
+ active.sort_by_key(|i| i.start);
+ let mut query = 0;
+ let mut cur = active[0].start;
+
+ // Accumulate the non-overlapping x-interval that contributes to area for this y-interval.
+ for interval in &active {
+ cur = interval.start.max(cur);
+ query += (interval.end - cur).max(0);
+ cur = cur.max(interval.end);
}
- }
- // If we didn't end up with any valid events, the area must be 0
- if self.events.is_empty() {
- return 0;
+ // Accumulate total area for this y-interval
+ area += query * dy;
}
- // Sort the events by y-value
- self.events.sort_by_key(|e| e.y);
- let mut cur_y = self.events[0].y;
-
- // Step through each y interval
- for event in &self.events {
- // This is the dimension of the y-axis we are accumulating areas for
- let dy = event.y - cur_y;
-
- // If we have active events covering x-ranges in this y-interval, process them
- if dy != 0 && !self.active.is_empty() {
- assert!(dy > 0);
-
- // Step through the x-ranges, ordered by x0 of each event
- self.active.sort_by_key(|i| i.start);
- let mut query = 0;
- let mut cur = self.active[0].start;
-
- // Accumulate the non-overlapping x-interval that contributes to area for this y-interval.
- for interval in &self.active {
- cur = interval.start.max(cur);
- query += (interval.end - cur).max(0);
- cur = cur.max(interval.end);
- }
-
- // Accumulate total area for this y-interval
- area += query * dy;
+ // Update the active events list
+ match event.kind {
+ EventKind::Begin => {
+ active.push(event.x_range.clone());
}
-
- // Update the active events list
- match event.kind {
- OcclusionEventKind::Begin => {
- self.active.push(event.x_range.clone());
- }
- OcclusionEventKind::End => {
- let index = self.active.iter().position(|i| *i == event.x_range).unwrap();
- self.active.remove(index);
- }
+ EventKind::End => {
+ let index = active.iter().position(|i| *i == event.x_range).unwrap();
+ active.remove(index);
}
-
- cur_y = event.y;
}
- area
+ cur_y = event.y;
}
+
+ area
}
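
To make the sweep concrete, here is a small check, as a sketch that assumes module-internal access to Occluder, ZBufferId's tuple constructor, and euclid's point2/size2 helpers: two 4x4 occluders overlapping by 2px, both in front of the queried z, cover 24 pixels rather than 32.

    use euclid::{point2, size2};

    let occluders = [
        Occluder { z_id: ZBufferId(2), device_rect: DeviceIntRect::new(point2(0, 0), size2(4, 4)) },
        Occluder { z_id: ZBufferId(2), device_rect: DeviceIntRect::new(point2(2, 0), size2(4, 4)) },
    ];
    let clip = DeviceIntRect::new(point2(0, 0), size2(6, 4));
    // The 2x4 overlap between the rects is counted once: 16 + 16 - 8 = 24.
    assert_eq!(area_of_occluders(&occluders, ZBufferId(1), &clip), 24);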
diff --git a/third_party/webrender/webrender/src/compositor/mod.rs b/third_party/webrender/webrender/src/compositor/mod.rs
deleted file mode 100644
index e517f227193..00000000000
--- a/third_party/webrender/webrender/src/compositor/mod.rs
+++ /dev/null
@@ -1,6 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#[cfg(feature = "sw_compositor")]
-pub mod sw_compositor;
diff --git a/third_party/webrender/webrender/src/compositor/sw_compositor.rs b/third_party/webrender/webrender/src/compositor/sw_compositor.rs
deleted file mode 100644
index 03cd2b20b66..00000000000
--- a/third_party/webrender/webrender/src/compositor/sw_compositor.rs
+++ /dev/null
@@ -1,1484 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use gleam::{gl, gl::Gl};
-use std::cell::{Cell, UnsafeCell};
-use std::collections::{hash_map::HashMap, VecDeque};
-use std::ops::{Deref, DerefMut, Range};
-use std::ptr;
-use std::sync::atomic::{AtomicBool, AtomicI8, AtomicIsize, AtomicPtr, AtomicU32, AtomicU8, Ordering};
-use std::sync::{Arc, Condvar, Mutex, MutexGuard};
-use std::thread;
-use crate::{
- api::units::*, api::ColorDepth, api::ExternalImageId, api::ImageRendering, api::YuvColorSpace, Compositor,
- CompositorCapabilities, CompositorSurfaceTransform, NativeSurfaceId, NativeSurfaceInfo, NativeTileId,
- host_utils::{thread_started, thread_stopped}, MappableCompositor, SWGLCompositeSurfaceInfo,
-};
-
-pub struct SwTile {
- x: i32,
- y: i32,
- fbo_id: u32,
- color_id: u32,
- valid_rect: DeviceIntRect,
- /// Composition of tiles must be ordered such that any tiles that may overlap
- /// an invalidated tile in an earlier surface only get drawn after that tile
-    /// is actually updated. We store a count of the number of overlapping invalid
-    /// tiles here, which gets decremented when the invalid tiles are finally updated,
-    /// so that we know when it is finally safe to draw. Must use a Cell as we might
-    /// be analyzing multiple tiles and surfaces.
- overlaps: Cell<u32>,
- /// Whether the tile's contents has been invalidated
- invalid: Cell<bool>,
- /// Graph node for job dependencies of this tile
- graph_node: SwCompositeGraphNodeRef,
-}
-
-impl SwTile {
- fn new(x: i32, y: i32) -> Self {
- SwTile {
- x,
- y,
- fbo_id: 0,
- color_id: 0,
- valid_rect: DeviceIntRect::zero(),
- overlaps: Cell::new(0),
- invalid: Cell::new(false),
- graph_node: SwCompositeGraphNode::new(),
- }
- }
-
- /// The offset of the tile in the local space of the surface before any
- /// transform is applied.
- fn origin(&self, surface: &SwSurface) -> DeviceIntPoint {
- DeviceIntPoint::new(self.x * surface.tile_size.width, self.y * surface.tile_size.height)
- }
-
-    /// The valid rect, offset to its position within the local space of the surface
-    /// before any transform is applied.
- fn local_bounds(&self, surface: &SwSurface) -> DeviceIntRect {
- self.valid_rect.translate(self.origin(surface).to_vector())
- }
-
- /// Bounds used for determining overlap dependencies. This may either be the
- /// full tile bounds or the actual valid rect, depending on whether the tile
- /// is invalidated this frame. These bounds are more conservative as such and
- /// may differ from the precise bounds used to actually composite the tile.
- fn overlap_rect(
- &self,
- surface: &SwSurface,
- transform: &CompositorSurfaceTransform,
- clip_rect: &DeviceIntRect,
- ) -> Option<DeviceIntRect> {
- let bounds = self.local_bounds(surface);
- let device_rect = transform.outer_transformed_rect(&bounds.to_f32())?.round_out().to_i32();
- device_rect.intersection(clip_rect)
- }
-
- /// Determine if the tile's bounds may overlap the dependency rect if it were
- /// to be composited at the given position.
- fn may_overlap(
- &self,
- surface: &SwSurface,
- transform: &CompositorSurfaceTransform,
- clip_rect: &DeviceIntRect,
- dep_rect: &DeviceIntRect,
- ) -> bool {
- self.overlap_rect(surface, transform, clip_rect)
- .map_or(false, |r| r.intersects(dep_rect))
- }
-
- /// Get valid source and destination rectangles for composition of the tile
- /// within a surface, bounded by the clipping rectangle. May return None if
- /// it falls outside of the clip rect.
- fn composite_rects(
- &self,
- surface: &SwSurface,
- transform: &CompositorSurfaceTransform,
- clip_rect: &DeviceIntRect,
- ) -> Option<(DeviceIntRect, DeviceIntRect, bool)> {
- // Offset the valid rect to the appropriate surface origin.
- let valid = self.local_bounds(surface);
- // The destination rect is the valid rect transformed and then clipped.
- let dest_rect = transform.outer_transformed_rect(&valid.to_f32())?.round_out().to_i32();
- if !dest_rect.intersects(clip_rect) {
- return None;
- }
- // To get a valid source rect, we need to inverse transform the clipped destination rect to find out the effect
- // of the clip rect in source-space. After this, we subtract off the source-space valid rect origin to get
- // a source rect that is now relative to the surface origin rather than absolute.
- let inv_transform = transform.inverse()?;
- let src_rect = inv_transform
- .outer_transformed_rect(&dest_rect.to_f32())?
- .round()
- .to_i32()
- .translate(-valid.origin.to_vector());
- Some((src_rect, dest_rect, transform.m22 < 0.0))
- }
-}
-
-pub struct SwSurface {
- tile_size: DeviceIntSize,
- is_opaque: bool,
- tiles: Vec<SwTile>,
- /// An attached external image for this surface.
- external_image: Option<ExternalImageId>,
- /// Descriptor for the external image if successfully locked for composite.
- composite_surface: Option<SWGLCompositeSurfaceInfo>,
-}
-
-impl SwSurface {
- fn new(tile_size: DeviceIntSize, is_opaque: bool) -> Self {
- SwSurface {
- tile_size,
- is_opaque,
- tiles: Vec::new(),
- external_image: None,
- composite_surface: None,
- }
- }
-
-    /// Conservative approximation of the local bounds of the surface by combining
- /// the local bounds of all enclosed tiles.
- fn local_bounds(&self) -> DeviceIntRect {
- let mut bounds = DeviceIntRect::zero();
- for tile in &self.tiles {
- bounds = bounds.union(&tile.local_bounds(self));
- }
- bounds
- }
-
- /// The transformed and clipped conservative device-space bounds of the
- /// surface.
- fn device_bounds(
- &self,
- transform: &CompositorSurfaceTransform,
- clip_rect: &DeviceIntRect,
- ) -> Option<DeviceIntRect> {
- let bounds = self.local_bounds();
- let device_rect = transform.outer_transformed_rect(&bounds.to_f32())?.round_out().to_i32();
- device_rect.intersection(clip_rect)
- }
-}
-
-fn image_rendering_to_gl_filter(filter: ImageRendering) -> gl::GLenum {
- match filter {
- ImageRendering::Pixelated => gl::NEAREST,
- ImageRendering::Auto | ImageRendering::CrispEdges => gl::LINEAR,
- }
-}
-
-/// A source for a composite job which can either be a single BGRA locked SWGL
-/// resource or a collection of SWGL resources representing a YUV surface.
-#[derive(Clone)]
-enum SwCompositeSource {
- BGRA(swgl::LockedResource),
- YUV(
- swgl::LockedResource,
- swgl::LockedResource,
- swgl::LockedResource,
- YuvColorSpace,
- ColorDepth,
- ),
-}
-
-/// Mark ExternalImage's renderer field as safe to send to the SwComposite thread.
-unsafe impl Send for SwCompositeSource {}
-
-/// A tile composition job to be processed by the SwComposite thread.
-/// Stores relevant details about the tile and where to composite it.
-#[derive(Clone)]
-struct SwCompositeJob {
- /// Locked texture that will be unlocked immediately following the job
- locked_src: SwCompositeSource,
- /// Locked framebuffer that may be shared among many jobs
- locked_dst: swgl::LockedResource,
- src_rect: DeviceIntRect,
- dst_rect: DeviceIntRect,
- clipped_dst: DeviceIntRect,
- opaque: bool,
- flip_y: bool,
- filter: ImageRendering,
- /// The total number of bands for this job
- num_bands: u8,
-}
-
-impl SwCompositeJob {
- /// Process a composite job
- fn process(&self, band_index: i32) {
- // Bands are allocated in reverse order, but we want to process them in increasing order.
- let num_bands = self.num_bands as i32;
- let band_index = num_bands - 1 - band_index;
- // Calculate the Y extents for the job's band, starting at the current index and spanning to
- // the following index.
- let band_offset = (self.clipped_dst.size.height * band_index) / num_bands;
- let band_height = (self.clipped_dst.size.height * (band_index + 1)) / num_bands - band_offset;
- // Create a rect that is the intersection of the band with the clipped dest
- let band_clip = DeviceIntRect::new(
- DeviceIntPoint::new(self.clipped_dst.origin.x, self.clipped_dst.origin.y + band_offset),
- DeviceIntSize::new(self.clipped_dst.size.width, band_height),
- );
- match self.locked_src {
- SwCompositeSource::BGRA(ref resource) => {
- self.locked_dst.composite(
- resource,
- self.src_rect.origin.x,
- self.src_rect.origin.y,
- self.src_rect.size.width,
- self.src_rect.size.height,
- self.dst_rect.origin.x,
- self.dst_rect.origin.y,
- self.dst_rect.size.width,
- self.dst_rect.size.height,
- self.opaque,
- self.flip_y,
- image_rendering_to_gl_filter(self.filter),
- band_clip.origin.x,
- band_clip.origin.y,
- band_clip.size.width,
- band_clip.size.height,
- );
- }
- SwCompositeSource::YUV(ref y, ref u, ref v, color_space, color_depth) => {
- let swgl_color_space = match color_space {
- YuvColorSpace::Rec601 => swgl::YUVColorSpace::Rec601,
- YuvColorSpace::Rec709 => swgl::YUVColorSpace::Rec709,
- YuvColorSpace::Rec2020 => swgl::YUVColorSpace::Rec2020,
- YuvColorSpace::Identity => swgl::YUVColorSpace::Identity,
- };
- self.locked_dst.composite_yuv(
- y,
- u,
- v,
- swgl_color_space,
- color_depth.bit_depth(),
- self.src_rect.origin.x,
- self.src_rect.origin.y,
- self.src_rect.size.width,
- self.src_rect.size.height,
- self.dst_rect.origin.x,
- self.dst_rect.origin.y,
- self.dst_rect.size.width,
- self.dst_rect.size.height,
- self.flip_y,
- band_clip.origin.x,
- band_clip.origin.y,
- band_clip.size.width,
- band_clip.size.height,
- );
- }
- }
- }
-}
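
The band extents above use integer math so the bands exactly tile the clipped height. A standalone sketch of the same computation for a 100px-tall destination split into 3 bands (offsets 0, 33, 66 and heights 33, 33, 34):

    // Mirrors the band_offset / band_height math in SwCompositeJob::process().
    let (height, num_bands) = (100i32, 3i32);
    for band_index in 0..num_bands {
        let band_offset = (height * band_index) / num_bands;
        let band_height = (height * (band_index + 1)) / num_bands - band_offset;
        println!("band {}: y offset {}, height {}", band_index, band_offset, band_height);
    }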
-
-/// A reference to a SwCompositeGraph node that can be passed from the render
-/// thread to the SwComposite thread. Consistency of mutation is ensured in
-/// SwCompositeGraphNode via use of Atomic operations that prevent more than
-/// one thread from mutating SwCompositeGraphNode at once. This avoids using
-/// messy and not-thread-safe RefCells or expensive Mutexes inside the graph
-/// node and at least signals to the compiler that potentially unsafe coercions
-/// are occurring.
-#[derive(Clone)]
-struct SwCompositeGraphNodeRef(Arc<UnsafeCell<SwCompositeGraphNode>>);
-
-impl SwCompositeGraphNodeRef {
- fn new(graph_node: SwCompositeGraphNode) -> Self {
- SwCompositeGraphNodeRef(Arc::new(UnsafeCell::new(graph_node)))
- }
-
- fn get(&self) -> &SwCompositeGraphNode {
- unsafe { &*self.0.get() }
- }
-
- fn get_mut(&self) -> &mut SwCompositeGraphNode {
- unsafe { &mut *self.0.get() }
- }
-
- fn get_ptr_mut(&self) -> *mut SwCompositeGraphNode {
- self.0.get()
- }
-}
-
-unsafe impl Send for SwCompositeGraphNodeRef {}
-
-impl Deref for SwCompositeGraphNodeRef {
- type Target = SwCompositeGraphNode;
-
- fn deref(&self) -> &Self::Target {
- self.get()
- }
-}
-
-impl DerefMut for SwCompositeGraphNodeRef {
- fn deref_mut(&mut self) -> &mut Self::Target {
- self.get_mut()
- }
-}
-
-/// Dependency graph of composite jobs to be completed. Keeps a list of child jobs that are dependent on the completion of this job.
-/// Also keeps track of the number of parent jobs that this job is dependent upon before it can be processed. Once there are no more
-/// in-flight parent jobs that it depends on, the graph node is finally added to the job queue for processing.
-struct SwCompositeGraphNode {
- /// Job to be queued for this graph node once ready.
- job: Option<SwCompositeJob>,
- /// The number of remaining bands associated with this job. When this is
- /// non-zero and the node has no more parents left, then the node is being
- /// actively used by the composite thread to process jobs. Once it hits
- /// zero, the owning thread (which brought it to zero) can safely retire
- /// the node as no other thread is using it.
- remaining_bands: AtomicU8,
- /// The number of bands that are available for processing.
- available_bands: AtomicI8,
- /// Count of parents this graph node depends on. While this is non-zero the
- /// node must ensure that it is only being actively mutated by the render
-    /// thread and otherwise never being accessed by the composite thread.
- parents: AtomicU32,
- /// Graph nodes of child jobs that are dependent on this job
- children: Vec<SwCompositeGraphNodeRef>,
-}
-
-unsafe impl Sync for SwCompositeGraphNode {}
-
-impl SwCompositeGraphNode {
- fn new() -> SwCompositeGraphNodeRef {
- SwCompositeGraphNodeRef::new(SwCompositeGraphNode {
- job: None,
- remaining_bands: AtomicU8::new(0),
- available_bands: AtomicI8::new(0),
- parents: AtomicU32::new(0),
- children: Vec::new(),
- })
- }
-
- /// Reset the node's state for a new frame
- fn reset(&mut self) {
- self.job = None;
- self.remaining_bands.store(0, Ordering::SeqCst);
- self.available_bands.store(0, Ordering::SeqCst);
-        // Initialize parents to 1 as a sentinel dependency for an uninitialized job
-        // to avoid queuing an uninitialized job as an unblocked child dependency.
- self.parents.store(1, Ordering::SeqCst);
- self.children.clear();
- }
-
- /// Add a dependent child node to dependency list. Update its parent count.
- fn add_child(&mut self, child: SwCompositeGraphNodeRef) {
- child.parents.fetch_add(1, Ordering::SeqCst);
- self.children.push(child);
- }
-
-    /// Install a job for this node. Returns true if the job has no unprocessed parents
-    /// left that would block immediate composition.
- fn set_job(&mut self, job: SwCompositeJob, num_bands: u8) -> bool {
- self.job = Some(job);
- self.remaining_bands.store(num_bands, Ordering::SeqCst);
- self.available_bands.store(num_bands as _, Ordering::SeqCst);
- // Subtract off the sentinel parent dependency now that job is initialized and check
- // whether there are any remaining parent dependencies to see if this job is ready.
- self.parents.fetch_sub(1, Ordering::SeqCst) <= 1
- }
-
- /// Take an available band if possible. Also return whether there are no more bands left
- /// so the caller may properly clean up after.
- fn take_band(&self) -> (Option<i32>, bool) {
- let available = self.available_bands.fetch_sub(1, Ordering::SeqCst);
- if available > 0 {
- (Some(available as i32 - 1), available == 1)
- } else {
- (None, true)
- }
- }
-
- /// Try to take the job from this node for processing and then process it within the current band.
- fn process_job(&self, band_index: i32) {
- if let Some(ref job) = self.job {
- job.process(band_index);
- }
- }
-
- /// After processing a band, check all child dependencies and remove this parent from
- /// their dependency counts. If applicable, queue the new child bands for composition.
- fn unblock_children(&mut self, thread: &SwCompositeThread) {
- if self.remaining_bands.fetch_sub(1, Ordering::SeqCst) > 1 {
- return;
- }
- // Clear the job to release any locked resources.
- self.job = None;
- let mut lock = None;
- for child in self.children.drain(..) {
- // Remove the child's parent dependency on this node. If there are no more
- // parent dependencies left, send the child job bands for composition.
- if child.parents.fetch_sub(1, Ordering::SeqCst) <= 1 {
- if lock.is_none() {
- lock = Some(thread.lock());
- }
- thread.send_job(lock.as_mut().unwrap(), child);
- }
- }
- }
-}
-
-/// The SwComposite thread processes a queue of composite jobs, also signaling
-/// via a condition when all available jobs have been processed, as tracked by
-/// the job count.
-struct SwCompositeThread {
- /// Queue of available composite jobs
- jobs: Mutex<SwCompositeJobQueue>,
- /// Cache of the current job being processed. This maintains a pointer to
- /// the contents of the SwCompositeGraphNodeRef, which is safe due to the
- /// fact that SwCompositor maintains a strong reference to the contents
- /// in an SwTile to keep it alive while this is in use.
- current_job: AtomicPtr<SwCompositeGraphNode>,
- /// Count of unprocessed jobs still in the queue
- job_count: AtomicIsize,
- /// Condition signaled when either there are jobs available to process or
- /// there are no more jobs left to process. Otherwise stated, this signals
- /// when the job queue transitions from an empty to non-empty state or from
- /// a non-empty to empty state.
- jobs_available: Condvar,
- /// Whether all available jobs have been processed.
- jobs_completed: AtomicBool,
-}
-
-/// The SwCompositeThread struct is shared between the SwComposite thread
-/// and the rendering thread so that both ends can access the job queue.
-unsafe impl Sync for SwCompositeThread {}
-
-/// A FIFO queue of composite jobs to be processed.
-type SwCompositeJobQueue = VecDeque<SwCompositeGraphNodeRef>;
-
-/// Locked access to the composite job queue.
-type SwCompositeThreadLock<'a> = MutexGuard<'a, SwCompositeJobQueue>;
-
-impl SwCompositeThread {
- /// Create the SwComposite thread. Requires a SWGL context in which
- /// to do the composition.
- fn new() -> Arc<SwCompositeThread> {
- let info = Arc::new(SwCompositeThread {
- jobs: Mutex::new(SwCompositeJobQueue::new()),
- current_job: AtomicPtr::new(ptr::null_mut()),
- job_count: AtomicIsize::new(0),
- jobs_available: Condvar::new(),
- jobs_completed: AtomicBool::new(false),
- });
- let result = info.clone();
- let thread_name = "SwComposite";
- thread::Builder::new()
- .name(thread_name.into())
- // The composite thread only calls into SWGL to composite, and we
- // have potentially many composite threads for different windows,
- // so using the default stack size is excessive. A reasonably small
- // stack size should be more than enough for SWGL and reduce memory
- // overhead.
- .stack_size(32 * 1024)
- .spawn(move || {
- thread_started(thread_name);
- // Process any available jobs. take_job returns None once
- // deinit forces the job count negative, causing the thread
- // to exit.
- while let Some((job, band)) = info.take_job(true) {
- info.process_job(job, band);
- }
- thread_stopped();
- })
- .expect("Failed creating SwComposite thread");
- result
- }
-
- fn deinit(&self) {
- // Force the job count to be negative to signal the thread needs to exit.
- self.job_count.store(isize::MIN / 2, Ordering::SeqCst);
- // Wake up the thread in case it is blocked waiting for new jobs
- self.jobs_available.notify_all();
- }
-
- /// Process a job contained in a dependency graph node received from the job queue.
- /// Any child dependencies will be unblocked as appropriate after processing. The
- /// job count will be updated to reflect this.
- fn process_job(&self, graph_node: &mut SwCompositeGraphNode, band: i32) {
- // Do the actual processing of the job contained in this node.
- graph_node.process_job(band);
- // Unblock any child dependencies now that this job has been processed.
- graph_node.unblock_children(self);
- // Decrement the job count.
- self.job_count.fetch_sub(1, Ordering::SeqCst);
- }
-
- /// Queue a tile for composition by adding to the queue and increasing the job count.
- fn queue_composite(
- &self,
- locked_src: SwCompositeSource,
- locked_dst: swgl::LockedResource,
- src_rect: DeviceIntRect,
- dst_rect: DeviceIntRect,
- clip_rect: DeviceIntRect,
- opaque: bool,
- flip_y: bool,
- filter: ImageRendering,
- mut graph_node: SwCompositeGraphNodeRef,
- job_queue: &mut SwCompositeJobQueue,
- ) {
- // For jobs that would span a sufficiently large destination rectangle, split
- // it into multiple horizontal bands so that multiple threads can process them.
- let clipped_dst = match dst_rect.intersection(&clip_rect) {
- Some(clipped_dst) => clipped_dst,
- None => return,
- };
-
- let num_bands = if clipped_dst.size.width >= 64 && clipped_dst.size.height >= 64 {
- (clipped_dst.size.height / 64).min(4) as u8
- } else {
- 1
- };
- let job = SwCompositeJob {
- locked_src,
- locked_dst,
- src_rect,
- dst_rect,
- clipped_dst,
- opaque,
- flip_y,
- filter,
- num_bands,
- };
- self.job_count.fetch_add(num_bands as isize, Ordering::SeqCst);
- if graph_node.set_job(job, num_bands) {
- self.send_job(job_queue, graph_node);
- }
- }
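For a concrete feel of the banding heuristic above (illustrative numbers): a clipped destination of 512x200 pixels yields min(200 / 64, 4) = 3 bands of roughly 67 rows each, while anything under 64 pixels in either dimension stays a single band, since the per-band scheduling overhead would outweigh the parallelism.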
-
- fn prepare_for_composites(&self) {
- // Initialize the job count to 1 to prevent spurious signaling of job completion
- // in the middle of queuing compositing jobs until we're actually waiting for
- // composition.
- self.job_count.store(1, Ordering::SeqCst);
- }
-
- /// Lock the thread for access to the job queue.
- fn lock(&self) -> SwCompositeThreadLock {
- self.jobs.lock().unwrap()
- }
-
- /// Send a job to the composite thread by adding it to the job queue.
- /// Signal that this job has been added in case the queue was empty and the
- /// SwComposite thread is waiting for jobs.
- fn send_job(&self, queue: &mut SwCompositeJobQueue, job: SwCompositeGraphNodeRef) {
- if queue.is_empty() {
- self.jobs_completed.store(false, Ordering::SeqCst);
- self.jobs_available.notify_all();
- }
- queue.push_back(job);
- }
-
- /// Try to get a band of work from the currently cached job when available.
- /// If there is a job, but it has no available bands left, null out the job
- /// so that other threads do not bother checking it.
- fn try_take_job(&self) -> Option<(&mut SwCompositeGraphNode, i32)> {
- let current_job_ptr = self.current_job.load(Ordering::SeqCst);
- if let Some(current_job) = unsafe { current_job_ptr.as_mut() } {
- let (band, done) = current_job.take_band();
- if done {
- let _ = self.current_job.compare_exchange(
- current_job_ptr,
- ptr::null_mut(),
- Ordering::SeqCst,
- Ordering::SeqCst,
- );
- }
- if let Some(band) = band {
- return Some((current_job, band));
- }
- }
- return None;
- }
-
- /// Take a job from the queue. Optionally block waiting for jobs to become
- /// available if this is called from the SwComposite thread.
- fn take_job(&self, wait: bool) -> Option<(&mut SwCompositeGraphNode, i32)> {
- // First try checking the cached job outside the scope of the mutex.
- // For jobs that have multiple bands, this allows us to avoid having
- // to lock the mutex multiple times to check the job for each band.
- if let Some((job, band)) = self.try_take_job() {
- return Some((job, band));
- }
- // Lock the job queue while checking for available jobs. The lock
- // won't be held while the job is processed later outside of this
- // function so that other threads can pull from the queue meanwhile.
- let mut jobs = self.lock();
- loop {
- // While inside the mutex, check the cached job again to see if it
- // has been updated.
- if let Some((job, band)) = self.try_take_job() {
- return Some((job, band));
- }
- // If no cached job was available, try to take a job from the queue
- // and install it as the current job.
- if let Some(job) = jobs.pop_front() {
- self.current_job.store(job.get_ptr_mut(), Ordering::SeqCst);
- continue;
- }
- // Otherwise, the job queue is currently empty. Depending on the
- // value of the job count we may either wait for jobs to become
- // available or exit.
- if wait {
- self.jobs_completed.store(true, Ordering::SeqCst);
- }
- match self.job_count.load(Ordering::SeqCst) {
- // If we completed all available jobs, signal completion. If
- // waiting inside the SwCompositeThread, then block waiting for
- // more jobs to become available in a new frame. Otherwise,
- // return immediately.
- 0 => {
- self.jobs_available.notify_all();
- if !wait {
- return None;
- }
- }
- // A negative job count signals to exit immediately.
- job_count if job_count < 0 => return None,
- _ => {}
- }
- // The SwCompositeThread needs to wait for jobs to become
- // available to avoid busy waiting on the queue.
- jobs = self.jobs_available.wait(jobs).unwrap();
- }
- }
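Stripped of the job stealing, band claiming, and exit handling, take_job is the standard condition-variable consumer loop. A minimal sketch of that core shape (hypothetical WorkQueue type, not the WebRender code):

    use std::collections::VecDeque;
    use std::sync::{Condvar, Mutex};

    struct WorkQueue<T> {
        jobs: Mutex<VecDeque<T>>,
        available: Condvar,
    }

    impl<T> WorkQueue<T> {
        fn pop_blocking(&self) -> T {
            let mut jobs = self.jobs.lock().unwrap();
            loop {
                if let Some(job) = jobs.pop_front() {
                    return job;
                }
                // Atomically releases the lock and sleeps; rechecks on wakeup
                // to tolerate spurious wakeups and racing consumers.
                jobs = self.available.wait(jobs).unwrap();
            }
        }

        fn push(&self, job: T) {
            let mut jobs = self.jobs.lock().unwrap();
            jobs.push_back(job);
            self.available.notify_one();
        }
    }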
-
- /// Wait for all queued composition jobs to be processed.
- /// Instead of blocking on the SwComposite thread to complete all jobs,
- /// this may steal some jobs and attempt to process them while waiting.
- /// This may optionally process jobs synchronously. When normally doing
- /// asynchronous processing, the graph dependencies are relied upon to
- /// properly order the jobs, which makes it safe for the render thread
- /// to steal jobs from the composite thread without violating those
- /// dependencies. Synchronous processing just disables this job stealing
- /// so that the composite thread always handles the jobs in the order
- /// they were queued without having to rely upon possibly unavailable
- /// graph dependencies.
- fn wait_for_composites(&self, sync: bool) {
- // Subtract off the bias to signal we're now waiting on composition and
- // need to know if jobs are completed.
- self.job_count.fetch_sub(1, Ordering::SeqCst);
- // If processing asynchronously, try to steal jobs from the composite
- // thread if it is busy.
- if !sync {
- while let Some((job, band)) = self.take_job(false) {
- self.process_job(job, band);
- }
- // Once there are no more jobs, just fall through to waiting
- // synchronously for the composite thread to finish processing.
- }
- // If processing synchronously, just wait for the composite thread
- // to complete processing any in-flight jobs, then bail.
- let mut jobs = self.lock();
- // If the job count is non-zero here, then there are in-flight jobs.
- while !self.jobs_completed.load(Ordering::SeqCst) {
- jobs = self.jobs_available.wait(jobs).unwrap();
- }
- }
-
- /// Check if there is a non-zero job count (including the sentinel job),
- /// which would indicate that the composite thread has already started
- /// processing jobs.
- fn is_busy_compositing(&self) -> bool {
- self.job_count.load(Ordering::SeqCst) > 0
- }
-}
-
-/// Parameters describing how to composite a surface within a frame
-type FrameSurface = (
- NativeSurfaceId,
- CompositorSurfaceTransform,
- DeviceIntRect,
- ImageRendering,
-);
-
-/// Adapter for RenderCompositors to work with SWGL that shuttles between
-/// WebRender and the RenderCompositor via the Compositor API.
-pub struct SwCompositor {
- gl: swgl::Context,
- compositor: Box<dyn MappableCompositor>,
- use_native_compositor: bool,
- surfaces: HashMap<NativeSurfaceId, SwSurface>,
- frame_surfaces: Vec<FrameSurface>,
- /// Any surface added after we're already compositing (e.g. the debug overlay)
- /// needs to be processed after those frame surfaces. For simplicity we
- /// store them in a separate queue that gets processed later.
- late_surfaces: Vec<FrameSurface>,
- cur_tile: NativeTileId,
- /// The maximum tile size required for any of the allocated surfaces.
- max_tile_size: DeviceIntSize,
- /// Reuse the same depth texture amongst all tiles in all surfaces.
- /// This depth texture must be big enough to accommodate the largest used
- /// tile size for any surface. The maximum requested tile size is tracked
- /// to ensure that this depth texture is at least that big.
- depth_id: u32,
- /// Instance of the SwComposite thread, only created if we are not relying
- /// on a native RenderCompositor.
- composite_thread: Option<Arc<SwCompositeThread>>,
- /// SWGL locked resource for sharing framebuffer with SwComposite thread
- locked_framebuffer: Option<swgl::LockedResource>,
-}
-
-impl SwCompositor {
- pub fn new(
- gl: swgl::Context,
- compositor: Box<dyn MappableCompositor>,
- use_native_compositor: bool,
- ) -> Self {
- let depth_id = gl.gen_textures(1)[0];
- // Only create the SwComposite thread if we're not using a native render
- // compositor. Thus, we are compositing into the main software framebuffer,
- // which benefits from compositing asynchronously while updating tiles.
- let composite_thread = if !use_native_compositor {
- Some(SwCompositeThread::new())
- } else {
- None
- };
- SwCompositor {
- gl,
- compositor,
- use_native_compositor,
- surfaces: HashMap::new(),
- frame_surfaces: Vec::new(),
- late_surfaces: Vec::new(),
- cur_tile: NativeTileId {
- surface_id: NativeSurfaceId(0),
- x: 0,
- y: 0,
- },
- max_tile_size: DeviceIntSize::zero(),
- depth_id,
- composite_thread,
- locked_framebuffer: None,
- }
- }
-
- fn deinit_tile(&self, tile: &SwTile) {
- self.gl.delete_framebuffers(&[tile.fbo_id]);
- self.gl.delete_textures(&[tile.color_id]);
- }
-
- fn deinit_surface(&self, surface: &SwSurface) {
- for tile in &surface.tiles {
- self.deinit_tile(tile);
- }
- }
-
- /// Attempt to occlude any queued surfaces with an opaque occluder rect. If
- /// an existing surface is occluded, we attempt to restrict its clip rect
- /// so long as it can remain a single clip rect. Existing frame surfaces
- /// that are opaque will be fused if possible with the supplied occluder
- /// rect to further restrict any underlying surfaces.
- fn occlude_surfaces(&mut self) {
- // Check if inner rect is fully included in outer rect
- fn includes(outer: &Range<i32>, inner: &Range<i32>) -> bool {
- outer.start <= inner.start && outer.end >= inner.end
- }
-
- // Check if outer range overlaps either the start or end of a range. If
- // there is overlap, return the portion of the inner range remaining
- // after the overlap has been removed.
- fn overlaps(outer: &Range<i32>, inner: &Range<i32>) -> Option<Range<i32>> {
- if outer.start <= inner.start && outer.end >= inner.start {
- Some(outer.end..inner.end.max(outer.end))
- } else if outer.start <= inner.end && outer.end >= inner.end {
- Some(inner.start..outer.start.max(inner.start))
- } else {
- None
- }
- }
-
- fn set_x_range(rect: &mut DeviceIntRect, range: &Range<i32>) {
- rect.origin.x = range.start;
- rect.size.width = range.end - range.start;
- }
-
- fn set_y_range(rect: &mut DeviceIntRect, range: &Range<i32>) {
- rect.origin.y = range.start;
- rect.size.height = range.end - range.start;
- }
-
- fn union(base: Range<i32>, extra: Range<i32>) -> Range<i32> {
- base.start.min(extra.start)..base.end.max(extra.end)
- }
-
- // Before we can try to occlude any surfaces, we need to fix their clip rects to tightly
- // bound the valid region. The clip rect might otherwise enclose an invalid area that
- // can't fully occlude anything even if the surface is opaque.
- for &mut (ref id, ref transform, ref mut clip_rect, _) in &mut self.frame_surfaces {
- if let Some(surface) = self.surfaces.get(id) {
- // Restrict the clip rect to fall within the valid region of the surface.
- *clip_rect = surface.device_bounds(transform, clip_rect).unwrap_or_default();
- }
- }
-
- // For each frame surface, treat it as an occluder if it is non-empty and opaque. Look
- // through the preceding surfaces to see if any can be occluded.
- for occlude_index in 0..self.frame_surfaces.len() {
- let (ref occlude_id, _, ref occlude_rect, _) = self.frame_surfaces[occlude_index];
- match self.surfaces.get(occlude_id) {
- Some(occluder) if occluder.is_opaque && !occlude_rect.is_empty() => {}
- _ => continue,
- }
-
- // Traverse the queued surfaces for this frame in the reverse order of
- // how they are composited, or rather, in order of visibility. For each
- // surface, check if the occluder can restrict the clip rect such that
- // the clip rect can remain a single rect. If the clip rect overlaps
- // the occluder on one axis interval while remaining fully included in
- // the occluder's other axis interval, then we can chop down the edge
- // of the clip rect on the overlapped axis. Further, if the surface is
- // opaque and its clip rect exactly matches the occluder rect on one
- // axis interval while overlapping on the other, fuse it with the
- // occluder rect before considering any underlying surfaces.
- let (mut occlude_x, mut occlude_y) = (occlude_rect.x_range(), occlude_rect.y_range());
- for &mut (ref id, _, ref mut clip_rect, _) in self.frame_surfaces[..occlude_index].iter_mut().rev() {
- if let Some(surface) = self.surfaces.get(id) {
- let (clip_x, clip_y) = (clip_rect.x_range(), clip_rect.y_range());
- if includes(&occlude_x, &clip_x) {
- if let Some(visible) = overlaps(&occlude_y, &clip_y) {
- set_y_range(clip_rect, &visible);
- if surface.is_opaque && occlude_x == clip_x {
- occlude_y = union(occlude_y, visible);
- }
- }
- } else if includes(&occlude_y, &clip_y) {
- if let Some(visible) = overlaps(&occlude_x, &clip_x) {
- set_x_range(clip_rect, &visible);
- if surface.is_opaque && occlude_y == clip_y {
- occlude_x = union(occlude_x, visible);
- }
- }
- }
- }
- }
- }
- }
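To make the axis-interval logic concrete (illustrative numbers): let the occluder span x 0..100, y 0..50 and an underlying surface's clip rect span x 10..90, y 30..80. includes() holds on x (10..90 fits inside 0..100), and overlaps() on y returns the unoccluded remainder 50..80, so the clip rect is trimmed to y 50..80. Had that surface also been opaque with an x range exactly matching the occluder's, the occluder's y range would be fused to 0..80 before the loop descends to the surfaces below it.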
-
- /// Reset tile dependency state for a new frame.
- fn reset_overlaps(&mut self) {
- for surface in self.surfaces.values_mut() {
- for tile in &mut surface.tiles {
- tile.overlaps.set(0);
- tile.invalid.set(false);
- tile.graph_node.reset();
- }
- }
- }
-
- /// Computes an overlap count for a tile that falls within the given composite
- /// destination rectangle. This requires checking all surfaces currently queued for
- /// composition so far in this frame and seeing if they have any invalidated tiles
- /// whose destination rectangles would also overlap the supplied tile. If so,
- /// increment the overlap count to account for all such dependencies on invalid tiles.
- /// Tiles with the same overlap count will still be drawn with a stable ordering in
- /// the order the surfaces were queued, so it is safe to ignore other possible sources
- /// of composition ordering dependencies, as the later queued tile will still be drawn
- /// later than the blocking tiles within that stable order. We assume that the tile's
- /// surface hasn't yet been added to the current frame list of surfaces to composite
- /// so that we only process potential blockers from surfaces that would come earlier
- /// in composition.
- fn init_overlaps(
- &self,
- overlap_id: &NativeSurfaceId,
- overlap_surface: &SwSurface,
- overlap_tile: &SwTile,
- overlap_transform: &CompositorSurfaceTransform,
- overlap_clip_rect: &DeviceIntRect,
- ) {
- // Record an extra overlap for an invalid tile to track the tile's dependency
- // on its own future update.
- let mut overlaps = if overlap_tile.invalid.get() { 1 } else { 0 };
-
- let overlap_rect = match overlap_tile.overlap_rect(overlap_surface, overlap_transform, overlap_clip_rect) {
- Some(overlap_rect) => overlap_rect,
- None => {
- overlap_tile.overlaps.set(overlaps);
- return;
- }
- };
-
- for &(ref id, ref transform, ref clip_rect, _) in &self.frame_surfaces {
- // We only want to consider surfaces that were added before the current one we're
- // checking for overlaps. If we find that surface, then we're done.
- if id == overlap_id {
- break;
- }
- // If the surface's clip rect doesn't overlap the tile's rect,
- // then there is no need to check any tiles within the surface.
- if !overlap_rect.intersects(clip_rect) {
- continue;
- }
- if let Some(surface) = self.surfaces.get(id) {
- for tile in &surface.tiles {
- // If there is a deferred tile that might overlap the destination rectangle,
- // record the overlap.
- if tile.may_overlap(surface, transform, clip_rect, &overlap_rect) {
- if tile.overlaps.get() > 0 {
- overlaps += 1;
- }
- // Regardless of whether this tile is deferred, if it has dependency
- // overlaps, then record that it is potentially a dependency parent.
- tile.graph_node.get_mut().add_child(overlap_tile.graph_node.clone());
- }
- }
- }
- }
- if overlaps > 0 {
- // Has a dependency on some invalid tiles, so need to defer composition.
- overlap_tile.overlaps.set(overlaps);
- }
- }
-
- /// Helper function that queues a composite job to the current locked framebuffer
- fn queue_composite(
- &self,
- surface: &SwSurface,
- transform: &CompositorSurfaceTransform,
- clip_rect: &DeviceIntRect,
- filter: ImageRendering,
- tile: &SwTile,
- job_queue: &mut SwCompositeJobQueue,
- ) {
- if let Some(ref composite_thread) = self.composite_thread {
- if let Some((src_rect, dst_rect, flip_y)) = tile.composite_rects(surface, transform, clip_rect) {
- let source = if surface.external_image.is_some() {
- // If the surface has an attached external image, lock any textures supplied in the descriptor.
- match surface.composite_surface {
- Some(ref info) => match info.yuv_planes {
- 0 => match self.gl.lock_texture(info.textures[0]) {
- Some(texture) => SwCompositeSource::BGRA(texture),
- None => return,
- },
- 3 => match (
- self.gl.lock_texture(info.textures[0]),
- self.gl.lock_texture(info.textures[1]),
- self.gl.lock_texture(info.textures[2]),
- ) {
- (Some(y_texture), Some(u_texture), Some(v_texture)) => SwCompositeSource::YUV(
- y_texture,
- u_texture,
- v_texture,
- info.color_space,
- info.color_depth,
- ),
- _ => return,
- },
- _ => panic!("unsupported number of YUV planes: {}", info.yuv_planes),
- },
- None => return,
- }
- } else if let Some(texture) = self.gl.lock_texture(tile.color_id) {
- // Lock the texture representing the picture cache tile.
- SwCompositeSource::BGRA(texture)
- } else {
- return;
- };
- if let Some(ref framebuffer) = self.locked_framebuffer {
- composite_thread.queue_composite(
- source,
- framebuffer.clone(),
- src_rect,
- dst_rect,
- *clip_rect,
- surface.is_opaque,
- flip_y,
- filter,
- tile.graph_node.clone(),
- job_queue,
- );
- }
- }
- }
- }
-
- /// Lock a surface with an attached external image for compositing.
- fn try_lock_composite_surface(&mut self, id: &NativeSurfaceId) {
- if let Some(surface) = self.surfaces.get_mut(id) {
- if let Some(external_image) = surface.external_image {
- // If the surface has an attached external image, attempt to lock the external image
- // for compositing. Yields a descriptor of textures and data necessary for their
- // interpretation on success.
- let mut info = SWGLCompositeSurfaceInfo {
- yuv_planes: 0,
- textures: [0; 3],
- color_space: YuvColorSpace::Identity,
- color_depth: ColorDepth::Color8,
- size: DeviceIntSize::zero(),
- };
- assert!(!surface.tiles.is_empty());
- let mut tile = &mut surface.tiles[0];
- if self.compositor.lock_composite_surface(self.gl.into(), external_image, &mut info) {
- tile.valid_rect = DeviceIntRect::from_size(info.size);
- surface.composite_surface = Some(info);
- } else {
- tile.valid_rect = DeviceIntRect::zero();
- surface.composite_surface = None;
- }
- }
- }
- }
-
- /// Look for any attached external images that have been locked and then unlock them.
- fn unlock_composite_surfaces(&mut self) {
- for &(ref id, _, _, _) in self.frame_surfaces.iter().chain(self.late_surfaces.iter()) {
- if let Some(surface) = self.surfaces.get_mut(id) {
- if let Some(external_image) = surface.external_image {
- if surface.composite_surface.is_some() {
- self.compositor.unlock_composite_surface(self.gl.into(), external_image);
- surface.composite_surface = None;
- }
- }
- }
- }
- }
-
- /// Issue composites for any tiles that are no longer blocked following a tile update.
- /// We process all surfaces and tiles in the order they were queued.
- fn flush_composites(&self, tile_id: &NativeTileId, surface: &SwSurface, tile: &SwTile) {
- let composite_thread = match &self.composite_thread {
- Some(composite_thread) => composite_thread,
- None => return,
- };
-
- // Look for the tile in the frame list and composite it if it has no dependencies.
- let mut frame_surfaces = self
- .frame_surfaces
- .iter()
- .skip_while(|&(ref id, _, _, _)| *id != tile_id.surface_id);
- let (overlap_rect, mut lock) = match frame_surfaces.next() {
- Some(&(_, ref transform, ref clip_rect, filter)) => {
- // Remove invalid tile's update dependency.
- if tile.invalid.get() {
- tile.overlaps.set(tile.overlaps.get() - 1);
- }
- // If the tile still has overlaps, keep deferring it till later.
- if tile.overlaps.get() > 0 {
- return;
- }
- // Otherwise, the tile's dependencies are all resolved, so composite it.
- let mut lock = composite_thread.lock();
- self.queue_composite(surface, transform, clip_rect, filter, tile, &mut lock);
- // Finally, get the tile's overlap rect used for tracking dependencies
- match tile.overlap_rect(surface, transform, clip_rect) {
- Some(overlap_rect) => (overlap_rect, lock),
- None => return,
- }
- }
- None => return,
- };
-
- // Accumulate rects whose dependencies have been satisfied from this update.
- // Store the union of all these bounds to quickly reject unaffected tiles.
- let mut flushed_bounds = overlap_rect;
- let mut flushed_rects = vec![overlap_rect];
-
- // Check surfaces following the update in the frame list and see if they would overlap it.
- for &(ref id, ref transform, ref clip_rect, filter) in frame_surfaces {
- // If the clip rect doesn't overlap the conservative bounds, we can skip the whole surface.
- if !flushed_bounds.intersects(clip_rect) {
- continue;
- }
- if let Some(surface) = self.surfaces.get(&id) {
- // Search through the surface's tiles for any blocked on this update and queue jobs for them.
- for tile in &surface.tiles {
- let mut overlaps = tile.overlaps.get();
- // Only check tiles that have existing unresolved dependencies
- if overlaps == 0 {
- continue;
- }
- // Get this tile's overlap rect for tracking dependencies
- let overlap_rect = match tile.overlap_rect(surface, transform, clip_rect) {
- Some(overlap_rect) => overlap_rect,
- None => continue,
- };
- // Do a quick check to see if the tile overlaps the conservative bounds.
- if !overlap_rect.intersects(&flushed_bounds) {
- continue;
- }
- // Decrement the overlap count if this tile is dependent on any flushed rects.
- for flushed_rect in &flushed_rects {
- if overlap_rect.intersects(flushed_rect) {
- overlaps -= 1;
- }
- }
- if overlaps != tile.overlaps.get() {
- // If the overlap count changed, this tile had a dependency on some flush rects.
- // If the count hit zero, it is ready to composite.
- tile.overlaps.set(overlaps);
- if overlaps == 0 {
- self.queue_composite(surface, transform, clip_rect, filter, tile, &mut lock);
- // Record that the tile got flushed to update any downwind dependencies.
- flushed_bounds = flushed_bounds.union(&overlap_rect);
- flushed_rects.push(overlap_rect);
- }
- }
- }
- }
- }
- }
-}
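As an illustrative walk-through of the overlap machinery: suppose invalid tile A lies under tile B of a later-queued surface. init_overlaps gives A one overlap for its own pending update and B one overlap for A, and records B as a child of A's graph node. When A's content lands, unbind calls flush_composites, which removes A's self-dependency, queues A's composite, and then finds that A's overlap rect intersects B, dropping B's count to zero so B is queued in the same pass, still after A in the stable queue order.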
-
-impl Compositor for SwCompositor {
- fn create_surface(
- &mut self,
- id: NativeSurfaceId,
- virtual_offset: DeviceIntPoint,
- tile_size: DeviceIntSize,
- is_opaque: bool,
- ) {
- if self.use_native_compositor {
- self.compositor.create_surface(id, virtual_offset, tile_size, is_opaque);
- }
- self.max_tile_size = DeviceIntSize::new(
- self.max_tile_size.width.max(tile_size.width),
- self.max_tile_size.height.max(tile_size.height),
- );
- self.surfaces.insert(id, SwSurface::new(tile_size, is_opaque));
- }
-
- fn create_external_surface(&mut self, id: NativeSurfaceId, is_opaque: bool) {
- if self.use_native_compositor {
- self.compositor.create_external_surface(id, is_opaque);
- }
- self.surfaces
- .insert(id, SwSurface::new(DeviceIntSize::zero(), is_opaque));
- }
-
- fn destroy_surface(&mut self, id: NativeSurfaceId) {
- if let Some(surface) = self.surfaces.remove(&id) {
- self.deinit_surface(&surface);
- }
- if self.use_native_compositor {
- self.compositor.destroy_surface(id);
- }
- }
-
- fn deinit(&mut self) {
- if let Some(ref composite_thread) = self.composite_thread {
- composite_thread.deinit();
- }
-
- for surface in self.surfaces.values() {
- self.deinit_surface(surface);
- }
-
- self.gl.delete_textures(&[self.depth_id]);
-
- if self.use_native_compositor {
- self.compositor.deinit();
- }
- }
-
- fn create_tile(&mut self, id: NativeTileId) {
- if self.use_native_compositor {
- self.compositor.create_tile(id);
- }
- if let Some(surface) = self.surfaces.get_mut(&id.surface_id) {
- let mut tile = SwTile::new(id.x, id.y);
- tile.color_id = self.gl.gen_textures(1)[0];
- tile.fbo_id = self.gl.gen_framebuffers(1)[0];
- self.gl.bind_framebuffer(gl::DRAW_FRAMEBUFFER, tile.fbo_id);
- self.gl.framebuffer_texture_2d(
- gl::DRAW_FRAMEBUFFER,
- gl::COLOR_ATTACHMENT0,
- gl::TEXTURE_2D,
- tile.color_id,
- 0,
- );
- self.gl.framebuffer_texture_2d(
- gl::DRAW_FRAMEBUFFER,
- gl::DEPTH_ATTACHMENT,
- gl::TEXTURE_2D,
- self.depth_id,
- 0,
- );
- self.gl.bind_framebuffer(gl::DRAW_FRAMEBUFFER, 0);
-
- surface.tiles.push(tile);
- }
- }
-
- fn destroy_tile(&mut self, id: NativeTileId) {
- if let Some(surface) = self.surfaces.get_mut(&id.surface_id) {
- if let Some(idx) = surface.tiles.iter().position(|t| t.x == id.x && t.y == id.y) {
- let tile = surface.tiles.remove(idx);
- self.deinit_tile(&tile);
- }
- }
- if self.use_native_compositor {
- self.compositor.destroy_tile(id);
- }
- }
-
- fn attach_external_image(&mut self, id: NativeSurfaceId, external_image: ExternalImageId) {
- if self.use_native_compositor {
- self.compositor.attach_external_image(id, external_image);
- }
- if let Some(surface) = self.surfaces.get_mut(&id) {
- // Surfaces with attached external images have a single tile at the origin encompassing
- // the entire surface.
- assert!(surface.tile_size.is_empty());
- surface.external_image = Some(external_image);
- if surface.tiles.is_empty() {
- surface.tiles.push(SwTile::new(0, 0));
- }
- }
- }
-
- fn invalidate_tile(&mut self, id: NativeTileId, valid_rect: DeviceIntRect) {
- if self.use_native_compositor {
- self.compositor.invalidate_tile(id, valid_rect);
- }
- if let Some(surface) = self.surfaces.get_mut(&id.surface_id) {
- if let Some(tile) = surface.tiles.iter_mut().find(|t| t.x == id.x && t.y == id.y) {
- tile.invalid.set(true);
- tile.valid_rect = valid_rect;
- }
- }
- }
-
- fn bind(&mut self, id: NativeTileId, dirty_rect: DeviceIntRect, valid_rect: DeviceIntRect) -> NativeSurfaceInfo {
- let mut surface_info = NativeSurfaceInfo {
- origin: DeviceIntPoint::zero(),
- fbo_id: 0,
- };
-
- self.cur_tile = id;
-
- if let Some(surface) = self.surfaces.get_mut(&id.surface_id) {
- if let Some(tile) = surface.tiles.iter_mut().find(|t| t.x == id.x && t.y == id.y) {
- assert_eq!(tile.valid_rect, valid_rect);
- if valid_rect.is_empty() {
- return surface_info;
- }
-
- let mut stride = 0;
- let mut buf = ptr::null_mut();
- if self.use_native_compositor {
- if let Some(tile_info) = self.compositor.map_tile(id, dirty_rect, valid_rect) {
- stride = tile_info.stride;
- buf = tile_info.data;
- }
- }
- self.gl.set_texture_buffer(
- tile.color_id,
- gl::RGBA8,
- valid_rect.size.width,
- valid_rect.size.height,
- stride,
- buf,
- surface.tile_size.width,
- surface.tile_size.height,
- );
- // Reallocate the shared depth buffer to fit the valid rect, but within
- // a buffer sized to actually fit at least the maximum possible tile size.
- // The maximum tile size is supplied to avoid reallocation by ensuring the
- // allocated buffer is actually big enough to accommodate the largest tile
- // size requested by any used surface, even though the supplied valid rect may
- // actually be much smaller than this. This will only force a texture
- // reallocation inside SWGL if the maximum tile size has grown since the
- // last time it was supplied, instead simply reusing the buffer if the max
- // tile size is not bigger than what was previously allocated.
- self.gl.set_texture_buffer(
- self.depth_id,
- gl::DEPTH_COMPONENT,
- valid_rect.size.width,
- valid_rect.size.height,
- 0,
- ptr::null_mut(),
- self.max_tile_size.width,
- self.max_tile_size.height,
- );
- surface_info.fbo_id = tile.fbo_id;
- surface_info.origin -= valid_rect.origin.to_vector();
- }
- }
-
- surface_info
- }
-
- fn unbind(&mut self) {
- let id = self.cur_tile;
- if let Some(surface) = self.surfaces.get(&id.surface_id) {
- if let Some(tile) = surface.tiles.iter().find(|t| t.x == id.x && t.y == id.y) {
- if tile.valid_rect.is_empty() {
- // If we didn't actually render anything, then just queue any
- // dependencies.
- self.flush_composites(&id, surface, tile);
- return;
- }
-
- // Force any delayed clears to be resolved.
- self.gl.resolve_framebuffer(tile.fbo_id);
-
- if self.use_native_compositor {
- self.compositor.unmap_tile();
- } else {
- // If we're not relying on a native compositor, then composite
- // any tiles that are dependent on this tile being updated but
- // are otherwise ready to composite.
- self.flush_composites(&id, surface, tile);
- }
- }
- }
- }
-
- fn begin_frame(&mut self) {
- if self.use_native_compositor {
- self.compositor.begin_frame();
- }
- }
-
- fn add_surface(
- &mut self,
- id: NativeSurfaceId,
- transform: CompositorSurfaceTransform,
- clip_rect: DeviceIntRect,
- filter: ImageRendering,
- ) {
- if self.use_native_compositor {
- self.compositor.add_surface(id, transform, clip_rect, filter);
- }
-
- if self.composite_thread.is_some() {
- // If the surface has an attached external image, try to lock that now.
- self.try_lock_composite_surface(&id);
-
- // If we're already busy compositing, then add to the queue of late
- // surfaces instead of trying to sort into the main frame queue.
- // These late surfaces will not have any overlap tracking done for
- // them and must be processed synchronously at the end of the frame.
- if self.composite_thread.as_ref().unwrap().is_busy_compositing() {
- self.late_surfaces.push((id, transform, clip_rect, filter));
- return;
- }
- }
-
- self.frame_surfaces.push((id, transform, clip_rect, filter));
- }
-
- /// Now that all the dependency graph nodes have been built, start queuing
- /// composition jobs. Any surfaces that get added after this point in the
- /// frame will not have overlap dependencies assigned and so must instead
- /// be added to the late_surfaces queue to be processed at the end of the
- /// frame.
- fn start_compositing(&mut self, dirty_rects: &[DeviceIntRect], _opaque_rects: &[DeviceIntRect]) {
- // Opaque rects are currently only computed here, not by WR itself, so we
- // ignore the passed parameter and forward our own version onto the native
- // compositor.
- let mut opaque_rects: Vec<DeviceIntRect> = Vec::new();
- for &(ref id, ref transform, ref clip_rect, _filter) in &self.frame_surfaces {
- if let Some(surface) = self.surfaces.get(id) {
- if !surface.is_opaque {
- continue;
- }
-
- for tile in &surface.tiles {
- if let Some(rect) = tile.overlap_rect(surface, transform, clip_rect) {
- opaque_rects.push(rect);
- }
- }
- }
- }
-
- self.compositor.start_compositing(dirty_rects, &opaque_rects);
-
- if let Some(dirty_rect) = dirty_rects
- .iter()
- .fold(DeviceIntRect::zero(), |acc, dirty_rect| acc.union(dirty_rect))
- .to_non_empty()
- {
- // Factor dirty rect into surface clip rects
- for &mut (_, _, ref mut clip_rect, _) in &mut self.frame_surfaces {
- *clip_rect = clip_rect.intersection(&dirty_rect).unwrap_or_default();
- }
- }
-
- self.occlude_surfaces();
-
- // Discard surfaces that are entirely clipped out
- self.frame_surfaces
- .retain(|&(_, _, clip_rect, _)| !clip_rect.is_empty());
-
- if let Some(ref composite_thread) = self.composite_thread {
- // Compute overlap dependencies for surfaces.
- for &(ref id, ref transform, ref clip_rect, _filter) in &self.frame_surfaces {
- if let Some(surface) = self.surfaces.get(id) {
- for tile in &surface.tiles {
- self.init_overlaps(id, surface, tile, transform, clip_rect);
- }
- }
- }
-
- self.locked_framebuffer = self.gl.lock_framebuffer(0);
-
- composite_thread.prepare_for_composites();
-
- // Issue any initial composite jobs for the SwComposite thread.
- let mut lock = composite_thread.lock();
- for &(ref id, ref transform, ref clip_rect, filter) in &self.frame_surfaces {
- if let Some(surface) = self.surfaces.get(id) {
- for tile in &surface.tiles {
- if tile.overlaps.get() == 0 {
- // Not dependent on any tiles, so go ahead and composite now.
- self.queue_composite(surface, transform, clip_rect, filter, tile, &mut lock);
- }
- }
- }
- }
- }
- }
-
- fn end_frame(&mut self) {
- if self.use_native_compositor {
- self.compositor.end_frame();
- } else if let Some(ref composite_thread) = self.composite_thread {
- // Need to wait for the SwComposite thread to finish any queued jobs.
- composite_thread.wait_for_composites(false);
-
- if !self.late_surfaces.is_empty() {
- // All of the main frame surfaces have been processed by now. But if there
- // are any late surfaces, we need to kick off a new synchronous composite
- // phase. These late surfaces don't have any overlap/dependency tracking,
- // so we just queue them directly and wait synchronously for the composite
- // thread to process them in order.
- composite_thread.prepare_for_composites();
- {
- let mut lock = composite_thread.lock();
- for &(ref id, ref transform, ref clip_rect, filter) in &self.late_surfaces {
- if let Some(surface) = self.surfaces.get(id) {
- for tile in &surface.tiles {
- self.queue_composite(surface, transform, clip_rect, filter, tile, &mut lock);
- }
- }
- }
- }
- composite_thread.wait_for_composites(true);
- }
-
- self.locked_framebuffer = None;
-
- self.unlock_composite_surfaces();
- }
-
- self.frame_surfaces.clear();
- self.late_surfaces.clear();
-
- self.reset_overlaps();
- }
-
- fn enable_native_compositor(&mut self, enable: bool) {
- // TODO: The SwComposite thread is not properly instantiated if this is
- // ever actually toggled.
- assert_eq!(self.use_native_compositor, enable);
- self.compositor.enable_native_compositor(enable);
- self.use_native_compositor = enable;
- }
-
- fn get_capabilities(&self) -> CompositorCapabilities {
- self.compositor.get_capabilities()
- }
-}
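Putting the trait methods together, a frame drives SwCompositor in a fixed order. A rough sketch of that sequence (illustrative driver loop with hypothetical iterables; the real caller is WebRender's renderer):

    c.begin_frame();
    for s in visible_surfaces_back_to_front {
        c.add_surface(s.id, s.transform, s.clip_rect, s.filter);
    }
    // Queues composites for tiles with no outstanding dependencies.
    c.start_compositing(&dirty_rects, &[]);
    for t in tiles_needing_update {
        let info = c.bind(t.id, t.dirty_rect, t.valid_rect);
        // ... rasterize into info.fbo_id via SWGL ...
        c.unbind(); // flushes composites that were blocked on this tile
    }
    c.end_frame(); // drains the SwComposite queue, then unlocks surfaces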
diff --git a/third_party/webrender/webrender/src/debug_item.rs b/third_party/webrender/webrender/src/debug_item.rs
deleted file mode 100644
index 04ba632f464..00000000000
--- a/third_party/webrender/webrender/src/debug_item.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use api::{units::*, ColorF};
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum DebugItem {
- Text {
- msg: String,
- color: ColorF,
- position: DevicePoint,
- },
- Rect {
- outer_color: ColorF,
- inner_color: ColorF,
- rect: DeviceRect,
- },
-}
diff --git a/third_party/webrender/webrender/src/renderer/debug.rs b/third_party/webrender/webrender/src/debug_render.rs
index 57092720cfe..c43bcd1d832 100644
--- a/third_party/webrender/webrender/src/renderer/debug.rs
+++ b/third_party/webrender/webrender/src/debug_render.rs
@@ -2,7 +2,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{ColorU, ImageFormat, ImageBufferKind};
+use api::{ColorU, ColorF, ImageFormat, TextureTarget};
use api::units::*;
use crate::debug_font_data;
use crate::device::{Device, Program, Texture, TextureSlot, VertexDescriptor, ShaderError, VAO};
@@ -11,6 +11,21 @@ use euclid::{Point2D, Rect, Size2D, Transform3D, default};
use crate::internal_types::Swizzle;
use std::f32;
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum DebugItem {
+ Text {
+ msg: String,
+ color: ColorF,
+ position: DevicePoint,
+ },
+ Rect {
+ outer_color: ColorF,
+ inner_color: ColorF,
+ rect: DeviceRect,
+ },
+}
+
#[derive(Debug, Copy, Clone)]
enum DebugSampler {
Font,
@@ -120,17 +135,18 @@ impl DebugRenderer {
&DESC_COLOR,
)?;
- let font_vao = device.create_vao(&DESC_FONT, 1);
- let line_vao = device.create_vao(&DESC_COLOR, 1);
- let tri_vao = device.create_vao(&DESC_COLOR, 1);
+ let font_vao = device.create_vao(&DESC_FONT);
+ let line_vao = device.create_vao(&DESC_COLOR);
+ let tri_vao = device.create_vao(&DESC_COLOR);
let font_texture = device.create_texture(
- ImageBufferKind::Texture2D,
+ TextureTarget::Array,
ImageFormat::R8,
debug_font_data::BMP_WIDTH,
debug_font_data::BMP_HEIGHT,
TextureFilter::Linear,
None,
+ 1,
);
device.upload_texture_immediate(
&font_texture,
@@ -371,45 +387,3 @@ impl DebugRenderer {
self.tri_indices.clear();
}
}
-
-pub struct LazyInitializedDebugRenderer {
- debug_renderer: Option<DebugRenderer>,
- failed: bool,
-}
-
-impl LazyInitializedDebugRenderer {
- pub fn new() -> Self {
- Self {
- debug_renderer: None,
- failed: false,
- }
- }
-
- pub fn get_mut<'a>(&'a mut self, device: &mut Device) -> Option<&'a mut DebugRenderer> {
- if self.failed {
- return None;
- }
- if self.debug_renderer.is_none() {
- match DebugRenderer::new(device) {
- Ok(renderer) => { self.debug_renderer = Some(renderer); }
- Err(_) => {
- // The shader compilation code already logs errors.
- self.failed = true;
- }
- }
- }
-
- self.debug_renderer.as_mut()
- }
-
- /// Returns mut ref to `debug::DebugRenderer` if one already exists, otherwise returns `None`.
- pub fn try_get_mut<'a>(&'a mut self) -> Option<&'a mut DebugRenderer> {
- self.debug_renderer.as_mut()
- }
-
- pub fn deinit(self, device: &mut Device) {
- if let Some(debug_renderer) = self.debug_renderer {
- debug_renderer.deinit(device);
- }
- }
-}
diff --git a/third_party/webrender/webrender/src/debug_server.rs b/third_party/webrender/webrender/src/debug_server.rs
new file mode 100644
index 00000000000..c3cd29549ad
--- /dev/null
+++ b/third_party/webrender/webrender/src/debug_server.rs
@@ -0,0 +1,402 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{ApiMsg, DebugCommand, DebugFlags};
+use api::units::DeviceIntSize;
+use crate::print_tree::PrintTreePrinter;
+use crate::renderer;
+use std::sync::mpsc::{channel, Receiver};
+use std::sync::mpsc::Sender;
+use std::thread;
+use ws;
+use base64::encode;
+use image_loader;
+
+// Messages that are sent from the render backend to the renderer
+// debug command queue. These are sent in a separate queue so
+// that none of these types are exposed to the RenderApi interfaces.
+// We can't use select!() as it's not stable...
+enum DebugMsg {
+ AddSender(ws::Sender),
+ RemoveSender(ws::util::Token),
+}
+
+// Represents a connection to a client.
+struct Server {
+ ws: ws::Sender,
+ debug_tx: Sender<DebugMsg>,
+ api_tx: Sender<ApiMsg>,
+ debug_flags: DebugFlags,
+}
+
+impl ws::Handler for Server {
+ fn on_open(&mut self, _: ws::Handshake) -> ws::Result<()> {
+ self.debug_tx
+ .send(DebugMsg::AddSender(self.ws.clone()))
+ .ok();
+
+ Ok(())
+ }
+
+ fn on_close(&mut self, _: ws::CloseCode, _: &str) {
+ self.debug_tx
+ .send(DebugMsg::RemoveSender(self.ws.token()))
+ .ok();
+ }
+
+ fn on_message(&mut self, msg: ws::Message) -> ws::Result<()> {
+ match msg {
+ ws::Message::Text(string) => {
+ // First, check for flag change commands.
+ let mut set_flags = true;
+ match string.as_str() {
+ "enable_profiler" => self.debug_flags.insert(DebugFlags::PROFILER_DBG),
+ "disable_profiler" => self.debug_flags.remove(DebugFlags::PROFILER_DBG),
+ "enable_texture_cache_debug" => self.debug_flags.insert(DebugFlags::TEXTURE_CACHE_DBG),
+ "disable_texture_cache_debug" => self.debug_flags.remove(DebugFlags::TEXTURE_CACHE_DBG),
+ "enable_render_target_debug" => self.debug_flags.insert(DebugFlags::RENDER_TARGET_DBG),
+ "disable_render_target_debug" => self.debug_flags.remove(DebugFlags::RENDER_TARGET_DBG),
+ "enable_gpu_time_queries" => self.debug_flags.insert(DebugFlags::GPU_TIME_QUERIES),
+ "disable_gpu_time_queries" => self.debug_flags.remove(DebugFlags::GPU_TIME_QUERIES),
+ "enable_gpu_sample_queries" => self.debug_flags.insert(DebugFlags::GPU_SAMPLE_QUERIES),
+ "disable_gpu_sample_queries" => self.debug_flags.remove(DebugFlags::GPU_SAMPLE_QUERIES),
+ "disable_opaque_pass" => self.debug_flags.insert(DebugFlags::DISABLE_OPAQUE_PASS),
+ "enable_opaque_pass" => self.debug_flags.remove(DebugFlags::DISABLE_OPAQUE_PASS),
+ "disable_alpha_pass" => self.debug_flags.insert(DebugFlags::DISABLE_ALPHA_PASS),
+ "enable_alpha_pass" => self.debug_flags.remove(DebugFlags::DISABLE_ALPHA_PASS),
+ "disable_clip_masks" => self.debug_flags.insert(DebugFlags::DISABLE_CLIP_MASKS),
+ "enable_clip_masks" => self.debug_flags.remove(DebugFlags::DISABLE_CLIP_MASKS),
+ "disable_text_prims" => self.debug_flags.insert(DebugFlags::DISABLE_TEXT_PRIMS),
+ "enable_text_prims" => self.debug_flags.remove(DebugFlags::DISABLE_TEXT_PRIMS),
+ "disable_gradient_prims" => self.debug_flags.insert(DebugFlags::DISABLE_GRADIENT_PRIMS),
+ "enable_gradient_prims" => self.debug_flags.remove(DebugFlags::DISABLE_GRADIENT_PRIMS),
+ _ => set_flags = false,
+ };
+
+ let cmd = if set_flags {
+ DebugCommand::SetFlags(self.debug_flags)
+ } else {
+ match string.as_str() {
+ "fetch_passes" => DebugCommand::FetchPasses,
+ "fetch_screenshot" => DebugCommand::FetchScreenshot,
+ "fetch_documents" => DebugCommand::FetchDocuments,
+ "fetch_spatial_tree" => DebugCommand::FetchClipScrollTree,
+ "fetch_render_tasks" => DebugCommand::FetchRenderTasks,
+ msg => {
+ error!("unknown msg {}", msg);
+ return Ok(());
+ }
+ }
+ };
+
+ let msg = ApiMsg::DebugCommand(cmd);
+ self.api_tx.send(msg).unwrap();
+ }
+ ws::Message::Binary(..) => {}
+ }
+
+ Ok(())
+ }
+}
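Any WebSocket client can exercise this command protocol. A minimal sketch using the same ws crate, assuming the server below is listening on its default 127.0.0.1:3583 address:

    fn main() {
        ws::connect("ws://127.0.0.1:3583", |out| {
            // Toggle the profiler overlay, then print whatever comes back.
            out.send("enable_profiler").unwrap();
            move |msg| {
                println!("server replied: {}", msg);
                Ok(())
            }
        }).unwrap();
    }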
+
+// Spawn a thread for a given renderer, and wait for
+// client connections.
+pub struct DebugServerImpl {
+ join_handle: Option<thread::JoinHandle<()>>,
+ broadcaster: ws::Sender,
+ debug_rx: Receiver<DebugMsg>,
+ senders: Vec<ws::Sender>,
+}
+
+impl DebugServerImpl {
+ pub fn new(api_tx: Sender<ApiMsg>) -> DebugServerImpl {
+ let (debug_tx, debug_rx) = channel();
+
+ let socket = ws::Builder::new()
+ .build(move |out| {
+ Server {
+ ws: out,
+ debug_tx: debug_tx.clone(),
+ api_tx: api_tx.clone(),
+ debug_flags: DebugFlags::empty(),
+ }
+ })
+ .unwrap();
+
+ let broadcaster = socket.broadcaster();
+
+ let join_handle = Some(thread::spawn(move || {
+ let address = "127.0.0.1:3583";
+ debug!("WebRender debug server started: {}", address);
+ if let Err(..) = socket.listen(address) {
+ error!("ERROR: Unable to bind debugger websocket (port may be in use).");
+ }
+ }));
+
+ DebugServerImpl {
+ join_handle,
+ broadcaster,
+ debug_rx,
+ senders: Vec::new(),
+ }
+ }
+}
+
+impl renderer::DebugServer for DebugServerImpl {
+ fn send(&mut self, message: String) {
+ // Add any new connections that have been queued.
+ while let Ok(msg) = self.debug_rx.try_recv() {
+ match msg {
+ DebugMsg::AddSender(sender) => {
+ self.senders.push(sender);
+ }
+ DebugMsg::RemoveSender(token) => {
+ self.senders.retain(|sender| sender.token() != token);
+ }
+ }
+ }
+
+ // Broadcast the message to all senders. Keep
+ // track of the ones that failed, so they can
+ // be removed from the active sender list.
+ let mut disconnected_senders = Vec::new();
+
+ for (i, sender) in self.senders.iter().enumerate() {
+ if let Err(..) = sender.send(message.clone()) {
+ disconnected_senders.push(i);
+ }
+ }
+
+ // Remove the broken senders from the list
+ // for next broadcast. Remove in reverse
+ // order so the indices are valid for the
+ // entire loop.
+ for i in disconnected_senders.iter().rev() {
+ self.senders.remove(*i);
+ }
+ }
+}
+
+impl Drop for DebugServerImpl {
+ fn drop(&mut self) {
+ self.broadcaster.shutdown().ok();
+ self.join_handle.take().unwrap().join().ok();
+ }
+}
+
+// A serializable list of debug information about passes
+// that can be sent to the client.
+
+#[derive(Serialize)]
+pub enum BatchKind {
+ Clip,
+ Cache,
+ Opaque,
+ Alpha,
+}
+
+#[derive(Serialize)]
+pub struct PassList {
+ kind: &'static str,
+ passes: Vec<Pass>,
+}
+
+impl PassList {
+ pub fn new() -> PassList {
+ PassList {
+ kind: "passes",
+ passes: Vec::new(),
+ }
+ }
+
+ pub fn add(&mut self, pass: Pass) {
+ self.passes.push(pass);
+ }
+}
+
+#[derive(Serialize)]
+pub struct Pass {
+ pub targets: Vec<Target>,
+}
+
+#[derive(Serialize)]
+pub struct Target {
+ kind: &'static str,
+ batches: Vec<Batch>,
+}
+
+impl Target {
+ pub fn new(kind: &'static str) -> Target {
+ Target {
+ kind,
+ batches: Vec::new(),
+ }
+ }
+
+ pub fn add(&mut self, kind: BatchKind, description: &str, count: usize) {
+ if count > 0 {
+ self.batches.push(Batch {
+ kind,
+ description: description.to_owned(),
+ count,
+ });
+ }
+ }
+}
+
+#[derive(Serialize)]
+struct Batch {
+ kind: BatchKind,
+ description: String,
+ count: usize,
+}
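These types exist only to be serialized for the client; assuming serde_json as the serializer, a populated list comes out along these lines:

    let mut target = Target::new("color");
    target.add(BatchKind::Opaque, "solid rects", 12);
    let mut passes = PassList::new();
    passes.add(Pass { targets: vec![target] });
    // serde_json::to_string(&passes) yields approximately:
    // {"kind":"passes","passes":[{"targets":[{"kind":"color","batches":
    //   [{"kind":"Opaque","description":"solid rects","count":12}]}]}]}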
+
+#[derive(Serialize)]
+pub struct TreeNode {
+ description: String,
+ children: Vec<TreeNode>,
+}
+
+impl TreeNode {
+ pub fn new(description: &str) -> TreeNode {
+ TreeNode {
+ description: description.to_owned(),
+ children: Vec::new(),
+ }
+ }
+
+ pub fn add_child(&mut self, child: TreeNode) {
+ self.children.push(child);
+ }
+
+ pub fn add_item(&mut self, description: &str) {
+ self.children.push(TreeNode::new(description));
+ }
+}
+
+#[derive(Serialize)]
+pub struct DocumentList {
+ kind: &'static str,
+ root: TreeNode,
+}
+
+impl DocumentList {
+ pub fn new() -> Self {
+ DocumentList {
+ kind: "documents",
+ root: TreeNode::new("root"),
+ }
+ }
+
+ pub fn add(&mut self, item: TreeNode) {
+ self.root.add_child(item);
+ }
+}
+
+#[derive(Serialize)]
+pub struct Screenshot {
+ kind: &'static str,
+ data: String
+}
+
+impl Screenshot {
+ pub fn new(size: DeviceIntSize, data: Vec<u8>) -> Self {
+ let mut output = Vec::with_capacity((size.width * size.height) as usize);
+ {
+ let encoder = image_loader::png::PNGEncoder::new(&mut output);
+ encoder.encode(
+ &data,
+ size.width as u32,
+ size.height as u32,
+ image_loader::ColorType::Rgba8,
+ ).unwrap();
+ }
+
+ let data = encode(&output);
+ Screenshot {
+ kind: "screenshot",
+ data
+ }
+ }
+}
+
+// A serializable list of debug information about spatial trees
+// that can be sent to the client
+
+#[derive(Serialize)]
+pub struct SpatialTreeList {
+ kind: &'static str,
+ root: TreeNode,
+}
+
+impl SpatialTreeList {
+ pub fn new() -> Self {
+ SpatialTreeList {
+ kind: "spatial_tree",
+ root: TreeNode::new("root"),
+ }
+ }
+
+ pub fn add(&mut self, item: TreeNode) {
+ self.root.add_child(item);
+ }
+}
+
+#[derive(Serialize)]
+pub struct RenderTaskList {
+ kind: &'static str,
+ root: TreeNode,
+}
+
+impl RenderTaskList {
+ pub fn new() -> Self {
+ RenderTaskList {
+ kind: "render_tasks",
+ root: TreeNode::new("root"),
+ }
+ }
+
+ pub fn add(&mut self, item: TreeNode) {
+ self.root.add_child(item);
+ }
+}
+
+// A TreeNode-based PrintTreePrinter to serialize pretty-printed
+// trees as JSON.
+pub struct TreeNodeBuilder {
+ levels: Vec<TreeNode>,
+}
+
+impl TreeNodeBuilder {
+ pub fn new(root: TreeNode) -> TreeNodeBuilder {
+ TreeNodeBuilder { levels: vec![root] }
+ }
+
+ fn current_level_mut(&mut self) -> &mut TreeNode {
+ assert!(!self.levels.is_empty());
+ self.levels.last_mut().unwrap()
+ }
+
+ pub fn build(mut self) -> TreeNode {
+ assert!(self.levels.len() == 1);
+ self.levels.pop().unwrap()
+ }
+}
+
+impl PrintTreePrinter for TreeNodeBuilder {
+ fn new_level(&mut self, title: String) {
+ let level = TreeNode::new(&title);
+ self.levels.push(level);
+ }
+
+ fn end_level(&mut self) {
+ assert!(!self.levels.is_empty());
+ let last_level = self.levels.pop().unwrap();
+ self.current_level_mut().add_child(last_level);
+ }
+
+ fn add_item(&mut self, text: String) {
+ self.current_level_mut().add_item(&text);
+ }
+}
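A brief usage sketch of the builder through the PrintTreePrinter interface (illustrative strings):

    let mut builder = TreeNodeBuilder::new(TreeNode::new("documents"));
    builder.new_level("document #1".to_string());
    builder.add_item("pipeline (1,0)".to_string());
    builder.end_level();
    // The root now owns one child ("document #1") with a single leaf item.
    let tree = builder.build();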
diff --git a/third_party/webrender/webrender/src/device/gl.rs b/third_party/webrender/webrender/src/device/gl.rs
index 5e0c0503782..3eac572081b 100644
--- a/third_party/webrender/webrender/src/device/gl.rs
+++ b/third_party/webrender/webrender/src/device/gl.rs
@@ -3,14 +3,12 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use super::super::shader_source::{OPTIMIZED_SHADERS, UNOPTIMIZED_SHADERS};
-use api::{ColorF, ImageDescriptor, ImageFormat};
-use api::{MixBlendMode, ImageBufferKind, VoidPtrToSizeFn};
-use api::{CrashAnnotator, CrashAnnotation, CrashAnnotatorGuard};
+use api::{ColorF, ImageDescriptor, ImageFormat, MemoryReport};
+use api::{MixBlendMode, TextureTarget, VoidPtrToSizeFn};
use api::units::*;
use euclid::default::Transform3D;
use gleam::gl;
-use crate::render_api::MemoryReport;
-use crate::internal_types::{FastHashMap, RenderTargetInfo, Swizzle, SwizzleSettings};
+use crate::internal_types::{FastHashMap, LayerIndex, RenderTargetInfo, Swizzle, SwizzleSettings};
use crate::util::round_up_to_multiple;
use crate::profiler;
use log::Level;
@@ -37,7 +35,6 @@ use webrender_build::shader::{
ProgramSourceDigest, ShaderKind, ShaderVersion, build_shader_main_string,
build_shader_prefix_string, do_build_shader_string, shader_source_from_file,
};
-use malloc_size_of::MallocSizeOfOps;
/// Sequence number for frames, as tracked by the device layer.
#[derive(Debug, Copy, Clone, PartialEq, Ord, Eq, PartialOrd)]
@@ -72,7 +69,7 @@ pub enum DepthFunction {
}
#[repr(u32)]
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum TextureFilter {
@@ -150,20 +147,12 @@ fn depth_target_size_in_bytes(dimensions: &DeviceIntSize) -> usize {
(pixels as usize) * 4
}
-pub fn get_gl_target(target: ImageBufferKind) -> gl::GLuint {
+pub fn get_gl_target(target: TextureTarget) -> gl::GLuint {
match target {
- ImageBufferKind::Texture2D => gl::TEXTURE_2D,
- ImageBufferKind::TextureRect => gl::TEXTURE_RECTANGLE,
- ImageBufferKind::TextureExternal => gl::TEXTURE_EXTERNAL_OES,
- }
-}
-
-pub fn from_gl_target(target: gl::GLuint) -> ImageBufferKind {
- match target {
- gl::TEXTURE_2D => ImageBufferKind::Texture2D,
- gl::TEXTURE_RECTANGLE => ImageBufferKind::TextureRect,
- gl::TEXTURE_EXTERNAL_OES => ImageBufferKind::TextureExternal,
- _ => panic!("Unexpected target {:?}", target),
+ TextureTarget::Default => gl::TEXTURE_2D,
+ TextureTarget::Array => gl::TEXTURE_2D_ARRAY,
+ TextureTarget::Rect => gl::TEXTURE_RECTANGLE,
+ TextureTarget::External => gl::TEXTURE_EXTERNAL_OES,
}
}
@@ -309,16 +298,14 @@ impl VertexDescriptor {
}
}
- fn bind(&self, gl: &dyn gl::Gl, main: VBOId, instance: VBOId, instance_divisor: u32) {
+ fn bind(&self, gl: &dyn gl::Gl, main: VBOId, instance: VBOId) {
Self::bind_attributes(self.vertex_attributes, 0, 0, gl, main);
if !self.instance_attributes.is_empty() {
Self::bind_attributes(
self.instance_attributes,
self.vertex_attributes.len(),
- instance_divisor,
- gl,
- instance,
+ 1, gl, instance,
);
}
}
@@ -386,22 +373,21 @@ impl<T> Drop for VBO<T> {
pub struct ExternalTexture {
id: gl::GLuint,
target: gl::GLuint,
- #[allow(dead_code)]
- swizzle: Swizzle,
+ _swizzle: Swizzle,
uv_rect: TexelRect,
}
impl ExternalTexture {
pub fn new(
id: u32,
- target: ImageBufferKind,
+ target: TextureTarget,
swizzle: Swizzle,
uv_rect: TexelRect,
) -> Self {
ExternalTexture {
id,
target: get_gl_target(target),
- swizzle,
+ _swizzle: swizzle,
uv_rect,
}
}
@@ -433,16 +419,17 @@ bitflags! {
pub struct Texture {
id: gl::GLuint,
target: gl::GLuint,
+ layer_count: i32,
format: ImageFormat,
size: DeviceIntSize,
filter: TextureFilter,
flags: TextureFlags,
/// An internally mutable swizzling state that may change between batches.
active_swizzle: Cell<Swizzle>,
- /// Framebuffer Object allowing this texture to be rendered to.
- ///
- /// Empty if this texture is not used as a render target or if a depth buffer is needed.
- fbo: Option<FBOId>,
+ /// Framebuffer Objects, one for each layer of the texture, allowing this
+ /// texture to be rendered to. Empty if this texture is not used as a render
+ /// target.
+ fbos: Vec<FBOId>,
/// Same as the above, but with a depth buffer attached.
///
/// FBOs are cheap to create but expensive to reconfigure (since doing so
@@ -455,12 +442,15 @@ pub struct Texture {
/// empty if this texture is not used as a render target _or_ if it is, but
/// the depth buffer has never been requested.
///
- /// Note that we always fill fbo, and then lazily create fbo_with_depth
+ /// Note that we always fill fbos, and then lazily create fbos_with_depth
/// when needed. We could make both lazy (i.e. render targets would have one
/// or the other, but not both, unless they were actually used in both
/// configurations). But that would complicate a lot of logic in this module,
/// and FBOs are cheap enough to create.
- fbo_with_depth: Option<FBOId>,
+ fbos_with_depth: Vec<FBOId>,
+ /// If we are unable to blit directly to a texture array then we need
+ /// an intermediate renderbuffer.
+ blit_workaround_buffer: Option<(RBOId, FBOId)>,
last_frame_used: GpuFrameId,
}
@@ -469,6 +459,10 @@ impl Texture {
self.size
}
+ pub fn get_layer_count(&self) -> i32 {
+ self.layer_count
+ }
+
pub fn get_format(&self) -> ImageFormat {
self.format
}
@@ -477,12 +471,8 @@ impl Texture {
self.filter
}
- pub fn get_target(&self) -> ImageBufferKind {
- from_gl_target(self.target)
- }
-
pub fn supports_depth(&self) -> bool {
- self.fbo_with_depth.is_some()
+ !self.fbos_with_depth.is_empty()
}
pub fn last_frame_used(&self) -> GpuFrameId {
@@ -493,10 +483,6 @@ impl Texture {
self.last_frame_used == frame_id
}
- pub fn is_render_target(&self) -> bool {
- self.fbo.is_some()
- }
-
/// Returns true if this texture was used within `threshold` frames of
/// the current frame.
pub fn used_recently(&self, current_frame_id: GpuFrameId, threshold: usize) -> bool {
@@ -513,21 +499,28 @@ impl Texture {
&mut self.flags
}
- /// Returns the number of bytes (generally in GPU memory) that this texture
- /// consumes.
- pub fn size_in_bytes(&self) -> usize {
+ /// Returns the number of bytes (generally in GPU memory) that each layer of
+ /// this texture consumes.
+ pub fn layer_size_in_bytes(&self) -> usize {
+ assert!(self.layer_count > 0 || self.size.width + self.size.height == 0);
let bpp = self.format.bytes_per_pixel() as usize;
let w = self.size.width as usize;
let h = self.size.height as usize;
bpp * w * h
}
+ /// Returns the number of bytes (generally in GPU memory) that this texture
+ /// consumes.
+ pub fn size_in_bytes(&self) -> usize {
+ self.layer_size_in_bytes() * (self.layer_count as usize)
+ }
+
#[cfg(feature = "replay")]
pub fn into_external(mut self) -> ExternalTexture {
let ext = ExternalTexture {
id: self.id,
target: self.target,
- swizzle: Swizzle::default(),
+ _swizzle: Swizzle::default(),
// TODO(gw): Support custom UV rect for external textures during captures
uv_rect: TexelRect::new(
0.0,
@@ -551,7 +544,6 @@ pub struct Program {
id: gl::GLuint,
u_transform: gl::GLint,
u_mode: gl::GLint,
- u_texture_size: gl::GLint,
source_info: ProgramSourceInfo,
is_initialized: bool,
}
@@ -590,7 +582,6 @@ pub struct VAO {
main_vbo_id: VBOId,
instance_vbo_id: VBOId,
instance_stride: usize,
- instance_divisor: u32,
owns_vertices_and_indices: bool,
}
@@ -603,7 +594,6 @@ impl Drop for VAO {
}
}
-#[derive(Debug)]
pub struct PBO {
id: gl::GLuint,
reserved_size: usize,
@@ -619,7 +609,7 @@ impl Drop for PBO {
fn drop(&mut self) {
debug_assert!(
thread::panicking() || self.id == 0,
- "renderer::deinit not called or PBO not returned to pool"
+ "renderer::deinit not called"
);
}
}
@@ -658,7 +648,6 @@ enum ProgramSourceType {
pub struct ProgramSourceInfo {
base_filename: &'static str,
features: Vec<&'static str>,
- full_name_cstr: Rc<std::ffi::CString>,
source_type: ProgramSourceType,
digest: ProgramSourceDigest,
}
@@ -683,11 +672,11 @@ impl ProgramSourceInfo {
// Hash the renderer name.
hasher.write(device.capabilities.renderer_name.as_bytes());
- let full_name = Self::make_full_name(name, features);
+ let full_name = &Self::full_name(name, features);
let optimized_source = if device.use_optimized_shaders {
- OPTIMIZED_SHADERS.get(&(gl_version, &full_name)).or_else(|| {
- warn!("Missing optimized shader source for {}", &full_name);
+ OPTIMIZED_SHADERS.get(&(gl_version, full_name)).or_else(|| {
+ warn!("Missing optimized shader source for {}", full_name);
None
})
} else {
@@ -759,14 +748,13 @@ impl ProgramSourceInfo {
ProgramSourceInfo {
base_filename: name,
features: features.to_vec(),
- full_name_cstr: Rc::new(std::ffi::CString::new(full_name).unwrap()),
source_type,
digest: hasher.into(),
}
}
fn compute_source(&self, device: &Device, kind: ShaderKind) -> String {
- let full_name = self.full_name();
+ let full_name = Self::full_name(self.base_filename, &self.features);
match self.source_type {
ProgramSourceType::Optimized(gl_version) => {
let shader = OPTIMIZED_SHADERS
@@ -791,17 +779,13 @@ impl ProgramSourceInfo {
}
}
- fn make_full_name(base_filename: &'static str, features: &[&'static str]) -> String {
+ fn full_name(base_filename: &'static str, features: &[&'static str]) -> String {
if features.is_empty() {
base_filename.to_string()
} else {
format!("{}_{}", base_filename, features.join("_"))
}
}
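
For illustration (hypothetical feature list): full_name("brush_solid", &[])
returns "brush_solid", while full_name("brush_solid", &["ALPHA_PASS", "DEBUG"])
returns "brush_solid_ALPHA_PASS_DEBUG".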
-
- fn full_name(&self) -> String {
- Self::make_full_name(self.base_filename, &self.features)
- }
}
#[cfg_attr(feature = "serialize_program", derive(Deserialize, Serialize))]
@@ -947,10 +931,14 @@ pub struct Capabilities {
pub supports_multisampling: bool,
/// Whether the function `glCopyImageSubData` is available.
pub supports_copy_image_sub_data: bool,
- /// Whether the RGBAF32 textures can be bound to framebuffers.
- pub supports_color_buffer_float: bool,
- /// Whether the device supports persistently mapped buffers, via glBufferStorage.
- pub supports_buffer_storage: bool,
+ /// Whether we are able to use `glBlitFramebuffer` with the draw fbo
+ /// bound to a non-0th layer of a texture array. This is buggy on
+ /// Adreno devices.
+ pub supports_blit_to_texture_array: bool,
+ /// Whether we can use the pixel local storage functionality that
+ /// is available on some mobile GPUs. This allows fast access to
+ /// the per-pixel tile memory.
+ pub supports_pixel_local_storage: bool,
/// Whether advanced blend equations are supported.
pub supports_advanced_blend_equation: bool,
/// Whether dual-source blending is supported.
@@ -965,28 +953,6 @@ pub struct Capabilities {
pub supports_nonzero_pbo_offsets: bool,
/// Whether the driver supports specifying the texture usage up front.
pub supports_texture_usage: bool,
- /// Whether offscreen render targets can be partially updated.
- pub supports_render_target_partial_update: bool,
- /// Whether we can use SSBOs.
- pub supports_shader_storage_object: bool,
- /// Whether to enforce that texture uploads be batched regardless of what
- /// the pref says.
- pub requires_batched_texture_uploads: Option<bool>,
- /// Whether we are able to use glClear to clear regions of an alpha render target.
- /// If false, we must use a shader to clear instead.
- pub supports_alpha_target_clears: bool,
- /// Whether the driver can reliably upload data to R8 format textures.
- pub supports_r8_texture_upload: bool,
- /// Whether clip-masking is supported natively by the GL implementation
- /// rather than emulated in shaders.
- pub uses_native_clip_mask: bool,
- /// Whether anti-aliasing is supported natively by the GL implementation
- /// rather than emulated in shaders.
- pub uses_native_antialiasing: bool,
- /// Whether the extension GL_OES_EGL_image_external_essl3 is supported. If true, external
- /// textures can be used as normal. If false, external textures can only be rendered with
- /// certain shaders, and must first be copied in to regular textures for others.
- pub supports_image_external_essl3: bool,
/// The name of the renderer, as reported by GL
pub renderer_name: String,
}
@@ -1058,9 +1024,8 @@ pub struct Device {
// device state
bound_textures: [gl::GLuint; 16],
bound_program: gl::GLuint,
- bound_program_name: Rc<std::ffi::CString>,
bound_vao: gl::GLuint,
- bound_read_fbo: (FBOId, DeviceIntPoint),
+ bound_read_fbo: FBOId,
bound_draw_fbo: FBOId,
program_mode_id: UniformLocation,
default_read_fbo: FBOId,
@@ -1071,19 +1036,12 @@ pub struct Device {
depth_available: bool,
upload_method: UploadMethod,
- use_batched_texture_uploads: bool,
- /// Whether to use draw calls instead of regular blitting commands.
- ///
- /// Note: this currently only applies to the batched texture uploads
- /// path.
- use_draw_calls_for_texture_copy: bool,
// HW or API capabilities
capabilities: Capabilities,
color_formats: TextureFormatPair<ImageFormat>,
bgra_formats: TextureFormatPair<gl::GLuint>,
- bgra_pixel_type: gl::GLuint,
swizzle_settings: SwizzleSettings,
depth_format: gl::GLuint,
@@ -1095,7 +1053,6 @@ pub struct Device {
// debug
inside_frame: bool,
- crash_annotator: Option<Box<dyn CrashAnnotator>>,
// resources
resource_override_path: Option<PathBuf>,
@@ -1104,6 +1061,7 @@ pub struct Device {
use_optimized_shaders: bool,
max_texture_size: i32,
+ max_texture_layers: u32,
cached_programs: Option<Rc<ProgramCache>>,
// Frame counter. This is used to map between CPU
@@ -1117,10 +1075,7 @@ pub struct Device {
/// format, we fall back to glTexImage*.
texture_storage_usage: TexStorageUsage,
- /// Required stride alignment for pixel transfers. This may be required for
- /// correctness reasons due to driver bugs, or for performance reasons to
- /// ensure we remain on the fast-path for transfers.
- required_pbo_stride: StrideAlignment,
+ optimal_pbo_stride: StrideAlignment,
/// Whether we must ensure the source strings passed to glShaderSource()
/// are null-terminated, to work around driver bugs.
@@ -1170,8 +1125,12 @@ pub enum DrawTarget {
Texture {
/// Size of the texture in pixels
dimensions: DeviceIntSize,
+ /// The slice within the texture array to draw to
+ layer: LayerIndex,
/// Whether to draw with the texture's associated depth target
with_depth: bool,
+ /// Workaround buffers for devices with broken texture array copy implementations
+ blit_workaround_buffer: Option<(RBOId, FBOId)>,
/// FBO that corresponds to the selected layer / depth mode
fbo_id: FBOId,
/// Native GL texture ID
@@ -1212,18 +1171,21 @@ impl DrawTarget {
pub fn from_texture(
texture: &Texture,
+ layer: usize,
with_depth: bool,
) -> Self {
let fbo_id = if with_depth {
- texture.fbo_with_depth.unwrap()
+ texture.fbos_with_depth[layer]
} else {
- texture.fbo.unwrap()
+ texture.fbos[layer]
};
DrawTarget::Texture {
dimensions: texture.get_dimensions(),
fbo_id,
with_depth,
+ layer,
+ blit_workaround_buffer: texture.blit_workaround_buffer,
id: texture.id,
target: texture.target,
}
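
A hedged caller-side sketch (assumes a `device: Device` and an FBO-initialized
render-target texture in scope): pick a layer and depth mode, then bind it.

    // Draw to layer 2 of `texture` using its depth-enabled FBO. This panics
    // if init_fbos() never ran for the texture (fbos_with_depth is empty).
    let target = DrawTarget::from_texture(&texture, 2, true);
    device.bind_draw_target(target);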
@@ -1249,31 +1211,28 @@ impl DrawTarget {
fb_rect.origin.x += rect.origin.x;
}
}
- DrawTarget::Texture { .. } | DrawTarget::External { .. } | DrawTarget::NativeSurface { .. } => (),
+ DrawTarget::Texture { .. } | DrawTarget::External { .. } => (),
+ DrawTarget::NativeSurface { .. } => {
+ panic!("bug: is this ever used for native surfaces?");
+ }
}
fb_rect
}
- pub fn surface_origin_is_top_left(&self) -> bool {
- match *self {
- DrawTarget::Default { surface_origin_is_top_left, .. } => surface_origin_is_top_left,
- DrawTarget::Texture { .. } | DrawTarget::External { .. } | DrawTarget::NativeSurface { .. } => true,
- }
- }
-
/// Given a scissor rect, convert it to the right coordinate space
/// depending on the draw target kind. If no scissor rect was supplied,
/// returns a scissor rect that encloses the entire render target.
pub fn build_scissor_rect(
&self,
scissor_rect: Option<DeviceIntRect>,
+ content_origin: DeviceIntPoint,
) -> FramebufferIntRect {
let dimensions = self.dimensions();
match scissor_rect {
Some(scissor_rect) => match *self {
DrawTarget::Default { ref rect, .. } => {
- self.to_framebuffer_rect(scissor_rect)
+ self.to_framebuffer_rect(scissor_rect.translate(-content_origin.to_vector()))
.intersection(rect)
.unwrap_or_else(FramebufferIntRect::zero)
}
@@ -1308,33 +1267,15 @@ pub enum ReadTarget {
External {
fbo: FBOId,
},
- /// An FBO bound to a native (OS compositor) surface
- NativeSurface {
- fbo_id: FBOId,
- offset: DeviceIntPoint,
- },
}
impl ReadTarget {
pub fn from_texture(
texture: &Texture,
+ layer: usize,
) -> Self {
ReadTarget::Texture {
- fbo_id: texture.fbo.unwrap(),
- }
- }
-
- fn offset(&self) -> DeviceIntPoint {
- match *self {
- ReadTarget::Default |
- ReadTarget::Texture { .. } |
- ReadTarget::External { .. } => {
- DeviceIntPoint::zero()
- }
-
- ReadTarget::NativeSurface { offset, .. } => {
- offset
- }
+ fbo_id: texture.fbos[layer],
}
}
}
@@ -1342,21 +1283,14 @@ impl ReadTarget {
impl From<DrawTarget> for ReadTarget {
fn from(t: DrawTarget) -> Self {
match t {
- DrawTarget::Default { .. } => {
- ReadTarget::Default
- }
- DrawTarget::NativeSurface { external_fbo_id, offset, .. } => {
- ReadTarget::NativeSurface {
- fbo_id: FBOId(external_fbo_id),
- offset,
- }
- }
- DrawTarget::Texture { fbo_id, .. } => {
- ReadTarget::Texture { fbo_id }
- }
- DrawTarget::External { fbo, .. } => {
- ReadTarget::External { fbo }
+ DrawTarget::Default { .. } => ReadTarget::Default,
+ DrawTarget::NativeSurface { .. } => {
+ unreachable!("bug: native surfaces cannot be read targets");
}
+ DrawTarget::Texture { fbo_id, .. } =>
+ ReadTarget::Texture { fbo_id },
+ DrawTarget::External { fbo, .. } =>
+ ReadTarget::External { fbo },
}
}
}
@@ -1364,11 +1298,11 @@ impl From<DrawTarget> for ReadTarget {
impl Device {
pub fn new(
mut gl: Rc<dyn gl::Gl>,
- crash_annotator: Option<Box<dyn CrashAnnotator>>,
resource_override_path: Option<PathBuf>,
use_optimized_shaders: bool,
upload_method: UploadMethod,
cached_programs: Option<Rc<ProgramCache>>,
+ allow_pixel_local_storage_support: bool,
allow_texture_storage_support: bool,
allow_texture_swizzling: bool,
dump_shader_source: Option<String>,
@@ -1376,18 +1310,15 @@ impl Device {
panic_on_gl_error: bool,
) -> Device {
let mut max_texture_size = [0];
+ let mut max_texture_layers = [0];
unsafe {
gl.get_integer_v(gl::MAX_TEXTURE_SIZE, &mut max_texture_size);
+ gl.get_integer_v(gl::MAX_ARRAY_TEXTURE_LAYERS, &mut max_texture_layers);
}
- // We cap the max texture size at 16384. Some hardware report higher
- // capabilities but get very unstable with very large textures.
- // Bug 1702494 tracks re-evaluating this cap.
- let max_texture_size = max_texture_size[0].min(16384);
-
+ let max_texture_size = max_texture_size[0];
+ let max_texture_layers = max_texture_layers[0] as u32;
let renderer_name = gl.get_string(gl::RENDERER);
- info!("Renderer: {}", renderer_name);
- info!("Max texture size: {}", max_texture_size);
let mut extension_count = [0];
unsafe {
@@ -1467,140 +1398,101 @@ impl Device {
// So we must use glTexStorage instead. See bug 1591436.
let is_emulator = renderer_name.starts_with("Android Emulator");
let avoid_tex_image = is_emulator;
- let mut gl_version = [0; 2];
- unsafe {
- gl.get_integer_v(gl::MAJOR_VERSION, &mut gl_version[0..1]);
- gl.get_integer_v(gl::MINOR_VERSION, &mut gl_version[1..2]);
- }
- info!("GL context {:?} {}.{}", gl.get_type(), gl_version[0], gl_version[1]);
+ let gl_version = gl.get_string(gl::VERSION);
- // We block texture storage on mac because it doesn't support BGRA
- let supports_texture_storage = allow_texture_storage_support && !cfg!(target_os = "macos") &&
+ let supports_texture_storage = allow_texture_storage_support &&
match gl.get_type() {
gl::GlType::Gl => supports_extension(&extensions, "GL_ARB_texture_storage"),
- gl::GlType::Gles => true,
+ // ES 3 always supports glTexStorage, but here we only check for the
+ // extension necessary to use it with BGRA.
+ gl::GlType::Gles => supports_extension(&extensions, "GL_EXT_texture_storage"),
};
let supports_texture_swizzle = allow_texture_swizzling &&
match gl.get_type() {
// see https://www.g-truc.net/post-0734.html
- gl::GlType::Gl => gl_version >= [3, 3] ||
+ gl::GlType::Gl => gl_version.as_str() >= "3.3" ||
supports_extension(&extensions, "GL_ARB_texture_swizzle"),
gl::GlType::Gles => true,
};
- let (color_formats, bgra_formats, bgra_pixel_type, bgra8_sampling_swizzle, texture_storage_usage) = match gl.get_type() {
+ let (color_formats, bgra_formats, bgra8_sampling_swizzle, texture_storage_usage) = match gl.get_type() {
// There is `glTexStorage`, use it and expect RGBA on the input.
gl::GlType::Gl if supports_texture_storage && supports_texture_swizzle => (
TextureFormatPair::from(ImageFormat::RGBA8),
TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA },
- gl::UNSIGNED_BYTE,
Swizzle::Bgra, // pretend it's RGBA, rely on swizzling
TexStorageUsage::Always
),
// There is no `glTexStorage`, upload as `glTexImage` with BGRA input.
gl::GlType::Gl => (
- TextureFormatPair { internal: ImageFormat::BGRA8, external: ImageFormat::BGRA8 },
+ TextureFormatPair { internal: ImageFormat::RGBA8, external: ImageFormat::BGRA8 },
TextureFormatPair { internal: gl::RGBA, external: gl::BGRA },
- gl::UNSIGNED_INT_8_8_8_8_REV,
Swizzle::Rgba, // converted on uploads by the driver, no swizzling needed
TexStorageUsage::Never
),
- // glTexStorage is always supported in GLES 3, but because the GL_EXT_texture_storage
- // extension is supported we can use glTexStorage with BGRA8 as the internal format.
- // Prefer BGRA textures over RGBA.
- gl::GlType::Gles if supports_gles_bgra
- && supports_extension(&extensions, "GL_EXT_texture_storage") =>
- (
+ // We can use glTexStorage with BGRA8 as the internal format.
+ gl::GlType::Gles if supports_gles_bgra && supports_texture_storage => (
TextureFormatPair::from(ImageFormat::BGRA8),
TextureFormatPair { internal: gl::BGRA8_EXT, external: gl::BGRA_EXT },
- gl::UNSIGNED_BYTE,
Swizzle::Rgba, // no conversion needed
TexStorageUsage::Always,
),
- // BGRA is not supported as an internal format with glTexStorage, therefore we will
- // use RGBA textures instead and pretend BGRA data is RGBA when uploading.
- // The swizzling will happen at the texture unit.
+ // We can't use glTexStorage with BGRA8 as the internal format, so
+ // BGRA8 textures must use the unsized BGRA internal format and
+ // glTexImage. Texture storage, which ES 3 always supports, can
+ // still be used for the other formats.
+ gl::GlType::Gles if supports_gles_bgra && !avoid_tex_image => (
+ TextureFormatPair::from(ImageFormat::RGBA8),
+ TextureFormatPair::from(gl::BGRA_EXT),
+ Swizzle::Rgba, // no conversion needed
+ TexStorageUsage::NonBGRA8,
+ ),
+ // BGRA is not supported as an internal format, therefore we will
+ // use RGBA. The swizzling will happen at the texture unit.
gl::GlType::Gles if supports_texture_swizzle => (
TextureFormatPair::from(ImageFormat::RGBA8),
TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA },
- gl::UNSIGNED_BYTE,
Swizzle::Bgra, // pretend it's RGBA, rely on swizzling
TexStorageUsage::Always,
),
- // BGRA is not supported as an internal format with glTexStorage, and we cannot use
- // swizzling either. Therefore prefer BGRA textures over RGBA, but use glTexImage
- // to initialize BGRA textures. glTexStorage can still be used for other formats.
- gl::GlType::Gles if supports_gles_bgra && !avoid_tex_image => (
- TextureFormatPair::from(ImageFormat::BGRA8),
- TextureFormatPair::from(gl::BGRA_EXT),
- gl::UNSIGNED_BYTE,
- Swizzle::Rgba, // no conversion needed
- TexStorageUsage::NonBGRA8,
+ // BGRA and swizzling are not supported. We force the driver to do the conversion.
+ gl::GlType::Gles => (
+ TextureFormatPair::from(ImageFormat::RGBA8),
+ TextureFormatPair { internal: gl::RGBA8, external: gl::BGRA },
+ Swizzle::Rgba,
+ TexStorageUsage::Always,
),
- // Neither BGRA nor swizzling is supported. GLES does not allow format conversion
- // during upload so we must use RGBA textures and pretend BGRA data is RGBA when
- // uploading. Images may be rendered incorrectly as a result.
- gl::GlType::Gles => {
- warn!("Neither BGRA or texture swizzling are supported. Images may be rendered incorrectly.");
- (
- TextureFormatPair::from(ImageFormat::RGBA8),
- TextureFormatPair { internal: gl::RGBA8, external: gl::RGBA },
- gl::UNSIGNED_BYTE,
- Swizzle::Rgba,
- TexStorageUsage::Always,
- )
- }
};
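
To make the selection concrete (assuming supports_gles_bgra reflects the BGRA
extension checks made earlier in this function): an ES 3 driver exposing both
BGRA and GL_EXT_texture_storage takes the BGRA8/glTexStorage arm; one exposing
BGRA but not texture storage takes the glTexImage arm with
TexStorageUsage::NonBGRA8; one with swizzle support but no BGRA pretends BGRA
data is RGBA and swizzles at the texture unit; and one with neither falls
through to the last arm, relying on the driver to convert on upload.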
let is_software_webrender = renderer_name.starts_with("Software WebRender");
- let upload_method = if is_software_webrender {
- // Uploads in SWGL generally reduce to simple memory copies.
- UploadMethod::Immediate
+ let (depth_format, upload_method) = if is_software_webrender {
+ (gl::DEPTH_COMPONENT16, UploadMethod::Immediate)
} else {
- upload_method
+ (gl::DEPTH_COMPONENT24, upload_method)
};
- // Prefer 24-bit depth format. While 16-bit depth also works, it may exhaust depth ids easily.
- let depth_format = gl::DEPTH_COMPONENT24;
info!("GL texture cache {:?}, bgra {:?} swizzle {:?}, texture storage {:?}, depth {:?}",
color_formats, bgra_formats, bgra8_sampling_swizzle, texture_storage_usage, depth_format);
-
- // On Mali-T devices glCopyImageSubData appears to stall the pipeline until any pending
- // renders to the source texture have completed. On Mali-G, it has been observed to
- // indefinitely hang in some circumstances. Using an alternative such as glBlitFramebuffer
- // is preferable on such devices, so pretend we don't support glCopyImageSubData.
- // See bugs 1669494 and 1677757.
- let supports_copy_image_sub_data = if renderer_name.starts_with("Mali") {
- false
- } else {
- supports_extension(&extensions, "GL_EXT_copy_image") ||
- supports_extension(&extensions, "GL_ARB_copy_image")
- };
-
- // We have seen crashes on x86 PowerVR Rogue G6430 devices during GPU cache
- // updates using the scatter shader. It seems likely that GL_EXT_color_buffer_float
- // is broken. See bug 1709408.
- let is_x86_powervr_rogue_g6430 = renderer_name.starts_with("PowerVR Rogue G6430")
- && cfg!(target_arch = "x86");
- let supports_color_buffer_float = match gl.get_type() {
- gl::GlType::Gl => true,
- gl::GlType::Gles if is_x86_powervr_rogue_g6430 => false,
- gl::GlType::Gles => supports_extension(&extensions, "GL_EXT_color_buffer_float"),
- };
+ let supports_copy_image_sub_data = supports_extension(&extensions, "GL_EXT_copy_image") ||
+ supports_extension(&extensions, "GL_ARB_copy_image");
+
+ // Due to a bug on Adreno devices, blitting to an fbo bound to
+ // a non-0th layer of a texture array is not supported.
+ let supports_blit_to_texture_array = !renderer_name.starts_with("Adreno");
+
+ // Check if the device supports the two extensions needed in order to use
+ // pixel local storage.
+ // TODO(gw): Consider if we can remove fb fetch / init, by using PLS for opaque pass too.
+ // TODO(gw): Support EXT_shader_framebuffer_fetch as well.
+ let ext_pixel_local_storage = supports_extension(&extensions, "GL_EXT_shader_pixel_local_storage");
+ let ext_framebuffer_fetch = supports_extension(&extensions, "GL_ARM_shader_framebuffer_fetch");
+ let supports_pixel_local_storage =
+ allow_pixel_local_storage_support &&
+ ext_framebuffer_fetch &&
+ ext_pixel_local_storage;
let is_adreno = renderer_name.starts_with("Adreno");
- // There appears to be a driver bug on older versions of the Adreno
- // driver which prevents usage of persistenly mapped buffers.
- // See bugs 1678585 and 1683936.
- // TODO: only disable feature for affected driver versions.
- let supports_buffer_storage = if is_adreno {
- false
- } else {
- supports_extension(&extensions, "GL_EXT_buffer_storage") ||
- supports_extension(&extensions, "GL_ARB_buffer_storage")
- };
-
// KHR_blend_equation_advanced renders incorrectly on Adreno
// devices. This has only been confirmed up to Adreno 5xx, and has been
// fixed for Android 9, so this condition could be made more specific.
@@ -1625,134 +1517,52 @@ impl Device {
// from GL_TEXTURE_EXTERNAL_OES before binding another to GL_TEXTURE_2D. See bug 1636085.
let requires_texture_external_unbind = is_emulator;
- let is_macos = cfg!(target_os = "macos");
- // && renderer_name.starts_with("AMD");
- // (XXX: we apply this restriction to all GPUs to handle switching)
-
- let is_angle = renderer_name.starts_with("ANGLE");
- let is_adreno_3xx = renderer_name.starts_with("Adreno (TM) 3");
-
- // Some GPUs require the stride of the data during texture uploads to be
- // aligned to certain requirements, either for correctness or performance
- // reasons.
- let required_pbo_stride = if is_adreno_3xx {
- // On Adreno 3xx, alignments of < 128 bytes can result in corrupted
- // glyphs. See bug 1696039.
- StrideAlignment::Bytes(NonZeroUsize::new(128).unwrap())
- } else if is_adreno {
- // On later Adreno devices it must be a multiple of 64 *pixels* to
- // hit the fast path, meaning the value in bytes varies with the
- // texture format. This is purely an optimization.
+ let is_amd_macos = cfg!(target_os = "macos") && renderer_name.starts_with("AMD");
+
+ // On certain GPUs PBO texture upload is only performed asynchronously
+ // if the stride of the data is a multiple of a certain value.
+ // On Adreno it must be a multiple of 64 pixels, meaning the value in
+ // bytes varies with the texture format.
+ // On AMD Mac, it must always be a multiple of 256 bytes.
+ // Other platforms may have similar requirements and should be added
+ // here.
+ // The default value should be 4 bytes.
+ let optimal_pbo_stride = if is_adreno {
StrideAlignment::Pixels(NonZeroUsize::new(64).unwrap())
- } else if is_macos {
- // On AMD Mac, it must always be a multiple of 256 bytes.
- // We apply this restriction to all GPUs to handle switching
+ } else if is_amd_macos {
StrideAlignment::Bytes(NonZeroUsize::new(256).unwrap())
- } else if is_angle {
- // On ANGLE, PBO texture uploads get incorrectly truncated if
- // the stride is greater than the width * bpp.
- StrideAlignment::Bytes(NonZeroUsize::new(1).unwrap())
} else {
- // Other platforms may have similar requirements and should be added
- // here. The default value should be 4 bytes.
StrideAlignment::Bytes(NonZeroUsize::new(4).unwrap())
};
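
Worked numbers (illustrative): with StrideAlignment::Pixels(64) on an RGBA8
upload (4 bytes per pixel) the stride must be a multiple of 64 * 4 = 256 bytes,
so a 100-pixel-wide row (400 bytes) is padded to 512 bytes; under the default
StrideAlignment::Bytes(4) the same 400-byte row needs no padding.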
// On AMD Macs there is a driver bug which causes some texture uploads
// from a non-zero offset within a PBO to fail. See bug 1603783.
- let supports_nonzero_pbo_offsets = !is_macos;
-
- let is_mali = renderer_name.starts_with("Mali");
-
- // On Mali-Gxx and Txxx there is a driver bug when rendering partial updates to
- // offscreen render targets, so we must ensure we render to the entire target.
- // See bug 1663355.
- let supports_render_target_partial_update = !is_mali;
-
- let supports_shader_storage_object = match gl.get_type() {
- // see https://www.g-truc.net/post-0734.html
- gl::GlType::Gl => supports_extension(&extensions, "GL_ARB_shader_storage_buffer_object"),
- gl::GlType::Gles => gl_version >= [3, 1],
- };
-
- // SWGL uses swgl_clipMask() instead of implementing clip-masking in shaders.
- // This allows certain shaders to potentially bypass the more expensive alpha-
- // pass variants if they know the alpha-pass was only required to deal with
- // clip-masking.
- let uses_native_clip_mask = is_software_webrender;
-
- // SWGL uses swgl_antiAlias() instead of implementing anti-aliasing in shaders.
- // As above, this allows bypassing certain alpha-pass variants.
- let uses_native_antialiasing = is_software_webrender;
-
- let supports_image_external_essl3 = supports_extension(&extensions, "GL_OES_EGL_image_external_essl3");
-
- let is_mali_g = renderer_name.starts_with("Mali-G");
-
- let mut requires_batched_texture_uploads = None;
- if is_software_webrender {
- // No benefit to batching texture uploads with swgl.
- requires_batched_texture_uploads = Some(false);
- } else if is_mali_g {
- // On Mali-Gxx the driver really struggles with many small texture uploads,
- // and handles fewer, larger uploads better.
- requires_batched_texture_uploads = Some(true);
- }
-
- // On Mali-Txxx devices we have observed crashes during draw calls when rendering
- // to an alpha target immediately after using glClear to clear regions of it.
- // Using a shader to clear the regions avoids the crash. See bug 1638593.
- let is_mali_t = renderer_name.starts_with("Mali-T");
- let supports_alpha_target_clears = !is_mali_t;
-
- // On Linux we have seen uploads to R8 format textures result in
- // corruption on some AMD cards.
- // See https://bugzilla.mozilla.org/show_bug.cgi?id=1687554#c13
- let supports_r8_texture_upload = if cfg!(target_os = "linux")
- && renderer_name.starts_with("AMD Radeon RX")
- {
- false
- } else {
- true
- };
+ let supports_nonzero_pbo_offsets = !is_amd_macos;
Device {
gl,
base_gl: None,
- crash_annotator,
resource_override_path,
use_optimized_shaders,
upload_method,
- use_batched_texture_uploads: requires_batched_texture_uploads.unwrap_or(false),
- use_draw_calls_for_texture_copy: false,
-
inside_frame: false,
capabilities: Capabilities {
supports_multisampling: false, //TODO
supports_copy_image_sub_data,
- supports_color_buffer_float,
- supports_buffer_storage,
+ supports_blit_to_texture_array,
+ supports_pixel_local_storage,
supports_advanced_blend_equation,
supports_dual_source_blending,
supports_khr_debug,
supports_texture_swizzle,
supports_nonzero_pbo_offsets,
supports_texture_usage,
- supports_render_target_partial_update,
- supports_shader_storage_object,
- requires_batched_texture_uploads,
- supports_alpha_target_clears,
- supports_r8_texture_upload,
- uses_native_clip_mask,
- uses_native_antialiasing,
- supports_image_external_essl3,
renderer_name,
},
color_formats,
bgra_formats,
- bgra_pixel_type,
swizzle_settings: SwizzleSettings {
bgra8_sampling_swizzle,
},
@@ -1762,9 +1572,8 @@ impl Device {
bound_textures: [0; 16],
bound_program: 0,
- bound_program_name: Rc::new(std::ffi::CString::new("").unwrap()),
bound_vao: 0,
- bound_read_fbo: (FBOId(0), DeviceIntPoint::zero()),
+ bound_read_fbo: FBOId(0),
bound_draw_fbo: FBOId(0),
program_mode_id: UniformLocation::INVALID,
default_read_fbo: FBOId(0),
@@ -1773,13 +1582,14 @@ impl Device {
depth_available: true,
max_texture_size,
+ max_texture_layers,
cached_programs,
frame_id: GpuFrameId(0),
extensions,
texture_storage_usage,
requires_null_terminated_shader_source,
requires_texture_external_unbind,
- required_pbo_stride,
+ optimal_pbo_stride,
dump_shader_source,
surface_origin_is_top_left,
@@ -1812,6 +1622,11 @@ impl Device {
self.surface_origin_is_top_left
}
+ /// Returns the limit on texture array layers.
+ pub fn max_texture_layers(&self) -> usize {
+ self.max_texture_layers as usize
+ }
+
pub fn get_capabilities(&self) -> &Capabilities {
&self.capabilities
}
@@ -1850,31 +1665,8 @@ impl Device {
return (self.max_depth_ids() - 1) as f32;
}
- pub fn required_pbo_stride(&self) -> StrideAlignment {
- self.required_pbo_stride
- }
-
- pub fn upload_method(&self) -> &UploadMethod {
- &self.upload_method
- }
-
- pub fn use_batched_texture_uploads(&self) -> bool {
- self.use_batched_texture_uploads
- }
-
- pub fn use_draw_calls_for_texture_copy(&self) -> bool {
- self.use_draw_calls_for_texture_copy
- }
-
- pub fn set_use_batched_texture_uploads(&mut self, enabled: bool) {
- if self.capabilities.requires_batched_texture_uploads.is_some() {
- return;
- }
- self.use_batched_texture_uploads = enabled;
- }
-
- pub fn set_use_draw_calls_for_texture_copy(&mut self, enabled: bool) {
- self.use_draw_calls_for_texture_copy = enabled;
+ pub fn optimal_pbo_stride(&self) -> StrideAlignment {
+ self.optimal_pbo_stride
}
pub fn reset_state(&mut self) {
@@ -1887,8 +1679,8 @@ impl Device {
self.bound_vao = 0;
self.gl.bind_vertex_array(0);
- self.bound_read_fbo = (self.default_read_fbo, DeviceIntPoint::zero());
- self.gl.bind_framebuffer(gl::READ_FRAMEBUFFER, self.default_read_fbo.0);
+ self.bound_read_fbo = self.default_read_fbo;
+ self.gl.bind_framebuffer(gl::READ_FRAMEBUFFER, self.bound_read_fbo.0);
self.bound_draw_fbo = self.default_draw_fbo;
self.gl.bind_framebuffer(gl::DRAW_FRAMEBUFFER, self.bound_draw_fbo.0);
@@ -1914,35 +1706,31 @@ impl Device {
}
pub fn compile_shader(
- &self,
+ gl: &dyn gl::Gl,
name: &str,
shader_type: gl::GLenum,
source: &String,
+ requires_null_terminated_shader_source: bool,
) -> Result<gl::GLuint, ShaderError> {
debug!("compile {}", name);
- let id = self.gl.create_shader(shader_type);
-
- let mut new_source = Cow::from(source.as_str());
- // Ensure the source strings we pass to glShaderSource are
- // null-terminated on buggy platforms.
- if self.requires_null_terminated_shader_source {
- new_source.to_mut().push('\0');
+ let id = gl.create_shader(shader_type);
+ if requires_null_terminated_shader_source {
+ // Ensure the source strings we pass to glShaderSource are
+ // null-terminated on buggy platforms.
+ use std::ffi::CString;
+ let terminated_source = CString::new(source.as_bytes()).unwrap();
+ gl.shader_source(id, &[terminated_source.as_bytes_with_nul()]);
+ } else {
+ gl.shader_source(id, &[source.as_bytes()]);
}
-
- self.gl.shader_source(id, &[new_source.as_bytes()]);
- self.gl.compile_shader(id);
- let log = self.gl.get_shader_info_log(id);
+ gl.compile_shader(id);
+ let log = gl.get_shader_info_log(id);
let mut status = [0];
unsafe {
- self.gl.get_shader_iv(id, gl::COMPILE_STATUS, &mut status);
+ gl.get_shader_iv(id, gl::COMPILE_STATUS, &mut status);
}
if status[0] == 0 {
- let type_str = match shader_type {
- gl::VERTEX_SHADER => "vertex",
- gl::FRAGMENT_SHADER => "fragment",
- _ => panic!("Unexpected shader type {:x}", shader_type),
- };
- error!("Failed to compile {} shader: {}\n{}", type_str, name, log);
+ error!("Failed to compile shader: {}\n{}", name, log);
#[cfg(debug_assertions)]
Self::print_shader_errors(source, &log);
Err(ShaderError::Compilation(name.to_string(), log))
@@ -1966,15 +1754,7 @@ impl Device {
// wrapper from our GL context.
let being_profiled = profiler::thread_is_being_profiled();
let using_wrapper = self.base_gl.is_some();
-
- // We can usually unwind driver stacks on x86 so we don't need to manually instrument
- // gl calls there. Timestamps can be pretty expensive on Windows (2us each and perhaps
- // an opportunity to be descheduled?) which makes the profiles gathered with this
- // turned on less useful so only profile on ARM.
- if cfg!(any(target_arch = "arm", target_arch = "aarch64"))
- && being_profiled
- && !using_wrapper
- {
+ if being_profiled && !using_wrapper {
fn note(name: &str, duration: Duration) {
profiler::add_text_marker(cstr!("OpenGL Calls"), name, duration);
}
@@ -2068,18 +1848,13 @@ impl Device {
self.bind_texture_impl(slot.into(), external_texture.id, external_texture.target, None);
}
- pub fn bind_read_target_impl(
- &mut self,
- fbo_id: FBOId,
- offset: DeviceIntPoint,
- ) {
+ pub fn bind_read_target_impl(&mut self, fbo_id: FBOId) {
debug_assert!(self.inside_frame);
- if self.bound_read_fbo != (fbo_id, offset) {
+ if self.bound_read_fbo != fbo_id {
+ self.bound_read_fbo = fbo_id;
fbo_id.bind(self.gl(), FBOTarget::Read);
}
-
- self.bound_read_fbo = (fbo_id, offset);
}
pub fn bind_read_target(&mut self, target: ReadTarget) {
@@ -2087,10 +1862,9 @@ impl Device {
ReadTarget::Default => self.default_read_fbo,
ReadTarget::Texture { fbo_id } => fbo_id,
ReadTarget::External { fbo } => fbo,
- ReadTarget::NativeSurface { fbo_id, .. } => fbo_id,
};
- self.bind_read_target_impl(fbo_id, target.offset())
+ self.bind_read_target_impl(fbo_id)
}
fn bind_draw_target_impl(&mut self, fbo_id: FBOId) {
@@ -2104,7 +1878,7 @@ impl Device {
pub fn reset_read_target(&mut self) {
let fbo = self.default_read_fbo;
- self.bind_read_target_impl(fbo, DeviceIntPoint::zero());
+ self.bind_read_target_impl(fbo);
}
@@ -2120,7 +1894,7 @@ impl Device {
) {
let (fbo_id, rect, depth_available) = match target {
DrawTarget::Default { rect, .. } => {
- (self.default_draw_fbo, rect, false)
+ (self.default_draw_fbo, rect, true)
}
DrawTarget::Texture { dimensions, fbo_id, with_depth, .. } => {
let rect = FramebufferIntRect::new(
@@ -2206,12 +1980,6 @@ impl Device {
program: &mut Program,
descriptor: &VertexDescriptor,
) -> Result<(), ShaderError> {
- let _guard = CrashAnnotatorGuard::new(
- &self.crash_annotator,
- CrashAnnotation::CompileShader,
- &program.source_info.full_name_cstr
- );
-
assert!(!program.is_initialized());
let mut build_program = true;
let info = &program.source_info;
@@ -2255,7 +2023,7 @@ impl Device {
if build_program {
// Compile the vertex shader
let vs_source = info.compute_source(self, ShaderKind::Vertex);
- let vs_id = match self.compile_shader(&info.full_name(), gl::VERTEX_SHADER, &vs_source) {
+ let vs_id = match Device::compile_shader(&*self.gl, &info.base_filename, gl::VERTEX_SHADER, &vs_source, self.requires_null_terminated_shader_source) {
Ok(vs_id) => vs_id,
Err(err) => return Err(err),
};
@@ -2263,7 +2031,7 @@ impl Device {
// Compile the fragment shader
let fs_source = info.compute_source(self, ShaderKind::Fragment);
let fs_id =
- match self.compile_shader(&info.full_name(), gl::FRAGMENT_SHADER, &fs_source) {
+ match Device::compile_shader(&*self.gl, &info.base_filename, gl::FRAGMENT_SHADER, &fs_source, self.requires_null_terminated_shader_source) {
Ok(fs_id) => fs_id,
Err(err) => {
self.gl.delete_shader(vs_id);
@@ -2356,17 +2124,13 @@ impl Device {
program.is_initialized = true;
program.u_transform = self.gl.get_uniform_location(program.id, "uTransform");
program.u_mode = self.gl.get_uniform_location(program.id, "uMode");
- program.u_texture_size = self.gl.get_uniform_location(program.id, "uTextureSize");
Ok(())
}
- pub fn bind_program(&mut self, program: &Program) -> bool {
+ pub fn bind_program(&mut self, program: &Program) {
debug_assert!(self.inside_frame);
debug_assert!(program.is_initialized());
- if !program.is_initialized() {
- return false;
- }
#[cfg(debug_assertions)]
{
self.shader_is_ready = true;
@@ -2375,20 +2139,19 @@ impl Device {
if self.bound_program != program.id {
self.gl.use_program(program.id);
self.bound_program = program.id;
- self.bound_program_name = program.source_info.full_name_cstr.clone();
self.program_mode_id = UniformLocation(program.u_mode);
}
- true
}
pub fn create_texture(
&mut self,
- target: ImageBufferKind,
+ target: TextureTarget,
format: ImageFormat,
mut width: i32,
mut height: i32,
filter: TextureFilter,
render_target: Option<RenderTargetInfo>,
+ layer_count: i32,
) -> Texture {
debug_assert!(self.inside_frame);
@@ -2403,11 +2166,13 @@ impl Device {
id: self.gl.gen_textures(1)[0],
target: get_gl_target(target),
size: DeviceIntSize::new(width, height),
+ layer_count,
format,
filter,
active_swizzle: Cell::default(),
- fbo: None,
- fbo_with_depth: None,
+ fbos: vec![],
+ fbos_with_depth: vec![],
+ blit_workaround_buffer: None,
last_frame_used: self.frame_id,
flags: TextureFlags::default(),
};
@@ -2420,6 +2185,12 @@ impl Device {
// Allocate storage.
let desc = self.gl_describe_format(texture.format);
+ let is_array = match texture.target {
+ gl::TEXTURE_2D_ARRAY => true,
+ gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => false,
+ _ => panic!("BUG: Unexpected texture target!"),
+ };
+ assert!(is_array || texture.layer_count == 1);
// Firefox doesn't use mipmaps, but Servo uses them for standalone image
// textures larger than 512 pixels. This is the only case where
@@ -2431,9 +2202,6 @@ impl Device {
1
};
- // We never want to upload texture data at the same time as allocating the texture.
- self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
-
// Use glTexStorage where available, since it avoids allocating
// unnecessary mipmap storage and generally improves performance with
// stronger invariants.
@@ -2442,26 +2210,49 @@ impl Device {
TexStorageUsage::NonBGRA8 => texture.format != ImageFormat::BGRA8,
TexStorageUsage::Never => false,
};
- if use_texture_storage {
- self.gl.tex_storage_2d(
- texture.target,
- mipmap_levels,
- desc.internal,
- texture.size.width as gl::GLint,
- texture.size.height as gl::GLint,
- );
- } else {
- self.gl.tex_image_2d(
- texture.target,
- 0,
- desc.internal as gl::GLint,
- texture.size.width as gl::GLint,
- texture.size.height as gl::GLint,
- 0,
- desc.external,
- desc.pixel_type,
- None,
- );
+ match (use_texture_storage, is_array) {
+ (true, true) =>
+ self.gl.tex_storage_3d(
+ gl::TEXTURE_2D_ARRAY,
+ mipmap_levels,
+ desc.internal,
+ texture.size.width as gl::GLint,
+ texture.size.height as gl::GLint,
+ texture.layer_count,
+ ),
+ (true, false) =>
+ self.gl.tex_storage_2d(
+ texture.target,
+ mipmap_levels,
+ desc.internal,
+ texture.size.width as gl::GLint,
+ texture.size.height as gl::GLint,
+ ),
+ (false, true) =>
+ self.gl.tex_image_3d(
+ gl::TEXTURE_2D_ARRAY,
+ 0,
+ desc.internal as gl::GLint,
+ texture.size.width as gl::GLint,
+ texture.size.height as gl::GLint,
+ texture.layer_count,
+ 0,
+ desc.external,
+ desc.pixel_type,
+ None,
+ ),
+ (false, false) =>
+ self.gl.tex_image_2d(
+ texture.target,
+ 0,
+ desc.internal as gl::GLint,
+ texture.size.width as gl::GLint,
+ texture.size.height as gl::GLint,
+ 0,
+ desc.external,
+ desc.pixel_type,
+ None,
+ ),
}
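
For instance (illustrative): a three-layer gl::TEXTURE_2D_ARRAY on a driver
where use_texture_storage is true takes the tex_storage_3d arm, while a plain
gl::TEXTURE_2D without texture storage falls through to tex_image_2d with a
null data pointer, allocating uninitialized storage.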
// Set up FBOs, if required.
@@ -2472,6 +2263,28 @@ impl Device {
}
}
+ // Set up intermediate buffer for blitting to texture, if required.
+ if texture.layer_count > 1 && !self.capabilities.supports_blit_to_texture_array {
+ let rbo = RBOId(self.gl.gen_renderbuffers(1)[0]);
+ let fbo = FBOId(self.gl.gen_framebuffers(1)[0]);
+ self.gl.bind_renderbuffer(gl::RENDERBUFFER, rbo.0);
+ self.gl.renderbuffer_storage(
+ gl::RENDERBUFFER,
+ self.matching_renderbuffer_format(texture.format),
+ texture.size.width as _,
+ texture.size.height as _
+ );
+
+ self.bind_draw_target_impl(fbo);
+ self.gl.framebuffer_renderbuffer(
+ gl::DRAW_FRAMEBUFFER,
+ gl::COLOR_ATTACHMENT0,
+ gl::RENDERBUFFER,
+ rbo.0
+ );
+ texture.blit_workaround_buffer = Some((rbo, fbo));
+ }
+
texture
}
@@ -2498,10 +2311,8 @@ impl Device {
.tex_parameter_i(target, gl::TEXTURE_WRAP_T, gl::CLAMP_TO_EDGE as gl::GLint);
}
- /// Copies the entire contents of one texture to another. The dest texture must be at least
- /// as large as the source texture in each dimension. No scaling is performed, so if the dest
- /// texture is larger than the source texture then some of its pixels will not be written to.
- pub fn copy_entire_texture(
+ /// Copies the contents from one renderable texture to another.
+ pub fn blit_renderable_texture(
&mut self,
dst: &mut Texture,
src: &Texture,
@@ -2509,107 +2320,59 @@ impl Device {
debug_assert!(self.inside_frame);
debug_assert!(dst.size.width >= src.size.width);
debug_assert!(dst.size.height >= src.size.height);
+ debug_assert!(dst.layer_count >= src.layer_count);
- self.copy_texture_sub_region(
- src,
- 0,
- 0,
- dst,
- 0,
- 0,
- src.size.width as _,
- src.size.height as _,
- );
- }
-
- /// Copies the specified subregion from src_texture to dest_texture.
- pub fn copy_texture_sub_region(
- &mut self,
- src_texture: &Texture,
- src_x: usize,
- src_y: usize,
- dest_texture: &Texture,
- dest_x: usize,
- dest_y: usize,
- width: usize,
- height: usize,
- ) {
if self.capabilities.supports_copy_image_sub_data {
- assert_ne!(
- src_texture.id, dest_texture.id,
- "glCopyImageSubData's behaviour is undefined if src and dst images are identical and the rectangles overlap."
- );
+ assert_ne!(src.id, dst.id,
+ "glCopyImageSubData's behaviour is undefined if src and dst images are identical and the rectangles overlap.");
unsafe {
- self.gl.copy_image_sub_data(
- src_texture.id,
- src_texture.target,
- 0,
- src_x as _,
- src_y as _,
- 0,
- dest_texture.id,
- dest_texture.target,
- 0,
- dest_x as _,
- dest_y as _,
- 0,
- width as _,
- height as _,
- 1,
- );
+ self.gl.copy_image_sub_data(src.id, src.target, 0,
+ 0, 0, 0,
+ dst.id, dst.target, 0,
+ 0, 0, 0,
+ src.size.width as _, src.size.height as _, src.layer_count);
}
} else {
- let src_offset = FramebufferIntPoint::new(src_x as i32, src_y as i32);
- let dest_offset = FramebufferIntPoint::new(dest_x as i32, dest_y as i32);
- let size = FramebufferIntSize::new(width as i32, height as i32);
-
- self.blit_render_target(
- ReadTarget::from_texture(src_texture),
- FramebufferIntRect::new(src_offset, size),
- DrawTarget::from_texture(dest_texture, false),
- FramebufferIntRect::new(dest_offset, size),
- // In most cases the filter shouldn't matter, as there is no scaling involved
- // in the blit. We were previously using Linear, but this caused issues when
- // blitting RGBAF32 textures on Mali, so use Nearest to be safe.
- TextureFilter::Nearest,
+ let rect = FramebufferIntRect::new(
+ FramebufferIntPoint::zero(),
+ device_size_as_framebuffer_size(src.get_dimensions()),
);
+ for layer in 0..src.layer_count.min(dst.layer_count) as LayerIndex {
+ self.blit_render_target(
+ ReadTarget::from_texture(src, layer),
+ rect,
+ DrawTarget::from_texture(dst, layer, false),
+ rect,
+ TextureFilter::Linear
+ );
+ }
+ self.reset_draw_target();
+ self.reset_read_target();
}
}
/// Notifies the device that the contents of a render target are no longer
/// needed.
+ ///
+ /// FIXME(bholley): We could/should invalidate the depth targets earlier
+ /// than the color targets, i.e. immediately after each pass.
pub fn invalidate_render_target(&mut self, texture: &Texture) {
- let (fbo, attachments) = if texture.supports_depth() {
- (&texture.fbo_with_depth,
+ let (fbos, attachments) = if texture.supports_depth() {
+ (&texture.fbos_with_depth,
&[gl::COLOR_ATTACHMENT0, gl::DEPTH_ATTACHMENT] as &[gl::GLenum])
} else {
- (&texture.fbo, &[gl::COLOR_ATTACHMENT0] as &[gl::GLenum])
+ (&texture.fbos, &[gl::COLOR_ATTACHMENT0] as &[gl::GLenum])
};
- if let Some(fbo_id) = fbo {
- let original_bound_fbo = self.bound_draw_fbo;
+ let original_bound_fbo = self.bound_draw_fbo;
+ for fbo_id in fbos.iter() {
// Note: The invalidate extension may not be supported, in which
// case this is a no-op. That's ok though, because it's just a
// hint.
self.bind_external_draw_target(*fbo_id);
self.gl.invalidate_framebuffer(gl::FRAMEBUFFER, attachments);
- self.bind_external_draw_target(original_bound_fbo);
}
- }
-
- /// Notifies the device that the contents of the current framebuffer's depth
- /// attachment is no longer needed. Unlike invalidate_render_target, this can
- /// be called even when the contents of the colour attachment is still required.
- /// This should be called before unbinding the framebuffer at the end of a pass,
- /// to allow tiled GPUs to avoid writing the contents back to memory.
- pub fn invalidate_depth_target(&mut self) {
- assert!(self.depth_available);
- let attachments = if self.bound_draw_fbo == self.default_draw_fbo {
- &[gl::DEPTH] as &[gl::GLenum]
- } else {
- &[gl::DEPTH_ATTACHMENT] as &[gl::GLenum]
- };
- self.gl.invalidate_framebuffer(gl::DRAW_FRAMEBUFFER, attachments);
+ self.bind_external_draw_target(original_bound_fbo);
}
/// Notifies the device that a render target is about to be reused.
@@ -2629,49 +2392,71 @@ impl Device {
}
fn init_fbos(&mut self, texture: &mut Texture, with_depth: bool) {
- let (fbo, depth_rb) = if with_depth {
+ let (fbos, depth_rb) = if with_depth {
let depth_target = self.acquire_depth_target(texture.get_dimensions());
- (&mut texture.fbo_with_depth, Some(depth_target))
+ (&mut texture.fbos_with_depth, Some(depth_target))
} else {
- (&mut texture.fbo, None)
+ (&mut texture.fbos, None)
};
// Generate the FBOs.
- assert!(fbo.is_none());
- let fbo_id = FBOId(*self.gl.gen_framebuffers(1).first().unwrap());
- *fbo = Some(fbo_id);
+ assert!(fbos.is_empty());
+ fbos.extend(self.gl.gen_framebuffers(texture.layer_count).into_iter().map(FBOId));
// Bind the FBOs.
let original_bound_fbo = self.bound_draw_fbo;
+ for (fbo_index, &fbo_id) in fbos.iter().enumerate() {
+ self.bind_external_draw_target(fbo_id);
+ match texture.target {
+ gl::TEXTURE_2D_ARRAY => {
+ self.gl.framebuffer_texture_layer(
+ gl::DRAW_FRAMEBUFFER,
+ gl::COLOR_ATTACHMENT0,
+ texture.id,
+ 0,
+ fbo_index as _,
+ )
+ }
+ _ => {
+ assert_eq!(fbo_index, 0);
+ self.gl.framebuffer_texture_2d(
+ gl::DRAW_FRAMEBUFFER,
+ gl::COLOR_ATTACHMENT0,
+ texture.target,
+ texture.id,
+ 0,
+ )
+ }
+ }
- self.bind_external_draw_target(fbo_id);
-
- self.gl.framebuffer_texture_2d(
- gl::DRAW_FRAMEBUFFER,
- gl::COLOR_ATTACHMENT0,
- texture.target,
- texture.id,
- 0,
- );
+ if let Some(depth_rb) = depth_rb {
+ self.gl.framebuffer_renderbuffer(
+ gl::DRAW_FRAMEBUFFER,
+ gl::DEPTH_ATTACHMENT,
+ gl::RENDERBUFFER,
+ depth_rb.0,
+ );
+ }
- if let Some(depth_rb) = depth_rb {
- self.gl.framebuffer_renderbuffer(
- gl::DRAW_FRAMEBUFFER,
- gl::DEPTH_ATTACHMENT,
- gl::RENDERBUFFER,
- depth_rb.0,
+ debug_assert_eq!(
+ self.gl.check_frame_buffer_status(gl::DRAW_FRAMEBUFFER),
+ gl::FRAMEBUFFER_COMPLETE,
+ "Incomplete framebuffer",
);
}
-
- debug_assert_eq!(
- self.gl.check_frame_buffer_status(gl::DRAW_FRAMEBUFFER),
- gl::FRAMEBUFFER_COMPLETE,
- "Incomplete framebuffer",
- );
-
self.bind_external_draw_target(original_bound_fbo);
}
+ fn deinit_fbos(&mut self, fbos: &mut Vec<FBOId>) {
+ if !fbos.is_empty() {
+ let fbo_ids: SmallVec<[gl::GLuint; 8]> = fbos
+ .drain(..)
+ .map(|FBOId(fbo_id)| fbo_id)
+ .collect();
+ self.gl.delete_framebuffers(&fbo_ids[..]);
+ }
+ }
+
fn acquire_depth_target(&mut self, dimensions: DeviceIntSize) -> RBOId {
let gl = &self.gl;
let depth_format = self.depth_format;
@@ -2721,14 +2506,11 @@ impl Device {
TextureFilter::Linear | TextureFilter::Trilinear => gl::LINEAR,
};
- let src_x0 = src_rect.origin.x + self.bound_read_fbo.1.x;
- let src_y0 = src_rect.origin.y + self.bound_read_fbo.1.y;
-
self.gl.blit_framebuffer(
- src_x0,
- src_y0,
- src_x0 + src_rect.size.width,
- src_y0 + src_rect.size.height,
+ src_rect.origin.x,
+ src_rect.origin.y,
+ src_rect.origin.x + src_rect.size.width,
+ src_rect.origin.y + src_rect.size.height,
dest_rect.origin.x,
dest_rect.origin.y,
dest_rect.origin.x + dest_rect.size.width,
@@ -2752,9 +2534,60 @@ impl Device {
self.bind_read_target(src_target);
- self.bind_draw_target(dest_target);
+ match dest_target {
+ DrawTarget::Texture { layer, blit_workaround_buffer, dimensions, id, target, .. } if layer != 0 &&
+ !self.capabilities.supports_blit_to_texture_array =>
+ {
+ // This should have been initialized in create_texture().
+ let (_rbo, fbo) = blit_workaround_buffer.expect("Blit workaround buffer has not been initialized.");
+
+ // Blit from read target to intermediate buffer.
+ self.bind_draw_target_impl(fbo);
+ self.blit_render_target_impl(
+ src_rect,
+ dest_rect,
+ filter
+ );
+
+ // dest_rect may be inverted, so min_x/y() might actually be the
+ // bottom-right, max_x/y() might actually be the top-left,
+ // and width/height might be negative. See servo/euclid#321.
+ // Calculate the non-inverted rect here.
+ let dest_bounds = DeviceIntRect::new(
+ DeviceIntPoint::new(
+ dest_rect.min_x().min(dest_rect.max_x()),
+ dest_rect.min_y().min(dest_rect.max_y()),
+ ),
+ DeviceIntSize::new(
+ dest_rect.size.width.abs(),
+ dest_rect.size.height.abs(),
+ ),
+ ).intersection(&dimensions.into()).unwrap_or_else(DeviceIntRect::zero);
+
+ self.bind_read_target_impl(fbo);
+ self.bind_texture_impl(
+ DEFAULT_TEXTURE,
+ id,
+ target,
+ None, // not depending on swizzle
+ );
- self.blit_render_target_impl(src_rect, dest_rect, filter);
+ // Copy from intermediate buffer to the texture layer.
+ self.gl.copy_tex_sub_image_3d(
+ target, 0,
+ dest_bounds.origin.x, dest_bounds.origin.y,
+ layer as _,
+ dest_bounds.origin.x, dest_bounds.origin.y,
+ dest_bounds.size.width, dest_bounds.size.height,
+ );
+
+ }
+ _ => {
+ self.bind_draw_target(dest_target);
+
+ self.blit_render_target_impl(src_rect, dest_rect, filter);
+ }
+ }
}
/// Performs a blit while flipping vertically. Useful for blitting textures
@@ -2785,18 +2618,15 @@ impl Device {
pub fn delete_texture(&mut self, mut texture: Texture) {
debug_assert!(self.inside_frame);
let had_depth = texture.supports_depth();
- if let Some(fbo) = texture.fbo {
- self.gl.delete_framebuffers(&[fbo.0]);
- texture.fbo = None;
- }
- if let Some(fbo) = texture.fbo_with_depth {
- self.gl.delete_framebuffers(&[fbo.0]);
- texture.fbo_with_depth = None;
- }
-
+ self.deinit_fbos(&mut texture.fbos);
+ self.deinit_fbos(&mut texture.fbos_with_depth);
if had_depth {
self.release_depth_target(texture.get_dimensions());
}
+ if let Some((rbo, fbo)) = texture.blit_workaround_buffer {
+ self.gl.delete_framebuffers(&[fbo.0]);
+ self.gl.delete_renderbuffers(&[rbo.0]);
+ }
self.gl.delete_textures(&[texture.id]);
@@ -2862,7 +2692,6 @@ impl Device {
id: pid,
u_transform: 0,
u_mode: 0,
- u_texture_size: 0,
source_info,
is_initialized: false,
};
@@ -2929,22 +2758,6 @@ impl Device {
self.gl.uniform_1i(self.program_mode_id.0, mode);
}
- /// Sets the uTextureSize uniform. Most shaders do not require this to be called
- /// as they use the textureSize GLSL function instead.
- pub fn set_shader_texture_size(
- &self,
- program: &Program,
- texture_size: DeviceSize,
- ) {
- debug_assert!(self.inside_frame);
- #[cfg(debug_assertions)]
- debug_assert!(self.shader_is_ready);
-
- if program.u_texture_size != -1 {
- self.gl.uniform_2f(program.u_texture_size, texture_size.width, texture_size.height);
- }
- }
-
pub fn create_pbo(&mut self) -> PBO {
let id = self.gl.gen_buffers(1)[0];
PBO {
@@ -3046,7 +2859,7 @@ impl Device {
let bytes_pp = format.bytes_per_pixel() as usize;
let width_bytes = size.width as usize * bytes_pp;
- let dst_stride = round_up_to_multiple(width_bytes, self.required_pbo_stride.num_bytes(format));
+ let dst_stride = round_up_to_multiple(width_bytes, self.optimal_pbo_stride.num_bytes(format));
// The size of the chunk should only need to be (height - 1) * dst_stride + width_bytes,
// however, the Android emulator will error unless it is height * dst_stride.
@@ -3058,19 +2871,78 @@ impl Device {
(dst_size, dst_stride)
}
+ /// (Re)allocates and maps a PBO, returning a `PixelBuffer` if successful.
+ /// The contents can be written to using the `mapping` field.
+ /// The buffer must be bound to `GL_PIXEL_UNPACK_BUFFER` before calling this function,
+ /// and must be unmapped using `glUnmapBuffer` prior to uploading the contents to a texture.
+ fn create_upload_buffer<'a>(&mut self, hint: VertexUsageHint, size: usize) -> Result<PixelBuffer<'a>, ()> {
+ self.gl.buffer_data_untyped(
+ gl::PIXEL_UNPACK_BUFFER,
+ size as _,
+ ptr::null(),
+ hint.to_gl(),
+ );
+ let ptr = self.gl.map_buffer_range(
+ gl::PIXEL_UNPACK_BUFFER,
+ 0,
+ size as _,
+ gl::MAP_WRITE_BIT | gl::MAP_INVALIDATE_BUFFER_BIT,
+ );
+
+ if !ptr.is_null() {
+ let mapping = unsafe {
+ slice::from_raw_parts_mut(ptr as *mut _, size)
+ };
+ Ok(PixelBuffer::new(size, mapping))
+ } else {
+ error!("Failed to map PBO of size {} bytes", size);
+ Err(())
+ }
+ }
+
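
A sketch of the protocol that doc comment prescribes (internal names from this
module; the `mapping` field name comes from the comment above, and error
handling is elided):

    // 1. Bind the PBO before (re)allocating and mapping it.
    self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id);
    let buffer = self.create_upload_buffer(hint, upload_size)?;
    // 2. Write the texel rows through the mapping.
    buffer.mapping[..bytes.len()].copy_from_slice(bytes);
    // 3. Unmap before issuing glTexSubImage* reads from the PBO.
    self.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER);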
/// Returns a `TextureUploader` which can be used to upload texture data to `texture`.
- /// Once uploads have been performed the uploader must be flushed with `TextureUploader::flush()`.
- pub fn upload_texture<'a>(
- &mut self,
- pbo_pool: &'a mut UploadPBOPool,
- ) -> TextureUploader<'a> {
+ /// The total size in bytes is specified by `upload_size`, and must be greater than zero
+ /// and at least as large as the sum of the sizes returned from
+ /// `required_upload_size_and_stride()` for each subsequent call to `TextureUploader::upload()`.
+ pub fn upload_texture<'a, T>(
+ &'a mut self,
+ texture: &'a Texture,
+ pbo: &PBO,
+ upload_size: usize,
+ ) -> TextureUploader<'a, T> {
debug_assert!(self.inside_frame);
+ assert_ne!(upload_size, 0, "Must specify valid upload size");
- pbo_pool.begin_frame(self);
+ self.bind_texture(DEFAULT_TEXTURE, texture, Swizzle::default());
+
+ let uploader_type = match self.upload_method {
+ UploadMethod::Immediate => TextureUploaderType::Immediate,
+ UploadMethod::PixelBuffer(hint) => {
+ self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id);
+ if self.capabilities.supports_nonzero_pbo_offsets {
+ match self.create_upload_buffer(hint, upload_size) {
+ Ok(buffer) => TextureUploaderType::MutliUseBuffer(buffer),
+ Err(_) => {
+ // If allocating the buffer failed, fall back to immediate uploads
+ self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+ TextureUploaderType::Immediate
+ }
+ }
+ } else {
+ // If we cannot upload from non-zero offsets, then we must
+ // reallocate a new buffer for each upload.
+ TextureUploaderType::SingleUseBuffers(hint)
+ }
+ },
+ };
TextureUploader {
- buffers: Vec::new(),
- pbo_pool,
+ target: UploadTarget {
+ device: self,
+ texture,
+ },
+ uploader_type,
+ marker: PhantomData,
}
}
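
A hedged caller-side sketch (the exact TextureUploader::upload() parameter list
sits outside this hunk, so it is elided; `rect_a`/`rect_b` are hypothetical
upload regions):

    let fmt = texture.get_format();
    let (size_a, _stride_a) = device.required_upload_size_and_stride(rect_a.size, fmt);
    let (size_b, _stride_b) = device.required_upload_size_and_stride(rect_b.size, fmt);
    let mut uploader = device.upload_texture(&texture, &pbo, size_a + size_b);
    // ... one uploader.upload(...) call per region; the type parameter T is the
    // texel type, inferred from the data handed to upload() ...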
@@ -3082,17 +2954,35 @@ impl Device {
) {
self.bind_texture(DEFAULT_TEXTURE, texture, Swizzle::default());
let desc = self.gl_describe_format(texture.format);
- self.gl.tex_sub_image_2d(
- texture.target,
- 0,
- 0,
- 0,
- texture.size.width as gl::GLint,
- texture.size.height as gl::GLint,
- desc.external,
- desc.pixel_type,
- texels_to_u8_slice(pixels),
- );
+ match texture.target {
+ gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES =>
+ self.gl.tex_sub_image_2d(
+ texture.target,
+ 0,
+ 0,
+ 0,
+ texture.size.width as gl::GLint,
+ texture.size.height as gl::GLint,
+ desc.external,
+ desc.pixel_type,
+ texels_to_u8_slice(pixels),
+ ),
+ gl::TEXTURE_2D_ARRAY =>
+ self.gl.tex_sub_image_3d(
+ texture.target,
+ 0,
+ 0,
+ 0,
+ 0,
+ texture.size.width as gl::GLint,
+ texture.size.height as gl::GLint,
+ texture.layer_count as gl::GLint,
+ desc.external,
+ desc.pixel_type,
+ texels_to_u8_slice(pixels),
+ ),
+ _ => panic!("BUG: Unexpected texture target!"),
+ }
}
pub fn read_pixels(&mut self, img_desc: &ImageDescriptor) -> Vec<u8> {
@@ -3149,24 +3039,40 @@ impl Device {
}
/// Attaches the provided texture to the current Read FBO binding.
- fn attach_read_texture_raw(&mut self, texture_id: gl::GLuint, target: gl::GLuint) {
- self.gl.framebuffer_texture_2d(
- gl::READ_FRAMEBUFFER,
- gl::COLOR_ATTACHMENT0,
- target,
- texture_id,
- 0,
- )
+ fn attach_read_texture_raw(
+ &mut self, texture_id: gl::GLuint, target: gl::GLuint, layer_id: i32
+ ) {
+ match target {
+ gl::TEXTURE_2D_ARRAY => {
+ self.gl.framebuffer_texture_layer(
+ gl::READ_FRAMEBUFFER,
+ gl::COLOR_ATTACHMENT0,
+ texture_id,
+ 0,
+ layer_id,
+ )
+ }
+ _ => {
+ assert_eq!(layer_id, 0);
+ self.gl.framebuffer_texture_2d(
+ gl::READ_FRAMEBUFFER,
+ gl::COLOR_ATTACHMENT0,
+ target,
+ texture_id,
+ 0,
+ )
+ }
+ }
}
pub fn attach_read_texture_external(
- &mut self, texture_id: gl::GLuint, target: ImageBufferKind
+ &mut self, texture_id: gl::GLuint, target: TextureTarget, layer_id: i32
) {
- self.attach_read_texture_raw(texture_id, get_gl_target(target))
+ self.attach_read_texture_raw(texture_id, get_gl_target(target), layer_id)
}
- pub fn attach_read_texture(&mut self, texture: &Texture) {
- self.attach_read_texture_raw(texture.id, texture.target)
+ pub fn attach_read_texture(&mut self, texture: &Texture, layer_id: i32) {
+ self.attach_read_texture_raw(texture.id, texture.target, layer_id)
}
fn bind_vao_impl(&mut self, id: gl::GLuint) {
@@ -3191,7 +3097,6 @@ impl Device {
descriptor: &VertexDescriptor,
main_vbo_id: VBOId,
instance_vbo_id: VBOId,
- instance_divisor: u32,
ibo_id: IBOId,
owns_vertices_and_indices: bool,
) -> VAO {
@@ -3200,7 +3105,7 @@ impl Device {
self.bind_vao_impl(vao_id);
- descriptor.bind(self.gl(), main_vbo_id, instance_vbo_id, instance_divisor);
+ descriptor.bind(self.gl(), main_vbo_id, instance_vbo_id);
ibo_id.bind(self.gl()); // force it to be a part of VAO
VAO {
@@ -3209,7 +3114,6 @@ impl Device {
main_vbo_id,
instance_vbo_id,
instance_stride,
- instance_divisor,
owns_vertices_and_indices,
}
}
@@ -3260,7 +3164,7 @@ impl Device {
vbo.id = 0;
}
- pub fn create_vao(&mut self, descriptor: &VertexDescriptor, instance_divisor: u32) -> VAO {
+ pub fn create_vao(&mut self, descriptor: &VertexDescriptor) -> VAO {
debug_assert!(self.inside_frame);
let buffer_ids = self.gl.gen_buffers(3);
@@ -3268,7 +3172,7 @@ impl Device {
let main_vbo_id = VBOId(buffer_ids[1]);
let instance_vbo_id = VBOId(buffer_ids[2]);
- self.create_vao_with_vbos(descriptor, main_vbo_id, instance_vbo_id, instance_divisor, ibo_id, true)
+ self.create_vao_with_vbos(descriptor, main_vbo_id, instance_vbo_id, ibo_id, true)
}
pub fn delete_vao(&mut self, mut vao: VAO) {
@@ -3346,7 +3250,6 @@ impl Device {
descriptor,
base_vao.main_vbo_id,
intance_vbo_id,
- base_vao.instance_divisor,
base_vao.ibo_id,
false,
)
@@ -3362,54 +3265,16 @@ impl Device {
self.update_vbo_data(vao.main_vbo_id, vertices, usage_hint)
}
- pub fn update_vao_instances<V: Clone>(
+ pub fn update_vao_instances<V>(
&mut self,
vao: &VAO,
instances: &[V],
usage_hint: VertexUsageHint,
- // if `Some(count)`, each instance is repeated `count` times
- repeat: Option<NonZeroUsize>,
) {
debug_assert_eq!(self.bound_vao, vao.id);
debug_assert_eq!(vao.instance_stride as usize, mem::size_of::<V>());
- match repeat {
- Some(count) => {
- let target = gl::ARRAY_BUFFER;
- self.gl.bind_buffer(target, vao.instance_vbo_id.0);
- let size = instances.len() * count.get() * mem::size_of::<V>();
- self.gl.buffer_data_untyped(
- target,
- size as _,
- ptr::null(),
- usage_hint.to_gl(),
- );
-
- let ptr = match self.gl.get_type() {
- gl::GlType::Gl => {
- self.gl.map_buffer(target, gl::WRITE_ONLY)
- }
- gl::GlType::Gles => {
- self.gl.map_buffer_range(target, 0, size as _, gl::MAP_WRITE_BIT)
- }
- };
- assert!(!ptr.is_null());
-
- let buffer_slice = unsafe {
- slice::from_raw_parts_mut(ptr as *mut V, instances.len() * count.get())
- };
- for (quad, instance) in buffer_slice.chunks_mut(4).zip(instances) {
- quad[0] = instance.clone();
- quad[1] = instance.clone();
- quad[2] = instance.clone();
- quad[3] = instance.clone();
- }
- self.gl.unmap_buffer(target);
- }
- None => {
- self.update_vbo_data(vao.instance_vbo_id, instances, usage_hint);
- }
- }
+ self.update_vbo_data(vao.instance_vbo_id, instances, usage_hint)
}
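With the `repeat` path removed, a caller that still needs the old quadruplicate-per-instance behavior would have to expand instances on the CPU before uploading; a hedged sketch, assuming an `Instance: Clone` type and existing `device`/`vao` values:

    let expanded: Vec<Instance> = instances
        .iter()
        .flat_map(|i| std::iter::repeat(i.clone()).take(4))
        .collect();
    device.update_vao_instances(&vao, &expanded, VertexUsageHint::Stream);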
pub fn update_vao_indices<I>(&mut self, vao: &VAO, indices: &[I], usage_hint: VertexUsageHint) {
@@ -3430,12 +3295,6 @@ impl Device {
#[cfg(debug_assertions)]
debug_assert!(self.shader_is_ready);
- let _guard = CrashAnnotatorGuard::new(
- &self.crash_annotator,
- CrashAnnotation::DrawShader,
- &self.bound_program_name,
- );
-
self.gl.draw_elements(
gl::TRIANGLES,
index_count,
@@ -3449,12 +3308,6 @@ impl Device {
#[cfg(debug_assertions)]
debug_assert!(self.shader_is_ready);
- let _guard = CrashAnnotatorGuard::new(
- &self.crash_annotator,
- CrashAnnotation::DrawShader,
- &self.bound_program_name,
- );
-
self.gl.draw_elements(
gl::TRIANGLES,
index_count,
@@ -3468,12 +3321,6 @@ impl Device {
#[cfg(debug_assertions)]
debug_assert!(self.shader_is_ready);
- let _guard = CrashAnnotatorGuard::new(
- &self.crash_annotator,
- CrashAnnotation::DrawShader,
- &self.bound_program_name,
- );
-
self.gl.draw_arrays(gl::POINTS, first_vertex, vertex_count);
}
@@ -3482,45 +3329,14 @@ impl Device {
#[cfg(debug_assertions)]
debug_assert!(self.shader_is_ready);
- let _guard = CrashAnnotatorGuard::new(
- &self.crash_annotator,
- CrashAnnotation::DrawShader,
- &self.bound_program_name,
- );
-
self.gl.draw_arrays(gl::LINES, first_vertex, vertex_count);
}
- pub fn draw_indexed_triangles(&mut self, index_count: i32) {
- debug_assert!(self.inside_frame);
- #[cfg(debug_assertions)]
- debug_assert!(self.shader_is_ready);
-
- let _guard = CrashAnnotatorGuard::new(
- &self.crash_annotator,
- CrashAnnotation::DrawShader,
- &self.bound_program_name,
- );
-
- self.gl.draw_elements(
- gl::TRIANGLES,
- index_count,
- gl::UNSIGNED_SHORT,
- 0,
- );
- }
-
pub fn draw_indexed_triangles_instanced_u16(&mut self, index_count: i32, instance_count: i32) {
debug_assert!(self.inside_frame);
#[cfg(debug_assertions)]
debug_assert!(self.shader_is_ready);
- let _guard = CrashAnnotatorGuard::new(
- &self.crash_annotator,
- CrashAnnotation::DrawShader,
- &self.bound_program_name,
- );
-
self.gl.draw_elements_instanced(
gl::TRIANGLES,
index_count,
@@ -3682,7 +3498,7 @@ impl Device {
pub fn set_blend_mode_alpha(&mut self) {
self.set_blend_factors(
(gl::SRC_ALPHA, gl::ONE_MINUS_SRC_ALPHA),
- (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
+ (gl::ONE, gl::ONE),
);
}
@@ -3750,24 +3566,6 @@ impl Device {
(gl::ONE, gl::ONE_MINUS_SRC1_ALPHA),
);
}
- pub fn set_blend_mode_multiply_dual_source(&mut self) {
- self.set_blend_factors(
- (gl::ONE_MINUS_DST_ALPHA, gl::ONE_MINUS_SRC1_COLOR),
- (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
- );
- }
- pub fn set_blend_mode_screen(&mut self) {
- self.set_blend_factors(
- (gl::ONE, gl::ONE_MINUS_SRC_COLOR),
- (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
- );
- }
- pub fn set_blend_mode_exclusion(&mut self) {
- self.set_blend_factors(
- (gl::ONE_MINUS_DST_COLOR, gl::ONE_MINUS_SRC_COLOR),
- (gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
- );
- }
pub fn set_blend_mode_show_overdraw(&mut self) {
self.set_blend_factors(
(gl::ONE, gl::ONE_MINUS_SRC_ALPHA),
@@ -3826,6 +3624,18 @@ impl Device {
supports_extension(&self.extensions, extension)
}
+    /// Enables or disables the pixel local storage functionality. The caller
+    /// must have already confirmed that the device supports it.
+ pub fn enable_pixel_local_storage(&mut self, enable: bool) {
+ debug_assert!(self.capabilities.supports_pixel_local_storage);
+
+ if enable {
+ self.gl.enable(gl::SHADER_PIXEL_LOCAL_STORAGE_EXT);
+ } else {
+ self.gl.disable(gl::SHADER_PIXEL_LOCAL_STORAGE_EXT);
+ }
+ }
+
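A plausible call site for the new toggle, assuming the `get_capabilities()` accessor; the debug assertion above requires checking support first:

    if device.get_capabilities().supports_pixel_local_storage {
        device.enable_pixel_local_storage(true);
        // ... passes that read and write pixel local storage ...
        device.enable_pixel_local_storage(false);
    }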
pub fn echo_driver_messages(&self) {
if self.capabilities.supports_khr_debug {
Device::log_driver_messages(self.gl());
@@ -3876,7 +3686,7 @@ impl Device {
internal: self.bgra_formats.internal,
external: self.bgra_formats.external,
read: gl::BGRA,
- pixel_type: self.bgra_pixel_type,
+ pixel_type: gl::UNSIGNED_BYTE,
}
},
ImageFormat::RGBA8 => {
@@ -3914,18 +3724,26 @@ impl Device {
}
}
+ /// Returns a GL format matching an ImageFormat suitable for a renderbuffer.
+ fn matching_renderbuffer_format(&self, format: ImageFormat) -> gl::GLenum {
+ match format {
+ ImageFormat::R8 => gl::R8,
+ ImageFormat::R16 => gl::R16UI,
+ ImageFormat::BGRA8 => panic!("Unable to render to BGRA format!"),
+ ImageFormat::RGBAF32 => gl::RGBA32F,
+ ImageFormat::RG8 => gl::RG8,
+ ImageFormat::RG16 => gl::RG16,
+ ImageFormat::RGBAI32 => gl::RGBA32I,
+ ImageFormat::RGBA8 => gl::RGBA8,
+ }
+ }
+
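Sketch of how the lookup might feed a renderbuffer allocation inside another Device method; `width` and `height` are assumed, and `renderbuffer_storage` is the standard gleam entry point:

    let internal = self.matching_renderbuffer_format(ImageFormat::RGBA8); // gl::RGBA8
    self.gl.renderbuffer_storage(gl::RENDERBUFFER, internal, width, height);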
/// Generates a memory report for the resources managed by the device layer.
- pub fn report_memory(&self, size_op_funs: &MallocSizeOfOps) -> MemoryReport {
+ pub fn report_memory(&self) -> MemoryReport {
let mut report = MemoryReport::default();
for dim in self.depth_targets.keys() {
report.depth_target_textures += depth_target_size_in_bytes(dim);
}
- #[cfg(feature = "sw_compositor")]
- {
- report.swgl += swgl::Context::report_memory(size_op_funs.size_of_op);
- }
-        // unconditionally use size_op_funs, to avoid an unused-parameter warning when sw_compositor is disabled
- let _ = size_op_funs;
report
}
}
@@ -3942,43 +3760,40 @@ pub struct FormatDesc {
pub pixel_type: gl::GLuint,
}
-#[derive(Debug)]
-struct UploadChunk<'a> {
+struct UploadChunk {
rect: DeviceIntRect,
+ layer_index: i32,
stride: Option<i32>,
offset: usize,
format_override: Option<ImageFormat>,
- texture: &'a Texture,
}
-#[derive(Debug)]
struct PixelBuffer<'a> {
+ size_allocated: usize,
size_used: usize,
// small vector avoids heap allocation for a single chunk
- chunks: SmallVec<[UploadChunk<'a>; 1]>,
- inner: UploadPBO,
+ chunks: SmallVec<[UploadChunk; 1]>,
mapping: &'a mut [mem::MaybeUninit<u8>],
}
impl<'a> PixelBuffer<'a> {
fn new(
- pbo: UploadPBO,
+ size_allocated: usize,
+ mapping: &'a mut [mem::MaybeUninit<u8>],
) -> Self {
- let mapping = unsafe {
- slice::from_raw_parts_mut(pbo.mapping.get_ptr().as_ptr(), pbo.pbo.reserved_size)
- };
- Self {
+ PixelBuffer {
+ size_allocated,
size_used: 0,
chunks: SmallVec::new(),
- inner: pbo,
mapping,
}
}
- fn flush_chunks(&mut self, device: &mut Device) {
+ fn flush_chunks(&mut self, target: &mut UploadTarget) {
for chunk in self.chunks.drain(..) {
- TextureUploader::update_impl(device, chunk);
+ target.update_impl(chunk);
}
+ self.size_used = 0;
}
}
@@ -3988,440 +3803,44 @@ impl<'a> Drop for PixelBuffer<'a> {
}
}
-#[derive(Debug)]
-enum PBOMapping {
- Unmapped,
- Transient(ptr::NonNull<mem::MaybeUninit<u8>>),
- Persistent(ptr::NonNull<mem::MaybeUninit<u8>>),
-}
-
-impl PBOMapping {
- fn get_ptr(&self) -> ptr::NonNull<mem::MaybeUninit<u8>> {
- match self {
- PBOMapping::Unmapped => unreachable!("Cannot get pointer to unmapped PBO."),
- PBOMapping::Transient(ptr) => *ptr,
- PBOMapping::Persistent(ptr) => *ptr,
- }
- }
-}
-
-/// A PBO for uploading texture data, managed by UploadPBOPool.
-#[derive(Debug)]
-struct UploadPBO {
- pbo: PBO,
- mapping: PBOMapping,
- can_recycle: bool,
+struct UploadTarget<'a> {
+ device: &'a mut Device,
+ texture: &'a Texture,
}
-impl UploadPBO {
- fn empty() -> Self {
- Self {
- pbo: PBO {
- id: 0,
- reserved_size: 0,
- },
- mapping: PBOMapping::Unmapped,
- can_recycle: false,
- }
- }
+enum TextureUploaderType<'a> {
+ Immediate,
+ SingleUseBuffers(VertexUsageHint),
+ MutliUseBuffer(PixelBuffer<'a>)
}
-/// Allocates and recycles PBOs used for uploading texture data.
-/// Tries to allocate and recycle PBOs of a fixed size, but will make exceptions when
-/// a larger buffer is required or to work around driver bugs.
-pub struct UploadPBOPool {
- /// Usage hint to provide to the driver for optimizations.
- usage_hint: VertexUsageHint,
- /// The preferred size, in bytes, of the buffers to allocate.
- default_size: usize,
- /// List of allocated PBOs ready to be re-used.
- available_buffers: Vec<UploadPBO>,
- /// PBOs which have been returned during the current frame,
- /// and do not yet have an associated sync object.
- returned_buffers: Vec<UploadPBO>,
- /// PBOs which are waiting until their sync object is signalled,
-    /// indicating they are ready to be re-used.
- waiting_buffers: Vec<(gl::GLsync, Vec<UploadPBO>)>,
- /// PBOs which have been orphaned.
- /// We can recycle their IDs but must reallocate their storage.
- orphaned_buffers: Vec<PBO>,
+pub struct TextureUploader<'a, T> {
+ target: UploadTarget<'a>,
+ uploader_type: TextureUploaderType<'a>,
+ marker: PhantomData<T>,
}
-impl UploadPBOPool {
- pub fn new(device: &mut Device, default_size: usize) -> Self {
- let usage_hint = match device.upload_method {
- UploadMethod::Immediate => VertexUsageHint::Stream,
- UploadMethod::PixelBuffer(usage_hint) => usage_hint,
- };
- Self {
- usage_hint,
- default_size,
- available_buffers: Vec::new(),
- returned_buffers: Vec::new(),
- waiting_buffers: Vec::new(),
- orphaned_buffers: Vec::new(),
- }
- }
-
- /// To be called at the beginning of a series of uploads.
- /// Moves any buffers which are now ready to be used from the waiting list to the ready list.
- pub fn begin_frame(&mut self, device: &mut Device) {
- // Iterate through the waiting buffers and check if each fence has been signalled.
- // If a fence is signalled, move its corresponding buffers to the available list.
- // On error, delete the buffers. Stop when we find the first non-signalled fence,
- // and clean up the signalled fences.
- let mut first_not_signalled = self.waiting_buffers.len();
- for (i, (sync, buffers)) in self.waiting_buffers.iter_mut().enumerate() {
- match device.gl.client_wait_sync(*sync, 0, 0) {
- gl::TIMEOUT_EXPIRED => {
- first_not_signalled = i;
- break;
- },
- gl::ALREADY_SIGNALED | gl::CONDITION_SATISFIED => {
- self.available_buffers.extend(buffers.drain(..));
- }
- gl::WAIT_FAILED | _ => {
- warn!("glClientWaitSync error in UploadPBOPool::begin_frame()");
- for buffer in buffers.drain(..) {
- device.delete_pbo(buffer.pbo);
- }
- }
- }
- }
-
- // Delete signalled fences, and remove their now-empty Vecs from waiting_buffers.
- for (sync, _) in self.waiting_buffers.drain(0..first_not_signalled) {
- device.gl.delete_sync(sync);
- }
- }
-
-    /// To be called at the end of a series of uploads.
-    /// Creates a sync object, and adds the buffers returned during this frame to waiting_buffers.
- pub fn end_frame(&mut self, device: &mut Device) {
- if !self.returned_buffers.is_empty() {
- let sync = device.gl.fence_sync(gl::SYNC_GPU_COMMANDS_COMPLETE, 0);
- if !sync.is_null() {
- self.waiting_buffers.push((sync, mem::replace(&mut self.returned_buffers, Vec::new())))
- } else {
- warn!("glFenceSync error in UploadPBOPool::end_frame()");
-
- for buffer in self.returned_buffers.drain(..) {
- device.delete_pbo(buffer.pbo);
- }
- }
- }
- }
-
- /// Obtain a PBO, either by reusing an existing PBO or allocating a new one.
- /// min_size specifies the minimum required size of the PBO. The returned PBO
- /// may be larger than required.
- fn get_pbo(&mut self, device: &mut Device, min_size: usize) -> Result<UploadPBO, ()> {
-
- // If min_size is smaller than our default size, then use the default size.
- // The exception to this is when due to driver bugs we cannot upload from
- // offsets other than zero within a PBO. In this case, there is no point in
- // allocating buffers larger than required, as they cannot be shared.
- let (can_recycle, size) = if min_size <= self.default_size && device.capabilities.supports_nonzero_pbo_offsets {
- (true, self.default_size)
- } else {
- (false, min_size)
- };
-
- // Try to recycle an already allocated PBO.
- if can_recycle {
- if let Some(mut buffer) = self.available_buffers.pop() {
- assert_eq!(buffer.pbo.reserved_size, size);
- assert!(buffer.can_recycle);
-
- device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.pbo.id);
-
- match buffer.mapping {
- PBOMapping::Unmapped => {
- // If buffer was unmapped then transiently map it.
- let ptr = device.gl.map_buffer_range(
- gl::PIXEL_UNPACK_BUFFER,
- 0,
- buffer.pbo.reserved_size as _,
- gl::MAP_WRITE_BIT | gl::MAP_UNSYNCHRONIZED_BIT,
- ) as *mut _;
-
- let ptr = ptr::NonNull::new(ptr).ok_or_else(|| {
- error!("Failed to transiently map PBO of size {} bytes", buffer.pbo.reserved_size);
- })?;
-
- buffer.mapping = PBOMapping::Transient(ptr);
- }
- PBOMapping::Transient(_) => {
- unreachable!("Transiently mapped UploadPBO must be unmapped before returning to pool.");
- }
- PBOMapping::Persistent(_) => {
- }
- }
-
- return Ok(buffer);
- }
- }
-
- // Try to recycle a PBO ID (but not its allocation) from a previously allocated PBO.
- // If there are none available, create a new PBO.
- let mut pbo = match self.orphaned_buffers.pop() {
- Some(pbo) => pbo,
- None => device.create_pbo(),
- };
-
- assert_eq!(pbo.reserved_size, 0);
- pbo.reserved_size = size;
-
- device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id);
- let mapping = if device.capabilities.supports_buffer_storage && can_recycle {
- device.gl.buffer_storage(
- gl::PIXEL_UNPACK_BUFFER,
- pbo.reserved_size as _,
- ptr::null(),
- gl::MAP_WRITE_BIT | gl::MAP_PERSISTENT_BIT,
- );
- let ptr = device.gl.map_buffer_range(
- gl::PIXEL_UNPACK_BUFFER,
- 0,
- pbo.reserved_size as _,
- // GL_MAP_COHERENT_BIT doesn't seem to work on Adreno, so use glFlushMappedBufferRange.
- // kvark notes that coherent memory can be faster on some platforms, such as nvidia,
- // so in the future we could choose which to use at run time.
- gl::MAP_WRITE_BIT | gl::MAP_PERSISTENT_BIT | gl::MAP_FLUSH_EXPLICIT_BIT,
- ) as *mut _;
-
- let ptr = ptr::NonNull::new(ptr).ok_or_else(|| {
- error!("Failed to persistently map PBO of size {} bytes", pbo.reserved_size);
- })?;
-
- PBOMapping::Persistent(ptr)
- } else {
- device.gl.buffer_data_untyped(
- gl::PIXEL_UNPACK_BUFFER,
- pbo.reserved_size as _,
- ptr::null(),
- self.usage_hint.to_gl(),
- );
- let ptr = device.gl.map_buffer_range(
- gl::PIXEL_UNPACK_BUFFER,
- 0,
- pbo.reserved_size as _,
- // Unlike the above code path, where we are re-mapping a buffer that has previously been unmapped,
-                // this buffer has just been created, so there is no need for GL_MAP_UNSYNCHRONIZED_BIT.
- gl::MAP_WRITE_BIT,
- ) as *mut _;
-
- let ptr = ptr::NonNull::new(ptr).ok_or_else(|| {
- error!("Failed to transiently map PBO of size {} bytes", pbo.reserved_size);
- })?;
-
- PBOMapping::Transient(ptr)
- };
-
- Ok(UploadPBO { pbo, mapping, can_recycle })
- }
-
- /// Returns a PBO to the pool. If the PBO is recyclable it is placed in the waiting list.
- /// Otherwise we orphan the allocation immediately, and will subsequently reuse just the ID.
- fn return_pbo(&mut self, device: &mut Device, mut buffer: UploadPBO) {
- assert!(
- !matches!(buffer.mapping, PBOMapping::Transient(_)),
- "Transiently mapped UploadPBO must be unmapped before returning to pool.",
- );
-
- if buffer.can_recycle {
- self.returned_buffers.push(buffer);
- } else {
- device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.pbo.id);
- device.gl.buffer_data_untyped(
- gl::PIXEL_UNPACK_BUFFER,
- 0,
- ptr::null(),
- gl::STREAM_DRAW,
- );
- buffer.pbo.reserved_size = 0;
- self.orphaned_buffers.push(buffer.pbo);
- }
-
- device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
- }
-
- /// Frees all allocated buffers in response to a memory pressure event.
- pub fn on_memory_pressure(&mut self, device: &mut Device) {
- for buffer in self.available_buffers.drain(..) {
- device.delete_pbo(buffer.pbo);
- }
- for buffer in self.returned_buffers.drain(..) {
- device.delete_pbo(buffer.pbo)
- }
- for (sync, buffers) in self.waiting_buffers.drain(..) {
- device.gl.delete_sync(sync);
- for buffer in buffers {
- device.delete_pbo(buffer.pbo)
+impl<'a, T> Drop for TextureUploader<'a, T> {
+ fn drop(&mut self) {
+ match self.uploader_type {
+ TextureUploaderType::MutliUseBuffer(ref mut buffer) => {
+ self.target.device.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER);
+ buffer.flush_chunks(&mut self.target);
+ self.target.device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
}
- }
- // There is no need to delete orphaned PBOs on memory pressure.
- }
-
- /// Generates a memory report.
- pub fn report_memory(&self) -> MemoryReport {
- let mut report = MemoryReport::default();
- for buffer in &self.available_buffers {
- report.texture_upload_pbos += buffer.pbo.reserved_size;
- }
- for buffer in &self.returned_buffers {
- report.texture_upload_pbos += buffer.pbo.reserved_size;
- }
- for (_, buffers) in &self.waiting_buffers {
- for buffer in buffers {
- report.texture_upload_pbos += buffer.pbo.reserved_size;
+ TextureUploaderType::SingleUseBuffers(_) => {
+ self.target.device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
}
+ TextureUploaderType::Immediate => {}
}
- report
- }
-
- pub fn deinit(&mut self, device: &mut Device) {
- for buffer in self.available_buffers.drain(..) {
- device.delete_pbo(buffer.pbo);
- }
- for buffer in self.returned_buffers.drain(..) {
- device.delete_pbo(buffer.pbo)
- }
- for (sync, buffers) in self.waiting_buffers.drain(..) {
- device.gl.delete_sync(sync);
- for buffer in buffers {
- device.delete_pbo(buffer.pbo)
- }
- }
- for pbo in self.orphaned_buffers.drain(..) {
- device.delete_pbo(pbo);
- }
- }
-}
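The per-frame lifecycle the comments above describe, as a sketch; `device: &mut Device` is assumed and 1 MiB is an arbitrary default size:

    let mut pool = UploadPBOPool::new(&mut device, 1024 * 1024);
    pool.begin_frame(&mut device);
    // ... uploads obtain PBOs via get_pbo() and hand them back via return_pbo() ...
    pool.end_frame(&mut device);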
-
-/// Used to perform a series of texture uploads.
-/// Create using Device::upload_texture(). Perform a series of uploads using either
-/// upload(), or stage() and upload_staged(), then call flush().
-pub struct TextureUploader<'a> {
- /// A list of buffers containing uploads that need to be flushed.
- buffers: Vec<PixelBuffer<'a>>,
- /// Pool used to obtain PBOs to fill with texture data.
- pub pbo_pool: &'a mut UploadPBOPool,
-}
-
-impl<'a> Drop for TextureUploader<'a> {
- fn drop(&mut self) {
- assert!(
- thread::panicking() || self.buffers.is_empty(),
- "TextureUploader must be flushed before it is dropped."
- );
- }
-}
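The call sequence the doc comment above prescribes, sketched with assumed `device`, `pbo_pool`, `texture`, `rect`, and `data` values:

    let mut uploader = device.upload_texture(&mut pbo_pool);
    uploader.upload(&mut device, &texture, rect, None, None, data.as_ptr(), data.len());
    uploader.flush(&mut device); // required before drop, per the assertion above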
-
-/// A buffer used to manually stage data to be uploaded to a texture.
-/// Created by calling TextureUploader::stage(), the data can then be written to via get_mapping().
-#[derive(Debug)]
-pub struct UploadStagingBuffer<'a> {
- /// The PixelBuffer containing this upload.
- buffer: PixelBuffer<'a>,
- /// The offset of this upload within the PixelBuffer.
- offset: usize,
- /// The size of this upload.
- size: usize,
- /// The stride of the data within the buffer.
- stride: usize,
-}
-
-impl<'a> UploadStagingBuffer<'a> {
- /// Returns the required stride of the data to be written to the buffer.
- pub fn get_stride(&self) -> usize {
- self.stride
- }
-
- /// Returns a mapping of the data in the buffer, to be written to.
- pub fn get_mapping(&mut self) -> &mut [mem::MaybeUninit<u8>] {
- &mut self.buffer.mapping[self.offset..self.offset + self.size]
}
}
-impl<'a> TextureUploader<'a> {
- /// Returns an UploadStagingBuffer which can be used to manually stage data to be uploaded.
- /// Once the data has been staged, it can be uploaded with upload_staged().
- pub fn stage(
+impl<'a, T> TextureUploader<'a, T> {
+ pub fn upload(
&mut self,
- device: &mut Device,
- format: ImageFormat,
- size: DeviceIntSize,
- ) -> Result<UploadStagingBuffer<'a>, ()> {
- assert!(matches!(device.upload_method, UploadMethod::PixelBuffer(_)), "Texture uploads should only be staged when using pixel buffers.");
-
- // for optimal PBO texture uploads the offset and stride of the data in
- // the buffer may have to be a multiple of a certain value.
- let (dst_size, dst_stride) = device.required_upload_size_and_stride(
- size,
- format,
- );
-
- // Find a pixel buffer with enough space remaining, creating a new one if required.
- let buffer_index = self.buffers.iter().position(|buffer| {
- buffer.size_used + dst_size <= buffer.inner.pbo.reserved_size
- });
- let buffer = match buffer_index {
- Some(i) => self.buffers.swap_remove(i),
- None => PixelBuffer::new(self.pbo_pool.get_pbo(device, dst_size)?),
- };
-
- if !device.capabilities.supports_nonzero_pbo_offsets {
- assert_eq!(buffer.size_used, 0, "PBO uploads from non-zero offset are not supported.");
- }
- assert!(buffer.size_used + dst_size <= buffer.inner.pbo.reserved_size, "PixelBuffer is too small");
-
- let offset = buffer.size_used;
-
- Ok(UploadStagingBuffer {
- buffer,
- offset,
- size: dst_size,
- stride: dst_stride,
- })
- }
-
- /// Uploads manually staged texture data to the specified texture.
- pub fn upload_staged(
- &mut self,
- device: &mut Device,
- texture: &'a Texture,
- rect: DeviceIntRect,
- format_override: Option<ImageFormat>,
- mut staging_buffer: UploadStagingBuffer<'a>,
- ) -> usize {
- let size = staging_buffer.size;
-
- staging_buffer.buffer.chunks.push(UploadChunk {
- rect,
- stride: Some(staging_buffer.stride as i32),
- offset: staging_buffer.offset,
- format_override,
- texture,
- });
- staging_buffer.buffer.size_used += staging_buffer.size;
-
- // Flush the buffer if it is full, otherwise return it to the uploader for further use.
- if staging_buffer.buffer.size_used < staging_buffer.buffer.inner.pbo.reserved_size {
- self.buffers.push(staging_buffer.buffer);
- } else {
- Self::flush_buffer(device, self.pbo_pool, staging_buffer.buffer);
- }
-
- size
- }
-
- /// Uploads texture data to the specified texture.
- pub fn upload<T>(
- &mut self,
- device: &mut Device,
- texture: &'a Texture,
mut rect: DeviceIntRect,
+ layer_index: i32,
stride: Option<i32>,
format_override: Option<ImageFormat>,
data: *const T,
@@ -4430,7 +3849,7 @@ impl<'a> TextureUploader<'a> {
// Textures dimensions may have been clamped by the hardware. Crop the
// upload region to match.
let cropped = rect.intersection(
- &DeviceIntRect::new(DeviceIntPoint::zero(), texture.get_dimensions())
+ &DeviceIntRect::new(DeviceIntPoint::zero(), self.target.texture.get_dimensions())
);
if cfg!(debug_assertions) && cropped.map_or(true, |r| r != rect) {
warn!("Cropping texture upload {:?} to {:?}", rect, cropped);
@@ -4440,7 +3859,7 @@ impl<'a> TextureUploader<'a> {
Some(r) => r,
};
- let bytes_pp = texture.format.bytes_per_pixel() as usize;
+ let bytes_pp = self.target.texture.format.bytes_per_pixel() as usize;
let width_bytes = rect.size.width as usize * bytes_pp;
let src_stride = stride.map_or(width_bytes, |stride| {
@@ -4450,32 +3869,40 @@ impl<'a> TextureUploader<'a> {
let src_size = (rect.size.height as usize - 1) * src_stride + width_bytes;
assert!(src_size <= len * mem::size_of::<T>());
- match device.upload_method {
- UploadMethod::Immediate => {
- if cfg!(debug_assertions) {
- let mut bound_buffer = [0];
- unsafe {
- device.gl.get_integer_v(gl::PIXEL_UNPACK_BUFFER_BINDING, &mut bound_buffer);
+ // for optimal PBO texture uploads the offset and stride of the data in
+ // the buffer may have to be a multiple of a certain value.
+ let (dst_size, dst_stride) = self.target.device.required_upload_size_and_stride(
+ rect.size,
+ self.target.texture.format,
+ );
+
+ // Choose the buffer to use, if any, allocating a new single-use buffer if required.
+ let mut single_use_buffer = None;
+ let mut buffer = match self.uploader_type {
+ TextureUploaderType::MutliUseBuffer(ref mut buffer) => Some(buffer),
+ TextureUploaderType::SingleUseBuffers(hint) => {
+ match self.target.device.create_upload_buffer(hint, dst_size) {
+ Ok(buffer) => {
+ single_use_buffer = Some(buffer);
+ single_use_buffer.as_mut()
+ }
+ Err(_) => {
+ // If allocating the buffer failed, fall back to immediate uploads
+ self.target.device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+ self.uploader_type = TextureUploaderType::Immediate;
+ None
}
- assert_eq!(bound_buffer[0], 0, "GL_PIXEL_UNPACK_BUFFER must not be bound for immediate uploads.");
}
-
- Self::update_impl(device, UploadChunk {
- rect,
- stride: Some(src_stride as i32),
- offset: data as _,
- format_override,
- texture,
- });
-
- width_bytes * rect.size.height as usize
}
- UploadMethod::PixelBuffer(_) => {
- let mut staging_buffer = match self.stage(device, texture.format, rect.size) {
- Ok(staging_buffer) => staging_buffer,
- Err(_) => return 0,
- };
- let dst_stride = staging_buffer.get_stride();
+ TextureUploaderType::Immediate => None,
+ };
+
+ match buffer {
+ Some(ref mut buffer) => {
+ if !self.target.device.capabilities.supports_nonzero_pbo_offsets {
+ assert_eq!(buffer.size_used, 0, "PBO uploads from non-zero offset are not supported.");
+ }
+ assert!(buffer.size_used + dst_size <= buffer.size_allocated, "PixelBuffer is too small");
unsafe {
let src: &[mem::MaybeUninit<u8>] = slice::from_raw_parts(data as *const _, src_size);
@@ -4483,61 +3910,69 @@ impl<'a> TextureUploader<'a> {
if src_stride == dst_stride {
// the stride is already optimal, so simply copy
// the data as-is in to the buffer
- staging_buffer.get_mapping()[..src_size].copy_from_slice(src);
+ let dst_start = buffer.size_used;
+ let dst_end = dst_start + src_size;
+
+ buffer.mapping[dst_start..dst_end].copy_from_slice(src);
} else {
// copy the data line-by-line in to the buffer so
// that it has an optimal stride
for y in 0..rect.size.height as usize {
let src_start = y * src_stride;
let src_end = src_start + width_bytes;
- let dst_start = y * staging_buffer.get_stride();
+ let dst_start = buffer.size_used + y * dst_stride;
let dst_end = dst_start + width_bytes;
- staging_buffer.get_mapping()[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
+ buffer.mapping[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
}
}
}
- self.upload_staged(device, texture, rect, format_override, staging_buffer)
+ buffer.chunks.push(UploadChunk {
+ rect,
+ layer_index,
+ stride: Some(dst_stride as i32),
+ offset: buffer.size_used,
+ format_override,
+ });
+ buffer.size_used += dst_size;
}
- }
- }
+ None => {
+ if cfg!(debug_assertions) {
+ let mut bound_buffer = [0];
+ unsafe {
+ self.target.device.gl.get_integer_v(gl::PIXEL_UNPACK_BUFFER_BINDING, &mut bound_buffer);
+ }
+ assert_eq!(bound_buffer[0], 0, "GL_PIXEL_UNPACK_BUFFER must not be bound for immediate uploads.");
+ }
- fn flush_buffer(device: &mut Device, pbo_pool: &mut UploadPBOPool, mut buffer: PixelBuffer) {
- device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, buffer.inner.pbo.id);
- match buffer.inner.mapping {
- PBOMapping::Unmapped => unreachable!("UploadPBO should be mapped at this stage."),
- PBOMapping::Transient(_) => {
- device.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER);
- buffer.inner.mapping = PBOMapping::Unmapped;
- }
- PBOMapping::Persistent(_) => {
- device.gl.flush_mapped_buffer_range(gl::PIXEL_UNPACK_BUFFER, 0, buffer.size_used as _);
+ self.target.update_impl(UploadChunk {
+ rect,
+ layer_index,
+ stride,
+ offset: data as _,
+ format_override,
+ });
}
}
- buffer.flush_chunks(device);
- let pbo = mem::replace(&mut buffer.inner, UploadPBO::empty());
- pbo_pool.return_pbo(device, pbo);
- }
- /// Flushes all pending texture uploads. Must be called after all
- /// required upload() or upload_staged() calls have been made.
- pub fn flush(mut self, device: &mut Device) {
- for buffer in self.buffers.drain(..) {
- Self::flush_buffer(device, self.pbo_pool, buffer);
+ // Flush the buffer if it is for single-use.
+ if let Some(ref mut buffer) = single_use_buffer {
+ self.target.device.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER);
+ buffer.flush_chunks(&mut self.target);
}
- device.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, 0);
+ dst_size
}
+}
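Illustrative numbers for the size/stride computation above, under a hypothetical 256-byte stride alignment (the real values come from required_upload_size_and_stride):

    let height = 64usize;
    let width_bytes = 100 * 4;                    // 100 BGRA8 pixels per row
    let dst_stride = (width_bytes + 255) & !255;  // 400 rounds up to 512
    let dst_size = (height - 1) * dst_stride + width_bytes; // 32656 bytes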
- fn update_impl(device: &mut Device, chunk: UploadChunk) {
- device.bind_texture(DEFAULT_TEXTURE, chunk.texture, Swizzle::default());
-
- let format = chunk.format_override.unwrap_or(chunk.texture.format);
+impl<'a> UploadTarget<'a> {
+ fn update_impl(&mut self, chunk: UploadChunk) {
+ let format = chunk.format_override.unwrap_or(self.texture.format);
let (gl_format, bpp, data_type) = match format {
ImageFormat::R8 => (gl::RED, 1, gl::UNSIGNED_BYTE),
ImageFormat::R16 => (gl::RED, 2, gl::UNSIGNED_SHORT),
- ImageFormat::BGRA8 => (device.bgra_formats.external, 4, device.bgra_pixel_type),
+ ImageFormat::BGRA8 => (self.device.bgra_formats.external, 4, gl::UNSIGNED_BYTE),
ImageFormat::RGBA8 => (gl::RGBA, 4, gl::UNSIGNED_BYTE),
ImageFormat::RG8 => (gl::RG, 2, gl::UNSIGNED_BYTE),
ImageFormat::RG16 => (gl::RG, 4, gl::UNSIGNED_SHORT),
@@ -4547,11 +3982,11 @@ impl<'a> TextureUploader<'a> {
let row_length = match chunk.stride {
Some(value) => value / bpp,
- None => chunk.texture.size.width,
+ None => self.texture.size.width,
};
if chunk.stride.is_some() {
- device.gl.pixel_store_i(
+ self.device.gl.pixel_store_i(
gl::UNPACK_ROW_LENGTH,
row_length as _,
);
@@ -4560,10 +3995,25 @@ impl<'a> TextureUploader<'a> {
let pos = chunk.rect.origin;
let size = chunk.rect.size;
- match chunk.texture.target {
+ match self.texture.target {
+ gl::TEXTURE_2D_ARRAY => {
+ self.device.gl.tex_sub_image_3d_pbo(
+ self.texture.target,
+ 0,
+ pos.x as _,
+ pos.y as _,
+ chunk.layer_index,
+ size.width as _,
+ size.height as _,
+ 1,
+ gl_format,
+ data_type,
+ chunk.offset,
+ );
+ }
gl::TEXTURE_2D | gl::TEXTURE_RECTANGLE | gl::TEXTURE_EXTERNAL_OES => {
- device.gl.tex_sub_image_2d_pbo(
- chunk.texture.target,
+ self.device.gl.tex_sub_image_2d_pbo(
+ self.texture.target,
0,
pos.x as _,
pos.y as _,
@@ -4578,13 +4028,13 @@ impl<'a> TextureUploader<'a> {
}
// If using tri-linear filtering, build the mip-map chain for this texture.
- if chunk.texture.filter == TextureFilter::Trilinear {
- device.gl.generate_mipmap(chunk.texture.target);
+ if self.texture.filter == TextureFilter::Trilinear {
+ self.device.gl.generate_mipmap(self.texture.target);
}
// Reset row length to 0, otherwise the stride would apply to all texture uploads.
if chunk.stride.is_some() {
- device.gl.pixel_store_i(gl::UNPACK_ROW_LENGTH, 0 as _);
+ self.device.gl.pixel_store_i(gl::UNPACK_ROW_LENGTH, 0 as _);
}
}
}
diff --git a/third_party/webrender/webrender/src/device/query_gl.rs b/third_party/webrender/webrender/src/device/query_gl.rs
index c7fd9a9070d..95d515dc5ec 100644
--- a/third_party/webrender/webrender/src/device/query_gl.rs
+++ b/third_party/webrender/webrender/src/device/query_gl.rs
@@ -7,7 +7,6 @@ use std::mem;
use std::rc::Rc;
use crate::device::GpuFrameId;
-use crate::profiler::GpuProfileTag;
#[derive(Copy, Clone, Debug)]
pub enum GpuDebugMethod {
@@ -16,15 +15,19 @@ pub enum GpuDebugMethod {
KHR,
}
+pub trait NamedTag {
+ fn get_label(&self) -> &str;
+}
+
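A hypothetical tag type showing what the new NamedTag bound asks of GpuProfiler<T> clients:

    #[derive(Debug, Clone, Copy)]
    struct ProfileTag {
        label: &'static str,
    }

    impl NamedTag for ProfileTag {
        fn get_label(&self) -> &str {
            self.label
        }
    }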
#[derive(Debug, Clone)]
-pub struct GpuTimer {
- pub tag: GpuProfileTag,
+pub struct GpuTimer<T> {
+ pub tag: T,
pub time_ns: u64,
}
#[derive(Debug, Clone)]
-pub struct GpuSampler {
- pub tag: GpuProfileTag,
+pub struct GpuSampler<T> {
+ pub tag: T,
pub count: u64,
}
@@ -66,16 +69,16 @@ impl<T> QuerySet<T> {
}
}
-pub struct GpuFrameProfile {
+pub struct GpuFrameProfile<T> {
gl: Rc<dyn gl::Gl>,
- timers: QuerySet<GpuTimer>,
- samplers: QuerySet<GpuSampler>,
+ timers: QuerySet<GpuTimer<T>>,
+ samplers: QuerySet<GpuSampler<T>>,
frame_id: GpuFrameId,
inside_frame: bool,
debug_method: GpuDebugMethod,
}
-impl GpuFrameProfile {
+impl<T> GpuFrameProfile<T> {
fn new(gl: Rc<dyn gl::Gl>, debug_method: GpuDebugMethod) -> Self {
GpuFrameProfile {
gl,
@@ -137,11 +140,13 @@ impl GpuFrameProfile {
self.samplers.pending = 0;
}
}
+}
- fn start_timer(&mut self, tag: GpuProfileTag) -> GpuTimeQuery {
+impl<T: NamedTag> GpuFrameProfile<T> {
+ fn start_timer(&mut self, tag: T) -> GpuTimeQuery {
self.finish_timer();
- let marker = GpuMarker::new(&self.gl, tag.label, self.debug_method);
+ let marker = GpuMarker::new(&self.gl, tag.get_label(), self.debug_method);
if let Some(query) = self.timers.add(GpuTimer { tag, time_ns: 0 }) {
self.gl.begin_query(gl::TIME_ELAPSED, query);
@@ -150,7 +155,7 @@ impl GpuFrameProfile {
GpuTimeQuery(marker)
}
- fn start_sampler(&mut self, tag: GpuProfileTag) -> GpuSampleQuery {
+ fn start_sampler(&mut self, tag: T) -> GpuSampleQuery {
self.finish_sampler();
if let Some(query) = self.samplers.add(GpuSampler { tag, count: 0 }) {
@@ -160,7 +165,7 @@ impl GpuFrameProfile {
GpuSampleQuery
}
- fn build_samples(&mut self) -> (GpuFrameId, Vec<GpuTimer>, Vec<GpuSampler>) {
+ fn build_samples(&mut self) -> (GpuFrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
debug_assert!(!self.inside_frame);
let gl = &self.gl;
@@ -176,27 +181,27 @@ impl GpuFrameProfile {
}
}
-impl Drop for GpuFrameProfile {
+impl<T> Drop for GpuFrameProfile<T> {
fn drop(&mut self) {
self.disable_timers();
self.disable_samplers();
}
}
-const NUM_PROFILE_FRAMES: usize = 4;
-
-pub struct GpuProfiler {
+pub struct GpuProfiler<T> {
gl: Rc<dyn gl::Gl>,
- frames: [GpuFrameProfile; NUM_PROFILE_FRAMES],
+ frames: Vec<GpuFrameProfile<T>>,
next_frame: usize,
debug_method: GpuDebugMethod
}
-impl GpuProfiler {
+impl<T> GpuProfiler<T> {
pub fn new(gl: Rc<dyn gl::Gl>, debug_method: GpuDebugMethod) -> Self {
- let f = || GpuFrameProfile::new(Rc::clone(&gl), debug_method);
+ const MAX_PROFILE_FRAMES: usize = 4;
+ let frames = (0 .. MAX_PROFILE_FRAMES)
+ .map(|_| GpuFrameProfile::new(Rc::clone(&gl), debug_method))
+ .collect();
- let frames = [f(), f(), f(), f()];
GpuProfiler {
gl,
next_frame: 0,
@@ -235,8 +240,10 @@ impl GpuProfiler {
frame.disable_samplers();
}
}
+}
- pub fn build_samples(&mut self) -> (GpuFrameId, Vec<GpuTimer>, Vec<GpuSampler>) {
+impl<T: NamedTag> GpuProfiler<T> {
+ pub fn build_samples(&mut self) -> (GpuFrameId, Vec<GpuTimer<T>>, Vec<GpuSampler<T>>) {
self.frames[self.next_frame].build_samples()
}
@@ -249,11 +256,11 @@ impl GpuProfiler {
self.next_frame = (self.next_frame + 1) % self.frames.len();
}
- pub fn start_timer(&mut self, tag: GpuProfileTag) -> GpuTimeQuery {
+ pub fn start_timer(&mut self, tag: T) -> GpuTimeQuery {
self.frames[self.next_frame].start_timer(tag)
}
- pub fn start_sampler(&mut self, tag: GpuProfileTag) -> GpuSampleQuery {
+ pub fn start_sampler(&mut self, tag: T) -> GpuSampleQuery {
self.frames[self.next_frame].start_sampler(tag)
}
diff --git a/third_party/webrender/webrender/src/filterdata.rs b/third_party/webrender/webrender/src/filterdata.rs
index d399b2252e2..3bbfcebea5e 100644
--- a/third_party/webrender/webrender/src/filterdata.rs
+++ b/third_party/webrender/webrender/src/filterdata.rs
@@ -7,7 +7,7 @@ use crate::gpu_cache::{GpuCacheHandle};
use crate::frame_builder::FrameBuildingState;
use crate::gpu_cache::GpuDataRequest;
use crate::intern;
-use api::{ComponentTransferFuncType};
+use api::{FilterDataIntern, ComponentTransferFuncType};
pub type FilterDataHandle = intern::Handle<FilterDataIntern>;
@@ -154,15 +154,10 @@ impl SFilterDataTemplate {
}
}
-#[derive(Copy, Clone, Debug, MallocSizeOf)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub enum FilterDataIntern {}
-
impl intern::Internable for FilterDataIntern {
type Key = SFilterDataKey;
type StoreData = SFilterDataTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_FILTER_DATA;
}
fn push_component_transfer_data(
diff --git a/third_party/webrender/webrender/src/frame_builder.rs b/third_party/webrender/webrender/src/frame_builder.rs
index 1d3cb27ca87..afb56778c95 100644
--- a/third_party/webrender/webrender/src/frame_builder.rs
+++ b/third_party/webrender/webrender/src/frame_builder.rs
@@ -2,37 +2,34 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{ColorF, DebugFlags, FontRenderMode, PremultipliedColorF};
+use api::{ColorF, DebugFlags, DocumentLayer, FontRenderMode, PremultipliedColorF};
use api::units::*;
use crate::batch::{BatchBuilder, AlphaBatchBuilder, AlphaBatchContainer};
-use crate::clip::{ClipStore, ClipChainStack};
+use crate::clip::{ClipStore, ClipChainStack, ClipInstance};
use crate::spatial_tree::{SpatialTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex};
-use crate::composite::{CompositorKind, CompositeState, CompositeStatePreallocator};
-use crate::debug_item::DebugItem;
+use crate::composite::{CompositorKind, CompositeState};
+use crate::debug_render::DebugItem;
use crate::gpu_cache::{GpuCache, GpuCacheHandle};
-use crate::gpu_types::{PrimitiveHeaders, TransformPalette, ZBufferIdGenerator};
+use crate::gpu_types::{PrimitiveHeaders, TransformPalette, UvRectKind, ZBufferIdGenerator};
use crate::gpu_types::TransformData;
-use crate::internal_types::{FastHashMap, PlaneSplitter};
-use crate::picture::{DirtyRegion, PictureUpdateState, SliceId, TileCacheInstance};
-use crate::picture::{SurfaceInfo, SurfaceIndex, ROOT_SURFACE_INDEX, SurfaceRenderTasks, SubSliceIndex};
-use crate::picture::{BackdropKind, SubpixelMode, TileCacheLogger, RasterConfig, PictureCompositeMode};
-use crate::prepare::prepare_primitives;
-use crate::prim_store::{PictureIndex, PrimitiveDebugId};
-use crate::prim_store::{DeferredResolve};
-use crate::profiler::{self, TransactionProfile};
-use crate::render_backend::{DataStores, FrameStamp, FrameId, ScratchBuffer};
+use crate::internal_types::{FastHashMap, PlaneSplitter, SavedTargetIndex};
+use crate::picture::{PictureUpdateState, SurfaceInfo, ROOT_SURFACE_INDEX, SurfaceIndex, RecordedDirtyRegion};
+use crate::picture::{RetainedTiles, TileCacheInstance, DirtyRegion, SurfaceRenderTasks, SubpixelMode};
+use crate::picture::{BackdropKind, TileCacheLogger};
+use crate::prim_store::{SpaceMapper, PictureIndex, PrimitiveDebugId, PrimitiveScratchBuffer};
+use crate::prim_store::{DeferredResolve, PrimitiveVisibilityMask};
+use crate::profiler::{FrameProfileCounters, TextureCacheProfileCounters, ResourceProfileCounters};
+use crate::render_backend::{DataStores, FrameStamp, FrameId};
use crate::render_target::{RenderTarget, PictureCacheTarget, TextureCacheRenderTarget};
-use crate::render_target::{RenderTargetContext, RenderTargetKind, AlphaRenderTarget, ColorRenderTarget};
-use crate::render_task_graph::{RenderTaskId, RenderTaskGraph, Pass, SubPassSurface};
-use crate::render_task_graph::{RenderPass, RenderTaskGraphBuilder};
-use crate::render_task::{RenderTaskLocation, RenderTaskKind, StaticRenderTaskSurface};
+use crate::render_target::{RenderTargetContext, RenderTargetKind};
+use crate::render_task_graph::{RenderTaskId, RenderTaskGraph, RenderTaskGraphCounters};
+use crate::render_task_graph::{RenderPassKind, RenderPass};
+use crate::render_task::{RenderTask, RenderTaskLocation, RenderTaskKind};
use crate::resource_cache::{ResourceCache};
use crate::scene::{BuiltScene, SceneProperties};
-use crate::space::SpaceMapper;
use crate::segment::SegmentBuilder;
use std::{f32, mem};
-use crate::util::{VecHelper, Recycler, Preallocator};
-use crate::visibility::{update_primitive_visibility, FrameVisibilityState, FrameVisibilityContext};
+use crate::util::MaxRect;
#[derive(Clone, Copy, Debug, PartialEq)]
@@ -58,23 +55,19 @@ pub struct FrameBuilderConfig {
pub dual_source_blending_is_supported: bool,
pub dual_source_blending_is_enabled: bool,
pub chase_primitive: ChasePrimitive,
+    /// The immutable global picture caching flag from `RendererOptions`
+ pub global_enable_picture_caching: bool,
/// True if we're running tests (i.e. via wrench).
pub testing: bool,
pub gpu_supports_fast_clears: bool,
pub gpu_supports_advanced_blend: bool,
pub advanced_blend_is_coherent: bool,
- pub gpu_supports_render_target_partial_update: bool,
- /// Whether ImageBufferKind::TextureExternal images must first be copied
- /// to a regular texture before rendering.
- pub external_images_require_copy: bool,
pub batch_lookback_count: usize,
pub background_color: Option<ColorF>,
pub compositor_kind: CompositorKind,
pub tile_size_override: Option<DeviceIntSize>,
pub max_depth_ids: i32,
pub max_target_size: i32,
- pub force_invalidation: bool,
- pub is_software: bool,
}
/// A set of common / global resources that are retained between
@@ -121,51 +114,56 @@ impl FrameGlobalResources {
}
}
-pub struct FrameScratchBuffer {
- surfaces: Vec<SurfaceInfo>,
- dirty_region_stack: Vec<DirtyRegion>,
- surface_stack: Vec<SurfaceIndex>,
- clip_chain_stack: ClipChainStack,
+/// Produces the frames that are sent to the renderer.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct FrameBuilder {
+ /// Cache of surface tiles from the previous frame builder
+ /// that can optionally be consumed by this frame builder.
+ pending_retained_tiles: RetainedTiles,
+ pub globals: FrameGlobalResources,
}
-impl Default for FrameScratchBuffer {
- fn default() -> Self {
- FrameScratchBuffer {
- surfaces: Vec::new(),
- dirty_region_stack: Vec::new(),
- surface_stack: Vec::new(),
- clip_chain_stack: ClipChainStack::new(),
- }
- }
+pub struct FrameVisibilityContext<'a> {
+ pub spatial_tree: &'a SpatialTree,
+ pub global_screen_world_rect: WorldRect,
+ pub global_device_pixel_scale: DevicePixelScale,
+ pub surfaces: &'a [SurfaceInfo],
+ pub debug_flags: DebugFlags,
+ pub scene_properties: &'a SceneProperties,
+ pub config: FrameBuilderConfig,
}
-impl FrameScratchBuffer {
- pub fn begin_frame(&mut self) {
- self.surfaces.clear();
- self.dirty_region_stack.clear();
- self.surface_stack.clear();
- self.clip_chain_stack.clear();
- }
+pub struct FrameVisibilityState<'a> {
+ pub clip_store: &'a mut ClipStore,
+ pub resource_cache: &'a mut ResourceCache,
+ pub gpu_cache: &'a mut GpuCache,
+ pub scratch: &'a mut PrimitiveScratchBuffer,
+ pub tile_cache: Option<Box<TileCacheInstance>>,
+ pub retained_tiles: &'a mut RetainedTiles,
+ pub data_stores: &'a mut DataStores,
+ pub clip_chain_stack: ClipChainStack,
+ pub render_tasks: &'a mut RenderTaskGraph,
+ pub composite_state: &'a mut CompositeState,
+ /// A stack of currently active off-screen surfaces during the
+ /// visibility frame traversal.
+ pub surface_stack: Vec<SurfaceIndex>,
+}
- pub fn recycle(&mut self, recycler: &mut Recycler) {
- recycler.recycle_vec(&mut self.surfaces);
-        // Don't call recycle on the stacks because the recycler's
-        // role is to get rid of allocations when the capacity
-        // is much larger than the lengths. With stacks the
- // length varies through the frame but is supposedly
- // back to zero by the end so we would always throw the
- // allocation away.
+impl<'a> FrameVisibilityState<'a> {
+ pub fn push_surface(
+ &mut self,
+ surface_index: SurfaceIndex,
+ shared_clips: &[ClipInstance],
+ spatial_tree: &SpatialTree,
+ ) {
+ self.surface_stack.push(surface_index);
+ self.clip_chain_stack.push_surface(shared_clips, spatial_tree);
}
-}
-/// Produces the frames that are sent to the renderer.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-pub struct FrameBuilder {
- pub globals: FrameGlobalResources,
- #[cfg_attr(feature = "capture", serde(skip))]
- prim_headers_prealloc: Preallocator,
- #[cfg_attr(feature = "capture", serde(skip))]
- composite_state_prealloc: CompositeStatePreallocator,
+ pub fn pop_surface(&mut self) {
+ self.surface_stack.pop().unwrap();
+ self.clip_chain_stack.pop_surface();
+ }
}
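The two methods are meant to stay balanced around each off-screen surface visit; a sketch with an assumed `state: FrameVisibilityState`:

    state.push_surface(surface_index, &shared_clips, spatial_tree);
    // ... traverse the surface's primitives ...
    state.pop_surface();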
pub struct FrameBuildingContext<'a> {
@@ -179,7 +177,8 @@ pub struct FrameBuildingContext<'a> {
}
pub struct FrameBuildingState<'a> {
- pub rg_builder: &'a mut RenderTaskGraphBuilder,
+ pub render_tasks: &'a mut RenderTaskGraph,
+ pub profile_counters: &'a mut FrameProfileCounters,
pub clip_store: &'a mut ClipStore,
pub resource_cache: &'a mut ResourceCache,
pub gpu_cache: &'a mut GpuCache,
@@ -188,7 +187,6 @@ pub struct FrameBuildingState<'a> {
pub surfaces: &'a mut Vec<SurfaceInfo>,
pub dirty_region_stack: Vec<DirtyRegion>,
pub composite_state: &'a mut CompositeState,
- pub num_visible_primitives: u32,
}
impl<'a> FrameBuildingState<'a> {
@@ -206,76 +204,6 @@ impl<'a> FrameBuildingState<'a> {
pub fn pop_dirty_region(&mut self) {
self.dirty_region_stack.pop().unwrap();
}
-
- /// Initialize render tasks for a surface that is tiled (currently applies
- /// only to picture cache surfaces).
- pub fn init_surface_tiled(
- &mut self,
- surface_index: SurfaceIndex,
- tasks: Vec<RenderTaskId>,
- device_rect: DeviceRect,
- ) {
- let surface = &mut self.surfaces[surface_index.0];
- assert!(surface.render_tasks.is_none());
- surface.render_tasks = Some(SurfaceRenderTasks::Tiled(tasks));
- surface.device_rect = Some(device_rect);
- }
-
- /// Initialize render tasks for a simple surface, that contains only a
- /// single render task.
- pub fn init_surface(
- &mut self,
- surface_index: SurfaceIndex,
- task_id: RenderTaskId,
- parent_surface_index: SurfaceIndex,
- device_rect: DeviceRect,
- ) {
- let surface = &mut self.surfaces[surface_index.0];
- assert!(surface.render_tasks.is_none());
- surface.render_tasks = Some(SurfaceRenderTasks::Simple(task_id));
- surface.device_rect = Some(device_rect);
-
- self.add_child_render_task(
- parent_surface_index,
- task_id,
- );
- }
-
- /// Initialize render tasks for a surface that is made up of a chain of
- /// render tasks, where the final output render task is different than the
- /// input render task (for example, a blur pass on a picture).
- pub fn init_surface_chain(
- &mut self,
- surface_index: SurfaceIndex,
- root_task_id: RenderTaskId,
- port_task_id: RenderTaskId,
- parent_surface_index: SurfaceIndex,
- device_rect: DeviceRect,
- ) {
- let surface = &mut self.surfaces[surface_index.0];
- assert!(surface.render_tasks.is_none());
- surface.render_tasks = Some(SurfaceRenderTasks::Chained { root_task_id, port_task_id });
- surface.device_rect = Some(device_rect);
-
- self.add_child_render_task(
- parent_surface_index,
- root_task_id,
- );
- }
-
- /// Add a render task as a dependency of a given surface.
- pub fn add_child_render_task(
- &mut self,
- surface_index: SurfaceIndex,
- child_task_id: RenderTaskId,
- ) {
- add_child_render_task(
- surface_index,
- child_task_id,
- self.surfaces,
- self.rg_builder,
- );
- }
}
/// Immutable context of a picture when processing children.
@@ -283,6 +211,7 @@ impl<'a> FrameBuildingState<'a> {
pub struct PictureContext {
pub pic_index: PictureIndex,
pub apply_local_clip_rect: bool,
+ pub is_passthrough: bool,
pub surface_spatial_node_index: SpatialNodeIndex,
pub raster_spatial_node_index: SpatialNodeIndex,
/// The surface that this picture will render on.
@@ -306,12 +235,26 @@ pub struct PictureState {
impl FrameBuilder {
pub fn new() -> Self {
FrameBuilder {
+ pending_retained_tiles: RetainedTiles::new(),
globals: FrameGlobalResources::empty(),
- prim_headers_prealloc: Preallocator::new(0),
- composite_state_prealloc: CompositeStatePreallocator::default(),
}
}
+ /// Provide any cached surface tiles from the previous frame builder
+ /// to a new frame builder. These will be consumed or dropped the
+ /// first time a new frame builder creates a frame.
+ pub fn set_retained_resources(&mut self, retained_tiles: RetainedTiles) {
+ // In general, the pending retained tiles are consumed by the frame
+ // builder the first time a frame is built after a new scene has
+ // arrived. However, if two scenes arrive in quick succession, the
+ // frame builder may not have had a chance to build a frame and
+ // consume the pending tiles. In this case, the pending tiles will
+ // be lost, causing a full invalidation of the entire screen. To
+ // avoid this, if there are still pending tiles, include them in
+ // the retained tiles passed to the next frame builder.
+ self.pending_retained_tiles.merge(retained_tiles);
+ }
+
/// Compute the contribution (bounding rectangles, and resources) of layers and their
/// primitives in screen space.
fn build_layer_screen_rects_and_cull_layers(
@@ -320,20 +263,25 @@ impl FrameBuilder {
global_screen_world_rect: WorldRect,
resource_cache: &mut ResourceCache,
gpu_cache: &mut GpuCache,
- rg_builder: &mut RenderTaskGraphBuilder,
+ render_tasks: &mut RenderTaskGraph,
+ profile_counters: &mut FrameProfileCounters,
global_device_pixel_scale: DevicePixelScale,
scene_properties: &SceneProperties,
transform_palette: &mut TransformPalette,
data_stores: &mut DataStores,
- scratch: &mut ScratchBuffer,
+ surfaces: &mut Vec<SurfaceInfo>,
+ scratch: &mut PrimitiveScratchBuffer,
debug_flags: DebugFlags,
+ texture_cache_profile: &mut TextureCacheProfileCounters,
composite_state: &mut CompositeState,
tile_cache_logger: &mut TileCacheLogger,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
- profile: &mut TransactionProfile,
- ) {
+ ) -> Option<RenderTaskId> {
profile_scope!("build_layer_screen_rects_and_cull_layers");
+ if scene.prim_store.pictures.is_empty() {
+ return None
+ }
+
scratch.begin_frame();
let root_spatial_node_index = scene.spatial_tree.root_reference_frame_index();
@@ -353,6 +301,21 @@ impl FrameBuilder {
fb_config: &scene.config,
};
+ let root_render_task_id = render_tasks.add().init(
+ RenderTask::new_picture(
+ RenderTaskLocation::Fixed(scene.output_rect),
+ scene.output_rect.size.to_f32(),
+ scene.root_pic_index,
+ DeviceIntPoint::zero(),
+ UvRectKind::Rect,
+ ROOT_SPATIAL_NODE_INDEX,
+ global_device_pixel_scale,
+ PrimitiveVisibilityMask::all(),
+ None,
+ None,
+ )
+ );
+
// Construct a dummy root surface, that represents the
// main framebuffer surface.
let root_surface = SurfaceInfo::new(
@@ -364,9 +327,13 @@ impl FrameBuilder {
global_device_pixel_scale,
(1.0, 1.0),
);
- let mut surfaces = scratch.frame.surfaces.take();
surfaces.push(root_surface);
+ let mut retained_tiles = mem::replace(
+ &mut self.pending_retained_tiles,
+ RetainedTiles::new(),
+ );
+
// The first major pass of building a frame is to walk the picture
// tree. This pass must be quick (it should never touch individual
// primitives). For now, all we do here is determine which pictures
@@ -374,151 +341,169 @@ impl FrameBuilder {
// set up render tasks, determine scaling of surfaces, and detect
// which surfaces have valid cached surfaces that don't need to
// be rendered this frame.
- for pic_index in &scene.tile_cache_pictures {
- PictureUpdateState::update_all(
- &mut scratch.picture,
- &mut surfaces,
- *pic_index,
- &mut scene.prim_store.pictures,
- &frame_context,
- gpu_cache,
- &scene.clip_store,
- data_stores,
- );
- }
+ PictureUpdateState::update_all(
+ surfaces,
+ scene.root_pic_index,
+ &mut scene.prim_store.pictures,
+ &frame_context,
+ gpu_cache,
+ &scene.clip_store,
+ data_stores,
+ composite_state,
+ );
{
profile_scope!("UpdateVisibility");
profile_marker!("UpdateVisibility");
- profile.start_time(profiler::FRAME_VISIBILITY_TIME);
let visibility_context = FrameVisibilityContext {
global_device_pixel_scale,
spatial_tree: &scene.spatial_tree,
global_screen_world_rect,
- surfaces: &mut surfaces,
+ surfaces,
debug_flags,
scene_properties,
config: scene.config,
};
let mut visibility_state = FrameVisibilityState {
- clip_chain_stack: scratch.frame.clip_chain_stack.take(),
- surface_stack: scratch.frame.surface_stack.take(),
resource_cache,
gpu_cache,
clip_store: &mut scene.clip_store,
scratch,
tile_cache: None,
+ retained_tiles: &mut retained_tiles,
data_stores,
+ clip_chain_stack: ClipChainStack::new(),
+ render_tasks,
composite_state,
+            // Try to avoid allocating during frame traversal - it's unlikely to have a
+            // surface stack depth of > 16 in most cases.
+ surface_stack: Vec::with_capacity(16),
};
- for pic_index in scene.tile_cache_pictures.iter().rev() {
- update_primitive_visibility(
- &mut scene.prim_store,
- *pic_index,
- ROOT_SURFACE_INDEX,
- &global_screen_world_rect,
- &visibility_context,
- &mut visibility_state,
- tile_caches,
- true,
- );
+ scene.prim_store.update_visibility(
+ scene.root_pic_index,
+ ROOT_SURFACE_INDEX,
+ &global_screen_world_rect,
+ &visibility_context,
+ &mut visibility_state,
+ );
+
+ // When there are tiles that are left remaining in the `retained_tiles`,
+ // dirty rects are not valid.
+ if !visibility_state.retained_tiles.caches.is_empty() {
+ visibility_state.composite_state.dirty_rects_are_valid = false;
}
- visibility_state.scratch.frame.clip_chain_stack = visibility_state.clip_chain_stack.take();
- visibility_state.scratch.frame.surface_stack = visibility_state.surface_stack.take();
+ // When a new display list is processed by WR, the existing tiles from
+ // any picture cache are stored in the `retained_tiles` field above. This
+ // allows the first frame of a new display list to reuse any existing tiles
+ // and surfaces that match. Once the `update_visibility` call above is
+ // complete, any tiles that are left remaining in the `retained_tiles`
+ // map are not needed and will be dropped. For simple compositing mode,
+ // this is fine, since texture cache handles are garbage collected at
+ // the end of each frame. However, if we're in native compositor mode,
+ // we need to manually clean up any native compositor surfaces that were
+ // allocated by these tiles.
+ for (_, mut cache_state) in visibility_state.retained_tiles.caches.drain() {
+ if let Some(native_surface) = cache_state.native_surface.take() {
+ visibility_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
+ visibility_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
+ }
- profile.end_time(profiler::FRAME_VISIBILITY_TIME);
+ for (_, external_surface) in cache_state.external_native_surface_cache.drain() {
+ visibility_state.resource_cache.destroy_compositor_surface(external_surface.native_surface_id)
+ }
+ }
}
- profile.start_time(profiler::FRAME_PREPARE_TIME);
-
let mut frame_state = FrameBuildingState {
- rg_builder,
+ render_tasks,
+ profile_counters,
clip_store: &mut scene.clip_store,
resource_cache,
gpu_cache,
transforms: transform_palette,
segment_builder: SegmentBuilder::new(),
- surfaces: &mut surfaces,
- dirty_region_stack: scratch.frame.dirty_region_stack.take(),
+ surfaces,
+ dirty_region_stack: Vec::new(),
composite_state,
- num_visible_primitives: 0,
};
+ frame_state
+ .surfaces
+ .first_mut()
+ .unwrap()
+ .render_tasks = Some(SurfaceRenderTasks {
+ root: root_render_task_id,
+ port: root_render_task_id,
+ });
+
// Push a default dirty region which culls primitives
// against the screen world rect, in absence of any
// other dirty regions.
- let mut default_dirty_region = DirtyRegion::new(
- ROOT_SPATIAL_NODE_INDEX,
- );
- default_dirty_region.add_dirty_region(
- frame_context.global_screen_world_rect.cast_unit(),
- SubSliceIndex::DEFAULT,
- frame_context.spatial_tree,
+ let mut default_dirty_region = DirtyRegion::new();
+ default_dirty_region.push(
+ frame_context.global_screen_world_rect,
+ PrimitiveVisibilityMask::all(),
);
frame_state.push_dirty_region(default_dirty_region);
- for pic_index in &scene.tile_cache_pictures {
- if let Some((pic_context, mut pic_state, mut prim_list)) = scene
- .prim_store
- .pictures[pic_index.0]
- .take_context(
- *pic_index,
- root_spatial_node_index,
- root_spatial_node_index,
- ROOT_SURFACE_INDEX,
- SubpixelMode::Allow,
- &mut frame_state,
- &frame_context,
- &mut scratch.primitive,
- tile_cache_logger,
- tile_caches,
- )
- {
- profile_marker!("PreparePrims");
-
- prepare_primitives(
- &mut scene.prim_store,
- &mut prim_list,
- &pic_context,
- &mut pic_state,
- &frame_context,
- &mut frame_state,
- data_stores,
- &mut scratch.primitive,
- tile_cache_logger,
- tile_caches,
- );
+ let (pic_context, mut pic_state, mut prim_list) = scene
+ .prim_store
+ .pictures[scene.root_pic_index.0]
+ .take_context(
+ scene.root_pic_index,
+ WorldRect::max_rect(),
+ root_spatial_node_index,
+ root_spatial_node_index,
+ ROOT_SURFACE_INDEX,
+ &SubpixelMode::Allow,
+ &mut frame_state,
+ &frame_context,
+ scratch,
+ tile_cache_logger
+ )
+ .unwrap();
- let pic = &mut scene.prim_store.pictures[pic_index.0];
- pic.restore_context(
- prim_list,
- pic_context,
- pic_state,
- &mut frame_state,
- );
- }
+ tile_cache_logger.advance();
+
+ {
+ profile_marker!("PreparePrims");
+
+ scene.prim_store.prepare_primitives(
+ &mut prim_list,
+ &pic_context,
+ &mut pic_state,
+ &frame_context,
+ &mut frame_state,
+ data_stores,
+ scratch,
+ tile_cache_logger,
+ );
}
- tile_cache_logger.advance();
- frame_state.pop_dirty_region();
- profile.end_time(profiler::FRAME_PREPARE_TIME);
- profile.set(profiler::VISIBLE_PRIMITIVES, frame_state.num_visible_primitives);
+ let pic = &mut scene.prim_store.pictures[scene.root_pic_index.0];
+ pic.restore_context(
+ ROOT_SURFACE_INDEX,
+ prim_list,
+ pic_context,
+ pic_state,
+ &mut frame_state,
+ );
- scratch.frame.dirty_region_stack = frame_state.dirty_region_stack.take();
- scratch.frame.surfaces = surfaces.take();
+ frame_state.pop_dirty_region();
{
profile_marker!("BlockOnResources");
- resource_cache.block_until_all_resources_added(
- gpu_cache,
- profile,
- );
+ resource_cache.block_until_all_resources_added(gpu_cache,
+ render_tasks,
+ texture_cache_profile);
}
+
+ Some(root_render_task_id)
}
pub fn build(
@@ -526,26 +511,28 @@ impl FrameBuilder {
scene: &mut BuiltScene,
resource_cache: &mut ResourceCache,
gpu_cache: &mut GpuCache,
- rg_builder: &mut RenderTaskGraphBuilder,
stamp: FrameStamp,
global_device_pixel_scale: DevicePixelScale,
+ layer: DocumentLayer,
device_origin: DeviceIntPoint,
pan: WorldPoint,
+ resource_profile: &mut ResourceProfileCounters,
scene_properties: &SceneProperties,
data_stores: &mut DataStores,
- scratch: &mut ScratchBuffer,
+ scratch: &mut PrimitiveScratchBuffer,
+ render_task_counters: &mut RenderTaskGraphCounters,
debug_flags: DebugFlags,
tile_cache_logger: &mut TileCacheLogger,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
- dirty_rects_are_valid: bool,
- profile: &mut TransactionProfile,
) -> Frame {
profile_scope!("build");
profile_marker!("BuildFrame");
- profile.set(profiler::PRIMITIVES, scene.prim_store.prim_count());
- profile.set(profiler::PICTURE_CACHE_SLICES, scene.tile_cache_config.picture_cache_slice_count);
- resource_cache.begin_frame(stamp, profile);
+ let mut profile_counters = FrameProfileCounters::new();
+ profile_counters
+ .total_primitives
+ .set(scene.prim_store.prim_count());
+ resource_profile.content_slices.set(scene.content_slice_count);
+ resource_cache.begin_frame(stamp);
gpu_cache.begin_frame(stamp);
self.globals.update(gpu_cache);
@@ -556,66 +543,78 @@ impl FrameBuilder {
scene_properties,
);
let mut transform_palette = scene.spatial_tree.build_transform_palette();
- scene.clip_store.begin_frame(&mut scratch.clip_store);
+ scene.clip_store.clear_old_instances();
- rg_builder.begin_frame(stamp.frame_id());
+ let mut render_tasks = RenderTaskGraph::new(
+ stamp.frame_id(),
+ render_task_counters,
+ );
+ let mut surfaces = Vec::new();
let output_size = scene.output_rect.size.to_i32();
let screen_world_rect = (scene.output_rect.to_f32() / global_device_pixel_scale).round_out();
+ // Determine if we will draw this frame with picture caching enabled. This depends on:
+ // (1) If globally enabled when WR was initialized
+ // (2) If current debug flags allow picture caching
+ // (3) Whether we are currently pinch zooming
+ // (4) If any picture cache spatial nodes are not in the root coordinate system
+ let picture_caching_is_enabled =
+ scene.config.global_enable_picture_caching &&
+ !debug_flags.contains(DebugFlags::DISABLE_PICTURE_CACHING) &&
+ !scene.picture_cache_spatial_nodes.iter().any(|spatial_node_index| {
+ let spatial_node = &scene
+ .spatial_tree
+ .spatial_nodes[spatial_node_index.0 as usize];
+ spatial_node.is_ancestor_or_self_zooming
+ });
+
let mut composite_state = CompositeState::new(
scene.config.compositor_kind,
+ picture_caching_is_enabled,
global_device_pixel_scale,
scene.config.max_depth_ids,
- dirty_rects_are_valid,
);
- self.composite_state_prealloc.preallocate(&mut composite_state);
-
- self.build_layer_screen_rects_and_cull_layers(
+ let main_render_task_id = self.build_layer_screen_rects_and_cull_layers(
scene,
screen_world_rect,
resource_cache,
gpu_cache,
- rg_builder,
+ &mut render_tasks,
+ &mut profile_counters,
global_device_pixel_scale,
scene_properties,
&mut transform_palette,
data_stores,
+ &mut surfaces,
scratch,
debug_flags,
+ &mut resource_profile.texture_cache,
&mut composite_state,
tile_cache_logger,
- tile_caches,
- profile,
);
- profile.start_time(profiler::FRAME_BATCHING_TIME);
-
+ let mut passes;
let mut deferred_resolves = vec![];
-
- // Finish creating the frame graph and build it.
- let render_tasks = rg_builder.end_frame(
- resource_cache,
- gpu_cache,
- &mut deferred_resolves,
- );
-
- let mut passes = Vec::new();
let mut has_texture_cache_tasks = false;
let mut prim_headers = PrimitiveHeaders::new();
- self.prim_headers_prealloc.preallocate_vec(&mut prim_headers.headers_int);
- self.prim_headers_prealloc.preallocate_vec(&mut prim_headers.headers_float);
{
profile_marker!("Batching");
+ passes = render_tasks.generate_passes(
+ main_render_task_id,
+ output_size,
+ scene.config.gpu_supports_fast_clears,
+ );
+
+        // Used to generate a unique z-buffer value per primitive.
- let mut z_generator = ZBufferIdGenerator::new(scene.config.max_depth_ids);
+ let mut z_generator = ZBufferIdGenerator::new(layer, scene.config.max_depth_ids);
let use_dual_source_blending = scene.config.dual_source_blending_is_enabled &&
scene.config.dual_source_blending_is_supported;
- for pass in render_tasks.passes.iter().rev() {
+ for pass in &mut passes {
let mut ctx = RenderTargetContext {
global_device_pixel_scale,
prim_store: &scene.prim_store,
@@ -626,77 +625,53 @@ impl FrameBuilder {
batch_lookback_count: scene.config.batch_lookback_count,
spatial_tree: &scene.spatial_tree,
data_stores,
- surfaces: &scratch.frame.surfaces,
- scratch: &mut scratch.primitive,
+ surfaces: &surfaces,
+ scratch,
screen_world_rect,
globals: &self.globals,
- tile_caches,
};
- let pass = build_render_pass(
+ build_render_pass(
pass,
- output_size,
&mut ctx,
gpu_cache,
- &render_tasks,
+ &mut render_tasks,
&mut deferred_resolves,
&scene.clip_store,
&mut transform_palette,
&mut prim_headers,
&mut z_generator,
&mut composite_state,
- scene.config.gpu_supports_fast_clears,
);
- has_texture_cache_tasks |= !pass.texture_cache.is_empty();
- has_texture_cache_tasks |= !pass.picture_cache.is_empty();
-
- passes.push(pass);
+ match pass.kind {
+ RenderPassKind::MainFramebuffer { .. } => {}
+ RenderPassKind::OffScreen {
+ ref texture_cache,
+ ref picture_cache,
+ ..
+ } => {
+ has_texture_cache_tasks |= !texture_cache.is_empty();
+ has_texture_cache_tasks |= !picture_cache.is_empty();
+ }
+ }
}
-
- let mut ctx = RenderTargetContext {
- global_device_pixel_scale,
- prim_store: &scene.prim_store,
- resource_cache,
- use_dual_source_blending,
- use_advanced_blending: scene.config.gpu_supports_advanced_blend,
- break_advanced_blend_batches: !scene.config.advanced_blend_is_coherent,
- batch_lookback_count: scene.config.batch_lookback_count,
- spatial_tree: &scene.spatial_tree,
- data_stores,
- surfaces: &scratch.frame.surfaces,
- scratch: &mut scratch.primitive,
- screen_world_rect,
- globals: &self.globals,
- tile_caches,
- };
-
- self.build_composite_pass(
- scene,
- &mut ctx,
- gpu_cache,
- &mut deferred_resolves,
- &mut composite_state,
- );
}
- profile.end_time(profiler::FRAME_BATCHING_TIME);
+ let gpu_cache_frame_id = gpu_cache.end_frame(&mut resource_profile.gpu_cache).frame_id();
- let gpu_cache_frame_id = gpu_cache.end_frame(profile).frame_id();
-
- resource_cache.end_frame(profile);
-
- self.prim_headers_prealloc.record_vec(&mut prim_headers.headers_int);
- self.composite_state_prealloc.record(&composite_state);
-
- composite_state.end_frame();
- scene.clip_store.end_frame(&mut scratch.clip_store);
+ render_tasks.write_task_data();
+ *render_task_counters = render_tasks.counters();
+ resource_cache.end_frame(&mut resource_profile.texture_cache);
Frame {
+ content_origin: scene.output_rect.origin,
device_rect: DeviceIntRect::new(
device_origin,
scene.output_rect.size,
),
+ layer,
+ profile_counters,
passes,
transform_palette: transform_palette.finish(),
render_tasks,
@@ -705,54 +680,11 @@ impl FrameBuilder {
has_been_rendered: false,
has_texture_cache_tasks,
prim_headers,
- debug_items: mem::replace(&mut scratch.primitive.debug_items, Vec::new()),
+ recorded_dirty_regions: mem::replace(&mut scratch.recorded_dirty_regions, Vec::new()),
+ debug_items: mem::replace(&mut scratch.debug_items, Vec::new()),
composite_state,
}
}
-
- fn build_composite_pass(
- &self,
- scene: &BuiltScene,
- ctx: &RenderTargetContext,
- gpu_cache: &mut GpuCache,
- deferred_resolves: &mut Vec<DeferredResolve>,
- composite_state: &mut CompositeState,
- ) {
- for pic_index in &scene.tile_cache_pictures {
- let pic = &ctx.prim_store.pictures[pic_index.0];
-
- match pic.raster_config {
- Some(RasterConfig { composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) => {
- // Tile cache instances are added to the composite config, rather than
- // directly added to batches. This allows them to be drawn with various
- // present modes during render, such as partial present etc.
- let tile_cache = &ctx.tile_caches[&slice_id];
- let map_local_to_world = SpaceMapper::new_with_target(
- ROOT_SPATIAL_NODE_INDEX,
- tile_cache.spatial_node_index,
- ctx.screen_world_rect,
- ctx.spatial_tree,
- );
- let world_clip_rect = map_local_to_world
- .map(&tile_cache.local_clip_rect)
- .expect("bug: unable to map clip rect");
- let device_clip_rect = (world_clip_rect * ctx.global_device_pixel_scale).round();
-
- composite_state.push_surface(
- tile_cache,
- device_clip_rect,
- ctx.global_device_pixel_scale,
- ctx.resource_cache,
- gpu_cache,
- deferred_resolves,
- );
- }
- _ => {
- panic!("bug: found a top-level prim that isn't a tile cache");
- }
- }
- }
- }
}
/// Processes this pass to prepare it for rendering.
@@ -761,269 +693,328 @@ impl FrameBuilder {
/// (added via `add_render_task`) in a RenderTarget and assigns it into that
/// target.
pub fn build_render_pass(
- src_pass: &Pass,
- screen_size: DeviceIntSize,
+ pass: &mut RenderPass,
ctx: &mut RenderTargetContext,
gpu_cache: &mut GpuCache,
- render_tasks: &RenderTaskGraph,
+ render_tasks: &mut RenderTaskGraph,
deferred_resolves: &mut Vec<DeferredResolve>,
clip_store: &ClipStore,
transforms: &mut TransformPalette,
prim_headers: &mut PrimitiveHeaders,
z_generator: &mut ZBufferIdGenerator,
composite_state: &mut CompositeState,
- gpu_supports_fast_clears: bool,
-) -> RenderPass {
+) {
profile_scope!("build_render_pass");
- // TODO(gw): In this initial frame graph work, we try to maintain the existing
- // build_render_pass code as closely as possible, to make the review
- // simpler and reduce chance of regressions. However, future work should
- // include refactoring this to more closely match the built frame graph.
-
- // Collect a list of picture cache tasks, keyed by picture index.
- // This allows us to only walk that picture root once, adding the
- // primitives to all relevant batches at the same time.
- let mut picture_cache_tasks = FastHashMap::default();
- let mut pass = RenderPass::new(src_pass);
-
- for sub_pass in &src_pass.sub_passes {
- match sub_pass.surface {
- SubPassSurface::Dynamic { target_kind, texture_id, used_rect } => {
- match target_kind {
- RenderTargetKind::Color => {
- let mut target = ColorRenderTarget::new(
- texture_id,
- screen_size,
- gpu_supports_fast_clears,
- used_rect,
- );
-
- for task_id in &sub_pass.task_ids {
- target.add_task(
- *task_id,
- ctx,
- gpu_cache,
- render_tasks,
- clip_store,
- transforms,
- );
+ match pass.kind {
+ RenderPassKind::MainFramebuffer { ref mut main_target, .. } => {
+ profile_scope!("MainFrameBuffer");
+ for &task_id in &pass.tasks {
+ profile_scope!("task");
+ assert_eq!(render_tasks[task_id].target_kind(), RenderTargetKind::Color);
+ main_target.add_task(
+ task_id,
+ ctx,
+ gpu_cache,
+ render_tasks,
+ clip_store,
+ transforms,
+ deferred_resolves,
+ );
+ }
+ main_target.build(
+ ctx,
+ gpu_cache,
+ render_tasks,
+ deferred_resolves,
+ prim_headers,
+ transforms,
+ z_generator,
+ composite_state,
+ );
+ }
+ RenderPassKind::OffScreen {
+ ref mut color,
+ ref mut alpha,
+ ref mut texture_cache,
+ ref mut picture_cache,
+ } => {
+ profile_scope!("OffScreen");
+ let saved_color = if pass.tasks.iter().any(|&task_id| {
+ let t = &render_tasks[task_id];
+ t.target_kind() == RenderTargetKind::Color && t.saved_index.is_some()
+ }) {
+ Some(render_tasks.save_target())
+ } else {
+ None
+ };
+ let saved_alpha = if pass.tasks.iter().any(|&task_id| {
+ let t = &render_tasks[task_id];
+ t.target_kind() == RenderTargetKind::Alpha && t.saved_index.is_some()
+ }) {
+ Some(render_tasks.save_target())
+ } else {
+ None
+ };
+
+ // Collect a list of picture cache tasks, keyed by picture index.
+ // This allows us to only walk that picture root once, adding the
+ // primitives to all relevant batches at the same time.
+ let mut picture_cache_tasks = FastHashMap::default();
+
+ // Step through each task, adding to batches as appropriate.
+ for &task_id in &pass.tasks {
+ let (target_kind, texture_target, layer) = {
+ let task = &mut render_tasks[task_id];
+ let target_kind = task.target_kind();
+
+ // Find a target to assign this task to, or create a new
+ // one if required.
+ let (texture_target, layer) = match task.location {
+ RenderTaskLocation::TextureCache { texture, layer, .. } => {
+ (Some(texture), layer)
+ }
+ RenderTaskLocation::Fixed(..) => {
+ (None, 0)
+ }
+ RenderTaskLocation::Dynamic(ref mut origin, size) => {
+ let (target_index, alloc_origin) = match target_kind {
+ RenderTargetKind::Color => color.allocate(size),
+ RenderTargetKind::Alpha => alpha.allocate(size),
+ };
+ *origin = Some((alloc_origin, target_index));
+ (None, target_index.0)
+ }
+ RenderTaskLocation::PictureCache { .. } => {
+ // For picture cache tiles, just store them in the map
+ // of picture cache tasks, to be handled below.
+ let pic_index = match task.kind {
+ RenderTaskKind::Picture(ref info) => {
+ info.pic_index
+ }
+ _ => {
+ unreachable!();
+ }
+ };
+
+ picture_cache_tasks
+ .entry(pic_index)
+ .or_insert_with(Vec::new)
+ .push(task_id);
+
+ continue;
}
+ };
- pass.color.targets.push(target);
+ // Replace the pending saved index with a real one
+ if let Some(index) = task.saved_index {
+ assert_eq!(index, SavedTargetIndex::PENDING);
+ task.saved_index = match target_kind {
+ RenderTargetKind::Color => saved_color,
+ RenderTargetKind::Alpha => saved_alpha,
+ };
}
- RenderTargetKind::Alpha => {
- let mut target = AlphaRenderTarget::new(
- texture_id,
- screen_size,
- gpu_supports_fast_clears,
- used_rect,
- );
-
- for task_id in &sub_pass.task_ids {
- target.add_task(
- *task_id,
- ctx,
- gpu_cache,
- render_tasks,
- clip_store,
- transforms,
+
+ // Give the render task an opportunity to add any
+ // information to the GPU cache, if appropriate.
+ task.write_gpu_blocks(gpu_cache);
+
+ (target_kind, texture_target, layer)
+ };
+
+ match texture_target {
+ Some(texture_target) => {
+ let texture = texture_cache
+ .entry((texture_target, layer))
+ .or_insert_with(||
+ TextureCacheRenderTarget::new(target_kind)
);
+ texture.add_task(task_id, render_tasks);
+ }
+ None => {
+ match target_kind {
+ RenderTargetKind::Color => {
+ color.targets[layer].add_task(
+ task_id,
+ ctx,
+ gpu_cache,
+ render_tasks,
+ clip_store,
+ transforms,
+ deferred_resolves,
+ )
+ }
+ RenderTargetKind::Alpha => {
+ alpha.targets[layer].add_task(
+ task_id,
+ ctx,
+ gpu_cache,
+ render_tasks,
+ clip_store,
+ transforms,
+ deferred_resolves,
+ )
+ }
}
-
- pass.alpha.targets.push(target);
}
}
}
- SubPassSurface::Persistent { surface: StaticRenderTaskSurface::PictureCache { .. }, .. } => {
- assert_eq!(sub_pass.task_ids.len(), 1);
- let task_id = sub_pass.task_ids[0];
- let task = &render_tasks[task_id];
-
- // For picture cache tiles, just store them in the map
- // of picture cache tasks, to be handled below.
- let pic_index = match task.kind {
- RenderTaskKind::Picture(ref info) => {
- info.pic_index
+
+ // For each picture in this pass that has picture cache tiles, create
+ // a batcher per task, and then build batches for each of the tasks
+ // at the same time.
+ for (pic_index, task_ids) in picture_cache_tasks {
+ profile_scope!("picture_cache_task");
+ let pic = &ctx.prim_store.pictures[pic_index.0];
+ let tile_cache = pic.tile_cache.as_ref().expect("bug");
+
+ // Extract raster/surface spatial nodes for this surface.
+ let (root_spatial_node_index, surface_spatial_node_index) = match pic.raster_config {
+ Some(ref rc) => {
+ let surface = &ctx.surfaces[rc.surface_index.0];
+ (surface.raster_spatial_node_index, surface.surface_spatial_node_index)
}
- _ => {
+ None => {
unreachable!();
}
};
- picture_cache_tasks
- .entry(pic_index)
- .or_insert_with(Vec::new)
- .push(task_id);
- }
- SubPassSurface::Persistent { surface: StaticRenderTaskSurface::TextureCache { target_kind, texture, .. } } => {
- let texture = pass.texture_cache
- .entry(texture)
- .or_insert_with(||
- TextureCacheRenderTarget::new(target_kind)
- );
- for task_id in &sub_pass.task_ids {
- texture.add_task(*task_id, render_tasks, gpu_cache);
- }
- }
- SubPassSurface::Persistent { surface: StaticRenderTaskSurface::ReadOnly { .. } } => {
- panic!("Should not create a render pass for read-only task locations.");
- }
- }
- }
+ // Determine the clear color for this picture cache.
+ // If the entire tile cache is opaque, we can skip clear completely.
+ // If it's the first layer, clear it to white to allow subpixel AA on that
+ // first layer even if it's technically transparent.
+ // Otherwise, clear to transparent and composite with alpha.
+ // TODO(gw): We can detect per-tile opacity for the clear color here
+ // which might be a significant win on some pages?
+ let forced_opaque = match tile_cache.background_color {
+ Some(color) => color.a >= 1.0,
+ None => false,
+ };
+ let mut clear_color = if forced_opaque {
+ Some(ColorF::WHITE)
+ } else {
+ Some(ColorF::TRANSPARENT)
+ };
- // For each picture in this pass that has picture cache tiles, create
- // a batcher per task, and then build batches for each of the tasks
- // at the same time.
- for (pic_index, task_ids) in picture_cache_tasks {
- profile_scope!("picture_cache_task");
- let pic = &ctx.prim_store.pictures[pic_index.0];
-
- // Extract raster/surface spatial nodes for this surface.
- let (root_spatial_node_index, surface_spatial_node_index, tile_cache) = match pic.raster_config {
- Some(RasterConfig { surface_index, composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) => {
- let surface = &ctx.surfaces[surface_index.0];
- (
- surface.raster_spatial_node_index,
- surface.surface_spatial_node_index,
- &ctx.tile_caches[&slice_id],
- )
- }
- _ => {
- unreachable!();
- }
- };
+ // If this picture cache has a valid color backdrop, we will use
+ // that as the clear color, skipping the draw of the backdrop
+ // primitive (and anything prior to it) during batching.
+ if let Some(BackdropKind::Color { color }) = tile_cache.backdrop.kind {
+ clear_color = Some(color);
+ }
- // Create an alpha batcher for each of the tasks of this picture.
- let mut batchers = Vec::new();
- for task_id in &task_ids {
- let task_id = *task_id;
- let batch_filter = match render_tasks[task_id].kind {
- RenderTaskKind::Picture(ref info) => info.batch_filter,
- _ => unreachable!(),
- };
- batchers.push(AlphaBatchBuilder::new(
- screen_size,
- ctx.break_advanced_blend_batches,
- ctx.batch_lookback_count,
- task_id,
- task_id.into(),
- batch_filter,
- 0,
- ));
- }
+ // Create an alpha batcher for each of the tasks of this picture.
+ let mut batchers = Vec::new();
+ for task_id in &task_ids {
+ let task_id = *task_id;
+ let vis_mask = match render_tasks[task_id].kind {
+ RenderTaskKind::Picture(ref info) => info.vis_mask,
+ _ => unreachable!(),
+ };
+ batchers.push(AlphaBatchBuilder::new(
+ pass.screen_size,
+ ctx.break_advanced_blend_batches,
+ ctx.batch_lookback_count,
+ task_id,
+ render_tasks.get_task_address(task_id),
+ vis_mask,
+ ));
+ }
- // Run the batch creation code for this picture, adding items to
- // all relevant per-task batchers.
- let mut batch_builder = BatchBuilder::new(batchers);
- {
- profile_scope!("add_pic_to_batch");
- batch_builder.add_pic_to_batch(
- pic,
- ctx,
- gpu_cache,
- render_tasks,
- deferred_resolves,
- prim_headers,
- transforms,
- root_spatial_node_index,
- surface_spatial_node_index,
- z_generator,
- composite_state,
- );
- }
+ // Run the batch creation code for this picture, adding items to
+ // all relevant per-task batchers.
+ let mut batch_builder = BatchBuilder::new(batchers);
+ {
+ profile_scope!("add_pic_to_batch");
+ batch_builder.add_pic_to_batch(
+ pic,
+ ctx,
+ gpu_cache,
+ render_tasks,
+ deferred_resolves,
+ prim_headers,
+ transforms,
+ root_spatial_node_index,
+ surface_spatial_node_index,
+ z_generator,
+ composite_state,
+ );
+ }
- // Create picture cache targets, one per render task, and assign
- // the correct batcher to them.
- let batchers = batch_builder.finalize();
- for (task_id, batcher) in task_ids.into_iter().zip(batchers.into_iter()) {
- profile_scope!("task");
- let task = &render_tasks[task_id];
- let target_rect = task.get_target_rect();
-
- match task.location {
- RenderTaskLocation::Static { surface: StaticRenderTaskSurface::PictureCache { ref surface, .. }, .. } => {
- // TODO(gw): The interface here is a bit untidy since it's
- // designed to support batch merging, which isn't
- // relevant for picture cache targets. We
- // can restructure / tidy this up a bit.
- let (scissor_rect, valid_rect, clear_color) = match render_tasks[task_id].kind {
- RenderTaskKind::Picture(ref info) => {
- let mut clear_color = ColorF::TRANSPARENT;
-
- // TODO(gw): The way we check the batch filter for is_primary is a bit hacky, tidy up somehow?
- if let Some(batch_filter) = info.batch_filter {
- if batch_filter.sub_slice_index.is_primary() {
- if let Some(background_color) = tile_cache.background_color {
- clear_color = background_color;
- }
-
- // If this picture cache has a valid color backdrop, we will use
- // that as the clear color, skipping the draw of the backdrop
- // primitive (and anything prior to it) during batching.
- if let Some(BackdropKind::Color { color }) = tile_cache.backdrop.kind {
- clear_color = color;
- }
+ // Create picture cache targets, one per render task, and assign
+ // the correct batcher to them.
+ let batchers = batch_builder.finalize();
+ for (task_id, batcher) in task_ids.into_iter().zip(batchers.into_iter()) {
+ profile_scope!("task");
+ let task = &render_tasks[task_id];
+ let (target_rect, _) = task.get_target_rect();
+
+ match task.location {
+ RenderTaskLocation::PictureCache { ref surface, .. } => {
+ // TODO(gw): The interface here is a bit untidy since it's
+ // designed to support batch merging, which isn't
+ // relevant for picture cache targets. We
+ // can restructure / tidy this up a bit.
+ let (scissor_rect, valid_rect) = match render_tasks[task_id].kind {
+ RenderTaskKind::Picture(ref info) => {
+ (
+ info.scissor_rect.expect("bug: must be set for cache tasks"),
+ info.valid_rect.expect("bug: must be set for cache tasks"),
+ )
}
- }
+ _ => unreachable!(),
+ };
+ let mut batch_containers = Vec::new();
+ let mut alpha_batch_container = AlphaBatchContainer::new(Some(scissor_rect));
+ batcher.build(
+ &mut batch_containers,
+ &mut alpha_batch_container,
+ target_rect,
+ None,
+ );
+ debug_assert!(batch_containers.is_empty());
- (
- info.scissor_rect.expect("bug: must be set for cache tasks"),
- info.valid_rect.expect("bug: must be set for cache tasks"),
+ let target = PictureCacheTarget {
+ surface: surface.clone(),
clear_color,
- )
- }
- _ => unreachable!(),
- };
- let mut batch_containers = Vec::new();
- let mut alpha_batch_container = AlphaBatchContainer::new(Some(scissor_rect));
- batcher.build(
- &mut batch_containers,
- &mut alpha_batch_container,
- target_rect,
- None,
- );
- debug_assert!(batch_containers.is_empty());
-
- let target = PictureCacheTarget {
- surface: surface.clone(),
- clear_color: Some(clear_color),
- alpha_batch_container,
- dirty_rect: scissor_rect,
- valid_rect,
- };
+ alpha_batch_container,
+ dirty_rect: scissor_rect,
+ valid_rect,
+ };
- pass.picture_cache.push(target);
- }
- _ => {
- unreachable!()
+ picture_cache.push(target);
+ }
+ _ => {
+ unreachable!()
+ }
+ }
}
}
+
+ color.build(
+ ctx,
+ gpu_cache,
+ render_tasks,
+ deferred_resolves,
+ saved_color,
+ prim_headers,
+ transforms,
+ z_generator,
+ composite_state,
+ );
+ alpha.build(
+ ctx,
+ gpu_cache,
+ render_tasks,
+ deferred_resolves,
+ saved_alpha,
+ prim_headers,
+ transforms,
+ z_generator,
+ composite_state,
+ );
}
}
-
- pass.color.build(
- ctx,
- gpu_cache,
- render_tasks,
- deferred_resolves,
- prim_headers,
- transforms,
- z_generator,
- composite_state,
- );
- pass.alpha.build(
- ctx,
- gpu_cache,
- render_tasks,
- deferred_resolves,
- prim_headers,
- transforms,
- z_generator,
- composite_state,
- );
-
- pass
}
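
In the OffScreen arm above, a task with RenderTaskLocation::Dynamic starts
with no origin; the matching color or alpha target allocates space for it and
the location is patched in place, so later stages can resolve the task's
rectangle. A self-contained sketch of that allocate-and-patch step (the
trivial shelf allocator and tuple types are stand-ins, not WebRender's):

    #[derive(Debug)]
    enum Location {
        Dynamic(Option<(u32, u32)>, (u32, u32)), // (origin once allocated, size)
    }

    struct Allocator { cursor: u32 }

    impl Allocator {
        // Stand-in allocator: stack rects vertically.
        fn allocate(&mut self, size: (u32, u32)) -> (u32, u32) {
            let origin = (0, self.cursor);
            self.cursor += size.1;
            origin
        }
    }

    fn main() {
        let mut alloc = Allocator { cursor: 0 };
        let mut tasks = vec![
            Location::Dynamic(None, (256, 128)),
            Location::Dynamic(None, (64, 64)),
        ];
        for t in &mut tasks {
            let Location::Dynamic(ref mut origin, size) = *t;
            *origin = Some(alloc.allocate(size)); // patch the origin in place
        }
        println!("{:?}", tasks); // the second task lands at y = 128
    }
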
/// A rendering-oriented representation of the frame built by the render backend
@@ -1031,9 +1022,14 @@ pub fn build_render_pass(
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct Frame {
+    /// The origin of the content produced by the render tasks.
+ pub content_origin: DeviceIntPoint,
/// The rectangle to show the frame in, on screen.
pub device_rect: DeviceIntRect,
+ pub layer: DocumentLayer,
pub passes: Vec<RenderPass>,
+ #[cfg_attr(any(feature = "capture", feature = "replay"), serde(default = "FrameProfileCounters::new", skip))]
+ pub profile_counters: FrameProfileCounters,
pub transform_palette: Vec<TransformData>,
pub render_tasks: RenderTaskGraph,
@@ -1056,6 +1052,11 @@ pub struct Frame {
/// renderer.
pub has_been_rendered: bool,
+ /// Dirty regions recorded when generating this frame. Empty when not in
+ /// testing.
+ #[cfg_attr(feature = "serde", serde(skip))]
+ pub recorded_dirty_regions: Vec<RecordedDirtyRegion>,
+
/// Debugging information to overlay for this frame.
pub debug_items: Vec<DebugItem>,
@@ -1074,44 +1075,23 @@ impl Frame {
// Returns true if this frame doesn't alter what is on screen currently.
pub fn is_nop(&self) -> bool {
- // If there are no off-screen passes, that implies that there are no
- // picture cache tiles, and no texture cache tasks being updates. If this
- // is the case, we can consider the frame a nop (higher level checks
- // test if a composite is needed due to picture cache surfaces moving
- // or external surfaces being updated).
- self.passes.is_empty()
- }
-}
-
-/// Add a child render task as a dependency to a surface. This is a free
-/// function for now as it's also used by the render task cache.
-// TODO(gw): Find a more appropriate place for this to live - probably clearer
-// once SurfaceInfo gets refactored.
-pub fn add_child_render_task(
- surface_index: SurfaceIndex,
- child_task_id: RenderTaskId,
- surfaces: &[SurfaceInfo],
- rg_builder: &mut RenderTaskGraphBuilder,
-) {
- let surface_tasks = surfaces[surface_index.0]
- .render_tasks
- .as_ref()
- .expect("bug: no task for surface");
-
- match surface_tasks {
- SurfaceRenderTasks::Tiled(ref tasks) => {
- // For a tiled render task, add as a dependency to every tile.
- for parent_id in tasks {
- rg_builder.add_dependency(*parent_id, child_task_id);
- }
- }
- SurfaceRenderTasks::Simple(parent_id) => {
- rg_builder.add_dependency(*parent_id, child_task_id);
+ // If picture caching is disabled, we don't have enough information
+ // to know if this frame is a nop, so it gets drawn unconditionally.
+ if !self.composite_state.picture_caching_is_enabled {
+ return false;
}
- SurfaceRenderTasks::Chained { port_task_id, .. } => {
- // For chained render tasks, add as a dependency of the lowest part of
- // the chain (the picture content)
- rg_builder.add_dependency(*port_task_id, child_task_id);
+
+ // When picture caching is enabled, the first (main framebuffer) pass
+ // consists of compositing tiles only (whether via the simple compositor
+ // or the native OS compositor). If there are no other passes, that
+ // implies that none of the picture cache tiles were updated, and thus
+ // the frame content must be exactly the same as last frame. If this is
+ // true, drawing this frame is a no-op and can be skipped.
+
+ if self.passes.len() > 1 {
+ return false;
}
+
+ true
}
}
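
The rewritten is_nop above reduces to two checks: bail out when picture
caching is off, otherwise treat a single (composite-only) pass as "nothing
changed". A freestanding model of that logic (field names are illustrative):

    struct Frame { picture_caching: bool, passes: usize }

    fn is_nop(f: &Frame) -> bool {
        // Without picture caching we cannot prove the content is unchanged.
        if !f.picture_caching {
            return false;
        }
        // Pass 0 only composites cached tiles; extra passes mean new content.
        f.passes <= 1
    }

    fn main() {
        assert!(!is_nop(&Frame { picture_caching: false, passes: 1 }));
        assert!(is_nop(&Frame { picture_caching: true, passes: 1 }));
        assert!(!is_nop(&Frame { picture_caching: true, passes: 3 }));
    }
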
diff --git a/third_party/webrender/webrender/src/freelist.rs b/third_party/webrender/webrender/src/freelist.rs
index aa90aba03c3..5ca196191cc 100644
--- a/third_party/webrender/webrender/src/freelist.rs
+++ b/third_party/webrender/webrender/src/freelist.rs
@@ -44,7 +44,6 @@ impl Epoch {
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(MallocSizeOf)]
pub struct FreeListHandle<M> {
index: u32,
epoch: Epoch,
@@ -132,7 +131,7 @@ impl<M> WeakFreeListHandle<M> {
}
}
-#[derive(Debug, MallocSizeOf)]
+#[derive(Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct Slot<T> {
@@ -141,7 +140,7 @@ struct Slot<T> {
value: Option<T>,
}
-#[derive(Debug, MallocSizeOf)]
+#[derive(Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct FreeList<T, M> {
diff --git a/third_party/webrender/webrender/src/glyph_cache.rs b/third_party/webrender/webrender/src/glyph_cache.rs
index cee2c865630..b0b9ecf65c7 100644
--- a/third_party/webrender/webrender/src/glyph_cache.rs
+++ b/third_party/webrender/webrender/src/glyph_cache.rs
@@ -5,6 +5,7 @@
use crate::glyph_rasterizer::{FontInstance, GlyphFormat, GlyphKey, GlyphRasterizer};
use crate::internal_types::FastHashMap;
use crate::render_backend::{FrameId, FrameStamp};
+use crate::render_task_cache::RenderTaskCache;
use crate::resource_cache::ResourceClassCache;
use std::sync::Arc;
use crate::texture_cache::{EvictionNotice, TextureCache};
@@ -32,12 +33,21 @@ pub enum GlyphCacheEntry {
}
impl GlyphCacheEntry {
- fn has_been_evicted(&self, texture_cache: &TextureCache) -> bool {
+ fn get_allocated_size(&self, texture_cache: &TextureCache, _: &RenderTaskCache)
+ -> Option<usize> {
match *self {
GlyphCacheEntry::Cached(ref glyph) => {
- !texture_cache.is_allocated(&glyph.texture_cache_handle)
+ texture_cache.get_allocated_size(&glyph.texture_cache_handle)
}
- GlyphCacheEntry::Pending | GlyphCacheEntry::Blank => false,
+ GlyphCacheEntry::Pending | GlyphCacheEntry::Blank => Some(0),
+ }
+ }
+
+ fn is_recently_used(&self, texture_cache: &mut TextureCache) -> bool {
+ if let GlyphCacheEntry::Cached(ref glyph) = *self {
+ texture_cache.is_recently_used(&glyph.texture_cache_handle, 1)
+ } else {
+ false
}
}
}
@@ -56,32 +66,76 @@ pub enum CachedGlyphData {
#[derive(Default)]
pub struct GlyphKeyCacheInfo {
eviction_notice: EvictionNotice,
- #[cfg(debug_assertions)]
- #[allow(dead_code)]
- #[cfg_attr(feature = "replay", serde(default))]
last_frame_used: FrameId,
+ bytes_used: usize,
}
pub type GlyphKeyCache = ResourceClassCache<GlyphKey, GlyphCacheEntry, GlyphKeyCacheInfo>;
impl GlyphKeyCache {
+ const DIRTY: usize = !0;
+
pub fn eviction_notice(&self) -> &EvictionNotice {
&self.user_data.eviction_notice
}
- fn clear_glyphs(&mut self) {
+ fn is_recently_used(&self, current_frame: FrameId) -> bool {
+ self.user_data.last_frame_used + 1 >= current_frame
+ }
+
+ fn clear_glyphs(&mut self) -> usize {
+ let pruned = self.user_data.bytes_used;
self.clear();
+ self.user_data.bytes_used = 0;
+ pruned
+ }
+
+ fn prune_glyphs(
+ &mut self,
+ skip_recent: bool,
+ excess_bytes_used: usize,
+ texture_cache: &mut TextureCache,
+ render_task_cache: &RenderTaskCache,
+ ) -> usize {
+ let mut pruned = 0;
+ self.retain(|_, entry| {
+ if pruned <= excess_bytes_used &&
+ (!skip_recent || !entry.is_recently_used(texture_cache)) {
+ match entry.get_allocated_size(texture_cache, render_task_cache) {
+ Some(size) => {
+ pruned += size;
+ false
+ }
+ None => true,
+ }
+ } else {
+ true
+ }
+ });
+ self.user_data.bytes_used -= pruned;
+ pruned
}
pub fn add_glyph(&mut self, key: GlyphKey, value: GlyphCacheEntry) {
self.insert(key, value);
+ self.user_data.bytes_used = Self::DIRTY;
}
- fn clear_evicted(&mut self, texture_cache: &TextureCache) {
- if self.eviction_notice().check() {
+ fn clear_evicted(
+ &mut self,
+ texture_cache: &TextureCache,
+ render_task_cache: &RenderTaskCache,
+ ) {
+ if self.eviction_notice().check() || self.user_data.bytes_used == Self::DIRTY {
// If there are evictions, filter out any glyphs evicted from the
// texture cache from the glyph key cache.
- self.retain(|_, entry| !entry.has_been_evicted(texture_cache));
+ let mut usage = 0;
+ self.retain(|_, entry| {
+ let size = entry.get_allocated_size(texture_cache, render_task_cache);
+ usage += size.unwrap_or(0);
+ size.is_some()
+ });
+ self.user_data.bytes_used = usage;
}
}
}
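
clear_evicted above doubles as the recount step for the bytes_used sentinel:
add_glyph cheaply marks the total as DIRTY, and the next sweep recomputes it
from the surviving entries. A self-contained sketch of that lazy-recount
pattern (the cache type is a stand-in):

    const DIRTY: usize = !0; // usize::MAX can never be a real byte total

    struct Cache { sizes: Vec<usize>, bytes_used: usize }

    impl Cache {
        fn add(&mut self, size: usize) {
            self.sizes.push(size);
            self.bytes_used = DIRTY; // defer the re-count to the next sweep
        }
        fn sweep(&mut self) {
            if self.bytes_used == DIRTY {
                self.bytes_used = self.sizes.iter().sum();
            }
        }
    }

    fn main() {
        let mut c = Cache { sizes: Vec::new(), bytes_used: 0 };
        c.add(4096);
        c.add(512);
        c.sweep();
        assert_eq!(c.bytes_used, 4608);
    }
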
@@ -91,13 +145,20 @@ impl GlyphKeyCache {
pub struct GlyphCache {
glyph_key_caches: FastHashMap<FontInstance, GlyphKeyCache>,
current_frame: FrameId,
+ bytes_used: usize,
+ max_bytes_used: usize,
}
impl GlyphCache {
- pub fn new() -> Self {
+ /// The default space usage threshold, in bytes, after which to start pruning away old fonts.
+ pub const DEFAULT_MAX_BYTES_USED: usize = 6 * 1024 * 1024;
+
+ pub fn new(max_bytes_used: usize) -> Self {
GlyphCache {
glyph_key_caches: FastHashMap::default(),
current_frame: Default::default(),
+ bytes_used: 0,
+ max_bytes_used,
}
}
@@ -105,10 +166,7 @@ impl GlyphCache {
let cache = self.glyph_key_caches
.entry(font)
.or_insert_with(GlyphKeyCache::new);
- #[cfg(debug_assertions)]
- {
- cache.user_data.last_frame_used = self.current_frame;
- }
+ cache.user_data.last_frame_used = self.current_frame;
cache
}
@@ -143,11 +201,18 @@ impl GlyphCache {
}
/// Clear out evicted entries from glyph key caches.
- fn clear_evicted(&mut self, texture_cache: &TextureCache) {
+ fn clear_evicted(
+ &mut self,
+ texture_cache: &TextureCache,
+ render_task_cache: &RenderTaskCache,
+ ) {
+ let mut usage = 0;
for cache in self.glyph_key_caches.values_mut() {
// Scan for any glyph key caches that have evictions.
- cache.clear_evicted(texture_cache);
+ cache.clear_evicted(texture_cache, render_task_cache);
+ usage += cache.user_data.bytes_used;
}
+ self.bytes_used = usage;
}
/// If possible, remove entirely any empty glyph key caches.
@@ -163,15 +228,55 @@ impl GlyphCache {
});
}
+ /// Check the total space usage of the glyph cache. If it exceeds the maximum usage threshold,
+    /// then start clearing the oldest glyphs until usage falls below the threshold.
+ fn prune_excess_usage(
+ &mut self,
+ texture_cache: &mut TextureCache,
+ render_task_cache: &RenderTaskCache,
+ ) {
+ if self.bytes_used < self.max_bytes_used {
+ return;
+ }
+        // Usage is above the threshold. Get a least-recently-used ordered list of caches to clear.
+ let mut caches: Vec<_> = self.glyph_key_caches.values_mut().collect();
+ caches.sort_unstable_by(|a, b| {
+ a.user_data.last_frame_used.cmp(&b.user_data.last_frame_used)
+ });
+ // Clear out the oldest caches until below the threshold.
+ for cache in caches {
+ if self.bytes_used < self.max_bytes_used {
+ break;
+ }
+ let recent = cache.is_recently_used(self.current_frame);
+ let excess = self.bytes_used - self.max_bytes_used;
+ if !recent && excess >= cache.user_data.bytes_used {
+                // If the excess is at least as large as the cache's size, just clear the whole thing.
+ self.bytes_used -= cache.clear_glyphs();
+ } else {
+ // Otherwise, just clear as little of the cache as needed to remove the excess
+ // and avoid rematerialization costs.
+ self.bytes_used -= cache.prune_glyphs(
+ recent,
+ excess,
+ texture_cache,
+ render_task_cache,
+ );
+ }
+ }
+ }
+
pub fn begin_frame(
&mut self,
stamp: FrameStamp,
texture_cache: &mut TextureCache,
+ render_task_cache: &RenderTaskCache,
glyph_rasterizer: &mut GlyphRasterizer,
) {
profile_scope!("begin_frame");
self.current_frame = stamp.frame_id();
- self.clear_evicted(texture_cache);
+ self.clear_evicted(texture_cache, render_task_cache);
+ self.prune_excess_usage(texture_cache, render_task_cache);
// Clearing evicted glyphs and pruning excess usage might have produced empty caches,
// so get rid of them if possible.
self.clear_empty_caches(glyph_rasterizer);
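
prune_excess_usage above applies a two-tier policy: a cache that was not used
recently is dropped wholesale when the excess covers its entire size,
otherwise only enough of its oldest entries are trimmed to get back under
budget. A freestanding model with a worked example (the 8 MiB starting point
and per-cache sizes are made up for illustration):

    fn prune(caches: &mut [(bool /* recent */, usize /* bytes */)], max: usize) {
        let mut used: usize = caches.iter().map(|c| c.1).sum();
        // Assumes `caches` is already sorted least-recently-used first.
        for c in caches.iter_mut() {
            if used <= max {
                break;
            }
            let excess = used - max;
            if !c.0 && excess >= c.1 {
                used -= c.1; // clear the whole stale cache
                c.1 = 0;
            } else {
                let trim = excess.min(c.1); // trim only the oldest entries
                used -= trim;
                c.1 -= trim;
            }
        }
    }

    fn main() {
        const MIB: usize = 1024 * 1024;
        // 8 MiB in use against a 6 MiB budget: 2 MiB of excess.
        let mut caches = vec![(false, MIB), (false, 3 * MIB), (true, 4 * MIB)];
        prune(&mut caches, 6 * MIB);
        assert_eq!(caches[0].1, 0);       // stale 1 MiB cache dropped whole
        assert_eq!(caches[1].1, 2 * MIB); // next loses the remaining 1 MiB
    }
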
diff --git a/third_party/webrender/webrender/src/glyph_rasterizer/mod.rs b/third_party/webrender/webrender/src/glyph_rasterizer/mod.rs
index 80aa37826ba..0a60cc77d6c 100644
--- a/third_party/webrender/webrender/src/glyph_rasterizer/mod.rs
+++ b/third_party/webrender/webrender/src/glyph_rasterizer/mod.rs
@@ -5,7 +5,6 @@
use api::{FontInstanceFlags, FontSize, BaseFontInstance};
use api::{FontKey, FontRenderMode, FontTemplate};
use api::{ColorU, GlyphIndex, GlyphDimensions, SyntheticItalics};
-use api::channel::{unbounded_channel, Receiver, Sender};
use api::units::*;
use api::{ImageDescriptor, ImageDescriptorFlags, ImageFormat, DirtyRect};
use crate::internal_types::ResourceCacheError;
@@ -13,23 +12,24 @@ use crate::platform::font::FontContext;
use crate::device::TextureFilter;
use crate::gpu_types::UvRectKind;
use crate::glyph_cache::{GlyphCache, CachedGlyphInfo, GlyphCacheEntry};
-use crate::internal_types::FastHashMap;
use crate::resource_cache::CachedImageData;
-use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction, TargetShader};
+use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction};
use crate::gpu_cache::GpuCache;
-use crate::profiler::{self, TransactionProfile};
+use crate::render_task_graph::RenderTaskGraph;
+use crate::render_task_cache::RenderTaskCache;
+use crate::profiler::TextureCacheProfileCounters;
use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
use rayon::ThreadPool;
use rayon::prelude::*;
use euclid::approxeq::ApproxEq;
use euclid::size2;
-use smallvec::SmallVec;
use std::cmp;
use std::cell::Cell;
use std::hash::{Hash, Hasher};
use std::mem;
use std::ops::Deref;
use std::sync::{Arc, Condvar, Mutex, MutexGuard};
+use std::sync::mpsc::{channel, Receiver, Sender};
use std::sync::atomic::{AtomicBool, Ordering};
pub static GLYPH_FLASHING: AtomicBool = AtomicBool::new(false);
@@ -66,12 +66,15 @@ impl GlyphRasterizer {
glyph_keys: &[GlyphKey],
texture_cache: &mut TextureCache,
gpu_cache: &mut GpuCache,
+ _: &mut RenderTaskCache,
+ _: &mut RenderTaskGraph,
) {
assert!(
self.font_contexts
.lock_shared_context()
.has_font(&font.font_key)
);
+ let mut new_glyphs = Vec::new();
let glyph_key_cache = glyph_cache.get_glyph_key_cache_for_font_mut(font.clone());
@@ -92,63 +95,28 @@ impl GlyphRasterizer {
GlyphCacheEntry::Blank | GlyphCacheEntry::Pending => continue,
}
}
+ new_glyphs.push(key.clone());
+ glyph_key_cache.add_glyph(key.clone(), GlyphCacheEntry::Pending);
+ }
- // Increment the total number of glyphs that are pending. This is used to determine
- // later whether to use worker threads for the remaining glyphs during resolve time.
- self.pending_glyph_count += 1;
- self.glyph_request_count += 1;
-
- // Find a batch container for the font instance for this glyph. Use get_mut to avoid
- // cloning the font instance, since this is the common path.
- match self.pending_glyph_requests.get_mut(&font) {
- Some(container) => {
- container.push(*key);
-
- // If the batch for this font instance is big enough, kick off an async
- // job to start rasterizing these glyphs on other threads now.
- if container.len() == 8 {
- let glyphs = mem::replace(container, SmallVec::new());
- self.flush_glyph_requests(
- font.clone(),
- glyphs,
- true,
- );
- }
- }
- None => {
- // If no batch exists for this font instance, add the glyph to a new one.
- self.pending_glyph_requests.insert(
- font.clone(),
- smallvec![*key],
- );
- }
- }
-
- glyph_key_cache.add_glyph(*key, GlyphCacheEntry::Pending);
+ if new_glyphs.is_empty() {
+ return;
}
+
+ self.pending_glyphs += 1;
+
+ self.request_glyphs_from_backend(font, new_glyphs);
}
pub fn enable_multithreading(&mut self, enable: bool) {
self.enable_multithreading = enable;
}
- /// Internal method to flush a list of glyph requests to a set of worker threads,
- /// or process on this thread if there isn't much work to do (in which case the
- /// overhead of processing these on a thread is unlikely to be a performance win).
- fn flush_glyph_requests(
- &mut self,
- font: FontInstance,
- glyphs: SmallVec<[GlyphKey; 16]>,
- use_workers: bool,
- ) {
+ pub(in super) fn request_glyphs_from_backend(&mut self, font: FontInstance, glyphs: Vec<GlyphKey>) {
let font_contexts = Arc::clone(&self.font_contexts);
let glyph_tx = self.glyph_tx.clone();
- self.pending_glyph_jobs += 1;
- self.pending_glyph_count -= glyphs.len();
-
- let can_use_r8_format = self.can_use_r8_format;
- let process_glyph = move |key: &GlyphKey, font_contexts: &FontContexts, font: &FontInstance| -> GlyphRasterJob {
+ fn process_glyph(key: &GlyphKey, font_contexts: &FontContexts, font: &FontInstance) -> GlyphRasterJob {
profile_scope!("glyph-raster");
let mut context = font_contexts.lock_current_context();
let mut job = GlyphRasterJob {
@@ -183,24 +151,19 @@ impl GlyphRasterizer {
// Check if the glyph has a bitmap that needs to be downscaled.
glyph.downscale_bitmap_if_required(&font);
-
- // Convert from BGRA8 to R8 if required. In the future we can make it the
- // backends' responsibility to output glyphs in the desired format,
- // potentially reducing the number of copies.
- if glyph.format.image_format(can_use_r8_format).bytes_per_pixel() == 1 {
- glyph.bytes = glyph.bytes
- .chunks_mut(4)
- .map(|pixel| pixel[3])
- .collect::<Vec<_>>();
- }
}
job
- };
+ }
// if the number of glyphs is small, do it inline to avoid the threading overhead;
// send the result into glyph_tx so downstream code can't tell the difference.
- if self.enable_multithreading && use_workers {
+ if !self.enable_multithreading || glyphs.len() < 8 {
+ let jobs = glyphs.iter()
+ .map(|key: &GlyphKey| process_glyph(key, &font_contexts, &font))
+ .collect();
+ glyph_tx.send(GlyphRasterJobs { font, jobs }).unwrap();
+ } else {
// spawn an async task to get off of the render backend thread as early as
// possible and in that task use rayon's fork join dispatch to rasterize the
// glyphs in the thread pool.
@@ -213,11 +176,6 @@ impl GlyphRasterizer {
glyph_tx.send(GlyphRasterJobs { font, jobs }).unwrap();
});
- } else {
- let jobs = glyphs.iter()
- .map(|key: &GlyphKey| process_glyph(key, &font_contexts, &font))
- .collect();
- glyph_tx.send(GlyphRasterJobs { font, jobs }).unwrap();
}
}
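
The cutoff above (multithreading disabled, or fewer than 8 glyphs) reflects
that dispatching a tiny batch to the thread pool costs more than rasterizing
it inline. A self-contained sketch of that shape of decision, assuming rayon
as in the surrounding code (the doubling stands in for glyph rasterization):

    use rayon::prelude::*;

    fn process(items: &[u32], multithreaded: bool) -> Vec<u32> {
        const INLINE_CUTOFF: usize = 8; // matches the threshold in the diff
        if !multithreaded || items.len() < INLINE_CUTOFF {
            items.iter().map(|i| i * 2).collect()     // serial: no dispatch cost
        } else {
            items.par_iter().map(|i| i * 2).collect() // fork-join on the pool
        }
    }

    fn main() {
        assert_eq!(process(&[1, 2, 3], true), vec![2, 4, 6]); // stays inline
        let big: Vec<u32> = (0..100).collect();
        assert_eq!(process(&big, true).len(), 100);           // goes parallel
    }
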
@@ -226,39 +184,14 @@ impl GlyphRasterizer {
glyph_cache: &mut GlyphCache,
texture_cache: &mut TextureCache,
gpu_cache: &mut GpuCache,
- profile: &mut TransactionProfile,
+ _: &mut RenderTaskCache,
+ _: &mut RenderTaskGraph,
+ _: &mut TextureCacheProfileCounters,
) {
- profile.start_time(profiler::GLYPH_RESOLVE_TIME);
-
- // Work around the borrow checker, since we call flush_glyph_requests below
- let mut pending_glyph_requests = mem::replace(
- &mut self.pending_glyph_requests,
- FastHashMap::default(),
- );
- // If we have a large amount of remaining work to do, spawn to worker threads,
- // even if that work is shared among a number of different font instances.
- let use_workers = self.pending_glyph_count >= 8;
- for (font, pending_glyphs) in pending_glyph_requests.drain() {
- self.flush_glyph_requests(
- font,
- pending_glyphs,
- use_workers,
- );
- }
- // Restore this so that we don't heap allocate next frame
- self.pending_glyph_requests = pending_glyph_requests;
- debug_assert_eq!(self.pending_glyph_count, 0);
- debug_assert!(self.pending_glyph_requests.is_empty());
-
- if self.glyph_request_count > 0 {
- profile.set(profiler::RASTERIZED_GLYPHS, self.glyph_request_count);
- self.glyph_request_count = 0;
- }
-
profile_scope!("resolve_glyphs");
// Pull rasterized glyphs from the queue and update the caches.
- while self.pending_glyph_jobs > 0 {
- self.pending_glyph_jobs -= 1;
+ while self.pending_glyphs > 0 {
+ self.pending_glyphs -= 1;
// TODO: rather than blocking until all pending glyphs are available
// we could try_recv and steal work from the thread pool to take advantage
@@ -295,19 +228,18 @@ impl GlyphRasterizer {
ImageDescriptor {
size: size2(glyph.width, glyph.height),
stride: None,
- format: glyph.format.image_format(self.can_use_r8_format),
+ format: FORMAT,
flags: ImageDescriptorFlags::empty(),
offset: 0,
},
TextureFilter::Linear,
Some(CachedImageData::Raw(Arc::new(glyph.bytes))),
- [glyph.left, -glyph.top, glyph.scale, 0.0],
+ [glyph.left, -glyph.top, glyph.scale],
DirtyRect::All,
gpu_cache,
Some(glyph_key_cache.eviction_notice()),
UvRectKind::Rect,
Eviction::Auto,
- TargetShader::Text,
);
GlyphCacheEntry::Cached(CachedGlyphInfo {
texture_cache_handle,
@@ -322,11 +254,12 @@ impl GlyphRasterizer {
// Now that we are done with the critical path (rendering the glyphs),
// we can schedule removing the fonts if needed.
self.remove_dead_fonts();
-
- profile.end_time(profiler::GLYPH_RESOLVE_TIME);
}
}
+#[allow(dead_code)]
+pub const FORMAT: ImageFormat = ImageFormat::BGRA8;
+
#[derive(Clone, Copy, Debug, MallocSizeOf, PartialEq, PartialOrd)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -708,7 +641,7 @@ impl Into<f64> for SubpixelOffset {
}
}
-#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug, Ord, PartialOrd)]
+#[derive(Clone, Hash, PartialEq, Eq, Debug, Ord, PartialOrd)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GlyphKey(u32);
@@ -759,23 +692,10 @@ pub enum GlyphFormat {
}
impl GlyphFormat {
- /// Returns the ImageFormat that a glyph should be stored as in the texture cache.
- /// can_use_r8_format should be set false on platforms where we have encountered
- /// issues with R8 textures, so that we do not use them for glyphs.
- pub fn image_format(&self, can_use_r8_format: bool) -> ImageFormat {
- match *self {
- GlyphFormat::Alpha |
- GlyphFormat::TransformedAlpha |
- GlyphFormat::Bitmap => {
- if can_use_r8_format {
- ImageFormat::R8
- } else {
- ImageFormat::BGRA8
- }
- }
- GlyphFormat::Subpixel |
- GlyphFormat::TransformedSubpixel |
- GlyphFormat::ColorBitmap => ImageFormat::BGRA8,
+ pub fn ignore_color(self) -> Self {
+ match self {
+ GlyphFormat::ColorBitmap => GlyphFormat::Bitmap,
+ _ => self,
}
}
}
@@ -957,20 +877,19 @@ pub struct GlyphRasterizer {
workers: Arc<ThreadPool>,
font_contexts: Arc<FontContexts>,
- /// The current number of individual glyphs waiting in pending batches.
- pending_glyph_count: usize,
-
- /// The current number of glyph request jobs that have been kicked to worker threads.
- pending_glyph_jobs: usize,
-
- /// The number of glyphs requested this frame.
- glyph_request_count: usize,
-
- /// A map of current glyph request batches.
- pending_glyph_requests: FastHashMap<FontInstance, SmallVec<[GlyphKey; 16]>>,
+    // The number of glyph request batches that have been kicked to
+    // the rasterizer and not yet resolved. Incremented once per batch
+    // sent and decremented as each batch of rasterized glyphs is
+    // received, so resolve_glyphs knows how many results to wait for
+    // before the frame can complete.
+ #[allow(dead_code)]
+ pending_glyphs: usize,
// Receives the rendered glyphs.
+ #[allow(dead_code)]
glyph_rx: Receiver<GlyphRasterJobs>,
+ #[allow(dead_code)]
glyph_tx: Sender<GlyphRasterJobs>,
// We defer removing fonts to the end of the frame so that:
@@ -981,16 +900,16 @@ pub struct GlyphRasterizer {
// Defer removal of font instances, as for fonts.
font_instances_to_remove: Vec<FontInstance>,
+ #[allow(dead_code)]
+ next_gpu_glyph_cache_key: GpuGlyphCacheKey,
+
// Whether to parallelize glyph rasterization with rayon.
enable_multithreading: bool,
-
- // Whether glyphs can be rasterized in r8 format when it makes sense.
- can_use_r8_format: bool,
}
impl GlyphRasterizer {
- pub fn new(workers: Arc<ThreadPool>, can_use_r8_format: bool) -> Result<Self, ResourceCacheError> {
- let (glyph_tx, glyph_rx) = unbounded_channel();
+ pub fn new(workers: Arc<ThreadPool>) -> Result<Self, ResourceCacheError> {
+ let (glyph_tx, glyph_rx) = channel();
let num_workers = workers.current_num_threads();
let mut contexts = Vec::with_capacity(num_workers);
@@ -1011,17 +930,14 @@ impl GlyphRasterizer {
Ok(GlyphRasterizer {
font_contexts: Arc::new(font_context),
- pending_glyph_jobs: 0,
- pending_glyph_count: 0,
- glyph_request_count: 0,
+ pending_glyphs: 0,
glyph_rx,
glyph_tx,
workers,
fonts_to_remove: Vec::new(),
font_instances_to_remove: Vec::new(),
+ next_gpu_glyph_cache_key: GpuGlyphCacheKey(0),
enable_multithreading: true,
- pending_glyph_requests: FastHashMap::default(),
- can_use_r8_format,
})
}
@@ -1093,9 +1009,7 @@ impl GlyphRasterizer {
#[cfg(feature = "replay")]
pub fn reset(&mut self) {
//TODO: any signals need to be sent to the workers?
- self.pending_glyph_jobs = 0;
- self.pending_glyph_count = 0;
- self.glyph_request_count = 0;
+ self.pending_glyphs = 0;
self.fonts_to_remove.clear();
self.font_instances_to_remove.clear();
}
@@ -1145,8 +1059,6 @@ struct GlyphRasterJobs {
#[cfg(test)]
mod test_glyph_rasterizer {
- pub const FORMAT: api::ImageFormat = api::ImageFormat::BGRA8;
-
#[test]
fn rasterize_200_glyphs() {
        // This test loads a font from disc, then renders 4 requests containing
@@ -1158,21 +1070,26 @@ mod test_glyph_rasterizer {
use crate::texture_cache::TextureCache;
use crate::glyph_cache::GlyphCache;
use crate::gpu_cache::GpuCache;
- use crate::profiler::TransactionProfile;
+ use crate::render_task_cache::RenderTaskCache;
+ use crate::render_task_graph::{RenderTaskGraph, RenderTaskGraphCounters};
+ use crate::profiler::TextureCacheProfileCounters;
use api::{FontKey, FontInstanceKey, FontSize, FontTemplate, FontRenderMode,
IdNamespace, ColorU};
use api::units::DevicePoint;
+ use crate::render_backend::FrameId;
use std::sync::Arc;
- use crate::glyph_rasterizer::{FontInstance, BaseFontInstance, GlyphKey, GlyphRasterizer};
+ use crate::glyph_rasterizer::{FORMAT, FontInstance, BaseFontInstance, GlyphKey, GlyphRasterizer};
let worker = ThreadPoolBuilder::new()
.thread_name(|idx|{ format!("WRWorker#{}", idx) })
.build();
let workers = Arc::new(worker.unwrap());
- let mut glyph_rasterizer = GlyphRasterizer::new(workers, true).unwrap();
- let mut glyph_cache = GlyphCache::new();
+ let mut glyph_rasterizer = GlyphRasterizer::new(workers).unwrap();
+ let mut glyph_cache = GlyphCache::new(GlyphCache::DEFAULT_MAX_BYTES_USED);
let mut gpu_cache = GpuCache::new_for_testing();
- let mut texture_cache = TextureCache::new_for_testing(2048, FORMAT);
+ let mut texture_cache = TextureCache::new_for_testing(2048, 1024, FORMAT);
+ let mut render_task_cache = RenderTaskCache::new();
+ let mut render_task_tree = RenderTaskGraph::new(FrameId::INVALID, &RenderTaskGraphCounters::new());
let mut font_file =
File::open("../wrench/reftests/text/VeraBd.ttf").expect("Couldn't open font file");
let mut font_data = vec![];
@@ -1213,6 +1130,8 @@ mod test_glyph_rasterizer {
&glyph_keys[(50 * i) .. (50 * (i + 1))],
&mut texture_cache,
&mut gpu_cache,
+ &mut render_task_cache,
+ &mut render_task_tree,
);
}
@@ -1220,85 +1139,11 @@ mod test_glyph_rasterizer {
glyph_rasterizer.resolve_glyphs(
&mut glyph_cache,
- &mut TextureCache::new_for_testing(4096, FORMAT),
- &mut gpu_cache,
- &mut TransactionProfile::new(),
- );
- }
-
- #[test]
- fn rasterize_large_glyphs() {
- // This test loads a font from disc and rasterize a few glyphs with a size of 200px to check
- // that the texture cache handles them properly.
- use rayon::ThreadPoolBuilder;
- use std::fs::File;
- use std::io::Read;
- use crate::texture_cache::TextureCache;
- use crate::glyph_cache::GlyphCache;
- use crate::gpu_cache::GpuCache;
- use crate::profiler::TransactionProfile;
- use api::{FontKey, FontInstanceKey, FontSize, FontTemplate, FontRenderMode,
- IdNamespace, ColorU};
- use api::units::DevicePoint;
- use std::sync::Arc;
- use crate::glyph_rasterizer::{FontInstance, BaseFontInstance, GlyphKey, GlyphRasterizer};
-
- let worker = ThreadPoolBuilder::new()
- .thread_name(|idx|{ format!("WRWorker#{}", idx) })
- .build();
- let workers = Arc::new(worker.unwrap());
- let mut glyph_rasterizer = GlyphRasterizer::new(workers, true).unwrap();
- let mut glyph_cache = GlyphCache::new();
- let mut gpu_cache = GpuCache::new_for_testing();
- let mut texture_cache = TextureCache::new_for_testing(2048, FORMAT);
- let mut font_file =
- File::open("../wrench/reftests/text/VeraBd.ttf").expect("Couldn't open font file");
- let mut font_data = vec![];
- font_file
- .read_to_end(&mut font_data)
- .expect("failed to read font file");
-
- let font_key = FontKey::new(IdNamespace(0), 0);
- glyph_rasterizer.add_font(font_key, FontTemplate::Raw(Arc::new(font_data), 0));
-
- let font = FontInstance::from_base(Arc::new(BaseFontInstance {
- instance_key: FontInstanceKey(IdNamespace(0), 0),
- font_key,
- size: FontSize::from_f32_px(200.0),
- bg_color: ColorU::new(0, 0, 0, 0),
- render_mode: FontRenderMode::Subpixel,
- flags: Default::default(),
- synthetic_italics: Default::default(),
- platform_options: None,
- variations: Vec::new(),
- }));
-
- let subpx_dir = font.get_subpx_dir();
-
- let mut glyph_keys = Vec::with_capacity(10);
- for i in 0 .. 10 {
- glyph_keys.push(GlyphKey::new(
- i,
- DevicePoint::zero(),
- subpx_dir,
- ));
- }
-
- glyph_rasterizer.request_glyphs(
- &mut glyph_cache,
- font.clone(),
- &glyph_keys,
- &mut texture_cache,
- &mut gpu_cache,
- );
-
- glyph_rasterizer.delete_font(font_key);
-
- glyph_rasterizer.resolve_glyphs(
- &mut glyph_cache,
- &mut TextureCache::new_for_testing(4096, FORMAT),
+ &mut TextureCache::new_for_testing(4096, 1024, FORMAT),
&mut gpu_cache,
- &mut TransactionProfile::new(),
+ &mut render_task_cache,
+ &mut render_task_tree,
+ &mut TextureCacheProfileCounters::new(),
);
}
diff --git a/third_party/webrender/webrender/src/gpu_cache.rs b/third_party/webrender/webrender/src/gpu_cache.rs
index 98dc8bf5845..c34efd09d8b 100644
--- a/third_party/webrender/webrender/src/gpu_cache.rs
+++ b/third_party/webrender/webrender/src/gpu_cache.rs
@@ -27,15 +27,13 @@
use api::{DebugFlags, DocumentId, PremultipliedColorF};
#[cfg(test)]
use api::IdNamespace;
-use api::units::*;
+use api::units::TexelRect;
use euclid::{HomogeneousVector, Rect};
use crate::internal_types::{FastHashMap, FastHashSet};
-use crate::profiler::{self, TransactionProfile};
+use crate::profiler::GpuCacheProfileCounters;
use crate::render_backend::{FrameStamp, FrameId};
-use crate::prim_store::VECS_PER_SEGMENT;
use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH;
-use crate::util::VecHelper;
-use std::{u16, u32};
+use std::{mem, u16, u32};
use std::num::NonZeroU32;
use std::ops::Add;
use std::time::{Duration, Instant};
@@ -139,6 +137,13 @@ impl From<TexelRect> for GpuBlockData {
}
+// Any data type that can be stored in the GPU cache should
+// implement this trait.
+pub trait ToGpuBlocks {
+ // Request an arbitrary number of GPU data blocks.
+ fn write_gpu_blocks(&self, _: GpuDataRequest);
+}
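
A type opts in to the new ToGpuBlocks trait by pushing its blocks through the
request. GpuDataRequest and GpuBlockData are internal to this module, so the
sketch below uses minimal stand-ins (and takes the request by mutable
reference, where the real trait takes it by value); the Ring primitive is
hypothetical:

    struct GpuBlockData { data: [f32; 4] }

    struct GpuDataRequest { blocks: Vec<GpuBlockData> }

    impl GpuDataRequest {
        fn push(&mut self, data: [f32; 4]) {
            self.blocks.push(GpuBlockData { data });
        }
    }

    trait ToGpuBlocks {
        fn write_gpu_blocks(&self, request: &mut GpuDataRequest);
    }

    // A hypothetical primitive that uploads a color followed by a rect.
    struct Ring { rgba: [f32; 4], rect: [f32; 4] }

    impl ToGpuBlocks for Ring {
        fn write_gpu_blocks(&self, request: &mut GpuDataRequest) {
            request.push(self.rgba); // block 0: premultiplied color
            request.push(self.rect); // block 1: x0, y0, x1, y1
        }
    }

    fn main() {
        let mut req = GpuDataRequest { blocks: Vec::new() };
        let ring = Ring { rgba: [1.0, 0.0, 0.0, 1.0], rect: [0.0, 0.0, 32.0, 32.0] };
        ring.write_gpu_blocks(&mut req);
        assert_eq!(req.blocks.len(), 2);
    }
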
+
// A handle to a GPU resource.
#[derive(Debug, Copy, Clone, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -151,10 +156,6 @@ impl GpuCacheHandle {
pub fn new() -> Self {
GpuCacheHandle { location: None }
}
-
- pub fn as_int(self, gpu_cache: &GpuCache) -> i32 {
- gpu_cache.get_address(&self).as_int()
- }
}
// A unique address in the GPU cache. These are uploaded
@@ -180,13 +181,6 @@ impl GpuCacheAddress {
u: u16::MAX,
v: u16::MAX,
};
-
- pub fn as_int(self) -> i32 {
- // TODO(gw): Temporarily encode GPU Cache addresses as a single int.
- // In the future, we can change the PrimitiveInstanceData struct
- // to use 2x u16 for the vertex attribute instead of an i32.
- self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32
- }
}
impl Add<usize> for GpuCacheAddress {
@@ -648,9 +642,6 @@ impl Texture {
/// works as a container that can only grow.
#[must_use]
pub struct GpuDataRequest<'a> {
- //TODO: remove this, see
- // https://bugzilla.mozilla.org/show_bug.cgi?id=1690546
- #[allow(dead_code)]
handle: &'a mut GpuCacheHandle,
frame_stamp: FrameStamp,
start_index: usize,
@@ -666,17 +657,6 @@ impl<'a> GpuDataRequest<'a> {
self.texture.pending_blocks.push(block.into());
}
- // Write the GPU cache data for an individual segment.
- pub fn write_segment(
- &mut self,
- local_rect: LayoutRect,
- extra_data: [f32; 4],
- ) {
- let _ = VECS_PER_SEGMENT;
- self.push(local_rect);
- self.push(extra_data);
- }
-
pub fn current_used_block_num(&self) -> usize {
self.texture.pending_blocks.len() - self.start_index
}
@@ -868,12 +848,18 @@ impl GpuCache {
/// device specific cache texture.
pub fn end_frame(
&mut self,
- profile: &mut TransactionProfile,
+ profile_counters: &mut GpuCacheProfileCounters,
) -> FrameStamp {
profile_scope!("end_frame");
- profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len());
- profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count);
- profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count);
+ profile_counters
+ .allocated_rows
+ .set(self.texture.rows.len());
+ profile_counters
+ .allocated_blocks
+ .set(self.texture.allocated_block_count);
+ profile_counters
+ .saved_blocks
+ .set(self.saved_block_count);
let reached_threshold =
self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
@@ -903,9 +889,9 @@ impl GpuCache {
frame_id: self.now.frame_id(),
clear,
height: self.texture.height,
- debug_commands: self.texture.debug_commands.take_and_preallocate(),
- updates: self.texture.updates.take_and_preallocate(),
- blocks: self.texture.pending_blocks.take_and_preallocate(),
+ debug_commands: mem::replace(&mut self.texture.debug_commands, Vec::new()),
+ updates: mem::replace(&mut self.texture.updates, Vec::new()),
+ blocks: mem::replace(&mut self.texture.pending_blocks, Vec::new()),
}
}
diff --git a/third_party/webrender/webrender/src/gpu_types.rs b/third_party/webrender/webrender/src/gpu_types.rs
index f6d91cab342..8e85b4dff06 100644
--- a/third_party/webrender/webrender/src/gpu_types.rs
+++ b/third_party/webrender/webrender/src/gpu_types.rs
@@ -2,14 +2,12 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{AlphaType, PremultipliedColorF, YuvFormat, YuvColorSpace};
+use api::{AlphaType, DocumentLayer, PremultipliedColorF, YuvFormat, YuvColorSpace};
+use api::EdgeAaSegmentMask;
use api::units::*;
-use crate::composite::CompositeFeatures;
-use crate::segment::EdgeAaSegmentMask;
use crate::spatial_tree::{SpatialTree, ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex};
use crate::gpu_cache::{GpuCacheAddress, GpuDataRequest};
use crate::internal_types::FastHashMap;
-use crate::prim_store::ClipData;
use crate::render_task::RenderTaskAddress;
use crate::renderer::ShaderColorMode;
use std::i32;
@@ -27,6 +25,10 @@ pub const VECS_PER_TRANSFORM: usize = 8;
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct ZBufferId(pub i32);
+const MAX_DOCUMENT_LAYERS: i8 = 1 << 3;
+const MAX_DOCUMENT_LAYER_VALUE: i8 = MAX_DOCUMENT_LAYERS / 2 - 1;
+const MIN_DOCUMENT_LAYER_VALUE: i8 = -MAX_DOCUMENT_LAYERS / 2;
+
impl ZBufferId {
pub fn invalid() -> Self {
ZBufferId(i32::MAX)
@@ -37,26 +39,53 @@ impl ZBufferId {
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct ZBufferIdGenerator {
+ base: i32,
next: i32,
- max_depth_ids: i32,
+ max_items_per_document_layer: i32,
}
impl ZBufferIdGenerator {
- pub fn new(max_depth_ids: i32) -> Self {
+ pub fn new(layer: DocumentLayer, max_depth_ids: i32) -> Self {
+ debug_assert!(layer >= MIN_DOCUMENT_LAYER_VALUE);
+ debug_assert!(layer <= MAX_DOCUMENT_LAYER_VALUE);
+ let max_items_per_document_layer = max_depth_ids / MAX_DOCUMENT_LAYERS as i32;
ZBufferIdGenerator {
+ base: layer as i32 * max_items_per_document_layer,
next: 0,
- max_depth_ids,
+ max_items_per_document_layer,
}
}
pub fn next(&mut self) -> ZBufferId {
- debug_assert!(self.next < self.max_depth_ids);
- let id = ZBufferId(self.next);
+ debug_assert!(self.next < self.max_items_per_document_layer);
+ let id = ZBufferId(self.next + self.base);
self.next += 1;
id
}
}
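A worked example of the layer partitioning above (the 65536 depth budget is an assumed caller value, not from this patch): with `MAX_DOCUMENT_LAYERS = 8`, each document layer owns `65536 / 8 = 8192` z ids, and a generator for layer 1 starts handing out ids at `1 * 8192`:

    let mut gen = ZBufferIdGenerator::new(1, 65536);
    assert_eq!(gen.next().0, 8192); // base = 1 * 8192, next = 0
    assert_eq!(gen.next().0, 8193);
    // Layer -4 (MIN_DOCUMENT_LAYER_VALUE) would start at -32768,
    // layer 3 (MAX_DOCUMENT_LAYER_VALUE) at 24576.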
+/// A shader kind identifier that can be used by a generic-shader to select the behavior at runtime.
+///
+/// Not all brush kinds need to be present in this enum, only those we want to support in the generic
+/// brush shader.
+/// The kind is packed into the top 8 bits of a vertex attribute; the 24 lowest bits carry other information.
+/// The constants must match the corresponding defines in brush_multi.glsl.
+#[repr(i32)]
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum BrushShaderKind {
+ None = 0,
+ Solid = 1,
+ Image = 2,
+ Text = 3,
+ LinearGradient = 4,
+ RadialGradient = 5,
+ ConicGradient = 6,
+ Blend = 7,
+ MixBlend = 8,
+ Yuv = 9,
+ Opacity = 10,
+}
+
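As used further down in `GlyphInstance::build` and `From<BrushInstance>`, the kind is OR-ed into the top byte of an `i32` attribute whose low 24 bits hold a resource address. A sketch of the pack/unpack (the helper names are illustrative, not from the patch):

    fn pack_kind(payload: i32, kind: BrushShaderKind) -> i32 {
        debug_assert!((payload & !0x00FF_FFFF) == 0); // payload must fit in 24 bits
        payload | ((kind as i32) << 24)
    }

    fn unpack_kind(packed: i32) -> (i32, i32) {
        // Returns (payload, kind).
        (packed & 0x00FF_FFFF, (packed >> 24) & 0xFF)
    }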
#[derive(Debug, Copy, Clone)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -84,7 +113,7 @@ pub enum BlurDirection {
Vertical,
}
-#[derive(Clone, Debug)]
+#[derive(Debug)]
#[repr(C)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -94,16 +123,17 @@ pub struct BlurInstance {
pub blur_direction: BlurDirection,
}
-#[derive(Clone, Debug)]
+#[derive(Debug)]
#[repr(C)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct ScalingInstance {
pub target_rect: DeviceRect,
- pub source_rect: DeviceRect,
+ pub source_rect: DeviceIntRect,
+ pub source_layer: i32,
}
-#[derive(Clone, Debug)]
+#[derive(Debug)]
#[repr(C)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -147,62 +177,6 @@ pub struct BorderInstance {
pub clip_params: [f32; 8],
}
-#[derive(Copy, Clone, Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-pub struct ClipMaskInstanceCommon {
- pub sub_rect: DeviceRect,
- pub task_origin: DevicePoint,
- pub screen_origin: DevicePoint,
- pub device_pixel_scale: f32,
- pub clip_transform_id: TransformPaletteId,
- pub prim_transform_id: TransformPaletteId,
-}
-
-#[derive(Clone, Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-pub struct ClipMaskInstanceImage {
- pub common: ClipMaskInstanceCommon,
- pub tile_rect: LayoutRect,
- pub resource_address: GpuCacheAddress,
- pub local_rect: LayoutRect,
-}
-
-#[derive(Clone, Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-pub struct ClipMaskInstanceRect {
- pub common: ClipMaskInstanceCommon,
- pub local_pos: LayoutPoint,
- pub clip_data: ClipData,
-}
-
-#[derive(Clone, Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-pub struct BoxShadowData {
- pub src_rect_size: LayoutSize,
- pub clip_mode: i32,
- pub stretch_mode_x: i32,
- pub stretch_mode_y: i32,
- pub dest_rect: LayoutRect,
-}
-
-#[derive(Clone, Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-pub struct ClipMaskInstanceBoxShadow {
- pub common: ClipMaskInstanceCommon,
- pub resource_address: GpuCacheAddress,
- pub shadow_data: BoxShadowData,
-}
-
/// A clipping primitive drawn into the clipping mask.
/// Could be an image or a rectangle, which defines the
/// way `address` is treated.
@@ -223,6 +197,16 @@ pub struct ClipMaskInstance {
pub device_pixel_scale: f32,
}
+/// A border corner dot or dash drawn into the clipping mask.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct ClipMaskBorderCornerDotDash {
+ pub clip_mask_instance: ClipMaskInstance,
+ pub dot_dash_data: [f32; 8],
+}
+
// 16 bytes per instance should be enough for anyone!
#[derive(Debug, Clone)]
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -231,20 +215,35 @@ pub struct PrimitiveInstanceData {
data: [i32; 4],
}
-/// Specifies that an RGB CompositeInstance's UV coordinates are normalized.
-const UV_TYPE_NORMALIZED: u32 = 0;
-/// Specifies that an RGB CompositeInstance's UV coordinates are not normalized.
-const UV_TYPE_UNNORMALIZED: u32 = 1;
+/// Vertex format for resolve style operations with pixel local storage.
+#[derive(Debug, Clone)]
+#[repr(C)]
+pub struct ResolveInstanceData {
+ rect: [f32; 4],
+}
+
+impl ResolveInstanceData {
+ pub fn new(rect: DeviceIntRect) -> Self {
+ ResolveInstanceData {
+ rect: [
+ rect.origin.x as f32,
+ rect.origin.y as f32,
+ rect.size.width as f32,
+ rect.size.height as f32,
+ ],
+ }
+ }
+}
/// Vertex format for picture cache composite shader.
/// When editing the members, update desc::COMPOSITE
/// so its list of instance_attributes matches:
-#[derive(Clone, Debug)]
+#[derive(Debug, Clone)]
#[repr(C)]
pub struct CompositeInstance {
- // Device space destination rectangle of surface
+ // Device space rectangle of surface
rect: DeviceRect,
- // Device space destination clip rect for this surface
+ // Device space clip rect for this surface
clip_rect: DeviceRect,
// Color for solid color tiles, white otherwise
color: PremultipliedColorF,
@@ -258,6 +257,9 @@ pub struct CompositeInstance {
// UV rectangles (pixel space) for color / yuv texture planes
uv_rects: [TexelRect; 3],
+
+ // Texture array layers for color / yuv texture planes
+ texture_layers: [f32; 3],
}
impl CompositeInstance {
@@ -265,6 +267,7 @@ impl CompositeInstance {
rect: DeviceRect,
clip_rect: DeviceRect,
color: PremultipliedColorF,
+ layer: f32,
z_id: ZBufferId,
) -> Self {
let uv = TexelRect::new(0.0, 0.0, 1.0, 1.0);
@@ -273,9 +276,10 @@ impl CompositeInstance {
clip_rect,
color,
z_id: z_id.0 as f32,
- color_space_or_uv_type: pack_as_float(UV_TYPE_NORMALIZED),
+ color_space_or_uv_type: pack_as_float(0u32),
yuv_format: 0.0,
yuv_rescale: 0.0,
+ texture_layers: [layer, 0.0, 0.0],
uv_rects: [uv, uv, uv],
}
}
@@ -284,6 +288,7 @@ impl CompositeInstance {
rect: DeviceRect,
clip_rect: DeviceRect,
color: PremultipliedColorF,
+ layer: f32,
z_id: ZBufferId,
uv_rect: TexelRect,
) -> Self {
@@ -292,9 +297,10 @@ impl CompositeInstance {
clip_rect,
color,
z_id: z_id.0 as f32,
- color_space_or_uv_type: pack_as_float(UV_TYPE_UNNORMALIZED),
+ color_space_or_uv_type: pack_as_float(1u32),
yuv_format: 0.0,
yuv_rescale: 0.0,
+ texture_layers: [layer, 0.0, 0.0],
uv_rects: [uv_rect, uv_rect, uv_rect],
}
}
@@ -306,6 +312,7 @@ impl CompositeInstance {
yuv_color_space: YuvColorSpace,
yuv_format: YuvFormat,
yuv_rescale: f32,
+ texture_layers: [f32; 3],
uv_rects: [TexelRect; 3],
) -> Self {
CompositeInstance {
@@ -316,29 +323,10 @@ impl CompositeInstance {
color_space_or_uv_type: pack_as_float(yuv_color_space as u32),
yuv_format: pack_as_float(yuv_format as u32),
yuv_rescale,
+ texture_layers,
uv_rects,
}
}
-
- // Returns the CompositeFeatures that can be used to composite
- // this RGB instance.
- pub fn get_rgb_features(&self) -> CompositeFeatures {
- let mut features = CompositeFeatures::empty();
-
- // If the UV rect covers the entire texture then we can avoid UV clamping.
- // We should try harder to determine this for unnormalized UVs too.
- if self.color_space_or_uv_type == pack_as_float(UV_TYPE_NORMALIZED)
- && self.uv_rects[0] == TexelRect::new(0.0, 0.0, 1.0, 1.0)
- {
- features |= CompositeFeatures::NO_UV_CLAMP;
- }
-
- if self.color == PremultipliedColorF::WHITE {
- features |= CompositeFeatures::NO_COLOR_MODULATION
- }
-
- features
- }
}
/// Vertex format for issuing colored quads.
@@ -466,7 +454,8 @@ impl GlyphInstance {
(subpx_dir as u32 as i32) << 24
| (color_mode as u32 as i32) << 16
| glyph_index_in_text_run,
- glyph_uv_rect.as_int(),
+ glyph_uv_rect.as_int()
+ | ((BrushShaderKind::Text as i32) << 24),
],
}
}
@@ -528,6 +517,7 @@ pub struct BrushInstance {
pub edge_flags: EdgeAaSegmentMask,
pub brush_flags: BrushFlags,
pub resource_address: i32,
+ pub brush_kind: BrushShaderKind,
}
impl From<BrushInstance> for PrimitiveInstanceData {
@@ -540,7 +530,8 @@ impl From<BrushInstance> for PrimitiveInstanceData {
instance.segment_index
| ((instance.edge_flags.bits() as i32) << 16)
| ((instance.brush_flags.bits() as i32) << 24),
- instance.resource_address,
+ instance.resource_address
+ | ((instance.brush_kind as i32) << 24),
]
}
}
@@ -591,14 +582,6 @@ impl TransformPaletteId {
TransformedRectKind::Complex
}
}
-
- /// Override the kind of transform stored in this id. This can be useful in
- /// cases where we don't want shaders to consider certain transforms axis-
- /// aligned (i.e. perspective warp) even though we may still want to for the
- /// general case.
- pub fn override_transform_kind(&self, kind: TransformedRectKind) -> Self {
- TransformPaletteId((self.0 & 0xFFFFFFu32) | ((kind as u32) << 24))
- }
}
/// The GPU data payload for a transform palette entry.
@@ -774,11 +757,8 @@ pub enum UvRectKind {
pub struct ImageSource {
pub p0: DevicePoint,
pub p1: DevicePoint,
- // TODO: It appears that only glyphs make use of user_data (to store glyph offset
- // and scale).
- // Perhaps we should separate the two so we don't have to push an empty unused vec4
- // for all image sources.
- pub user_data: [f32; 4],
+ pub texture_layer: f32,
+ pub user_data: [f32; 3],
pub uv_rect_kind: UvRectKind,
}
@@ -792,7 +772,12 @@ impl ImageSource {
self.p1.x,
self.p1.y,
]);
- request.push(self.user_data);
+ request.push([
+ self.texture_layer,
+ self.user_data[0],
+ self.user_data[1],
+ self.user_data[2],
+ ]);
// If this is a polygon uv kind, then upload the four vertices.
if let UvRectKind::Quad { top_left, top_right, bottom_left, bottom_right } = self.uv_rect_kind {
diff --git a/third_party/webrender/webrender/src/hit_test.rs b/third_party/webrender/webrender/src/hit_test.rs
index 0bd02cd4269..b0a402b7ed1 100644
--- a/third_party/webrender/webrender/src/hit_test.rs
+++ b/third_party/webrender/webrender/src/hit_test.rs
@@ -2,18 +2,16 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{BorderRadius, ClipMode, HitTestItem, HitTestResult, ItemTag, PrimitiveFlags, HitTestFlags};
-use api::{PipelineId, ApiHitTester, ClipId};
+use api::{BorderRadius, ClipMode, HitTestFlags, HitTestItem, HitTestResult, ItemTag, PrimitiveFlags};
+use api::{PipelineId, ApiHitTester};
use api::units::*;
-use crate::clip::{ClipItemKind, ClipStore, ClipNode, rounded_rectangle_contains_point};
-use crate::clip::polygon_contains_point;
-use crate::prim_store::PolygonKey;
-use crate::scene_builder_thread::Interners;
+use crate::clip::{ClipChainId, ClipDataStore, ClipNode, ClipItemKind, ClipStore};
+use crate::clip::{rounded_rectangle_contains_point};
use crate::spatial_tree::{SpatialNodeIndex, SpatialTree};
-use crate::internal_types::{FastHashMap, FastHashSet, LayoutPrimitiveInfo};
-use std::ops;
+use crate::internal_types::{FastHashMap, LayoutPrimitiveInfo};
+use std::{ops, u32};
use std::sync::{Arc, Mutex};
-use crate::util::{LayoutToWorldFastTransform, VecHelper};
+use crate::util::LayoutToWorldFastTransform;
pub struct SharedHitTester {
// We don't really need a mutex here. We could do with some sort of
@@ -45,7 +43,7 @@ impl ApiHitTester for SharedHitTester {
fn hit_test(&self,
pipeline_id: Option<PipelineId>,
point: WorldPoint,
- flags: HitTestFlags,
+ flags: HitTestFlags
) -> HitTestResult {
self.get_ref().hit_test(HitTest::new(pipeline_id, point, flags))
}
@@ -55,7 +53,7 @@ impl ApiHitTester for SharedHitTester {
/// data from the SpatialTree that will persist as a new frame is under construction,
/// allowing hit tests consistent with the currently rendered frame.
#[derive(MallocSizeOf)]
-struct HitTestSpatialNode {
+pub struct HitTestSpatialNode {
/// The pipeline id of this node.
pipeline_id: PipelineId,
@@ -70,20 +68,14 @@ struct HitTestSpatialNode {
}
#[derive(MallocSizeOf)]
-struct HitTestClipNode {
+pub struct HitTestClipNode {
/// A particular point must be inside all of these regions to be considered clipped in
/// for the purposes of a hit test.
region: HitTestRegion,
- /// The positioning node for this clip
- spatial_node_index: SpatialNodeIndex,
}
impl HitTestClipNode {
- fn new(
- node: ClipNode,
- spatial_node_index: SpatialNodeIndex,
- interners: &Interners,
- ) -> Self {
+ fn new(node: &ClipNode) -> Self {
let region = match node.item.kind {
ClipItemKind::Rectangle { rect, mode } => {
HitTestRegion::Rectangle(rect, mode)
@@ -91,42 +83,58 @@ impl HitTestClipNode {
ClipItemKind::RoundedRectangle { rect, radius, mode } => {
HitTestRegion::RoundedRectangle(rect, radius, mode)
}
- ClipItemKind::Image { rect, polygon_handle, .. } => {
- if let Some(handle) = polygon_handle {
- // Retrieve the polygon data from the interner.
- let polygon = &interners.polygon[handle];
- HitTestRegion::Polygon(rect, *polygon)
- } else {
- HitTestRegion::Rectangle(rect, ClipMode::Clip)
- }
+ ClipItemKind::Image { rect, .. } => {
+ HitTestRegion::Rectangle(rect, ClipMode::Clip)
}
ClipItemKind::BoxShadow { .. } => HitTestRegion::Invalid,
};
HitTestClipNode {
region,
- spatial_node_index,
}
}
}
+#[derive(Debug, Copy, Clone, MallocSizeOf, PartialEq, Eq, Hash)]
+pub struct HitTestClipChainId(u32);
+
+impl HitTestClipChainId {
+ pub const NONE: Self = HitTestClipChainId(u32::MAX);
+}
+
+/// A hit testing clip chain node is the same as a
+/// normal clip chain node, except that the clip
+/// node is embedded inside the clip chain, rather
+/// than referenced. This means we don't need to
+/// copy the complete interned clip data store for
+/// hit testing.
+#[derive(MallocSizeOf)]
+pub struct HitTestClipChainNode {
+ pub region: HitTestClipNode,
+ pub spatial_node_index: SpatialNodeIndex,
+ pub parent_clip_chain_id: HitTestClipChainId,
+}
+
+#[derive(Copy, Clone, Debug, MallocSizeOf)]
+pub struct HitTestingClipChainIndex(u32);
+
#[derive(Clone, MallocSizeOf)]
-struct HitTestingItem {
+pub struct HitTestingItem {
rect: LayoutRect,
clip_rect: LayoutRect,
tag: ItemTag,
is_backface_visible: bool,
+ #[ignore_malloc_size_of = "simple"]
+ clip_chain_range: ops::Range<HitTestingClipChainIndex>,
spatial_node_index: SpatialNodeIndex,
- #[ignore_malloc_size_of = "Range"]
- clip_nodes_range: ops::Range<ClipNodeIndex>,
}
impl HitTestingItem {
- fn new(
+ pub fn new(
tag: ItemTag,
info: &LayoutPrimitiveInfo,
spatial_node_index: SpatialNodeIndex,
- clip_nodes_range: ops::Range<ClipNodeIndex>,
+ clip_chain_range: ops::Range<HitTestingClipChainIndex>,
) -> HitTestingItem {
HitTestingItem {
rect: info.rect,
@@ -134,7 +142,7 @@ impl HitTestingItem {
tag,
is_backface_visible: info.flags.contains(PrimitiveFlags::IS_BACKFACE_VISIBLE),
spatial_node_index,
- clip_nodes_range,
+ clip_chain_range,
}
}
}
@@ -142,22 +150,19 @@ impl HitTestingItem {
/// Statistics about allocation sizes of current hit tester,
/// used to pre-allocate size of the next hit tester.
pub struct HitTestingSceneStats {
- pub clip_nodes_count: usize,
+ pub clip_chain_roots_count: usize,
pub items_count: usize,
}
impl HitTestingSceneStats {
pub fn empty() -> Self {
HitTestingSceneStats {
- clip_nodes_count: 0,
+ clip_chain_roots_count: 0,
items_count: 0,
}
}
}
-#[derive(MallocSizeOf, Debug, Copy, Clone)]
-pub struct ClipNodeIndex(u32);
-
/// Defines the immutable part of a hit tester for a given scene.
/// The hit tester is recreated each time a frame is built, since
/// it relies on the current values of the spatial tree.
@@ -166,25 +171,11 @@ pub struct ClipNodeIndex(u32);
/// hit tester instances via Arc.
#[derive(MallocSizeOf)]
pub struct HitTestingScene {
- /// Packed array of all hit test clip nodes
- clip_nodes: Vec<HitTestClipNode>,
+ /// The list of variable clip chain roots referenced by the items.
+ pub clip_chain_roots: Vec<HitTestClipChainId>,
/// List of hit testing primitives.
- items: Vec<HitTestingItem>,
-
- /// Current stack of clip ids from stacking context
- #[ignore_malloc_size_of = "ClipId"]
- clip_id_stack: Vec<ClipId>,
-
- /// Last cached clip id, useful for scenes with a lot
- /// of hit-test items that reference the same clip
- #[ignore_malloc_size_of = "simple"]
- cached_clip_id: Option<(ClipId, ops::Range<ClipNodeIndex>)>,
-
- /// Temporary buffer used to de-duplicate clip ids when creating hit
- /// test clip nodes.
- #[ignore_malloc_size_of = "ClipId"]
- seen_clips: FastHashSet<ClipId>,
+ pub items: Vec<HitTestingItem>,
}
impl HitTestingScene {
@@ -192,104 +183,39 @@ impl HitTestingScene {
/// provided by previous scene stats.
pub fn new(stats: &HitTestingSceneStats) -> Self {
HitTestingScene {
- clip_nodes: Vec::with_capacity(stats.clip_nodes_count),
+ clip_chain_roots: Vec::with_capacity(stats.clip_chain_roots_count),
items: Vec::with_capacity(stats.items_count),
- clip_id_stack: Vec::with_capacity(8),
- cached_clip_id: None,
- seen_clips: FastHashSet::default(),
}
}
/// Get stats about the current scene allocation sizes.
pub fn get_stats(&self) -> HitTestingSceneStats {
HitTestingSceneStats {
- clip_nodes_count: self.clip_nodes.len(),
+ clip_chain_roots_count: self.clip_chain_roots.len(),
items_count: self.items.len(),
}
}
/// Add a hit testing primitive.
- pub fn add_item(
- &mut self,
- tag: ItemTag,
- info: &LayoutPrimitiveInfo,
- spatial_node_index: SpatialNodeIndex,
- clip_id: ClipId,
- clip_store: &ClipStore,
- interners: &Interners,
- ) {
- let clip_range = match self.cached_clip_id {
- Some((cached_clip_id, ref range)) if cached_clip_id == clip_id => {
- range.clone()
- }
- Some(_) | None => {
- let start = ClipNodeIndex(self.clip_nodes.len() as u32);
-
- // Clear the set of which clip ids have been encountered for this item
- self.seen_clips.clear();
-
- // Flatten all clips from the stacking context hierarchy
- for clip_id in &self.clip_id_stack {
- add_clips(
- *clip_id,
- clip_store,
- &mut self.clip_nodes,
- &mut self.seen_clips,
- interners,
- );
- }
-
- // Add the primitive clip
- add_clips(
- clip_id,
- clip_store,
- &mut self.clip_nodes,
- &mut self.seen_clips,
- interners,
- );
-
- let end = ClipNodeIndex(self.clip_nodes.len() as u32);
-
- let range = ops::Range {
- start,
- end,
- };
-
- self.cached_clip_id = Some((clip_id, range.clone()));
-
- range
- }
- };
-
- let item = HitTestingItem::new(
- tag,
- info,
- spatial_node_index,
- clip_range,
- );
-
+ pub fn add_item(&mut self, item: HitTestingItem) {
self.items.push(item);
}
- /// Push a clip onto the current stack
- pub fn push_clip(
- &mut self,
- clip_id: ClipId,
- ) {
- // Invalidate the cache since the stack may affect the produced hit test clip struct
- self.cached_clip_id = None;
-
- self.clip_id_stack.push(clip_id);
+ /// Add a clip chain to the clip chain roots list.
+ pub fn add_clip_chain(&mut self, clip_chain_id: ClipChainId) {
+ if clip_chain_id != ClipChainId::INVALID {
+ self.clip_chain_roots.push(HitTestClipChainId(clip_chain_id.0));
+ }
}
- /// Pop a clip from the current stack
- pub fn pop_clip(
- &mut self,
- ) {
- // Invalidate the cache since the stack may affect the produced hit test clip struct
- self.cached_clip_id = None;
+ /// Get the slice of clip chain roots for a given hit test primitive.
+ fn get_clip_chains_for_item(&self, item: &HitTestingItem) -> &[HitTestClipChainId] {
+ &self.clip_chain_roots[item.clip_chain_range.start.0 as usize .. item.clip_chain_range.end.0 as usize]
+ }
- self.clip_id_stack.pop().unwrap();
+ /// Get the next index of the clip chain roots list.
+ pub fn next_clip_chain_index(&self) -> HitTestingClipChainIndex {
+ HitTestingClipChainIndex(self.clip_chain_roots.len() as u32)
}
}
@@ -298,11 +224,10 @@ enum HitTestRegion {
Invalid,
Rectangle(LayoutRect, ClipMode),
RoundedRectangle(LayoutRect, BorderRadius, ClipMode),
- Polygon(LayoutRect, PolygonKey),
}
impl HitTestRegion {
- fn contains(&self, point: &LayoutPoint) -> bool {
+ pub fn contains(&self, point: &LayoutPoint) -> bool {
match *self {
HitTestRegion::Rectangle(ref rectangle, ClipMode::Clip) =>
rectangle.contains(*point),
@@ -312,8 +237,6 @@ impl HitTestRegion {
rounded_rectangle_contains_point(point, &rect, &radii),
HitTestRegion::RoundedRectangle(rect, radii, ClipMode::ClipOut) =>
!rounded_rectangle_contains_point(point, &rect, &radii),
- HitTestRegion::Polygon(rect, polygon) =>
- polygon_contains_point(point, &rect, &polygon),
HitTestRegion::Invalid => true,
}
}
@@ -324,6 +247,7 @@ pub struct HitTester {
#[ignore_malloc_size_of = "Arc"]
scene: Arc<HitTestingScene>,
spatial_nodes: Vec<HitTestSpatialNode>,
+ clip_chains: Vec<HitTestClipChainNode>,
pipeline_root_nodes: FastHashMap<PipelineId, SpatialNodeIndex>,
}
@@ -332,6 +256,7 @@ impl HitTester {
HitTester {
scene: Arc::new(HitTestingScene::new(&HitTestingSceneStats::empty())),
spatial_nodes: Vec::new(),
+ clip_chains: Vec::new(),
pipeline_root_nodes: FastHashMap::default(),
}
}
@@ -339,24 +264,33 @@ impl HitTester {
pub fn new(
scene: Arc<HitTestingScene>,
spatial_tree: &SpatialTree,
+ clip_store: &ClipStore,
+ clip_data_store: &ClipDataStore,
) -> HitTester {
let mut hit_tester = HitTester {
scene,
spatial_nodes: Vec::new(),
+ clip_chains: Vec::new(),
pipeline_root_nodes: FastHashMap::default(),
};
- hit_tester.read_spatial_tree(spatial_tree);
+ hit_tester.read_spatial_tree(
+ spatial_tree,
+ clip_store,
+ clip_data_store,
+ );
hit_tester
}
fn read_spatial_tree(
&mut self,
spatial_tree: &SpatialTree,
+ clip_store: &ClipStore,
+ clip_data_store: &ClipDataStore,
) {
self.spatial_nodes.clear();
+ self.clip_chains.clear();
self.spatial_nodes.reserve(spatial_tree.spatial_nodes.len());
- self.pipeline_root_nodes.clear();
for (index, node) in spatial_tree.spatial_nodes.iter().enumerate() {
let index = SpatialNodeIndex::new(index);
@@ -379,13 +313,151 @@ impl HitTester {
external_scroll_offset: spatial_tree.external_scroll_offset(index),
});
}
+
+ // For each clip chain node, extract the clip node from the clip
+ // data store, and store it inline with the clip chain node.
+ self.clip_chains.reserve(clip_store.clip_chain_nodes.len());
+ for node in &clip_store.clip_chain_nodes {
+ let clip_node = &clip_data_store[node.handle];
+ self.clip_chains.push(HitTestClipChainNode {
+ region: HitTestClipNode::new(clip_node),
+ spatial_node_index: node.spatial_node_index,
+ parent_clip_chain_id: HitTestClipChainId(node.parent_clip_chain_id.0),
+ });
+ }
+ }
+
+ fn is_point_clipped_in_for_clip_chain(
+ &self,
+ point: WorldPoint,
+ clip_chain_id: HitTestClipChainId,
+ test: &mut HitTest
+ ) -> bool {
+ if clip_chain_id == HitTestClipChainId::NONE {
+ return true;
+ }
+
+ if let Some(result) = test.get_from_clip_chain_cache(clip_chain_id) {
+ return result == ClippedIn::ClippedIn;
+ }
+
+ let descriptor = &self.clip_chains[clip_chain_id.0 as usize];
+ let parent_clipped_in = self.is_point_clipped_in_for_clip_chain(
+ point,
+ descriptor.parent_clip_chain_id,
+ test,
+ );
+
+ if !parent_clipped_in {
+ test.set_in_clip_chain_cache(clip_chain_id, ClippedIn::NotClippedIn);
+ return false;
+ }
+
+ if !self.is_point_clipped_in_for_clip_node(
+ point,
+ clip_chain_id,
+ descriptor.spatial_node_index,
+ test,
+ ) {
+ test.set_in_clip_chain_cache(clip_chain_id, ClippedIn::NotClippedIn);
+ return false;
+ }
+
+ test.set_in_clip_chain_cache(clip_chain_id, ClippedIn::ClippedIn);
+ true
+ }
+
+ fn is_point_clipped_in_for_clip_node(
+ &self,
+ point: WorldPoint,
+ clip_chain_node_id: HitTestClipChainId,
+ spatial_node_index: SpatialNodeIndex,
+ test: &mut HitTest
+ ) -> bool {
+ if let Some(clipped_in) = test.node_cache.get(&clip_chain_node_id) {
+ return *clipped_in == ClippedIn::ClippedIn;
+ }
+
+ let node = &self.clip_chains[clip_chain_node_id.0 as usize].region;
+ let transform = self
+ .spatial_nodes[spatial_node_index.0 as usize]
+ .world_content_transform;
+ let transformed_point = match transform
+ .inverse()
+ .and_then(|inverted| inverted.transform_point2d(point))
+ {
+ Some(point) => point,
+ None => {
+ test.node_cache.insert(clip_chain_node_id, ClippedIn::NotClippedIn);
+ return false;
+ }
+ };
+
+ if !node.region.contains(&transformed_point) {
+ test.node_cache.insert(clip_chain_node_id, ClippedIn::NotClippedIn);
+ return false;
+ }
+
+ test.node_cache.insert(clip_chain_node_id, ClippedIn::ClippedIn);
+ true
}
- pub fn hit_test(&self, test: HitTest) -> HitTestResult {
+ pub fn find_node_under_point(&self, mut test: HitTest) -> Option<SpatialNodeIndex> {
let point = test.get_absolute_point(self);
+ let mut current_spatial_node_index = SpatialNodeIndex::INVALID;
+ let mut point_in_layer = None;
- let mut result = HitTestResult::default();
+ // For each hit test primitive
+ for item in self.scene.items.iter().rev() {
+ let scroll_node = &self.spatial_nodes[item.spatial_node_index.0 as usize];
+
+ // Update the cached point in layer space, if the spatial node
+ // changed since last primitive.
+ if item.spatial_node_index != current_spatial_node_index {
+ point_in_layer = scroll_node
+ .world_content_transform
+ .inverse()
+ .and_then(|inverted| inverted.transform_point2d(point));
+
+ current_spatial_node_index = item.spatial_node_index;
+ }
+
+ // Only consider hit tests on transformable layers.
+ if let Some(point_in_layer) = point_in_layer {
+ // If the item's rect or clip rect don't contain this point,
+ // it's not a valid hit.
+ if !item.rect.contains(point_in_layer) {
+ continue;
+ }
+ if !item.clip_rect.contains(point_in_layer) {
+ continue;
+ }
+
+ // See if any of the clip chain roots for this primitive
+ // cull out the item.
+ let clip_chains = self.scene.get_clip_chains_for_item(item);
+ let mut is_valid = true;
+ for clip_chain_id in clip_chains {
+ if !self.is_point_clipped_in_for_clip_chain(point, *clip_chain_id, &mut test) {
+ is_valid = false;
+ break;
+ }
+ }
+
+ // Found a valid hit test result!
+ if is_valid {
+ return Some(item.spatial_node_index);
+ }
+ }
+ }
+ None
+ }
+
+ pub fn hit_test(&self, mut test: HitTest) -> HitTestResult {
+ let point = test.get_absolute_point(self);
+
+ let mut result = HitTestResult::default();
let mut current_spatial_node_index = SpatialNodeIndex::INVALID;
let mut point_in_layer = None;
let mut current_root_spatial_node_index = SpatialNodeIndex::INVALID;
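The two `is_point_clipped_in_*` helpers above memoize per hit test: each clip chain id resolves to `ClippedIn`/`NotClippedIn` at most once, so primitives sharing ancestor chains pay the inverse-transform and contains test only once per node. A condensed, self-contained sketch of that walk (illustrative only; dense ids stand in for `HitTestClipChainId`):

    fn clipped_in(
        parents: &[Option<usize>],   // parent link per chain node, None = root
        contains_point: &[bool],     // precomputed region test per node
        cache: &mut [Option<bool>],  // per-query memo, same length
        id: usize,
    ) -> bool {
        if let Some(hit) = cache[id] {
            return hit;
        }
        let parent_ok = match parents[id] {
            Some(p) => clipped_in(parents, contains_point, cache, p),
            None => true, // the NONE sentinel terminates the walk
        };
        let result = parent_ok && contains_point[id];
        cache[id] = Some(result);
        result
    }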
@@ -421,23 +493,12 @@ impl HitTester {
continue;
}
- // See if any of the clips for this primitive cull out the item.
+ // See if any of the clip chain roots for this primitive
+ // cull out the item.
+ let clip_chains = self.scene.get_clip_chains_for_item(item);
let mut is_valid = true;
- let clip_nodes = &self.scene.clip_nodes[item.clip_nodes_range.start.0 as usize .. item.clip_nodes_range.end.0 as usize];
- for clip_node in clip_nodes {
- let transform = self
- .spatial_nodes[clip_node.spatial_node_index.0 as usize]
- .world_content_transform;
- let transformed_point = match transform
- .inverse()
- .and_then(|inverted| inverted.transform_point2d(point))
- {
- Some(point) => point,
- None => {
- continue;
- }
- };
- if !clip_node.region.contains(&transformed_point) {
+ for clip_chain_id in clip_chains {
+ if !self.is_point_clipped_in_for_clip_chain(point, *clip_chain_id, &mut test) {
is_valid = false;
break;
}
@@ -486,10 +547,15 @@ impl HitTester {
result
}
- fn get_pipeline_root(&self, pipeline_id: PipelineId) -> &HitTestSpatialNode {
+ pub fn get_pipeline_root(&self, pipeline_id: PipelineId) -> &HitTestSpatialNode {
&self.spatial_nodes[self.pipeline_root_nodes[&pipeline_id].0 as usize]
}
+}
+#[derive(Clone, Copy, MallocSizeOf, PartialEq)]
+enum ClippedIn {
+ ClippedIn,
+ NotClippedIn,
}
#[derive(MallocSizeOf)]
@@ -497,6 +563,8 @@ pub struct HitTest {
pipeline_id: Option<PipelineId>,
point: WorldPoint,
flags: HitTestFlags,
+ node_cache: FastHashMap<HitTestClipChainId, ClippedIn>,
+ clip_chain_cache: Vec<Option<ClippedIn>>,
}
impl HitTest {
@@ -508,8 +576,27 @@ impl HitTest {
HitTest {
pipeline_id,
point,
- flags
+ flags,
+ node_cache: FastHashMap::default(),
+ clip_chain_cache: Vec::new(),
+ }
+ }
+
+ fn get_from_clip_chain_cache(&mut self, index: HitTestClipChainId) -> Option<ClippedIn> {
+ let index = index.0 as usize;
+ if index >= self.clip_chain_cache.len() {
+ None
+ } else {
+ self.clip_chain_cache[index]
+ }
+ }
+
+ fn set_in_clip_chain_cache(&mut self, index: HitTestClipChainId, value: ClippedIn) {
+ let index = index.0 as usize;
+ if index >= self.clip_chain_cache.len() {
+ self.clip_chain_cache.resize(index + 1, None);
}
+ self.clip_chain_cache[index] = Some(value);
}
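Unlike `node_cache` (a `FastHashMap`), `clip_chain_cache` is a `Vec<Option<_>>` grown on demand, trading a little memory for indexed lookups keyed by dense chain ids. The same grow-on-write pattern in isolation (a sketch, not from the patch):

    fn set_sparse<T: Copy>(cache: &mut Vec<Option<T>>, index: usize, value: T) {
        if index >= cache.len() {
            cache.resize(index + 1, None); // grow lazily, gaps default to None
        }
        cache[index] = Some(value);
    }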
fn get_absolute_point(&self, hit_tester: &HitTester) -> WorldPoint {
@@ -529,45 +616,4 @@ impl HitTest {
WorldPoint::new(self.point.x, self.point.y)
})
}
-
-}
-
-/// Collect clips for a given ClipId, convert and add them to the hit testing
-/// scene, if not already present.
-fn add_clips(
- clip_id: ClipId,
- clip_store: &ClipStore,
- clip_nodes: &mut Vec<HitTestClipNode>,
- seen_clips: &mut FastHashSet<ClipId>,
- interners: &Interners,
-) {
- // If this clip-id has already been added to this hit-test item, skip it
- if seen_clips.contains(&clip_id) {
- return;
- }
- seen_clips.insert(clip_id);
-
- let template = &clip_store.templates[&clip_id];
- let instances = &clip_store.instances[template.clips.start as usize .. template.clips.end as usize];
-
- for clip in instances {
- clip_nodes.alloc().init(
- HitTestClipNode::new(
- clip.key.into(),
- clip.clip.spatial_node_index,
- interners,
- )
- );
- }
-
- // The ClipId parenting is terminated when we reach the root ClipId
- if clip_id != template.parent {
- add_clips(
- template.parent,
- clip_store,
- clip_nodes,
- seen_clips,
- interners,
- );
- }
}
diff --git a/third_party/webrender/webrender/src/host_utils.rs b/third_party/webrender/webrender/src/host_utils.rs
deleted file mode 100644
index 675680f766c..00000000000
--- a/third_party/webrender/webrender/src/host_utils.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-#[cfg(feature = "gecko")]
-mod utils {
- use std::ffi::CString;
- extern "C" {
- fn gecko_profiler_register_thread(name: *const ::std::os::raw::c_char);
- fn gecko_profiler_unregister_thread();
- }
- pub fn thread_started(thread_name: &str) {
- let name = CString::new(thread_name).unwrap();
- unsafe {
- // gecko_profiler_register_thread copies the passed name here.
- gecko_profiler_register_thread(name.as_ptr());
- }
- }
- pub fn thread_stopped() {
- unsafe { gecko_profiler_unregister_thread(); }
- }
-}
-
-#[cfg(not(feature = "gecko"))]
-mod utils {
- pub fn thread_started(_: &str) { }
- pub fn thread_stopped() { }
-}
-
-pub use utils::*;
diff --git a/third_party/webrender/webrender/src/image_source.rs b/third_party/webrender/webrender/src/image_source.rs
deleted file mode 100644
index 6b6533494a4..00000000000
--- a/third_party/webrender/webrender/src/image_source.rs
+++ /dev/null
@@ -1,94 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! This module contains the logic to obtain a primitive's source texture and uv rect.
-//!
-//! Currently this is a somewhat involved process because the code grew into having ad-hoc
-//! ways to store this information depending on how the image data is produced. The goal
-//! is for any textured primitive to be able to read from any source (texture cache, render
-//! tasks, etc.) without primitive-specific code.
-
-use crate::api::ExternalImageType;
-use crate::api::units::*;
-use crate::gpu_cache::GpuCache;
-use crate::prim_store::DeferredResolve;
-use crate::renderer::BLOCKS_PER_UV_RECT;
-use crate::render_task_cache::RenderTaskCacheEntryHandle;
-use crate::resource_cache::{ResourceCache, ImageRequest, CacheItem};
-use crate::internal_types::{TextureSource, DeferredResolveIndex};
-
-/// Resolve a resource cache's image request into a texture cache item.
-pub fn resolve_image(
- request: ImageRequest,
- resource_cache: &ResourceCache,
- gpu_cache: &mut GpuCache,
- deferred_resolves: &mut Vec<DeferredResolve>,
-) -> CacheItem {
- match resource_cache.get_image_properties(request.key) {
- Some(image_properties) => {
- // Check if an external image that needs to be resolved
- // by the render thread.
- match image_properties.external_image {
- Some(external_image) => {
- // This is an external texture - we will add it to
- // the deferred resolves list to be patched by
- // the render thread...
- let cache_handle = gpu_cache.push_deferred_per_frame_blocks(BLOCKS_PER_UV_RECT);
-
- let deferred_resolve_index = DeferredResolveIndex(deferred_resolves.len() as u32);
-
- let image_buffer_kind = match external_image.image_type {
- ExternalImageType::TextureHandle(target) => {
- target
- }
- ExternalImageType::Buffer => {
- // The ExternalImageType::Buffer should be handled by resource_cache.
- // It should go through the non-external case.
- panic!("Unexpected non-texture handle type");
- }
- };
-
- let cache_item = CacheItem {
- texture_id: TextureSource::External(deferred_resolve_index, image_buffer_kind),
- uv_rect_handle: cache_handle,
- uv_rect: DeviceIntRect::new(
- DeviceIntPoint::zero(),
- image_properties.descriptor.size,
- ),
- user_data: [0.0; 4],
- };
-
- deferred_resolves.push(DeferredResolve {
- image_properties,
- address: gpu_cache.get_address(&cache_handle),
- rendering: request.rendering,
- });
-
- cache_item
- }
- None => {
- if let Ok(cache_item) = resource_cache.get_cached_image(request) {
- cache_item
- } else {
- // There is no usable texture entry for the image key. Just return an invalid texture here.
- CacheItem::invalid()
- }
- }
- }
- }
- None => {
- CacheItem::invalid()
- }
- }
-}
-
-pub fn resolve_cached_render_task(
- handle: &RenderTaskCacheEntryHandle,
- resource_cache: &ResourceCache,
-) -> CacheItem {
- let rt_cache_entry = resource_cache
- .get_cached_render_task(&handle);
-
- resource_cache.get_texture_cache_item(&rt_cache_entry.handle)
-}
diff --git a/third_party/webrender/webrender/src/image_tiling.rs b/third_party/webrender/webrender/src/image_tiling.rs
deleted file mode 100644
index 0d003ccfef9..00000000000
--- a/third_party/webrender/webrender/src/image_tiling.rs
+++ /dev/null
@@ -1,816 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use crate::api::TileSize;
-use crate::api::units::*;
-use crate::segment::EdgeAaSegmentMask;
-use euclid::{point2, size2};
-use std::i32;
-use std::ops::Range;
-
-/// If repetitions are far enough apart that only one is within
-/// the primitive rect, then we can simplify the parameters and
-/// treat the primitive as not repeated.
-/// This can let us avoid unnecessary work later to handle some
-/// of the parameters.
-pub fn simplify_repeated_primitive(
- stretch_size: &LayoutSize,
- tile_spacing: &mut LayoutSize,
- prim_rect: &mut LayoutRect,
-) {
- let stride = *stretch_size + *tile_spacing;
-
- if stride.width >= prim_rect.size.width {
- tile_spacing.width = 0.0;
- prim_rect.size.width = f32::min(prim_rect.size.width, stretch_size.width);
- }
- if stride.height >= prim_rect.size.height {
- tile_spacing.height = 0.0;
- prim_rect.size.height = f32::min(prim_rect.size.height, stretch_size.height);
- }
-}
-
-pub struct Repetition {
- pub origin: LayoutPoint,
- pub edge_flags: EdgeAaSegmentMask,
-}
-
-pub struct RepetitionIterator {
- current_x: i32,
- x_count: i32,
- current_y: i32,
- y_count: i32,
- row_flags: EdgeAaSegmentMask,
- current_origin: LayoutPoint,
- initial_origin: LayoutPoint,
- stride: LayoutSize,
-}
-
-impl Iterator for RepetitionIterator {
- type Item = Repetition;
-
- fn next(&mut self) -> Option<Self::Item> {
- if self.current_x == self.x_count {
- self.current_y += 1;
- if self.current_y >= self.y_count {
- return None;
- }
- self.current_x = 0;
-
- self.row_flags = EdgeAaSegmentMask::empty();
- if self.current_y == self.y_count - 1 {
- self.row_flags |= EdgeAaSegmentMask::BOTTOM;
- }
-
- self.current_origin.x = self.initial_origin.x;
- self.current_origin.y += self.stride.height;
- }
-
- let mut edge_flags = self.row_flags;
- if self.current_x == 0 {
- edge_flags |= EdgeAaSegmentMask::LEFT;
- }
-
- if self.current_x == self.x_count - 1 {
- edge_flags |= EdgeAaSegmentMask::RIGHT;
- }
-
- let repetition = Repetition {
- origin: self.current_origin,
- edge_flags,
- };
-
- self.current_origin.x += self.stride.width;
- self.current_x += 1;
-
- Some(repetition)
- }
-}
-
-pub fn repetitions(
- prim_rect: &LayoutRect,
- visible_rect: &LayoutRect,
- stride: LayoutSize,
-) -> RepetitionIterator {
- let visible_rect = match prim_rect.intersection(&visible_rect) {
- Some(rect) => rect,
- None => {
- return RepetitionIterator {
- current_origin: LayoutPoint::zero(),
- initial_origin: LayoutPoint::zero(),
- current_x: 0,
- current_y: 0,
- x_count: 0,
- y_count: 0,
- stride,
- row_flags: EdgeAaSegmentMask::empty(),
- }
- }
- };
-
- assert!(stride.width > 0.0);
- assert!(stride.height > 0.0);
-
- let nx = if visible_rect.origin.x > prim_rect.origin.x {
- f32::floor((visible_rect.origin.x - prim_rect.origin.x) / stride.width)
- } else {
- 0.0
- };
-
- let ny = if visible_rect.origin.y > prim_rect.origin.y {
- f32::floor((visible_rect.origin.y - prim_rect.origin.y) / stride.height)
- } else {
- 0.0
- };
-
- let x0 = prim_rect.origin.x + nx * stride.width;
- let y0 = prim_rect.origin.y + ny * stride.height;
-
- let x_most = visible_rect.max_x();
- let y_most = visible_rect.max_y();
-
- let x_count = f32::ceil((x_most - x0) / stride.width) as i32;
- let y_count = f32::ceil((y_most - y0) / stride.height) as i32;
-
- let mut row_flags = EdgeAaSegmentMask::TOP;
- if y_count == 1 {
- row_flags |= EdgeAaSegmentMask::BOTTOM;
- }
-
- RepetitionIterator {
- current_origin: LayoutPoint::new(x0, y0),
- initial_origin: LayoutPoint::new(x0, y0),
- current_x: 0,
- current_y: 0,
- x_count,
- y_count,
- row_flags,
- stride,
- }
-}
-
-#[derive(Debug)]
-pub struct Tile {
- pub rect: LayoutRect,
- pub offset: TileOffset,
- pub edge_flags: EdgeAaSegmentMask,
-}
-
-#[derive(Debug)]
-pub struct TileIteratorExtent {
- /// Range of visible tiles to iterate over in number of tiles.
- tile_range: Range<i32>,
- /// Range of tiles of the full image including tiles that are culled out.
- image_tiles: Range<i32>,
- /// Size of the first tile in layout space.
- first_tile_layout_size: f32,
- /// Size of the last tile in layout space.
- last_tile_layout_size: f32,
- /// Position of blob point (0, 0) in layout space.
- layout_tiling_origin: f32,
- /// Position of the top-left corner of the primitive rect in layout space.
- layout_prim_start: f32,
-}
-
-#[derive(Debug)]
-pub struct TileIterator {
- current_tile: TileOffset,
- x: TileIteratorExtent,
- y: TileIteratorExtent,
- regular_tile_size: LayoutSize,
-}
-
-impl Iterator for TileIterator {
- type Item = Tile;
-
- fn next(&mut self) -> Option<Self::Item> {
- // If we reach the end of a row, reset to the beginning of the next row.
- if self.current_tile.x >= self.x.tile_range.end {
- self.current_tile.y += 1;
- self.current_tile.x = self.x.tile_range.start;
- }
-
- // Stop iterating if we reach the last tile. We may start here if there
- // were no tiles to iterate over.
- if self.current_tile.x >= self.x.tile_range.end || self.current_tile.y >= self.y.tile_range.end {
- return None;
- }
-
- let tile_offset = self.current_tile;
-
- let mut segment_rect = LayoutRect {
- origin: LayoutPoint::new(
- self.x.layout_tiling_origin + tile_offset.x as f32 * self.regular_tile_size.width,
- self.y.layout_tiling_origin + tile_offset.y as f32 * self.regular_tile_size.height,
- ),
- size: self.regular_tile_size,
- };
-
- let mut edge_flags = EdgeAaSegmentMask::empty();
-
- if tile_offset.x == self.x.image_tiles.start {
- edge_flags |= EdgeAaSegmentMask::LEFT;
- segment_rect.size.width = self.x.first_tile_layout_size;
- segment_rect.origin.x = self.x.layout_prim_start;
- }
- if tile_offset.x == self.x.image_tiles.end - 1 {
- edge_flags |= EdgeAaSegmentMask::RIGHT;
- segment_rect.size.width = self.x.last_tile_layout_size;
- }
-
- if tile_offset.y == self.y.image_tiles.start {
- segment_rect.size.height = self.y.first_tile_layout_size;
- segment_rect.origin.y = self.y.layout_prim_start;
- edge_flags |= EdgeAaSegmentMask::TOP;
- }
- if tile_offset.y == self.y.image_tiles.end - 1 {
- segment_rect.size.height = self.y.last_tile_layout_size;
- edge_flags |= EdgeAaSegmentMask::BOTTOM;
- }
-
- assert!(tile_offset.y < self.y.tile_range.end);
- let tile = Tile {
- rect: segment_rect,
- offset: tile_offset,
- edge_flags,
- };
-
- self.current_tile.x += 1;
-
- Some(tile)
- }
-}
-
-pub fn tiles(
- prim_rect: &LayoutRect,
- visible_rect: &LayoutRect,
- image_rect: &DeviceIntRect,
- device_tile_size: i32,
-) -> TileIterator {
- // The image resource is tiled. We have to generate an image primitive
- // for each tile.
- // We need to do this because the image is broken up into smaller tiles in the texture
- // cache and the image shader is not able to work with this type of sparse representation.
-
- // The tiling logic works as follows:
- //
- // +-#################-+ -+
- // | #//| | |//# | | image size
- // | #//| | |//# | |
- // +-#--+----+----+--#-+ | -+
- // | #//| | |//# | | | regular tile size
- // | #//| | |//# | | |
- // +-#--+----+----+--#-+ | -+-+
- // | #//|////|////|//# | | | "leftover" height
- // | ################# | -+ ---+
- // +----+----+----+----+
- //
- // In the ascii diagram above, a large image is split into tiles of almost regular size.
- // The tiles on the edges (hatched in the diagram) can be smaller than the regular tiles
- // and are handled separately in the code (we'll call them boundary tiles).
- //
- // Each generated segment corresponds to a tile in the texture cache, with the
- // assumption that the boundary tiles are sized to fit their own irregular size in the
- // texture cache.
- //
- // Because we can have very large virtual images we iterate over the visible portion of
- // the image in layer space instead of iterating over all device tiles.
-
- let visible_rect = match prim_rect.intersection(&visible_rect) {
- Some(rect) => rect,
- None => {
- return TileIterator {
- current_tile: TileOffset::zero(),
- x: TileIteratorExtent {
- tile_range: 0..0,
- image_tiles: 0..0,
- first_tile_layout_size: 0.0,
- last_tile_layout_size: 0.0,
- layout_tiling_origin: 0.0,
- layout_prim_start: prim_rect.origin.x,
- },
- y: TileIteratorExtent {
- tile_range: 0..0,
- image_tiles: 0..0,
- first_tile_layout_size: 0.0,
- last_tile_layout_size: 0.0,
- layout_tiling_origin: 0.0,
- layout_prim_start: prim_rect.origin.y,
- },
- regular_tile_size: LayoutSize::zero(),
- }
- }
- };
-
- // Size of regular tiles in layout space.
- let layout_tile_size = LayoutSize::new(
- device_tile_size as f32 / image_rect.size.width as f32 * prim_rect.size.width,
- device_tile_size as f32 / image_rect.size.height as f32 * prim_rect.size.height,
- );
-
- // The decomposition logic is exactly the same on each axis so we reduce
- // this to a 1-dimensional problem in an attempt to make the code simpler.
-
- let x_extent = tiles_1d(
- layout_tile_size.width,
- visible_rect.x_range(),
- prim_rect.min_x(),
- image_rect.x_range(),
- device_tile_size,
- );
-
- let y_extent = tiles_1d(
- layout_tile_size.height,
- visible_rect.y_range(),
- prim_rect.min_y(),
- image_rect.y_range(),
- device_tile_size,
- );
-
- TileIterator {
- current_tile: point2(
- x_extent.tile_range.start,
- y_extent.tile_range.start,
- ),
- x: x_extent,
- y: y_extent,
- regular_tile_size: layout_tile_size,
- }
-}
-
-/// Decompose tiles along an arbitrary axis.
-///
-/// This does most of the heavy lifting needed for `tiles` but in a single dimension for
-/// the sake of simplicity since the problem is independent on the x and y axes.
-fn tiles_1d(
- layout_tile_size: f32,
- layout_visible_range: Range<f32>,
- layout_prim_start: f32,
- device_image_range: Range<i32>,
- device_tile_size: i32,
-) -> TileIteratorExtent {
- // A few sanity checks.
- debug_assert!(layout_tile_size > 0.0);
- debug_assert!(layout_visible_range.end >= layout_visible_range.start);
- debug_assert!(device_image_range.end > device_image_range.start);
- debug_assert!(device_tile_size > 0);
-
- // Sizes of the boundary tiles in pixels.
- let first_tile_device_size = first_tile_size_1d(&device_image_range, device_tile_size);
- let last_tile_device_size = last_tile_size_1d(&device_image_range, device_tile_size);
-
- // [start..end[ Range of tiles of this row/column (in number of tiles) without
- // taking culling into account.
- let image_tiles = tile_range_1d(&device_image_range, device_tile_size);
-
- // Layout offset of tile (0, 0) with respect to the top-left corner of the display item.
- let layout_offset = device_image_range.start as f32 * layout_tile_size / device_tile_size as f32;
- // Position in layout space of tile (0, 0).
- let layout_tiling_origin = layout_prim_start - layout_offset;
-
- // [start..end[ Range of the visible tiles (because of culling).
- let visible_tiles_start = f32::floor((layout_visible_range.start - layout_tiling_origin) / layout_tile_size) as i32;
- let visible_tiles_end = f32::ceil((layout_visible_range.end - layout_tiling_origin) / layout_tile_size) as i32;
-
- // Combine the above two to get the tiles in the image that are visible this frame.
- let mut tiles_start = i32::max(image_tiles.start, visible_tiles_start);
- let tiles_end = i32::min(image_tiles.end, visible_tiles_end);
- if tiles_start > tiles_end {
- tiles_start = tiles_end;
- }
-
- // The size in layout space of the boundary tiles.
- let first_tile_layout_size = if tiles_start == image_tiles.start {
- first_tile_device_size as f32 * layout_tile_size / device_tile_size as f32
- } else {
- // boundary tile was culled out, so the new first tile is a regularly sized tile.
- layout_tile_size
- };
-
- // Same here.
- let last_tile_layout_size = if tiles_end == image_tiles.end {
- last_tile_device_size as f32 * layout_tile_size / device_tile_size as f32
- } else {
- layout_tile_size
- };
-
- TileIteratorExtent {
- tile_range: tiles_start..tiles_end,
- image_tiles,
- first_tile_layout_size,
- last_tile_layout_size,
- layout_tiling_origin,
- layout_prim_start,
- }
-}
-
-/// Compute the range of tiles (in number of tiles) that intersect the provided
-/// image range (in pixels) in an arbitrary dimension.
-///
-/// ```ignore
-///
-/// 0
-/// :
-/// #-+---+---+---+---+---+--#
-/// # | | | | | | #
-/// #-+---+---+---+---+---+--#
-/// ^ : ^
-///
-/// +------------------------+ image_range
-/// +---+ regular_tile_size
-///
-/// ```
-fn tile_range_1d(
- image_range: &Range<i32>,
- regular_tile_size: i32,
-) -> Range<i32> {
- // Integer division truncates towards zero so with negative values if the first/last
- // tile isn't a full tile we can get offset by one which we account for here.
-
- let mut start = image_range.start / regular_tile_size;
- if image_range.start % regular_tile_size < 0 {
- start -= 1;
- }
-
- let mut end = image_range.end / regular_tile_size;
- if image_range.end % regular_tile_size > 0 {
- end += 1;
- }
-
- start..end
-}
-
-// Sizes of the first boundary tile in pixels.
-//
-// It can be smaller than the regular tile size if the image is not a multiple
-// of the regular tile size.
-fn first_tile_size_1d(
- image_range: &Range<i32>,
- regular_tile_size: i32,
-) -> i32 {
- // We have to account for how the % operation behaves for negative values.
- let image_size = image_range.end - image_range.start;
- i32::min(
- match image_range.start % regular_tile_size {
- // . #------+------+ .
- // . #//////| | .
- 0 => regular_tile_size,
- // (zero) -> 0 . #--+------+ .
- // . . #//| | .
- // %(m): ~~>
- m if m > 0 => regular_tile_size - m,
- // . . #--+------+ 0 <- (zero)
- // . . #//| | .
- // %(m): <~~
- m => -m,
- },
- image_size
- )
-}
-
-// Sizes of the last boundary tile in pixels.
-//
-// It can be smaller than the regular tile size if the image is not a multiple
-// of the regular tile size.
-fn last_tile_size_1d(
- image_range: &Range<i32>,
- regular_tile_size: i32,
-) -> i32 {
- // We have to account for how the modulo operation behaves for negative values.
- let image_size = image_range.end - image_range.start;
- i32::min(
- match image_range.end % regular_tile_size {
- // +------+------# .
- // tiles: . | |//////# .
- 0 => regular_tile_size,
- // . +------+--# . 0 <- (zero)
- // . | |//# . .
- // modulo (m): <~~
- m if m < 0 => regular_tile_size + m,
- // (zero) -> 0 +------+--# . .
- // . | |//# . .
- // modulo (m): ~~>
- m => m,
- },
- image_size,
- )
-}
-
-pub fn compute_tile_rect(
- image_rect: &DeviceIntRect,
- regular_tile_size: TileSize,
- tile: TileOffset,
-) -> DeviceIntRect {
- let regular_tile_size = regular_tile_size as i32;
- DeviceIntRect {
- origin: point2(
- compute_tile_origin_1d(image_rect.x_range(), regular_tile_size, tile.x as i32),
- compute_tile_origin_1d(image_rect.y_range(), regular_tile_size, tile.y as i32),
- ),
- size: size2(
- compute_tile_size_1d(image_rect.x_range(), regular_tile_size, tile.x as i32),
- compute_tile_size_1d(image_rect.y_range(), regular_tile_size, tile.y as i32),
- ),
- }
-}
-
-fn compute_tile_origin_1d(
- img_range: Range<i32>,
- regular_tile_size: i32,
- tile_offset: i32,
-) -> i32 {
- let tile_range = tile_range_1d(&img_range, regular_tile_size);
- if tile_offset == tile_range.start {
- img_range.start
- } else {
- tile_offset * regular_tile_size
- }
-}
-
-// Compute the width and height in pixels of a tile depending on its position in the image.
-pub fn compute_tile_size(
- image_rect: &DeviceIntRect,
- regular_tile_size: TileSize,
- tile: TileOffset,
-) -> DeviceIntSize {
- let regular_tile_size = regular_tile_size as i32;
- size2(
- compute_tile_size_1d(image_rect.x_range(), regular_tile_size, tile.x as i32),
- compute_tile_size_1d(image_rect.y_range(), regular_tile_size, tile.y as i32),
- )
-}
-
-fn compute_tile_size_1d(
- img_range: Range<i32>,
- regular_tile_size: i32,
- tile_offset: i32,
-) -> i32 {
- let tile_range = tile_range_1d(&img_range, regular_tile_size);
-
- // Most tiles are going to have base_size as width and height,
- // except for tiles around the edges that are shrunk to fit the image data.
- let actual_size = if tile_offset == tile_range.start {
- first_tile_size_1d(&img_range, regular_tile_size)
- } else if tile_offset == tile_range.end - 1 {
- last_tile_size_1d(&img_range, regular_tile_size)
- } else {
- regular_tile_size
- };
-
- assert!(actual_size > 0);
-
- actual_size
-}
-
-pub fn compute_tile_range(
- visible_area: &DeviceIntRect,
- tile_size: u16,
-) -> TileRange {
- let tile_size = tile_size as i32;
- let x_range = tile_range_1d(&visible_area.x_range(), tile_size);
- let y_range = tile_range_1d(&visible_area.y_range(), tile_size);
-
- TileRange {
- origin: point2(x_range.start, y_range.start),
- size: size2(x_range.end - x_range.start, y_range.end - y_range.start),
- }
-}
-
-pub fn for_each_tile_in_range(
- range: &TileRange,
- mut callback: impl FnMut(TileOffset),
-) {
- for y in range.y_range() {
- for x in range.x_range() {
- callback(point2(x, y));
- }
- }
-}
-
-pub fn compute_valid_tiles_if_bounds_change(
- prev_rect: &DeviceIntRect,
- new_rect: &DeviceIntRect,
- tile_size: u16,
-) -> Option<TileRange> {
- let intersection = match prev_rect.intersection(new_rect) {
- Some(rect) => rect,
- None => {
- return Some(TileRange::zero());
- }
- };
-
- let left = prev_rect.min_x() != new_rect.min_x();
- let right = prev_rect.max_x() != new_rect.max_x();
- let top = prev_rect.min_y() != new_rect.min_y();
- let bottom = prev_rect.max_y() != new_rect.max_y();
-
- if !left && !right && !top && !bottom {
- // Bounds have not changed.
- return None;
- }
-
- let tw = 1.0 / (tile_size as f32);
- let th = 1.0 / (tile_size as f32);
-
- let tiles = intersection
- .cast::<f32>()
- .scale(tw, th);
-
- let min_x = if left { f32::ceil(tiles.min_x()) } else { f32::floor(tiles.min_x()) };
- let min_y = if top { f32::ceil(tiles.min_y()) } else { f32::floor(tiles.min_y()) };
- let max_x = if right { f32::floor(tiles.max_x()) } else { f32::ceil(tiles.max_x()) };
- let max_y = if bottom { f32::floor(tiles.max_y()) } else { f32::ceil(tiles.max_y()) };
-
- Some(TileRange {
- origin: point2(min_x as i32, min_y as i32),
- size: size2((max_x - min_x) as i32, (max_y - min_y) as i32),
- })
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use std::collections::HashSet;
- use euclid::rect;
-
- // this checks some additional invariants
- fn checked_for_each_tile(
- prim_rect: &LayoutRect,
- visible_rect: &LayoutRect,
- device_image_rect: &DeviceIntRect,
- device_tile_size: i32,
- callback: &mut dyn FnMut(&LayoutRect, TileOffset, EdgeAaSegmentMask),
- ) {
- let mut coverage = LayoutRect::zero();
- let mut seen_tiles = HashSet::new();
- for tile in tiles(
- prim_rect,
- visible_rect,
- device_image_rect,
- device_tile_size,
- ) {
- // make sure we don't get sent duplicate tiles
- assert!(!seen_tiles.contains(&tile.offset));
- seen_tiles.insert(tile.offset);
- coverage = coverage.union(&tile.rect);
- assert!(prim_rect.contains_rect(&tile.rect));
- callback(&tile.rect, tile.offset, tile.edge_flags);
- }
- assert!(prim_rect.contains_rect(&coverage));
- assert!(coverage.contains_rect(&visible_rect.intersection(&prim_rect).unwrap_or(LayoutRect::zero())));
- }
-
- #[test]
- fn basic() {
- let mut count = 0;
- checked_for_each_tile(&rect(0., 0., 1000., 1000.),
- &rect(75., 75., 400., 400.),
- &rect(0, 0, 400, 400),
- 36,
- &mut |_tile_rect, _tile_offset, _tile_flags| {
- count += 1;
- },
- );
- assert_eq!(count, 36);
- }
-
- #[test]
- fn empty() {
- let mut count = 0;
- checked_for_each_tile(&rect(0., 0., 74., 74.),
- &rect(75., 75., 400., 400.),
- &rect(0, 0, 400, 400),
- 36,
- &mut |_tile_rect, _tile_offset, _tile_flags| {
- count += 1;
- },
- );
- assert_eq!(count, 0);
- }
-
- #[test]
- fn test_tiles_1d() {
- // Exactly one full tile at positive offset.
- let result = tiles_1d(64.0, -10000.0..10000.0, 0.0, 0..64, 64);
- assert_eq!(result.tile_range.start, 0);
- assert_eq!(result.tile_range.end, 1);
- assert_eq!(result.first_tile_layout_size, 64.0);
- assert_eq!(result.last_tile_layout_size, 64.0);
-
- // Exactly one full tile at negative offset.
- let result = tiles_1d(64.0, -10000.0..10000.0, -64.0, -64..0, 64);
- assert_eq!(result.tile_range.start, -1);
- assert_eq!(result.tile_range.end, 0);
- assert_eq!(result.first_tile_layout_size, 64.0);
- assert_eq!(result.last_tile_layout_size, 64.0);
-
- // Two full tiles at negative and positive offsets.
- let result = tiles_1d(64.0, -10000.0..10000.0, -64.0, -64..64, 64);
- assert_eq!(result.tile_range.start, -1);
- assert_eq!(result.tile_range.end, 1);
- assert_eq!(result.first_tile_layout_size, 64.0);
- assert_eq!(result.last_tile_layout_size, 64.0);
-
- // One partial tile at positive offset, non-zero origin, culled out.
- let result = tiles_1d(64.0, -100.0..10.0, 64.0, 64..310, 64);
- assert_eq!(result.tile_range.start, result.tile_range.end);
-
- // Two tiles at negative and positive offsets, one of which is culled out.
- // The remaining tile is partially culled but it should still generate a full tile.
- let result = tiles_1d(64.0, 10.0..10000.0, -64.0, -64..64, 64);
- assert_eq!(result.tile_range.start, 0);
- assert_eq!(result.tile_range.end, 1);
- assert_eq!(result.first_tile_layout_size, 64.0);
- assert_eq!(result.last_tile_layout_size, 64.0);
- let result = tiles_1d(64.0, -10000.0..-10.0, -64.0, -64..64, 64);
- assert_eq!(result.tile_range.start, -1);
- assert_eq!(result.tile_range.end, 0);
- assert_eq!(result.first_tile_layout_size, 64.0);
- assert_eq!(result.last_tile_layout_size, 64.0);
-
-        // Stretched tile in layout space: device tile size is 64 and layout tile size is 128.
- // So the resulting tile sizes in layout space should be multiplied by two.
- let result = tiles_1d(128.0, -10000.0..10000.0, -64.0, -64..32, 64);
- assert_eq!(result.tile_range.start, -1);
- assert_eq!(result.tile_range.end, 1);
- assert_eq!(result.first_tile_layout_size, 128.0);
- assert_eq!(result.last_tile_layout_size, 64.0);
-
- // Two visible tiles (the rest is culled out).
- let result = tiles_1d(10.0, 0.0..20.0, 0.0, 0..64, 64);
- assert_eq!(result.tile_range.start, 0);
- assert_eq!(result.tile_range.end, 1);
- assert_eq!(result.first_tile_layout_size, 10.0);
- assert_eq!(result.last_tile_layout_size, 10.0);
-
- // Two visible tiles at negative layout offsets (the rest is culled out).
- let result = tiles_1d(10.0, -20.0..0.0, -20.0, 0..64, 64);
- assert_eq!(result.tile_range.start, 0);
- assert_eq!(result.tile_range.end, 1);
- assert_eq!(result.first_tile_layout_size, 10.0);
- assert_eq!(result.last_tile_layout_size, 10.0);
- }
-
- #[test]
- fn test_tile_range_1d() {
- assert_eq!(tile_range_1d(&(0..256), 256), 0..1);
- assert_eq!(tile_range_1d(&(0..257), 256), 0..2);
- assert_eq!(tile_range_1d(&(-1..257), 256), -1..2);
- assert_eq!(tile_range_1d(&(-256..256), 256), -1..1);
- assert_eq!(tile_range_1d(&(-20..-10), 6), -4..-1);
- assert_eq!(tile_range_1d(&(20..100), 256), 0..1);
- }
-
- #[test]
- fn test_first_last_tile_size_1d() {
- assert_eq!(first_tile_size_1d(&(0..10), 64), 10);
- assert_eq!(first_tile_size_1d(&(-20..0), 64), 20);
-
- assert_eq!(last_tile_size_1d(&(0..10), 64), 10);
- assert_eq!(last_tile_size_1d(&(-20..0), 64), 20);
- }
-
- #[test]
- fn doubly_partial_tiles() {
- // In the following tests the image is a single tile and none of the sides of the tile
- // align with the tile grid.
- // This can only happen when we have a single non-aligned partial tile and no regular
- // tiles.
- assert_eq!(first_tile_size_1d(&(300..310), 64), 10);
- assert_eq!(first_tile_size_1d(&(-20..-10), 64), 10);
-
- assert_eq!(last_tile_size_1d(&(300..310), 64), 10);
- assert_eq!(last_tile_size_1d(&(-20..-10), 64), 10);
-
-
-        // One partial tile at positive offset, non-zero origin.
- let result = tiles_1d(64.0, -10000.0..10000.0, 0.0, 300..310, 64);
- assert_eq!(result.tile_range.start, 4);
- assert_eq!(result.tile_range.end, 5);
- assert_eq!(result.first_tile_layout_size, 10.0);
- assert_eq!(result.last_tile_layout_size, 10.0);
- }
-
- #[test]
- fn smaller_than_tile_size_at_origin() {
- let r = compute_tile_rect(
- &rect(0, 0, 80, 80),
- 256,
- point2(0, 0),
- );
-
- assert_eq!(r, rect(0, 0, 80, 80));
- }
-
- #[test]
- fn smaller_than_tile_size_with_offset() {
- let r = compute_tile_rect(
- &rect(20, 20, 80, 80),
- 256,
- point2(0, 0),
- );
-
- assert_eq!(r, rect(20, 20, 80, 80));
- }
-}
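The removed tests above pin down the 1-D tiling math: both endpoints of a pixel range map to tile indices with floor semantics, so a range ending exactly on a tile boundary does not spill into the next tile, and negative coordinates round toward negative infinity. A minimal standalone sketch of that mapping, re-derived from the test expectations rather than taken from the removed implementation:

    // Map a half-open pixel range to the half-open range of tile indices
    // it touches. div_euclid floors toward negative infinity, which is the
    // behavior the negative-coordinate cases above rely on.
    fn tile_range_1d(range: &std::ops::Range<i32>, tile_size: i32) -> std::ops::Range<i32> {
        let start = range.start.div_euclid(tile_size);
        let end = (range.end - 1).div_euclid(tile_size) + 1;
        start..end
    }

    // Matches the expectations above, e.g.:
    //   tile_range_1d(&(-20..-10), 6)    == -4..-1
    //   tile_range_1d(&(-256..256), 256) == -1..1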
diff --git a/third_party/webrender/webrender/src/intern.rs b/third_party/webrender/webrender/src/intern.rs
index 9ba7aa75100..db7fd0c1c9d 100644
--- a/third_party/webrender/webrender/src/intern.rs
+++ b/third_party/webrender/webrender/src/intern.rs
@@ -35,17 +35,18 @@
use crate::internal_types::FastHashMap;
use malloc_size_of::MallocSizeOf;
+use crate::profiler::ResourceProfileCounter;
use std::fmt::Debug;
use std::hash::Hash;
use std::marker::PhantomData;
-use std::{ops, u64};
+use std::{mem, ops, u64};
+use std::sync::atomic::{AtomicUsize, Ordering};
use crate::util::VecHelper;
-use crate::profiler::TransactionProfile;
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Copy, Clone, Hash, MallocSizeOf, PartialEq, Eq)]
-struct Epoch(u32);
+#[derive(Debug, Copy, Clone, MallocSizeOf, PartialEq)]
+struct Epoch(u64);
/// A list of updates to be applied to the data store,
/// provided by the interning structure.
@@ -93,27 +94,37 @@ impl<S> UpdateList<S> {
}
}
+lazy_static! {
+ static ref NEXT_UID: AtomicUsize = AtomicUsize::new(0);
+}
+
/// A globally, unique identifier
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(Debug, Copy, Clone, Eq, Hash, MallocSizeOf, PartialEq)]
pub struct ItemUid {
- uid: u64,
+ uid: usize,
}
impl ItemUid {
+ pub fn next_uid() -> ItemUid {
+ let uid = NEXT_UID.fetch_add(1, Ordering::Relaxed);
+ ItemUid { uid }
+ }
+
// Intended for debug usage only
- pub fn get_uid(&self) -> u64 {
+ pub fn get_uid(&self) -> usize {
self.uid
}
}
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[derive(Debug, MallocSizeOf)]
pub struct Handle<I> {
index: u32,
epoch: Epoch,
+ uid: ItemUid,
_marker: PhantomData<I>,
}
@@ -122,6 +133,7 @@ impl<I> Clone for Handle<I> {
Handle {
index: self.index,
epoch: self.epoch,
+ uid: self.uid,
_marker: self._marker,
}
}
@@ -131,11 +143,7 @@ impl<I> Copy for Handle<I> {}
impl<I> Handle<I> {
pub fn uid(&self) -> ItemUid {
- ItemUid {
- // The index in the freelist + the epoch it was interned generates a stable
- // unique id for an interned element.
- uid: ((self.index as u64) << 32) | self.epoch.0 as u64
- }
+ self.uid
}
}
@@ -166,7 +174,7 @@ impl<I: Internable> DataStore<I> {
pub fn apply_updates(
&mut self,
update_list: UpdateList<I::Key>,
- profile: &mut TransactionProfile,
+ profile_counter: &mut ResourceProfileCounter,
) {
for insertion in update_list.insertions {
self.items
@@ -178,7 +186,8 @@ impl<I: Internable> DataStore<I> {
self.items[removal.index] = None;
}
- profile.set(I::PROFILE_COUNTER, self.items.len());
+ let per_item_size = mem::size_of::<I::Key>() + mem::size_of::<I::StoreData>();
+ profile_counter.set(self.items.len(), per_item_size * self.items.len());
}
}
@@ -198,31 +207,6 @@ impl<I: Internable> ops::IndexMut<Handle<I>> for DataStore<I> {
}
}
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(MallocSizeOf)]
-struct ItemDetails<I> {
- /// Frame that this element was first interned
- interned_epoch: Epoch,
- /// Last frame this element was referenced (used to GC intern items)
- last_used_epoch: Epoch,
- /// Index into the freelist this item is located
- index: usize,
- /// Type marker for create_handle method
- _marker: PhantomData<I>,
-}
-
-impl<I> ItemDetails<I> {
- /// Construct a stable handle value from the item details
- fn create_handle(&self) -> Handle<I> {
- Handle {
- index: self.index as u32,
- epoch: self.interned_epoch,
- _marker: PhantomData,
- }
- }
-}
-
/// The main interning data structure. This lives in the
/// scene builder thread, and handles hashing and interning
/// unique data structures. It also manages a free-list for
@@ -233,7 +217,7 @@ impl<I> ItemDetails<I> {
#[derive(MallocSizeOf)]
pub struct Interner<I: Internable> {
/// Uniquely map an interning key to a handle
- map: FastHashMap<I::Key, ItemDetails<I>>,
+ map: FastHashMap<I::Key, Handle<I>>,
/// List of free slots in the data store for re-use.
free_list: Vec<usize>,
/// Pending list of updates that need to be applied.
@@ -273,11 +257,9 @@ impl<I: Internable> Interner<I> {
// Use get_mut rather than entry here to avoid
// cloning the (sometimes large) key in the common
// case, where the data already exists in the interner.
- if let Some(details) = self.map.get_mut(data) {
- // Update the last referenced frame for this element
- details.last_used_epoch = self.current_epoch;
- // Return a stable handle value for dependency checking
- return details.create_handle();
+ if let Some(handle) = self.map.get_mut(data) {
+ handle.epoch = self.current_epoch;
+ return *handle;
}
// We need to intern a new data item. First, find out
@@ -288,14 +270,7 @@ impl<I: Internable> Interner<I> {
None => self.local_data.len(),
};
- // Generate a handle for access via the data store.
- let handle = Handle {
- index: index as u32,
- epoch: self.current_epoch,
- _marker: PhantomData,
- };
-
- let uid = handle.uid();
+ let uid = ItemUid::next_uid();
// Add a pending update to insert the new data.
self.update_list.insertions.push(Insertion {
@@ -304,17 +279,20 @@ impl<I: Internable> Interner<I> {
value: data.clone(),
});
+ // Generate a handle for access via the data store.
+ let handle = Handle {
+ index: index as u32,
+ epoch: self.current_epoch,
+ uid,
+ _marker: PhantomData,
+ };
+
#[cfg(debug_assertions)]
- data.on_interned(uid);
+ data.on_interned(handle.uid);
// Store this handle so the next time it is
// interned, it gets re-used.
- self.map.insert(data.clone(), ItemDetails {
- interned_epoch: self.current_epoch,
- last_used_epoch: self.current_epoch,
- index,
- _marker: PhantomData,
- });
+ self.map.insert(data.clone(), handle);
// Create the local data for this item that is
// being interned.
@@ -339,16 +317,16 @@ impl<I: Internable> Interner<I> {
// map each frame). It also might make sense in the
// future to adjust how long items remain in the cache
// based on the current size of the list.
- self.map.retain(|_, details| {
- if details.last_used_epoch.0 + 10 < current_epoch {
+ self.map.retain(|_, handle| {
+ if handle.epoch.0 + 10 < current_epoch {
// To expire an item:
// - Add index to the free-list for re-use.
// - Add an update to the data store to invalidate this slot.
// - Remove from the hash map.
- free_list.push(details.index);
+ free_list.push(handle.index as usize);
update_list.removals.push(Removal {
- index: details.index,
- uid: details.create_handle().uid(),
+ index: handle.index as usize,
+ uid: handle.uid,
});
return false;
}
@@ -371,70 +349,6 @@ impl<I: Internable> ops::Index<Handle<I>> for Interner<I> {
}
}
-/// Meta-macro to enumerate the various interner identifiers and types.
-///
-/// IMPORTANT: Keep this synchronized with the list in mozilla-central located at
-/// gfx/webrender_bindings/webrender_ffi.h
-///
-/// Note that this could be a lot less verbose if concat_idents! were stable. :-(
-#[macro_export]
-macro_rules! enumerate_interners {
- ($macro_name: ident) => {
- $macro_name! {
- clip: ClipIntern,
- prim: PrimitiveKeyKind,
- normal_border: NormalBorderPrim,
- image_border: ImageBorder,
- image: Image,
- yuv_image: YuvImage,
- line_decoration: LineDecoration,
- linear_grad: LinearGradient,
- radial_grad: RadialGradient,
- conic_grad: ConicGradient,
- picture: Picture,
- text_run: TextRun,
- filter_data: FilterDataIntern,
- backdrop: Backdrop,
- polygon: PolygonIntern,
- }
- }
-}
-
-macro_rules! declare_interning_memory_report {
- ( $( $name:ident: $ty:ident, )+ ) => {
- ///
- #[repr(C)]
- #[derive(AddAssign, Clone, Debug, Default)]
- pub struct InternerSubReport {
- $(
- ///
- pub $name: usize,
- )+
- }
- }
-}
-
-enumerate_interners!(declare_interning_memory_report);
-
-/// Memory report for interning-related data structures.
-/// cbindgen:derive-eq=false
-/// cbindgen:derive-ostream=false
-#[repr(C)]
-#[derive(Clone, Debug, Default)]
-pub struct InterningMemoryReport {
- ///
- pub interners: InternerSubReport,
- ///
- pub data_stores: InternerSubReport,
-}
-
-impl ::std::ops::AddAssign for InterningMemoryReport {
- fn add_assign(&mut self, other: InterningMemoryReport) {
- self.interners += other.interners;
- self.data_stores += other.data_stores;
- }
-}
-
// The trick to make trait bounds configurable by features.
mod dummy {
#[cfg(not(feature = "capture"))]
@@ -460,7 +374,4 @@ pub trait Internable: MallocSizeOf {
type Key: Eq + Hash + Clone + Debug + MallocSizeOf + InternDebug + InternSerialize + for<'a> InternDeserialize<'a>;
type StoreData: From<Self::Key> + MallocSizeOf + InternSerialize + for<'a> InternDeserialize<'a>;
type InternData: MallocSizeOf + InternSerialize + for<'a> InternDeserialize<'a>;
-
- // Profile counter indices, see the list in profiler.rs
- const PROFILE_COUNTER: usize;
}
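The interesting change in this file is how stable item ids are produced. The removed scheme packed the freelist slot and intern epoch into a single u64 (uid = (index << 32) | epoch), which no longer works once Epoch widens to u64 and the handle stops hashing on those fields; the replacement is a process-global atomic counter, so a uid stays unique even when a freelist slot is reused in a later epoch. A stripped-down sketch of the new pattern (the patch wraps the counter in lazy_static!, but a plain static works too since AtomicUsize::new is a const fn):

    use std::sync::atomic::{AtomicUsize, Ordering};

    static NEXT_UID: AtomicUsize = AtomicUsize::new(0);

    #[derive(Debug, Copy, Clone, Eq, Hash, PartialEq)]
    pub struct ItemUid { uid: usize }

    impl ItemUid {
        pub fn next_uid() -> ItemUid {
            // Relaxed ordering suffices: callers only need uniqueness,
            // not any ordering relative to other memory operations.
            ItemUid { uid: NEXT_UID.fetch_add(1, Ordering::Relaxed) }
        }
    }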
diff --git a/third_party/webrender/webrender/src/internal_types.rs b/third_party/webrender/webrender/src/internal_types.rs
index 9819850db0b..321afd22216 100644
--- a/third_party/webrender/webrender/src/internal_types.rs
+++ b/third_party/webrender/webrender/src/internal_types.rs
@@ -2,19 +2,18 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{ColorF, DocumentId, ExternalImageId, PrimitiveFlags};
-use api::{ImageFormat, NotificationRequest, Shadow, FilterOp, ImageBufferKind};
+use api::{ColorF, DebugCommand, DocumentId, ExternalImageData, ExternalImageId, PrimitiveFlags};
+use api::{ImageFormat, ItemTag, NotificationRequest, Shadow, FilterOp};
use api::units::*;
use api;
-use crate::render_api::DebugCommand;
use crate::composite::NativeSurfaceOperation;
use crate::device::TextureFilter;
-use crate::renderer::{FullFrameStats, PipelineInfo};
+use crate::renderer::PipelineInfo;
use crate::gpu_cache::GpuCacheUpdateList;
use crate::frame_builder::Frame;
-use crate::profiler::TransactionProfile;
use fxhash::FxHasher;
use plane_split::BspSplitter;
+use crate::profiler::BackendProfileCounters;
use smallvec::SmallVec;
use std::{usize, i32};
use std::collections::{HashMap, HashSet};
@@ -71,7 +70,7 @@ const OPACITY_EPSILON: f32 = 0.001;
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum Filter {
Identity,
- Blur(f32, f32),
+ Blur(f32),
Brightness(f32),
Contrast(f32),
Grayscale(f32),
@@ -117,13 +116,13 @@ impl Filter {
pub fn is_noop(&self) -> bool {
match *self {
Filter::Identity => false, // this is intentional
- Filter::Blur(width, height) => width == 0.0 && height == 0.0,
+ Filter::Blur(length) => length == 0.0,
Filter::Brightness(amount) => amount == 1.0,
Filter::Contrast(amount) => amount == 1.0,
Filter::Grayscale(amount) => amount == 0.0,
Filter::HueRotate(amount) => amount == 0.0,
Filter::Invert(amount) => amount == 0.0,
- Filter::Opacity(api::PropertyBinding::Value(amount), _) => amount >= 1.0,
+ Filter::Opacity(_, amount) => amount >= 1.0,
Filter::Saturate(amount) => amount == 1.0,
Filter::Sepia(amount) => amount == 0.0,
Filter::DropShadows(ref shadows) => {
@@ -144,7 +143,6 @@ impl Filter {
0.0, 0.0, 0.0, 0.0
]
}
- Filter::Opacity(api::PropertyBinding::Binding(..), _) |
Filter::SrgbToLinear |
Filter::LinearToSrgb |
Filter::ComponentTransfer |
@@ -180,7 +178,7 @@ impl From<FilterOp> for Filter {
fn from(op: FilterOp) -> Self {
match op {
FilterOp::Identity => Filter::Identity,
- FilterOp::Blur(w, h) => Filter::Blur(w, h),
+ FilterOp::Blur(r) => Filter::Blur(r),
FilterOp::Brightness(b) => Filter::Brightness(b),
FilterOp::Contrast(c) => Filter::Contrast(c),
FilterOp::Grayscale(g) => Filter::Grayscale(g),
@@ -233,16 +231,35 @@ pub struct SwizzleSettings {
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct CacheTextureId(pub u32);
+pub struct CacheTextureId(pub u64);
-impl CacheTextureId {
- pub const INVALID: CacheTextureId = CacheTextureId(!0);
-}
+/// Canonical type for texture layer indices.
+///
+/// WebRender is currently not very consistent about layer index types. Some
+/// places use i32 (since that's the type used in various OpenGL APIs), some
+/// places use u32 (since having it be signed is non-sensical, but the
+/// underlying graphics APIs generally operate on 32-bit integers) and some
+/// places use usize (since that's most natural in Rust).
+///
+/// Going forward, we aim to use usize throughout the codebase, since that allows
+/// operations like indexing without a cast, and convert to the required type in
+/// the device module when making calls into the platform layer.
+pub type LayerIndex = usize;
-#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+/// Identifies a render pass target that is persisted until the end of the frame.
+///
+/// By default, only the targets of the immediately-preceding pass are bound as
+/// inputs to the next pass. However, tasks can opt into having their target
+/// preserved in a list until the end of the frame, and this type specifies the
+/// index in that list.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct DeferredResolveIndex(pub u32);
+pub struct SavedTargetIndex(pub usize);
+
+impl SavedTargetIndex {
+ pub const PENDING: Self = SavedTargetIndex(!0);
+}
/// Identifies the source of an input texture to a shader.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
@@ -254,37 +271,20 @@ pub enum TextureSource {
/// An entry in the texture cache.
TextureCache(CacheTextureId, Swizzle),
    /// An external image texture, managed by the embedding.
- External(DeferredResolveIndex, ImageBufferKind),
+ External(ExternalImageData),
+ /// The alpha target of the immediately-preceding pass.
+ PrevPassAlpha,
+ /// The color target of the immediately-preceding pass.
+ PrevPassColor,
+ /// A render target from an earlier pass. Unlike the immediately-preceding
+ /// passes, these are not made available automatically, but are instead
+ /// opt-in by the `RenderTask` (see `mark_for_saving()`).
+ RenderTaskCache(SavedTargetIndex, Swizzle),
/// Select a dummy 1x1 white texture. This can be used by image
/// shaders that want to draw a solid color.
Dummy,
}
-impl TextureSource {
- pub fn image_buffer_kind(&self) -> ImageBufferKind {
- match *self {
- TextureSource::TextureCache(..) => ImageBufferKind::Texture2D,
-
- TextureSource::External(_, image_buffer_kind) => image_buffer_kind,
-
- // Render tasks use texture arrays for now.
- TextureSource::Dummy => ImageBufferKind::Texture2D,
-
- TextureSource::Invalid => ImageBufferKind::Texture2D,
- }
- }
-
- #[inline]
- pub fn is_compatible(
- &self,
- other: &TextureSource,
- ) -> bool {
- *self == TextureSource::Invalid ||
- *other == TextureSource::Invalid ||
- self == other
- }
-}
-
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -314,13 +314,13 @@ pub struct TextureCacheAllocation {
}
/// Information used when allocating / reallocating.
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
+#[derive(Debug)]
pub struct TextureCacheAllocInfo {
pub width: i32,
pub height: i32,
+ pub layer_count: i32,
pub format: ImageFormat,
pub filter: TextureFilter,
- pub target: ImageBufferKind,
/// Indicates whether this corresponds to one of the shared texture caches.
pub is_shared_cache: bool,
/// If true, this texture requires a depth target.
@@ -332,6 +332,10 @@ pub struct TextureCacheAllocInfo {
pub enum TextureCacheAllocationKind {
/// Performs an initial texture allocation.
Alloc(TextureCacheAllocInfo),
+ /// Reallocates the texture. The existing live texture with the same id
+ /// will be deallocated and its contents blitted over. The new size must
+ /// be greater than the old size.
+ Realloc(TextureCacheAllocInfo),
/// Reallocates the texture without preserving its contents.
Reset(TextureCacheAllocInfo),
/// Frees the texture and the corresponding cache ID.
@@ -344,6 +348,7 @@ pub struct TextureCacheUpdate {
pub rect: DeviceIntRect,
pub stride: Option<i32>,
pub offset: i32,
+ pub layer_index: i32,
pub format_override: Option<ImageFormat>,
pub source: TextureUpdateSource,
}
@@ -403,6 +408,7 @@ impl TextureUpdateList {
origin: DeviceIntPoint,
width: i32,
height: i32,
+ layer_index: usize
) {
let size = DeviceIntSize::new(width, height);
let rect = DeviceIntRect::new(origin, size);
@@ -410,6 +416,7 @@ impl TextureUpdateList {
rect,
stride: None,
offset: 0,
+ layer_index: layer_index as i32,
format_override: None,
source: TextureUpdateSource::DebugClear,
});
@@ -427,11 +434,30 @@ impl TextureUpdateList {
/// Pushes a reallocation operation onto the list, potentially coalescing
/// with previous operations.
- pub fn push_reset(&mut self, id: CacheTextureId, info: TextureCacheAllocInfo) {
+ pub fn push_realloc(&mut self, id: CacheTextureId, info: TextureCacheAllocInfo) {
self.debug_assert_coalesced(id);
- // Drop any unapplied updates to the to-be-freed texture.
- self.updates.remove(&id);
+        // Coalesce this realloc into a previous alloc or realloc, if available.
+ if let Some(cur) = self.allocations.iter_mut().find(|x| x.id == id) {
+ match cur.kind {
+ TextureCacheAllocationKind::Alloc(ref mut i) => *i = info,
+ TextureCacheAllocationKind::Realloc(ref mut i) => *i = info,
+ TextureCacheAllocationKind::Reset(ref mut i) => *i = info,
+ TextureCacheAllocationKind::Free => panic!("Reallocating freed texture"),
+ }
+ return
+ }
+
+ self.allocations.push(TextureCacheAllocation {
+ id,
+ kind: TextureCacheAllocationKind::Realloc(info),
+ });
+ }
+
+ /// Pushes a reallocation operation onto the list, potentially coalescing
+ /// with previous operations.
+ pub fn push_reset(&mut self, id: CacheTextureId, info: TextureCacheAllocInfo) {
+ self.debug_assert_coalesced(id);
        // Coalesce this realloc into a previous alloc or realloc, if available.
if let Some(cur) = self.allocations.iter_mut().find(|x| x.id == id) {
@@ -439,6 +465,10 @@ impl TextureUpdateList {
TextureCacheAllocationKind::Alloc(ref mut i) => *i = info,
TextureCacheAllocationKind::Reset(ref mut i) => *i = info,
TextureCacheAllocationKind::Free => panic!("Resetting freed texture"),
+ TextureCacheAllocationKind::Realloc(_) => {
+ // Reset takes precedence over realloc
+ cur.kind = TextureCacheAllocationKind::Reset(info);
+ }
}
return
}
@@ -464,6 +494,7 @@ impl TextureUpdateList {
match removed_kind {
Some(TextureCacheAllocationKind::Alloc(..)) => { /* no-op! */ },
Some(TextureCacheAllocationKind::Free) => panic!("Double free"),
+ Some(TextureCacheAllocationKind::Realloc(..)) |
Some(TextureCacheAllocationKind::Reset(..)) |
None => {
self.allocations.push(TextureCacheAllocation {
@@ -503,11 +534,11 @@ impl ResourceUpdateList {
pub struct RenderedDocument {
pub frame: Frame,
pub is_new_scene: bool,
- pub profile: TransactionProfile,
- pub frame_stats: Option<FullFrameStats>
}
pub enum DebugOutput {
+ FetchDocuments(String),
+ FetchClipScrollTree(String),
#[cfg(feature = "capture")]
SaveCapture(CaptureConfig, Vec<ExternalCaptureImage>),
#[cfg(feature = "replay")]
@@ -529,6 +560,7 @@ pub enum ResultMsg {
DocumentId,
RenderedDocument,
ResourceUpdateList,
+ BackendProfileCounters,
),
AppendNotificationRequests(Vec<NotificationRequest>),
ForceRedraw,
@@ -536,14 +568,13 @@ pub enum ResultMsg {
#[derive(Clone, Debug)]
pub struct ResourceCacheError {
- #[allow(dead_code)]
- description: String,
+ _description: String,
}
impl ResourceCacheError {
pub fn new(description: String) -> ResourceCacheError {
ResourceCacheError {
- description,
+ _description: description,
}
}
}
@@ -556,6 +587,7 @@ pub struct LayoutPrimitiveInfo {
pub rect: LayoutRect,
pub clip_rect: LayoutRect,
pub flags: PrimitiveFlags,
+ pub hit_info: Option<ItemTag>,
}
impl LayoutPrimitiveInfo {
@@ -564,6 +596,7 @@ impl LayoutPrimitiveInfo {
rect,
clip_rect,
flags: PrimitiveFlags::default(),
+ hit_info: None,
}
}
}
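Both push_realloc and the reworked push_reset coalesce into a pending allocation for the same texture id, but with different precedence: a realloc folds its info into whatever operation is already queued, while a reset downgrades a pending Realloc to Reset, since contents that are going to be discarded need not be preserved. A condensed sketch of the combined rule (a hypothetical helper; in the patch the logic lives inline in the two methods):

    fn coalesce(
        kind: &mut TextureCacheAllocationKind,
        info: TextureCacheAllocInfo,
        is_reset: bool,
    ) {
        use TextureCacheAllocationKind::*;
        match kind {
            // An existing alloc or reset just absorbs the new parameters.
            Alloc(i) | Reset(i) => *i = info,
            // A pending realloc is updated in place...
            Realloc(i) if !is_reset => *i = info,
            // ...unless the new operation is a reset, which takes precedence.
            Realloc(_) => *kind = Reset(info),
            Free => panic!("Allocating over a freed texture"),
        }
    }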
diff --git a/third_party/webrender/webrender/src/lib.rs b/third_party/webrender/webrender/src/lib.rs
index cc200e30eb1..0db5feae9ee 100644
--- a/third_party/webrender/webrender/src/lib.rs
+++ b/third_party/webrender/webrender/src/lib.rs
@@ -57,6 +57,8 @@ macro_rules! matches {
#[macro_use]
extern crate bitflags;
#[macro_use]
+extern crate cfg_if;
+#[macro_use]
extern crate cstr;
#[macro_use]
extern crate lazy_static;
@@ -69,8 +71,7 @@ extern crate malloc_size_of_derive;
extern crate serde;
#[macro_use]
extern crate tracy_rs;
-#[macro_use]
-extern crate derive_more;
+
use malloc_size_of;
#[macro_use]
@@ -82,13 +83,13 @@ mod box_shadow;
#[cfg(any(feature = "capture", feature = "replay"))]
mod capture;
mod clip;
-mod space;
mod spatial_tree;
mod composite;
-mod compositor;
mod debug_colors;
mod debug_font_data;
-mod debug_item;
+mod debug_render;
+#[cfg(feature = "debugger")]
+mod debug_server;
mod device;
mod ellipse;
mod filterdata;
@@ -101,10 +102,10 @@ mod glyph_rasterizer;
mod gpu_cache;
mod gpu_types;
mod hit_test;
+mod intern;
mod internal_types;
mod lru_cache;
mod picture;
-mod prepare;
mod prim_store;
mod print_tree;
mod render_backend;
@@ -119,22 +120,12 @@ mod scene_builder_thread;
mod scene_building;
mod screen_capture;
mod segment;
+mod shade;
mod spatial_node;
-mod texture_pack;
+mod storage;
+mod texture_allocator;
mod texture_cache;
-mod tile_cache;
mod util;
-mod visibility;
-mod api_resources;
-mod image_tiling;
-mod image_source;
-mod rectangle_occlusion;
-pub mod host_utils;
-
-///
-pub mod intern;
-///
-pub mod render_api;
mod shader_source {
include!(concat!(env!("OUT_DIR"), "/shaders.rs"));
@@ -162,19 +153,39 @@ mod platform {
}
}
+#[cfg(target_os = "macos")]
+use core_foundation;
+#[cfg(target_os = "macos")]
+use core_graphics;
+#[cfg(target_os = "macos")]
+use core_text;
+
+#[cfg(target_os = "windows")]
+use dwrote;
+
pub use euclid;
+#[cfg(feature = "debugger")]
+use serde_json;
#[macro_use]
extern crate smallvec;
+#[cfg(feature = "debugger")]
+use ws;
+#[cfg(feature = "debugger")]
+use image_loader;
+#[cfg(feature = "debugger")]
+use base64;
+#[cfg(all(feature = "capture", feature = "png"))]
+use png;
#[cfg(test)]
use rand;
-pub use api;
+#[macro_use]
+pub extern crate api;
use webrender_build;
#[doc(hidden)]
-pub use crate::composite::{CompositorConfig, Compositor, CompositorCapabilities, CompositorSurfaceTransform};
-pub use crate::composite::{NativeSurfaceId, NativeTileId, NativeSurfaceInfo, PartialPresentCompositor};
-pub use crate::composite::{MappableCompositor, MappedTileInfo, SWGLCompositeSurfaceInfo};
+pub use crate::composite::{CompositorConfig, Compositor, CompositorCapabilities};
+pub use crate::composite::{NativeSurfaceId, NativeTileId, NativeSurfaceInfo};
pub use crate::device::{UploadMethod, VertexUsageHint, get_gl_target, get_unoptimized_shader_source};
pub use crate::device::{ProgramBinary, ProgramCache, ProgramCacheObserver, FormatDesc};
pub use crate::device::Device;
@@ -184,21 +195,16 @@ pub use crate::profiler::{ProfilerHooks, set_profiler_hooks};
pub use crate::renderer::{
AsyncPropertySampler, CpuProfile, DebugFlags, GpuProfile, GraphicsApi,
GraphicsApiInfo, PipelineInfo, Renderer, RendererError, RendererOptions, RenderResults,
- RendererStats, SceneBuilderHooks, Shaders, SharedShaders, ShaderPrecacheFlags,
- MAX_VERTEX_TEXTURE_WIDTH, ONE_TIME_USAGE_HINT,
+ RendererStats, SceneBuilderHooks, ThreadListener, ShaderPrecacheFlags,
+ MAX_VERTEX_TEXTURE_WIDTH,
};
pub use crate::hit_test::SharedHitTester;
pub use crate::internal_types::FastHashMap;
pub use crate::screen_capture::{AsyncScreenshotHandle, RecordedFrameHandle};
-pub use crate::texture_cache::TextureCacheConfig;
+pub use crate::shade::{Shaders, WrShaders};
pub use api as webrender_api;
pub use webrender_build::shader::ProgramSourceDigest;
pub use crate::picture::{TileDescriptor, TileId, InvalidationReason};
pub use crate::picture::{PrimitiveCompareResult, PrimitiveCompareResultDetail, CompareHelperResult};
pub use crate::picture::{TileNode, TileNodeKind, TileSerializer, TileCacheInstanceSerializer, TileOffset, TileCacheLoggerUpdateLists};
pub use crate::intern::ItemUid;
-pub use crate::render_api::*;
-pub use crate::tile_cache::{PictureCacheDebugInfo, DirtyTileDebugInfo, TileDebugInfo, SliceDebugInfo};
-
-#[cfg(feature = "sw_compositor")]
-pub use crate::compositor::sw_compositor;
diff --git a/third_party/webrender/webrender/src/lru_cache.rs b/third_party/webrender/webrender/src/lru_cache.rs
index d53119b77d1..f741f9fb6ff 100644
--- a/third_party/webrender/webrender/src/lru_cache.rs
+++ b/third_party/webrender/webrender/src/lru_cache.rs
@@ -12,44 +12,29 @@ use std::{mem, num};
texture cache requires, but should be usable as a general LRU cache
type if useful in other areas.
- The cache is implemented with two types of backing freelists. These allow
+ The cache is implemented with two backing freelists. These allow
random access to the underlying data, while being efficient in both
memory access and allocation patterns.
- The "entries" freelist stores the elements being cached (for example, the
+ The first freelist stores the elements being cached (for example, the
CacheEntry structure for the texture cache). These elements are stored
in arbitrary order, reusing empty slots in the freelist where possible.
- The "lru_index" freelists store the LRU tracking information. Although the
+ The second freelist stores the LRU tracking information. Although the
tracking elements are stored in arbitrary order inside a freelist for
efficiency, they use next/prev links to represent a doubly-linked list,
kept sorted in order of recent use. The next link is also used to store
the current freelist within the array when the element is not occupied.
-
- The LRU cache allows having multiple LRU "partitions". Every entry is tracked
- by exactly one partition at any time; all partitions refer to entries in the
- shared freelist. Entries can move between partitions, if replace_or_insert is
- called with a new partition index for an existing handle.
- The partitioning is used by the texture cache so that, for example, allocating
- more glyph entries does not cause eviction of image entries (which go into
- a different shared texture). If an existing handle's entry is reallocated with
- a new size, it might need to move from a shared texture to a standalone
- texture; in this case the handle will move to a different LRU partition.
*/
/// Stores the data supplied by the user to be cached, and an index
/// into the LRU tracking freelist for this element.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(MallocSizeOf)]
struct LRUCacheEntry<T> {
- /// The LRU partition that tracks this entry.
- partition_index: u8,
-
- /// The location of the LRU tracking element for this cache entry in the
- /// right LRU partition.
- lru_index: ItemIndex,
-
+ /// The location of the LRU tracking element for this cache entry.
+ /// This is None if the entry has manual eviction policy enabled.
+ lru_index: Option<ItemIndex>,
/// The cached data provided by the caller for this element.
value: T,
}
@@ -57,21 +42,19 @@ struct LRUCacheEntry<T> {
/// The main public interface to the LRU cache
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(MallocSizeOf)]
pub struct LRUCache<T, M> {
/// A free list of cache entries, and indices into the LRU tracking list
entries: FreeList<LRUCacheEntry<T>, M>,
/// The LRU tracking list, allowing O(1) access to the oldest element
- lru: Vec<LRUTracker<FreeListHandle<M>>>,
+ lru: LRUTracker<FreeListHandle<M>>,
}
impl<T, M> LRUCache<T, M> {
/// Construct a new LRU cache
- pub fn new(lru_partition_count: usize) -> Self {
- assert!(lru_partition_count <= u8::MAX as usize + 1);
+ pub fn new() -> Self {
LRUCache {
entries: FreeList::new(),
- lru: (0..lru_partition_count).map(|_| LRUTracker::new()).collect(),
+ lru: LRUTracker::new(),
}
}
@@ -80,7 +63,6 @@ impl<T, M> LRUCache<T, M> {
/// may be evicted at any time.
pub fn push_new(
&mut self,
- partition_index: u8,
value: T,
) -> WeakFreeListHandle<M> {
// It's a slightly awkward process to insert an element, since we don't know
@@ -89,9 +71,8 @@ impl<T, M> LRUCache<T, M> {
// Insert the data provided by the caller
let handle = self.entries.insert(LRUCacheEntry {
- partition_index: 0,
- lru_index: ItemIndex(num::NonZeroU32::new(1).unwrap()),
- value
+ lru_index: None,
+ value,
});
// Get a weak handle to return to the caller
@@ -100,13 +81,22 @@ impl<T, M> LRUCache<T, M> {
// Add an LRU tracking node that owns the strong handle, and store the location
// of this inside the cache entry.
let entry = self.entries.get_mut(&handle);
- let lru_index = self.lru[partition_index as usize].push_new(handle);
- entry.partition_index = partition_index;
- entry.lru_index = lru_index;
+ entry.lru_index = Some(self.lru.push_new(handle));
weak_handle
}
+ /// Get immutable access to the data at a given slot. Since this takes a strong
+ /// handle, it's guaranteed to be valid.
+ pub fn get(
+ &self,
+ handle: &FreeListHandle<M>,
+ ) -> &T {
+ &self.entries
+ .get(handle)
+ .value
+ }
+
/// Get immutable access to the data at a given slot. Since this takes a weak
/// handle, it may have been evicted, so returns an Option.
pub fn get_opt(
@@ -133,27 +123,18 @@ impl<T, M> LRUCache<T, M> {
})
}
- /// Return a reference to the oldest item in the cache, keeping it in the cache.
- /// If the cache is empty, this will return None.
- pub fn peek_oldest(&self, partition_index: u8) -> Option<&T> {
- self.lru[partition_index as usize]
- .peek_front()
- .map(|handle| {
- let entry = self.entries.get(handle);
- &entry.value
- })
- }
-
/// Remove the oldest item from the cache. This is used to select elements to
- /// be evicted. If the cache is empty, this will return None.
+ /// be evicted. If the cache is empty, or all elements in the cache have manual
+    /// eviction enabled, this will return None.
pub fn pop_oldest(
&mut self,
- partition_index: u8,
) -> Option<T> {
- self.lru[partition_index as usize]
+ self.lru
.pop_front()
.map(|handle| {
let entry = self.entries.free(handle);
+ // We should only find elements in this list with valid LRU location
+ debug_assert!(entry.lru_index.is_some());
entry.value
})
}
@@ -167,68 +148,82 @@ impl<T, M> LRUCache<T, M> {
pub fn replace_or_insert(
&mut self,
handle: &mut WeakFreeListHandle<M>,
- partition_index: u8,
data: T,
) -> Option<T> {
match self.entries.get_opt_mut(handle) {
Some(entry) => {
- if entry.partition_index != partition_index {
- // Move to a different partition.
- let strong_handle = self.lru[entry.partition_index as usize].remove(entry.lru_index);
- let lru_index = self.lru[partition_index as usize].push_new(strong_handle);
- entry.partition_index = partition_index;
- entry.lru_index = lru_index;
- }
Some(mem::replace(&mut entry.value, data))
}
None => {
- *handle = self.push_new(partition_index, data);
+ *handle = self.push_new(data);
None
}
}
}
- /// Manually evict a specific item.
- pub fn remove(&mut self, handle: &WeakFreeListHandle<M>) -> Option<T> {
- if let Some(entry) = self.entries.get_opt_mut(handle) {
- let strong_handle = self.lru[entry.partition_index as usize].remove(entry.lru_index);
- return Some(self.entries.free(strong_handle).value);
- }
-
- None
- }
-
/// This is used by the calling code to signal that the element that this handle
/// references has been used on this frame. Internally, it updates the links in
/// the LRU tracking element to move this item to the end of the LRU list. Returns
/// the underlying data in case the client wants to mutate it.
pub fn touch(
&mut self,
- handle: &WeakFreeListHandle<M>,
+ handle: &WeakFreeListHandle<M>
) -> Option<&mut T> {
let lru = &mut self.lru;
self.entries
.get_opt_mut(handle)
.map(|entry| {
- lru[entry.partition_index as usize].mark_used(entry.lru_index);
+ // Only have a valid LRU index if eviction mode is auto
+ if let Some(lru_index) = entry.lru_index {
+ lru.mark_used(lru_index);
+ }
+
&mut entry.value
})
}
+ /// In some special cases, the caller may want to manually manage the
+ /// lifetime of a resource. This method removes the LRU tracking information
+ /// for an element, and returns the strong handle to the caller to manage.
+ #[must_use]
+ pub fn set_manual_eviction(
+ &mut self,
+ handle: &WeakFreeListHandle<M>,
+ ) -> Option<FreeListHandle<M>> {
+ let entry = self.entries
+ .get_opt_mut(handle)
+ .expect("bug: trying to set manual eviction on an invalid handle");
+
+ // Remove the LRU tracking information from this element, if it exists.
+ // (it may be None if manual eviction was already enabled for this element).
+ entry.lru_index.take().map(|lru_index| {
+ self.lru.remove(lru_index)
+ })
+ }
+
+ /// Remove an element that is in manual eviction mode. This takes the caller
+ /// managed strong handle, and removes this element from the freelist.
+ pub fn remove_manual_handle(
+ &mut self,
+ handle: FreeListHandle<M>,
+ ) -> T {
+ let entry = self.entries.free(handle);
+ debug_assert_eq!(entry.lru_index, None, "Must be manual eviction mode!");
+ entry.value
+ }
+
/// Try to validate that the state of the cache is consistent
#[cfg(test)]
fn validate(&self) {
- for lru in &self.lru {
- lru.validate();
- }
+ self.lru.validate();
}
}
/// Index of an LRU tracking element
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, MallocSizeOf)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
struct ItemIndex(num::NonZeroU32);
impl ItemIndex {
@@ -243,7 +238,7 @@ impl ItemIndex {
/// to minimize heap allocations and improve cache access patterns.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, MallocSizeOf)]
+#[derive(Debug)]
struct Item<H> {
prev: Option<ItemIndex>,
next: Option<ItemIndex>,
@@ -253,7 +248,6 @@ struct Item<H> {
/// Internal implementation of the LRU tracking list
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(MallocSizeOf)]
struct LRUTracker<H> {
/// Current head of the list - this is the oldest item that will be evicted next.
head: Option<ItemIndex>,
@@ -388,11 +382,6 @@ impl<H> LRUTracker<H> where H: std::fmt::Debug {
item_index
}
- /// Returns a reference to the oldest element, or None if the list is empty.
- fn peek_front(&self) -> Option<&H> {
- self.head.map(|head| self.items[head.as_usize()].handle.as_ref().unwrap())
- }
-
/// Remove the oldest element from the front of the LRU list. Returns None
/// if the list is empty.
fn pop_front(
@@ -434,7 +423,8 @@ impl<H> LRUTracker<H> where H: std::fmt::Debug {
}
/// Manually remove an item from the LRU tracking list. This is used
- /// when an element switches from one LRU partition to a different one.
+ /// when an element switches from having its lifetime managed by the LRU
+ /// algorithm to having a manual eviction policy.
fn remove(
&mut self,
index: ItemIndex,
@@ -537,32 +527,6 @@ impl<H> LRUTracker<H> where H: std::fmt::Debug {
}
#[test]
-fn test_lru_tracker_push_peek() {
- // Push elements, peek and ensure:
- // - peek_oldest returns None before first element pushed
- // - peek_oldest returns oldest element
- // - subsequent calls to peek_oldest return same element (nothing was removed)
- struct CacheMarker;
- const NUM_ELEMENTS: usize = 50;
-
- let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
- cache.validate();
-
- assert_eq!(cache.peek_oldest(0), None);
-
- for i in 0 .. NUM_ELEMENTS {
- cache.push_new(0, i);
- }
- cache.validate();
-
- assert_eq!(cache.peek_oldest(0), Some(&0));
- assert_eq!(cache.peek_oldest(0), Some(&0));
-
- cache.pop_oldest(0);
- assert_eq!(cache.peek_oldest(0), Some(&1));
-}
-
-#[test]
fn test_lru_tracker_push_pop() {
// Push elements, pop them all off and ensure:
// - Returned in oldest order
@@ -570,20 +534,20 @@ fn test_lru_tracker_push_pop() {
struct CacheMarker;
const NUM_ELEMENTS: usize = 50;
- let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+ let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new();
cache.validate();
for i in 0 .. NUM_ELEMENTS {
- cache.push_new(0, i);
+ cache.push_new(i);
}
cache.validate();
for i in 0 .. NUM_ELEMENTS {
- assert_eq!(cache.pop_oldest(0), Some(i));
+ assert_eq!(cache.pop_oldest(), Some(i));
}
cache.validate();
- assert_eq!(cache.pop_oldest(0), None);
+ assert_eq!(cache.pop_oldest(), None);
}
#[test]
@@ -594,12 +558,12 @@ fn test_lru_tracker_push_touch_pop() {
struct CacheMarker;
const NUM_ELEMENTS: usize = 50;
- let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+ let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new();
let mut handles = Vec::new();
cache.validate();
for i in 0 .. NUM_ELEMENTS {
- handles.push(cache.push_new(0, i));
+ handles.push(cache.push_new(i));
}
cache.validate();
@@ -609,15 +573,15 @@ fn test_lru_tracker_push_touch_pop() {
cache.validate();
for i in 0 .. NUM_ELEMENTS/2 {
- assert_eq!(cache.pop_oldest(0), Some(i*2+1));
+ assert_eq!(cache.pop_oldest(), Some(i*2+1));
}
cache.validate();
for i in 0 .. NUM_ELEMENTS/2 {
- assert_eq!(cache.pop_oldest(0), Some(i*2));
+ assert_eq!(cache.pop_oldest(), Some(i*2));
}
cache.validate();
- assert_eq!(cache.pop_oldest(0), None);
+ assert_eq!(cache.pop_oldest(), None);
}
#[test]
@@ -627,12 +591,12 @@ fn test_lru_tracker_push_get() {
struct CacheMarker;
const NUM_ELEMENTS: usize = 50;
- let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+ let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new();
let mut handles = Vec::new();
cache.validate();
for i in 0 .. NUM_ELEMENTS {
- handles.push(cache.push_new(0, i));
+ handles.push(cache.push_new(i));
}
cache.validate();
@@ -650,17 +614,17 @@ fn test_lru_tracker_push_replace_get() {
struct CacheMarker;
const NUM_ELEMENTS: usize = 50;
- let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new(1);
+ let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new();
let mut handles = Vec::new();
cache.validate();
for i in 0 .. NUM_ELEMENTS {
- handles.push(cache.push_new(0, i));
+ handles.push(cache.push_new(i));
}
cache.validate();
for i in 0 .. NUM_ELEMENTS {
- assert_eq!(cache.replace_or_insert(&mut handles[i], 0, i * 2), Some(i));
+ assert_eq!(cache.replace_or_insert(&mut handles[i], i * 2), Some(i));
}
cache.validate();
@@ -670,6 +634,42 @@ fn test_lru_tracker_push_replace_get() {
cache.validate();
let mut empty_handle = WeakFreeListHandle::invalid();
- assert_eq!(cache.replace_or_insert(&mut empty_handle, 0, 100), None);
+ assert_eq!(cache.replace_or_insert(&mut empty_handle, 100), None);
assert_eq!(cache.get_opt(&empty_handle), Some(&100));
}
+
+#[test]
+fn test_lru_tracker_manual_evict() {
+ // Push elements, set even as manual eviction, ensure:
+ // - correctly pop auto handles in correct order
+ // - correctly remove manual handles, and have expected value
+ struct CacheMarker;
+ const NUM_ELEMENTS: usize = 50;
+
+ let mut cache: LRUCache<usize, CacheMarker> = LRUCache::new();
+ let mut handles = Vec::new();
+ let mut manual_handles = Vec::new();
+ cache.validate();
+
+ for i in 0 .. NUM_ELEMENTS {
+ handles.push(cache.push_new(i));
+ }
+ cache.validate();
+
+ for i in 0 .. NUM_ELEMENTS/2 {
+ manual_handles.push(cache.set_manual_eviction(&handles[i*2]).unwrap());
+ }
+ cache.validate();
+
+ for i in 0 .. NUM_ELEMENTS/2 {
+ assert!(cache.pop_oldest() == Some(i*2 + 1));
+ }
+ cache.validate();
+
+ assert!(cache.pop_oldest().is_none());
+
+ for (i, manual_handle) in manual_handles.drain(..).enumerate() {
+ assert_eq!(*cache.get(&manual_handle), i*2);
+ assert_eq!(cache.remove_manual_handle(manual_handle), i*2);
+ }
+}
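The new test above walks the manual-eviction flow end to end: push_new hands back a weak handle; set_manual_eviction detaches the entry from LRU tracking and returns the strong handle, after which pop_oldest can no longer reach it; the caller eventually retires it with remove_manual_handle. A small usage sketch of the same sequence (the marker type is a placeholder):

    struct CacheMarker;

    fn manual_eviction_example() {
        let mut cache: LRUCache<&'static str, CacheMarker> = LRUCache::new();
        let weak = cache.push_new("pinned");

        // Detach from LRU tracking; we now own the strong handle.
        let strong = cache.set_manual_eviction(&weak).unwrap();
        assert!(cache.pop_oldest().is_none()); // nothing left under LRU control

        // Still readable through the strong handle until explicitly retired.
        assert_eq!(*cache.get(&strong), "pinned");
        assert_eq!(cache.remove_manual_handle(strong), "pinned");
    }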
diff --git a/third_party/webrender/webrender/src/picture.rs b/third_party/webrender/webrender/src/picture.rs
index 5cc690f1798..5b71479b0a5 100644
--- a/third_party/webrender/webrender/src/picture.rs
+++ b/third_party/webrender/webrender/src/picture.rs
@@ -94,12 +94,11 @@
//! blend the overlay tile (this is not always optimal right now, but will be
//! improved as a follow up).
-use api::{MixBlendMode, PremultipliedColorF, FilterPrimitiveKind};
+use api::{MixBlendMode, PipelineId, PremultipliedColorF, FilterPrimitiveKind};
use api::{PropertyBinding, PropertyBindingId, FilterPrimitive};
-use api::{DebugFlags, ImageKey, ColorF, ColorU, PrimitiveFlags};
-use api::{ImageRendering, ColorDepth, YuvColorSpace, YuvFormat, AlphaType};
+use api::{DebugFlags, RasterSpace, ImageKey, ColorF, ColorU, PrimitiveFlags};
+use api::{ImageRendering, ColorDepth, YuvColorSpace, YuvFormat};
use api::units::*;
-use crate::batch::BatchFilter;
use crate::box_shadow::BLUR_SAMPLE_SCALE;
use crate::clip::{ClipStore, ClipChainInstance, ClipChainId, ClipInstance};
use crate::spatial_tree::{ROOT_SPATIAL_NODE_INDEX,
@@ -108,38 +107,35 @@ use crate::spatial_tree::{ROOT_SPATIAL_NODE_INDEX,
use crate::composite::{CompositorKind, CompositeState, NativeSurfaceId, NativeTileId};
use crate::composite::{ExternalSurfaceDescriptor, ExternalSurfaceDependency};
use crate::debug_colors;
-use euclid::{vec2, vec3, Point2D, Scale, Size2D, Vector2D, Vector3D, Rect, Transform3D, SideOffsets2D};
+use euclid::{vec2, vec3, Point2D, Scale, Size2D, Vector2D, Rect, Transform3D, SideOffsets2D};
use euclid::approxeq::ApproxEq;
use crate::filterdata::SFilterData;
+use crate::frame_builder::{FrameBuilderConfig, FrameVisibilityContext, FrameVisibilityState};
use crate::intern::ItemUid;
use crate::internal_types::{FastHashMap, FastHashSet, PlaneSplitter, Filter, PlaneSplitAnchor, TextureSource};
use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureState, PictureContext};
use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
use crate::gpu_types::{UvRectKind, ZBufferId};
use plane_split::{Clipper, Polygon, Splitter};
-use crate::prim_store::{PrimitiveTemplateKind, PictureIndex, PrimitiveInstance, PrimitiveInstanceKind};
-use crate::prim_store::{ColorBindingStorage, ColorBindingIndex, PrimitiveScratchBuffer};
+use crate::prim_store::{SpaceMapper, PrimitiveVisibilityMask, PrimitiveTemplateKind};
+use crate::prim_store::{SpaceSnapper, PictureIndex, PrimitiveInstance, PrimitiveInstanceKind};
+use crate::prim_store::{get_raster_rects, PrimitiveScratchBuffer};
+use crate::prim_store::{OpacityBindingStorage, ImageInstanceStorage, OpacityBindingIndex};
+use crate::prim_store::{ColorBindingStorage, ColorBindingIndex, PrimitiveVisibilityFlags};
use crate::print_tree::{PrintTree, PrintTreePrinter};
use crate::render_backend::{DataStores, FrameId};
use crate::render_task_graph::RenderTaskId;
use crate::render_target::RenderTargetKind;
-use crate::render_task::{BlurTask, RenderTask, RenderTaskLocation, BlurTaskCache};
-use crate::render_task::{StaticRenderTaskSurface, RenderTaskKind};
-use crate::renderer::BlendMode;
-use crate::resource_cache::{ResourceCache, ImageGeneration, ImageRequest};
-use crate::space::SpaceMapper;
+use crate::render_task::{RenderTask, RenderTaskLocation, BlurTaskCache, ClearMode};
+use crate::resource_cache::{ResourceCache, ImageGeneration};
use crate::scene::SceneProperties;
use smallvec::SmallVec;
use std::{mem, u8, marker, u32};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::collections::hash_map::Entry;
-use std::ops::Range;
use crate::texture_cache::TextureCacheHandle;
-use crate::util::{MaxRect, VecHelper, MatrixHelpers, Recycler, raster_rect_to_device_pixels, ScaleOffset};
+use crate::util::{MaxRect, VecHelper, RectHelpers, MatrixHelpers};
use crate::filterdata::{FilterDataHandle};
-use crate::tile_cache::{SliceDebugInfo, TileDebugInfo, DirtyTileDebugInfo};
-use crate::visibility::{PrimitiveVisibilityFlags, FrameVisibilityContext};
-use crate::visibility::{VisibilityState, FrameVisibilityState};
#[cfg(any(feature = "capture", feature = "replay"))]
use ron;
#[cfg(feature = "capture")]
@@ -147,11 +143,7 @@ use crate::scene_builder_thread::InternerUpdates;
#[cfg(any(feature = "capture", feature = "replay"))]
use crate::intern::{Internable, UpdateList};
#[cfg(any(feature = "capture", feature = "replay"))]
-use crate::clip::{ClipIntern, PolygonIntern};
-#[cfg(any(feature = "capture", feature = "replay"))]
-use crate::filterdata::FilterDataIntern;
-#[cfg(any(feature = "capture", feature = "replay"))]
-use api::PrimitiveKeyKind;
+use api::{ClipIntern, FilterDataIntern, PrimitiveKeyKind};
#[cfg(any(feature = "capture", feature = "replay"))]
use crate::prim_store::backdrop::Backdrop;
#[cfg(any(feature = "capture", feature = "replay"))]
@@ -184,7 +176,7 @@ use std::collections::HashMap;
pub const MAX_BLUR_RADIUS: f32 = 100.;
/// Specify whether a surface allows subpixel AA text rendering.
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Clone, PartialEq)]
pub enum SubpixelMode {
/// This surface allows subpixel AA text
Allow,
@@ -194,6 +186,7 @@ pub enum SubpixelMode {
/// with the excluded regions, and inside the allowed rect.
Conditional {
allowed_rect: PictureRect,
+ excluded_rects: Vec<PictureRect>,
},
}
@@ -261,15 +254,76 @@ impl<Src, Dst> From<CoordinateSpaceMapping<Src, Dst>> for TransformKey {
}
}
+/// Information about a picture that is pushed / popped on the
+/// PictureUpdateState during the picture traversal pass.
+struct PictureInfo {
+ /// The spatial node for this picture.
+ _spatial_node_index: SpatialNodeIndex,
+}
+
+/// Picture-caching state to keep between scenes.
+pub struct PictureCacheState {
+ /// The tiles retained by this picture cache.
+ pub tiles: FastHashMap<TileOffset, Box<Tile>>,
+ /// State of the spatial nodes from previous frame
+ spatial_node_comparer: SpatialNodeComparer,
+ /// State of opacity bindings from previous frame
+ opacity_bindings: FastHashMap<PropertyBindingId, OpacityBindingInfo>,
+ /// State of color bindings from previous frame
+ color_bindings: FastHashMap<PropertyBindingId, ColorBindingInfo>,
+ /// The current transform of the picture cache root spatial node
+ root_transform: TransformKey,
+ /// The current tile size in device pixels
+ current_tile_size: DeviceIntSize,
+ /// Various allocations we want to avoid re-doing.
+ allocations: PictureCacheRecycledAllocations,
+ /// Currently allocated native compositor surface for this picture cache.
+ pub native_surface: Option<NativeSurface>,
+ /// A cache of compositor surfaces that are retained between display lists
+ pub external_native_surface_cache: FastHashMap<ExternalNativeSurfaceKey, ExternalNativeSurface>,
+ /// The retained virtual offset for this slice between display lists.
+ virtual_offset: DeviceIntPoint,
+ /// Current frame ID of this picture cache
+ frame_id: FrameId,
+}
+
+pub struct PictureCacheRecycledAllocations {
+ old_opacity_bindings: FastHashMap<PropertyBindingId, OpacityBindingInfo>,
+ old_color_bindings: FastHashMap<PropertyBindingId, ColorBindingInfo>,
+ compare_cache: FastHashMap<PrimitiveComparisonKey, PrimitiveCompareResult>,
+}
+
+/// Stores a list of cached picture tiles that are retained
+/// between new scenes.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct RetainedTiles {
+ /// The tiles retained between display lists.
+ #[cfg_attr(feature = "capture", serde(skip))] //TODO
+ pub caches: FastHashMap<usize, PictureCacheState>,
+}
+
+impl RetainedTiles {
+ pub fn new() -> Self {
+ RetainedTiles {
+ caches: FastHashMap::default(),
+ }
+ }
+
+    /// Merge items from one RetainedTiles into another.
+ pub fn merge(&mut self, other: RetainedTiles) {
+ assert!(self.caches.is_empty() || other.caches.is_empty());
+ if self.caches.is_empty() {
+ self.caches = other.caches;
+ }
+ }
+}
+
/// Unit for tile coordinates.
#[derive(Hash, Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
pub struct TileCoordinate;
// Geometry types for tile coordinates.
pub type TileOffset = Point2D<i32, TileCoordinate>;
-// TileSize type is also used in lib.rs and cbindgen picks the wrong one when
-// generating headers.
-/// cbindgen:ignore
pub type TileSize = Size2D<i32, TileCoordinate>;
pub type TileRect = Rect<i32, TileCoordinate>;
@@ -287,18 +341,52 @@ pub const TILE_SIZE_DEFAULT: DeviceIntSize = DeviceIntSize {
/// The size in device pixels of a tile for horizontal scroll bars
pub const TILE_SIZE_SCROLLBAR_HORIZONTAL: DeviceIntSize = DeviceIntSize {
- width: 1024,
- height: 32,
+ width: 512,
+ height: 16,
_unit: marker::PhantomData,
};
/// The size in device pixels of a tile for vertical scroll bars
pub const TILE_SIZE_SCROLLBAR_VERTICAL: DeviceIntSize = DeviceIntSize {
- width: 32,
- height: 1024,
+ width: 16,
+ height: 512,
_unit: marker::PhantomData,
};
+const TILE_SIZE_FOR_TESTS: [DeviceIntSize; 6] = [
+ DeviceIntSize {
+ width: 128,
+ height: 128,
+ _unit: marker::PhantomData,
+ },
+ DeviceIntSize {
+ width: 256,
+ height: 256,
+ _unit: marker::PhantomData,
+ },
+ DeviceIntSize {
+ width: 512,
+ height: 512,
+ _unit: marker::PhantomData,
+ },
+ TILE_SIZE_DEFAULT,
+ TILE_SIZE_SCROLLBAR_VERTICAL,
+ TILE_SIZE_SCROLLBAR_HORIZONTAL,
+];
+
+// Return the list of tile sizes for the renderer to allocate texture arrays for.
+pub fn tile_cache_sizes(testing: bool) -> &'static [DeviceIntSize] {
+ if testing {
+ &TILE_SIZE_FOR_TESTS
+ } else {
+ &[
+ TILE_SIZE_DEFAULT,
+ TILE_SIZE_SCROLLBAR_HORIZONTAL,
+ TILE_SIZE_SCROLLBAR_VERTICAL,
+ ]
+ }
+}
+
/// The maximum size per axis of a surface,
/// in WorldPixel coordinates.
const MAX_SURFACE_SIZE: f32 = 4096.0;
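tile_cache_sizes above is how the renderer learns, up front, every tile dimension it may be asked to cache, so that it can allocate one texture array per size; the testing list adds smaller power-of-two sizes to exercise more allocation paths. A hypothetical caller, just to show the shape of the API:

    fn allocate_tile_arrays(testing: bool) {
        for size in tile_cache_sizes(testing) {
            // In the real renderer this would create a texture array whose
            // slices are size.width x size.height; here we only log it.
            println!("texture array for {}x{} tiles", size.width, size.height);
        }
    }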
@@ -487,7 +575,6 @@ struct TilePreUpdateContext {
/// The fractional position of the picture cache, which may
/// require invalidation of all tiles.
fract_offset: PictureVector2D,
- device_fract_offset: DeviceVector2D,
/// The optional background color of the picture cache instance
background_color: Option<ColorF>,
@@ -514,7 +601,7 @@ struct TilePostUpdateContext<'a> {
local_clip_rect: PictureRect,
/// The calculated backdrop information for this cache instance.
- backdrop: Option<BackdropInfo>,
+ backdrop: BackdropInfo,
/// Information about opacity bindings from the picture cache.
opacity_bindings: &'a FastHashMap<PropertyBindingId, OpacityBindingInfo>,
@@ -528,12 +615,18 @@ struct TilePostUpdateContext<'a> {
/// The local rect of the overall picture cache
local_rect: PictureRect,
- /// Pre-allocated z-id to assign to tiles during post_update.
- z_id: ZBufferId,
+ /// A list of the external surfaces that are present on this slice
+ external_surfaces: &'a [ExternalSurfaceDescriptor],
+
+ /// Pre-allocated z-id to assign to opaque tiles during post_update. We
+ /// use a different z-id for opaque/alpha tiles, so that compositor
+ /// surfaces (such as videos) can have a z-id between these values,
+ /// which allows compositor surfaces to occlude opaque tiles, but not
+ /// alpha tiles.
+ z_id_opaque: ZBufferId,
- /// If true, the scale factor of the root transform for this picture
- /// cache changed, so we need to invalidate the tile and re-render.
- invalidate_all: bool,
+ /// Pre-allocated z-id to assign to alpha tiles during post_update
+ z_id_alpha: ZBufferId,
}
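The z_id_opaque / z_id_alpha split encodes a three-band depth ordering: a compositor surface (such as a video) receives a z-id strictly between the two pre-allocated values, so it can occlude opaque tiles without ever occluding alpha tiles. Schematically (a hypothetical invariant check, not code from the patch; the comparison direction is an assumption, only the between-ness is guaranteed by the comment above):

    fn surface_z_is_valid(z_opaque: i32, z_surface: i32, z_alpha: i32) -> bool {
        // opaque tiles < compositor surfaces < alpha tiles
        z_opaque < z_surface && z_surface < z_alpha
    }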
// Mutable state passed to picture cache tiles during post_update
@@ -573,6 +666,9 @@ struct PrimitiveDependencyInfo {
/// Spatial nodes references by the clip dependencies of this primitive.
spatial_nodes: SmallVec<[SpatialNodeIndex; 4]>,
+
+ /// If true, this primitive has been promoted to be a compositor surface.
+ is_compositor_surface: bool,
}
impl PrimitiveDependencyInfo {
@@ -589,6 +685,7 @@ impl PrimitiveDependencyInfo {
prim_clip_box,
clips: SmallVec::new(),
spatial_nodes: SmallVec::new(),
+ is_compositor_surface: false,
}
}
}
@@ -620,13 +717,15 @@ pub enum SurfaceTextureDescriptor {
/// This is the same as a `SurfaceTextureDescriptor` but has been resolved
/// into a texture cache handle (if appropriate) that can be used by the
/// batching and compositing code in the renderer.
-#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+#[derive(Clone, Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum ResolvedSurfaceTexture {
TextureCache {
/// The texture ID to draw to.
texture: TextureSource,
+ /// Slice index in the texture array to draw to.
+ layer: i32,
},
Native {
/// The arbitrary id of this tile.
@@ -649,6 +748,7 @@ impl SurfaceTextureDescriptor {
ResolvedSurfaceTexture::TextureCache {
texture: cache_item.texture_id,
+ layer: cache_item.texture_layer,
}
}
SurfaceTextureDescriptor::Native { id } => {
@@ -667,6 +767,8 @@ pub enum TileSurface {
Texture {
/// Descriptor for the surface that this tile draws into.
descriptor: SurfaceTextureDescriptor,
+ /// Bitfield specifying the dirty region(s) that are relevant to this tile.
+ visibility_mask: PrimitiveVisibilityMask,
},
Color {
color: ColorF,
@@ -776,8 +878,8 @@ pub enum PrimitiveCompareResultDetail {
pub enum InvalidationReason {
/// The fractional offset changed
FractionalOffset {
- old: DeviceVector2D,
- new: DeviceVector2D,
+ old: PictureVector2D,
+ new: PictureVector2D,
},
/// The background color changed
BackgroundColor {
@@ -807,8 +909,6 @@ pub enum InvalidationReason {
CompositorKindChanged,
// The valid region of the tile changed
ValidRectChanged,
- // The overall scale of the picture cache changed
- ScaleChanged,
}
/// A minimal subset of Tile for debug capturing
@@ -817,7 +917,7 @@ pub enum InvalidationReason {
pub struct TileSerializer {
pub rect: PictureRect,
pub current_descriptor: TileDescriptor,
- pub device_fract_offset: DeviceVector2D,
+ pub fract_offset: PictureVector2D,
pub id: TileId,
pub root: TileNode,
pub background_color: Option<ColorF>,
@@ -869,7 +969,7 @@ pub struct Tile {
/// The current fractional offset of the cache transform root. If this changes,
/// all tiles need to be invalidated and redrawn, since snapping differences are
/// likely to occur.
- device_fract_offset: DeviceVector2D,
+ fract_offset: PictureVector2D,
/// The tile id is stable between display lists and / or frames,
/// if the tile is retained. Useful for debugging tile evictions.
pub id: TileId,
@@ -882,9 +982,15 @@ pub struct Tile {
background_color: Option<ColorF>,
/// The first reason the tile was invalidated this frame.
invalidation_reason: Option<InvalidationReason>,
- /// The local space valid rect for all primitives that affect this tile.
- local_valid_rect: PictureBox2D,
- /// z-buffer id for this tile
+ /// If true, this tile has one or more compositor surfaces affecting it.
+ pub has_compositor_surface: bool,
+ /// The local space valid rect for any primitives found prior to the first compositor
+ /// surface that affects this tile.
+ bg_local_valid_rect: PictureBox2D,
+ /// The local space valid rect for any primitives found after the first compositor
+ /// surface that affects this tile.
+ fg_local_valid_rect: PictureBox2D,
+ /// z-buffer id for this tile, which is one of z_id_opaque or z_id_alpha, depending on tile opacity
pub z_id: ZBufferId,
/// The last frame this tile had its dependencies updated (dependency updating is
/// skipped if a tile is off-screen).
@@ -909,13 +1015,15 @@ impl Tile {
prev_descriptor: TileDescriptor::new(),
is_valid: false,
is_visible: false,
- device_fract_offset: DeviceVector2D::zero(),
+ fract_offset: PictureVector2D::zero(),
id,
is_opaque: false,
root: TileNode::new_leaf(Vec::new()),
background_color: None,
invalidation_reason: None,
- local_valid_rect: PictureBox2D::zero(),
+ has_compositor_surface: false,
+ bg_local_valid_rect: PictureBox2D::zero(),
+ fg_local_valid_rect: PictureBox2D::zero(),
z_id: ZBufferId::invalid(),
last_updated_frame_id: FrameId::INVALID,
}
@@ -925,7 +1033,7 @@ impl Tile {
fn print(&self, pt: &mut dyn PrintTreePrinter) {
pt.new_level(format!("Tile {:?}", self.id));
pt.add_item(format!("local_tile_rect: {:?}", self.local_tile_rect));
- pt.add_item(format!("device_fract_offset: {:?}", self.device_fract_offset));
+ pt.add_item(format!("fract_offset: {:?}", self.fract_offset));
pt.add_item(format!("background_color: {:?}", self.background_color));
pt.add_item(format!("invalidation_reason: {:?}", self.invalidation_reason));
self.current_descriptor.print(pt);
@@ -989,9 +1097,6 @@ impl Tile {
invalidation_reason.expect("bug: no invalidation_reason"),
);
}
- if ctx.invalidate_all {
- self.invalidate(None, InvalidationReason::ScaleChanged);
- }
// TODO(gw): We can avoid invalidating the whole tile in some cases here,
// but it should be a fairly rare invalidation case.
if self.current_descriptor.local_valid_rect != self.prev_descriptor.local_valid_rect {
@@ -1043,8 +1148,10 @@ impl Tile {
self.local_tile_rect.origin,
self.local_tile_rect.bottom_right(),
);
- self.local_valid_rect = PictureBox2D::zero();
+ self.bg_local_valid_rect = PictureBox2D::zero();
+ self.fg_local_valid_rect = PictureBox2D::zero();
self.invalidation_reason = None;
+ self.has_compositor_surface = false;
self.world_tile_rect = ctx.pic_to_world_mapper
.map(&self.local_tile_rect)
@@ -1060,16 +1167,15 @@ impl Tile {
return;
}
- // We may need to rerender if glyph subpixel positions have changed. Note
- // that we update the tile fract offset itself after we have completed
- // invalidation. This allows for other whole tile invalidation cases to
- // update the fract offset appropriately.
- let fract_delta = self.device_fract_offset - ctx.device_fract_offset;
- let fract_changed = fract_delta.x.abs() > 0.01 || fract_delta.y.abs() > 0.01;
+ // Determine if the fractional offset of the transform is different this frame
+ // from the currently cached tile set.
+ let fract_changed = (self.fract_offset.x - ctx.fract_offset.x).abs() > 0.01 ||
+ (self.fract_offset.y - ctx.fract_offset.y).abs() > 0.01;
if fract_changed {
self.invalidate(None, InvalidationReason::FractionalOffset {
- old: self.device_fract_offset,
- new: ctx.device_fract_offset });
+ old: self.fract_offset,
+ new: ctx.fract_offset });
+ self.fract_offset = ctx.fract_offset;
}
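// Worked example of the 0.01 threshold above (values assumed for
// illustration): with a cached fract_offset of (0.30, 0.10) and a new
// ctx.fract_offset of (0.305, 0.10), |dx| = 0.005 <= 0.01, so the tile is
// kept; a new offset of (0.32, 0.10) gives |dx| = 0.02 > 0.01 and
// invalidates the tile.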
if ctx.background_color != self.background_color {
@@ -1104,10 +1210,26 @@ impl Tile {
return;
}
- // Incorporate the bounding rect of the primitive in the local valid rect
- // for this tile. This is used to minimize the size of the scissor rect
- // during rasterization and the draw rect during composition of partial tiles.
- self.local_valid_rect = self.local_valid_rect.union(&info.prim_clip_box);
+ // If this primitive is a compositor surface, any tile it affects must be
+ // drawn as an overlay tile.
+ if info.is_compositor_surface {
+ self.has_compositor_surface = true;
+ } else {
+ // Incorporate the bounding rect of the primitive in the local valid rect
+ // for this tile. This is used to minimize the size of the scissor rect
+ // during rasterization and the draw rect during composition of partial tiles.
+
+ // Once we have encountered 1+ compositor surfaces affecting this tile, include
+ // this bounding rect in the foreground. Otherwise, include it in the background rect.
+ // This allows us to determine if we found any primitives that are on top of the
+ // compositor surface(s) for this tile. If so, we need to draw the tile with alpha
+ // blending as an overlay.
+ if self.has_compositor_surface {
+ self.fg_local_valid_rect = self.fg_local_valid_rect.union(&info.prim_clip_box);
+ } else {
+ self.bg_local_valid_rect = self.bg_local_valid_rect.union(&info.prim_clip_box);
+ }
+ }
// Include any image keys this tile depends on.
self.current_descriptor.images.extend_from_slice(&info.images);
@@ -1206,8 +1328,12 @@ impl Tile {
return false;
}
- // Calculate the overall valid rect for this tile.
- self.current_descriptor.local_valid_rect = self.local_valid_rect.to_rect();
+ // Calculate the overall valid rect for this tile, including both the foreground
+ // and background local valid rects.
+ self.current_descriptor.local_valid_rect =
+ self.bg_local_valid_rect
+ .union(&self.fg_local_valid_rect)
+ .to_rect();
// TODO(gw): In theory, the local tile rect should always have an
// intersection with the overall picture rect. In practice,
@@ -1224,22 +1350,6 @@ impl Tile {
.and_then(|r| r.intersection(&self.current_descriptor.local_valid_rect))
.unwrap_or_else(PictureRect::zero);
- // The device_valid_rect is referenced during `update_content_validity` so it
- // must be updated here first.
- let world_valid_rect = ctx.pic_to_world_mapper
- .map(&self.current_descriptor.local_valid_rect)
- .expect("bug: map local valid rect");
-
- // The device rect is guaranteed to be aligned on a device pixel - the round
- // is just to deal with float accuracy. However, the valid rect is not
- // always aligned to a device pixel. To handle this, round out to get all
- // required pixels, and intersect with the tile device rect.
- let device_rect = (self.world_tile_rect * ctx.global_device_pixel_scale).round();
- self.device_valid_rect = (world_valid_rect * ctx.global_device_pixel_scale)
- .round_out()
- .intersection(&device_rect)
- .unwrap_or_else(DeviceRect::zero);
-
// Invalidate the tile based on the content changing.
self.update_content_validity(ctx, state, frame_context);
@@ -1259,19 +1369,51 @@ impl Tile {
return false;
}
+ let world_valid_rect = ctx.pic_to_world_mapper
+ .map(&self.current_descriptor.local_valid_rect)
+ .expect("bug: map local valid rect");
+
+ // The device rect is guaranteed to be aligned on a device pixel - the round
+ // is just to deal with float accuracy. However, the valid rect is not
+ // always aligned to a device pixel. To handle this, round out to get all
+ // required pixels, and intersect with the tile device rect.
+ let device_rect = (self.world_tile_rect * ctx.global_device_pixel_scale).round();
+ self.device_valid_rect = (world_valid_rect * ctx.global_device_pixel_scale)
+ .round_out()
+ .intersection(&device_rect)
+ .unwrap_or_else(DeviceRect::zero);
+
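// Worked example of the rounding above (values assumed): with a world
// valid rect of (10.2, 10.2)..(100.6, 100.6) at a device scale of 1.0,
// round_out() expands it to (10.0, 10.0)..(101.0, 101.0) so no required
// pixel is clipped, and the intersection with the pixel-aligned tile
// device rect drops any excess introduced by the expansion.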
// Check if this tile can be considered opaque. Opacity state must be updated only
// after all early out checks have been performed. Otherwise, we might miss updating
// the native surface next time this tile becomes visible.
let clipped_rect = self.current_descriptor.local_valid_rect
.intersection(&ctx.local_clip_rect)
.unwrap_or_else(PictureRect::zero);
+ let mut is_opaque = ctx.backdrop.opaque_rect.contains_rect(&clipped_rect);
+
+ if self.has_compositor_surface {
+ // If we found primitive(s) that are ordered _after_ the first compositor
+ // surface, _and_ intersect with any compositor surface, then we will need
+ // to draw this tile with alpha blending, as an overlay to the compositor surface.
+ let fg_world_valid_rect = ctx.pic_to_world_mapper
+ .map(&self.fg_local_valid_rect.to_rect())
+ .expect("bug: map fg local valid rect");
+ let fg_device_valid_rect = fg_world_valid_rect * ctx.global_device_pixel_scale;
+
+ for surface in ctx.external_surfaces {
+ if surface.device_rect.intersects(&fg_device_valid_rect) {
+ is_opaque = false;
+ break;
+ }
+ }
+ }
- let has_opaque_bg_color = self.background_color.map_or(false, |c| c.a >= 1.0);
- let has_opaque_backdrop = ctx.backdrop.map_or(false, |b| b.opaque_rect.contains_rect(&clipped_rect));
- let is_opaque = has_opaque_bg_color || has_opaque_backdrop;
-
- // Set the correct z_id for this tile
- self.z_id = ctx.z_id;
+ // Set the correct z_id for this tile based on opacity
+ if is_opaque {
+ self.z_id = ctx.z_id_opaque;
+ } else {
+ self.z_id = ctx.z_id_alpha;
+ }
if is_opaque != self.is_opaque {
// If opacity changed, the native compositor surface and all tiles get invalidated.
@@ -1293,12 +1435,11 @@ impl Tile {
}
// Check if the selected composite mode supports dirty rect updates. For Draw composite
- // mode, we can always update the content with smaller dirty rects, unless there is a
- // driver bug to workaround. For native composite mode, we can only use dirty rects if
- // the compositor supports partial surface updates.
+ // mode, we can always update the content with smaller dirty rects. For native composite
+ // mode, we can only use dirty rects if the compositor supports partial surface updates.
let (supports_dirty_rects, supports_simple_prims) = match state.composite_state.compositor_kind {
CompositorKind::Draw { .. } => {
- (frame_context.config.gpu_supports_render_target_partial_update, true)
+ (true, true)
}
CompositorKind::Native { max_update_rects, .. } => {
(max_update_rects > 0, false)
@@ -1309,7 +1450,7 @@ impl Tile {
// native compositors that don't support dirty rects.
if supports_dirty_rects {
// Only allow splitting for normal content sized tiles
- if ctx.current_tile_size == state.resource_cache.texture_cache.default_picture_tile_size() {
+ if ctx.current_tile_size == TILE_SIZE_DEFAULT {
let max_split_level = 3;
// Consider splitting / merging dirty regions
@@ -1334,7 +1475,7 @@ impl Tile {
// color tiles. We can definitely support this in DC, so this
// should be added as a follow up.
let is_simple_prim =
- ctx.backdrop.map_or(false, |b| b.kind.is_some()) &&
+ ctx.backdrop.kind.is_some() &&
self.current_descriptor.prims.len() == 1 &&
self.is_opaque &&
supports_simple_prims;
@@ -1344,7 +1485,7 @@ impl Tile {
// If we determine the tile can be represented by a color, set the
// surface unconditionally (this will drop any previously used
// texture cache backing surface).
- match ctx.backdrop.unwrap().kind {
+ match ctx.backdrop.kind {
Some(BackdropKind::Color { color }) => {
TileSurface::Color {
color,
@@ -1364,10 +1505,11 @@ impl Tile {
// the tile was previously a color, or not set, then just set
// up a new texture cache handle.
match self.surface.take() {
- Some(TileSurface::Texture { descriptor }) => {
+ Some(TileSurface::Texture { descriptor, visibility_mask }) => {
// Reuse the existing descriptor and vis mask
TileSurface::Texture {
descriptor,
+ visibility_mask,
}
}
Some(TileSurface::Color { .. }) | Some(TileSurface::Clear) | None => {
@@ -1395,6 +1537,7 @@ impl Tile {
TileSurface::Texture {
descriptor,
+ visibility_mask: PrimitiveVisibilityMask::empty(),
}
}
}
@@ -1665,106 +1808,122 @@ impl TileDescriptor {
}
}
+/// Stores the world rect and visibility mask for a single dirty rect.
+#[derive(Debug, Clone)]
+pub struct DirtyRegionRect {
+ /// World rect of this dirty region
+ pub world_rect: WorldRect,
+ /// Bitfield for picture render tasks that draw this dirty region.
+ pub visibility_mask: PrimitiveVisibilityMask,
+}
+
/// Represents the dirty region of a tile cache picture.
-#[derive(Clone)]
+#[derive(Debug, Clone)]
pub struct DirtyRegion {
- /// The individual filters that make up this region.
- pub filters: Vec<BatchFilter>,
+ /// The individual dirty rects of this region.
+ pub dirty_rects: Vec<DirtyRegionRect>,
/// The overall dirty rect, the union of all dirty_rects.
pub combined: WorldRect,
-
- /// Spatial node of the picture cache this region represents
- spatial_node_index: SpatialNodeIndex,
}
impl DirtyRegion {
/// Construct a new dirty region tracker.
pub fn new(
- spatial_node_index: SpatialNodeIndex,
) -> Self {
DirtyRegion {
- filters: Vec::with_capacity(16),
+ dirty_rects: Vec::with_capacity(PrimitiveVisibilityMask::MAX_DIRTY_REGIONS),
combined: WorldRect::zero(),
- spatial_node_index,
}
}
/// Reset the dirty regions back to empty
- pub fn reset(
- &mut self,
- spatial_node_index: SpatialNodeIndex,
- ) {
- self.filters.clear();
+ pub fn clear(&mut self) {
+ self.dirty_rects.clear();
self.combined = WorldRect::zero();
- self.spatial_node_index = spatial_node_index;
}
- /// Add a dirty region to the tracker. Returns the visibility mask that corresponds to
- /// this region in the tracker.
- pub fn add_dirty_region(
+ /// Push a dirty rect into this region
+ pub fn push(
&mut self,
- rect_in_pic_space: PictureRect,
- sub_slice_index: SubSliceIndex,
- spatial_tree: &SpatialTree,
+ rect: WorldRect,
+ visibility_mask: PrimitiveVisibilityMask,
) {
- let map_pic_to_world = SpaceMapper::new_with_target(
- ROOT_SPATIAL_NODE_INDEX,
- self.spatial_node_index,
- WorldRect::max_rect(),
- spatial_tree,
- );
-
- let world_rect = map_pic_to_world
- .map(&rect_in_pic_space)
- .expect("bug");
-
// Include this in the overall dirty rect
- self.combined = self.combined.union(&world_rect);
+ self.combined = self.combined.union(&rect);
- self.filters.push(BatchFilter {
- rect_in_pic_space,
- sub_slice_index,
+ // Store the individual dirty rect.
+ self.dirty_rects.push(DirtyRegionRect {
+ world_rect: rect,
+ visibility_mask,
});
}
+ /// Merge another rect into an existing dirty region entry.
+ pub fn include_rect(
+ &mut self,
+ region_index: usize,
+ rect: WorldRect,
+ ) {
+ self.combined = self.combined.union(&rect);
+
+ let region = &mut self.dirty_rects[region_index];
+ region.world_rect = region.world_rect.union(&rect);
+ }
+
// TODO(gw): This returns a heap allocated object. Perhaps we can simplify this
// logic? Although - it's only used very rarely so it may not be an issue.
pub fn inflate(
&self,
inflate_amount: f32,
- spatial_tree: &SpatialTree,
) -> DirtyRegion {
- let map_pic_to_world = SpaceMapper::new_with_target(
- ROOT_SPATIAL_NODE_INDEX,
- self.spatial_node_index,
- WorldRect::max_rect(),
- spatial_tree,
- );
-
- let mut filters = Vec::with_capacity(self.filters.len());
+ let mut dirty_rects = Vec::with_capacity(self.dirty_rects.len());
let mut combined = WorldRect::zero();
- for filter in &self.filters {
- let rect_in_pic_space = filter.rect_in_pic_space.inflate(inflate_amount, inflate_amount);
-
- let world_rect = map_pic_to_world
- .map(&rect_in_pic_space)
- .expect("bug");
-
+ for rect in &self.dirty_rects {
+ let world_rect = rect.world_rect.inflate(inflate_amount, inflate_amount);
combined = combined.union(&world_rect);
- filters.push(BatchFilter {
- rect_in_pic_space,
- sub_slice_index: filter.sub_slice_index,
+ dirty_rects.push(DirtyRegionRect {
+ world_rect,
+ visibility_mask: rect.visibility_mask,
});
}
DirtyRegion {
- filters,
+ dirty_rects,
combined,
- spatial_node_index: self.spatial_node_index,
}
}
+
+ /// Creates a record of this dirty region for exporting to test infrastructure.
+ pub fn record(&self) -> RecordedDirtyRegion {
+ let mut rects: Vec<WorldRect> =
+ self.dirty_rects.iter().map(|r| r.world_rect).collect();
+ rects.sort_unstable_by_key(|r| (r.origin.y as usize, r.origin.x as usize));
+ RecordedDirtyRegion { rects }
+ }
+}
+
+/// A recorded copy of the dirty region for exporting to test infrastructure.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct RecordedDirtyRegion {
+ pub rects: Vec<WorldRect>,
+}
+
+impl ::std::fmt::Display for RecordedDirtyRegion {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+ for r in self.rects.iter() {
+ let (x, y, w, h) = (r.origin.x, r.origin.y, r.size.width, r.size.height);
+ write!(f, "[({},{}):{}x{}]", x, y, w, h)?;
+ }
+ Ok(())
+ }
+}
+
+impl ::std::fmt::Debug for RecordedDirtyRegion {
+ fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
+ ::std::fmt::Display::fmt(self, f)
+ }
}
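// A hedged usage sketch of the dirty-region API above (the mask value is a
// placeholder; the asserted output assumes whole-number f32 values, which
// print without a fractional part):
//
//     let mut region = DirtyRegion::new();
//     region.push(
//         WorldRect::new(WorldPoint::new(0.0, 0.0), WorldSize::new(512.0, 512.0)),
//         PrimitiveVisibilityMask::empty(),
//     );
//     assert_eq!(region.record().to_string(), "[(0,0):512x512]");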
#[derive(Debug, Copy, Clone)]
@@ -1911,7 +2070,7 @@ macro_rules! declare_tile_cache_logger_updatelists {
}
#[cfg(any(feature = "capture", feature = "replay"))]
-crate::enumerate_interners!(declare_tile_cache_logger_updatelists);
+enumerate_interners!(declare_tile_cache_logger_updatelists);
#[cfg(not(any(feature = "capture", feature = "replay")))]
pub struct TileCacheLoggerUpdateLists {
@@ -2087,9 +2246,6 @@ pub struct ExternalNativeSurfaceKey {
pub image_keys: [ImageKey; 3],
/// The current device size of the surface.
pub size: DeviceIntSize,
- /// True if this is an 'external' compositor surface created via
- /// Compositor::create_external_surface.
- pub is_external_surface: bool,
}
/// Information about a native compositor surface cached between frames.
@@ -2105,129 +2261,6 @@ pub struct ExternalNativeSurface {
pub image_dependencies: [ImageDependency; 3],
}
-/// The key that identifies a tile cache instance. For now, it's simply the index of
-/// the slice as it was created during scene building.
-#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct SliceId(usize);
-
-impl SliceId {
- pub fn new(index: usize) -> Self {
- SliceId(index)
- }
-}
-
-/// Information that is required to reuse or create a new tile cache. Created
-/// during scene building and passed to the render backend / frame builder.
-pub struct TileCacheParams {
- // Index of the slice (also effectively the key of the tile cache, though we use SliceId where that matters)
- pub slice: usize,
- // Flags describing content of this cache (e.g. scrollbars)
- pub slice_flags: SliceFlags,
- // The anchoring spatial node / scroll root
- pub spatial_node_index: SpatialNodeIndex,
- // Optional background color of this tilecache. If present, can be used as an optimization
- // to enable opaque blending and/or subpixel AA in more places.
- pub background_color: Option<ColorF>,
- // List of clips shared by all prims that are promoted to this tile cache
- pub shared_clips: Vec<ClipInstance>,
- // The clip chain handle representing `shared_clips`
- pub shared_clip_chain: ClipChainId,
- // Virtual surface sizes are always square, so this represents both the width and height
- pub virtual_surface_size: i32,
- // The number of compositor surfaces that are being requested for this tile cache.
- // This is only a suggestion - the tile cache will clamp this to a reasonable number
- // and only promote a limited number of surfaces.
- pub compositor_surface_count: usize,
-}
-
-/// Defines which sub-slice (effectively a z-index) a primitive exists on within
-/// a picture cache instance.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub struct SubSliceIndex(u8);
-
-impl SubSliceIndex {
- pub const DEFAULT: SubSliceIndex = SubSliceIndex(0);
-
- pub fn new(index: usize) -> Self {
- SubSliceIndex(index as u8)
- }
-
- /// Return true if this sub-slice is the primary sub-slice (for now, we assume
- /// that only the primary sub-slice may be opaque and support subpixel AA, for example).
- pub fn is_primary(&self) -> bool {
- self.0 == 0
- }
-}
-
-/// Wrapper struct around an external surface descriptor with a little more information
-/// that the picture caching code needs.
-pub struct CompositorSurface {
- // External surface descriptor used by compositing logic
- pub descriptor: ExternalSurfaceDescriptor,
- // The compositor surface rect + any intersecting prims. Later prims that intersect
- // with this must be added to the next sub-slice.
- prohibited_rect: PictureRect,
- // If the compositor surface content is opaque.
- pub is_opaque: bool,
-}
-
-/// A SubSlice represents a potentially overlapping set of tiles within a picture cache. Most
-/// picture cache instances will have only a single sub-slice. The exception to this is when
-/// a picture cache has compositor surfaces, in which case sub-slices are used to interleave
-/// content under or over the compositor surface(s).
-pub struct SubSlice {
- /// Hash of tiles present in this picture.
- pub tiles: FastHashMap<TileOffset, Box<Tile>>,
- /// The allocated compositor surfaces for this picture cache. May be None if
- /// not using native compositor, or if the surface was destroyed and needs
- /// to be reallocated next time this surface contains valid tiles.
- pub native_surface: Option<NativeSurface>,
- /// List of compositor surfaces that have been promoted from primitives
- /// in this tile cache.
- pub compositor_surfaces: Vec<CompositorSurface>,
-}
-
-impl SubSlice {
- /// Construct a new sub-slice
- fn new() -> Self {
- SubSlice {
- tiles: FastHashMap::default(),
- native_surface: None,
- compositor_surfaces: Vec::new(),
- }
- }
-
- /// Reset the list of compositor surfaces that follow this sub-slice.
- /// Built per-frame, since APZ may change whether an image is suitable to be a compositor surface.
- fn reset(&mut self) {
- self.compositor_surfaces.clear();
- }
-
- /// Resize the tile grid to match a new tile bounds
- fn resize(&mut self, new_tile_rect: TileRect) -> FastHashMap<TileOffset, Box<Tile>> {
- let mut old_tiles = mem::replace(&mut self.tiles, FastHashMap::default());
- self.tiles.reserve(new_tile_rect.size.area() as usize);
-
- for y in new_tile_rect.origin.y .. new_tile_rect.origin.y + new_tile_rect.size.height {
- for x in new_tile_rect.origin.x .. new_tile_rect.origin.x + new_tile_rect.size.width {
- let key = TileOffset::new(x, y);
- let tile = old_tiles
- .remove(&key)
- .unwrap_or_else(|| {
- Box::new(Tile::new(key))
- });
- self.tiles.insert(key, tile);
- }
- }
-
- old_tiles
- }
-}
-
/// Represents a cache of tiles that make up a picture primitive.
pub struct TileCacheInstance {
/// Index of the tile cache / slice for this frame builder. It's determined
@@ -2241,10 +2274,14 @@ pub struct TileCacheInstance {
pub slice_flags: SliceFlags,
/// The currently selected tile size to use for this cache
pub current_tile_size: DeviceIntSize,
- /// The list of sub-slices in this tile cache
- pub sub_slices: Vec<SubSlice>,
/// The positioning node for this tile cache.
pub spatial_node_index: SpatialNodeIndex,
+ /// Hash of tiles present in this picture.
+ pub tiles: FastHashMap<TileOffset, Box<Tile>>,
+ /// A helper struct to map local rects into surface coords.
+ map_local_to_surface: SpaceMapper<LayoutPixel, PicturePixel>,
+ /// A helper struct to map child picture rects into picture cache surface coords.
+ map_child_pic_to_surface: SpaceMapper<PicturePixel, PicturePixel>,
/// List of opacity bindings, with some extra information
/// about whether they changed since last frame.
opacity_bindings: FastHashMap<PropertyBindingId, OpacityBindingInfo>,
@@ -2270,7 +2307,7 @@ pub struct TileCacheInstance {
/// Local rect (unclipped) of the picture this cache covers.
pub local_rect: PictureRect,
/// The local clip rect, from the shared clips of this picture.
- pub local_clip_rect: PictureRect,
+ local_clip_rect: PictureRect,
/// The surface index that this tile cache will be drawn into.
surface_index: SurfaceIndex,
/// The background color from the renderer. If this is set opaque, we know it's
@@ -2290,7 +2327,7 @@ pub struct TileCacheInstance {
/// clip rect for this tile cache.
shared_clip_chain: ClipChainId,
/// The current transform of the picture cache root spatial node
- root_transform: ScaleOffset,
+ root_transform: TransformKey,
/// The number of frames until this cache next evaluates what tile size to use.
/// If a picture rect size is regularly changing just around a size threshold,
/// we don't want to constantly invalidate and reallocate different tile size
@@ -2298,8 +2335,6 @@ pub struct TileCacheInstance {
frames_until_size_eval: usize,
/// The current fractional offset of the cached picture
fract_offset: PictureVector2D,
- /// The current device fractional offset of the cached picture
- device_fract_offset: DeviceVector2D,
/// For DirectComposition, virtual surfaces don't support negative coordinates. However,
/// picture cache tile coordinates can be negative. To handle this, we apply an offset
/// to each tile in DirectComposition. We want to change this as little as possible,
@@ -2310,12 +2345,21 @@ pub struct TileCacheInstance {
/// keep around the hash map used as compare_cache to avoid reallocating it each
/// frame.
compare_cache: FastHashMap<PrimitiveComparisonKey, PrimitiveCompareResult>,
+ /// The allocated compositor surfaces for this picture cache. May be None if
+ /// not using native compositor, or if the surface was destroyed and needs
+ /// to be reallocated next time this surface contains valid tiles.
+ pub native_surface: Option<NativeSurface>,
/// The current device position of this cache. Used to set the compositor
/// offset of the surface when building the visual tree.
pub device_position: DevicePoint,
/// The currently considered tile size override. Used to check if we should
/// re-evaluate tile size, even if the frame timer hasn't expired.
tile_size_override: Option<DeviceIntSize>,
+ /// List of external surfaces that have been promoted from primitives
+ /// in this tile cache.
+ pub external_surfaces: Vec<ExternalSurfaceDescriptor>,
+ /// z-buffer ID assigned to opaque tiles in this slice
+ pub z_id_opaque: ZBufferId,
/// A cache of compositor surfaces that are retained between frames
pub external_native_surface_cache: FastHashMap<ExternalNativeSurfaceKey, ExternalNativeSurface>,
/// Current frame ID of this tile cache instance. Used for book-keeping / garbage collecting
@@ -2330,27 +2374,36 @@ enum SurfacePromotionResult {
}
impl TileCacheInstance {
- pub fn new(params: TileCacheParams) -> Self {
- // Determine how many sub-slices we need. Clamp to an arbitrary limit to ensure
- // we don't create a huge number of OS compositor tiles and sub-slices.
- let sub_slice_count = params.compositor_surface_count.min(MAX_COMPOSITOR_SURFACES) + 1;
-
- let mut sub_slices = Vec::with_capacity(sub_slice_count);
- for _ in 0 .. sub_slice_count {
- sub_slices.push(SubSlice::new());
- }
+ pub fn new(
+ slice: usize,
+ slice_flags: SliceFlags,
+ spatial_node_index: SpatialNodeIndex,
+ background_color: Option<ColorF>,
+ shared_clips: Vec<ClipInstance>,
+ shared_clip_chain: ClipChainId,
+ fb_config: &FrameBuilderConfig,
+ ) -> Self {
+ let virtual_surface_size = fb_config.compositor_kind.get_virtual_surface_size();
TileCacheInstance {
- slice: params.slice,
- slice_flags: params.slice_flags,
- spatial_node_index: params.spatial_node_index,
- sub_slices,
+ slice,
+ slice_flags,
+ spatial_node_index,
+ tiles: FastHashMap::default(),
+ map_local_to_surface: SpaceMapper::new(
+ ROOT_SPATIAL_NODE_INDEX,
+ PictureRect::zero(),
+ ),
+ map_child_pic_to_surface: SpaceMapper::new(
+ ROOT_SPATIAL_NODE_INDEX,
+ PictureRect::zero(),
+ ),
opacity_bindings: FastHashMap::default(),
old_opacity_bindings: FastHashMap::default(),
spatial_node_comparer: SpatialNodeComparer::new(),
color_bindings: FastHashMap::default(),
old_color_bindings: FastHashMap::default(),
- dirty_region: DirtyRegion::new(params.spatial_node_index),
+ dirty_region: DirtyRegion::new(),
tile_size: PictureSize::zero(),
tile_rect: TileRect::zero(),
tile_bounds_p0: TileOffset::zero(),
@@ -2358,105 +2411,38 @@ impl TileCacheInstance {
local_rect: PictureRect::zero(),
local_clip_rect: PictureRect::zero(),
surface_index: SurfaceIndex(0),
- background_color: params.background_color,
+ background_color,
backdrop: BackdropInfo::empty(),
subpixel_mode: SubpixelMode::Allow,
- root_transform: ScaleOffset::identity(),
- shared_clips: params.shared_clips,
- shared_clip_chain: params.shared_clip_chain,
+ root_transform: TransformKey::Local,
+ shared_clips,
+ shared_clip_chain,
current_tile_size: DeviceIntSize::zero(),
frames_until_size_eval: 0,
fract_offset: PictureVector2D::zero(),
- device_fract_offset: DeviceVector2D::zero(),
// Default to centering the virtual offset in the middle of the DC virtual surface
virtual_offset: DeviceIntPoint::new(
- params.virtual_surface_size / 2,
- params.virtual_surface_size / 2,
+ virtual_surface_size / 2,
+ virtual_surface_size / 2,
),
compare_cache: FastHashMap::default(),
+ native_surface: None,
device_position: DevicePoint::zero(),
tile_size_override: None,
+ external_surfaces: Vec::new(),
+ z_id_opaque: ZBufferId::invalid(),
external_native_surface_cache: FastHashMap::default(),
frame_id: FrameId::INVALID,
}
}
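// A hedged construction sketch under the signature above (argument values
// are assumed for illustration):
//
//     let tile_cache = TileCacheInstance::new(
//         0,                        // slice index
//         SliceFlags::empty(),
//         spatial_node_index,
//         Some(ColorF::WHITE),      // opaque background => opaque slice
//         shared_clips,
//         shared_clip_chain,
//         &frame_builder_config,
//     );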
- /// Return the total number of tiles allocated by this tile cache
- pub fn tile_count(&self) -> usize {
- self.tile_rect.size.area() as usize * self.sub_slices.len()
- }
-
- /// Reset this tile cache with the updated parameters from a new scene
- /// that has arrived. This allows the tile cache to be retained across
- /// new scenes.
- pub fn prepare_for_new_scene(
- &mut self,
- params: TileCacheParams,
- resource_cache: &mut ResourceCache,
- ) {
- // We should only receive updated state for matching slice key
- assert_eq!(self.slice, params.slice);
-
- // Determine how many sub-slices we need, based on how many compositor surface prims are
- // in the supplied primitive list.
- let required_sub_slice_count = params.compositor_surface_count.min(MAX_COMPOSITOR_SURFACES) + 1;
-
- if self.sub_slices.len() != required_sub_slice_count {
- self.tile_rect = TileRect::zero();
-
- if self.sub_slices.len() > required_sub_slice_count {
- let old_sub_slices = self.sub_slices.split_off(required_sub_slice_count);
-
- for mut sub_slice in old_sub_slices {
- for tile in sub_slice.tiles.values_mut() {
- if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { ref mut id, .. }, .. }) = tile.surface {
- if let Some(id) = id.take() {
- resource_cache.destroy_compositor_tile(id);
- }
- }
- }
-
- if let Some(native_surface) = sub_slice.native_surface {
- resource_cache.destroy_compositor_surface(native_surface.opaque);
- resource_cache.destroy_compositor_surface(native_surface.alpha);
- }
- }
- } else {
- while self.sub_slices.len() < required_sub_slice_count {
- self.sub_slices.push(SubSlice::new());
- }
- }
- }
-
- // Store the parameters from the scene builder for this slice. Other
- // params in the tile cache are retained and reused, or are always
- // updated during pre/post_update.
- self.slice_flags = params.slice_flags;
- self.spatial_node_index = params.spatial_node_index;
- self.background_color = params.background_color;
- self.shared_clips = params.shared_clips;
- self.shared_clip_chain = params.shared_clip_chain;
-
- // Since the slice flags may have changed, ensure we re-evaluate the
- // appropriate tile size for this cache next update.
- self.frames_until_size_eval = 0;
- }
-
- /// Destroy any manually managed resources before this picture cache is
- /// destroyed, such as native compositor surfaces.
- pub fn destroy(
- self,
- resource_cache: &mut ResourceCache,
- ) {
- for sub_slice in self.sub_slices {
- if let Some(native_surface) = sub_slice.native_surface {
- resource_cache.destroy_compositor_surface(native_surface.opaque);
- resource_cache.destroy_compositor_surface(native_surface.alpha);
- }
- }
-
- for (_, external_surface) in self.external_native_surface_cache {
- resource_cache.destroy_compositor_surface(external_surface.native_surface_id)
+ /// Returns true if this tile cache is considered opaque.
+ pub fn is_opaque(&self) -> bool {
+ // The slice is known opaque when its background clear color is opaque;
+ // background_color will only be Some(..) if this is the first slice.
+ match self.background_color {
+ Some(color) => color.a >= 1.0,
+ None => false
}
}
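// For example, a first slice created with background_color =
// Some(ColorF::WHITE) (alpha 1.0) reports opaque here, while any
// non-first slice has background_color = None and reports non-opaque.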
@@ -2493,18 +2479,29 @@ impl TileCacheInstance {
frame_context: &FrameVisibilityContext,
frame_state: &mut FrameVisibilityState,
) -> WorldRect {
+ self.external_surfaces.clear();
self.surface_index = surface_index;
self.local_rect = pic_rect;
self.local_clip_rect = PictureRect::max_rect();
- for sub_slice in &mut self.sub_slices {
- sub_slice.reset();
- }
+ // Opaque surfaces get the first z_id. Compositor surfaces then get
+ // allocated a z_id each. After all compositor surfaces are added,
+ // we allocate a z_id for alpha tiles.
+ self.z_id_opaque = frame_state.composite_state.z_generator.next();
// Reset the opaque rect + subpixel mode, as they are calculated
// during the prim dependency checks.
self.backdrop = BackdropInfo::empty();
+ self.map_local_to_surface = SpaceMapper::new(
+ self.spatial_node_index,
+ pic_rect,
+ );
+ self.map_child_pic_to_surface = SpaceMapper::new(
+ self.spatial_node_index,
+ pic_rect,
+ );
+
let pic_to_world_mapper = SpaceMapper::new_with_target(
ROOT_SPATIAL_NODE_INDEX,
self.spatial_node_index,
@@ -2516,14 +2513,7 @@ impl TileCacheInstance {
// which will provide a local clip rect. This is useful for establishing things
// like whether the backdrop rect supplied by Gecko can be considered opaque.
if self.shared_clip_chain != ClipChainId::NONE {
- let shared_clips = &mut frame_state.scratch.picture.clip_chain_ids;
- shared_clips.clear();
-
- let map_local_to_surface = SpaceMapper::new(
- self.spatial_node_index,
- pic_rect,
- );
-
+ let mut shared_clips = Vec::new();
let mut current_clip_chain_id = self.shared_clip_chain;
while current_clip_chain_id != ClipChainId::NONE {
shared_clips.push(current_clip_chain_id);
@@ -2534,7 +2524,6 @@ impl TileCacheInstance {
frame_state.clip_store.set_active_clips(
LayoutRect::max_rect(),
self.spatial_node_index,
- map_local_to_surface.ref_spatial_node_index,
&shared_clips,
frame_context.spatial_tree,
&mut frame_state.data_stores.clip,
@@ -2542,7 +2531,7 @@ impl TileCacheInstance {
let clip_chain_instance = frame_state.clip_store.build_clip_chain_instance(
pic_rect.cast_unit(),
- &map_local_to_surface,
+ &self.map_local_to_surface,
&pic_to_world_mapper,
frame_context.spatial_tree,
frame_state.gpu_cache,
@@ -2562,6 +2551,50 @@ impl TileCacheInstance {
});
}
+ // If there is pending retained state, retrieve it.
+ if let Some(prev_state) = frame_state.retained_tiles.caches.remove(&self.slice) {
+ self.tiles.extend(prev_state.tiles);
+ self.root_transform = prev_state.root_transform;
+ self.spatial_node_comparer = prev_state.spatial_node_comparer;
+ self.opacity_bindings = prev_state.opacity_bindings;
+ self.color_bindings = prev_state.color_bindings;
+ self.current_tile_size = prev_state.current_tile_size;
+ self.native_surface = prev_state.native_surface;
+ self.external_native_surface_cache = prev_state.external_native_surface_cache;
+ self.virtual_offset = prev_state.virtual_offset;
+ self.frame_id = prev_state.frame_id;
+
+ fn recycle_map<K: std::cmp::Eq + std::hash::Hash, V>(
+ ideal_len: usize,
+ dest: &mut FastHashMap<K, V>,
+ src: FastHashMap<K, V>,
+ ) {
+ if dest.capacity() < src.capacity() {
+ if src.capacity() < 3 * ideal_len {
+ *dest = src;
+ } else {
+ dest.clear();
+ dest.reserve(ideal_len);
+ }
+ }
+ }
+ recycle_map(
+ self.opacity_bindings.len(),
+ &mut self.old_opacity_bindings,
+ prev_state.allocations.old_opacity_bindings,
+ );
+ recycle_map(
+ self.color_bindings.len(),
+ &mut self.old_color_bindings,
+ prev_state.allocations.old_color_bindings,
+ );
+ recycle_map(
+ prev_state.allocations.compare_cache.len(),
+ &mut self.compare_cache,
+ prev_state.allocations.compare_cache,
+ );
+ }
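// Worked example of the recycle_map heuristic above (values assumed): with
// ideal_len = 10, dest.capacity() = 4 and src.capacity() = 25,
// src.capacity() < 3 * ideal_len holds, so dest takes ownership of the
// retained map's storage; at src.capacity() = 40 the map would instead be
// dropped and dest re-reserved at the ideal length, avoiding carrying an
// over-grown table across scenes.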
+
// Advance the current frame ID counter for this picture cache (must be done
// after any retained prev state is taken above).
self.frame_id.advance();
@@ -2598,7 +2631,7 @@ impl TileCacheInstance {
TILE_SIZE_SCROLLBAR_HORIZONTAL
}
} else {
- frame_state.resource_cache.texture_cache.default_picture_tile_size()
+ TILE_SIZE_DEFAULT
}
}
};
@@ -2606,16 +2639,13 @@ impl TileCacheInstance {
// If the desired tile size has changed, then invalidate and drop any
// existing tiles.
if desired_tile_size != self.current_tile_size {
- for sub_slice in &mut self.sub_slices {
- // Destroy any native surfaces on the tiles that will be dropped due
- // to resizing.
- if let Some(native_surface) = sub_slice.native_surface.take() {
- frame_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
- frame_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
- }
- sub_slice.tiles.clear();
+ // Destroy any native surfaces on the tiles that will be dropped due
+ // to resizing.
+ if let Some(native_surface) = self.native_surface.take() {
+ frame_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
+ frame_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
}
- self.tile_rect = TileRect::zero();
+ self.tiles.clear();
self.current_tile_size = desired_tile_size;
}
@@ -2640,7 +2670,6 @@ impl TileCacheInstance {
let device_origin = world_origin * frame_context.global_device_pixel_scale;
let desired_device_origin = device_origin.round();
self.device_position = desired_device_origin;
- self.device_fract_offset = desired_device_origin - device_origin;
// Unmap from device space to world space rect
let ref_world_rect = WorldRect::new(
@@ -2648,13 +2677,17 @@ impl TileCacheInstance {
WorldSize::new(1.0, 1.0),
);
- // Unmap from world space to picture space; this should be the fractional offset
- // required in picture space to align in device space
- self.fract_offset = pic_to_world_mapper
+ // Unmap from world space to picture space
+ let ref_point = pic_to_world_mapper
.unmap(&ref_world_rect)
.expect("bug: unable to unmap ref world rect")
- .origin
- .to_vector();
+ .origin;
+
+ // Extract the fractional offset required in picture space to align in device space
+ self.fract_offset = PictureVector2D::new(
+ ref_point.x.fract(),
+ ref_point.y.fract(),
+ );
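// Worked example (assumed values): if the unmapped reference point is
// (12.25, 45.75) in picture space, fract_offset becomes (0.25, 0.75);
// offsetting the cached picture by this amount keeps tile origins
// aligned to whole device pixels.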
// Do a hacky diff of opacity binding values from the last frame. This is
// used later on during tile invalidation tests.
@@ -2739,34 +2772,33 @@ impl TileCacheInstance {
// virtual offset. If so, we need to invalidate all tiles, and set up
// a new virtual offset, centered around the current tile grid.
- let virtual_surface_size = frame_context.config.compositor_kind.get_virtual_surface_size();
- // We only need to invalidate in this case if the underlying platform
- // uses virtual surfaces.
- if virtual_surface_size > 0 {
- // Get the extremities of the tile grid after virtual offset is applied
- let tx0 = self.virtual_offset.x + x0 * self.current_tile_size.width;
- let ty0 = self.virtual_offset.y + y0 * self.current_tile_size.height;
- let tx1 = self.virtual_offset.x + (x1+1) * self.current_tile_size.width;
- let ty1 = self.virtual_offset.y + (y1+1) * self.current_tile_size.height;
-
- let need_new_virtual_offset = tx0 < 0 ||
- ty0 < 0 ||
- tx1 >= virtual_surface_size ||
- ty1 >= virtual_surface_size;
-
- if need_new_virtual_offset {
- // Calculate a new virtual offset, centered around the middle of the
- // current tile grid. This means we won't need to invalidate and get
- // a new offset for a long time!
- self.virtual_offset = DeviceIntPoint::new(
- (virtual_surface_size/2) - ((x0 + x1) / 2) * self.current_tile_size.width,
- (virtual_surface_size/2) - ((y0 + y1) / 2) * self.current_tile_size.height,
- );
+ if let CompositorKind::Native { virtual_surface_size, .. } = frame_context.config.compositor_kind {
+ // We only need to invalidate in this case if the underlying platform
+ // uses virtual surfaces.
+ if virtual_surface_size > 0 {
+ // Get the extremities of the tile grid after virtual offset is applied
+ let tx0 = self.virtual_offset.x + x0 * self.current_tile_size.width;
+ let ty0 = self.virtual_offset.y + y0 * self.current_tile_size.height;
+ let tx1 = self.virtual_offset.x + (x1+1) * self.current_tile_size.width;
+ let ty1 = self.virtual_offset.y + (y1+1) * self.current_tile_size.height;
+
+ let need_new_virtual_offset = tx0 < 0 ||
+ ty0 < 0 ||
+ tx1 >= virtual_surface_size ||
+ ty1 >= virtual_surface_size;
+
+ if need_new_virtual_offset {
+ // Calculate a new virtual offset, centered around the middle of the
+ // current tile grid. This means we won't need to invalidate and get
+ // a new offset for a long time!
+ self.virtual_offset = DeviceIntPoint::new(
+ (virtual_surface_size/2) - ((x0 + x1) / 2) * self.current_tile_size.width,
+ (virtual_surface_size/2) - ((y0 + y1) / 2) * self.current_tile_size.height,
+ );
- // Invalidate all native tile surfaces. They will be re-allocated next time
- // they are scheduled to be rasterized.
- for sub_slice in &mut self.sub_slices {
- for tile in sub_slice.tiles.values_mut() {
+ // Invalidate all native tile surfaces. They will be re-allocated next time
+ // they are scheduled to be rasterized.
+ for tile in self.tiles.values_mut() {
if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { ref mut id, .. }, .. }) = tile.surface {
if let Some(id) = id.take() {
frame_state.resource_cache.destroy_compositor_tile(id);
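// Worked example of the recentering above (assumed values): with
// virtual_surface_size = 16384, 1024x1024 tiles and a grid spanning
// x0..=x1 = 20..=27, the new offset is
// 16384/2 - ((20 + 27) / 2) * 1024 = 8192 - 23552 = -15360, which puts
// tx0 = -15360 + 20*1024 = 5120 and tx1 = -15360 + 28*1024 = 13312,
// both comfortably inside [0, 16384).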
@@ -2780,7 +2812,7 @@ impl TileCacheInstance {
// Destroy the native virtual surfaces. They will be re-allocated next time a tile
// that references them is scheduled to draw.
- if let Some(native_surface) = sub_slice.native_surface.take() {
+ if let Some(native_surface) = self.native_surface.take() {
frame_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
frame_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
}
@@ -2790,23 +2822,34 @@ impl TileCacheInstance {
// Rebuild the tile grid if the picture cache rect has changed.
if new_tile_rect != self.tile_rect {
- for sub_slice in &mut self.sub_slices {
- let mut old_tiles = sub_slice.resize(new_tile_rect);
+ let mut old_tiles = mem::replace(&mut self.tiles, FastHashMap::default());
+ self.tiles.reserve(new_tile_rect.size.area() as usize);
- // If any old tiles remain after the loop, the dirty rects are no longer valid.
- if !old_tiles.is_empty() {
- frame_state.composite_state.dirty_rects_are_valid = false;
+ for y in y0 .. y1 {
+ for x in x0 .. x1 {
+ let key = TileOffset::new(x, y);
+ let tile = old_tiles
+ .remove(&key)
+ .unwrap_or_else(|| {
+ Box::new(Tile::new(key))
+ });
+ self.tiles.insert(key, tile);
}
+ }
- // Any old tiles that remain after the loop above are going to be dropped. For
- // simple composite mode, the texture cache handle will expire and be collected
- // by the texture cache. For native compositor mode, we need to explicitly
- // invoke a callback to the client to destroy that surface.
- frame_state.composite_state.destroy_native_tiles(
- old_tiles.values_mut(),
- frame_state.resource_cache,
- );
+ // If any old tiles remain after the loop, the dirty rects are no longer valid.
+ if !old_tiles.is_empty() {
+ frame_state.composite_state.dirty_rects_are_valid = false;
}
+
+ // Any old tiles that remain after the loop above are going to be dropped. For
+ // simple composite mode, the texture cache handle will expire and be collected
+ // by the texture cache. For native compositor mode, we need to explicitly
+ // invoke a callback to the client to destroy that surface.
+ frame_state.composite_state.destroy_native_tiles(
+ old_tiles.values_mut(),
+ frame_state.resource_cache,
+ );
}
// This is duplicated information from tile_rect, but cached here to avoid
@@ -2817,10 +2860,9 @@ impl TileCacheInstance {
let mut world_culling_rect = WorldRect::zero();
- let mut ctx = TilePreUpdateContext {
+ let ctx = TilePreUpdateContext {
pic_to_world_mapper,
fract_offset: self.fract_offset,
- device_fract_offset: self.device_fract_offset,
background_color: self.background_color,
global_screen_world_rect: frame_context.global_screen_world_rect,
tile_size: self.tile_size,
@@ -2828,50 +2870,43 @@ impl TileCacheInstance {
};
// Pre-update each tile
- for sub_slice in &mut self.sub_slices {
- for tile in sub_slice.tiles.values_mut() {
- tile.pre_update(&ctx);
-
- // Only include the tiles that are currently in view into the world culling
- // rect. This is a very important optimization for a couple of reasons:
- // (1) Primitives that intersect with tiles in the grid that are not currently
- // visible can be skipped from primitive preparation, clip chain building
- // and tile dependency updates.
- // (2) When we need to allocate an off-screen surface for a child picture (for
- // example a CSS filter) we clip the size of the GPU surface to the world
- // culling rect below (to ensure we draw enough of it to be sampled by any
- // tiles that reference it). Making the world culling rect only affected
- // by visible tiles (rather than the entire virtual tile display port) can
- // result in allocating _much_ smaller GPU surfaces for cases where the
- // true off-screen surface size is very large.
- if tile.is_visible {
- world_culling_rect = world_culling_rect.union(&tile.world_tile_rect);
- }
+ for tile in self.tiles.values_mut() {
+ tile.pre_update(&ctx);
+
+ // Only include the tiles that are currently in view into the world culling
+ // rect. This is a very important optimization for a couple of reasons:
+ // (1) Primitives that intersect with tiles in the grid that are not currently
+ // visible can be skipped from primitive preparation, clip chain building
+ // and tile dependency updates.
+ // (2) When we need to allocate an off-screen surface for a child picture (for
+ // example a CSS filter) we clip the size of the GPU surface to the world
+ // culling rect below (to ensure we draw enough of it to be sampled by any
+ // tiles that reference it). Making the world culling rect only affected
+ // by visible tiles (rather than the entire virtual tile display port) can
+ // result in allocating _much_ smaller GPU surfaces for cases where the
+ // true off-screen surface size is very large.
+ if tile.is_visible {
+ world_culling_rect = world_culling_rect.union(&tile.world_tile_rect);
}
-
- // The background color can only be applied to the first sub-slice.
- ctx.background_color = None;
}
// If compositor mode is changed, need to drop all incompatible tiles.
match frame_context.config.compositor_kind {
CompositorKind::Draw { .. } => {
- for sub_slice in &mut self.sub_slices {
- for tile in sub_slice.tiles.values_mut() {
- if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { ref mut id, .. }, .. }) = tile.surface {
- if let Some(id) = id.take() {
- frame_state.resource_cache.destroy_compositor_tile(id);
- }
- tile.surface = None;
- // Invalidate the entire tile to force a redraw.
- tile.invalidate(None, InvalidationReason::CompositorKindChanged);
+ for tile in self.tiles.values_mut() {
+ if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { ref mut id, .. }, .. }) = tile.surface {
+ if let Some(id) = id.take() {
+ frame_state.resource_cache.destroy_compositor_tile(id);
}
+ tile.surface = None;
+ // Invalidate the entire tile to force a redraw.
+ tile.invalidate(None, InvalidationReason::CompositorKindChanged);
}
+ }
- if let Some(native_surface) = sub_slice.native_surface.take() {
- frame_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
- frame_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
- }
+ if let Some(native_surface) = self.native_surface.take() {
+ frame_state.resource_cache.destroy_compositor_surface(native_surface.opaque);
+ frame_state.resource_cache.destroy_compositor_surface(native_surface.alpha);
}
for (_, external_surface) in self.external_native_surface_cache.drain() {
@@ -2881,13 +2916,11 @@ impl TileCacheInstance {
CompositorKind::Native { .. } => {
// This could hit even when compositor mode is not changed,
// then we need to check if there are incompatible tiles.
- for sub_slice in &mut self.sub_slices {
- for tile in sub_slice.tiles.values_mut() {
- if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::TextureCache { .. }, .. }) = tile.surface {
- tile.surface = None;
- // Invalidate the entire tile to force a redraw.
- tile.invalidate(None, InvalidationReason::CompositorKindChanged);
- }
+ for tile in self.tiles.values_mut() {
+ if let Some(TileSurface::Texture { descriptor: SurfaceTextureDescriptor::TextureCache { .. }, .. }) = tile.surface {
+ tile.surface = None;
+ // Invalidate the entire tile to force a redraw.
+ tile.invalidate(None, InvalidationReason::CompositorKindChanged);
}
}
}
@@ -2901,8 +2934,7 @@ impl TileCacheInstance {
flags: PrimitiveFlags,
prim_clip_chain: &ClipChainInstance,
prim_spatial_node_index: SpatialNodeIndex,
- is_root_tile_cache: bool,
- sub_slice_index: usize,
+ on_picture_surface: bool,
frame_context: &FrameVisibilityContext,
) -> SurfacePromotionResult {
// Check if this primitive _wants_ to be promoted to a compositor surface.
@@ -2911,7 +2943,7 @@ impl TileCacheInstance {
}
// For now, only support a small (arbitrary) number of compositor surfaces.
- if sub_slice_index == MAX_COMPOSITOR_SURFACES {
+ if self.external_surfaces.len() == MAX_COMPOSITOR_SURFACES {
return SurfacePromotionResult::Failed;
}
@@ -2923,9 +2955,9 @@ impl TileCacheInstance {
return SurfacePromotionResult::Failed;
}
- // If not on the root picture cache, it has some kind of
+ // If not on the same surface as the picture cache, it has some kind of
// complex effect (such as a filter, mix-blend-mode or 3d transform).
- if !is_root_tile_cache {
+ if !on_picture_surface {
return SurfacePromotionResult::Failed;
}
@@ -2942,10 +2974,6 @@ impl TileCacheInstance {
return SurfacePromotionResult::Failed;
}
- if self.slice_flags.contains(SliceFlags::IS_BLEND_CONTAINER) {
- return SurfacePromotionResult::Failed;
- }
-
SurfacePromotionResult::Success {
flip_y: transform.m22 < 0.0,
}
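// Note: m22 is the y-axis scale component of the local-to-device
// transform; a negative value means the promoted surface content is
// vertically flipped, so flip_y tells the compositor to present it
// inverted.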
@@ -2953,43 +2981,21 @@ impl TileCacheInstance {
fn setup_compositor_surfaces_yuv(
&mut self,
- sub_slice_index: usize,
prim_info: &mut PrimitiveDependencyInfo,
- flags: PrimitiveFlags,
- local_prim_rect: LayoutRect,
- prim_spatial_node_index: SpatialNodeIndex,
- pic_clip_rect: PictureRect,
+ prim_rect: PictureRect,
frame_context: &FrameVisibilityContext,
image_dependencies: &[ImageDependency;3],
api_keys: &[ImageKey; 3],
resource_cache: &mut ResourceCache,
composite_state: &mut CompositeState,
- gpu_cache: &mut GpuCache,
image_rendering: ImageRendering,
color_depth: ColorDepth,
color_space: YuvColorSpace,
format: YuvFormat,
) -> bool {
- for &key in api_keys {
- if key != ImageKey::DUMMY {
- // TODO: See comment in setup_compositor_surfaces_rgb.
- resource_cache.request_image(ImageRequest {
- key,
- rendering: image_rendering,
- tile: None,
- },
- gpu_cache,
- );
- }
- }
-
self.setup_compositor_surfaces_impl(
- sub_slice_index,
prim_info,
- flags,
- local_prim_rect,
- prim_spatial_node_index,
- pic_clip_rect,
+ prim_rect,
frame_context,
ExternalSurfaceDependency::Yuv {
image_dependencies: *image_dependencies,
@@ -3001,54 +3007,26 @@ impl TileCacheInstance {
resource_cache,
composite_state,
image_rendering,
- true,
)
}
fn setup_compositor_surfaces_rgb(
&mut self,
- sub_slice_index: usize,
prim_info: &mut PrimitiveDependencyInfo,
- flags: PrimitiveFlags,
- local_prim_rect: LayoutRect,
- prim_spatial_node_index: SpatialNodeIndex,
- pic_clip_rect: PictureRect,
+ prim_rect: PictureRect,
frame_context: &FrameVisibilityContext,
image_dependency: ImageDependency,
api_key: ImageKey,
resource_cache: &mut ResourceCache,
composite_state: &mut CompositeState,
- gpu_cache: &mut GpuCache,
image_rendering: ImageRendering,
flip_y: bool,
) -> bool {
let mut api_keys = [ImageKey::DUMMY; 3];
api_keys[0] = api_key;
-
- // TODO: The picture compositing code requires images promoted
- // into their own picture cache slices to be requested every
- // frame even if they are not visible. However the image updates
- // are only reached on the prepare pass for visible primitives.
- // So we make sure to trigger an image request when promoting
- // the image here.
- resource_cache.request_image(ImageRequest {
- key: api_key,
- rendering: image_rendering,
- tile: None,
- },
- gpu_cache,
- );
-
- let is_opaque = resource_cache.get_image_properties(api_key)
- .map_or(false, |properties| properties.descriptor.is_opaque());
-
self.setup_compositor_surfaces_impl(
- sub_slice_index,
prim_info,
- flags,
- local_prim_rect,
- prim_spatial_node_index,
- pic_clip_rect,
+ prim_rect,
frame_context,
ExternalSurfaceDependency::Rgb {
image_dependency,
@@ -3058,7 +3036,6 @@ impl TileCacheInstance {
resource_cache,
composite_state,
image_rendering,
- is_opaque,
)
}
@@ -3066,37 +3043,16 @@ impl TileCacheInstance {
// and the non-compositor path should be used to draw it instead.
fn setup_compositor_surfaces_impl(
&mut self,
- sub_slice_index: usize,
prim_info: &mut PrimitiveDependencyInfo,
- flags: PrimitiveFlags,
- local_prim_rect: LayoutRect,
- prim_spatial_node_index: SpatialNodeIndex,
- pic_clip_rect: PictureRect,
+ prim_rect: PictureRect,
frame_context: &FrameVisibilityContext,
dependency: ExternalSurfaceDependency,
api_keys: &[ImageKey; 3],
resource_cache: &mut ResourceCache,
composite_state: &mut CompositeState,
image_rendering: ImageRendering,
- is_opaque: bool,
) -> bool {
- let map_local_to_surface = SpaceMapper::new_with_target(
- self.spatial_node_index,
- prim_spatial_node_index,
- self.local_rect,
- frame_context.spatial_tree,
- );
-
- // Map the primitive local rect into picture space.
- let prim_rect = match map_local_to_surface.map(&local_prim_rect) {
- Some(rect) => rect,
- None => return true,
- };
-
- // If the rect is invalid, no need to create dependencies.
- if prim_rect.size.is_empty() {
- return true;
- }
+ prim_info.is_compositor_surface = true;
let pic_to_world_mapper = SpaceMapper::new_with_target(
ROOT_SPATIAL_NODE_INDEX,
@@ -3105,6 +3061,9 @@ impl TileCacheInstance {
frame_context.spatial_tree,
);
+ let world_rect = pic_to_world_mapper
+ .map(&prim_rect)
+ .expect("bug: unable to map the primitive to world space");
let world_clip_rect = pic_to_world_mapper
.map(&prim_info.prim_clip_box.to_rect())
.expect("bug: unable to map clip to world space");
@@ -3114,54 +3073,17 @@ impl TileCacheInstance {
return true;
}
- let world_rect = pic_to_world_mapper
- .map(&prim_rect)
- .expect("bug: unable to map the primitive to world space");
- let device_rect = (world_rect * frame_context.global_device_pixel_scale).round();
-
// TODO(gw): Is there any case where, if the primitive ends up on a fractional
// boundary, we want to _skip_ promoting to a compositor surface and
// draw it as part of the content?
- let (surface_rect, transform) = match composite_state.compositor_kind {
- CompositorKind::Draw { .. } => {
- (device_rect, Transform3D::identity())
- }
- CompositorKind::Native { .. } => {
- // If we have a Native Compositor, then we can support doing the transformation
- // as part of compositing. Use the local prim rect for the external surface, and
- // compute the full local to device transform to provide to the compositor.
- let surface_to_world_mapper : SpaceMapper<PicturePixel, WorldPixel> = SpaceMapper::new_with_target(
- ROOT_SPATIAL_NODE_INDEX,
- prim_spatial_node_index,
- frame_context.global_screen_world_rect,
- frame_context.spatial_tree,
- );
- let prim_origin = Vector3D::new(local_prim_rect.origin.x, local_prim_rect.origin.y, 0.0);
- let world_to_device_scale = Transform3D::from_scale(frame_context.global_device_pixel_scale);
- let transform = surface_to_world_mapper.get_transform().pre_translate(prim_origin).then(&world_to_device_scale);
-
- (local_prim_rect.cast_unit(), transform)
- }
- };
-
+ let device_rect = (world_rect * frame_context.global_device_pixel_scale).round();
let clip_rect = (world_clip_rect * frame_context.global_device_pixel_scale).round();
- if surface_rect.size.width >= MAX_COMPOSITOR_SURFACES_SIZE ||
- surface_rect.size.height >= MAX_COMPOSITOR_SURFACES_SIZE {
+ if device_rect.size.width >= MAX_COMPOSITOR_SURFACES_SIZE ||
+ device_rect.size.height >= MAX_COMPOSITOR_SURFACES_SIZE {
return false;
}
- // If this primitive is an external image, and supports being used
- // directly by a native compositor, then lookup the external image id
- // so we can pass that through.
- let external_image_id = if flags.contains(PrimitiveFlags::SUPPORTS_EXTERNAL_COMPOSITOR_SURFACE) {
- resource_cache.get_image_properties(api_keys[0])
- .and_then(|properties| properties.external_image)
- .and_then(|image| Some(image.id))
- } else {
- None
- };
-
// When using native compositing, we need to find an existing native surface
// handle to use, or allocate a new one. For existing native surfaces, we can
// also determine whether this needs to be updated, depending on whether the
@@ -3171,45 +3093,33 @@ impl TileCacheInstance {
(None, None)
}
CompositorKind::Native { .. } => {
- let native_surface_size = surface_rect.size.round().to_i32();
+ let native_surface_size = device_rect.size.round().to_i32();
let key = ExternalNativeSurfaceKey {
image_keys: *api_keys,
size: native_surface_size,
- is_external_surface: external_image_id.is_some(),
};
let native_surface = self.external_native_surface_cache
.entry(key)
.or_insert_with(|| {
- // No existing surface, so allocate a new compositor surface.
- let native_surface_id = match external_image_id {
- Some(_external_image) => {
- // If we have a suitable external image, then create an external
- // surface to attach to.
- resource_cache.create_compositor_external_surface(is_opaque)
- }
- None => {
- // Otherwise create a normal compositor surface and a single
- // compositor tile that covers the entire surface.
- let native_surface_id =
- resource_cache.create_compositor_surface(
- DeviceIntPoint::zero(),
- native_surface_size,
- is_opaque,
- );
+ // No existing surface, so allocate a new compositor surface and
+ // a single compositor tile that covers the entire compositor surface.
- let tile_id = NativeTileId {
- surface_id: native_surface_id,
- x: 0,
- y: 0,
- };
- resource_cache.create_compositor_tile(tile_id);
+ let native_surface_id = resource_cache.create_compositor_surface(
+ DeviceIntPoint::zero(),
+ native_surface_size,
+ true,
+ );
- native_surface_id
- }
+ let tile_id = NativeTileId {
+ surface_id: native_surface_id,
+ x: 0,
+ y: 0,
};
+ resource_cache.create_compositor_tile(tile_id);
+
ExternalNativeSurface {
used_this_frame: true,
native_surface_id,
@@ -3221,65 +3131,41 @@ impl TileCacheInstance {
// backing native surface handle isn't freed.
native_surface.used_this_frame = true;
- let update_params = match external_image_id {
- Some(external_image) => {
- // If this is an external image surface, then there's no update
- // to be done. Just attach the current external image to the surface
- // and we're done.
- resource_cache.attach_compositor_external_image(
- native_surface.native_surface_id,
- external_image,
- );
- None
- }
- None => {
- // If the image dependencies match, there is no need to update
- // the backing native surface.
- match dependency {
- ExternalSurfaceDependency::Yuv{ image_dependencies, .. } => {
- if image_dependencies == native_surface.image_dependencies {
- None
- } else {
- Some(native_surface_size)
- }
- },
- ExternalSurfaceDependency::Rgb{ image_dependency, .. } => {
- if image_dependency == native_surface.image_dependencies[0] {
- None
- } else {
- Some(native_surface_size)
- }
- },
- }
- }
+ // If the image dependencies match, there is no need to update
+ // the backing native surface.
+ let update_params = match dependency {
+ ExternalSurfaceDependency::Yuv{ image_dependencies, .. } => {
+ if image_dependencies == native_surface.image_dependencies {
+ None
+ } else {
+ Some(native_surface_size)
+ }
+ },
+ ExternalSurfaceDependency::Rgb{ image_dependency, .. } => {
+ if image_dependency == native_surface.image_dependencies[0] {
+ None
+ } else {
+ Some(native_surface_size)
+ }
+ },
};
(Some(native_surface.native_surface_id), update_params)
}
};
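// A self-contained toy (stand-in types, not the real resource cache API) of
// the reuse-or-update decision above: a native surface is reused when an
// entry with the same image keys and size already exists, and its contents
// only need re-uploading when an image dependency's generation moved on.
use std::collections::HashMap;

#[derive(PartialEq, Eq, Hash, Clone, Copy)]
struct SurfaceKey { image_keys: [u64; 3], size: (i32, i32) }

#[derive(PartialEq, Clone, Copy)]
struct Dep { key: u64, generation: u64 }

struct CachedSurface { deps: [Dep; 3] }

fn surface_needs_update(
    cache: &mut HashMap<SurfaceKey, CachedSurface>,
    key: SurfaceKey,
    deps: [Dep; 3],
) -> bool {
    // A brand new entry starts with sentinel dependencies, so its first
    // frame always registers as changed and triggers the initial upload.
    let sentinel = Dep { key: u64::MAX, generation: u64::MAX };
    let surface = cache.entry(key).or_insert(CachedSurface { deps: [sentinel; 3] });
    let changed = surface.deps != deps;
    if changed {
        surface.deps = deps;
    }
    changed
}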
- // For compositor surfaces, if we didn't find an earlier sub-slice to add to,
- // we know we can append to the current slice.
- assert!(sub_slice_index < self.sub_slices.len() - 1);
- let sub_slice = &mut self.sub_slices[sub_slice_index];
-
// Each compositor surface allocates a unique z-id
- sub_slice.compositor_surfaces.push(CompositorSurface {
- prohibited_rect: pic_clip_rect,
- is_opaque,
- descriptor: ExternalSurfaceDescriptor {
- local_rect: prim_info.prim_clip_box.to_rect(),
- local_clip_rect: prim_info.prim_clip_box.to_rect(),
- dependency,
- image_rendering,
- device_rect,
- surface_rect,
- clip_rect,
- transform: transform.cast_unit(),
- z_id: ZBufferId::invalid(),
- native_surface_id,
- update_params,
- },
+ self.external_surfaces.push(ExternalSurfaceDescriptor {
+ local_rect: prim_info.prim_clip_box.to_rect(),
+ world_rect,
+ local_clip_rect: prim_info.prim_clip_box.to_rect(),
+ dependency,
+ image_rendering,
+ device_rect,
+ clip_rect,
+ z_id: composite_state.z_generator.next(),
+ native_surface_id,
+ update_params,
});
true
@@ -3290,22 +3176,45 @@ impl TileCacheInstance {
&mut self,
prim_instance: &mut PrimitiveInstance,
prim_spatial_node_index: SpatialNodeIndex,
+ prim_clip_chain: Option<&ClipChainInstance>,
local_prim_rect: LayoutRect,
frame_context: &FrameVisibilityContext,
data_stores: &DataStores,
clip_store: &ClipStore,
pictures: &[PicturePrimitive],
resource_cache: &mut ResourceCache,
+ opacity_binding_store: &OpacityBindingStorage,
color_bindings: &ColorBindingStorage,
+ image_instances: &ImageInstanceStorage,
surface_stack: &[SurfaceIndex],
composite_state: &mut CompositeState,
- gpu_cache: &mut GpuCache,
- is_root_tile_cache: bool,
- ) {
+ ) -> Option<PrimitiveVisibilityFlags> {
// This primitive exists on the surface at the top of the current surface stack.
profile_scope!("update_prim_dependencies");
let prim_surface_index = *surface_stack.last().unwrap();
- let prim_clip_chain = &prim_instance.vis.clip_chain;
+
+ // If the primitive is completely clipped out by the clip chain, there
+ // is no need to add it to any primitive dependencies.
+ let prim_clip_chain = match prim_clip_chain {
+ Some(prim_clip_chain) => prim_clip_chain,
+ None => return None,
+ };
+
+ self.map_local_to_surface.set_target_spatial_node(
+ prim_spatial_node_index,
+ frame_context.spatial_tree,
+ );
+
+ // Map the primitive local rect into picture space.
+ let prim_rect = match self.map_local_to_surface.map(&local_prim_rect) {
+ Some(rect) => rect,
+ None => return None,
+ };
+
+ // If the rect is invalid, no need to create dependencies.
+ if prim_rect.size.is_empty() {
+ return None;
+ }
// If the primitive is directly drawn onto this picture cache surface, then
// the pic_clip_rect is in the same space. If not, we need to map it from
@@ -3344,7 +3253,7 @@ impl TileCacheInstance {
rect.inflate(surface.inflation_factor, surface.inflation_factor)
}
None => {
- return;
+ return None;
}
};
@@ -3360,7 +3269,7 @@ impl TileCacheInstance {
// If the primitive is outside the tiling rects, it's known to not
// be visible.
if p0.x == p1.x || p0.y == p1.y {
- return;
+ return None;
}
// Build the list of resources that this primitive has dependencies on.
@@ -3369,30 +3278,6 @@ impl TileCacheInstance {
pic_clip_rect.to_box2d(),
);
- let mut sub_slice_index = self.sub_slices.len() - 1;
-
- // Only need to evaluate sub-slice regions if we have compositor surfaces present
- if sub_slice_index > 0 {
- // Find the first sub-slice we can add this primitive to (we want to add
- // prims to the primary surface if possible, so they get subpixel AA).
- for (i, sub_slice) in self.sub_slices.iter_mut().enumerate() {
- let mut intersects_prohibited_region = false;
-
- for surface in &mut sub_slice.compositor_surfaces {
- if pic_clip_rect.intersects(&surface.prohibited_rect) {
- surface.prohibited_rect = surface.prohibited_rect.union(&pic_clip_rect);
-
- intersects_prohibited_region = true;
- }
- }
-
- if !intersects_prohibited_region {
- sub_slice_index = i;
- break;
- }
- }
- }
-
// Include the prim spatial node, if differs relative to cache root.
if prim_spatial_node_index != self.spatial_node_index {
prim_info.spatial_nodes.push(prim_spatial_node_index);
@@ -3416,6 +3301,7 @@ impl TileCacheInstance {
// then applied below.
let mut backdrop_candidate = None;
+
// For pictures, we don't (yet) know the valid clip rect, so we can't correctly
// use it to calculate the local bounding rect for the tiles. If we include them
// then we may calculate a bounding rect that is too large, since it won't include
@@ -3435,78 +3321,84 @@ impl TileCacheInstance {
prim_info.opacity_bindings.push(binding.into());
}
}
- PrimitiveInstanceKind::Rectangle { data_handle, color_binding_index, .. } => {
- // Rectangles can only form a backdrop candidate if they are known opaque.
- // TODO(gw): We could resolve the opacity binding here, but the common
- // case for background rects is that they don't have animated opacity.
- let color = match data_stores.prim[data_handle].kind {
- PrimitiveTemplateKind::Rectangle { color, .. } => {
- frame_context.scene_properties.resolve_color(&color)
+ PrimitiveInstanceKind::Rectangle { data_handle, opacity_binding_index, color_binding_index, .. } => {
+ if opacity_binding_index == OpacityBindingIndex::INVALID {
+ // Rectangles can only form a backdrop candidate if they are known opaque.
+ // TODO(gw): We could resolve the opacity binding here, but the common
+ // case for background rects is that they don't have animated opacity.
+ let color = match data_stores.prim[data_handle].kind {
+ PrimitiveTemplateKind::Rectangle { color, .. } => {
+ frame_context.scene_properties.resolve_color(&color)
+ }
+ _ => unreachable!(),
+ };
+ if color.a >= 1.0 {
+ backdrop_candidate = Some(BackdropInfo {
+ opaque_rect: pic_clip_rect,
+ kind: Some(BackdropKind::Color { color }),
+ });
+ }
+ } else {
+ let opacity_binding = &opacity_binding_store[opacity_binding_index];
+ for binding in &opacity_binding.bindings {
+ prim_info.opacity_bindings.push(OpacityBinding::from(*binding));
}
- _ => unreachable!(),
- };
- if color.a >= 1.0 {
- backdrop_candidate = Some(BackdropInfo {
- opaque_rect: pic_clip_rect,
- kind: Some(BackdropKind::Color { color }),
- });
}
if color_binding_index != ColorBindingIndex::INVALID {
prim_info.color_binding = Some(color_bindings[color_binding_index].into());
}
}
- PrimitiveInstanceKind::Image { data_handle, ref mut is_compositor_surface, .. } => {
+ PrimitiveInstanceKind::Image { data_handle, image_instance_index, ref mut is_compositor_surface, .. } => {
let image_key = &data_stores.image[data_handle];
let image_data = &image_key.kind;
+ let image_instance = &image_instances[image_instance_index];
+ let opacity_binding_index = image_instance.opacity_binding_index;
let mut promote_to_surface = false;
let mut promote_with_flip_y = false;
- match self.can_promote_to_surface(image_key.common.flags,
- prim_clip_chain,
- prim_spatial_node_index,
- is_root_tile_cache,
- sub_slice_index,
- frame_context) {
- SurfacePromotionResult::Failed => {
- }
- SurfacePromotionResult::Success{flip_y} => {
- promote_to_surface = true;
- promote_with_flip_y = flip_y;
+ // If picture caching is disabled, we can't support any compositor surfaces.
+ if composite_state.picture_caching_is_enabled {
+ match self.can_promote_to_surface(image_key.common.flags,
+ prim_clip_chain,
+ prim_spatial_node_index,
+ on_picture_surface,
+ frame_context) {
+ SurfacePromotionResult::Failed => {
+ }
+ SurfacePromotionResult::Success{flip_y} => {
+ promote_to_surface = true;
+ promote_with_flip_y = flip_y;
+ }
}
}
- // Native OS compositors (DC and CA, at least) support premultiplied alpha
- // only. If we have an image that's not pre-multiplied alpha, we can't promote it.
- if image_data.alpha_type == AlphaType::Alpha {
- promote_to_surface = false;
- }
-
- if let Some(image_properties) = resource_cache.get_image_properties(image_data.key) {
- // For an image to be a possible opaque backdrop, it must:
- // - Have a valid, opaque image descriptor
- // - Not use tiling (since they can fail to draw)
- // - Not having any spacing / padding
- // - Have opaque alpha in the instance (flattened) color
- if image_properties.descriptor.is_opaque() &&
- image_properties.tiling.is_none() &&
- image_data.tile_spacing == LayoutSize::zero() &&
- image_data.color.a >= 1.0 {
- backdrop_candidate = Some(BackdropInfo {
- opaque_rect: pic_clip_rect,
- kind: None,
- });
+ if opacity_binding_index == OpacityBindingIndex::INVALID {
+ if let Some(image_properties) = resource_cache.get_image_properties(image_data.key) {
+ // For an image to be a possible opaque backdrop, it must:
+ // - Have a valid, opaque image descriptor
+ // - Not use tiling (since they can fail to draw)
+ // - Not have any spacing / padding
+ if image_properties.descriptor.is_opaque() &&
+ image_properties.tiling.is_none() &&
+ image_data.tile_spacing == LayoutSize::zero() {
+ backdrop_candidate = Some(BackdropInfo {
+ opaque_rect: pic_clip_rect,
+ kind: None,
+ });
+ }
+ }
+ } else {
+ let opacity_binding = &opacity_binding_store[opacity_binding_index];
+ for binding in &opacity_binding.bindings {
+ prim_info.opacity_bindings.push(OpacityBinding::from(*binding));
}
}
if promote_to_surface {
promote_to_surface = self.setup_compositor_surfaces_rgb(
- sub_slice_index,
&mut prim_info,
- image_key.common.flags,
- local_prim_rect,
- prim_spatial_node_index,
- pic_clip_rect,
+ prim_rect,
frame_context,
ImageDependency {
key: image_data.key,
@@ -3515,39 +3407,42 @@ impl TileCacheInstance {
image_data.key,
resource_cache,
composite_state,
- gpu_cache,
image_data.image_rendering,
promote_with_flip_y,
);
}
- *is_compositor_surface = promote_to_surface;
-
- if promote_to_surface {
- prim_instance.vis.state = VisibilityState::Culled;
- return;
- } else {
+ if !promote_to_surface {
prim_info.images.push(ImageDependency {
key: image_data.key,
generation: resource_cache.get_image_generation(image_data.key),
});
}
+
+ *is_compositor_surface = promote_to_surface;
}
PrimitiveInstanceKind::YuvImage { data_handle, ref mut is_compositor_surface, .. } => {
let prim_data = &data_stores.yuv_image[data_handle];
- let mut promote_to_surface = match self.can_promote_to_surface(
- prim_data.common.flags,
- prim_clip_chain,
- prim_spatial_node_index,
- is_root_tile_cache,
- sub_slice_index,
- frame_context) {
- SurfacePromotionResult::Failed => false,
- SurfacePromotionResult::Success{flip_y} => !flip_y,
- };
+ // TODO(gw): For now, we only support promoting YUV primitives to be compositor
+ // surfaces. However, some videos are RGBA images. As a follow up,
+ // extract the logic below and support RGBA compositor surfaces too.
+ let mut promote_to_surface = false;
+
+ // If picture caching is disabled, we can't support any compositor surfaces.
+ if composite_state.picture_caching_is_enabled {
+ promote_to_surface = match self.can_promote_to_surface(
+ prim_data.common.flags,
+ prim_clip_chain,
+ prim_spatial_node_index,
+ on_picture_surface,
+ frame_context) {
+ SurfacePromotionResult::Failed => false,
+ SurfacePromotionResult::Success{flip_y} => !flip_y,
+ };
- // TODO(gw): When we support RGBA images for external surfaces, we also
- // need to check if opaque (YUV images are implicitly opaque).
+ // TODO(gw): When we support RGBA images for external surfaces, we also
+ // need to check if opaque (YUV images are implicitly opaque).
+ }
// If this primitive is being promoted to a surface, construct an external
// surface descriptor for use later during batching and compositing. We only
@@ -3566,18 +3461,13 @@ impl TileCacheInstance {
}
promote_to_surface = self.setup_compositor_surfaces_yuv(
- sub_slice_index,
&mut prim_info,
- prim_data.common.flags,
- local_prim_rect,
- prim_spatial_node_index,
- pic_clip_rect,
+ prim_rect,
frame_context,
&image_dependencies,
&prim_data.kind.yuv_key,
resource_cache,
composite_state,
- gpu_cache,
prim_data.kind.image_rendering,
prim_data.kind.color_depth,
prim_data.kind.color_space,
@@ -3585,15 +3475,7 @@ impl TileCacheInstance {
);
}
- // Store on the YUV primitive instance whether this is a promoted surface.
- // This is used by the batching code to determine whether to draw the
- // image to the content tiles, or just a transparent z-write.
- *is_compositor_surface = promote_to_surface;
-
- if promote_to_surface {
- prim_instance.vis.state = VisibilityState::Culled;
- return;
- } else {
+ if !promote_to_surface {
prim_info.images.extend(
prim_data.kind.yuv_key.iter().map(|key| {
ImageDependency {
@@ -3603,6 +3485,12 @@ impl TileCacheInstance {
})
);
}
+
+ // Store on the YUV primitive instance whether this is a promoted surface.
+ // This is used by the batching code to determine whether to draw the
+ // image to the content tiles, or just a transparent z-write.
+ *is_compositor_surface = promote_to_surface;
+
}
PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
let border_data = &data_stores.image_border[data_handle].kind;
@@ -3617,8 +3505,7 @@ impl TileCacheInstance {
kind: Some(BackdropKind::Clear),
});
}
- PrimitiveInstanceKind::LinearGradient { data_handle, .. }
- | PrimitiveInstanceKind::CachedLinearGradient { data_handle, .. } => {
+ PrimitiveInstanceKind::LinearGradient { data_handle, .. } => {
let gradient_data = &data_stores.linear_grad[data_handle];
if gradient_data.stops_opacity.is_opaque
&& gradient_data.tile_spacing == LayoutSize::zero()
@@ -3663,8 +3550,6 @@ impl TileCacheInstance {
// checks to see if it matches all conditions to be a backdrop.
let mut vis_flags = PrimitiveVisibilityFlags::empty();
- let sub_slice = &mut self.sub_slices[sub_slice_index];
-
if let Some(backdrop_candidate) = backdrop_candidate {
let is_suitable_backdrop = match backdrop_candidate.kind {
Some(BackdropKind::Clear) => {
@@ -3696,10 +3581,9 @@ impl TileCacheInstance {
}
};
- if sub_slice_index == 0 &&
- is_suitable_backdrop &&
- sub_slice.compositor_surfaces.is_empty() &&
- !prim_clip_chain.needs_mask {
+ if is_suitable_backdrop
+ && self.external_surfaces.is_empty()
+ && !prim_clip_chain.needs_mask {
if backdrop_candidate.opaque_rect.contains_rect(&self.backdrop.opaque_rect) {
self.backdrop.opaque_rect = backdrop_candidate.opaque_rect;
@@ -3742,19 +3626,13 @@ impl TileCacheInstance {
for x in p0.x .. p1.x {
// TODO(gw): Convert to 2d array temporarily to avoid hash lookups per-tile?
let key = TileOffset::new(x, y);
- let tile = sub_slice.tiles.get_mut(&key).expect("bug: no tile");
+ let tile = self.tiles.get_mut(&key).expect("bug: no tile");
tile.add_prim_dependency(&prim_info);
}
}
- prim_instance.vis.state = VisibilityState::Coarse {
- filter: BatchFilter {
- rect_in_pic_space: pic_clip_rect,
- sub_slice_index: SubSliceIndex::new(sub_slice_index),
- },
- vis_flags,
- };
+ Some(vis_flags)
}
/// Print debug information about this picture cache to a tree printer.
@@ -3766,38 +3644,30 @@ impl TileCacheInstance {
// diff'ing the invalidation states in a visual tool.
let mut pt = PrintTree::new("Picture Cache");
- pt.new_level(format!("Slice {:?}", self.slice));
+ pt.new_level(format!("Slice {}", self.slice));
pt.add_item(format!("fract_offset: {:?}", self.fract_offset));
pt.add_item(format!("background_color: {:?}", self.background_color));
- for (sub_slice_index, sub_slice) in self.sub_slices.iter().enumerate() {
- pt.new_level(format!("SubSlice {:?}", sub_slice_index));
-
- for y in self.tile_bounds_p0.y .. self.tile_bounds_p1.y {
- for x in self.tile_bounds_p0.x .. self.tile_bounds_p1.x {
- let key = TileOffset::new(x, y);
- let tile = &sub_slice.tiles[&key];
- tile.print(&mut pt);
- }
+ for y in self.tile_bounds_p0.y .. self.tile_bounds_p1.y {
+ for x in self.tile_bounds_p0.x .. self.tile_bounds_p1.x {
+ let key = TileOffset::new(x, y);
+ let tile = &self.tiles[&key];
+ tile.print(&mut pt);
}
-
- pt.end_level();
}
pt.end_level();
}
fn calculate_subpixel_mode(&self) -> SubpixelMode {
- let has_opaque_bg_color = self.background_color.map_or(false, |c| c.a >= 1.0);
-
// If the overall tile cache is known opaque, subpixel AA is allowed everywhere
- if has_opaque_bg_color {
+ if self.is_opaque() {
return SubpixelMode::Allow;
}
// If we didn't find any valid opaque backdrop, no subpixel AA allowed
- if self.backdrop.opaque_rect.is_empty() {
+ if !self.backdrop.opaque_rect.is_well_formed_and_nonempty() {
return SubpixelMode::Deny;
}
@@ -3808,15 +3678,26 @@ impl TileCacheInstance {
return SubpixelMode::Allow;
}
- // If none of the simple cases above match, we need to test where we can support subpixel AA.
+ // If none of the simple cases above match, we need to build a list
+ // of excluded rects (compositor surfaces) and a valid inclusion rect
+ // (known opaque area) where we can support subpixel AA.
// TODO(gw): In future, it may make sense to have > 1 inclusion rect,
// but this handles the common cases.
// TODO(gw): If a text run gets animated such that it's moving in a way that is
// sometimes intersecting with the video rect, this can result in subpixel
// AA flickering on/off for that text run. It's probably very rare, but
// something we should handle in future.
+
+ let excluded_rects = self.external_surfaces
+ .iter()
+ .map(|s| {
+ s.local_rect
+ })
+ .collect();
+
SubpixelMode::Conditional {
allowed_rect: self.backdrop.opaque_rect,
+ excluded_rects,
}
}
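// A minimal sketch (illustrative helper, not part of this patch) of how a
// consumer of SubpixelMode::Conditional can decide whether a text run keeps
// subpixel AA: the run must sit inside the opaque allowed_rect and must not
// touch any excluded compositor surface rect.
fn text_run_allows_subpixel_aa(
    text_rect: &PictureRect,
    allowed_rect: &PictureRect,
    excluded_rects: &[PictureRect],
) -> bool {
    allowed_rect.contains_rect(text_rect) &&
        !excluded_rects.iter().any(|rect| rect.intersects(text_rect))
}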
@@ -3828,7 +3709,7 @@ impl TileCacheInstance {
frame_context: &FrameVisibilityContext,
frame_state: &mut FrameVisibilityState,
) {
- self.dirty_region.reset(self.spatial_node_index);
+ self.dirty_region.clear();
self.subpixel_mode = self.calculate_subpixel_mode();
let map_pic_to_world = SpaceMapper::new_with_target(
@@ -3838,14 +3719,56 @@ impl TileCacheInstance {
frame_context.spatial_tree,
);
+ // Register the opaque region of this tile cache as an occluder, which
+ // is used later in the frame to occlude other tiles.
+ if self.backdrop.opaque_rect.is_well_formed_and_nonempty() {
+ let backdrop_rect = self.backdrop.opaque_rect
+ .intersection(&self.local_rect)
+ .and_then(|r| {
+ r.intersection(&self.local_clip_rect)
+ });
+
+ if let Some(backdrop_rect) = backdrop_rect {
+ let world_backdrop_rect = map_pic_to_world
+ .map(&backdrop_rect)
+ .expect("bug: unable to map backdrop to world space");
+
+ // Since we register the entire backdrop rect, use the opaque z-id for the
+ // picture cache slice.
+ frame_state.composite_state.register_occluder(
+ self.z_id_opaque,
+ world_backdrop_rect,
+ );
+ }
+ }
+
+ // Register any external compositor surfaces as potential occluders. This
+ // is especially useful when viewing video in full-screen mode, as it is
+ // able to occlude every background tile (avoiding allocation, rasterization
+ // and compositing).
+ for external_surface in &self.external_surfaces {
+ let local_surface_rect = external_surface.local_rect
+ .intersection(&external_surface.local_clip_rect)
+ .and_then(|r| {
+ r.intersection(&self.local_clip_rect)
+ });
+
+ if let Some(local_surface_rect) = local_surface_rect {
+ let world_surface_rect = map_pic_to_world
+ .map(&local_surface_rect)
+ .expect("bug: unable to map external surface to world space");
+
+ frame_state.composite_state.register_occluder(
+ external_surface.z_id,
+ world_surface_rect,
+ );
+ }
+ }
+
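// A self-contained toy of the occlusion idea behind register_occluder (the
// types and the "greater z-id is in front" convention here are assumptions
// for illustration, not the real composite_state API): opaque rects are
// recorded with the z-id they composite at, and a tile can later be skipped
// when a rect in front of it contains the tile's visible rect.
use euclid::default::Rect;

#[derive(Clone, Copy, PartialEq, PartialOrd)]
struct ZId(i32);

struct Occluder {
    z_id: ZId,
    world_rect: Rect<f32>,
}

fn is_occluded(occluders: &[Occluder], tile_z: ZId, tile_rect: &Rect<f32>) -> bool {
    occluders
        .iter()
        .filter(|occ| occ.z_id > tile_z) // assumed ordering
        .any(|occ| occ.world_rect.contains_rect(tile_rect))
}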
// A simple GC of the native external surface cache, to remove and free any
// surfaces that were not referenced during the update_prim_dependencies pass.
self.external_native_surface_cache.retain(|_, surface| {
if !surface.used_this_frame {
- // If we removed an external surface, we need to mark the dirty rects as
- // invalid so a full composite occurs on the next frame.
- frame_state.composite_state.dirty_rects_are_valid = false;
-
frame_state.resource_cache.destroy_compositor_surface(surface.native_surface_id);
}
@@ -3860,21 +3783,10 @@ impl TileCacheInstance {
.get_relative_transform(
self.spatial_node_index,
ROOT_SPATIAL_NODE_INDEX,
- );
- let root_transform = match root_transform {
- CoordinateSpaceMapping::Local => ScaleOffset::identity(),
- CoordinateSpaceMapping::ScaleOffset(scale_offset) => scale_offset,
- CoordinateSpaceMapping::Transform(..) => panic!("bug: picture caches don't support complex transforms"),
- };
- const EPSILON: f32 = 0.001;
- let root_translation_changed =
- !root_transform.offset.x.approx_eq_eps(&self.root_transform.offset.x, &EPSILON) ||
- !root_transform.offset.y.approx_eq_eps(&self.root_transform.offset.y, &EPSILON);
- let root_scale_changed =
- !root_transform.scale.x.approx_eq_eps(&self.root_transform.scale.x, &EPSILON) ||
- !root_transform.scale.y.approx_eq_eps(&self.root_transform.scale.y, &EPSILON);
-
- if root_translation_changed || root_scale_changed || frame_context.config.force_invalidation {
+ )
+ .into();
+ let root_transform_changed = root_transform != self.root_transform;
+ if root_transform_changed {
self.root_transform = root_transform;
frame_state.composite_state.dirty_rects_are_valid = false;
}
@@ -3886,17 +3798,21 @@ impl TileCacheInstance {
frame_context.spatial_tree,
);
- let mut ctx = TilePostUpdateContext {
+ // All compositor surfaces have allocated a z_id, so reserve a z_id for alpha tiles.
+ let z_id_alpha = frame_state.composite_state.z_generator.next();
+
+ let ctx = TilePostUpdateContext {
pic_to_world_mapper,
global_device_pixel_scale: frame_context.global_device_pixel_scale,
local_clip_rect: self.local_clip_rect,
- backdrop: None,
+ backdrop: self.backdrop,
opacity_bindings: &self.opacity_bindings,
color_bindings: &self.color_bindings,
current_tile_size: self.current_tile_size,
local_rect: self.local_rect,
- z_id: ZBufferId::invalid(),
- invalidate_all: root_scale_changed || frame_context.config.force_invalidation,
+ external_surfaces: &self.external_surfaces,
+ z_id_opaque: self.z_id_opaque,
+ z_id_alpha,
};
let mut state = TilePostUpdateState {
@@ -3908,115 +3824,30 @@ impl TileCacheInstance {
// Step through each tile and invalidate if the dependencies have changed. Determine
// the current opacity setting and whether it's changed.
- for (i, sub_slice) in self.sub_slices.iter_mut().enumerate().rev() {
- // The backdrop is only relevant for the first sub-slice
- if i == 0 {
- ctx.backdrop = Some(self.backdrop);
- }
-
- for compositor_surface in sub_slice.compositor_surfaces.iter_mut().rev() {
- compositor_surface.descriptor.z_id = state.composite_state.z_generator.next();
- }
-
- ctx.z_id = state.composite_state.z_generator.next();
-
- for tile in sub_slice.tiles.values_mut() {
- tile.post_update(&ctx, &mut state, frame_context);
- }
+ for tile in self.tiles.values_mut() {
+ tile.post_update(&ctx, &mut state, frame_context);
}
- // Register any opaque external compositor surfaces as potential occluders. This
- // is especially useful when viewing video in full-screen mode, as it is
- // able to occlude every background tile (avoiding allocation, rasterization
- // and compositing).
-
- for sub_slice in &self.sub_slices {
- for compositor_surface in &sub_slice.compositor_surfaces {
- if compositor_surface.is_opaque {
- let local_surface_rect = compositor_surface
- .descriptor
- .local_rect
- .intersection(&compositor_surface.descriptor.local_clip_rect)
- .and_then(|r| {
- r.intersection(&self.local_clip_rect)
- });
-
- if let Some(local_surface_rect) = local_surface_rect {
- let world_surface_rect = map_pic_to_world
- .map(&local_surface_rect)
- .expect("bug: unable to map external surface to world space");
-
- frame_state.composite_state.register_occluder(
- compositor_surface.descriptor.z_id,
- world_surface_rect,
- );
- }
- }
- }
- }
-
- // Register the opaque region of this tile cache as an occluder, which
- // is used later in the frame to occlude other tiles.
- if !self.backdrop.opaque_rect.is_empty() {
- let z_id_backdrop = frame_state.composite_state.z_generator.next();
-
- let backdrop_rect = self.backdrop.opaque_rect
- .intersection(&self.local_rect)
- .and_then(|r| {
- r.intersection(&self.local_clip_rect)
- });
-
- if let Some(backdrop_rect) = backdrop_rect {
- let world_backdrop_rect = map_pic_to_world
- .map(&backdrop_rect)
- .expect("bug: unable to map backdrop to world space");
-
- // Since we register the entire backdrop rect, use the opaque z-id for the
- // picture cache slice.
- frame_state.composite_state.register_occluder(
- z_id_backdrop,
- world_backdrop_rect,
- );
- }
- }
- }
-}
-
-pub struct PictureScratchBuffer {
- surface_stack: Vec<SurfaceIndex>,
- clip_chain_ids: Vec<ClipChainId>,
-}
-
-impl Default for PictureScratchBuffer {
- fn default() -> Self {
- PictureScratchBuffer {
- surface_stack: Vec::new(),
- clip_chain_ids: Vec::new(),
+ // When under test, record a copy of the dirty region to support
+ // invalidation testing in wrench.
+ if frame_context.config.testing {
+ frame_state.scratch.recorded_dirty_regions.push(self.dirty_region.record());
}
}
}
-impl PictureScratchBuffer {
- pub fn begin_frame(&mut self) {
- self.surface_stack.clear();
- self.clip_chain_ids.clear();
- }
-
- pub fn recycle(&mut self, recycler: &mut Recycler) {
- recycler.recycle_vec(&mut self.surface_stack);
- }
- }
-
/// Maintains a stack of picture and surface information, that
/// is used during the initial picture traversal.
pub struct PictureUpdateState<'a> {
surfaces: &'a mut Vec<SurfaceInfo>,
surface_stack: Vec<SurfaceIndex>,
+ picture_stack: Vec<PictureInfo>,
+ are_raster_roots_assigned: bool,
+ composite_state: &'a CompositeState,
}
impl<'a> PictureUpdateState<'a> {
pub fn update_all(
- buffers: &mut PictureScratchBuffer,
surfaces: &'a mut Vec<SurfaceInfo>,
pic_index: PictureIndex,
picture_primitives: &mut [PicturePrimitive],
@@ -4024,17 +3855,19 @@ impl<'a> PictureUpdateState<'a> {
gpu_cache: &mut GpuCache,
clip_store: &ClipStore,
data_stores: &mut DataStores,
+ composite_state: &CompositeState,
) {
profile_scope!("UpdatePictures");
profile_marker!("UpdatePictures");
let mut state = PictureUpdateState {
surfaces,
- surface_stack: buffers.surface_stack.take().cleared(),
+ surface_stack: vec![SurfaceIndex(0)],
+ picture_stack: Vec::new(),
+ are_raster_roots_assigned: true,
+ composite_state,
};
- state.surface_stack.push(SurfaceIndex(0));
-
state.update(
pic_index,
picture_primitives,
@@ -4044,7 +3877,13 @@ impl<'a> PictureUpdateState<'a> {
data_stores,
);
- buffers.surface_stack = state.surface_stack.take();
+ if !state.are_raster_roots_assigned {
+ state.assign_raster_roots(
+ pic_index,
+ picture_primitives,
+ ROOT_SPATIAL_NODE_INDEX,
+ );
+ }
}
/// Return the current surface
@@ -4073,6 +3912,21 @@ impl<'a> PictureUpdateState<'a> {
self.surface_stack.pop().unwrap()
}
+ /// Push information about a picture on the update stack
+ fn push_picture(
+ &mut self,
+ info: PictureInfo,
+ ) {
+ self.picture_stack.push(info);
+ }
+
+ /// Pop the picture info off, on the way up the picture traversal
+ fn pop_picture(
+ &mut self,
+ ) -> PictureInfo {
+ self.picture_stack.pop().unwrap()
+ }
+
/// Update a picture, determining surface configuration,
/// rasterization roots, and (in future) whether there
/// are cached surfaces that can be used by this picture.
@@ -4089,15 +3943,24 @@ impl<'a> PictureUpdateState<'a> {
self,
frame_context,
) {
- for child_pic_index in &prim_list.child_pictures {
- self.update(
- *child_pic_index,
- picture_primitives,
- frame_context,
- gpu_cache,
- clip_store,
- data_stores,
- );
+ for cluster in &prim_list.clusters {
+ if cluster.flags.contains(ClusterFlags::IS_PICTURE) {
+ for prim_instance in &cluster.prim_instances {
+ let child_pic_index = match prim_instance.kind {
+ PrimitiveInstanceKind::Picture { pic_index, .. } => pic_index,
+ _ => unreachable!(),
+ };
+
+ self.update(
+ child_pic_index,
+ picture_primitives,
+ frame_context,
+ gpu_cache,
+ clip_store,
+ data_stores,
+ );
+ }
+ }
}
picture_primitives[pic_index.0].post_update(
@@ -4108,6 +3971,48 @@ impl<'a> PictureUpdateState<'a> {
);
}
}
+
+ /// Process the picture tree again in a depth-first order,
+ /// and adjust the raster roots of the pictures that want to establish
+ /// their own roots but cannot, due to size constraints.
+ fn assign_raster_roots(
+ &mut self,
+ pic_index: PictureIndex,
+ picture_primitives: &[PicturePrimitive],
+ fallback_raster_spatial_node: SpatialNodeIndex,
+ ) {
+ let picture = &picture_primitives[pic_index.0];
+ if !picture.is_visible() {
+ return
+ }
+
+ let new_fallback = match picture.raster_config {
+ Some(ref config) => {
+ let surface = &mut self.surfaces[config.surface_index.0];
+ if !config.establishes_raster_root {
+ surface.raster_spatial_node_index = fallback_raster_spatial_node;
+ }
+ surface.raster_spatial_node_index
+ }
+ None => fallback_raster_spatial_node,
+ };
+
+ for cluster in &picture.prim_list.clusters {
+ if cluster.flags.contains(ClusterFlags::IS_PICTURE) {
+ for instance in &cluster.prim_instances {
+ let child_pic_index = match instance.kind {
+ PrimitiveInstanceKind::Picture { pic_index, .. } => pic_index,
+ _ => unreachable!(),
+ };
+ self.assign_raster_roots(
+ child_pic_index,
+ picture_primitives,
+ new_fallback,
+ );
+ }
+ }
+ }
+ }
}
#[derive(Debug, Copy, Clone, PartialEq)]
@@ -4116,19 +4021,15 @@ pub struct SurfaceIndex(pub usize);
pub const ROOT_SURFACE_INDEX: SurfaceIndex = SurfaceIndex(0);
-/// Describes the render task configuration for a picture surface.
-#[derive(Debug)]
-pub enum SurfaceRenderTasks {
- /// The common type of surface is a single render task
- Simple(RenderTaskId),
- /// Some surfaces draw their content, and then have further tasks applied
- /// to that input (such as blur passes for shadows). These tasks have a root
- /// (the output of the surface), and a port (for attaching child task dependencies
- /// to the content).
- Chained { root_task_id: RenderTaskId, port_task_id: RenderTaskId },
- /// Picture caches are a single surface consisting of multiple render
- /// tasks, one per tile with dirty content.
- Tiled(Vec<RenderTaskId>),
+#[derive(Debug, Copy, Clone)]
+pub struct SurfaceRenderTasks {
+ /// The root of the render task chain for this surface. This
+ /// is attached to parent tasks, and also the surface that
+ /// gets added during batching.
+ pub root: RenderTaskId,
+ /// The port of the render task chain for this surface. This
+ /// is where child tasks for this surface get attached to.
+ pub port: RenderTaskId,
}
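// A self-contained toy model of the root/port split documented above (the
// graph type and names are assumptions for illustration, not the real render
// task graph API): children attach to the port, parents consume the root, so
// e.g. a blur pass can sit between the two without the parent knowing.
struct Task { children: Vec<usize> }

struct Graph { tasks: Vec<Task> }

impl Graph {
    fn add(&mut self) -> usize {
        self.tasks.push(Task { children: Vec::new() });
        self.tasks.len() - 1
    }
    fn attach(&mut self, parent: usize, child: usize) {
        self.tasks[parent].children.push(child);
    }
}

fn chained_surface_example() {
    let mut graph = Graph { tasks: Vec::new() };
    let content = graph.add();        // port: child tasks attach here
    let blur = graph.add();           // root: what the parent consumes
    graph.attach(blur, content);      // the blur reads the rendered content
    let clip_mask = graph.add();
    graph.attach(content, clip_mask); // a surface child lands on the port
}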
/// Information about an offscreen surface. For now,
@@ -4142,8 +4043,6 @@ pub struct SurfaceInfo {
/// A local rect defining the size of this surface, in the
/// coordinate system of the surface itself.
pub rect: PictureRect,
- /// Part of the surface that we know to be opaque.
- pub opaque_rect: PictureRect,
/// Helper structs for mapping local rects in different
/// coordinate systems into the surface coordinates.
pub map_local_to_surface: SpaceMapper<LayoutPixel, PicturePixel>,
@@ -4159,8 +4058,6 @@ pub struct SurfaceInfo {
pub device_pixel_scale: DevicePixelScale,
/// The scale factors of the surface to raster transform.
pub scale_factors: (f32, f32),
- /// The allocated device rect for this surface
- pub device_rect: Option<DeviceRect>,
}
impl SurfaceInfo {
@@ -4191,7 +4088,6 @@ impl SurfaceInfo {
SurfaceInfo {
rect: PictureRect::zero(),
- opaque_rect: PictureRect::zero(),
map_local_to_surface,
render_tasks: None,
raster_spatial_node_index,
@@ -4199,13 +4095,8 @@ impl SurfaceInfo {
inflation_factor,
device_pixel_scale,
scale_factors,
- device_rect: None,
}
}
-
- pub fn get_device_rect(&self) -> DeviceRect {
- self.device_rect.expect("bug: queried before surface was initialized")
- }
}
#[derive(Debug)]
@@ -4226,11 +4117,6 @@ pub struct RasterConfig {
/// However e.g. text rasterization uses it to ensure consistent
/// on-screen font size.
pub root_scaling_factor: f32,
- /// The world rect of this picture clipped to the current culling
- /// rect. This is used for determining the size of the render
- /// target rect for this surface, and calculating raster scale
- /// factors.
- pub clipped_bounding_rect: WorldRect,
}
bitflags! {
@@ -4265,7 +4151,6 @@ pub enum PictureCompositeMode {
Blit(BlitReason),
/// Used to cache a picture as a series of tiles.
TileCache {
- slice_id: SliceId,
},
/// Apply an SVG filter
SvgFilter(Vec<FilterPrimitive>, Vec<SFilterData>),
@@ -4276,10 +4161,9 @@ impl PictureCompositeMode {
let mut result_rect = picture_rect;
match self {
PictureCompositeMode::Filter(filter) => match filter {
- Filter::Blur(width, height) => {
- let width_factor = clamp_blur_radius(*width, scale_factors).ceil() * BLUR_SAMPLE_SCALE;
- let height_factor = clamp_blur_radius(*height, scale_factors).ceil() * BLUR_SAMPLE_SCALE;
- result_rect = picture_rect.inflate(width_factor, height_factor);
+ Filter::Blur(blur_radius) => {
+ let inflation_factor = clamp_blur_radius(*blur_radius, scale_factors).ceil() * BLUR_SAMPLE_SCALE;
+ result_rect = picture_rect.inflate(inflation_factor, inflation_factor);
},
Filter::DropShadows(shadows) => {
let mut max_inflation: f32 = 0.0;
@@ -4297,9 +4181,8 @@ impl PictureCompositeMode {
let output_rect = match primitive.kind {
FilterPrimitiveKind::Blur(ref primitive) => {
let input = primitive.input.to_index(cur_index).map(|index| output_rects[index]).unwrap_or(picture_rect);
- let width_factor = primitive.width.round() * BLUR_SAMPLE_SCALE;
- let height_factor = primitive.height.round() * BLUR_SAMPLE_SCALE;
- input.inflate(width_factor, height_factor)
+ let inflation_factor = primitive.radius.round() * BLUR_SAMPLE_SCALE;
+ input.inflate(inflation_factor, inflation_factor)
}
FilterPrimitiveKind::DropShadow(ref primitive) => {
let inflation_factor = primitive.shadow.blur_radius.ceil() * BLUR_SAMPLE_SCALE;
@@ -4373,14 +4256,28 @@ bitflags! {
/// A set of flags describing why a picture may need a backing surface.
#[cfg_attr(feature = "capture", derive(Serialize))]
pub struct ClusterFlags: u32 {
+ /// This cluster is a picture
+ const IS_PICTURE = 1;
/// Whether this cluster is visible when the position node is a backface.
- const IS_BACKFACE_VISIBLE = 1;
+ const IS_BACKFACE_VISIBLE = 2;
/// This flag is set during the first pass picture traversal, depending on whether
/// the cluster is visible or not. It's read during the second pass when primitives
/// consult their owning clusters to see if the primitive itself is visible.
- const IS_VISIBLE = 2;
+ const IS_VISIBLE = 4;
/// Is a backdrop-filter cluster that requires special handling during post_update.
- const IS_BACKDROP_FILTER = 4;
+ const IS_BACKDROP_FILTER = 8;
+ /// Force creation of a picture caching slice before this cluster.
+ const CREATE_PICTURE_CACHE_PRE = 16;
+ /// Force creation of a picture caching slice after this cluster.
+ const CREATE_PICTURE_CACHE_POST = 32;
+ /// If set, this cluster represents a scroll bar container.
+ const SCROLLBAR_CONTAINER = 64;
+ /// If set, this cluster contains clear rectangle primitives.
+ const IS_CLEAR_PRIMITIVE = 128;
+ /// This is used as a performance hint - this primitive may be promoted to a native
+ /// compositor surface under certain (implementation specific) conditions. This
+ /// is typically used for large videos, and canvas elements.
+ const PREFER_COMPOSITOR_SURFACE = 256;
}
}
@@ -4395,14 +4292,19 @@ pub struct PrimitiveCluster {
/// during the first picture traversal, which is needed for local scale
/// determination, and render task size calculations.
bounding_rect: LayoutRect,
- /// a part of the cluster that we know to be opaque if any. Does not always
- /// describe the entire opaque region, but all content within that rect must
- /// be opaque.
- pub opaque_rect: LayoutRect,
- /// The range of primitive instance indices associated with this cluster.
- pub prim_range: Range<usize>,
+ /// The list of primitive instances in this cluster.
+ pub prim_instances: Vec<PrimitiveInstance>,
/// Various flags / state for this cluster.
pub flags: ClusterFlags,
+ /// An optional scroll root to use if this cluster establishes a picture cache slice.
+ pub cache_scroll_root: Option<SpatialNodeIndex>,
+}
+
+/// Where to insert a prim instance in a primitive list.
+#[derive(Debug, Copy, Clone)]
+enum PrimitiveListPosition {
+ Begin,
+ End,
}
impl PrimitiveCluster {
@@ -4410,14 +4312,13 @@ impl PrimitiveCluster {
fn new(
spatial_node_index: SpatialNodeIndex,
flags: ClusterFlags,
- first_instance_index: usize,
) -> Self {
PrimitiveCluster {
bounding_rect: LayoutRect::zero(),
- opaque_rect: LayoutRect::zero(),
spatial_node_index,
flags,
- prim_range: first_instance_index..first_instance_index
+ prim_instances: Vec::new(),
+ cache_scroll_root: None,
}
}
@@ -4427,22 +4328,28 @@ impl PrimitiveCluster {
spatial_node_index: SpatialNodeIndex,
flags: ClusterFlags,
) -> bool {
- self.flags == flags && self.spatial_node_index == spatial_node_index
- }
+ // If this cluster is a scrollbar, ensure that a matching scrollbar
+ // container that follows is split up, so we don't combine the
+ // scrollbars into a single slice.
+ if self.flags.contains(ClusterFlags::SCROLLBAR_CONTAINER) {
+ return false;
+ }
- pub fn prim_range(&self) -> Range<usize> {
- self.prim_range.clone()
+ self.flags == flags && self.spatial_node_index == spatial_node_index
}
/// Add a primitive instance to this cluster, at the start or end
- fn add_instance(
+ fn push(
&mut self,
- culling_rect: &LayoutRect,
- instance_index: usize,
+ prim_instance: PrimitiveInstance,
+ prim_rect: LayoutRect,
) {
- debug_assert_eq!(instance_index, self.prim_range.end);
- self.bounding_rect = self.bounding_rect.union(culling_rect);
- self.prim_range.end += 1;
+ let culling_rect = prim_instance.local_clip_rect
+ .intersection(&prim_rect)
+ .unwrap_or_else(LayoutRect::zero);
+
+ self.bounding_rect = self.bounding_rect.union(&culling_rect);
+ self.prim_instances.push(prim_instance);
}
}
@@ -4454,11 +4361,6 @@ impl PrimitiveCluster {
pub struct PrimitiveList {
/// List of primitives grouped into clusters.
pub clusters: Vec<PrimitiveCluster>,
- pub prim_instances: Vec<PrimitiveInstance>,
- pub child_pictures: Vec<PictureIndex>,
- /// The number of preferred compositor surfaces that were found when
- /// adding prims to this list.
- pub compositor_surface_count: usize,
}
impl PrimitiveList {
@@ -4469,31 +4371,32 @@ impl PrimitiveList {
pub fn empty() -> Self {
PrimitiveList {
clusters: Vec::new(),
- prim_instances: Vec::new(),
- child_pictures: Vec::new(),
- compositor_surface_count: 0,
}
}
- /// Add a primitive instance to the end of the list
- pub fn add_prim(
+ /// Add a primitive instance to this list, at the start or end
+ fn push(
&mut self,
prim_instance: PrimitiveInstance,
prim_rect: LayoutRect,
spatial_node_index: SpatialNodeIndex,
prim_flags: PrimitiveFlags,
+ insert_position: PrimitiveListPosition,
) {
let mut flags = ClusterFlags::empty();
// Pictures are always put into a new cluster, to make it faster to
// iterate all pictures in a given primitive list.
match prim_instance.kind {
- PrimitiveInstanceKind::Picture { pic_index, .. } => {
- self.child_pictures.push(pic_index);
+ PrimitiveInstanceKind::Picture { .. } => {
+ flags.insert(ClusterFlags::IS_PICTURE);
}
PrimitiveInstanceKind::Backdrop { .. } => {
flags.insert(ClusterFlags::IS_BACKDROP_FILTER);
}
+ PrimitiveInstanceKind::Clear { .. } => {
+ flags.insert(ClusterFlags::IS_CLEAR_PRIMITIVE);
+ }
_ => {}
}
@@ -4501,73 +4404,90 @@ impl PrimitiveList {
flags.insert(ClusterFlags::IS_BACKFACE_VISIBLE);
}
- if prim_flags.contains(PrimitiveFlags::PREFER_COMPOSITOR_SURFACE) {
- self.compositor_surface_count += 1;
+ if prim_flags.contains(PrimitiveFlags::IS_SCROLLBAR_CONTAINER) {
+ flags.insert(ClusterFlags::SCROLLBAR_CONTAINER);
}
- let culling_rect = prim_instance.clip_set.local_clip_rect
- .intersection(&prim_rect)
- .unwrap_or_else(LayoutRect::zero);
-
- // Primitive lengths aren't evenly distributed among primitive lists:
- // We often have a large amount of single primitive lists, a
- // few below 20~30 primitives, and even fewer lists (maybe a couple)
- // in the multiple hundreds with nothing in between.
- // We can see in profiles that reallocating vectors while pushing
- // primitives is taking a large amount of the total scene build time,
- // so we take advantage of what we know about the length distributions
- // to go for an adapted vector growth pattern that avoids over-allocating
- // for the many small allocations while avoiding a lot of reallocation by
- // quickly converging to the common sizes.
- // Rust's default vector growth strategy (when pushing elements one by one)
- // is to double the capacity every time.
- let prims_len = self.prim_instances.len();
- if prims_len == self.prim_instances.capacity() {
- let next_alloc = match prims_len {
- 1 ..= 31 => 32 - prims_len,
- 32 ..= 256 => 512 - prims_len,
- _ => prims_len * 2,
- };
-
- self.prim_instances.reserve(next_alloc);
+ if prim_flags.contains(PrimitiveFlags::PREFER_COMPOSITOR_SURFACE) {
+ flags.insert(ClusterFlags::PREFER_COMPOSITOR_SURFACE);
}
- let instance_index = prims_len;
- self.prim_instances.push(prim_instance);
-
- if let Some(cluster) = self.clusters.last_mut() {
- if cluster.is_compatible(spatial_node_index, flags) {
- cluster.add_instance(&culling_rect, instance_index);
- return;
+ // Insert the primitive into the first or last cluster as required
+ match insert_position {
+ PrimitiveListPosition::Begin => {
+ let mut cluster = PrimitiveCluster::new(
+ spatial_node_index,
+ flags,
+ );
+ cluster.push(prim_instance, prim_rect);
+ self.clusters.insert(0, cluster);
}
- }
-
- // Same idea with clusters, using a different distribution.
- let clusters_len = self.clusters.len();
- if clusters_len == self.clusters.capacity() {
- let next_alloc = match clusters_len {
- 1 ..= 15 => 16 - clusters_len,
- 16 ..= 127 => 128 - clusters_len,
- _ => clusters_len * 2,
- };
+ PrimitiveListPosition::End => {
+ if let Some(cluster) = self.clusters.last_mut() {
+ if cluster.is_compatible(spatial_node_index, flags) {
+ cluster.push(prim_instance, prim_rect);
+ return;
+ }
+ }
- self.clusters.reserve(next_alloc);
+ let mut cluster = PrimitiveCluster::new(
+ spatial_node_index,
+ flags,
+ );
+ cluster.push(prim_instance, prim_rect);
+ self.clusters.push(cluster);
+ }
}
+ }
- let mut cluster = PrimitiveCluster::new(
+ /// Add a primitive instance to the start of the list
+ pub fn add_prim_to_start(
+ &mut self,
+ prim_instance: PrimitiveInstance,
+ prim_rect: LayoutRect,
+ spatial_node_index: SpatialNodeIndex,
+ flags: PrimitiveFlags,
+ ) {
+ self.push(
+ prim_instance,
+ prim_rect,
spatial_node_index,
flags,
- instance_index,
- );
+ PrimitiveListPosition::Begin,
+ )
+ }
- cluster.add_instance(&culling_rect, instance_index);
- self.clusters.push(cluster);
+ /// Add a primitive instance to the end of the list
+ pub fn add_prim(
+ &mut self,
+ prim_instance: PrimitiveInstance,
+ prim_rect: LayoutRect,
+ spatial_node_index: SpatialNodeIndex,
+ flags: PrimitiveFlags,
+ ) {
+ self.push(
+ prim_instance,
+ prim_rect,
+ spatial_node_index,
+ flags,
+ PrimitiveListPosition::End,
+ )
}
/// Returns true if there are no clusters (and thus primitives)
pub fn is_empty(&self) -> bool {
self.clusters.is_empty()
}
+
+ /// Add an existing cluster to this prim list
+ pub fn add_cluster(&mut self, cluster: PrimitiveCluster) {
+ self.clusters.push(cluster);
+ }
+
+ /// Merge another primitive list into this one
+ pub fn extend(&mut self, prim_list: PrimitiveList) {
+ self.clusters.extend(prim_list.clusters);
+ }
}
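// A self-contained toy of the clustering rule implemented by push() and
// is_compatible() above (the key type is an illustrative stand-in, not the
// real primitive instance): consecutive primitives that agree on spatial
// node and cluster flags share a cluster, and a scrollbar container cluster
// is never appended to, so each scrollbar ends up in its own cluster.
const SCROLLBAR_CONTAINER: u32 = 64;

#[derive(PartialEq, Clone, Copy)]
struct ClusterKey { spatial_node: usize, flags: u32 }

fn cluster_counts(prims: &[ClusterKey]) -> Vec<(ClusterKey, usize)> {
    let mut clusters: Vec<(ClusterKey, usize)> = Vec::new();
    for &key in prims {
        let compatible = matches!(
            clusters.last(),
            Some((last, _)) if *last == key && last.flags & SCROLLBAR_CONTAINER == 0
        );
        if compatible {
            clusters.last_mut().unwrap().1 += 1;
        } else {
            clusters.push((key, 1));
        }
    }
    clusters
}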
/// Defines configuration options for a given picture primitive.
@@ -4601,21 +4521,27 @@ pub struct PicturePrimitive {
/// it will be considered invisible.
pub is_backface_visible: bool,
- pub primary_render_task_id: Option<RenderTaskId>,
- /// If a mix-blend-mode, contains the render task for
- /// the readback of the framebuffer that we use to sample
- /// from in the mix-blend-mode shader.
- /// For drop-shadow filter, this will store the original
- /// picture task which would be rendered on screen after
- /// blur pass.
+ // If a mix-blend-mode, contains the render task for
+ // the readback of the framebuffer that we use to sample
+ // from in the mix-blend-mode shader.
+ // For drop-shadow filter, this will store the original
+ // picture task which would be rendered on screen after
+ // blur pass.
pub secondary_render_task_id: Option<RenderTaskId>,
/// How this picture should be composited.
/// If None, don't composite - just draw directly on parent surface.
pub requested_composite_mode: Option<PictureCompositeMode>,
+ /// Requested rasterization space for this picture. It is
+ /// a performance hint only.
+ pub requested_raster_space: RasterSpace,
pub raster_config: Option<RasterConfig>,
pub context_3d: Picture3DContext<OrderedPictureChild>,
+ // If requested as a frame output (for rendering
+ // pages to a texture), this is the pipeline this
+ // picture is the root of.
+ pub frame_output_pipeline_id: Option<PipelineId>,
// Optional cache handles for storing extra data
// in the GPU cache, depending on the type of
// picture.
@@ -4639,22 +4565,22 @@ pub struct PicturePrimitive {
/// different depending on how much was culled.
pub precise_local_rect: LayoutRect,
- /// Store the state of the previous precise local rect
- /// for this picture. We need this in order to know when
- /// to invalidate segments / drop-shadow gpu cache handles.
- pub prev_precise_local_rect: LayoutRect,
-
/// If false, this picture needs to (re)build segments
/// if it supports segment rendering. This can occur
/// if the local rect of the picture changes due to
/// transform animation and/or scrolling.
pub segments_are_valid: bool,
+ /// If Some(..) the tile cache that is associated with this picture.
+ #[cfg_attr(feature = "capture", serde(skip))] //TODO
+ pub tile_cache: Option<Box<TileCacheInstance>>,
+
/// The config options for this picture.
pub options: PictureOptions,
- /// Set to true if we know for sure the picture is fully opaque.
- pub is_opaque: bool,
+ /// Keep track of the number of render tasks dependencies to pre-allocate
+ /// the dependency array next frame.
+ num_render_tasks: usize,
}
impl PicturePrimitive {
@@ -4672,8 +4598,16 @@ impl PicturePrimitive {
pt.add_item(format!("raster_config: {:?}", self.raster_config));
pt.add_item(format!("requested_composite_mode: {:?}", self.requested_composite_mode));
- for child_pic_index in &self.prim_list.child_pictures {
- pictures[child_pic_index.0].print(pictures, *child_pic_index, pt);
+ for cluster in &self.prim_list.clusters {
+ if cluster.flags.contains(ClusterFlags::IS_PICTURE) {
+ for instance in &cluster.prim_instances {
+ let index = match instance.kind {
+ PrimitiveInstanceKind::Picture { pic_index, .. } => pic_index,
+ _ => unreachable!(),
+ };
+ pictures[index.0].print(pictures, index, pt);
+ }
+ }
}
pt.end_level();
@@ -4724,6 +4658,40 @@ impl PicturePrimitive {
}
}
+ /// Destroy an existing picture. This is called just before
+ /// a frame builder is replaced with a newly built scene. It
+ /// gives a picture a chance to retain any cached tiles that
+ /// may be useful during the next scene build.
+ pub fn destroy(
+ &mut self,
+ retained_tiles: &mut RetainedTiles,
+ ) {
+ if let Some(tile_cache) = self.tile_cache.take() {
+ if !tile_cache.tiles.is_empty() {
+ retained_tiles.caches.insert(
+ tile_cache.slice,
+ PictureCacheState {
+ tiles: tile_cache.tiles,
+ spatial_node_comparer: tile_cache.spatial_node_comparer,
+ opacity_bindings: tile_cache.opacity_bindings,
+ color_bindings: tile_cache.color_bindings,
+ root_transform: tile_cache.root_transform,
+ current_tile_size: tile_cache.current_tile_size,
+ native_surface: tile_cache.native_surface,
+ external_native_surface_cache: tile_cache.external_native_surface_cache,
+ virtual_offset: tile_cache.virtual_offset,
+ frame_id: tile_cache.frame_id,
+ allocations: PictureCacheRecycledAllocations {
+ old_opacity_bindings: tile_cache.old_opacity_bindings,
+ old_color_bindings: tile_cache.old_color_bindings,
+ compare_cache: tile_cache.compare_cache,
+ },
+ },
+ );
+ }
+ }
+ }
+
// TODO(gw): We have the PictureOptions struct available. We
// should move some of the parameter list in this
// method to be part of the PictureOptions, and
@@ -4731,54 +4699,79 @@ impl PicturePrimitive {
pub fn new_image(
requested_composite_mode: Option<PictureCompositeMode>,
context_3d: Picture3DContext<OrderedPictureChild>,
+ frame_output_pipeline_id: Option<PipelineId>,
apply_local_clip_rect: bool,
flags: PrimitiveFlags,
+ requested_raster_space: RasterSpace,
prim_list: PrimitiveList,
spatial_node_index: SpatialNodeIndex,
+ tile_cache: Option<Box<TileCacheInstance>>,
options: PictureOptions,
) -> Self {
PicturePrimitive {
prim_list,
state: None,
- primary_render_task_id: None,
secondary_render_task_id: None,
requested_composite_mode,
raster_config: None,
context_3d,
+ frame_output_pipeline_id,
extra_gpu_data_handles: SmallVec::new(),
apply_local_clip_rect,
is_backface_visible: flags.contains(PrimitiveFlags::IS_BACKFACE_VISIBLE),
+ requested_raster_space,
spatial_node_index,
estimated_local_rect: LayoutRect::zero(),
precise_local_rect: LayoutRect::zero(),
- prev_precise_local_rect: LayoutRect::zero(),
+ tile_cache,
options,
segments_are_valid: false,
- is_opaque: false,
+ num_render_tasks: 0,
+ }
+ }
+
+ /// Gets the raster space to use when rendering the picture.
+ /// Usually this would be the requested raster space. However, if the
+ /// picture's spatial node or one of its ancestors is being pinch zoomed
+ /// then we round it. This prevents us rasterizing glyphs for every minor
+ /// change in zoom level, as that would be too expensive.
+ pub fn get_raster_space(&self, spatial_tree: &SpatialTree) -> RasterSpace {
+ let spatial_node = &spatial_tree.spatial_nodes[self.spatial_node_index.0 as usize];
+ if spatial_node.is_ancestor_or_self_zooming {
+ let scale_factors = spatial_tree
+ .get_relative_transform(self.spatial_node_index, ROOT_SPATIAL_NODE_INDEX)
+ .scale_factors();
+
+ // Round the scale up to the nearest power of 2, but don't exceed 8.
+ let scale = scale_factors.0.max(scale_factors.1).min(8.0);
+ let rounded_up = 2.0f32.powf(scale.log2().ceil());
+
+ RasterSpace::Local(rounded_up)
+ } else {
+ self.requested_raster_space
}
}
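// Worked example of the rounding above (a standalone sketch mirroring the
// arithmetic, not the real method): pinch-zoom scale factors of (2.3, 1.0)
// give scale = min(2.3, 8.0) = 2.3 and 2^ceil(log2(2.3)) = 2^2 = 4, so
// glyphs are rasterized at RasterSpace::Local(4.0) until the zoom leaves
// the (2, 4] bucket; a factor of 9.0 clamps to 8.0 first and stays at 8.0.
fn rounded_zoom_scale(max_scale_factor: f32) -> f32 {
    let scale = max_scale_factor.min(8.0);
    2.0f32.powf(scale.log2().ceil())
}
// rounded_zoom_scale(2.3) == 4.0, rounded_zoom_scale(9.0) == 8.0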
pub fn take_context(
&mut self,
pic_index: PictureIndex,
+ clipped_prim_bounding_rect: WorldRect,
surface_spatial_node_index: SpatialNodeIndex,
raster_spatial_node_index: SpatialNodeIndex,
parent_surface_index: SurfaceIndex,
- parent_subpixel_mode: SubpixelMode,
+ parent_subpixel_mode: &SubpixelMode,
frame_state: &mut FrameBuildingState,
frame_context: &FrameBuildingContext,
scratch: &mut PrimitiveScratchBuffer,
tile_cache_logger: &mut TileCacheLogger,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
) -> Option<(PictureContext, PictureState, PrimitiveList)> {
- self.primary_render_task_id = None;
- self.secondary_render_task_id = None;
-
if !self.is_visible() {
return None;
}
profile_scope!("take_context");
+ let task_id = frame_state.surfaces[parent_surface_index.0].render_tasks.unwrap().port;
+ frame_state.render_tasks[task_id].children.reserve(self.num_render_tasks);
// Extract the raster and surface spatial nodes from the raster
// config, if this picture establishes a surface. Otherwise just
@@ -4839,345 +4832,6 @@ impl PicturePrimitive {
};
match self.raster_config {
- Some(RasterConfig { surface_index, composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) => {
- let tile_cache = tile_caches.get_mut(&slice_id).unwrap();
- let mut debug_info = SliceDebugInfo::new();
- let mut surface_tasks = Vec::with_capacity(tile_cache.tile_count());
- let mut surface_device_rect = DeviceRect::zero();
- let device_pixel_scale = frame_state
- .surfaces[surface_index.0]
- .device_pixel_scale;
-
- // Get the overall world space rect of the picture cache. Used to clip
- // the tile rects below for occlusion testing to the relevant area.
- let world_clip_rect = map_pic_to_world
- .map(&tile_cache.local_clip_rect)
- .expect("bug: unable to map clip rect");
- let device_clip_rect = (world_clip_rect * frame_context.global_device_pixel_scale).round();
-
- for (sub_slice_index, sub_slice) in tile_cache.sub_slices.iter_mut().enumerate() {
- for tile in sub_slice.tiles.values_mut() {
- surface_device_rect = surface_device_rect.union(&tile.device_valid_rect);
-
- if tile.is_visible {
- // Get the world space rect that this tile will actually occupy on screen
- let device_draw_rect = device_clip_rect.intersection(&tile.device_valid_rect);
-
- // If that draw rect is occluded by some set of tiles in front of it,
- // then mark it as not visible and skip drawing. When it's not occluded
- // it will fail this test, and get rasterized by the render task setup
- // code below.
- match device_draw_rect {
- Some(device_draw_rect) => {
- // Only check for occlusion on visible tiles that are fixed position.
- if tile_cache.spatial_node_index == ROOT_SPATIAL_NODE_INDEX &&
- frame_state.composite_state.occluders.is_tile_occluded(tile.z_id, device_draw_rect) {
- // If this tile has an allocated native surface, free it, since it's completely
- // occluded. We will need to re-allocate this surface if it becomes visible,
- // but that's likely to be rare (e.g. when there is no content display list
- // for a frame or two during a tab switch).
- let surface = tile.surface.as_mut().expect("no tile surface set!");
-
- if let TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { id, .. }, .. } = surface {
- if let Some(id) = id.take() {
- frame_state.resource_cache.destroy_compositor_tile(id);
- }
- }
-
- tile.is_visible = false;
-
- if frame_context.fb_config.testing {
- debug_info.tiles.insert(
- tile.tile_offset,
- TileDebugInfo::Occluded,
- );
- }
-
- continue;
- }
- }
- None => {
- tile.is_visible = false;
- }
- }
- }
-
- // If we get here, we want to ensure that the surface remains valid in the texture
- // cache, _even if_ it's not visible due to clipping or being scrolled off-screen.
- // This ensures that we retain valid tiles that are off-screen, but still in the
- // display port of this tile cache instance.
- if let Some(TileSurface::Texture { descriptor, .. }) = tile.surface.as_ref() {
- if let SurfaceTextureDescriptor::TextureCache { ref handle, .. } = descriptor {
- frame_state.resource_cache.texture_cache.request(
- handle,
- frame_state.gpu_cache,
- );
- }
- }
-
- // If the tile has been found to be off-screen / clipped, skip any further processing.
- if !tile.is_visible {
- if frame_context.fb_config.testing {
- debug_info.tiles.insert(
- tile.tile_offset,
- TileDebugInfo::Culled,
- );
- }
-
- continue;
- }
-
- if frame_context.debug_flags.contains(DebugFlags::PICTURE_CACHING_DBG) {
- tile.root.draw_debug_rects(
- &map_pic_to_world,
- tile.is_opaque,
- tile.current_descriptor.local_valid_rect,
- scratch,
- frame_context.global_device_pixel_scale,
- );
-
- let label_offset = DeviceVector2D::new(
- 20.0 + sub_slice_index as f32 * 20.0,
- 30.0 + sub_slice_index as f32 * 20.0,
- );
- let tile_device_rect = tile.world_tile_rect * frame_context.global_device_pixel_scale;
- if tile_device_rect.size.height >= label_offset.y {
- let surface = tile.surface.as_ref().expect("no tile surface set!");
-
- scratch.push_debug_string(
- tile_device_rect.origin + label_offset,
- debug_colors::RED,
- format!("{:?}: s={} is_opaque={} surface={} sub={}",
- tile.id,
- tile_cache.slice,
- tile.is_opaque,
- surface.kind(),
- sub_slice_index,
- ),
- );
- }
- }
-
- if let TileSurface::Texture { descriptor, .. } = tile.surface.as_mut().unwrap() {
- match descriptor {
- SurfaceTextureDescriptor::TextureCache { ref handle, .. } => {
- // Invalidate if the backing texture was evicted.
- if frame_state.resource_cache.texture_cache.is_allocated(handle) {
- // Request the backing texture so it won't get evicted this frame.
- // We specifically want to mark the tile texture as used, even
- // if it's detected not visible below and skipped. This is because
- // we maintain the set of tiles we care about based on visibility
- // during pre_update. If a tile still exists after that, we are
- // assuming that it's either visible or we want to retain it for
- // a while in case it gets scrolled back onto screen soon.
- // TODO(gw): Consider switching to manual eviction policy?
- frame_state.resource_cache.texture_cache.request(handle, frame_state.gpu_cache);
- } else {
- // If the texture was evicted on a previous frame, we need to assume
- // that the entire tile rect is dirty.
- tile.invalidate(None, InvalidationReason::NoTexture);
- }
- }
- SurfaceTextureDescriptor::Native { id, .. } => {
- if id.is_none() {
- // There is no current surface allocation, so ensure the entire tile is invalidated
- tile.invalidate(None, InvalidationReason::NoSurface);
- }
- }
- }
- }
-
- // Ensure that the dirty rect doesn't extend outside the local valid rect.
- tile.local_dirty_rect = tile.local_dirty_rect
- .intersection(&tile.current_descriptor.local_valid_rect)
- .unwrap_or_else(PictureRect::zero);
-
- // Update the world/device dirty rect
- let world_dirty_rect = map_pic_to_world.map(&tile.local_dirty_rect).expect("bug");
-
- let device_rect = (tile.world_tile_rect * frame_context.global_device_pixel_scale).round();
- tile.device_dirty_rect = (world_dirty_rect * frame_context.global_device_pixel_scale)
- .round_out()
- .intersection(&device_rect)
- .unwrap_or_else(DeviceRect::zero);
-
- if tile.is_valid {
- if frame_context.fb_config.testing {
- debug_info.tiles.insert(
- tile.tile_offset,
- TileDebugInfo::Valid,
- );
- }
-
- continue;
- }
-
-                        // Add this dirty rect to the dirty region tracker. This must be done outside the if statement below,
-                        // so that the dirty region includes tiles that are handled by a background color only (no
-                        // surface allocation).
- tile_cache.dirty_region.add_dirty_region(
- tile.local_dirty_rect,
- SubSliceIndex::new(sub_slice_index),
- frame_context.spatial_tree,
- );
-
- // Ensure that this texture is allocated.
- if let TileSurface::Texture { ref mut descriptor } = tile.surface.as_mut().unwrap() {
- match descriptor {
- SurfaceTextureDescriptor::TextureCache { ref mut handle } => {
- if !frame_state.resource_cache.texture_cache.is_allocated(handle) {
- frame_state.resource_cache.texture_cache.update_picture_cache(
- tile_cache.current_tile_size,
- handle,
- frame_state.gpu_cache,
- );
- }
- }
- SurfaceTextureDescriptor::Native { id } => {
- if id.is_none() {
- // Allocate a native surface id if we're in native compositing mode,
- // and we don't have a surface yet (due to first frame, or destruction
- // due to tile size changing etc).
- if sub_slice.native_surface.is_none() {
- let opaque = frame_state
- .resource_cache
- .create_compositor_surface(
- tile_cache.virtual_offset,
- tile_cache.current_tile_size,
- true,
- );
-
- let alpha = frame_state
- .resource_cache
- .create_compositor_surface(
- tile_cache.virtual_offset,
- tile_cache.current_tile_size,
- false,
- );
-
- sub_slice.native_surface = Some(NativeSurface {
- opaque,
- alpha,
- });
- }
-
- // Create the tile identifier and allocate it.
- let surface_id = if tile.is_opaque {
- sub_slice.native_surface.as_ref().unwrap().opaque
- } else {
- sub_slice.native_surface.as_ref().unwrap().alpha
- };
-
- let tile_id = NativeTileId {
- surface_id,
- x: tile.tile_offset.x,
- y: tile.tile_offset.y,
- };
-
- frame_state.resource_cache.create_compositor_tile(tile_id);
-
- *id = Some(tile_id);
- }
- }
- }
-
- let content_origin_f = tile.world_tile_rect.origin * device_pixel_scale;
- let content_origin = content_origin_f.round();
- debug_assert!((content_origin_f.x - content_origin.x).abs() < 0.01);
- debug_assert!((content_origin_f.y - content_origin.y).abs() < 0.01);
-
- let surface = descriptor.resolve(
- frame_state.resource_cache,
- tile_cache.current_tile_size,
- );
-
- let scissor_rect = tile.device_dirty_rect
- .translate(-device_rect.origin.to_vector())
- .round()
- .to_i32();
-
- let valid_rect = tile.device_valid_rect
- .translate(-device_rect.origin.to_vector())
- .round()
- .to_i32();
-
- let task_size = tile_cache.current_tile_size;
-
- let batch_filter = BatchFilter {
- rect_in_pic_space: tile.local_dirty_rect,
- sub_slice_index: SubSliceIndex::new(sub_slice_index),
- };
-
- let render_task_id = frame_state.rg_builder.add().init(
- RenderTask::new(
- RenderTaskLocation::Static {
- surface: StaticRenderTaskSurface::PictureCache {
- surface,
- },
- rect: task_size.into(),
- },
- RenderTaskKind::new_picture(
- task_size,
- tile_cache.current_tile_size.to_f32(),
- pic_index,
- content_origin,
- surface_spatial_node_index,
- device_pixel_scale,
- Some(batch_filter),
- Some(scissor_rect),
- Some(valid_rect),
- )
- ),
- );
-
- surface_tasks.push(render_task_id);
- }
-
- if frame_context.fb_config.testing {
- debug_info.tiles.insert(
- tile.tile_offset,
- TileDebugInfo::Dirty(DirtyTileDebugInfo {
- local_valid_rect: tile.current_descriptor.local_valid_rect,
- local_dirty_rect: tile.local_dirty_rect,
- }),
- );
- }
-
- // If the entire tile valid region is dirty, we can update the fract offset
- // at which the tile was rendered.
- if tile.device_dirty_rect.contains_rect(&tile.device_valid_rect) {
- tile.device_fract_offset = tile_cache.device_fract_offset;
- }
-
- // Now that the tile is valid, reset the dirty rect.
- tile.local_dirty_rect = PictureRect::zero();
- tile.is_valid = true;
- }
- }
-
- // If invalidation debugging is enabled, dump the picture cache state to a tree printer.
- if frame_context.debug_flags.contains(DebugFlags::INVALIDATION_DBG) {
- tile_cache.print();
- }
-
- // If testing mode is enabled, write some information about the current state
- // of this picture cache (made available in RenderResults).
- if frame_context.fb_config.testing {
- frame_state.composite_state
- .picture_cache_debug
- .slices
- .insert(
- tile_cache.slice,
- debug_info,
- );
- }
-
- frame_state.init_surface_tiled(
- surface_index,
- surface_tasks,
- surface_device_rect,
- );
- }
Some(ref mut raster_config) => {
let pic_rect = self.precise_local_rect.cast_unit();
@@ -5220,7 +4874,7 @@ impl PicturePrimitive {
pic_rect,
&map_pic_to_raster,
&map_raster_to_world,
- raster_config.clipped_bounding_rect.outer_rect(clip_inflation),
+ clipped_prim_bounding_rect.outer_rect(clip_inflation),
device_pixel_scale,
) {
Some(info) => info,
@@ -5243,7 +4897,7 @@ impl PicturePrimitive {
    /// support. The scaling can be seen as an on-the-fly,
/// per-task DPI adjustment. Logical pixels are unaffected.
///
- /// The scaling factor is returned to the caller; blur radius,
+ /// The scaling factor is returned to the caller; blur radius,
/// font size, etc. need to be scaled accordingly.
fn adjust_scale_for_max_surface_size(
raster_config: &RasterConfig,
@@ -5290,17 +4944,12 @@ impl PicturePrimitive {
}
}
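
    As a rough model of the adjustment the doc comment above describes (a sketch
    assuming a uniform scale on both axes; this is not the function's actual
    signature), the idea is to pick the largest scale that keeps the clipped
    task within the device's maximum target size:

        // If either axis of the task exceeds max_size, return the uniform
        // scale that makes it fit; the caller then rescales blur radii,
        // font sizes, etc. to match.
        fn scale_to_fit(width: f32, height: f32, max_size: f32) -> Option<f32> {
            let scale = (max_size / width).min(max_size / height);
            if scale < 1.0 { Some(scale) } else { None }
        }

        // scale_to_fit(10000.0, 4000.0, 8192.0) -> roughly Some(0.8192):
        // both axes fit after scaling.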
- let primary_render_task_id;
- match raster_config.composite_mode {
- PictureCompositeMode::TileCache { .. } => {
- unreachable!("handled above");
- }
- PictureCompositeMode::Filter(Filter::Blur(width, height)) => {
- let width_std_deviation = clamp_blur_radius(width, scale_factors) * device_pixel_scale.0;
- let height_std_deviation = clamp_blur_radius(height, scale_factors) * device_pixel_scale.0;
+ let dep_info = match raster_config.composite_mode {
+ PictureCompositeMode::Filter(Filter::Blur(blur_radius)) => {
+ let blur_std_deviation = clamp_blur_radius(blur_radius, scale_factors) * device_pixel_scale.0;
let mut blur_std_deviation = DeviceSize::new(
- width_std_deviation * scale_factors.0,
- height_std_deviation * scale_factors.1
+ blur_std_deviation * scale_factors.0,
+ blur_std_deviation * scale_factors.1
);
let mut device_rect = if self.options.inflate_if_required {
let inflation_factor = frame_state.surfaces[raster_config.surface_index.0].inflation_factor;
@@ -5327,7 +4976,7 @@ impl PicturePrimitive {
// Adjust the size to avoid introducing sampling errors during the down-scaling passes.
// what would be even better is to rasterize the picture at the down-scaled size
// directly.
- device_rect.size = BlurTask::adjusted_blur_source_size(
+ device_rect.size = RenderTask::adjusted_blur_source_size(
device_rect.size,
blur_std_deviation,
);
@@ -5335,7 +4984,7 @@ impl PicturePrimitive {
if let Some(scale) = adjust_scale_for_max_surface_size(
raster_config, frame_context.fb_config.max_target_size,
pic_rect, &map_pic_to_raster, &map_raster_to_world,
- raster_config.clipped_bounding_rect,
+ clipped_prim_bounding_rect,
&mut device_pixel_scale, &mut device_rect, &mut unclipped,
) {
blur_std_deviation = blur_std_deviation * scale;
@@ -5343,6 +4992,8 @@ impl PicturePrimitive {
raster_config.root_scaling_factor = scale;
}
+ let device_rect = device_rect.to_i32();
+
let uv_rect_kind = calculate_uv_rect_kind(
&pic_rect,
&transform,
@@ -5350,43 +5001,32 @@ impl PicturePrimitive {
device_pixel_scale,
);
- let task_size = device_rect.size.to_i32();
-
- let picture_task_id = frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_picture(
- task_size,
- unclipped.size,
- pic_index,
- device_rect.origin,
- surface_spatial_node_index,
- device_pixel_scale,
- None,
- None,
- None,
- )
- ).with_uv_rect_kind(uv_rect_kind)
+ let picture_task_id = frame_state.render_tasks.add().init(
+ RenderTask::new_picture(
+ RenderTaskLocation::Dynamic(None, device_rect.size),
+ unclipped.size,
+ pic_index,
+ device_rect.origin,
+ uv_rect_kind,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ PrimitiveVisibilityMask::all(),
+ None,
+ None,
+ )
);
let blur_render_task_id = RenderTask::new_blur(
blur_std_deviation,
picture_task_id,
- frame_state.rg_builder,
+ frame_state.render_tasks,
RenderTargetKind::Color,
+ ClearMode::Transparent,
None,
original_size.to_i32(),
);
- primary_render_task_id = Some(blur_render_task_id);
-
- frame_state.init_surface_chain(
- raster_config.surface_index,
- blur_render_task_id,
- picture_task_id,
- parent_surface_index,
- device_rect,
- );
+ Some((blur_render_task_id, picture_task_id))
}
PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => {
let mut max_std_deviation = 0.0;
@@ -5403,7 +5043,7 @@ impl PicturePrimitive {
.intersection(&unclipped)
.unwrap();
- device_rect.size = BlurTask::adjusted_blur_source_size(
+ device_rect.size = RenderTask::adjusted_blur_source_size(
device_rect.size,
DeviceSize::new(
max_std_deviation * scale_factors.0,
@@ -5414,13 +5054,15 @@ impl PicturePrimitive {
if let Some(scale) = adjust_scale_for_max_surface_size(
raster_config, frame_context.fb_config.max_target_size,
pic_rect, &map_pic_to_raster, &map_raster_to_world,
- raster_config.clipped_bounding_rect,
+ clipped_prim_bounding_rect,
&mut device_pixel_scale, &mut device_rect, &mut unclipped,
) {
// std_dev adjusts automatically from using device_pixel_scale
raster_config.root_scaling_factor = scale;
}
+ let device_rect = device_rect.to_i32();
+
let uv_rect_kind = calculate_uv_rect_kind(
&pic_rect,
&transform,
@@ -5428,31 +5070,25 @@ impl PicturePrimitive {
device_pixel_scale,
);
- let task_size = device_rect.size.to_i32();
-
- let picture_task_id = frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_picture(
- task_size,
- unclipped.size,
- pic_index,
- device_rect.origin,
- surface_spatial_node_index,
- device_pixel_scale,
- None,
- None,
- None,
- ),
- ).with_uv_rect_kind(uv_rect_kind)
- );
+ let picture_task_id = frame_state.render_tasks.add().init({
+ let mut picture_task = RenderTask::new_picture(
+ RenderTaskLocation::Dynamic(None, device_rect.size),
+ unclipped.size,
+ pic_index,
+ device_rect.origin,
+ uv_rect_kind,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ PrimitiveVisibilityMask::all(),
+ None,
+ None,
+ );
+ picture_task.mark_for_saving();
- // Add this content picture as a dependency of the parent surface, to
- // ensure it isn't free'd after the shadow uses it as an input.
- frame_state.add_child_render_task(
- parent_surface_index,
- picture_task_id,
- );
+ picture_task
+ });
+
+ self.secondary_render_task_id = Some(picture_task_id);
let mut blur_tasks = BlurTaskCache::default();
@@ -5467,40 +5103,29 @@ impl PicturePrimitive {
blur_radius * scale_factors.1,
),
picture_task_id,
- frame_state.rg_builder,
+ frame_state.render_tasks,
RenderTargetKind::Color,
+ ClearMode::Transparent,
Some(&mut blur_tasks),
- device_rect.size.to_i32(),
+ device_rect.size,
);
}
- primary_render_task_id = Some(blur_render_task_id);
- self.secondary_render_task_id = Some(picture_task_id);
-
- frame_state.init_surface_chain(
- raster_config.surface_index,
- blur_render_task_id,
- picture_task_id,
- parent_surface_index,
- device_rect,
- );
+                    // TODO(nical) the second one should be the blur's task id, but we have several blurs now
+ Some((blur_render_task_id, picture_task_id))
}
- PictureCompositeMode::MixBlend(mode) if BlendMode::from_mix_blend_mode(
- mode,
- frame_context.fb_config.gpu_supports_advanced_blend,
- frame_context.fb_config.advanced_blend_is_coherent,
- frame_context.fb_config.dual_source_blending_is_enabled &&
- frame_context.fb_config.dual_source_blending_is_supported,
- ).is_none() => {
+ PictureCompositeMode::MixBlend(..) if !frame_context.fb_config.gpu_supports_advanced_blend => {
if let Some(scale) = adjust_scale_for_max_surface_size(
raster_config, frame_context.fb_config.max_target_size,
pic_rect, &map_pic_to_raster, &map_raster_to_world,
- raster_config.clipped_bounding_rect,
+ clipped_prim_bounding_rect,
&mut device_pixel_scale, &mut clipped, &mut unclipped,
) {
raster_config.root_scaling_factor = scale;
}
+ let clipped = clipped.to_i32();
+
let uv_rect_kind = calculate_uv_rect_kind(
&pic_rect,
&transform,
@@ -5508,116 +5133,47 @@ impl PicturePrimitive {
device_pixel_scale,
);
- let parent_surface = &frame_state.surfaces[parent_surface_index.0];
- let parent_raster_spatial_node_index = parent_surface.raster_spatial_node_index;
- let parent_device_pixel_scale = parent_surface.device_pixel_scale;
-
- // Create a space mapper that will allow mapping from the local rect
- // of the mix-blend primitive into the space of the surface that we
- // need to read back from. Note that we use the parent's raster spatial
- // node here, so that we are in the correct device space of the parent
- // surface, whether it establishes a raster root or not.
- let map_pic_to_parent = SpaceMapper::new_with_target(
- parent_raster_spatial_node_index,
- self.spatial_node_index,
- RasterRect::max_rect(), // TODO(gw): May need a conservative estimate?
- frame_context.spatial_tree,
- );
- let pic_in_raster_space = map_pic_to_parent
- .map(&pic_rect)
- .expect("bug: unable to map mix-blend content into parent");
-
- // Apply device pixel ratio for parent surface to get into device
- // pixels for that surface.
- let backdrop_rect = raster_rect_to_device_pixels(
- pic_in_raster_space,
- parent_device_pixel_scale,
+ let readback_task_id = frame_state.render_tasks.add().init(
+ RenderTask::new_readback(clipped)
);
- let parent_surface_rect = parent_surface.get_device_rect();
-
- // If there is no available parent surface to read back from (for example, if
- // the parent surface is affected by a clip that doesn't affect the child
- // surface), then create a dummy 16x16 readback. In future, we could alter
- // the composite mode of this primitive to skip the mix-blend, but for simplicity
- // we just create a dummy readback for now.
-
- let readback_task_id = match backdrop_rect.intersection(&parent_surface_rect) {
- Some(available_rect) => {
-                            // Calculate the UV coords necessary for the shader to sample
- // from the primitive rect within the readback region. This is
- // 0..1 for aligned surfaces, but doing it this way allows
- // accurate sampling if the primitive bounds have fractional values.
- let backdrop_uv = calculate_uv_rect_kind(
- &pic_rect,
- &map_pic_to_parent.get_transform(),
- &available_rect,
- parent_device_pixel_scale,
- );
-
- frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- available_rect.size.to_i32(),
- RenderTaskKind::new_readback(Some(available_rect.origin)),
- ).with_uv_rect_kind(backdrop_uv)
- )
- }
- None => {
- frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- DeviceIntSize::new(16, 16),
- RenderTaskKind::new_readback(None),
- )
- )
- }
- };
-
- frame_state.add_child_render_task(
- parent_surface_index,
+ frame_state.render_tasks.add_dependency(
+ frame_state.surfaces[parent_surface_index.0].render_tasks.unwrap().port,
readback_task_id,
);
self.secondary_render_task_id = Some(readback_task_id);
- let task_size = clipped.size.to_i32();
-
- let render_task_id = frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_picture(
- task_size,
- unclipped.size,
- pic_index,
- clipped.origin,
- surface_spatial_node_index,
- device_pixel_scale,
- None,
- None,
- None,
- )
- ).with_uv_rect_kind(uv_rect_kind)
+ let render_task_id = frame_state.render_tasks.add().init(
+ RenderTask::new_picture(
+ RenderTaskLocation::Dynamic(None, clipped.size),
+ unclipped.size,
+ pic_index,
+ clipped.origin,
+ uv_rect_kind,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ PrimitiveVisibilityMask::all(),
+ None,
+ None,
+ )
);
- primary_render_task_id = Some(render_task_id);
-
- frame_state.init_surface(
- raster_config.surface_index,
- render_task_id,
- parent_surface_index,
- clipped,
- );
+ Some((render_task_id, render_task_id))
}
PictureCompositeMode::Filter(..) => {
if let Some(scale) = adjust_scale_for_max_surface_size(
raster_config, frame_context.fb_config.max_target_size,
pic_rect, &map_pic_to_raster, &map_raster_to_world,
- raster_config.clipped_bounding_rect,
+ clipped_prim_bounding_rect,
&mut device_pixel_scale, &mut clipped, &mut unclipped,
) {
raster_config.root_scaling_factor = scale;
}
+ let clipped = clipped.to_i32();
+
let uv_rect_kind = calculate_uv_rect_kind(
&pic_rect,
&transform,
@@ -5625,44 +5181,35 @@ impl PicturePrimitive {
device_pixel_scale,
);
- let task_size = clipped.size.to_i32();
-
- let render_task_id = frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_picture(
- task_size,
- unclipped.size,
- pic_index,
- clipped.origin,
- surface_spatial_node_index,
- device_pixel_scale,
- None,
- None,
- None,
- )
- ).with_uv_rect_kind(uv_rect_kind)
+ let render_task_id = frame_state.render_tasks.add().init(
+ RenderTask::new_picture(
+ RenderTaskLocation::Dynamic(None, clipped.size),
+ unclipped.size,
+ pic_index,
+ clipped.origin,
+ uv_rect_kind,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ PrimitiveVisibilityMask::all(),
+ None,
+ None,
+ )
);
- primary_render_task_id = Some(render_task_id);
-
- frame_state.init_surface(
- raster_config.surface_index,
- render_task_id,
- parent_surface_index,
- clipped,
- );
+ Some((render_task_id, render_task_id))
}
PictureCompositeMode::ComponentTransferFilter(..) => {
if let Some(scale) = adjust_scale_for_max_surface_size(
raster_config, frame_context.fb_config.max_target_size,
pic_rect, &map_pic_to_raster, &map_raster_to_world,
- raster_config.clipped_bounding_rect,
+ clipped_prim_bounding_rect,
&mut device_pixel_scale, &mut clipped, &mut unclipped,
) {
raster_config.root_scaling_factor = scale;
}
+ let clipped = clipped.to_i32();
+
let uv_rect_kind = calculate_uv_rect_kind(
&pic_rect,
&transform,
@@ -5670,45 +5217,328 @@ impl PicturePrimitive {
device_pixel_scale,
);
- let task_size = clipped.size.to_i32();
-
- let render_task_id = frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_picture(
- task_size,
- unclipped.size,
- pic_index,
- clipped.origin,
- surface_spatial_node_index,
- device_pixel_scale,
- None,
- None,
- None,
- )
- ).with_uv_rect_kind(uv_rect_kind)
+ let render_task_id = frame_state.render_tasks.add().init(
+ RenderTask::new_picture(
+ RenderTaskLocation::Dynamic(None, clipped.size),
+ unclipped.size,
+ pic_index,
+ clipped.origin,
+ uv_rect_kind,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ PrimitiveVisibilityMask::all(),
+ None,
+ None,
+ )
);
- primary_render_task_id = Some(render_task_id);
+ Some((render_task_id, render_task_id))
+ }
+ PictureCompositeMode::TileCache { .. } => {
+ let tile_cache = self.tile_cache.as_mut().unwrap();
+ let mut first = true;
+
+ // Get the overall world space rect of the picture cache. Used to clip
+ // the tile rects below for occlusion testing to the relevant area.
+ let world_clip_rect = map_pic_to_world
+ .map(&tile_cache.local_clip_rect)
+ .expect("bug: unable to map clip rect");
+ let device_clip_rect = (world_clip_rect * frame_context.global_device_pixel_scale).round();
+
+ for tile in tile_cache.tiles.values_mut() {
+
+ if tile.is_visible {
+                            // Get the world space rect that this tile will actually occupy on screen
+ let device_draw_rect = device_clip_rect.intersection(&tile.device_valid_rect);
+
+ // If that draw rect is occluded by some set of tiles in front of it,
+                            // then mark it as not visible and skip drawing. When it's not occluded,
+                            // it will fail this test and get rasterized by the render task setup
+ // code below.
+ match device_draw_rect {
+ Some(device_draw_rect) => {
+ // Only check for occlusion on visible tiles that are fixed position.
+ if tile_cache.spatial_node_index == ROOT_SPATIAL_NODE_INDEX &&
+ frame_state.composite_state.is_tile_occluded(tile.z_id, device_draw_rect) {
+ // If this tile has an allocated native surface, free it, since it's completely
+ // occluded. We will need to re-allocate this surface if it becomes visible,
+ // but that's likely to be rare (e.g. when there is no content display list
+ // for a frame or two during a tab switch).
+ let surface = tile.surface.as_mut().expect("no tile surface set!");
+
+ if let TileSurface::Texture { descriptor: SurfaceTextureDescriptor::Native { id, .. }, .. } = surface {
+ if let Some(id) = id.take() {
+ frame_state.resource_cache.destroy_compositor_tile(id);
+ }
+ }
- frame_state.init_surface(
- raster_config.surface_index,
- render_task_id,
- parent_surface_index,
- clipped,
- );
+ tile.is_visible = false;
+ continue;
+ }
+ }
+ None => {
+ tile.is_visible = false;
+ }
+ }
+ }
+
+ // If we get here, we want to ensure that the surface remains valid in the texture
+ // cache, _even if_ it's not visible due to clipping or being scrolled off-screen.
+ // This ensures that we retain valid tiles that are off-screen, but still in the
+ // display port of this tile cache instance.
+ if let Some(TileSurface::Texture { descriptor, .. }) = tile.surface.as_ref() {
+ if let SurfaceTextureDescriptor::TextureCache { ref handle, .. } = descriptor {
+ frame_state.resource_cache.texture_cache.request(
+ handle,
+ frame_state.gpu_cache,
+ );
+ }
+ }
+
+ // If the tile has been found to be off-screen / clipped, skip any further processing.
+ if !tile.is_visible {
+ continue;
+ }
+
+ if frame_context.debug_flags.contains(DebugFlags::PICTURE_CACHING_DBG) {
+ tile.root.draw_debug_rects(
+ &map_pic_to_world,
+ tile.is_opaque,
+ tile.current_descriptor.local_valid_rect,
+ scratch,
+ frame_context.global_device_pixel_scale,
+ );
+
+ let label_offset = DeviceVector2D::new(20.0, 30.0);
+ let tile_device_rect = tile.world_tile_rect * frame_context.global_device_pixel_scale;
+ if tile_device_rect.size.height >= label_offset.y {
+ let surface = tile.surface.as_ref().expect("no tile surface set!");
+
+ scratch.push_debug_string(
+ tile_device_rect.origin + label_offset,
+ debug_colors::RED,
+ format!("{:?}: s={} is_opaque={} surface={}",
+ tile.id,
+ tile_cache.slice,
+ tile.is_opaque,
+ surface.kind(),
+ ),
+ );
+ }
+ }
+
+ if let TileSurface::Texture { descriptor, .. } = tile.surface.as_mut().unwrap() {
+ match descriptor {
+ SurfaceTextureDescriptor::TextureCache { ref handle, .. } => {
+ // Invalidate if the backing texture was evicted.
+ if frame_state.resource_cache.texture_cache.is_allocated(handle) {
+ // Request the backing texture so it won't get evicted this frame.
+ // We specifically want to mark the tile texture as used, even
+ // if it's detected not visible below and skipped. This is because
+ // we maintain the set of tiles we care about based on visibility
+ // during pre_update. If a tile still exists after that, we are
+ // assuming that it's either visible or we want to retain it for
+ // a while in case it gets scrolled back onto screen soon.
+ // TODO(gw): Consider switching to manual eviction policy?
+ frame_state.resource_cache.texture_cache.request(handle, frame_state.gpu_cache);
+ } else {
+ // If the texture was evicted on a previous frame, we need to assume
+ // that the entire tile rect is dirty.
+ tile.invalidate(None, InvalidationReason::NoTexture);
+ }
+ }
+ SurfaceTextureDescriptor::Native { id, .. } => {
+ if id.is_none() {
+ // There is no current surface allocation, so ensure the entire tile is invalidated
+ tile.invalidate(None, InvalidationReason::NoSurface);
+ }
+ }
+ }
+ }
+
+ // Ensure that the dirty rect doesn't extend outside the local valid rect.
+ tile.local_dirty_rect = tile.local_dirty_rect
+ .intersection(&tile.current_descriptor.local_valid_rect)
+ .unwrap_or_else(PictureRect::zero);
+
+ // Update the world/device dirty rect
+ let world_dirty_rect = map_pic_to_world.map(&tile.local_dirty_rect).expect("bug");
+
+ let device_rect = (tile.world_tile_rect * frame_context.global_device_pixel_scale).round();
+ tile.device_dirty_rect = (world_dirty_rect * frame_context.global_device_pixel_scale)
+ .round_out()
+ .intersection(&device_rect)
+ .unwrap_or_else(DeviceRect::zero);
+
+ if tile.is_valid {
+ continue;
+ }
+
+ // Ensure that this texture is allocated.
+ if let TileSurface::Texture { ref mut descriptor, ref mut visibility_mask } = tile.surface.as_mut().unwrap() {
+ match descriptor {
+ SurfaceTextureDescriptor::TextureCache { ref mut handle } => {
+ if !frame_state.resource_cache.texture_cache.is_allocated(handle) {
+ frame_state.resource_cache.texture_cache.update_picture_cache(
+ tile_cache.current_tile_size,
+ handle,
+ frame_state.gpu_cache,
+ );
+ }
+ }
+ SurfaceTextureDescriptor::Native { id } => {
+ if id.is_none() {
+ // Allocate a native surface id if we're in native compositing mode,
+ // and we don't have a surface yet (due to first frame, or destruction
+ // due to tile size changing etc).
+ if tile_cache.native_surface.is_none() {
+ let opaque = frame_state
+ .resource_cache
+ .create_compositor_surface(
+ tile_cache.virtual_offset,
+ tile_cache.current_tile_size,
+ true,
+ );
+
+ let alpha = frame_state
+ .resource_cache
+ .create_compositor_surface(
+ tile_cache.virtual_offset,
+ tile_cache.current_tile_size,
+ false,
+ );
+
+ tile_cache.native_surface = Some(NativeSurface {
+ opaque,
+ alpha,
+ });
+ }
+
+ // Create the tile identifier and allocate it.
+ let surface_id = if tile.is_opaque {
+ tile_cache.native_surface.as_ref().unwrap().opaque
+ } else {
+ tile_cache.native_surface.as_ref().unwrap().alpha
+ };
+
+ let tile_id = NativeTileId {
+ surface_id,
+ x: tile.tile_offset.x,
+ y: tile.tile_offset.y,
+ };
+
+ frame_state.resource_cache.create_compositor_tile(tile_id);
+
+ *id = Some(tile_id);
+ }
+ }
+ }
+
+ *visibility_mask = PrimitiveVisibilityMask::empty();
+ let dirty_region_index = tile_cache.dirty_region.dirty_rects.len();
+
+ // If we run out of dirty regions, then force the last dirty region to
+ // be a union of any remaining regions. This is an inefficiency, in that
+ // we'll add items to batches later on that are redundant / outside this
+ // tile, but it's really rare except in pathological cases (even on a
+ // 4k screen, the typical dirty region count is < 16).
+ if dirty_region_index < PrimitiveVisibilityMask::MAX_DIRTY_REGIONS {
+ visibility_mask.set_visible(dirty_region_index);
+
+ tile_cache.dirty_region.push(
+ world_dirty_rect,
+ *visibility_mask,
+ );
+ } else {
+ visibility_mask.set_visible(PrimitiveVisibilityMask::MAX_DIRTY_REGIONS - 1);
+
+ tile_cache.dirty_region.include_rect(
+ PrimitiveVisibilityMask::MAX_DIRTY_REGIONS - 1,
+ world_dirty_rect,
+ );
+ }
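+
+                            // The if/else above implements a fixed-width mask with an overflow
+                            // policy. A self-contained sketch of that policy follows; the constant
+                            // width and the rect type are illustrative stand-ins for
+                            // PrimitiveVisibilityMask and the crate's rect types:
+                            //
+                            //     const MAX_DIRTY_REGIONS: usize = 8; // illustrative width only
+                            //
+                            //     #[derive(Clone, Copy)]
+                            //     struct Rect { x0: f32, y0: f32, x1: f32, y1: f32 }
+                            //
+                            //     impl Rect {
+                            //         fn union(self, o: Rect) -> Rect {
+                            //             Rect {
+                            //                 x0: self.x0.min(o.x0), y0: self.y0.min(o.y0),
+                            //                 x1: self.x1.max(o.x1), y1: self.y1.max(o.y1),
+                            //             }
+                            //         }
+                            //     }
+                            //
+                            //     // Each dirty rect claims one mask bit; once the bits run out,
+                            //     // later rects are unioned into the last region. Batching may
+                            //     // then include redundant items, but rendering stays correct.
+                            //     fn assign_region(regions: &mut Vec<Rect>, rect: Rect) -> u32 {
+                            //         if regions.len() < MAX_DIRTY_REGIONS {
+                            //             regions.push(rect);
+                            //             1 << (regions.len() - 1)
+                            //         } else {
+                            //             let last = MAX_DIRTY_REGIONS - 1;
+                            //             regions[last] = regions[last].union(rect);
+                            //             1 << last
+                            //         }
+                            //     }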
+
+ let content_origin_f = tile.world_tile_rect.origin * device_pixel_scale;
+ let content_origin = content_origin_f.round();
+ debug_assert!((content_origin_f.x - content_origin.x).abs() < 0.01);
+ debug_assert!((content_origin_f.y - content_origin.y).abs() < 0.01);
+
+ let surface = descriptor.resolve(
+ frame_state.resource_cache,
+ tile_cache.current_tile_size,
+ );
+
+ let scissor_rect = tile.device_dirty_rect
+ .translate(-device_rect.origin.to_vector())
+ .round()
+ .to_i32();
+
+ let valid_rect = tile.device_valid_rect
+ .translate(-device_rect.origin.to_vector())
+ .round()
+ .to_i32();
+
+ let render_task_id = frame_state.render_tasks.add().init(
+ RenderTask::new_picture(
+ RenderTaskLocation::PictureCache {
+ size: tile_cache.current_tile_size,
+ surface,
+ },
+ tile_cache.current_tile_size.to_f32(),
+ pic_index,
+ content_origin.to_i32(),
+ UvRectKind::Rect,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ *visibility_mask,
+ Some(scissor_rect),
+ Some(valid_rect),
+ )
+ );
+
+ frame_state.render_tasks.add_dependency(
+ frame_state.surfaces[parent_surface_index.0].render_tasks.unwrap().port,
+ render_task_id,
+ );
+
+ if first {
+                                    // TODO(gw): Maybe we can restructure this code to avoid the
+                                    // `first` hack here, or at least explain it in a follow-up
+                                    // bug.
+ frame_state.surfaces[raster_config.surface_index.0].render_tasks = Some(SurfaceRenderTasks {
+ root: render_task_id,
+ port: render_task_id,
+ });
+
+ first = false;
+ }
+ }
+
+ // Now that the tile is valid, reset the dirty rect.
+ tile.local_dirty_rect = PictureRect::zero();
+ tile.is_valid = true;
+ }
+
+ // If invalidation debugging is enabled, dump the picture cache state to a tree printer.
+ if frame_context.debug_flags.contains(DebugFlags::INVALIDATION_DBG) {
+ tile_cache.print();
+ }
+
+ None
}
PictureCompositeMode::MixBlend(..) |
PictureCompositeMode::Blit(_) => {
if let Some(scale) = adjust_scale_for_max_surface_size(
raster_config, frame_context.fb_config.max_target_size,
pic_rect, &map_pic_to_raster, &map_raster_to_world,
- raster_config.clipped_bounding_rect,
+ clipped_prim_bounding_rect,
&mut device_pixel_scale, &mut clipped, &mut unclipped,
) {
raster_config.root_scaling_factor = scale;
}
+ let clipped = clipped.to_i32();
+
let uv_rect_kind = calculate_uv_rect_kind(
&pic_rect,
&transform,
@@ -5716,45 +5546,36 @@ impl PicturePrimitive {
device_pixel_scale,
);
- let task_size = clipped.size.to_i32();
-
- let render_task_id = frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_picture(
- task_size,
- unclipped.size,
- pic_index,
- clipped.origin,
- surface_spatial_node_index,
- device_pixel_scale,
- None,
- None,
- None,
- )
- ).with_uv_rect_kind(uv_rect_kind)
+ let render_task_id = frame_state.render_tasks.add().init(
+ RenderTask::new_picture(
+ RenderTaskLocation::Dynamic(None, clipped.size),
+ unclipped.size,
+ pic_index,
+ clipped.origin,
+ uv_rect_kind,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ PrimitiveVisibilityMask::all(),
+ None,
+ None,
+ )
);
- primary_render_task_id = Some(render_task_id);
-
- frame_state.init_surface(
- raster_config.surface_index,
- render_task_id,
- parent_surface_index,
- clipped,
- );
+ Some((render_task_id, render_task_id))
}
PictureCompositeMode::SvgFilter(ref primitives, ref filter_datas) => {
if let Some(scale) = adjust_scale_for_max_surface_size(
raster_config, frame_context.fb_config.max_target_size,
pic_rect, &map_pic_to_raster, &map_raster_to_world,
- raster_config.clipped_bounding_rect,
+ clipped_prim_bounding_rect,
&mut device_pixel_scale, &mut clipped, &mut unclipped,
) {
raster_config.root_scaling_factor = scale;
}
+ let clipped = clipped.to_i32();
+
let uv_rect_kind = calculate_uv_rect_kind(
&pic_rect,
&transform,
@@ -5762,87 +5583,75 @@ impl PicturePrimitive {
device_pixel_scale,
);
- let task_size = clipped.size.to_i32();
-
- let picture_task_id = frame_state.rg_builder.add().init(
- RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_picture(
- task_size,
- unclipped.size,
- pic_index,
- clipped.origin,
- surface_spatial_node_index,
- device_pixel_scale,
- None,
- None,
- None,
- )
- ).with_uv_rect_kind(uv_rect_kind)
+ let picture_task_id = frame_state.render_tasks.add().init(
+ RenderTask::new_picture(
+ RenderTaskLocation::Dynamic(None, clipped.size),
+ unclipped.size,
+ pic_index,
+ clipped.origin,
+ uv_rect_kind,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ PrimitiveVisibilityMask::all(),
+ None,
+ None,
+ )
);
let filter_task_id = RenderTask::new_svg_filter(
primitives,
filter_datas,
- frame_state.rg_builder,
- clipped.size.to_i32(),
+ &mut frame_state.render_tasks,
+ clipped.size,
uv_rect_kind,
picture_task_id,
device_pixel_scale,
);
- primary_render_task_id = Some(filter_task_id);
-
- frame_state.init_surface_chain(
- raster_config.surface_index,
- filter_task_id,
- picture_task_id,
- parent_surface_index,
- clipped,
- );
+ Some((filter_task_id, picture_task_id))
}
- }
+ };
- self.primary_render_task_id = primary_render_task_id;
+ if let Some((root, port)) = dep_info {
+ frame_state.surfaces[raster_config.surface_index.0].render_tasks = Some(SurfaceRenderTasks {
+ root,
+ port,
+ });
- // Update the device pixel ratio in the surface, in case it was adjusted due
- // to the surface being too large. This ensures the correct scale is available
- // in case it's used as input to a parent mix-blend-mode readback.
- frame_state
- .surfaces[raster_config.surface_index.0]
- .device_pixel_scale = device_pixel_scale;
+ frame_state.render_tasks.add_dependency(
+ frame_state.surfaces[parent_surface_index.0].render_tasks.unwrap().port,
+ root,
+ );
+ }
}
None => {}
};
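
    Each arm above now reports its tasks as a (root, port) pair instead of
    calling the removed init_surface* helpers directly. In this scheme the root
    is the output the parent surface consumes, while the port is where this
    surface's own children attach; the two coincide for single-task surfaces
    and differ when a blur or filter chain sits on top of the picture task. A
    sketch, with a stand-in id type:

        type RenderTaskId = usize; // stand-in for the crate's id type

        struct SurfaceRenderTasks {
            root: RenderTaskId, // output consumed by the parent surface
            port: RenderTaskId, // task this surface's children attach to
        }

        // A blit-like surface uses one task for both roles:
        fn single_task_surface(id: RenderTaskId) -> SurfaceRenderTasks {
            SurfaceRenderTasks { root: id, port: id }
        }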
-
#[cfg(feature = "capture")]
{
if frame_context.debug_flags.contains(DebugFlags::TILE_CACHE_LOGGING_DBG) {
- if let Some(PictureCompositeMode::TileCache { slice_id }) = self.requested_composite_mode {
- if let Some(ref tile_cache) = tile_caches.get(&slice_id) {
- // extract just the fields that we're interested in
- let mut tile_cache_tiny = TileCacheInstanceSerializer {
- slice: tile_cache.slice,
- tiles: FastHashMap::default(),
- background_color: tile_cache.background_color,
- fract_offset: tile_cache.fract_offset
- };
- // TODO(gw): Debug output only writes the primary sub-slice for now
- for (key, tile) in &tile_cache.sub_slices.first().unwrap().tiles {
- tile_cache_tiny.tiles.insert(*key, TileSerializer {
- rect: tile.local_tile_rect,
- current_descriptor: tile.current_descriptor.clone(),
- device_fract_offset: tile.device_fract_offset,
- id: tile.id,
- root: tile.root.clone(),
- background_color: tile.background_color,
- invalidation_reason: tile.invalidation_reason.clone()
- });
- }
- let text = ron::ser::to_string_pretty(&tile_cache_tiny, Default::default()).unwrap();
- tile_cache_logger.add(text, map_pic_to_world.get_transform());
+ if let Some(ref tile_cache) = self.tile_cache
+ {
+ // extract just the fields that we're interested in
+ let mut tile_cache_tiny = TileCacheInstanceSerializer {
+ slice: tile_cache.slice,
+ tiles: FastHashMap::default(),
+ background_color: tile_cache.background_color,
+ fract_offset: tile_cache.fract_offset
+ };
+ for (key, tile) in &tile_cache.tiles {
+ tile_cache_tiny.tiles.insert(*key, TileSerializer {
+ rect: tile.local_tile_rect,
+ current_descriptor: tile.current_descriptor.clone(),
+ fract_offset: tile.fract_offset,
+ id: tile.id,
+ root: tile.root.clone(),
+ background_color: tile.background_color,
+ invalidation_reason: tile.invalidation_reason.clone()
+ });
}
+ let text = ron::ser::to_string_pretty(&tile_cache_tiny, Default::default()).unwrap();
+ tile_cache_logger.add(text, map_pic_to_world.get_transform());
}
}
}
@@ -5864,28 +5673,25 @@ impl PicturePrimitive {
// If this is a picture cache, push the dirty region to ensure any
// child primitives are culled and clipped to the dirty rect(s).
- if let Some(RasterConfig { composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) = self.raster_config {
- let dirty_region = tile_caches[&slice_id].dirty_region.clone();
+ if let Some(RasterConfig { composite_mode: PictureCompositeMode::TileCache { .. }, .. }) = self.raster_config {
+ let dirty_region = self.tile_cache.as_ref().unwrap().dirty_region.clone();
frame_state.push_dirty_region(dirty_region);
dirty_region_count += 1;
}
if inflation_factor > 0.0 {
- let inflated_region = frame_state.current_dirty_region().inflate(
- inflation_factor,
- frame_context.spatial_tree,
- );
+ let inflated_region = frame_state.current_dirty_region().inflate(inflation_factor);
frame_state.push_dirty_region(inflated_region);
dirty_region_count += 1;
}
// Disallow subpixel AA if an intermediate surface is needed.
// TODO(lsalzman): allow overriding parent if intermediate surface is opaque
- let subpixel_mode = match self.raster_config {
+ let (is_passthrough, subpixel_mode) = match self.raster_config {
Some(RasterConfig { ref composite_mode, .. }) => {
let subpixel_mode = match composite_mode {
- PictureCompositeMode::TileCache { slice_id } => {
- tile_caches[&slice_id].subpixel_mode
+ PictureCompositeMode::TileCache { .. } => {
+ self.tile_cache.as_ref().unwrap().subpixel_mode.clone()
}
PictureCompositeMode::Blit(..) |
PictureCompositeMode::ComponentTransferFilter(..) |
@@ -5900,10 +5706,10 @@ impl PicturePrimitive {
}
};
- subpixel_mode
+ (false, subpixel_mode)
}
None => {
- SubpixelMode::Allow
+ (true, SubpixelMode::Allow)
}
};
@@ -5913,16 +5719,18 @@ impl PicturePrimitive {
// Both parent and this surface unconditionally allow subpixel AA
SubpixelMode::Allow
}
- (SubpixelMode::Allow, SubpixelMode::Conditional { allowed_rect }) => {
+ (SubpixelMode::Allow, SubpixelMode::Conditional { allowed_rect, excluded_rects }) => {
// Parent allows, but we are conditional subpixel AA
SubpixelMode::Conditional {
allowed_rect,
+ excluded_rects,
}
}
- (SubpixelMode::Conditional { allowed_rect }, SubpixelMode::Allow) => {
+ (SubpixelMode::Conditional { allowed_rect, excluded_rects }, SubpixelMode::Allow) => {
// Propagate conditional subpixel mode to child pictures that allow subpixel AA
SubpixelMode::Conditional {
- allowed_rect,
+ allowed_rect: *allowed_rect,
+ excluded_rects: excluded_rects.clone(),
}
}
(SubpixelMode::Conditional { .. }, SubpixelMode::Conditional { ..}) => {
@@ -5937,6 +5745,7 @@ impl PicturePrimitive {
let context = PictureContext {
pic_index,
apply_local_clip_rect: self.apply_local_clip_rect,
+ is_passthrough,
raster_spatial_node_index,
surface_spatial_node_index,
surface_index,
@@ -5951,6 +5760,7 @@ impl PicturePrimitive {
pub fn restore_context(
&mut self,
+ parent_surface_index: SurfaceIndex,
prim_list: PrimitiveList,
context: PictureContext,
state: PictureState,
@@ -5961,6 +5771,9 @@ impl PicturePrimitive {
frame_state.pop_dirty_region();
}
+ let task_id = frame_state.surfaces[parent_surface_index.0].render_tasks.unwrap().port;
+ self.num_render_tasks = frame_state.render_tasks[task_id].children.len();
+
self.prim_list = prim_list;
self.state = Some(state);
}
@@ -6053,9 +5866,7 @@ impl PicturePrimitive {
// Process the accumulated split planes and order them for rendering.
// Z axis is directed at the screen, `sort` is ascending, and we need back-to-front order.
- let sorted = splitter.sort(vec3(0.0, 0.0, 1.0));
- ordered.reserve(sorted.len());
- for poly in sorted {
+ for poly in splitter.sort(vec3(0.0, 0.0, 1.0)) {
let cluster = &self.prim_list.clusters[poly.anchor.cluster_index];
let spatial_node_index = cluster.spatial_node_index;
let transform = match spatial_tree
@@ -6129,9 +5940,25 @@ impl PicturePrimitive {
}
}
+        // Push information about this pic onto the stack for children to read.
+ state.push_picture(PictureInfo {
+ _spatial_node_index: self.spatial_node_index,
+ });
+
// See if this picture actually needs a surface for compositing.
- // TODO(gw): FPC: Remove the actual / requested composite mode distinction.
- let actual_composite_mode = self.requested_composite_mode.clone();
+ let actual_composite_mode = match self.requested_composite_mode {
+ Some(PictureCompositeMode::Filter(ref filter)) if filter.is_noop() => None,
+ Some(PictureCompositeMode::TileCache { .. }) => {
+ // Only allow picture caching composite mode if global picture caching setting
+ // is enabled this frame.
+ if state.composite_state.picture_caching_is_enabled {
+ Some(PictureCompositeMode::TileCache { })
+ } else {
+ None
+ }
+ },
+ ref mode => mode.clone(),
+ };
if let Some(composite_mode) = actual_composite_mode {
// Retrieve the positioning node information for the parent surface.
@@ -6139,29 +5966,19 @@ impl PicturePrimitive {
let parent_device_pixel_scale = state.current_surface().device_pixel_scale;
let surface_spatial_node_index = self.spatial_node_index;
+            // Filters must be applied before transforms; to do this, we can mark this picture as establishing a raster root.
+ let has_svg_filter = if let PictureCompositeMode::SvgFilter(..) = composite_mode {
+ true
+ } else {
+ false
+ };
+
let surface_to_parent_transform = frame_context.spatial_tree
.get_relative_transform(surface_spatial_node_index, parent_raster_node_index);
// Check if there is perspective or if an SVG filter is applied, and thus whether a new
// rasterization root should be established.
- let establishes_raster_root = match composite_mode {
- PictureCompositeMode::TileCache { .. } => {
- // Picture caches are special cased - they never need to establish a raster root. In future,
- // we will probably remove TileCache as a specific composite mode.
- false
- }
- PictureCompositeMode::SvgFilter(..) => {
-                // Filters must be applied before transforms; to do this, we can mark this picture as establishing a raster root.
- true
- }
- PictureCompositeMode::MixBlend(..) |
- PictureCompositeMode::Filter(..) |
- PictureCompositeMode::ComponentTransferFilter(..) |
- PictureCompositeMode::Blit(..) => {
- // TODO(gw): As follow ups, individually move each of these composite modes to create raster roots.
- surface_to_parent_transform.is_perspective()
- }
- };
+ let establishes_raster_root = has_svg_filter || surface_to_parent_transform.is_perspective();
let (raster_spatial_node_index, device_pixel_scale) = if establishes_raster_root {
// If a raster root is established, this surface should be scaled based on the scale factors of the surface raster to parent raster transform.
@@ -6188,8 +6005,8 @@ impl PicturePrimitive {
let mut inflation_factor = 0.0;
if self.options.inflate_if_required {
match composite_mode {
- PictureCompositeMode::Filter(Filter::Blur(width, height)) => {
- let blur_radius = f32::max(clamp_blur_radius(width, scale_factors), clamp_blur_radius(height, scale_factors));
+ PictureCompositeMode::Filter(Filter::Blur(blur_radius)) => {
+ let blur_radius = clamp_blur_radius(blur_radius, scale_factors);
// The amount of extra space needed for primitives inside
// this picture to ensure the visibility check is correct.
inflation_factor = blur_radius * BLUR_SAMPLE_SCALE;
@@ -6198,8 +6015,7 @@ impl PicturePrimitive {
let mut max = 0.0;
for primitive in primitives {
if let FilterPrimitiveKind::Blur(ref blur) = primitive.kind {
- max = f32::max(max, blur.width);
- max = f32::max(max, blur.height);
+ max = f32::max(max, blur.radius);
}
}
inflation_factor = clamp_blur_radius(max, scale_factors) * BLUR_SAMPLE_SCALE;
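
    For scale, assuming BLUR_SAMPLE_SCALE is 3.0 (an assumption here; the
    constant is defined elsewhere in this module) and no clamping applies,
    the inflation works out as:

        const BLUR_SAMPLE_SCALE: f32 = 3.0; // assumed value

        // A 4px blur inflates culling rects by 12px per side, so content
        // slightly outside the picture rect still contributes to the blur.
        fn blur_inflation(blur_radius: f32) -> f32 {
            blur_radius * BLUR_SAMPLE_SCALE // 4.0 -> 12.0
        }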
@@ -6237,7 +6053,6 @@ impl PicturePrimitive {
establishes_raster_root,
surface_index: state.push_surface(surface),
root_scaling_factor: 1.0,
- clipped_bounding_rect: WorldRect::zero(),
});
}
@@ -6256,7 +6071,8 @@ impl PicturePrimitive {
// Restore the pictures list used during recursion.
self.prim_list = prim_list;
- let surface = state.current_surface_mut();
+ // Pop the state information about this picture.
+ state.pop_picture();
for cluster in &mut self.prim_list.clusters {
cluster.flags.remove(ClusterFlags::IS_VISIBLE);
@@ -6266,14 +6082,12 @@ impl PicturePrimitive {
// For in-preserve-3d primitives and pictures, the backface visibility is
// evaluated relative to the containing block.
if let Picture3DContext::In { ancestor_index, .. } = self.context_3d {
- let mut face = VisibleFace::Front;
- frame_context.spatial_tree.get_relative_transform_with_face(
- cluster.spatial_node_index,
- ancestor_index,
- Some(&mut face),
- );
- if face == VisibleFace::Back {
- continue
+ match frame_context.spatial_tree
+ .get_relative_transform(cluster.spatial_node_index, ancestor_index)
+ .visible_face()
+ {
+ VisibleFace::Back => continue,
+ VisibleFace::Front => (),
}
}
}
@@ -6296,7 +6110,7 @@ impl PicturePrimitive {
frame_context.spatial_tree,
);
- for prim_instance in &mut self.prim_list.prim_instances[cluster.prim_range()] {
+ for prim_instance in &mut cluster.prim_instances {
match prim_instance.kind {
PrimitiveInstanceKind::Backdrop { data_handle, .. } => {
// The actual size and clip rect of this primitive are determined by computing the bounding
@@ -6326,7 +6140,7 @@ impl PicturePrimitive {
// frame building is usually problematic since scene building will cache
// the primitive information in the GPU already.
prim_data.common.prim_rect = prim_rect;
- prim_instance.clip_set.local_clip_rect = prim_rect;
+ prim_instance.local_clip_rect = prim_rect;
// Update the cluster bounding rect now that we have the backdrop rect.
cluster.bounding_rect = cluster.bounding_rect.union(&prim_rect);
@@ -6340,13 +6154,16 @@ impl PicturePrimitive {
// Map the cluster bounding rect into the space of the surface, and
// include it in the surface bounding rect.
+ let surface = state.current_surface_mut();
surface.map_local_to_surface.set_target_spatial_node(
cluster.spatial_node_index,
frame_context.spatial_tree,
);
// Mark the cluster visible, since it passed the invertible and
- // backface checks.
+ // backface checks. In future, this will include spatial clustering
+ // which will allow the frame building code to skip most of the
+ // current per-primitive culling code.
cluster.flags.insert(ClusterFlags::IS_VISIBLE);
if let Some(cluster_rect) = surface.map_local_to_surface.map(&cluster.bounding_rect) {
surface.rect = surface.rect.union(&cluster_rect);
@@ -6359,8 +6176,23 @@ impl PicturePrimitive {
if let Some(ref mut raster_config) = self.raster_config {
let surface = state.current_surface_mut();
// Inflate the local bounding rect if required by the filter effect.
+        // This inflation factor is to be applied to the surface itself.
if self.options.inflate_if_required {
surface.rect = raster_config.composite_mode.inflate_picture_rect(surface.rect, surface.scale_factors);
+
+ // The picture's local rect is calculated as the union of the
+ // snapped primitive rects, which should result in a snapped
+            // local rect, unless it was inflated. This is also done during
+            // the visibility update, when calculating the picture's precise
+ // local rect.
+ let snap_surface_to_raster = SpaceSnapper::new_with_target(
+ surface.raster_spatial_node_index,
+ self.spatial_node_index,
+ surface.device_pixel_scale,
+ frame_context.spatial_tree,
+ );
+
+ surface.rect = snap_surface_to_raster.snap_rect(&surface.rect);
}
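
    The snap step above keeps the inflated rect on the device-pixel grid.
    Conceptually, in a simplified sketch that assumes an axis-aligned
    picture-to-raster mapping (the real SpaceSnapper also applies the spatial
    transform between the two nodes):

        // Scale each edge into device pixels, round to the grid, scale back.
        fn snap_rect(x0: f32, y0: f32, x1: f32, y1: f32, dppx: f32) -> (f32, f32, f32, f32) {
            let snap = |v: f32| (v * dppx).round() / dppx;
            (snap(x0), snap(y0), snap(x1), snap(y1))
        }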
let mut surface_rect = surface.rect * Scale::new(1.0);
@@ -6369,6 +6201,16 @@ impl PicturePrimitive {
let surface_index = state.pop_surface();
debug_assert_eq!(surface_index, raster_config.surface_index);
+            // Check if this surface wants to be rasterized in local space (i.e. establishes a raster root) but is too large to be.
+ if raster_config.establishes_raster_root
+ && (surface_rect.size.width > MAX_SURFACE_SIZE
+ || surface_rect.size.height > MAX_SURFACE_SIZE)
+ && frame_context.debug_flags.contains(DebugFlags::DISABLE_RASTER_ROOT_SCALING)
+ {
+ raster_config.establishes_raster_root = false;
+ state.are_raster_roots_assigned = false;
+ }
+
// Set the estimated and precise local rects. The precise local rect
// may be changed again during frame visibility.
self.estimated_local_rect = surface_rect;
@@ -6459,6 +6301,7 @@ impl PicturePrimitive {
}
}
}
+ PictureCompositeMode::MixBlend(..) if !frame_context.fb_config.gpu_supports_advanced_blend => {}
PictureCompositeMode::Filter(ref filter) => {
match *filter {
Filter::ColorMatrix(ref m) => {
@@ -6517,9 +6360,11 @@ fn calculate_screen_uv(
fn calculate_uv_rect_kind(
pic_rect: &PictureRect,
transform: &PictureToRasterTransform,
- rendered_rect: &DeviceRect,
+ rendered_rect: &DeviceIntRect,
device_pixel_scale: DevicePixelScale,
) -> UvRectKind {
+ let rendered_rect = rendered_rect.to_f32();
+
let top_left = calculate_screen_uv(
&pic_rect.origin,
transform,
@@ -7338,40 +7183,3 @@ impl CompositeState {
}
}
}
-
-pub fn get_raster_rects(
- pic_rect: PictureRect,
- map_to_raster: &SpaceMapper<PicturePixel, RasterPixel>,
- map_to_world: &SpaceMapper<RasterPixel, WorldPixel>,
- prim_bounding_rect: WorldRect,
- device_pixel_scale: DevicePixelScale,
-) -> Option<(DeviceRect, DeviceRect)> {
- let unclipped_raster_rect = map_to_raster.map(&pic_rect)?;
-
- let unclipped = raster_rect_to_device_pixels(
- unclipped_raster_rect,
- device_pixel_scale,
- );
-
- let unclipped_world_rect = map_to_world.map(&unclipped_raster_rect)?;
- let clipped_world_rect = unclipped_world_rect.intersection(&prim_bounding_rect)?;
-
- // We don't have to be able to do the back-projection from world into raster.
- // Rendering only cares one way, so if that fails, we fall back to the full rect.
- let clipped_raster_rect = match map_to_world.unmap(&clipped_world_rect) {
- Some(rect) => rect.intersection(&unclipped_raster_rect)?,
- None => return Some((unclipped, unclipped)),
- };
-
- let clipped = raster_rect_to_device_pixels(
- clipped_raster_rect,
- device_pixel_scale,
- );
-
- // Ensure that we won't try to allocate a zero-sized clip render task.
- if clipped.is_empty() {
- return None;
- }
-
- Some((clipped, unclipped))
-}
diff --git a/third_party/webrender/webrender/src/platform/macos/font.rs b/third_party/webrender/webrender/src/platform/macos/font.rs
index 919e3a0086d..437522d5e9e 100644
--- a/third_party/webrender/webrender/src/platform/macos/font.rs
+++ b/third_party/webrender/webrender/src/platform/macos/font.rs
@@ -4,7 +4,7 @@
use api::{ColorU, FontKey, FontRenderMode, FontSize, GlyphDimensions};
use api::{FontInstanceFlags, FontVariation, NativeFontHandle};
-use core_foundation::{array::{CFArray, CFArrayRef}, data::CFData};
+use core_foundation::array::{CFArray, CFArrayRef};
use core_foundation::base::TCFType;
use core_foundation::dictionary::CFDictionary;
use core_foundation::number::{CFNumber, CFNumberRef};
@@ -14,12 +14,12 @@ use core_graphics::base::{kCGBitmapByteOrder32Little};
use core_graphics::color_space::CGColorSpace;
use core_graphics::context::CGContext;
use core_graphics::context::{CGBlendMode, CGTextDrawingMode};
+use core_graphics::data_provider::CGDataProvider;
use core_graphics::font::{CGFont, CGGlyph};
use core_graphics::geometry::{CGAffineTransform, CGPoint, CGSize};
use core_graphics::geometry::{CG_AFFINE_TRANSFORM_IDENTITY, CGRect};
-use core_text::{self, font_descriptor::CTFontDescriptorCreateCopyWithAttributes};
+use core_text;
use core_text::font::{CTFont, CTFontRef};
-use core_text::font_descriptor::{CTFontDescriptor, CTFontSymbolicTraits};
use core_text::font_descriptor::{kCTFontDefaultOrientation, kCTFontColorGlyphsTrait};
use euclid::default::Size2D;
use crate::gamma_lut::{ColorLut, GammaLut};
@@ -31,21 +31,9 @@ use std::sync::Arc;
const INITIAL_CG_CONTEXT_SIDE_LENGTH: u32 = 32;
-// We prefer to create CTFonts from a CTFontDescriptor, but that doesn't work in the case
-// of hidden system fonts on recent macOS versions, so for those we will instead use a
-// native CGFont as the basis.
-enum DescOrFont {
- Desc(CTFontDescriptor),
- Font(CGFont),
-}
-
pub struct FontContext {
- desc_or_fonts: FastHashMap<FontKey, DescOrFont>,
- // Table mapping a sized font key with variations to its instantiated CoreText font.
- // We also cache the symbolic traits for the given CT font when it is instantiated.
- // This avoids an expensive bottleneck accessing the symbolic traits every time we
- // need to rasterize a glyph or access its dimensions.
- ct_fonts: FastHashMap<(FontKey, FontSize, Vec<FontVariation>), (CTFont, CTFontSymbolicTraits)>,
+ cg_fonts: FastHashMap<FontKey, CGFont>,
+ ct_fonts: FastHashMap<(FontKey, FontSize, Vec<FontVariation>), CTFont>,
#[allow(dead_code)]
graphics_context: GraphicsContext,
#[allow(dead_code)]
@@ -66,75 +54,29 @@ struct GlyphMetrics {
advance: f32,
}
-// There are a number of different OS prefs that control whether or not
-// requesting font smoothing actually results in subpixel AA. This gets even
-// murkier in newer macOS versions that deprecate subpixel AA, with the prefs
-// potentially interacting and overriding each other. In an attempt to future-
-// proof things against any new prefs or interpretation of those prefs in
-// future macOS versions, we do a check here to request font smoothing and see
-// what result it actually gives us much like Skia does. We need to check for
-// each of three potential results and process them in the font backend in
-// distinct ways:
-// 1) subpixel AA (differing RGB channels) with dilation
-// 2) grayscale AA (matching RGB channels) with dilation, a compatibility mode
-// 3) grayscale AA without dilation as if font smoothing was not requested
-// We can discern between case 1 and the rest by checking if the subpixels differ.
-// We can discern between cases 2 and 3 by rendering with and without smoothing
-// and comparing the two to determine if there was some dilation.
-// This returns the actual FontRenderMode needed to support each case, if any.
-fn determine_font_smoothing_mode() -> Option<FontRenderMode> {
- let mut smooth_context = CGContext::create_bitmap_context(
- None,
- 12,
- 12,
- 8,
- 12 * 4,
- &CGColorSpace::create_device_rgb(),
- kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Little,
- );
- smooth_context.set_should_smooth_fonts(true);
- smooth_context.set_should_antialias(true);
- smooth_context.set_rgb_fill_color(1.0, 1.0, 1.0, 1.0);
- let mut gray_context = CGContext::create_bitmap_context(
+// According to the Skia source code, there's no public API to
+// determine if subpixel AA is supported. So jrmuizel ported
+// this function from Skia; it checks whether a glyph can be
+// rendered with subpixel AA.
+fn supports_subpixel_aa() -> bool {
+ let mut cg_context = CGContext::create_bitmap_context(
None,
- 12,
- 12,
+ 1,
+ 1,
8,
- 12 * 4,
+ 4,
&CGColorSpace::create_device_rgb(),
kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Little,
);
- gray_context.set_should_smooth_fonts(false);
- gray_context.set_should_antialias(true);
- gray_context.set_rgb_fill_color(1.0, 1.0, 1.0, 1.0);
-
- // Autorelease pool for CTFont
- objc::rc::autoreleasepool(|| {
- // Lucida Grande 12 is the default fallback font in Firefox
- let ct_font = core_text::font::new_from_name("Lucida Grande", 12.).unwrap();
- let point = CGPoint { x: 0., y: 0. };
- let glyph = 'X' as CGGlyph;
- ct_font.draw_glyphs(&[glyph], &[point], smooth_context.clone());
- ct_font.draw_glyphs(&[glyph], &[point], gray_context.clone());
- });
-
- let mut mode = None;
- for (smooth, gray) in smooth_context.data().chunks(4).zip(gray_context.data().chunks(4)) {
- if smooth[0] != smooth[1] || smooth[1] != smooth[2] {
- return Some(FontRenderMode::Subpixel);
- }
- if smooth[0] != gray[0] || smooth[1] != gray[1] || smooth[2] != gray[2] {
- mode = Some(FontRenderMode::Alpha);
- }
- }
- return mode;
-}
-
-// We cache the font smoothing mode globally, rather than storing it in each FontContext,
-// to avoid having to determine this redundantly in each context and to avoid needing to
-// lock them to access this setting in prepare_font.
-lazy_static! {
- static ref FONT_SMOOTHING_MODE: Option<FontRenderMode> = determine_font_smoothing_mode();
+ let ct_font = core_text::font::new_from_name("Helvetica", 16.).unwrap();
+ cg_context.set_should_smooth_fonts(true);
+ cg_context.set_should_antialias(true);
+ cg_context.set_rgb_fill_color(1.0, 1.0, 1.0, 1.0);
+ let point = CGPoint { x: -1., y: 0. };
+ let glyph = '|' as CGGlyph;
+ ct_font.draw_glyphs(&[glyph], &[point], cg_context.clone());
+ let data = cg_context.data();
+ data[0] != data[1] || data[1] != data[2]
}
fn should_use_white_on_black(color: ColorU) -> bool {
@@ -223,17 +165,13 @@ extern {
static kCTFontVariationAxisMinimumValueKey: CFStringRef;
static kCTFontVariationAxisMaximumValueKey: CFStringRef;
static kCTFontVariationAxisDefaultValueKey: CFStringRef;
- static kCTFontVariationAttribute: CFStringRef;
fn CTFontCopyVariationAxes(font: CTFontRef) -> CFArrayRef;
}
-fn new_ct_font_with_variations(desc_or_font: &DescOrFont, size: f64, variations: &[FontVariation]) -> CTFont {
+fn new_ct_font_with_variations(cg_font: &CGFont, size: f64, variations: &[FontVariation]) -> CTFont {
unsafe {
- let ct_font = match desc_or_font {
- DescOrFont::Desc(ct_font_desc) => core_text::font::new_from_descriptor(ct_font_desc, size),
- DescOrFont::Font(cg_font) => core_text::font::new_from_CGFont(cg_font, size)
- };
+ let ct_font = core_text::font::new_from_CGFont(cg_font, size);
if variations.is_empty() {
return ct_font;
}
@@ -242,12 +180,7 @@ fn new_ct_font_with_variations(desc_or_font: &DescOrFont, size: f64, variations:
return ct_font;
}
let axes: CFArray<CFDictionary> = TCFType::wrap_under_create_rule(axes_ref);
- // We collect the values with either number or string keys, depending on whether
- // we're going to instantiate the CTFont from a descriptor or a CGFont.
- // It'd probably be better to switch the CGFont-related APIs to expect numbers,
- // but that's left for a future cleanup.
- let mut vals: Vec<(CFNumber, CFNumber)> = Vec::with_capacity(variations.len() as usize);
- let mut vals_str: Vec<(CFString, CFNumber)> = Vec::with_capacity(variations.len() as usize);
+ let mut vals: Vec<(CFString, CFNumber)> = Vec::with_capacity(variations.len() as usize);
for axis in axes.iter() {
if !axis.instance_of::<CFDictionary>() {
return ct_font;
@@ -320,48 +253,33 @@ fn new_ct_font_with_variations(desc_or_font: &DescOrFont, size: f64, variations:
val = val.max(min_val).min(max_val);
if val != def_val {
- match desc_or_font {
- DescOrFont::Font(_) => vals_str.push((name, CFNumber::from(val))),
- DescOrFont::Desc(_) => vals.push((CFNumber::from(tag_val), CFNumber::from(val))),
- }
+ vals.push((name, CFNumber::from(val)));
}
}
- match desc_or_font {
- DescOrFont::Desc(ct_font_desc) => {
- if vals.is_empty() {
- return ct_font;
- }
- let vals_dict = CFDictionary::from_CFType_pairs(&vals);
- let attrs_dict = CFDictionary::from_CFType_pairs(&[(CFString::wrap_under_get_rule(kCTFontVariationAttribute), vals_dict)]);
- let ct_var_font_desc = create_copy_with_attributes(ct_font_desc, attrs_dict.to_untyped()).unwrap();
- core_text::font::new_from_descriptor(&ct_var_font_desc, size)
- }
- DescOrFont::Font(cg_font) => {
- if vals_str.is_empty() {
- return ct_font;
- }
- let vals_dict = CFDictionary::from_CFType_pairs(&vals_str);
- let cg_var_font = cg_font.create_copy_from_variations(&vals_dict).unwrap();
- core_text::font::new_from_CGFont_with_variations(&cg_var_font, size, &vals_dict)
- }
+ if vals.is_empty() {
+ return ct_font;
}
+ let vals_dict = CFDictionary::from_CFType_pairs(&vals);
+ let cg_var_font = cg_font.create_copy_from_variations(&vals_dict).unwrap();
+ core_text::font::new_from_CGFont_with_variations(&cg_var_font, size, &vals_dict)
}
}
-fn is_bitmap_font(traits: CTFontSymbolicTraits) -> bool {
+fn is_bitmap_font(ct_font: &CTFont) -> bool {
+ let traits = ct_font.symbolic_traits();
(traits & kCTFontColorGlyphsTrait) != 0
}
impl FontContext {
pub fn new() -> Result<FontContext, ResourceCacheError> {
- debug!("Test for subpixel AA support: {:?}", *FONT_SMOOTHING_MODE);
+ debug!("Test for subpixel AA support: {}", supports_subpixel_aa());
// Force CG to use sRGB color space to gamma correct.
let contrast = 0.0;
let gamma = 0.0;
Ok(FontContext {
- desc_or_fonts: FastHashMap::default(),
+ cg_fonts: FastHashMap::default(),
ct_fonts: FastHashMap::default(),
graphics_context: GraphicsContext::new(),
gamma_lut: GammaLut::new(contrast, gamma, gamma),
@@ -369,47 +287,34 @@ impl FontContext {
}
pub fn has_font(&self, font_key: &FontKey) -> bool {
- self.desc_or_fonts.contains_key(font_key)
+ self.cg_fonts.contains_key(font_key)
}
pub fn add_raw_font(&mut self, font_key: &FontKey, bytes: Arc<Vec<u8>>, index: u32) {
- if self.desc_or_fonts.contains_key(font_key) {
+ if self.cg_fonts.contains_key(font_key) {
return;
}
assert_eq!(index, 0);
- let data = CFData_wrapping_arc_vec(bytes);
- let ct_font_desc = match create_font_descriptor(data) {
+ let data_provider = CGDataProvider::from_buffer(bytes);
+ let cg_font = match CGFont::from_data_provider(data_provider) {
Err(_) => return,
- Ok(desc) => desc,
+ Ok(cg_font) => cg_font,
};
- self.desc_or_fonts.insert(*font_key, DescOrFont::Desc(ct_font_desc));
+ self.cg_fonts.insert(*font_key, cg_font);
}
pub fn add_native_font(&mut self, font_key: &FontKey, native_font_handle: NativeFontHandle) {
- if self.desc_or_fonts.contains_key(font_key) {
+ if self.cg_fonts.contains_key(font_key) {
return;
}
- // there's no great way to go from a CGFont to a CTFontDescriptor
- // so we use the postscript name. Ideally NativeFontHandle would
- // just use a CTFontDescriptor.
- let name = native_font_handle.0.postscript_name();
- // For "hidden" system fonts, whose names start with a period,
- // we can't instantiate CTFonts via a descriptor. We're really
- // supposed to use CTFontCreateUIFontForLanguage, but for now
- // we just use the CGFont.
- let desc_or_font = if name.to_string().starts_with('.') {
- DescOrFont::Font(native_font_handle.0)
- } else {
- DescOrFont::Desc(core_text::font_descriptor::new_from_postscript_name(&name))
- };
-
- self.desc_or_fonts.insert(*font_key, desc_or_font);
+ self.cg_fonts
+ .insert(*font_key, native_font_handle.0);
}
pub fn delete_font(&mut self, font_key: &FontKey) {
- if let Some(_) = self.desc_or_fonts.remove(font_key) {
+ if let Some(_) = self.cg_fonts.remove(font_key) {
self.ct_fonts.retain(|k, _| k.0 != *font_key);
}
}
@@ -425,20 +330,16 @@ impl FontContext {
font_key: FontKey,
size: f64,
variations: &[FontVariation],
- ) -> Option<(CTFont, CTFontSymbolicTraits)> {
- // Interacting with CoreText can create autorelease garbage.
- objc::rc::autoreleasepool(|| {
- match self.ct_fonts.entry((font_key, FontSize::from_f64_px(size), variations.to_vec())) {
- Entry::Occupied(entry) => Some((*entry.get()).clone()),
- Entry::Vacant(entry) => {
- let desc_or_font = self.desc_or_fonts.get(&font_key)?;
- let ct_font = new_ct_font_with_variations(desc_or_font, size, variations);
- let traits = ct_font.symbolic_traits();
- entry.insert((ct_font.clone(), traits));
- Some((ct_font, traits))
- }
+ ) -> Option<CTFont> {
+ match self.ct_fonts.entry((font_key, FontSize::from_f64_px(size), variations.to_vec())) {
+ Entry::Occupied(entry) => Some((*entry.get()).clone()),
+ Entry::Vacant(entry) => {
+ let cg_font = self.cg_fonts.get(&font_key)?;
+ let ct_font = new_ct_font_with_variations(cg_font, size, variations);
+ entry.insert(ct_font.clone());
+ Some(ct_font)
}
- })
+ }
}
pub fn get_glyph_index(&mut self, font_key: FontKey, ch: char) -> Option<u32> {
@@ -446,7 +347,7 @@ impl FontContext {
let mut glyph = 0;
self.get_ct_font(font_key, 16.0, &[])
- .and_then(|(ct_font, _)| {
+ .and_then(|ref ct_font| {
unsafe {
let result = ct_font.get_glyphs_for_characters(&character, &mut glyph, 1);
@@ -467,9 +368,9 @@ impl FontContext {
let (x_scale, y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
let size = font.size.to_f64_px() * y_scale;
self.get_ct_font(font.font_key, size, &font.variations)
- .and_then(|(ct_font, traits)| {
+ .and_then(|ref ct_font| {
let glyph = key.index() as CGGlyph;
- let bitmap = is_bitmap_font(traits);
+ let bitmap = is_bitmap_font(ct_font);
let (mut shape, (x_offset, y_offset)) = if bitmap {
(FontTransform::identity(), (0.0, 0.0))
} else {
@@ -510,7 +411,7 @@ impl FontContext {
};
let extra_strikes = font.get_extra_strikes(strike_scale);
let metrics = get_glyph_metrics(
- &ct_font,
+ ct_font,
transform.as_ref(),
glyph,
x_offset,
@@ -569,22 +470,6 @@ impl FontContext {
}
pub fn prepare_font(font: &mut FontInstance) {
- // Sanitize the render mode for font smoothing. If font smoothing is supported,
- // then we just need to ensure the render mode is limited to what is supported.
- // If font smoothing is actually disabled, then we need to fall back to grayscale.
- if font.flags.contains(FontInstanceFlags::FONT_SMOOTHING) ||
- font.render_mode == FontRenderMode::Subpixel {
- match *FONT_SMOOTHING_MODE {
- Some(mode) => {
- font.render_mode = font.render_mode.limit_by(mode);
- font.flags.insert(FontInstanceFlags::FONT_SMOOTHING);
- }
- None => {
- font.render_mode = font.render_mode.limit_by(FontRenderMode::Alpha);
- font.flags.remove(FontInstanceFlags::FONT_SMOOTHING);
- }
- }
- }
match font.render_mode {
FontRenderMode::Mono => {
// In mono mode the color of the font is irrelevant.
@@ -616,12 +501,10 @@ impl FontContext {
}
pub fn rasterize_glyph(&mut self, font: &FontInstance, key: &GlyphKey) -> GlyphRasterResult {
- objc::rc::autoreleasepool(|| {
let (x_scale, y_scale) = font.transform.compute_scale().unwrap_or((1.0, 1.0));
let size = font.size.to_f64_px() * y_scale;
- let (ct_font, traits) =
- self.get_ct_font(font.font_key, size, &font.variations).ok_or(GlyphRasterError::LoadFailed)?;
- let glyph_type = if is_bitmap_font(traits) {
+ let ct_font = self.get_ct_font(font.font_key, size, &font.variations).ok_or(GlyphRasterError::LoadFailed)?;
+ let glyph_type = if is_bitmap_font(&ct_font) {
GlyphType::Bitmap
} else {
GlyphType::Vector
@@ -860,7 +743,7 @@ impl FontContext {
GlyphType::Vector => font.get_glyph_format(),
},
bytes: rasterized_pixels,
- })})
+ })
}
}
@@ -969,72 +852,3 @@ enum GlyphType {
Vector,
Bitmap,
}
-
-// This stuff should eventually migrate to upstream core-foundation
-#[allow(non_snake_case)]
-fn CFData_wrapping_arc_vec(buffer: Arc<Vec<u8>>) -> CFData {
- use core_foundation::base::*;
- use core_foundation::data::CFDataRef;
- use std::os::raw::c_void;
-
- extern "C" {
- pub fn CFDataCreateWithBytesNoCopy(
- allocator: CFAllocatorRef,
- bytes: *const u8,
- length: CFIndex,
- allocator: CFAllocatorRef,
- ) -> CFDataRef;
- }
- unsafe {
- let ptr = (*buffer).as_ptr() as *const _;
- let len = buffer.len().to_CFIndex();
- let info = Arc::into_raw(buffer) as *mut c_void;
-
- extern "C" fn deallocate(_: *mut c_void, info: *mut c_void) {
- unsafe {
- drop(Arc::from_raw(info as *mut Vec<u8>));
- }
- }
-
- // CFAllocatorContext doesn't have nullable members so we transmute
- let allocator = CFAllocator::new(CFAllocatorContext {
- info: info,
- version: 0,
- retain: None,
- reallocate: None,
- release: None,
- copyDescription: None,
- allocate: None,
- deallocate: Some(deallocate),
- preferredSize: None,
- });
- let data_ref =
- CFDataCreateWithBytesNoCopy(kCFAllocatorDefault, ptr, len, allocator.as_CFTypeRef());
- TCFType::wrap_under_create_rule(data_ref)
- }
-}
-
-fn create_font_descriptor(cf_data: CFData) -> Result<CTFontDescriptor, ()> {
- use core_text::font_descriptor::CTFontDescriptorRef;
- use core_foundation::data::CFDataRef;
- extern {
- pub fn CTFontManagerCreateFontDescriptorFromData(data: CFDataRef) -> CTFontDescriptorRef;
- }
- unsafe {
- let ct_font_descriptor_ref = CTFontManagerCreateFontDescriptorFromData(cf_data.as_concrete_TypeRef());
- if ct_font_descriptor_ref.is_null() {
- return Err(());
- }
- Ok(CTFontDescriptor::wrap_under_create_rule(ct_font_descriptor_ref))
- }
-}
-
-fn create_copy_with_attributes(desc: &CTFontDescriptor, attr: CFDictionary) -> Result<CTFontDescriptor, ()> {
- unsafe {
- let ct_font_descriptor_ref = CTFontDescriptorCreateCopyWithAttributes(desc.as_concrete_TypeRef(), attr.as_concrete_TypeRef());
- if ct_font_descriptor_ref.is_null() {
- return Err(());
- }
- Ok(CTFontDescriptor::wrap_under_create_rule(ct_font_descriptor_ref))
-}
-}
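
Note: the net effect of the macOS change above is that add_raw_font now builds a CGFont straight from the raw font bytes instead of deriving a CTFontDescriptor first. A minimal sketch of that path, reusing the same core-graphics calls as the patch (the helper name cg_font_from_bytes is illustrative, not part of the patch):

use std::sync::Arc;
use core_graphics::data_provider::CGDataProvider;
use core_graphics::font::CGFont;

// Wrap the Arc'd font bytes in a CGDataProvider (shared with CoreGraphics,
// without copying) and parse them into a CGFont. Err means the bytes were
// not a usable font, which add_raw_font above treats as a silent no-op.
fn cg_font_from_bytes(bytes: Arc<Vec<u8>>) -> Option<CGFont> {
    let provider = CGDataProvider::from_buffer(bytes);
    CGFont::from_data_provider(provider).ok()
}
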
diff --git a/third_party/webrender/webrender/src/platform/unix/font.rs b/third_party/webrender/webrender/src/platform/unix/font.rs
index aa02e9460a9..52c0d114101 100644
--- a/third_party/webrender/webrender/src/platform/unix/font.rs
+++ b/third_party/webrender/webrender/src/platform/unix/font.rs
@@ -104,7 +104,6 @@ macro_rules! ft_dyn_fn {
ft_dyn_fn!(FT_Get_MM_Var(face: FT_Face, desc: *mut *mut FT_MM_Var) -> FT_Error);
ft_dyn_fn!(FT_Done_MM_Var(library: FT_Library, desc: *mut FT_MM_Var) -> FT_Error);
ft_dyn_fn!(FT_Set_Var_Design_Coordinates(face: FT_Face, num_vals: FT_UInt, vals: *mut FT_Fixed) -> FT_Error);
-ft_dyn_fn!(FT_Get_Var_Design_Coordinates(face: FT_Face, num_vals: FT_UInt, vals: *mut FT_Fixed) -> FT_Error);
extern "C" {
fn FT_GlyphSlot_Embolden(slot: FT_GlyphSlot);
@@ -336,9 +335,9 @@ impl FontContext {
})
} else {
// TODO(gw): Provide detailed error values.
- // Once this panic has been here for a while with no issues we should get rid of
- // ResourceCacheError as this was the only place that could fail previously.
- panic!("Failed to initialize FreeType - {}", result)
+ Err(ResourceCacheError::new(
+ format!("Failed to initialize FreeType - {}", result)
+ ))
}
}
@@ -398,19 +397,6 @@ impl FontContext {
let mm_var = normal_face.mm_var;
let num_axis = (*mm_var).num_axis;
let mut coords: Vec<FT_Fixed> = Vec::with_capacity(num_axis as usize);
-
- // Calling this before FT_Set_Var_Design_Coordinates avoids a bug with font variations
- // not initialized properly in the font face, even if we ignore the result.
- // See bug 1647035.
- let mut tmp = [0; 16];
- let res = FT_Get_Var_Design_Coordinates(
- normal_face.face,
- num_axis.min(16),
- tmp.as_mut_ptr()
- );
- debug_assert!(succeeded(res));
-
-
for i in 0 .. num_axis {
let axis = (*mm_var).axis.offset(i as isize);
let mut value = (*axis).def;
@@ -424,8 +410,7 @@ impl FontContext {
}
coords.push(value);
}
- let res = FT_Set_Var_Design_Coordinates(var_face, num_axis, coords.as_mut_ptr());
- debug_assert!(succeeded(res));
+ FT_Set_Var_Design_Coordinates(var_face, num_axis, coords.as_mut_ptr());
}
entry.insert(VariationFace(var_face));
Some(var_face)
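
Note: both backends clamp a requested variation value to the axis's [min, max] range before applying it, and the FreeType path additionally passes design coordinates as 16.16 fixed-point (FT_Fixed). A rough sketch of those two steps; the helper names are illustrative, and the i64 return assumes an LP64 target where FT_Fixed is 64 bits:

// Clamp a requested axis value into the axis range, matching the
// val.max(min_val).min(max_val) step in the macOS backend above.
fn clamp_axis_value(val: f64, min_val: f64, max_val: f64) -> f64 {
    val.max(min_val).min(max_val)
}

// FreeType design coordinates are 16.16 fixed-point, so a float axis value
// is scaled by 2^16 before being handed to FT_Set_Var_Design_Coordinates.
fn to_ft_fixed(val: f64) -> i64 {
    (val * 65536.0).round() as i64
}
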
diff --git a/third_party/webrender/webrender/src/prepare.rs b/third_party/webrender/webrender/src/prepare.rs
deleted file mode 100644
index c4cfcc982c7..00000000000
--- a/third_party/webrender/webrender/src/prepare.rs
+++ /dev/null
@@ -1,1606 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! # Prepare pass
-//!
-//! TODO: document this!
-
-use std::cmp;
-use api::{PremultipliedColorF, PropertyBinding};
-use api::{BoxShadowClipMode, BorderStyle, ClipMode};
-use api::units::*;
-use euclid::Scale;
-use smallvec::SmallVec;
-use crate::image_tiling::{self, Repetition};
-use crate::border::{get_max_scale_for_border, build_border_instances};
-use crate::clip::{ClipStore};
-use crate::spatial_tree::{SpatialNodeIndex, SpatialTree};
-use crate::clip::{ClipDataStore, ClipNodeFlags, ClipChainInstance, ClipItemKind};
-use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureContext, PictureState};
-use crate::gpu_cache::{GpuCacheHandle, GpuDataRequest};
-use crate::gpu_types::{BrushFlags};
-use crate::internal_types::{FastHashMap, PlaneSplitAnchor};
-use crate::picture::{PicturePrimitive, SliceId, TileCacheLogger, ClusterFlags, SurfaceRenderTasks};
-use crate::picture::{PrimitiveList, PrimitiveCluster, SurfaceIndex, TileCacheInstance, SubpixelMode};
-use crate::prim_store::line_dec::MAX_LINE_DECORATION_RESOLUTION;
-use crate::prim_store::*;
-use crate::render_backend::DataStores;
-use crate::render_task_graph::RenderTaskId;
-use crate::render_task_cache::RenderTaskCacheKeyKind;
-use crate::render_task_cache::{RenderTaskCacheKey, to_cache_size, RenderTaskParent};
-use crate::render_task::{RenderTaskKind, RenderTask};
-use crate::segment::SegmentBuilder;
-use crate::space::SpaceMapper;
-use crate::util::{clamp_to_scale_factor, pack_as_float, raster_rect_to_device_pixels};
-use crate::visibility::{compute_conservative_visible_rect, PrimitiveVisibility, VisibilityState};
-
-
-const MAX_MASK_SIZE: f32 = 4096.0;
-
-const MIN_BRUSH_SPLIT_AREA: f32 = 128.0 * 128.0;
-
-
-pub fn prepare_primitives(
- store: &mut PrimitiveStore,
- prim_list: &mut PrimitiveList,
- pic_context: &PictureContext,
- pic_state: &mut PictureState,
- frame_context: &FrameBuildingContext,
- frame_state: &mut FrameBuildingState,
- data_stores: &mut DataStores,
- scratch: &mut PrimitiveScratchBuffer,
- tile_cache_log: &mut TileCacheLogger,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
-) {
- profile_scope!("prepare_primitives");
- for (cluster_index, cluster) in prim_list.clusters.iter_mut().enumerate() {
- if !cluster.flags.contains(ClusterFlags::IS_VISIBLE) {
- continue;
- }
- profile_scope!("cluster");
- pic_state.map_local_to_pic.set_target_spatial_node(
- cluster.spatial_node_index,
- frame_context.spatial_tree,
- );
-
- frame_state.surfaces[pic_context.surface_index.0].opaque_rect = PictureRect::zero();
-
- for (idx, prim_instance) in (&mut prim_list.prim_instances[cluster.prim_range()]).iter_mut().enumerate() {
- let prim_instance_index = cluster.prim_range.start + idx;
-
- // First check for coarse visibility (if this primitive was completely off-screen)
- match prim_instance.vis.state {
- VisibilityState::Unset => {
- panic!("bug: invalid vis state");
- }
- VisibilityState::Culled => {
- continue;
- }
- VisibilityState::Coarse { ref filter, vis_flags } => {
- // The original coarse state was calculated during the initial visibility pass.
- // However, it's possible that the dirty rect has since shrunk, if tiles were not
- // dirty. Intersecting with the dirty rect here avoids preparing any primitives
- // outside the dirty rect, and reduces the size of any off-screen surface allocations
- // for clip masks / render tasks that we make.
-
- // Clear the current visibility mask, and build a more detailed one based on the dirty rect
- // regions below.
- let dirty_region = frame_state.current_dirty_region();
- let is_in_dirty_region = dirty_region.filters
- .iter()
- .any(|region_filter| region_filter.matches(filter));
-
- if is_in_dirty_region {
- prim_instance.vis.state = VisibilityState::Detailed {
- filter: *filter,
- vis_flags,
- }
- } else {
- prim_instance.clear_visibility();
- continue;
- }
- }
- VisibilityState::Detailed { .. } => {
- // Was already set to detailed (picture caching disabled or a root element)
- }
- VisibilityState::PassThrough => {}
- }
-
- let plane_split_anchor = PlaneSplitAnchor::new(cluster_index, prim_instance_index);
-
- if prepare_prim_for_render(
- store,
- prim_instance,
- cluster,
- pic_context,
- pic_state,
- frame_context,
- frame_state,
- plane_split_anchor,
- data_stores,
- scratch,
- tile_cache_log,
- tile_caches,
- ) {
- frame_state.num_visible_primitives += 1;
- } else {
- prim_instance.clear_visibility();
- }
- }
-
- if !cluster.opaque_rect.is_empty() {
- let surface = &mut frame_state.surfaces[pic_context.surface_index.0];
-
- if let Some(cluster_opaque_rect) = surface.map_local_to_surface.map_inner_bounds(&cluster.opaque_rect) {
- surface.opaque_rect = crate::util::conservative_union_rect(&surface.opaque_rect, &cluster_opaque_rect);
- }
- }
- }
-}
-
-fn prepare_prim_for_render(
- store: &mut PrimitiveStore,
- prim_instance: &mut PrimitiveInstance,
- cluster: &mut PrimitiveCluster,
- pic_context: &PictureContext,
- pic_state: &mut PictureState,
- frame_context: &FrameBuildingContext,
- frame_state: &mut FrameBuildingState,
- plane_split_anchor: PlaneSplitAnchor,
- data_stores: &mut DataStores,
- scratch: &mut PrimitiveScratchBuffer,
- tile_cache_log: &mut TileCacheLogger,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
-) -> bool {
- profile_scope!("prepare_prim_for_render");
-
- // If we have dependencies, we need to prepare them first, in order
- // to know the actual rect of this primitive.
- // For example, scrolling may affect the location of an item in
- // local space, which may force us to render this item on a larger
- // picture target, if being composited.
- if let PrimitiveInstanceKind::Picture { pic_index, .. } = prim_instance.kind {
- let pic = &mut store.pictures[pic_index.0];
-
- match pic.take_context(
- pic_index,
- pic_context.surface_spatial_node_index,
- pic_context.raster_spatial_node_index,
- pic_context.surface_index,
- pic_context.subpixel_mode,
- frame_state,
- frame_context,
- scratch,
- tile_cache_log,
- tile_caches,
- ) {
- Some((pic_context_for_children, mut pic_state_for_children, mut prim_list)) => {
- prepare_primitives(
- store,
- &mut prim_list,
- &pic_context_for_children,
- &mut pic_state_for_children,
- frame_context,
- frame_state,
- data_stores,
- scratch,
- tile_cache_log,
- tile_caches,
- );
-
- // Restore the dependencies (borrow check dance)
- store.pictures[pic_context_for_children.pic_index.0]
- .restore_context(
- prim_list,
- pic_context_for_children,
- pic_state_for_children,
- frame_state,
- );
- }
- None => {
- if prim_instance.is_chased() {
- println!("\tculled for carrying an invisible composite filter");
- }
-
- return false;
- }
- }
- }
-
- let prim_rect = data_stores.get_local_prim_rect(
- prim_instance,
- store,
- );
-
- if !update_clip_task(
- prim_instance,
- &prim_rect.origin,
- cluster.spatial_node_index,
- pic_context.raster_spatial_node_index,
- pic_context,
- pic_state,
- frame_context,
- frame_state,
- store,
- data_stores,
- scratch,
- ) {
- if prim_instance.is_chased() {
- println!("\tconsidered invisible");
- }
- return false;
- }
-
- if prim_instance.is_chased() {
- println!("\tconsidered visible and ready with local pos {:?}", prim_rect.origin);
- }
-
- #[cfg(debug_assertions)]
- {
- prim_instance.prepared_frame_id = frame_state.rg_builder.frame_id();
- }
-
- prepare_interned_prim_for_render(
- store,
- prim_instance,
- cluster,
- plane_split_anchor,
- pic_context,
- pic_state,
- frame_context,
- frame_state,
- data_stores,
- scratch,
- );
-
- true
-}
-
-/// Prepare an interned primitive for rendering, by requesting
-/// resources, render tasks etc. This is equivalent to the
-/// prepare_prim_for_render_inner call for old style primitives.
-fn prepare_interned_prim_for_render(
- store: &mut PrimitiveStore,
- prim_instance: &mut PrimitiveInstance,
- cluster: &mut PrimitiveCluster,
- plane_split_anchor: PlaneSplitAnchor,
- pic_context: &PictureContext,
- pic_state: &mut PictureState,
- frame_context: &FrameBuildingContext,
- frame_state: &mut FrameBuildingState,
- data_stores: &mut DataStores,
- scratch: &mut PrimitiveScratchBuffer,
-) {
- let prim_spatial_node_index = cluster.spatial_node_index;
- let is_chased = prim_instance.is_chased();
- let device_pixel_scale = frame_state.surfaces[pic_context.surface_index.0].device_pixel_scale;
- let mut is_opaque = false;
-
- match &mut prim_instance.kind {
- PrimitiveInstanceKind::LineDecoration { data_handle, ref mut render_task, .. } => {
- profile_scope!("LineDecoration");
- let prim_data = &mut data_stores.line_decoration[*data_handle];
- let common_data = &mut prim_data.common;
- let line_dec_data = &mut prim_data.kind;
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- line_dec_data.update(common_data, frame_state);
-
- // Work out the device pixel size to be used to cache this line decoration.
- if is_chased {
- println!("\tline decoration key={:?}", line_dec_data.cache_key);
- }
-
- // If we have a cache key, it's a wavy / dashed / dotted line. Otherwise, it's
- // a simple solid line.
- if let Some(cache_key) = line_dec_data.cache_key.as_ref() {
- // TODO(gw): Do we ever need / want to support scales for text decorations
- // based on the current transform?
- let scale_factor = Scale::new(1.0) * device_pixel_scale;
- let mut task_size = (LayoutSize::from_au(cache_key.size) * scale_factor).ceil().to_i32();
- if task_size.width > MAX_LINE_DECORATION_RESOLUTION as i32 ||
- task_size.height > MAX_LINE_DECORATION_RESOLUTION as i32 {
- let max_extent = cmp::max(task_size.width, task_size.height);
- let task_scale_factor = Scale::new(MAX_LINE_DECORATION_RESOLUTION as f32 / max_extent as f32);
- task_size = (LayoutSize::from_au(cache_key.size) * scale_factor * task_scale_factor)
- .ceil().to_i32();
- }
-
- // Request a pre-rendered image task.
- // TODO(gw): This match is a bit untidy, but it should disappear completely
- // once the prepare_prims and batching are unified. When that
- // happens, we can use the cache handle immediately, and not need
- // to temporarily store it in the primitive instance.
- *render_task = Some(frame_state.resource_cache.request_render_task(
- RenderTaskCacheKey {
- size: task_size,
- kind: RenderTaskCacheKeyKind::LineDecoration(cache_key.clone()),
- },
- frame_state.gpu_cache,
- frame_state.rg_builder,
- None,
- false,
- RenderTaskParent::Surface(pic_context.surface_index),
- frame_state.surfaces,
- |rg_builder| {
- rg_builder.add().init(RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::new_line_decoration(
- cache_key.style,
- cache_key.orientation,
- cache_key.wavy_line_thickness.to_f32_px(),
- LayoutSize::from_au(cache_key.size),
- ),
- ))
- }
- ));
- }
- }
- PrimitiveInstanceKind::TextRun { run_index, data_handle, .. } => {
- profile_scope!("TextRun");
- let prim_data = &mut data_stores.text_run[*data_handle];
- let run = &mut store.text_runs[*run_index];
-
- prim_data.common.may_need_repetition = false;
-
- // The glyph transform has to match `glyph_transform` in the "ps_text_run" shader.
- // It's relative to the rasterizing space of a glyph.
- let transform = frame_context.spatial_tree
- .get_relative_transform(
- prim_spatial_node_index,
- pic_context.raster_spatial_node_index,
- )
- .into_fast_transform();
- let prim_offset = prim_data.common.prim_rect.origin.to_vector() - run.reference_frame_relative_offset;
-
- let pic = &store.pictures[pic_context.pic_index.0];
- let surface = &frame_state.surfaces[pic_context.surface_index.0];
- let root_scaling_factor = match pic.raster_config {
- Some(ref raster_config) => raster_config.root_scaling_factor,
- None => 1.0
- };
-
- // If subpixel AA is disabled due to the backing surface the glyphs
- // are being drawn onto, disable it here as well (unless we are using
- // the special subpixel mode that estimates background color).
- let allow_subpixel = match prim_instance.vis.state {
- VisibilityState::Culled |
- VisibilityState::Unset |
- VisibilityState::Coarse { .. } |
- VisibilityState::PassThrough => {
- panic!("bug: invalid visibility state");
- }
- VisibilityState::Detailed { ref filter, .. } => {
- // For now, we only allow subpixel AA on primary sub-slices. In future we
- // may support other sub-slices if we find content that does this.
- if filter.sub_slice_index.is_primary() {
- match pic_context.subpixel_mode {
- SubpixelMode::Allow => true,
- SubpixelMode::Deny => false,
- SubpixelMode::Conditional { allowed_rect } => {
- // Conditional mode allows subpixel AA to be enabled for this
- // text run, so long as it's inside the allowed rect.
- allowed_rect.contains_rect(&prim_instance.vis.clip_chain.pic_clip_rect)
- }
- }
- } else {
- false
- }
- }
- };
-
- run.request_resources(
- prim_offset,
- &prim_data.font,
- &prim_data.glyphs,
- &transform.to_transform().with_destination::<_>(),
- surface,
- prim_spatial_node_index,
- root_scaling_factor,
- allow_subpixel,
- frame_state.resource_cache,
- frame_state.gpu_cache,
- frame_context.spatial_tree,
- scratch,
- );
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- prim_data.update(frame_state);
- }
- PrimitiveInstanceKind::Clear { data_handle, .. } => {
- profile_scope!("Clear");
- let prim_data = &mut data_stores.prim[*data_handle];
-
- prim_data.common.may_need_repetition = false;
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- prim_data.update(frame_state, frame_context.scene_properties);
- }
- PrimitiveInstanceKind::NormalBorder { data_handle, ref mut render_task_ids, .. } => {
- profile_scope!("NormalBorder");
- let prim_data = &mut data_stores.normal_border[*data_handle];
- let common_data = &mut prim_data.common;
- let border_data = &mut prim_data.kind;
-
- common_data.may_need_repetition =
- matches!(border_data.border.top.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
- matches!(border_data.border.right.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
- matches!(border_data.border.bottom.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
- matches!(border_data.border.left.style, BorderStyle::Dotted | BorderStyle::Dashed);
-
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- border_data.update(common_data, frame_state);
-
- // TODO(gw): For now, the scale factors to rasterize borders at are
- // based on the true world transform of the primitive. When
- // raster roots with local scale are supported in future,
- // that will need to be accounted for here.
- let scale = frame_context
- .spatial_tree
- .get_world_transform(prim_spatial_node_index)
- .scale_factors();
-
- // Scale factors are normalized to a power of 2 to reduce the number of
- // resolution changes.
- // For frames with a changing scale transform, round scale factors up to the
- // nearest power-of-2 boundary so that we don't keep having to redraw
- // the content as it scales up and down. Rounding up to the nearest
- // power-of-2 boundary ensures we never scale up, only down --- avoiding
- // jaggies. It also ensures we never scale down by more than a factor of
- // 2, avoiding bad downscaling quality.
- let scale_width = clamp_to_scale_factor(scale.0, false);
- let scale_height = clamp_to_scale_factor(scale.1, false);
- // Pick the maximum dimension as scale
- let world_scale = LayoutToWorldScale::new(scale_width.max(scale_height));
- let mut scale = world_scale * device_pixel_scale;
- let max_scale = get_max_scale_for_border(border_data);
- scale.0 = scale.0.min(max_scale.0);
-
- // For each edge and corner, request the render task by content key
- // from the render task cache. This ensures that the render task for
- // this segment will be available for batching later in the frame.
- let mut handles: SmallVec<[RenderTaskId; 8]> = SmallVec::new();
-
- for segment in &border_data.border_segments {
- // Update the cache key device size based on requested scale.
- let cache_size = to_cache_size(segment.local_task_size, &mut scale);
- let cache_key = RenderTaskCacheKey {
- kind: RenderTaskCacheKeyKind::BorderSegment(segment.cache_key.clone()),
- size: cache_size,
- };
-
- handles.push(frame_state.resource_cache.request_render_task(
- cache_key,
- frame_state.gpu_cache,
- frame_state.rg_builder,
- None,
- false, // TODO(gw): We don't calculate opacity for borders yet!
- RenderTaskParent::Surface(pic_context.surface_index),
- frame_state.surfaces,
- |rg_builder| {
- rg_builder.add().init(RenderTask::new_dynamic(
- cache_size,
- RenderTaskKind::new_border_segment(
- build_border_instances(
- &segment.cache_key,
- cache_size,
- &border_data.border,
- scale,
- )
- ),
- ))
- }
- ));
- }
-
- *render_task_ids = scratch
- .border_cache_handles
- .extend(handles);
- }
- PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
- profile_scope!("ImageBorder");
- let prim_data = &mut data_stores.image_border[*data_handle];
-
- // TODO: get access to the ninepatch to check whether we need support
- // for repetitions in the shader.
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- prim_data.kind.update(
- &mut prim_data.common,
- frame_state
- );
- }
- PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, color_binding_index, .. } => {
- profile_scope!("Rectangle");
- let prim_data = &mut data_stores.prim[*data_handle];
- prim_data.common.may_need_repetition = false;
-
- if *color_binding_index != ColorBindingIndex::INVALID {
- match store.color_bindings[*color_binding_index] {
- PropertyBinding::Binding(..) => {
- // We explicitly invalidate the gpu cache
- // if the color is animating.
- let gpu_cache_handle =
- if *segment_instance_index == SegmentInstanceIndex::INVALID {
- None
- } else if *segment_instance_index == SegmentInstanceIndex::UNUSED {
- Some(&prim_data.common.gpu_cache_handle)
- } else {
- Some(&scratch.segment_instances[*segment_instance_index].gpu_cache_handle)
- };
- if let Some(gpu_cache_handle) = gpu_cache_handle {
- frame_state.gpu_cache.invalidate(gpu_cache_handle);
- }
- }
- PropertyBinding::Value(..) => {},
- }
- }
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- prim_data.update(
- frame_state,
- frame_context.scene_properties,
- );
-
- is_opaque = prim_data.common.opacity.is_opaque;
-
- write_segment(
- *segment_instance_index,
- frame_state,
- &mut scratch.segments,
- &mut scratch.segment_instances,
- |request| {
- prim_data.kind.write_prim_gpu_blocks(
- request,
- frame_context.scene_properties,
- );
- }
- );
- }
- PrimitiveInstanceKind::YuvImage { data_handle, segment_instance_index, .. } => {
- profile_scope!("YuvImage");
- let prim_data = &mut data_stores.yuv_image[*data_handle];
- let common_data = &mut prim_data.common;
- let yuv_image_data = &mut prim_data.kind;
- is_opaque = true;
-
- common_data.may_need_repetition = false;
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- yuv_image_data.update(common_data, frame_state);
-
- write_segment(
- *segment_instance_index,
- frame_state,
- &mut scratch.segments,
- &mut scratch.segment_instances,
- |request| {
- yuv_image_data.write_prim_gpu_blocks(request);
- }
- );
- }
- PrimitiveInstanceKind::Image { data_handle, image_instance_index, .. } => {
- profile_scope!("Image");
-
- let prim_data = &mut data_stores.image[*data_handle];
- let common_data = &mut prim_data.common;
- let image_data = &mut prim_data.kind;
- let image_instance = &mut store.images[*image_instance_index];
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- image_data.update(
- common_data,
- image_instance,
- pic_context.surface_index,
- prim_spatial_node_index,
- frame_state,
- frame_context,
- &mut prim_instance.vis,
- );
-
- // common_data.opacity.is_opaque is computed in the above update call.
- is_opaque = common_data.opacity.is_opaque;
-
- write_segment(
- image_instance.segment_instance_index,
- frame_state,
- &mut scratch.segments,
- &mut scratch.segment_instances,
- |request| {
- image_data.write_prim_gpu_blocks(request);
- },
- );
- }
- PrimitiveInstanceKind::LinearGradient { data_handle, ref mut visible_tiles_range, .. } => {
- profile_scope!("LinearGradient");
- let prim_data = &mut data_stores.linear_grad[*data_handle];
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- prim_data.update(frame_state, pic_context.surface_index);
-
- if prim_data.stretch_size.width >= prim_data.common.prim_rect.size.width &&
- prim_data.stretch_size.height >= prim_data.common.prim_rect.size.height {
-
- prim_data.common.may_need_repetition = false;
- }
-
- if prim_data.tile_spacing != LayoutSize::zero() {
- // We are performing the decomposition on the CPU here, no need to
- // have it in the shader.
- prim_data.common.may_need_repetition = false;
-
- *visible_tiles_range = decompose_repeated_gradient(
- &prim_instance.vis,
- &prim_data.common.prim_rect,
- prim_spatial_node_index,
- &prim_data.stretch_size,
- &prim_data.tile_spacing,
- frame_state,
- &mut scratch.gradient_tiles,
- &frame_context.spatial_tree,
- Some(&mut |_, mut request| {
- request.push([
- prim_data.start_point.x,
- prim_data.start_point.y,
- prim_data.end_point.x,
- prim_data.end_point.y,
- ]);
- request.push([
- pack_as_float(prim_data.extend_mode as u32),
- prim_data.stretch_size.width,
- prim_data.stretch_size.height,
- 0.0,
- ]);
- }),
- );
-
- if visible_tiles_range.is_empty() {
- prim_instance.clear_visibility();
- }
- }
-
- // TODO(gw): Consider whether it's worth doing segment building
- // for gradient primitives.
- }
- PrimitiveInstanceKind::CachedLinearGradient { data_handle, ref mut visible_tiles_range, .. } => {
- profile_scope!("CachedLinearGradient");
- let prim_data = &mut data_stores.linear_grad[*data_handle];
- prim_data.common.may_need_repetition = prim_data.stretch_size.width < prim_data.common.prim_rect.size.width
- || prim_data.stretch_size.height < prim_data.common.prim_rect.size.height;
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- prim_data.update(frame_state, pic_context.surface_index);
-
- if prim_data.tile_spacing != LayoutSize::zero() {
- prim_data.common.may_need_repetition = false;
-
- *visible_tiles_range = decompose_repeated_gradient(
- &prim_instance.vis,
- &prim_data.common.prim_rect,
- prim_spatial_node_index,
- &prim_data.stretch_size,
- &prim_data.tile_spacing,
- frame_state,
- &mut scratch.gradient_tiles,
- &frame_context.spatial_tree,
- None,
- );
-
- if visible_tiles_range.is_empty() {
- prim_instance.clear_visibility();
- }
- }
- }
- PrimitiveInstanceKind::RadialGradient { data_handle, ref mut visible_tiles_range, .. } => {
- profile_scope!("RadialGradient");
- let prim_data = &mut data_stores.radial_grad[*data_handle];
-
- prim_data.common.may_need_repetition = prim_data.stretch_size.width < prim_data.common.prim_rect.size.width
- || prim_data.stretch_size.height < prim_data.common.prim_rect.size.height;
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- prim_data.update(frame_state, pic_context.surface_index);
-
- if prim_data.tile_spacing != LayoutSize::zero() {
- prim_data.common.may_need_repetition = false;
-
- *visible_tiles_range = decompose_repeated_gradient(
- &prim_instance.vis,
- &prim_data.common.prim_rect,
- prim_spatial_node_index,
- &prim_data.stretch_size,
- &prim_data.tile_spacing,
- frame_state,
- &mut scratch.gradient_tiles,
- &frame_context.spatial_tree,
- None,
- );
-
- if visible_tiles_range.is_empty() {
- prim_instance.clear_visibility();
- }
- }
-
- // TODO(gw): Consider whether it's worth doing segment building
- // for gradient primitives.
- }
- PrimitiveInstanceKind::ConicGradient { data_handle, ref mut visible_tiles_range, .. } => {
- profile_scope!("ConicGradient");
- let prim_data = &mut data_stores.conic_grad[*data_handle];
-
- prim_data.common.may_need_repetition = prim_data.stretch_size.width < prim_data.common.prim_rect.size.width
- || prim_data.stretch_size.height < prim_data.common.prim_rect.size.height;
-
- // Update the template this instance references, which may refresh the GPU
- // cache with any shared template data.
- prim_data.update(frame_state, pic_context.surface_index);
-
- if prim_data.tile_spacing != LayoutSize::zero() {
- prim_data.common.may_need_repetition = false;
-
- *visible_tiles_range = decompose_repeated_gradient(
- &prim_instance.vis,
- &prim_data.common.prim_rect,
- prim_spatial_node_index,
- &prim_data.stretch_size,
- &prim_data.tile_spacing,
- frame_state,
- &mut scratch.gradient_tiles,
- &frame_context.spatial_tree,
- None,
- );
-
- if visible_tiles_range.is_empty() {
- prim_instance.clear_visibility();
- }
- }
-
- // TODO(gw): Consider whether it's worth doing segment building
- // for gradient primitives.
- }
- PrimitiveInstanceKind::Picture { pic_index, segment_instance_index, .. } => {
- profile_scope!("Picture");
- let pic = &mut store.pictures[pic_index.0];
-
- if pic.prepare_for_render(
- frame_context,
- frame_state,
- data_stores,
- ) {
- if let Some(ref mut splitter) = pic_state.plane_splitter {
- PicturePrimitive::add_split_plane(
- splitter,
- frame_context.spatial_tree,
- prim_spatial_node_index,
- pic.precise_local_rect,
- &prim_instance.vis.combined_local_clip_rect,
- frame_state.current_dirty_region().combined,
- plane_split_anchor,
- );
- }
-
- // If this picture uses segments, ensure the GPU cache is
- // up to date with segment local rects.
- // TODO(gw): This entire match statement above can now be
- // refactored into prepare_interned_prim_for_render.
- if pic.can_use_segments() {
- write_segment(
- *segment_instance_index,
- frame_state,
- &mut scratch.segments,
- &mut scratch.segment_instances,
- |request| {
- request.push(PremultipliedColorF::WHITE);
- request.push(PremultipliedColorF::WHITE);
- request.push([
- -1.0, // -ve means use prim rect for stretch size
- 0.0,
- 0.0,
- 0.0,
- ]);
- }
- );
- }
- } else {
- prim_instance.clear_visibility();
- }
- }
- PrimitiveInstanceKind::Backdrop { data_handle } => {
- profile_scope!("Backdrop");
- let backdrop_pic_index = data_stores.backdrop[*data_handle].kind.pic_index;
-
- // Set up a dependency on the backdrop picture to ensure it is rendered prior to rendering this primitive.
- let backdrop_surface_index = store.pictures[backdrop_pic_index.0].raster_config.as_ref().unwrap().surface_index;
- if let Some(ref backdrop_tasks) = frame_state.surfaces[backdrop_surface_index.0].render_tasks {
- // This is untidy / code duplication but matches existing behavior and will be
- // removed in follow up patches to this bug to rework how backdrop-filter works.
- let backdrop_task_id = match backdrop_tasks {
- SurfaceRenderTasks::Tiled(..) => unreachable!(),
- SurfaceRenderTasks::Simple(id) => *id,
- SurfaceRenderTasks::Chained { port_task_id, .. } => *port_task_id,
- };
-
- frame_state.add_child_render_task(
- pic_context.surface_index,
- backdrop_task_id,
- );
- } else {
- if prim_instance.is_chased() {
- println!("\tBackdrop primitive culled because backdrop task was not assigned render tasks");
- }
- prim_instance.clear_visibility();
- }
- }
- };
-
- // If the primitive is opaque, see if it can contribute to its picture surface's opaque rect.
-
- is_opaque = is_opaque && {
- let clip = prim_instance.vis.clip_task_index;
- clip == ClipTaskIndex::INVALID
- };
-
- is_opaque = is_opaque && !frame_context.spatial_tree.is_relative_transform_complex(
- prim_spatial_node_index,
- pic_context.raster_spatial_node_index,
- );
-
- if is_opaque {
- let prim_local_rect = data_stores.get_local_prim_rect(
- prim_instance,
- store,
- );
- cluster.opaque_rect = crate::util::conservative_union_rect(&cluster.opaque_rect, &prim_local_rect);
- }
-}
-
-
-fn write_segment<F>(
- segment_instance_index: SegmentInstanceIndex,
- frame_state: &mut FrameBuildingState,
- segments: &mut SegmentStorage,
- segment_instances: &mut SegmentInstanceStorage,
- f: F,
-) where F: Fn(&mut GpuDataRequest) {
- debug_assert_ne!(segment_instance_index, SegmentInstanceIndex::INVALID);
- if segment_instance_index != SegmentInstanceIndex::UNUSED {
- let segment_instance = &mut segment_instances[segment_instance_index];
-
- if let Some(mut request) = frame_state.gpu_cache.request(&mut segment_instance.gpu_cache_handle) {
- let segments = &segments[segment_instance.segments_range];
-
- f(&mut request);
-
- for segment in segments {
- request.write_segment(
- segment.local_rect,
- [0.0; 4],
- );
- }
- }
- }
-}
-
-fn decompose_repeated_gradient(
- prim_vis: &PrimitiveVisibility,
- prim_local_rect: &LayoutRect,
- prim_spatial_node_index: SpatialNodeIndex,
- stretch_size: &LayoutSize,
- tile_spacing: &LayoutSize,
- frame_state: &mut FrameBuildingState,
- gradient_tiles: &mut GradientTileStorage,
- spatial_tree: &SpatialTree,
- mut callback: Option<&mut dyn FnMut(&LayoutRect, GpuDataRequest)>,
-) -> GradientTileRange {
- let mut visible_tiles = Vec::new();
-
- // Tighten the clip rect because decomposing the repeated image can
- // produce primitives that are partially covering the original image
- // rect and we want to clip these extra parts out.
- let tight_clip_rect = prim_vis
- .combined_local_clip_rect
- .intersection(prim_local_rect).unwrap();
-
- let visible_rect = compute_conservative_visible_rect(
- &prim_vis.clip_chain,
- frame_state.current_dirty_region().combined,
- prim_spatial_node_index,
- spatial_tree,
- );
- let stride = *stretch_size + *tile_spacing;
-
- let repetitions = image_tiling::repetitions(prim_local_rect, &visible_rect, stride);
- for Repetition { origin, .. } in repetitions {
- let mut handle = GpuCacheHandle::new();
- let rect = LayoutRect {
- origin,
- size: *stretch_size,
- };
-
- if let Some(callback) = &mut callback {
- if let Some(request) = frame_state.gpu_cache.request(&mut handle) {
- callback(&rect, request);
- }
- }
-
- visible_tiles.push(VisibleGradientTile {
- local_rect: rect,
- local_clip_rect: tight_clip_rect,
- handle
- });
- }
-
- // At this point if we don't have tiles to show it means we could probably
- // have done a better job at culling during an earlier stage.
- // Clearing the screen rect has the effect of "culling out" the primitive
- // from the point of view of the batch builder, and ensures we don't hit
- // assertions later on because we didn't request any image.
- if visible_tiles.is_empty() {
- GradientTileRange::empty()
- } else {
- gradient_tiles.extend(visible_tiles)
- }
-}
-
-
-fn update_clip_task_for_brush(
- instance: &PrimitiveInstance,
- prim_origin: &LayoutPoint,
- prim_spatial_node_index: SpatialNodeIndex,
- root_spatial_node_index: SpatialNodeIndex,
- pic_context: &PictureContext,
- pic_state: &mut PictureState,
- frame_context: &FrameBuildingContext,
- frame_state: &mut FrameBuildingState,
- prim_store: &PrimitiveStore,
- data_stores: &mut DataStores,
- segments_store: &mut SegmentStorage,
- segment_instances_store: &mut SegmentInstanceStorage,
- clip_mask_instances: &mut Vec<ClipMaskKind>,
- unclipped: &DeviceRect,
- device_pixel_scale: DevicePixelScale,
-) -> Option<ClipTaskIndex> {
- let segments = match instance.kind {
- PrimitiveInstanceKind::TextRun { .. } |
- PrimitiveInstanceKind::Clear { .. } |
- PrimitiveInstanceKind::LineDecoration { .. } |
- PrimitiveInstanceKind::Backdrop { .. } => {
- return None;
- }
- PrimitiveInstanceKind::Image { image_instance_index, .. } => {
- let segment_instance_index = prim_store
- .images[image_instance_index]
- .segment_instance_index;
-
- if segment_instance_index == SegmentInstanceIndex::UNUSED {
- return None;
- }
-
- let segment_instance = &segment_instances_store[segment_instance_index];
-
- &segments_store[segment_instance.segments_range]
- }
- PrimitiveInstanceKind::Picture { segment_instance_index, .. } => {
- // Pictures may not support segment rendering at all (INVALID)
- // or support segment rendering but choose not to due to size
- // or some other factor (UNUSED).
- if segment_instance_index == SegmentInstanceIndex::UNUSED ||
- segment_instance_index == SegmentInstanceIndex::INVALID {
- return None;
- }
-
- let segment_instance = &segment_instances_store[segment_instance_index];
- &segments_store[segment_instance.segments_range]
- }
- PrimitiveInstanceKind::YuvImage { segment_instance_index, .. } |
- PrimitiveInstanceKind::Rectangle { segment_instance_index, .. } => {
- debug_assert!(segment_instance_index != SegmentInstanceIndex::INVALID);
-
- if segment_instance_index == SegmentInstanceIndex::UNUSED {
- return None;
- }
-
- let segment_instance = &segment_instances_store[segment_instance_index];
-
- &segments_store[segment_instance.segments_range]
- }
- PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
- let border_data = &data_stores.image_border[data_handle].kind;
-
- // TODO: This is quite messy - once we remove legacy primitives we
- // can change this to be a tuple match on (instance, template)
- border_data.brush_segments.as_slice()
- }
- PrimitiveInstanceKind::NormalBorder { data_handle, .. } => {
- let border_data = &data_stores.normal_border[data_handle].kind;
-
- // TODO: This is quite messy - once we remove legacy primitives we
- // can change this to be a tuple match on (instance, template)
- border_data.brush_segments.as_slice()
- }
- PrimitiveInstanceKind::LinearGradient { data_handle, .. }
- | PrimitiveInstanceKind::CachedLinearGradient { data_handle, .. } => {
- let prim_data = &data_stores.linear_grad[data_handle];
-
- // TODO: This is quite messy - once we remove legacy primitives we
- // can change this to be a tuple match on (instance, template)
- if prim_data.brush_segments.is_empty() {
- return None;
- }
-
- prim_data.brush_segments.as_slice()
- }
- PrimitiveInstanceKind::RadialGradient { data_handle, .. } => {
- let prim_data = &data_stores.radial_grad[data_handle];
-
- // TODO: This is quite messy - once we remove legacy primitives we
- // can change this to be a tuple match on (instance, template)
- if prim_data.brush_segments.is_empty() {
- return None;
- }
-
- prim_data.brush_segments.as_slice()
- }
- PrimitiveInstanceKind::ConicGradient { data_handle, .. } => {
- let prim_data = &data_stores.conic_grad[data_handle];
-
- // TODO: This is quite messy - once we remove legacy primitives we
- // can change this to be a tuple match on (instance, template)
- if prim_data.brush_segments.is_empty() {
- return None;
- }
-
- prim_data.brush_segments.as_slice()
- }
- };
-
- // If there are no segments, early out to avoid setting a valid
- // clip task instance location below.
- if segments.is_empty() {
- return None;
- }
-
- // Set where in the clip mask instances array the clip mask info
- // can be found for this primitive. Each segment will push the
- // clip mask information for itself in update_clip_task below.
- let clip_task_index = ClipTaskIndex(clip_mask_instances.len() as _);
-
- // If we only built 1 segment, there is no point in re-running
- // the clip chain builder. Instead, just use the clip chain
- // instance that was built for the main primitive. This is a
- // significant optimization for the common case.
- if segments.len() == 1 {
- let clip_mask_kind = update_brush_segment_clip_task(
- &segments[0],
- Some(&instance.vis.clip_chain),
- frame_state.current_dirty_region().combined,
- root_spatial_node_index,
- pic_context.surface_index,
- pic_state,
- frame_context,
- frame_state,
- &mut data_stores.clip,
- unclipped,
- device_pixel_scale,
- );
- clip_mask_instances.push(clip_mask_kind);
- } else {
- let dirty_world_rect = frame_state.current_dirty_region().combined;
-
- for segment in segments {
- // Build a clip chain for the smaller segment rect. This will
- // often manage to eliminate most/all clips, and sometimes
- // clip the segment completely.
- frame_state.clip_store.set_active_clips_from_clip_chain(
- &instance.vis.clip_chain,
- prim_spatial_node_index,
- &frame_context.spatial_tree,
- );
-
- let segment_clip_chain = frame_state
- .clip_store
- .build_clip_chain_instance(
- segment.local_rect.translate(prim_origin.to_vector()),
- &pic_state.map_local_to_pic,
- &pic_state.map_pic_to_world,
- &frame_context.spatial_tree,
- frame_state.gpu_cache,
- frame_state.resource_cache,
- device_pixel_scale,
- &dirty_world_rect,
- &mut data_stores.clip,
- false,
- instance.is_chased(),
- );
-
- let clip_mask_kind = update_brush_segment_clip_task(
- &segment,
- segment_clip_chain.as_ref(),
- frame_state.current_dirty_region().combined,
- root_spatial_node_index,
- pic_context.surface_index,
- pic_state,
- frame_context,
- frame_state,
- &mut data_stores.clip,
- unclipped,
- device_pixel_scale,
- );
- clip_mask_instances.push(clip_mask_kind);
- }
- }
-
- Some(clip_task_index)
-}
-
-pub fn update_clip_task(
- instance: &mut PrimitiveInstance,
- prim_origin: &LayoutPoint,
- prim_spatial_node_index: SpatialNodeIndex,
- root_spatial_node_index: SpatialNodeIndex,
- pic_context: &PictureContext,
- pic_state: &mut PictureState,
- frame_context: &FrameBuildingContext,
- frame_state: &mut FrameBuildingState,
- prim_store: &mut PrimitiveStore,
- data_stores: &mut DataStores,
- scratch: &mut PrimitiveScratchBuffer,
-) -> bool {
- let device_pixel_scale = frame_state.surfaces[pic_context.surface_index.0].device_pixel_scale;
-
- if instance.is_chased() {
- println!("\tupdating clip task with pic rect {:?}", instance.vis.clip_chain.pic_clip_rect);
- }
-
- // Get the device space rect for the primitive if it was unclipped.
- let unclipped = match get_unclipped_device_rect(
- instance.vis.clip_chain.pic_clip_rect,
- &pic_state.map_pic_to_raster,
- device_pixel_scale,
- ) {
- Some(rect) => rect,
- None => return false,
- };
-
- build_segments_if_needed(
- instance,
- frame_state,
- prim_store,
- data_stores,
- &mut scratch.segments,
- &mut scratch.segment_instances,
- );
-
- // First try to render this primitive's mask using optimized brush rendering.
- instance.vis.clip_task_index = if let Some(clip_task_index) = update_clip_task_for_brush(
- instance,
- prim_origin,
- prim_spatial_node_index,
- root_spatial_node_index,
- pic_context,
- pic_state,
- frame_context,
- frame_state,
- prim_store,
- data_stores,
- &mut scratch.segments,
- &mut scratch.segment_instances,
- &mut scratch.clip_mask_instances,
- &unclipped,
- device_pixel_scale,
- ) {
- if instance.is_chased() {
- println!("\tsegment tasks have been created for clipping: {:?}", clip_task_index);
- }
- clip_task_index
- } else if instance.vis.clip_chain.needs_mask {
- // Get a minimal device space rect, clipped to the screen, that we
- // need to allocate for the clip mask, as well as interpolated
- // snap offsets.
- let unadjusted_device_rect = match get_clipped_device_rect(
- &unclipped,
- &pic_state.map_raster_to_world,
- frame_state.current_dirty_region().combined,
- device_pixel_scale,
- ) {
- Some(device_rect) => device_rect,
- None => return false,
- };
-
- let (device_rect, device_pixel_scale) = adjust_mask_scale_for_max_size(
- unadjusted_device_rect,
- device_pixel_scale,
- );
- let clip_task_id = RenderTaskKind::new_mask(
- device_rect,
- instance.vis.clip_chain.clips_range,
- root_spatial_node_index,
- frame_state.clip_store,
- frame_state.gpu_cache,
- frame_state.resource_cache,
- frame_state.rg_builder,
- &mut data_stores.clip,
- device_pixel_scale,
- frame_context.fb_config,
- frame_state.surfaces,
- );
- if instance.is_chased() {
- println!("\tcreated task {:?} with device rect {:?}",
- clip_task_id, device_rect);
- }
- // Set the global clip mask instance for this primitive.
- let clip_task_index = ClipTaskIndex(scratch.clip_mask_instances.len() as _);
- scratch.clip_mask_instances.push(ClipMaskKind::Mask(clip_task_id));
- instance.vis.clip_task_index = clip_task_index;
- frame_state.add_child_render_task(
- pic_context.surface_index,
- clip_task_id,
- );
- clip_task_index
- } else {
- if instance.is_chased() {
- println!("\tno mask is needed");
- }
- ClipTaskIndex::INVALID
- };
-
- true
-}
-
-/// Write out to the clip mask instances array the correct clip mask
-/// config for this segment.
-pub fn update_brush_segment_clip_task(
- segment: &BrushSegment,
- clip_chain: Option<&ClipChainInstance>,
- world_clip_rect: WorldRect,
- root_spatial_node_index: SpatialNodeIndex,
- surface_index: SurfaceIndex,
- pic_state: &mut PictureState,
- frame_context: &FrameBuildingContext,
- frame_state: &mut FrameBuildingState,
- clip_data_store: &mut ClipDataStore,
- unclipped: &DeviceRect,
- device_pixel_scale: DevicePixelScale,
-) -> ClipMaskKind {
- let clip_chain = match clip_chain {
- Some(chain) => chain,
- None => return ClipMaskKind::Clipped,
- };
- if !clip_chain.needs_mask ||
- (!segment.may_need_clip_mask && !clip_chain.has_non_local_clips) {
- return ClipMaskKind::None;
- }
-
- let segment_world_rect = match pic_state.map_pic_to_world.map(&clip_chain.pic_clip_rect) {
- Some(rect) => rect,
- None => return ClipMaskKind::Clipped,
- };
-
- let segment_world_rect = match segment_world_rect.intersection(&world_clip_rect) {
- Some(rect) => rect,
- None => return ClipMaskKind::Clipped,
- };
-
- // Get a minimal device space rect, clipped to the screen, that
- // we need to allocate for the clip mask.
- let device_rect = match get_clipped_device_rect(
- unclipped,
- &pic_state.map_raster_to_world,
- segment_world_rect,
- device_pixel_scale,
- ) {
- Some(info) => info,
- None => {
- return ClipMaskKind::Clipped;
- }
- };
-
- let (device_rect, device_pixel_scale) = adjust_mask_scale_for_max_size(device_rect, device_pixel_scale);
-
- let clip_task_id = RenderTaskKind::new_mask(
- device_rect,
- clip_chain.clips_range,
- root_spatial_node_index,
- frame_state.clip_store,
- frame_state.gpu_cache,
- frame_state.resource_cache,
- frame_state.rg_builder,
- clip_data_store,
- device_pixel_scale,
- frame_context.fb_config,
- frame_state.surfaces,
- );
-
- frame_state.add_child_render_task(
- surface_index,
- clip_task_id,
- );
- ClipMaskKind::Mask(clip_task_id)
-}
-
-
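// A compact, self-contained sketch (hypothetical types, not WebRender's
// actual ClipMaskKind machinery) of the three-way outcome the function
// above computes for each segment: no mask needed, clipped out entirely,
// or a mask render task allocated.

#[derive(Debug, PartialEq)]
enum MaskKind {
    None,         // segment can be drawn unmasked
    Clipped,      // nothing survives the clip; skip drawing
    Mask(u32),    // render the segment with this mask task
}

fn classify_segment(needs_mask: bool, visible_after_clip: bool, task_id: u32) -> MaskKind {
    if !needs_mask {
        MaskKind::None
    } else if !visible_after_clip {
        MaskKind::Clipped
    } else {
        MaskKind::Mask(task_id)
    }
}

fn main() {
    assert_eq!(classify_segment(false, true, 0), MaskKind::None);
    assert_eq!(classify_segment(true, false, 0), MaskKind::Clipped);
    assert_eq!(classify_segment(true, true, 7), MaskKind::Mask(7));
}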
-fn write_brush_segment_description(
- prim_local_rect: LayoutRect,
- prim_local_clip_rect: LayoutRect,
- clip_chain: &ClipChainInstance,
- segment_builder: &mut SegmentBuilder,
- clip_store: &ClipStore,
- data_stores: &DataStores,
-) -> bool {
- // If the brush is small, we want to skip building segments
- // and just draw it as a single primitive with a clip mask.
- if prim_local_rect.size.area() < MIN_BRUSH_SPLIT_AREA {
- return false;
- }
-
- segment_builder.initialize(
- prim_local_rect,
- None,
- prim_local_clip_rect
- );
-
- // Segment the primitive on all the local-space clip sources that we can.
- for i in 0 .. clip_chain.clips_range.count {
- let clip_instance = clip_store
- .get_instance_from_range(&clip_chain.clips_range, i);
- let clip_node = &data_stores.clip[clip_instance.handle];
-
- // If this clip item is positioned by another positioning node, its relative position
- // could change during scrolling. This means that we would need to resegment. Instead
- // of doing that, only segment with clips that have the same positioning node.
- // TODO(mrobinson, #2858): It may make sense to include these nodes, resegmenting only
- // when necessary while scrolling.
- if !clip_instance.flags.contains(ClipNodeFlags::SAME_SPATIAL_NODE) {
- continue;
- }
-
- let (local_clip_rect, radius, mode) = match clip_node.item.kind {
- ClipItemKind::RoundedRectangle { rect, radius, mode } => {
- (rect, Some(radius), mode)
- }
- ClipItemKind::Rectangle { rect, mode } => {
- (rect, None, mode)
- }
- ClipItemKind::BoxShadow { ref source } => {
- // For inset box shadows, we can clip out any
- // pixels that are inside the shadow region
- // and are beyond the inner rect, as they can't
- // be affected by the blur radius.
- let inner_clip_mode = match source.clip_mode {
- BoxShadowClipMode::Outset => None,
- BoxShadowClipMode::Inset => Some(ClipMode::ClipOut),
- };
-
- // Push a region into the segment builder where the
- // box-shadow can have an effect on the result. This
- // ensures clip-mask tasks get allocated for these
- // pixel regions, even if no other clips affect them.
- segment_builder.push_mask_region(
- source.prim_shadow_rect,
- source.prim_shadow_rect.inflate(
- -0.5 * source.original_alloc_size.width,
- -0.5 * source.original_alloc_size.height,
- ),
- inner_clip_mode,
- );
-
- continue;
- }
- ClipItemKind::Image { .. } => {
- // If we encounter an image mask, bail out from segment building.
- // It's not possible to know which parts of the primitive are affected
- // by the mask (without inspecting the pixels). We could do something
- // better here in the future if it ever shows up as a performance issue
- // (for instance, at least segment based on the bounding rect of the
- // image mask if it's non-repeating).
- return false;
- }
- };
-
- segment_builder.push_clip_rect(local_clip_rect, radius, mode);
- }
-
- true
-}
-
-fn build_segments_if_needed(
- instance: &mut PrimitiveInstance,
- frame_state: &mut FrameBuildingState,
- prim_store: &mut PrimitiveStore,
- data_stores: &DataStores,
- segments_store: &mut SegmentStorage,
- segment_instances_store: &mut SegmentInstanceStorage,
-) {
- let prim_clip_chain = &instance.vis.clip_chain;
-
- // Usually, the primitive rect can be found from information
- // in the instance and primitive template.
- let prim_local_rect = data_stores.get_local_prim_rect(
- instance,
- prim_store,
- );
-
- let segment_instance_index = match instance.kind {
- PrimitiveInstanceKind::Rectangle { ref mut segment_instance_index, .. } |
- PrimitiveInstanceKind::YuvImage { ref mut segment_instance_index, .. } => {
- segment_instance_index
- }
- PrimitiveInstanceKind::Image { data_handle, image_instance_index, .. } => {
- let image_data = &data_stores.image[data_handle].kind;
- let image_instance = &mut prim_store.images[image_instance_index];
- // Note: tiled images don't support automatic segmentation;
- // they strictly produce one segment per visible tile instead.
- if frame_state
- .resource_cache
- .get_image_properties(image_data.key)
- .and_then(|properties| properties.tiling)
- .is_some()
- {
- image_instance.segment_instance_index = SegmentInstanceIndex::UNUSED;
- return;
- }
- &mut image_instance.segment_instance_index
- }
- PrimitiveInstanceKind::Picture { ref mut segment_instance_index, pic_index, .. } => {
- let pic = &mut prim_store.pictures[pic_index.0];
-
- // If this picture supports segment rendering
- if pic.can_use_segments() {
- // If the segments have been invalidated, reset the current
- // segment index to invalid. This ensures that the segment
- // building logic below will be run.
- if !pic.segments_are_valid {
- *segment_instance_index = SegmentInstanceIndex::INVALID;
- pic.segments_are_valid = true;
- }
-
- segment_instance_index
- } else {
- return;
- }
- }
- PrimitiveInstanceKind::TextRun { .. } |
- PrimitiveInstanceKind::NormalBorder { .. } |
- PrimitiveInstanceKind::ImageBorder { .. } |
- PrimitiveInstanceKind::Clear { .. } |
- PrimitiveInstanceKind::LinearGradient { .. } |
- PrimitiveInstanceKind::CachedLinearGradient { .. } |
- PrimitiveInstanceKind::RadialGradient { .. } |
- PrimitiveInstanceKind::ConicGradient { .. } |
- PrimitiveInstanceKind::LineDecoration { .. } |
- PrimitiveInstanceKind::Backdrop { .. } => {
- // These primitives don't support / need segments.
- return;
- }
- };
-
- if *segment_instance_index == SegmentInstanceIndex::INVALID {
- let mut segments: SmallVec<[BrushSegment; 8]> = SmallVec::new();
-
- if write_brush_segment_description(
- prim_local_rect,
- instance.clip_set.local_clip_rect,
- prim_clip_chain,
- &mut frame_state.segment_builder,
- frame_state.clip_store,
- data_stores,
- ) {
- frame_state.segment_builder.build(|segment| {
- segments.push(
- BrushSegment::new(
- segment.rect.translate(-prim_local_rect.origin.to_vector()),
- segment.has_mask,
- segment.edge_flags,
- [0.0; 4],
- BrushFlags::PERSPECTIVE_INTERPOLATION,
- ),
- );
- });
- }
-
- // If only a single segment is produced, there is no benefit to writing
- // a segment instance array. Instead, just use the main primitive rect
- // written into the GPU cache.
- // TODO(gw): This is (sort of) a band-aid - due to a limitation in the current
- // brush encoding, we can only support a total of up to 2^16 segments.
- // This should be (more than) enough for any real world case, so for
- // now we can handle this by skipping cases where segmentation
- // provides no benefit. The long term / robust fix
- // for this is to move the segment building to be done as a more
- // limited nine-patch system during scene building, removing arbitrary
- // segmentation during frame-building (see bug #1617491).
- if segments.len() <= 1 {
- *segment_instance_index = SegmentInstanceIndex::UNUSED;
- } else {
- let segments_range = segments_store.extend(segments);
-
- let instance = SegmentedInstance {
- segments_range,
- gpu_cache_handle: GpuCacheHandle::new(),
- };
-
- *segment_instance_index = segment_instances_store.push(instance);
- };
- }
-}
-
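// A minimal standalone sketch (simplified types, not WebRender's storage
// wrappers) of the decision at the end of build_segments_if_needed above:
// a segmentation only earns an entry in the segment store when it produced
// more than one segment; a single segment just reuses the primitive rect.

#[derive(Debug, PartialEq)]
enum SegIndex {
    Unused,        // <= 1 segment: draw via the prim rect in the GPU cache
    Stored(usize), // offset of this instance's segments in the shared store
}

fn resolve_segments(segments: Vec<[f32; 4]>, store: &mut Vec<[f32; 4]>) -> SegIndex {
    if segments.len() <= 1 {
        SegIndex::Unused
    } else {
        let start = store.len();
        store.extend(segments);
        SegIndex::Stored(start)
    }
}

fn main() {
    let mut store = Vec::new();
    assert_eq!(resolve_segments(vec![[0.0, 0.0, 1.0, 1.0]], &mut store), SegIndex::Unused);
    let idx = resolve_segments(vec![[0.0, 0.0, 0.5, 1.0], [0.5, 0.0, 0.5, 1.0]], &mut store);
    assert_eq!(idx, SegIndex::Stored(0));
}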
-/// Retrieve the exact unsnapped device space rectangle for a primitive.
-fn get_unclipped_device_rect(
- prim_rect: PictureRect,
- map_to_raster: &SpaceMapper<PicturePixel, RasterPixel>,
- device_pixel_scale: DevicePixelScale,
-) -> Option<DeviceRect> {
- let raster_rect = map_to_raster.map(&prim_rect)?;
- let world_rect = raster_rect * Scale::new(1.0);
- Some(world_rect * device_pixel_scale)
-}
-
-/// Given an unclipped device rect, try to find a minimal device space
-/// rect to allocate a clip mask for, by clipping to the screen. This
-/// function is very similar to picture::get_raster_rects. It is far from
-/// ideal, and should be refactored as part of the support for setting
-/// scale per-raster-root.
-fn get_clipped_device_rect(
- unclipped: &DeviceRect,
- map_to_world: &SpaceMapper<RasterPixel, WorldPixel>,
- world_clip_rect: WorldRect,
- device_pixel_scale: DevicePixelScale,
-) -> Option<DeviceRect> {
- let unclipped_raster_rect = {
- let world_rect = *unclipped * Scale::new(1.0);
- let raster_rect = world_rect * device_pixel_scale.inverse();
-
- raster_rect.cast_unit()
- };
-
- let unclipped_world_rect = map_to_world.map(&unclipped_raster_rect)?;
-
- let clipped_world_rect = unclipped_world_rect.intersection(&world_clip_rect)?;
-
- let clipped_raster_rect = map_to_world.unmap(&clipped_world_rect)?;
-
- let clipped_raster_rect = clipped_raster_rect.intersection(&unclipped_raster_rect)?;
-
- // Ensure that we won't try to allocate a zero-sized clip render task.
- if clipped_raster_rect.is_empty() {
- return None;
- }
-
- let clipped = raster_rect_to_device_pixels(
- clipped_raster_rect,
- device_pixel_scale,
- );
-
- Some(clipped)
-}
-
-// Ensures that the size of mask render tasks are within MAX_MASK_SIZE.
-fn adjust_mask_scale_for_max_size(device_rect: DeviceRect, device_pixel_scale: DevicePixelScale) -> (DeviceRect, DevicePixelScale) {
- if device_rect.width() > MAX_MASK_SIZE || device_rect.height() > MAX_MASK_SIZE {
- // round_out will grow by 1 integer pixel if origin is on a
- // fractional position, so keep that margin for error with -1:
- let scale = (MAX_MASK_SIZE - 1.0) /
- f32::max(device_rect.width(), device_rect.height());
- let new_device_pixel_scale = device_pixel_scale * Scale::new(scale);
- let new_device_rect = (device_rect.to_f32() * Scale::new(scale))
- .round_out();
- (new_device_rect, new_device_pixel_scale)
- } else {
- (device_rect, device_pixel_scale)
- }
-}
-
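// A standalone numeric sketch of adjust_mask_scale_for_max_size above. The
// MAX_MASK_SIZE value below is assumed for illustration only; the real
// constant is defined elsewhere in this module.

fn clamp_mask(size: (f32, f32), device_pixel_scale: f32) -> ((f32, f32), f32) {
    const MAX_MASK_SIZE: f32 = 4096.0; // assumed value, for illustration
    let longest = size.0.max(size.1);
    if longest <= MAX_MASK_SIZE {
        return (size, device_pixel_scale);
    }
    // Keep a pixel of slack so a later round_out can't exceed the limit.
    let scale = (MAX_MASK_SIZE - 1.0) / longest;
    (
        ((size.0 * scale).ceil(), (size.1 * scale).ceil()),
        device_pixel_scale * scale,
    )
}

fn main() {
    // A 6000x1000 mask exceeds the limit: scaled by 4095/6000 = 0.6825
    // to roughly 4095x683, and the device pixel scale shrinks to match.
    let ((w, h), dps) = clamp_mask((6000.0, 1000.0), 1.0);
    assert!(w <= 4096.0 && h <= 4096.0);
    assert!((dps - 0.6825).abs() < 1e-3);
}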
diff --git a/third_party/webrender/webrender/src/prim_store/backdrop.rs b/third_party/webrender/webrender/src/prim_store/backdrop.rs
index c45bf78eef4..ea033574fb2 100644
--- a/third_party/webrender/webrender/src/prim_store/backdrop.rs
+++ b/third_party/webrender/webrender/src/prim_store/backdrop.rs
@@ -74,7 +74,6 @@ impl Internable for Backdrop {
type Key = BackdropKey;
type StoreData = BackdropTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_BACKDROPS;
}
impl InternablePrimitive for Backdrop {
diff --git a/third_party/webrender/webrender/src/prim_store/borders.rs b/third_party/webrender/webrender/src/prim_store/borders.rs
index 4be7d72b46d..084350c3357 100644
--- a/third_party/webrender/webrender/src/prim_store/borders.rs
+++ b/third_party/webrender/webrender/src/prim_store/borders.rs
@@ -2,13 +2,13 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{NormalBorder, PremultipliedColorF, Shadow, RasterSpace};
+use api::{NormalBorder, PremultipliedColorF, Shadow};
use api::units::*;
use crate::border::create_border_segments;
use crate::border::NormalBorderAu;
use crate::scene_building::{CreateShadow, IsVisible};
use crate::frame_builder::{FrameBuildingState};
-use crate::gpu_cache::GpuDataRequest;
+use crate::gpu_cache::{GpuCache, GpuDataRequest};
use crate::intern;
use crate::internal_types::LayoutPrimitiveInfo;
use crate::prim_store::{
@@ -17,12 +17,8 @@ use crate::prim_store::{
PrimitiveInstanceKind, PrimitiveOpacity,
PrimitiveStore, InternablePrimitive,
};
-use crate::resource_cache::ImageRequest;
-use crate::render_task::RenderTask;
-use crate::render_task_graph::RenderTaskId;
-use crate::render_backend::FrameId;
-
-use super::storage;
+use crate::resource_cache::{ImageRequest, ResourceCache};
+use crate::storage;
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -149,7 +145,6 @@ impl intern::Internable for NormalBorderPrim {
type Key = NormalBorderKey;
type StoreData = NormalBorderTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_NORMAL_BORDERS;
}
impl InternablePrimitive for NormalBorderPrim {
@@ -171,18 +166,13 @@ impl InternablePrimitive for NormalBorderPrim {
) -> PrimitiveInstanceKind {
PrimitiveInstanceKind::NormalBorder {
data_handle,
- render_task_ids: storage::Range::empty(),
+ cache_handles: storage::Range::empty(),
}
}
}
impl CreateShadow for NormalBorderPrim {
- fn create_shadow(
- &self,
- shadow: &Shadow,
- _: bool,
- _: RasterSpace,
- ) -> Self {
+ fn create_shadow(&self, shadow: &Shadow) -> Self {
let border = self.border.with_color(shadow.color.into());
NormalBorderPrim {
border,
@@ -232,9 +222,6 @@ pub struct ImageBorderData {
#[ignore_malloc_size_of = "Arc"]
pub request: ImageRequest,
pub brush_segments: Vec<BrushSegment>,
- pub src_color: Option<RenderTaskId>,
- pub frame_id: FrameId,
- pub is_opaque: bool,
}
impl ImageBorderData {
@@ -252,31 +239,28 @@ impl ImageBorderData {
self.write_segment_gpu_blocks(request);
}
- let frame_id = frame_state.rg_builder.frame_id();
- if self.frame_id != frame_id {
- self.frame_id = frame_id;
-
- let size = frame_state.resource_cache.request_image(
- self.request,
- frame_state.gpu_cache,
- );
-
- let task_id = frame_state.rg_builder.add().init(
- RenderTask::new_image(size, self.request)
- );
-
- self.src_color = Some(task_id);
-
- let image_properties = frame_state
- .resource_cache
- .get_image_properties(self.request.key);
+ let image_properties = frame_state
+ .resource_cache
+ .get_image_properties(self.request.key);
- self.is_opaque = image_properties
- .map(|properties| properties.descriptor.is_opaque())
- .unwrap_or(true);
+ common.opacity = if let Some(image_properties) = image_properties {
+ PrimitiveOpacity {
+ is_opaque: image_properties.descriptor.is_opaque(),
+ }
+ } else {
+ PrimitiveOpacity::opaque()
}
+ }
- common.opacity = PrimitiveOpacity { is_opaque: self.is_opaque };
+ pub fn request_resources(
+ &mut self,
+ resource_cache: &mut ResourceCache,
+ gpu_cache: &mut GpuCache,
+ ) {
+ resource_cache.request_image(
+ self.request,
+ gpu_cache,
+ );
}
fn write_prim_gpu_blocks(
@@ -323,9 +307,6 @@ impl From<ImageBorderKey> for ImageBorderTemplate {
kind: ImageBorderData {
request: key.kind.request,
brush_segments,
- src_color: None,
- frame_id: FrameId::INVALID,
- is_opaque: false,
}
}
}
@@ -337,7 +318,6 @@ impl intern::Internable for ImageBorder {
type Key = ImageBorderKey;
type StoreData = ImageBorderTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_IMAGE_BORDERS;
}
impl InternablePrimitive for ImageBorder {
@@ -383,6 +363,6 @@ fn test_struct_sizes() {
assert_eq!(mem::size_of::<NormalBorderTemplate>(), 216, "NormalBorderTemplate size changed");
assert_eq!(mem::size_of::<NormalBorderKey>(), 104, "NormalBorderKey size changed");
assert_eq!(mem::size_of::<ImageBorder>(), 84, "ImageBorder size changed");
- assert_eq!(mem::size_of::<ImageBorderTemplate>(), 96, "ImageBorderTemplate size changed");
+ assert_eq!(mem::size_of::<ImageBorderTemplate>(), 80, "ImageBorderTemplate size changed");
assert_eq!(mem::size_of::<ImageBorderKey>(), 104, "ImageBorderKey size changed");
}
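// The borders change above moves image requesting out of the template
// update path into a dedicated request_resources step. A minimal sketch of
// that two-phase pattern, with simplified stand-in types (not WebRender's
// actual caches): resources are requested first, and the later update pass
// only derives state, such as opacity, from whatever the cache knows.

use std::collections::HashMap;

struct MiniResourceCache {
    // key -> is_opaque, filled in when an image is requested.
    properties: HashMap<u64, bool>,
}

impl MiniResourceCache {
    fn request_image(&mut self, key: u64, is_opaque: bool) {
        self.properties.insert(key, is_opaque);
    }
    fn get_image_properties(&self, key: u64) -> Option<bool> {
        self.properties.get(&key).copied()
    }
}

struct MiniImageBorder {
    image_key: u64,
    is_opaque: bool,
}

impl MiniImageBorder {
    // Phase 1: declare what this primitive needs for the frame.
    fn request_resources(&self, cache: &mut MiniResourceCache) {
        cache.request_image(self.image_key, true);
    }
    // Phase 2: no requests here; mirror the diff's "default to opaque"
    // when the cache has no properties for the key.
    fn update(&mut self, cache: &MiniResourceCache) {
        self.is_opaque = cache.get_image_properties(self.image_key).unwrap_or(true);
    }
}

fn main() {
    let mut cache = MiniResourceCache { properties: HashMap::new() };
    let mut border = MiniImageBorder { image_key: 7, is_opaque: false };
    border.request_resources(&mut cache);
    border.update(&cache);
    assert!(border.is_opaque);
}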
diff --git a/third_party/webrender/webrender/src/prim_store/gradient.rs b/third_party/webrender/webrender/src/prim_store/gradient.rs
new file mode 100644
index 00000000000..add65ad8cd8
--- /dev/null
+++ b/third_party/webrender/webrender/src/prim_store/gradient.rs
@@ -0,0 +1,1007 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+use api::{
+ ColorF, ColorU, ExtendMode, GradientStop,
+ PremultipliedColorF, LineOrientation,
+};
+use api::units::{LayoutPoint, LayoutSize, LayoutVector2D};
+use crate::scene_building::IsVisible;
+use euclid::approxeq::ApproxEq;
+use crate::frame_builder::FrameBuildingState;
+use crate::gpu_cache::{GpuCacheHandle, GpuDataRequest};
+use crate::intern::{Internable, InternDebug, Handle as InternHandle};
+use crate::internal_types::LayoutPrimitiveInfo;
+use crate::prim_store::{BrushSegment, CachedGradientSegment, GradientTileRange, VectorKey};
+use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity};
+use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore};
+use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, InternablePrimitive};
+use std::{hash, ops::{Deref, DerefMut}};
+use crate::util::pack_as_float;
+use crate::texture_cache::TEXTURE_REGION_DIMENSIONS;
+
+/// The maximum number of stops a gradient may have to use the fast path.
+pub const GRADIENT_FP_STOPS: usize = 4;
+
+/// A hashable gradient stop that can be used in primitive keys.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Copy, Clone, MallocSizeOf, PartialEq)]
+pub struct GradientStopKey {
+ pub offset: f32,
+ pub color: ColorU,
+}
+
+impl GradientStopKey {
+ pub fn empty() -> Self {
+ GradientStopKey {
+ offset: 0.0,
+ color: ColorU::new(0, 0, 0, 0),
+ }
+ }
+}
+
+impl Into<GradientStopKey> for GradientStop {
+ fn into(self) -> GradientStopKey {
+ GradientStopKey {
+ offset: self.offset,
+ color: self.color.into(),
+ }
+ }
+}
+
+// Convert `stop_keys` into a vector of `GradientStop`s, which is a more
+// convenient representation for the current gradient builder. Compute the
+// minimum stop alpha along the way.
+fn stops_and_min_alpha(stop_keys: &[GradientStopKey]) -> (Vec<GradientStop>, f32) {
+ let mut min_alpha: f32 = 1.0;
+ let stops = stop_keys.iter().map(|stop_key| {
+ let color: ColorF = stop_key.color.into();
+ min_alpha = min_alpha.min(color.a);
+
+ GradientStop {
+ offset: stop_key.offset,
+ color,
+ }
+ }).collect();
+
+ (stops, min_alpha)
+}
+
+impl Eq for GradientStopKey {}
+
+impl hash::Hash for GradientStopKey {
+ fn hash<H: hash::Hasher>(&self, state: &mut H) {
+ self.offset.to_bits().hash(state);
+ self.color.hash(state);
+ }
+}
+
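// GradientStopKey above hashes its f32 offset through to_bits, letting a
// float-bearing key satisfy Eq + Hash by comparing bit patterns. A minimal
// standalone sketch of the same pattern (FloatOffsetKey is a hypothetical
// illustrative type):

use std::collections::HashMap;
use std::hash::{Hash, Hasher};

#[derive(PartialEq)]
struct FloatOffsetKey(f32);

impl Eq for FloatOffsetKey {}

impl Hash for FloatOffsetKey {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Identical float values share a bit pattern, so this is consistent
        // with PartialEq (NaN would need care, but stop offsets are finite).
        self.0.to_bits().hash(state);
    }
}

fn main() {
    let mut interned: HashMap<FloatOffsetKey, u32> = HashMap::new();
    interned.insert(FloatOffsetKey(0.25), 1);
    assert_eq!(interned.get(&FloatOffsetKey(0.25)), Some(&1));
}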
+/// Identifying key for a linear gradient.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
+pub struct LinearGradientKey {
+ pub common: PrimKeyCommonData,
+ pub extend_mode: ExtendMode,
+ pub start_point: PointKey,
+ pub end_point: PointKey,
+ pub stretch_size: SizeKey,
+ pub tile_spacing: SizeKey,
+ pub stops: Vec<GradientStopKey>,
+ pub reverse_stops: bool,
+ pub nine_patch: Option<Box<NinePatchDescriptor>>,
+}
+
+impl LinearGradientKey {
+ pub fn new(
+ info: &LayoutPrimitiveInfo,
+ linear_grad: LinearGradient,
+ ) -> Self {
+ LinearGradientKey {
+ common: info.into(),
+ extend_mode: linear_grad.extend_mode,
+ start_point: linear_grad.start_point,
+ end_point: linear_grad.end_point,
+ stretch_size: linear_grad.stretch_size,
+ tile_spacing: linear_grad.tile_spacing,
+ stops: linear_grad.stops,
+ reverse_stops: linear_grad.reverse_stops,
+ nine_patch: linear_grad.nine_patch,
+ }
+ }
+}
+
+impl InternDebug for LinearGradientKey {}
+
+#[derive(Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GradientCacheKey {
+ pub orientation: LineOrientation,
+ pub start_stop_point: VectorKey,
+ pub stops: [GradientStopKey; GRADIENT_FP_STOPS],
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct LinearGradientTemplate {
+ pub common: PrimTemplateCommonData,
+ pub extend_mode: ExtendMode,
+ pub start_point: LayoutPoint,
+ pub end_point: LayoutPoint,
+ pub stretch_size: LayoutSize,
+ pub tile_spacing: LayoutSize,
+ pub stops_opacity: PrimitiveOpacity,
+ pub stops: Vec<GradientStop>,
+ pub brush_segments: Vec<BrushSegment>,
+ pub reverse_stops: bool,
+ pub stops_handle: GpuCacheHandle,
+ /// If true, this gradient can be drawn via the fast path
+ /// (cache gradient, and draw as image).
+ pub supports_caching: bool,
+}
+
+impl Deref for LinearGradientTemplate {
+ type Target = PrimTemplateCommonData;
+ fn deref(&self) -> &Self::Target {
+ &self.common
+ }
+}
+
+impl DerefMut for LinearGradientTemplate {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.common
+ }
+}
+
+impl From<LinearGradientKey> for LinearGradientTemplate {
+ fn from(item: LinearGradientKey) -> Self {
+ let common = PrimTemplateCommonData::with_key_common(item.common);
+
+ // Check if we can draw this gradient via a fast path by caching the
+ // gradient in a smaller task, and drawing as an image.
+ // TODO(gw): Aim to reduce the constraints on fast path gradients in future,
+ // although this catches the vast majority of gradients on real pages.
+ let mut supports_caching =
+ // Gradient must cover entire primitive
+ item.tile_spacing.w + item.stretch_size.w >= common.prim_rect.size.width &&
+ item.tile_spacing.h + item.stretch_size.h >= common.prim_rect.size.height &&
+ // Must be a vertical or horizontal gradient
+ (item.start_point.x.approx_eq(&item.end_point.x) ||
+ item.start_point.y.approx_eq(&item.end_point.y)) &&
+ // Fast path not supported on segmented (border-image) gradients.
+ item.nine_patch.is_none();
+
+ // if we support caching and the gradient uses repeat, we might potentially
+ // emit a lot of quads to cover the primitive. each quad will still cover
+ // the entire gradient along the other axis, so the effect is linear in
+ // display resolution, not quadratic (unlike say a tiny background image
+ // tiling the display). in addition, excessive minification may lead to
+ // texture thrashing. so use the minification as a proxy heuristic for both
+ // cases.
+ //
+ // note that the actual number of quads may be further increased due to
+ // hard-stops and/or more than GRADIENT_FP_STOPS stops per gradient.
+ if supports_caching && item.extend_mode == ExtendMode::Repeat {
+ let single_repeat_size =
+ if item.start_point.x.approx_eq(&item.end_point.x) {
+ item.end_point.y - item.start_point.y
+ } else {
+ item.end_point.x - item.start_point.x
+ };
+ let downscaling = single_repeat_size as f32 / TEXTURE_REGION_DIMENSIONS as f32;
+ if downscaling < 0.1 {
+ // if a single copy of the gradient is this small relative to its baked
+ // gradient cache, we have bad texture caching and/or too many quads.
+ supports_caching = false;
+ }
+ }
+
+ let (stops, min_alpha) = stops_and_min_alpha(&item.stops);
+
+ let mut brush_segments = Vec::new();
+
+ if let Some(ref nine_patch) = item.nine_patch {
+ brush_segments = nine_patch.create_segments(common.prim_rect.size);
+ }
+
+ // Save opacity of the stops for use in
+ // selecting which pass this gradient
+ // should be drawn in.
+ let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
+
+ LinearGradientTemplate {
+ common,
+ extend_mode: item.extend_mode,
+ start_point: item.start_point.into(),
+ end_point: item.end_point.into(),
+ stretch_size: item.stretch_size.into(),
+ tile_spacing: item.tile_spacing.into(),
+ stops_opacity,
+ stops,
+ brush_segments,
+ reverse_stops: item.reverse_stops,
+ stops_handle: GpuCacheHandle::new(),
+ supports_caching,
+ }
+ }
+}
+
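// A quick numeric illustration of the repeat-downscaling heuristic in the
// From impl above. The TEXTURE_REGION_DIMENSIONS value is assumed here for
// illustration; the real constant is imported from texture_cache.

fn repeat_keeps_fast_path(single_repeat_size: f32) -> bool {
    const TEXTURE_REGION_DIMENSIONS: f32 = 512.0; // assumed value
    // A repeat much smaller than the baked cache entry means heavy
    // minification and many quads, so the fast path is rejected.
    single_repeat_size / TEXTURE_REGION_DIMENSIONS >= 0.1
}

fn main() {
    assert!(!repeat_keeps_fast_path(40.0));  // 40/512 ~= 0.078: fast path off
    assert!(repeat_keeps_fast_path(256.0));  // 256/512 = 0.5: fast path kept
}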
+impl LinearGradientTemplate {
+ /// Update the GPU cache for a given primitive template. This may be called multiple
+ /// times per frame, by each primitive reference that refers to this interned
+ /// template. The initial request call to the GPU cache ensures that work is only
+ /// done if the cache entry is invalid (due to first use or eviction).
+ pub fn update(
+ &mut self,
+ frame_state: &mut FrameBuildingState,
+ ) {
+ if let Some(mut request) =
+ frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) {
+ // write_prim_gpu_blocks
+ request.push([
+ self.start_point.x,
+ self.start_point.y,
+ self.end_point.x,
+ self.end_point.y,
+ ]);
+ request.push([
+ pack_as_float(self.extend_mode as u32),
+ self.stretch_size.width,
+ self.stretch_size.height,
+ 0.0,
+ ]);
+
+ // write_segment_gpu_blocks
+ for segment in &self.brush_segments {
+ // has to match VECS_PER_SEGMENT
+ request.write_segment(
+ segment.local_rect,
+ segment.extra_data,
+ );
+ }
+ }
+
+ if let Some(mut request) = frame_state.gpu_cache.request(&mut self.stops_handle) {
+ GradientGpuBlockBuilder::build(
+ self.reverse_stops,
+ &mut request,
+ &self.stops,
+ );
+ }
+
+ self.opacity = {
+ // If the coverage of the gradient extends to or beyond
+ // the primitive rect, then the opacity can be determined
+ // by the colors of the stops. If we have tiling / spacing
+ // then we just assume the gradient is translucent for now.
+ // (In the future we could consider segmenting in some cases).
+ let stride = self.stretch_size + self.tile_spacing;
+ if stride.width >= self.common.prim_rect.size.width &&
+ stride.height >= self.common.prim_rect.size.height {
+ self.stops_opacity
+ } else {
+ PrimitiveOpacity::translucent()
+ }
+ }
+ }
+}
+
+pub type LinearGradientDataHandle = InternHandle<LinearGradient>;
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct LinearGradient {
+ pub extend_mode: ExtendMode,
+ pub start_point: PointKey,
+ pub end_point: PointKey,
+ pub stretch_size: SizeKey,
+ pub tile_spacing: SizeKey,
+ pub stops: Vec<GradientStopKey>,
+ pub reverse_stops: bool,
+ pub nine_patch: Option<Box<NinePatchDescriptor>>,
+}
+
+impl Internable for LinearGradient {
+ type Key = LinearGradientKey;
+ type StoreData = LinearGradientTemplate;
+ type InternData = ();
+}
+
+impl InternablePrimitive for LinearGradient {
+ fn into_key(
+ self,
+ info: &LayoutPrimitiveInfo,
+ ) -> LinearGradientKey {
+ LinearGradientKey::new(info, self)
+ }
+
+ fn make_instance_kind(
+ _key: LinearGradientKey,
+ data_handle: LinearGradientDataHandle,
+ prim_store: &mut PrimitiveStore,
+ _reference_frame_relative_offset: LayoutVector2D,
+ ) -> PrimitiveInstanceKind {
+ let gradient_index = prim_store.linear_gradients.push(LinearGradientPrimitive {
+ cache_segments: Vec::new(),
+ visible_tiles_range: GradientTileRange::empty(),
+ });
+
+ PrimitiveInstanceKind::LinearGradient {
+ data_handle,
+ gradient_index,
+ }
+ }
+}
+
+impl IsVisible for LinearGradient {
+ fn is_visible(&self) -> bool {
+ true
+ }
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct LinearGradientPrimitive {
+ pub cache_segments: Vec<CachedGradientSegment>,
+ pub visible_tiles_range: GradientTileRange,
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Hashable radial gradient parameters, for use during prim interning.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf, PartialEq)]
+pub struct RadialGradientParams {
+ pub start_radius: f32,
+ pub end_radius: f32,
+ pub ratio_xy: f32,
+}
+
+impl Eq for RadialGradientParams {}
+
+impl hash::Hash for RadialGradientParams {
+ fn hash<H: hash::Hasher>(&self, state: &mut H) {
+ self.start_radius.to_bits().hash(state);
+ self.end_radius.to_bits().hash(state);
+ self.ratio_xy.to_bits().hash(state);
+ }
+}
+
+/// Identifying key for a radial gradient.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
+pub struct RadialGradientKey {
+ pub common: PrimKeyCommonData,
+ pub extend_mode: ExtendMode,
+ pub center: PointKey,
+ pub params: RadialGradientParams,
+ pub stretch_size: SizeKey,
+ pub stops: Vec<GradientStopKey>,
+ pub tile_spacing: SizeKey,
+ pub nine_patch: Option<Box<NinePatchDescriptor>>,
+}
+
+impl RadialGradientKey {
+ pub fn new(
+ info: &LayoutPrimitiveInfo,
+ radial_grad: RadialGradient,
+ ) -> Self {
+ RadialGradientKey {
+ common: info.into(),
+ extend_mode: radial_grad.extend_mode,
+ center: radial_grad.center,
+ params: radial_grad.params,
+ stretch_size: radial_grad.stretch_size,
+ stops: radial_grad.stops,
+ tile_spacing: radial_grad.tile_spacing,
+ nine_patch: radial_grad.nine_patch,
+ }
+ }
+}
+
+impl InternDebug for RadialGradientKey {}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct RadialGradientTemplate {
+ pub common: PrimTemplateCommonData,
+ pub extend_mode: ExtendMode,
+ pub center: LayoutPoint,
+ pub params: RadialGradientParams,
+ pub stretch_size: LayoutSize,
+ pub tile_spacing: LayoutSize,
+ pub brush_segments: Vec<BrushSegment>,
+ pub stops_opacity: PrimitiveOpacity,
+ pub stops: Vec<GradientStop>,
+ pub stops_handle: GpuCacheHandle,
+}
+
+impl Deref for RadialGradientTemplate {
+ type Target = PrimTemplateCommonData;
+ fn deref(&self) -> &Self::Target {
+ &self.common
+ }
+}
+
+impl DerefMut for RadialGradientTemplate {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.common
+ }
+}
+
+impl From<RadialGradientKey> for RadialGradientTemplate {
+ fn from(item: RadialGradientKey) -> Self {
+ let common = PrimTemplateCommonData::with_key_common(item.common);
+ let mut brush_segments = Vec::new();
+
+ if let Some(ref nine_patch) = item.nine_patch {
+ brush_segments = nine_patch.create_segments(common.prim_rect.size);
+ }
+
+ let (stops, min_alpha) = stops_and_min_alpha(&item.stops);
+
+ // Save opacity of the stops for use in
+ // selecting which pass this gradient
+ // should be drawn in.
+ let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
+
+ RadialGradientTemplate {
+ common,
+ center: item.center.into(),
+ extend_mode: item.extend_mode,
+ params: item.params,
+ stretch_size: item.stretch_size.into(),
+ tile_spacing: item.tile_spacing.into(),
+ brush_segments,
+ stops_opacity,
+ stops,
+ stops_handle: GpuCacheHandle::new(),
+ }
+ }
+}
+
+impl RadialGradientTemplate {
+ /// Update the GPU cache for a given primitive template. This may be called multiple
+ /// times per frame, by each primitive reference that refers to this interned
+ /// template. The initial request call to the GPU cache ensures that work is only
+ /// done if the cache entry is invalid (due to first use or eviction).
+ pub fn update(
+ &mut self,
+ frame_state: &mut FrameBuildingState,
+ ) {
+ if let Some(mut request) =
+ frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) {
+ // write_prim_gpu_blocks
+ request.push([
+ self.center.x,
+ self.center.y,
+ self.params.start_radius,
+ self.params.end_radius,
+ ]);
+ request.push([
+ self.params.ratio_xy,
+ pack_as_float(self.extend_mode as u32),
+ self.stretch_size.width,
+ self.stretch_size.height,
+ ]);
+
+ // write_segment_gpu_blocks
+ for segment in &self.brush_segments {
+ // has to match VECS_PER_SEGMENT
+ request.write_segment(
+ segment.local_rect,
+ segment.extra_data,
+ );
+ }
+ }
+
+ if let Some(mut request) = frame_state.gpu_cache.request(&mut self.stops_handle) {
+ GradientGpuBlockBuilder::build(
+ false,
+ &mut request,
+ &self.stops,
+ );
+ }
+
+ self.opacity = PrimitiveOpacity::translucent();
+ }
+}
+
+pub type RadialGradientDataHandle = InternHandle<RadialGradient>;
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RadialGradient {
+ pub extend_mode: ExtendMode,
+ pub center: PointKey,
+ pub params: RadialGradientParams,
+ pub stretch_size: SizeKey,
+ pub stops: Vec<GradientStopKey>,
+ pub tile_spacing: SizeKey,
+ pub nine_patch: Option<Box<NinePatchDescriptor>>,
+}
+
+impl Internable for RadialGradient {
+ type Key = RadialGradientKey;
+ type StoreData = RadialGradientTemplate;
+ type InternData = ();
+}
+
+impl InternablePrimitive for RadialGradient {
+ fn into_key(
+ self,
+ info: &LayoutPrimitiveInfo,
+ ) -> RadialGradientKey {
+ RadialGradientKey::new(info, self)
+ }
+
+ fn make_instance_kind(
+ _key: RadialGradientKey,
+ data_handle: RadialGradientDataHandle,
+ _prim_store: &mut PrimitiveStore,
+ _reference_frame_relative_offset: LayoutVector2D,
+ ) -> PrimitiveInstanceKind {
+ PrimitiveInstanceKind::RadialGradient {
+ data_handle,
+ visible_tiles_range: GradientTileRange::empty(),
+ }
+ }
+}
+
+impl IsVisible for RadialGradient {
+ fn is_visible(&self) -> bool {
+ true
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+/// Conic gradients
+
+/// Hashable conic gradient parameters, for use during prim interning.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, MallocSizeOf, PartialEq)]
+pub struct ConicGradientParams {
+ pub angle: f32, // in radians
+ pub start_offset: f32,
+ pub end_offset: f32,
+}
+
+impl Eq for ConicGradientParams {}
+
+impl hash::Hash for ConicGradientParams {
+ fn hash<H: hash::Hasher>(&self, state: &mut H) {
+ self.angle.to_bits().hash(state);
+ self.start_offset.to_bits().hash(state);
+ self.end_offset.to_bits().hash(state);
+ }
+}
+
+/// Identifying key for a line decoration.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
+pub struct ConicGradientKey {
+ pub common: PrimKeyCommonData,
+ pub extend_mode: ExtendMode,
+ pub center: PointKey,
+ pub params: ConicGradientParams,
+ pub stretch_size: SizeKey,
+ pub stops: Vec<GradientStopKey>,
+ pub tile_spacing: SizeKey,
+ pub nine_patch: Option<Box<NinePatchDescriptor>>,
+}
+
+impl ConicGradientKey {
+ pub fn new(
+ info: &LayoutPrimitiveInfo,
+ conic_grad: ConicGradient,
+ ) -> Self {
+ ConicGradientKey {
+ common: info.into(),
+ extend_mode: conic_grad.extend_mode,
+ center: conic_grad.center,
+ params: conic_grad.params,
+ stretch_size: conic_grad.stretch_size,
+ stops: conic_grad.stops,
+ tile_spacing: conic_grad.tile_spacing,
+ nine_patch: conic_grad.nine_patch,
+ }
+ }
+}
+
+impl InternDebug for ConicGradientKey {}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(MallocSizeOf)]
+pub struct ConicGradientTemplate {
+ pub common: PrimTemplateCommonData,
+ pub extend_mode: ExtendMode,
+ pub center: LayoutPoint,
+ pub params: ConicGradientParams,
+ pub stretch_size: LayoutSize,
+ pub tile_spacing: LayoutSize,
+ pub brush_segments: Vec<BrushSegment>,
+ pub stops_opacity: PrimitiveOpacity,
+ pub stops: Vec<GradientStop>,
+ pub stops_handle: GpuCacheHandle,
+}
+
+impl Deref for ConicGradientTemplate {
+ type Target = PrimTemplateCommonData;
+ fn deref(&self) -> &Self::Target {
+ &self.common
+ }
+}
+
+impl DerefMut for ConicGradientTemplate {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.common
+ }
+}
+
+impl From<ConicGradientKey> for ConicGradientTemplate {
+ fn from(item: ConicGradientKey) -> Self {
+ let common = PrimTemplateCommonData::with_key_common(item.common);
+ let mut brush_segments = Vec::new();
+
+ if let Some(ref nine_patch) = item.nine_patch {
+ brush_segments = nine_patch.create_segments(common.prim_rect.size);
+ }
+
+ let (stops, min_alpha) = stops_and_min_alpha(&item.stops);
+
+ // Save opacity of the stops for use in
+ // selecting which pass this gradient
+ // should be drawn in.
+ let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
+
+ ConicGradientTemplate {
+ common,
+ center: item.center.into(),
+ extend_mode: item.extend_mode,
+ params: item.params,
+ stretch_size: item.stretch_size.into(),
+ tile_spacing: item.tile_spacing.into(),
+ brush_segments,
+ stops_opacity,
+ stops,
+ stops_handle: GpuCacheHandle::new(),
+ }
+ }
+}
+
+impl ConicGradientTemplate {
+ /// Update the GPU cache for a given primitive template. This may be called multiple
+ /// times per frame, by each primitive reference that refers to this interned
+ /// template. The initial request call to the GPU cache ensures that work is only
+ /// done if the cache entry is invalid (due to first use or eviction).
+ pub fn update(
+ &mut self,
+ frame_state: &mut FrameBuildingState,
+ ) {
+ if let Some(mut request) =
+ frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) {
+ // write_prim_gpu_blocks
+ request.push([
+ self.center.x,
+ self.center.y,
+ self.params.start_offset,
+ self.params.end_offset,
+ ]);
+ request.push([
+ self.params.angle,
+ pack_as_float(self.extend_mode as u32),
+ self.stretch_size.width,
+ self.stretch_size.height,
+ ]);
+
+ // write_segment_gpu_blocks
+ for segment in &self.brush_segments {
+ // has to match VECS_PER_SEGMENT
+ request.write_segment(
+ segment.local_rect,
+ segment.extra_data,
+ );
+ }
+ }
+
+ if let Some(mut request) = frame_state.gpu_cache.request(&mut self.stops_handle) {
+ GradientGpuBlockBuilder::build(
+ false,
+ &mut request,
+ &self.stops,
+ );
+ }
+
+ self.opacity = PrimitiveOpacity::translucent();
+ }
+}
+
+pub type ConicGradientDataHandle = InternHandle<ConicGradient>;
+
+#[derive(Debug, MallocSizeOf)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ConicGradient {
+ pub extend_mode: ExtendMode,
+ pub center: PointKey,
+ pub params: ConicGradientParams,
+ pub stretch_size: SizeKey,
+ pub stops: Vec<GradientStopKey>,
+ pub tile_spacing: SizeKey,
+ pub nine_patch: Option<Box<NinePatchDescriptor>>,
+}
+
+impl Internable for ConicGradient {
+ type Key = ConicGradientKey;
+ type StoreData = ConicGradientTemplate;
+ type InternData = ();
+}
+
+impl InternablePrimitive for ConicGradient {
+ fn into_key(
+ self,
+ info: &LayoutPrimitiveInfo,
+ ) -> ConicGradientKey {
+ ConicGradientKey::new(info, self)
+ }
+
+ fn make_instance_kind(
+ _key: ConicGradientKey,
+ data_handle: ConicGradientDataHandle,
+ _prim_store: &mut PrimitiveStore,
+ _reference_frame_relative_offset: LayoutVector2D,
+ ) -> PrimitiveInstanceKind {
+ PrimitiveInstanceKind::ConicGradient {
+ data_handle,
+ visible_tiles_range: GradientTileRange::empty(),
+ }
+ }
+}
+
+impl IsVisible for ConicGradient {
+ fn is_visible(&self) -> bool {
+ true
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// The gradient entry index for the first color stop
+pub const GRADIENT_DATA_FIRST_STOP: usize = 0;
+// The gradient entry index for the last color stop
+pub const GRADIENT_DATA_LAST_STOP: usize = GRADIENT_DATA_SIZE - 1;
+
+// The start of the gradient data table
+pub const GRADIENT_DATA_TABLE_BEGIN: usize = GRADIENT_DATA_FIRST_STOP + 1;
+// The exclusive bound of the gradient data table
+pub const GRADIENT_DATA_TABLE_END: usize = GRADIENT_DATA_LAST_STOP;
+// The number of entries in the gradient data table.
+pub const GRADIENT_DATA_TABLE_SIZE: usize = 128;
+
+// The total number of entries in a gradient data block: GRADIENT_DATA_TABLE_SIZE + first stop entry + last stop entry
+pub const GRADIENT_DATA_SIZE: usize = GRADIENT_DATA_TABLE_SIZE + 2;
+
+/// An entry in a gradient data table representing a segment of the gradient
+/// color space.
+#[derive(Debug, Copy, Clone)]
+#[repr(C)]
+struct GradientDataEntry {
+ start_color: PremultipliedColorF,
+ end_color: PremultipliedColorF,
+}
+
+impl GradientDataEntry {
+ fn white() -> Self {
+ Self {
+ start_color: PremultipliedColorF::WHITE,
+ end_color: PremultipliedColorF::WHITE,
+ }
+ }
+}
+
+// TODO(gw): Tidy this up to be a free function / module?
+struct GradientGpuBlockBuilder {}
+
+impl GradientGpuBlockBuilder {
+ /// Generate a color ramp filling the indices in [start_idx, end_idx) and interpolating
+ /// from start_color to end_color.
+ fn fill_colors(
+ start_idx: usize,
+ end_idx: usize,
+ start_color: &PremultipliedColorF,
+ end_color: &PremultipliedColorF,
+ entries: &mut [GradientDataEntry; GRADIENT_DATA_SIZE],
+ ) {
+ // Calculate the color difference for individual steps in the ramp.
+ let inv_steps = 1.0 / (end_idx - start_idx) as f32;
+ let step_r = (end_color.r - start_color.r) * inv_steps;
+ let step_g = (end_color.g - start_color.g) * inv_steps;
+ let step_b = (end_color.b - start_color.b) * inv_steps;
+ let step_a = (end_color.a - start_color.a) * inv_steps;
+
+ let mut cur_color = *start_color;
+
+ // Walk the ramp writing start and end colors for each entry.
+ for index in start_idx .. end_idx {
+ let entry = &mut entries[index];
+ entry.start_color = cur_color;
+ cur_color.r += step_r;
+ cur_color.g += step_g;
+ cur_color.b += step_b;
+ cur_color.a += step_a;
+ entry.end_color = cur_color;
+ }
+ }
+
+ /// Compute an index into the gradient entry table based on a gradient stop offset. This
+ /// function maps offsets from [0, 1] to indices in [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END].
+ #[inline]
+ fn get_index(offset: f32) -> usize {
+ (offset.max(0.0).min(1.0) * GRADIENT_DATA_TABLE_SIZE as f32 +
+ GRADIENT_DATA_TABLE_BEGIN as f32)
+ .round() as usize
+ }
+
+ // Build the gradient data from the supplied stops, reversing them if necessary.
+ fn build(
+ reverse_stops: bool,
+ request: &mut GpuDataRequest,
+ src_stops: &[GradientStop],
+ ) {
+ // Preconditions (should be ensured by DisplayListBuilder):
+ // * we have at least two stops
+ // * first stop has offset 0.0
+ // * last stop has offset 1.0
+ let mut src_stops = src_stops.into_iter();
+ let mut cur_color = match src_stops.next() {
+ Some(stop) => {
+ debug_assert_eq!(stop.offset, 0.0);
+ stop.color.premultiplied()
+ }
+ None => {
+ error!("Zero gradient stops found!");
+ PremultipliedColorF::BLACK
+ }
+ };
+
+ // A table of gradient entries, with two colors per entry, that specify the start and end color
+ // within the segment of the gradient space represented by that entry. To look up a gradient result,
+ // first the entry index is calculated to determine which two colors to interpolate between, then
+ // the offset within that entry bucket is used to interpolate between the two colors in that entry.
+ // This layout preserves hard stops, as the end color for a given entry can differ from the start
+ // color for the following entry, despite them being adjacent. Colors are stored in BGRA8
+ // format for texture upload. This table requires the gradient color stops to be normalized to the
+ // range [0, 1]. The first and last entries hold the first and last color stop colors respectively,
+ // while the entries in between hold the interpolated color stop values for the range [0, 1].
+ let mut entries = [GradientDataEntry::white(); GRADIENT_DATA_SIZE];
+
+ if reverse_stops {
+ // Fill in the first entry (for reversed stops) with the first color stop
+ GradientGpuBlockBuilder::fill_colors(
+ GRADIENT_DATA_LAST_STOP,
+ GRADIENT_DATA_LAST_STOP + 1,
+ &cur_color,
+ &cur_color,
+ &mut entries,
+ );
+
+ // Fill in the center of the gradient table, generating a color ramp between each consecutive pair
+ // of gradient stops. Each iteration of a loop will fill the indices in [next_idx, cur_idx). The
+ // loop will then fill indices in [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END).
+ let mut cur_idx = GRADIENT_DATA_TABLE_END;
+ for next in src_stops {
+ let next_color = next.color.premultiplied();
+ let next_idx = Self::get_index(1.0 - next.offset);
+
+ if next_idx < cur_idx {
+ GradientGpuBlockBuilder::fill_colors(
+ next_idx,
+ cur_idx,
+ &next_color,
+ &cur_color,
+ &mut entries,
+ );
+ cur_idx = next_idx;
+ }
+
+ cur_color = next_color;
+ }
+ if cur_idx != GRADIENT_DATA_TABLE_BEGIN {
+ error!("Gradient stops abruptly at {}, auto-completing to white", cur_idx);
+ }
+
+ // Fill in the last entry (for reversed stops) with the last color stop
+ GradientGpuBlockBuilder::fill_colors(
+ GRADIENT_DATA_FIRST_STOP,
+ GRADIENT_DATA_FIRST_STOP + 1,
+ &cur_color,
+ &cur_color,
+ &mut entries,
+ );
+ } else {
+ // Fill in the first entry with the first color stop
+ GradientGpuBlockBuilder::fill_colors(
+ GRADIENT_DATA_FIRST_STOP,
+ GRADIENT_DATA_FIRST_STOP + 1,
+ &cur_color,
+ &cur_color,
+ &mut entries,
+ );
+
+ // Fill in the center of the gradient table, generating a color ramp between each consecutive pair
+ // of gradient stops. Each iteration of a loop will fill the indices in [cur_idx, next_idx). The
+ // loop will then fill indices in [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END).
+ let mut cur_idx = GRADIENT_DATA_TABLE_BEGIN;
+ for next in src_stops {
+ let next_color = next.color.premultiplied();
+ let next_idx = Self::get_index(next.offset);
+
+ if next_idx > cur_idx {
+ GradientGpuBlockBuilder::fill_colors(
+ cur_idx,
+ next_idx,
+ &cur_color,
+ &next_color,
+ &mut entries,
+ );
+ cur_idx = next_idx;
+ }
+
+ cur_color = next_color;
+ }
+ if cur_idx != GRADIENT_DATA_TABLE_END {
+ error!("Gradient stops abruptly at {}, auto-completing to white", cur_idx);
+ }
+
+ // Fill in the last entry with the last color stop
+ GradientGpuBlockBuilder::fill_colors(
+ GRADIENT_DATA_LAST_STOP,
+ GRADIENT_DATA_LAST_STOP + 1,
+ &cur_color,
+ &cur_color,
+ &mut entries,
+ );
+ }
+
+ for entry in entries.iter() {
+ request.push(entry.start_color);
+ request.push(entry.end_color);
+ }
+ }
+}
+
+#[test]
+#[cfg(target_pointer_width = "64")]
+fn test_struct_sizes() {
+ use std::mem;
+ // The sizes of these structures are critical for performance on a number of
+ // talos stress tests. If you get a failure here on CI, there are two possibilities:
+ // (a) You made a structure smaller than it currently is. Great work! Update the
+ // test expectations and move on.
+ // (b) You made a structure larger. This is not necessarily a problem, but should only
+ // be done with care, and after checking if talos performance regresses badly.
+ assert_eq!(mem::size_of::<LinearGradient>(), 72, "LinearGradient size changed");
+ assert_eq!(mem::size_of::<LinearGradientTemplate>(), 120, "LinearGradientTemplate size changed");
+ assert_eq!(mem::size_of::<LinearGradientKey>(), 88, "LinearGradientKey size changed");
+
+ assert_eq!(mem::size_of::<RadialGradient>(), 72, "RadialGradient size changed");
+ assert_eq!(mem::size_of::<RadialGradientTemplate>(), 128, "RadialGradientTemplate size changed");
+ assert_eq!(mem::size_of::<RadialGradientKey>(), 96, "RadialGradientKey size changed");
+
+ assert_eq!(mem::size_of::<ConicGradient>(), 72, "ConicGradient size changed");
+ assert_eq!(mem::size_of::<ConicGradientTemplate>(), 128, "ConicGradientTemplate size changed");
+ assert_eq!(mem::size_of::<ConicGradientKey>(), 96, "ConicGradientKey size changed");
+}
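// A self-contained sketch of the gradient table addressing implemented by
// GradientGpuBlockBuilder above, using the same constants (a table of 128
// entries bracketed by dedicated first/last stop entries). It shows how
// stop offsets map to entry indices, and why hard stops stay sharp: when
// two stops share an offset, no ramp is written between them, so an entry's
// end color can differ from the next entry's start color.

const TABLE_SIZE: usize = 128;
const TABLE_BEGIN: usize = 1;          // entry 0 holds the first stop color
const TABLE_END: usize = TABLE_BEGIN + TABLE_SIZE;

fn get_index(offset: f32) -> usize {
    (offset.max(0.0).min(1.0) * TABLE_SIZE as f32 + TABLE_BEGIN as f32).round() as usize
}

fn main() {
    assert_eq!(get_index(0.0), TABLE_BEGIN); // offset 0 -> first table entry
    assert_eq!(get_index(0.5), 65);          // middle of the table
    assert_eq!(get_index(1.0), TABLE_END);   // offset 1 -> exclusive bound
    // Stops at (0.5, red) and (0.5, blue) both map to index 65: fill_colors
    // gets an empty range, and the ramp simply switches color at that entry.
}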
diff --git a/third_party/webrender/webrender/src/prim_store/gradient/conic.rs b/third_party/webrender/webrender/src/prim_store/gradient/conic.rs
deleted file mode 100644
index 34bcb976993..00000000000
--- a/third_party/webrender/webrender/src/prim_store/gradient/conic.rs
+++ /dev/null
@@ -1,363 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! Conic gradients
-//!
-//! Specification: https://drafts.csswg.org/css-images-4/#conic-gradients
-//!
-//! Conic gradients are rendered via cached render tasks and composited with the image brush.
-
-use euclid::vec2;
-use api::{ExtendMode, GradientStop, PremultipliedColorF};
-use api::units::*;
-use crate::scene_building::IsVisible;
-use crate::frame_builder::FrameBuildingState;
-use crate::gpu_cache::{GpuCache, GpuCacheHandle};
-use crate::intern::{Internable, InternDebug, Handle as InternHandle};
-use crate::internal_types::LayoutPrimitiveInfo;
-use crate::prim_store::{BrushSegment, GradientTileRange};
-use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity, FloatKey};
-use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore};
-use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, InternablePrimitive};
-use crate::render_task::{RenderTask, RenderTaskKind};
-use crate::render_task_graph::RenderTaskId;
-use crate::render_task_cache::{RenderTaskCacheKeyKind, RenderTaskCacheKey, RenderTaskParent};
-use crate::picture::{SurfaceIndex};
-
-use std::{hash, ops::{Deref, DerefMut}};
-use super::{stops_and_min_alpha, GradientStopKey, GradientGpuBlockBuilder};
-
-/// Hashable conic gradient parameters, for use during prim interning.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Clone, MallocSizeOf, PartialEq)]
-pub struct ConicGradientParams {
- pub angle: f32, // in radians
- pub start_offset: f32,
- pub end_offset: f32,
-}
-
-impl Eq for ConicGradientParams {}
-
-impl hash::Hash for ConicGradientParams {
- fn hash<H: hash::Hasher>(&self, state: &mut H) {
- self.angle.to_bits().hash(state);
- self.start_offset.to_bits().hash(state);
- self.end_offset.to_bits().hash(state);
- }
-}
-
-/// Identifying key for a line decoration.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
-pub struct ConicGradientKey {
- pub common: PrimKeyCommonData,
- pub extend_mode: ExtendMode,
- pub center: PointKey,
- pub params: ConicGradientParams,
- pub stretch_size: SizeKey,
- pub stops: Vec<GradientStopKey>,
- pub tile_spacing: SizeKey,
- pub nine_patch: Option<Box<NinePatchDescriptor>>,
-}
-
-impl ConicGradientKey {
- pub fn new(
- info: &LayoutPrimitiveInfo,
- conic_grad: ConicGradient,
- ) -> Self {
- ConicGradientKey {
- common: info.into(),
- extend_mode: conic_grad.extend_mode,
- center: conic_grad.center,
- params: conic_grad.params,
- stretch_size: conic_grad.stretch_size,
- stops: conic_grad.stops,
- tile_spacing: conic_grad.tile_spacing,
- nine_patch: conic_grad.nine_patch,
- }
- }
-}
-
-impl InternDebug for ConicGradientKey {}
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(MallocSizeOf)]
-pub struct ConicGradientTemplate {
- pub common: PrimTemplateCommonData,
- pub extend_mode: ExtendMode,
- pub center: DevicePoint,
- pub params: ConicGradientParams,
- pub task_size: DeviceIntSize,
- pub scale: DeviceVector2D,
- pub stretch_size: LayoutSize,
- pub tile_spacing: LayoutSize,
- pub brush_segments: Vec<BrushSegment>,
- pub stops_opacity: PrimitiveOpacity,
- pub stops: Vec<GradientStop>,
- pub stops_handle: GpuCacheHandle,
- pub src_color: Option<RenderTaskId>,
-}
-
-impl Deref for ConicGradientTemplate {
- type Target = PrimTemplateCommonData;
- fn deref(&self) -> &Self::Target {
- &self.common
- }
-}
-
-impl DerefMut for ConicGradientTemplate {
- fn deref_mut(&mut self) -> &mut Self::Target {
- &mut self.common
- }
-}
-
-impl From<ConicGradientKey> for ConicGradientTemplate {
- fn from(item: ConicGradientKey) -> Self {
- let common = PrimTemplateCommonData::with_key_common(item.common);
- let mut brush_segments = Vec::new();
-
- if let Some(ref nine_patch) = item.nine_patch {
- brush_segments = nine_patch.create_segments(common.prim_rect.size);
- }
-
- let (stops, min_alpha) = stops_and_min_alpha(&item.stops);
-
- // Save opacity of the stops for use in
- // selecting which pass this gradient
- // should be drawn in.
- let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
-
- let mut stretch_size: LayoutSize = item.stretch_size.into();
- stretch_size.width = stretch_size.width.min(common.prim_rect.size.width);
- stretch_size.height = stretch_size.height.min(common.prim_rect.size.height);
-
- // Avoid rendering enormous gradients. Conic gradients are mostly made of soft transitions,
- // so it is unlikely that rendering at a higher resolution than 1024 would produce noticeable
- // differences, especially with 8 bits per channel.
- const MAX_SIZE: f32 = 1024.0;
- let mut task_size: DeviceSize = stretch_size.cast_unit();
- let mut scale = vec2(1.0, 1.0);
- if task_size.width > MAX_SIZE {
- scale.x = task_size.width / MAX_SIZE;
- task_size.width = MAX_SIZE;
- }
- if task_size.height > MAX_SIZE {
- scale.y = task_size.height / MAX_SIZE;
- task_size.height = MAX_SIZE;
- }
-
- ConicGradientTemplate {
- common,
- center: DevicePoint::new(item.center.x, item.center.y),
- extend_mode: item.extend_mode,
- params: item.params,
- stretch_size,
- task_size: task_size.ceil().to_i32(),
- scale,
- tile_spacing: item.tile_spacing.into(),
- brush_segments,
- stops_opacity,
- stops,
- stops_handle: GpuCacheHandle::new(),
- src_color: None,
- }
- }
-}
-
-impl ConicGradientTemplate {
- /// Update the GPU cache for a given primitive template. This may be called multiple
- /// times per frame, by each primitive reference that refers to this interned
- /// template. The initial request call to the GPU cache ensures that work is only
- /// done if the cache entry is invalid (due to first use or eviction).
- pub fn update(
- &mut self,
- frame_state: &mut FrameBuildingState,
- parent_surface: SurfaceIndex,
- ) {
- if let Some(mut request) =
- frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) {
- // write_prim_gpu_blocks
- request.push(PremultipliedColorF::WHITE);
- request.push(PremultipliedColorF::WHITE);
- request.push([
- self.stretch_size.width,
- self.stretch_size.height,
- 0.0,
- 0.0,
- ]);
-
- // write_segment_gpu_blocks
- for segment in &self.brush_segments {
- // has to match VECS_PER_SEGMENT
- request.write_segment(
- segment.local_rect,
- segment.extra_data,
- );
- }
- }
-
- if let Some(mut request) = frame_state.gpu_cache.request(&mut self.stops_handle) {
- GradientGpuBlockBuilder::build(
- false,
- &mut request,
- &self.stops,
- );
- }
-
- let cache_key = ConicGradientCacheKey {
- size: self.task_size,
- center: PointKey { x: self.center.x, y: self.center.y },
- scale: PointKey { x: self.scale.x, y: self.scale.y },
- start_offset: FloatKey(self.params.start_offset),
- end_offset: FloatKey(self.params.end_offset),
- angle: FloatKey(self.params.angle),
- extend_mode: self.extend_mode,
- stops: self.stops.iter().map(|stop| (*stop).into()).collect(),
- };
-
- let task_id = frame_state.resource_cache.request_render_task(
- RenderTaskCacheKey {
- size: self.task_size,
- kind: RenderTaskCacheKeyKind::ConicGradient(cache_key),
- },
- frame_state.gpu_cache,
- frame_state.rg_builder,
- None,
- false,
- RenderTaskParent::Surface(parent_surface),
- frame_state.surfaces,
- |rg_builder| {
- rg_builder.add().init(RenderTask::new_dynamic(
- self.task_size,
- RenderTaskKind::ConicGradient(ConicGradientTask {
- extend_mode: self.extend_mode,
- scale: self.scale,
- center: self.center,
- params: self.params.clone(),
- stops: self.stops_handle,
- }),
- ))
- }
- );
-
- self.src_color = Some(task_id);
-
- // Tile spacing is always handled by decomposing into separate draw calls so the
- // primitive opacity is equivalent to stops opacity. This might change to being
- // set to non-opaque in the presence of tile spacing if/when tile spacing is handled
- // in the same way as with the image primitive.
- self.opacity = self.stops_opacity;
- }
-}
-
-pub type ConicGradientDataHandle = InternHandle<ConicGradient>;
-
-#[derive(Debug, MallocSizeOf)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct ConicGradient {
- pub extend_mode: ExtendMode,
- pub center: PointKey,
- pub params: ConicGradientParams,
- pub stretch_size: SizeKey,
- pub stops: Vec<GradientStopKey>,
- pub tile_spacing: SizeKey,
- pub nine_patch: Option<Box<NinePatchDescriptor>>,
-}
-
-impl Internable for ConicGradient {
- type Key = ConicGradientKey;
- type StoreData = ConicGradientTemplate;
- type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_CONIC_GRADIENTS;
-}
-
-impl InternablePrimitive for ConicGradient {
- fn into_key(
- self,
- info: &LayoutPrimitiveInfo,
- ) -> ConicGradientKey {
- ConicGradientKey::new(info, self)
- }
-
- fn make_instance_kind(
- _key: ConicGradientKey,
- data_handle: ConicGradientDataHandle,
- _prim_store: &mut PrimitiveStore,
- _reference_frame_relative_offset: LayoutVector2D,
- ) -> PrimitiveInstanceKind {
- PrimitiveInstanceKind::ConicGradient {
- data_handle,
- visible_tiles_range: GradientTileRange::empty(),
- }
- }
-}
-
-impl IsVisible for ConicGradient {
- fn is_visible(&self) -> bool {
- true
- }
-}
-
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct ConicGradientTask {
- pub extend_mode: ExtendMode,
- pub center: DevicePoint,
- pub scale: DeviceVector2D,
- pub params: ConicGradientParams,
- pub stops: GpuCacheHandle,
-}
-
-impl ConicGradientTask {
- pub fn to_instance(&self, target_rect: &DeviceIntRect, gpu_cache: &mut GpuCache) -> ConicGradientInstance {
- ConicGradientInstance {
- task_rect: target_rect.to_f32(),
- center: self.center,
- scale: self.scale,
- start_offset: self.params.start_offset,
- end_offset: self.params.end_offset,
- angle: self.params.angle,
- extend_mode: self.extend_mode as i32,
- gradient_stops_address: self.stops.as_int(gpu_cache),
- }
- }
-}
-
-/// The per-instance shader input of a conic gradient render task.
-///
-/// Must match the CONIC_GRADIENT instance description in renderer/vertex.rs.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-#[derive(Clone, Debug)]
-pub struct ConicGradientInstance {
- pub task_rect: DeviceRect,
- pub center: DevicePoint,
- pub scale: DeviceVector2D,
- pub start_offset: f32,
- pub end_offset: f32,
- pub angle: f32,
- pub extend_mode: i32,
- pub gradient_stops_address: i32,
-}
-
-#[derive(Clone, Debug, Hash, PartialEq, Eq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct ConicGradientCacheKey {
- pub size: DeviceIntSize,
- pub center: PointKey,
- pub scale: PointKey,
- pub start_offset: FloatKey,
- pub end_offset: FloatKey,
- pub angle: FloatKey,
- pub extend_mode: ExtendMode,
- pub stops: Vec<GradientStopKey>,
-}
-
diff --git a/third_party/webrender/webrender/src/prim_store/gradient/linear.rs b/third_party/webrender/webrender/src/prim_store/gradient/linear.rs
deleted file mode 100644
index e608cb0bae1..00000000000
--- a/third_party/webrender/webrender/src/prim_store/gradient/linear.rs
+++ /dev/null
@@ -1,723 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! Linear gradients
-//!
-//! Specification: https://drafts.csswg.org/css-images-4/#linear-gradients
-//!
-//! Linear gradients are rendered via cached render tasks and composited with the image brush.
-
-use euclid::approxeq::ApproxEq;
-use euclid::{point2, vec2, size2};
-use api::{ExtendMode, GradientStop, LineOrientation, PremultipliedColorF, ColorF, ColorU};
-use api::units::*;
-use crate::scene_building::IsVisible;
-use crate::frame_builder::FrameBuildingState;
-use crate::gpu_cache::{GpuCache, GpuCacheHandle};
-use crate::intern::{Internable, InternDebug, Handle as InternHandle};
-use crate::internal_types::LayoutPrimitiveInfo;
-use crate::image_tiling::simplify_repeated_primitive;
-use crate::prim_store::{BrushSegment, GradientTileRange};
-use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity};
-use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore};
-use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, InternablePrimitive};
-use crate::render_task::{RenderTask, RenderTaskKind};
-use crate::render_task_graph::RenderTaskId;
-use crate::render_task_cache::{RenderTaskCacheKeyKind, RenderTaskCacheKey, RenderTaskParent};
-use crate::picture::{SurfaceIndex};
-use crate::util::pack_as_float;
-use super::{stops_and_min_alpha, GradientStopKey, GradientGpuBlockBuilder, apply_gradient_local_clip};
-use std::ops::{Deref, DerefMut};
-use std::mem::swap;
-
-/// Identifying key for a linear gradient.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
-pub struct LinearGradientKey {
- pub common: PrimKeyCommonData,
- pub extend_mode: ExtendMode,
- pub start_point: PointKey,
- pub end_point: PointKey,
- pub stretch_size: SizeKey,
- pub tile_spacing: SizeKey,
- pub stops: Vec<GradientStopKey>,
- pub reverse_stops: bool,
- pub cached: bool,
- pub nine_patch: Option<Box<NinePatchDescriptor>>,
-}
-
-impl LinearGradientKey {
- pub fn new(
- info: &LayoutPrimitiveInfo,
- linear_grad: LinearGradient,
- ) -> Self {
- LinearGradientKey {
- common: info.into(),
- extend_mode: linear_grad.extend_mode,
- start_point: linear_grad.start_point,
- end_point: linear_grad.end_point,
- stretch_size: linear_grad.stretch_size,
- tile_spacing: linear_grad.tile_spacing,
- stops: linear_grad.stops,
- reverse_stops: linear_grad.reverse_stops,
- cached: linear_grad.cached,
- nine_patch: linear_grad.nine_patch,
- }
- }
-}
-
-impl InternDebug for LinearGradientKey {}
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, MallocSizeOf)]
-pub struct LinearGradientTemplate {
- pub common: PrimTemplateCommonData,
- pub extend_mode: ExtendMode,
- pub start_point: DevicePoint,
- pub end_point: DevicePoint,
- pub task_size: DeviceIntSize,
- pub scale: DeviceVector2D,
- pub stretch_size: LayoutSize,
- pub tile_spacing: LayoutSize,
- pub stops_opacity: PrimitiveOpacity,
- pub stops: Vec<GradientStop>,
- pub brush_segments: Vec<BrushSegment>,
- pub reverse_stops: bool,
- pub is_fast_path: bool,
- pub cached: bool,
- pub stops_handle: GpuCacheHandle,
- pub src_color: Option<RenderTaskId>,
-}
-
-impl Deref for LinearGradientTemplate {
- type Target = PrimTemplateCommonData;
- fn deref(&self) -> &Self::Target {
- &self.common
- }
-}
-
-impl DerefMut for LinearGradientTemplate {
- fn deref_mut(&mut self) -> &mut Self::Target {
- &mut self.common
- }
-}
-
-/// Perform a few optimizations to the gradient that are relevant to scene building.
-///
-/// Returns true if the gradient was decomposed into fast-path primitives, indicating
-/// that we shouldn't emit a regular gradient primitive after this returns.
-pub fn optimize_linear_gradient(
- prim_rect: &mut LayoutRect,
- tile_size: &mut LayoutSize,
- mut tile_spacing: LayoutSize,
- clip_rect: &LayoutRect,
- start: &mut LayoutPoint,
- end: &mut LayoutPoint,
- extend_mode: ExtendMode,
- stops: &mut [GradientStopKey],
- // Callback invoked for each fast-path segment (rect, start, end, stops).
- callback: &mut dyn FnMut(&LayoutRect, LayoutPoint, LayoutPoint, &[GradientStopKey])
-) -> bool {
- // First sanitize the gradient parameters. See if we can remove repetitions,
- // tighten the primitive bounds, etc.
-
- // The size of gradient render tasks depends on the tile_size. No need to generate
- // large stretch sizes that will be clipped to the bounds of the primitive.
- tile_size.width = tile_size.width.min(prim_rect.size.width);
- tile_size.height = tile_size.height.min(prim_rect.size.height);
-
- simplify_repeated_primitive(&tile_size, &mut tile_spacing, prim_rect);
-
- let vertical = start.x.approx_eq(&end.x);
- let horizontal = start.y.approx_eq(&end.y);
-
- let mut horizontally_tiled = prim_rect.size.width > tile_size.width;
- let mut vertically_tiled = prim_rect.size.height > tile_size.height;
-
- // Check whether the tiling is equivalent to stretching on either axis.
- // Stretching the gradient is more efficient than repeating it.
- if vertically_tiled && horizontal && tile_spacing.height == 0.0 {
- tile_size.height = prim_rect.size.height;
- vertically_tiled = false;
- }
-
- if horizontally_tiled && vertical && tile_spacing.width == 0.0 {
- tile_size.width = prim_rect.size.width;
- horizontally_tiled = false;
- }
-
- let offset = apply_gradient_local_clip(
- prim_rect,
- &tile_size,
- &tile_spacing,
- &clip_rect
- );
-
- *start += offset;
- *end += offset;
-
- // Next, in the case of axis-aligned gradients, see if it is worth
- // decomposing the gradient into multiple gradients with only two
- // gradient stops per segment to get a faster shader.
-
- if extend_mode != ExtendMode::Clamp || stops.is_empty() {
- return false;
- }
-
- if !vertical && !horizontal {
- return false;
- }
-
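- // Both vertical and horizontal means start == end: a degenerate gradient.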
- if vertical && horizontal {
- return false;
- }
-
- if !tile_spacing.is_empty() || vertically_tiled || horizontally_tiled {
- return false;
- }
-
- // If the gradient is small, no need to bother with decomposing it.
- if (horizontal && tile_size.width < 256.0)
- || (vertical && tile_size.height < 256.0) {
-
- return false;
- }
-
- // Flip x and y if need be so that we only deal with the horizontal case.
-
- // From now on, don't return false. We are modifying the caller's variables
- // and not bothering to restore them. If the control flow changes, make sure
- // to restore the &mut parameters to sensible values before returning false.
-
- let adjust_rect = &mut |rect: &mut LayoutRect| {
- if vertical {
- swap(&mut rect.origin.x, &mut rect.origin.y);
- swap(&mut rect.size.width, &mut rect.size.height);
- }
- };
-
- let adjust_size = &mut |size: &mut LayoutSize| {
- if vertical { swap(&mut size.width, &mut size.height); }
- };
-
- let adjust_point = &mut |p: &mut LayoutPoint| {
- if vertical { swap(&mut p.x, &mut p.y); }
- };
-
- let clip_rect = match clip_rect.intersection(prim_rect) {
- Some(clip) => clip,
- None => {
- return false;
- }
- };
-
- adjust_rect(prim_rect);
- adjust_point(start);
- adjust_point(end);
- adjust_size(tile_size);
-
- let length = (end.x - start.x).abs();
-
- // Decompose the gradient into simple segments. This lets us:
- // - separate opaque from semi-transparent segments,
- // - compress long segments into small render tasks,
- // - make sure hard stops stay so even if the primitive is large.
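- // For example, a 1000px clamped gradient with stops at offsets 0.0 and 0.3
- // decomposes into a 300px two-stop ramp followed by a constant-color segment.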
-
- let reverse_stops = start.x > end.x;
-
- // Handle reverse stops so we can assume stops are arranged in increasing x.
- if reverse_stops {
- stops.reverse();
- swap(start, end);
- }
-
- // Use fake gradient stops to emulate the potential constant color sections
- // before and after the gradient endpoints.
- let mut prev = *stops.first().unwrap();
- let mut last = *stops.last().unwrap();
-
- // Set the offsets of the fake stops to position them at the edges of the primitive.
- prev.offset = -start.x / length;
- last.offset = (tile_size.width - start.x) / length;
- if reverse_stops {
- prev.offset = 1.0 - prev.offset;
- last.offset = 1.0 - last.offset;
- }
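- // e.g. with start.x = 10 and length = 100 in a 400px-wide tile, prev.offset
- // becomes -0.1 and last.offset becomes 3.9, placing the fake stops at x = 0
- // and x = 400.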
-
- for stop in stops.iter().chain((&[last]).iter()) {
- let prev_stop = prev;
- prev = *stop;
-
- if prev_stop.color.a == 0 && stop.color.a == 0 {
- continue;
- }
-
- let prev_offset = if reverse_stops { 1.0 - prev_stop.offset } else { prev_stop.offset };
- let offset = if reverse_stops { 1.0 - stop.offset } else { stop.offset };
-
- // In layout space, relative to the primitive.
- let segment_start = start.x + prev_offset * length;
- let segment_end = start.x + offset * length;
- let segment_length = segment_end - segment_start;
-
- if segment_length <= 0.0 {
- continue;
- }
-
- let mut segment_rect = *prim_rect;
- segment_rect.origin.x += segment_start;
- segment_rect.size.width = segment_length;
-
- let mut start = point2(0.0, 0.0);
- let mut end = point2(segment_length, 0.0);
-
- adjust_point(&mut start);
- adjust_point(&mut end);
- adjust_rect(&mut segment_rect);
-
- let origin_before_clip = segment_rect.origin;
- segment_rect = match segment_rect.intersection(&clip_rect) {
- Some(rect) => rect,
- None => {
- continue;
- }
- };
- let offset = segment_rect.origin - origin_before_clip;
-
- // Account for the clipping since start and end are relative to the origin.
- start -= offset;
- end -= offset;
-
- callback(
- &segment_rect,
- start,
- end,
- &[
- GradientStopKey { offset: 0.0, .. prev_stop },
- GradientStopKey { offset: 1.0, .. *stop },
- ],
- );
- }
-
- true
-}
-
-impl From<LinearGradientKey> for LinearGradientTemplate {
- fn from(item: LinearGradientKey) -> Self {
-
- let common = PrimTemplateCommonData::with_key_common(item.common);
-
- let (mut stops, min_alpha) = stops_and_min_alpha(&item.stops);
-
- let mut brush_segments = Vec::new();
-
- if let Some(ref nine_patch) = item.nine_patch {
- brush_segments = nine_patch.create_segments(common.prim_rect.size);
- }
-
- // Save opacity of the stops for use in
- // selecting which pass this gradient
- // should be drawn in.
- let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
-
- let start_point = DevicePoint::new(item.start_point.x, item.start_point.y);
- let end_point = DevicePoint::new(item.end_point.x, item.end_point.y);
- let tile_spacing: LayoutSize = item.tile_spacing.into();
- let stretch_size: LayoutSize = item.stretch_size.into();
- let mut task_size: DeviceSize = stretch_size.cast_unit();
-
- let horizontal = start_point.y.approx_eq(&end_point.y);
- let vertical = start_point.x.approx_eq(&end_point.x);
-
- if horizontal {
- // Completely horizontal, we can stretch the gradient vertically.
- task_size.height = 1.0;
- }
-
- if vertical {
- // Completely vertical, we can stretch the gradient horizontally.
- task_size.width = 1.0;
- }
-
- // See if we can render the gradient using a special fast-path shader.
- // The fast path only works with two gradient stops.
- let mut is_fast_path = false;
- if item.cached && stops.len() == 2 && brush_segments.is_empty() {
- if horizontal
- && stretch_size.width >= common.prim_rect.width()
- && start_point.x.approx_eq(&0.0)
- && end_point.x.approx_eq(&stretch_size.width) {
- is_fast_path = true;
- task_size.width = task_size.width.min(256.0);
- }
- if vertical
- && stretch_size.height >= common.prim_rect.height()
- && start_point.y.approx_eq(&0.0)
- && end_point.y.approx_eq(&stretch_size.height) {
- is_fast_path = true;
- task_size.height = task_size.height.min(256.0);
- }
-
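- // If both stops have the same color, the gradient is a solid fill and a
- // single pixel is enough.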
- if stops[0].color == stops[1].color {
- is_fast_path = true;
- task_size = size2(1.0, 1.0);
- }
-
- if is_fast_path && item.reverse_stops {
- // The fast path doesn't use the gradient gpu blocks builder so handle
- // reversed stops here.
- stops.swap(0, 1);
- }
- }
-
- // Avoid rendering enormous gradients. Linear gradients are mostly made of soft transitions,
- // so it is unlikely that rendering at a higher resolution than 1024 would produce noticeable
- // differences, especially with 8 bits per channel.
- const MAX_SIZE: f32 = 1024.0;
-
- let mut scale = vec2(1.0, 1.0);
-
- if task_size.width > MAX_SIZE {
- scale.x = task_size.width / MAX_SIZE;
- task_size.width = MAX_SIZE;
- }
-
- if task_size.height > MAX_SIZE {
- scale.y = task_size.height / MAX_SIZE;
- task_size.height = MAX_SIZE;
- }
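- // e.g. a 2048x1 task is rendered as 1024x1 with scale.x = 2.0 and stretched
- // over the primitive when composited.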
-
- LinearGradientTemplate {
- common,
- extend_mode: item.extend_mode,
- start_point,
- end_point,
- task_size: task_size.ceil().to_i32(),
- scale,
- stretch_size,
- tile_spacing,
- stops_opacity,
- stops,
- brush_segments,
- reverse_stops: item.reverse_stops,
- is_fast_path,
- cached: item.cached,
- stops_handle: GpuCacheHandle::new(),
- src_color: None,
- }
- }
-}
-
-impl LinearGradientTemplate {
- /// Update the GPU cache for a given primitive template. This may be called multiple
- /// times per frame, by each primitive reference that refers to this interned
- /// template. The initial request call to the GPU cache ensures that work is only
- /// done if the cache entry is invalid (due to first use or eviction).
- pub fn update(
- &mut self,
- frame_state: &mut FrameBuildingState,
- parent_surface: SurfaceIndex,
- ) {
- if let Some(mut request) = frame_state.gpu_cache.request(
- &mut self.common.gpu_cache_handle
- ) {
-
- // write_prim_gpu_blocks
- if self.cached {
- // We are using the image brush.
- request.push(PremultipliedColorF::WHITE);
- request.push(PremultipliedColorF::WHITE);
- request.push([
- self.stretch_size.width,
- self.stretch_size.height,
- 0.0,
- 0.0,
- ]);
- } else {
- // We are using the gradient brush.
- request.push([
- self.start_point.x,
- self.start_point.y,
- self.end_point.x,
- self.end_point.y,
- ]);
- request.push([
- pack_as_float(self.extend_mode as u32),
- self.stretch_size.width,
- self.stretch_size.height,
- 0.0,
- ]);
- }
-
- // write_segment_gpu_blocks
- for segment in &self.brush_segments {
- // has to match VECS_PER_SEGMENT
- request.write_segment(
- segment.local_rect,
- segment.extra_data,
- );
- }
- }
-
- // The fast path passes gradient stops as vertex attributes directly.
- if !self.is_fast_path {
- if let Some(mut request) = frame_state.gpu_cache.request(&mut self.stops_handle) {
- GradientGpuBlockBuilder::build(
- self.reverse_stops,
- &mut request,
- &self.stops,
- );
- }
- }
-
- // Tile spacing is always handled by decomposing into separate draw calls, so the
- // primitive opacity is equivalent to the stops' opacity. This might change to
- // non-opaque in the presence of tile spacing if/when tile spacing is handled the
- // same way as in the image primitive.
- self.opacity = self.stops_opacity;
-
- if !self.cached {
- return;
- }
-
- let task_id = if self.is_fast_path {
- let orientation = if self.task_size.width > self.task_size.height {
- LineOrientation::Horizontal
- } else {
- LineOrientation::Vertical
- };
-
- let gradient = FastLinearGradientTask {
- color0: self.stops[0].color.into(),
- color1: self.stops[1].color.into(),
- orientation,
- };
-
- frame_state.resource_cache.request_render_task(
- RenderTaskCacheKey {
- size: self.task_size,
- kind: RenderTaskCacheKeyKind::FastLinearGradient(gradient),
- },
- frame_state.gpu_cache,
- frame_state.rg_builder,
- None,
- false,
- RenderTaskParent::Surface(parent_surface),
- frame_state.surfaces,
- |rg_builder| {
- rg_builder.add().init(RenderTask::new_dynamic(
- self.task_size,
- RenderTaskKind::FastLinearGradient(gradient),
- ))
- }
- )
- } else {
- let cache_key = LinearGradientCacheKey {
- size: self.task_size,
- start: PointKey { x: self.start_point.x, y: self.start_point.y },
- end: PointKey { x: self.end_point.x, y: self.end_point.y },
- scale: PointKey { x: self.scale.x, y: self.scale.y },
- extend_mode: self.extend_mode,
- stops: self.stops.iter().map(|stop| (*stop).into()).collect(),
- reversed_stops: self.reverse_stops,
- };
-
- frame_state.resource_cache.request_render_task(
- RenderTaskCacheKey {
- size: self.task_size,
- kind: RenderTaskCacheKeyKind::LinearGradient(cache_key),
- },
- frame_state.gpu_cache,
- frame_state.rg_builder,
- None,
- false,
- RenderTaskParent::Surface(parent_surface),
- frame_state.surfaces,
- |rg_builder| {
- rg_builder.add().init(RenderTask::new_dynamic(
- self.task_size,
- RenderTaskKind::LinearGradient(LinearGradientTask {
- start: self.start_point,
- end: self.end_point,
- scale: self.scale,
- extend_mode: self.extend_mode,
- stops: self.stops_handle,
- }),
- ))
- }
- )
- };
-
- self.src_color = Some(task_id);
- }
-}
-
-pub type LinearGradientDataHandle = InternHandle<LinearGradient>;
-
-#[derive(Debug, MallocSizeOf)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct LinearGradient {
- pub extend_mode: ExtendMode,
- pub start_point: PointKey,
- pub end_point: PointKey,
- pub stretch_size: SizeKey,
- pub tile_spacing: SizeKey,
- pub stops: Vec<GradientStopKey>,
- pub reverse_stops: bool,
- pub nine_patch: Option<Box<NinePatchDescriptor>>,
- pub cached: bool,
-}
-
-impl Internable for LinearGradient {
- type Key = LinearGradientKey;
- type StoreData = LinearGradientTemplate;
- type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_LINEAR_GRADIENTS;
-}
-
-impl InternablePrimitive for LinearGradient {
- fn into_key(
- self,
- info: &LayoutPrimitiveInfo,
- ) -> LinearGradientKey {
- LinearGradientKey::new(info, self)
- }
-
- fn make_instance_kind(
- key: LinearGradientKey,
- data_handle: LinearGradientDataHandle,
- _prim_store: &mut PrimitiveStore,
- _reference_frame_relative_offset: LayoutVector2D,
- ) -> PrimitiveInstanceKind {
- if key.cached {
- PrimitiveInstanceKind::CachedLinearGradient {
- data_handle,
- visible_tiles_range: GradientTileRange::empty(),
- }
- } else {
- PrimitiveInstanceKind::LinearGradient {
- data_handle,
- visible_tiles_range: GradientTileRange::empty(),
- }
- }
- }
-}
-
-impl IsVisible for LinearGradient {
- fn is_visible(&self) -> bool {
- true
- }
-}
-
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-pub struct LinearGradientPrimitive {
- pub cache_segments: Vec<CachedGradientSegment>,
- pub visible_tiles_range: GradientTileRange,
-}
-
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-pub struct CachedGradientSegment {
- pub render_task: RenderTaskId,
- pub local_rect: LayoutRect,
-}
-
-#[derive(Copy, Clone, Debug, Hash, MallocSizeOf, PartialEq, Eq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct FastLinearGradientTask {
- pub color0: ColorU,
- pub color1: ColorU,
- pub orientation: LineOrientation,
-}
-
-impl FastLinearGradientTask {
- pub fn to_instance(&self, target_rect: &DeviceIntRect) -> FastLinearGradientInstance {
- FastLinearGradientInstance {
- task_rect: target_rect.to_f32(),
- color0: ColorF::from(self.color0).premultiplied(),
- color1: ColorF::from(self.color1).premultiplied(),
- axis_select: match self.orientation {
- LineOrientation::Horizontal => 0.0,
- LineOrientation::Vertical => 1.0,
- },
- }
- }
-}
-
-pub type FastLinearGradientCacheKey = FastLinearGradientTask;
-
-/// The per-instance shader input of a fast-path linear gradient render task.
-///
-/// Must match the FAST_LINEAR_GRADIENT instance description in renderer/vertex.rs.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-#[derive(Clone, Debug)]
-pub struct FastLinearGradientInstance {
- pub task_rect: DeviceRect,
- pub color0: PremultipliedColorF,
- pub color1: PremultipliedColorF,
- pub axis_select: f32,
-}
-
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct LinearGradientTask {
- pub start: DevicePoint,
- pub end: DevicePoint,
- pub scale: DeviceVector2D,
- pub extend_mode: ExtendMode,
- pub stops: GpuCacheHandle,
-}
-
-impl LinearGradientTask {
- pub fn to_instance(&self, target_rect: &DeviceIntRect, gpu_cache: &mut GpuCache) -> LinearGradientInstance {
- LinearGradientInstance {
- task_rect: target_rect.to_f32(),
- start: self.start,
- end: self.end,
- scale: self.scale,
- extend_mode: self.extend_mode as i32,
- gradient_stops_address: self.stops.as_int(gpu_cache),
- }
- }
-}
-
-/// The per-instance shader input of a linear gradient render task.
-///
-/// Must match the LINEAR_GRADIENT instance description in renderer/vertex.rs.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-#[derive(Clone, Debug)]
-pub struct LinearGradientInstance {
- pub task_rect: DeviceRect,
- pub start: DevicePoint,
- pub end: DevicePoint,
- pub scale: DeviceVector2D,
- pub extend_mode: i32,
- pub gradient_stops_address: i32,
-}
-
-#[derive(Clone, Debug, Hash, PartialEq, Eq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct LinearGradientCacheKey {
- pub size: DeviceIntSize,
- pub start: PointKey,
- pub end: PointKey,
- pub scale: PointKey,
- pub extend_mode: ExtendMode,
- pub stops: Vec<GradientStopKey>,
- pub reversed_stops: bool,
-}
diff --git a/third_party/webrender/webrender/src/prim_store/gradient/mod.rs b/third_party/webrender/webrender/src/prim_store/gradient/mod.rs
deleted file mode 100644
index 3fea0bb543f..00000000000
--- a/third_party/webrender/webrender/src/prim_store/gradient/mod.rs
+++ /dev/null
@@ -1,388 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use api::{ColorF, ColorU, GradientStop, PremultipliedColorF};
-use api::units::{LayoutRect, LayoutSize, LayoutVector2D};
-use crate::gpu_cache::GpuDataRequest;
-use std::hash;
-
-mod linear;
-mod radial;
-mod conic;
-
-pub use linear::*;
-pub use radial::*;
-pub use conic::*;
-
-/// A hashable gradient stop that can be used in primitive keys.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Copy, Clone, MallocSizeOf, PartialEq)]
-pub struct GradientStopKey {
- pub offset: f32,
- pub color: ColorU,
-}
-
-impl GradientStopKey {
- pub fn empty() -> Self {
- GradientStopKey {
- offset: 0.0,
- color: ColorU::new(0, 0, 0, 0),
- }
- }
-}
-
-impl From<GradientStop> for GradientStopKey {
- fn from(stop: GradientStop) -> GradientStopKey {
- GradientStopKey {
- offset: stop.offset,
- color: stop.color.into(),
- }
- }
-}
-
-// Convert `stop_keys` into a vector of `GradientStop`s, which is a more
-// convenient representation for the current gradient builder. Compute the
-// minimum stop alpha along the way.
-fn stops_and_min_alpha(stop_keys: &[GradientStopKey]) -> (Vec<GradientStop>, f32) {
- let mut min_alpha: f32 = 1.0;
- let stops = stop_keys.iter().map(|stop_key| {
- let color: ColorF = stop_key.color.into();
- min_alpha = min_alpha.min(color.a);
-
- GradientStop {
- offset: stop_key.offset,
- color,
- }
- }).collect();
-
- (stops, min_alpha)
-}
-
-impl Eq for GradientStopKey {}
-
-impl hash::Hash for GradientStopKey {
- fn hash<H: hash::Hasher>(&self, state: &mut H) {
- self.offset.to_bits().hash(state);
- self.color.hash(state);
- }
-}
-
-// The gradient entry index for the first color stop
-pub const GRADIENT_DATA_FIRST_STOP: usize = 0;
-// The gradient entry index for the last color stop
-pub const GRADIENT_DATA_LAST_STOP: usize = GRADIENT_DATA_SIZE - 1;
-
-// The start of the gradient data table
-pub const GRADIENT_DATA_TABLE_BEGIN: usize = GRADIENT_DATA_FIRST_STOP + 1;
-// The exclusive bound of the gradient data table
-pub const GRADIENT_DATA_TABLE_END: usize = GRADIENT_DATA_LAST_STOP;
-// The number of entries in the gradient data table.
-pub const GRADIENT_DATA_TABLE_SIZE: usize = 128;
-
-// The total number of entries in the gradient data: GRADIENT_DATA_TABLE_SIZE plus the first and last stop entries.
-pub const GRADIENT_DATA_SIZE: usize = GRADIENT_DATA_TABLE_SIZE + 2;
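-// With GRADIENT_DATA_TABLE_SIZE = 128 this gives 130 entries: index 0 holds the
-// first stop, indices 1..=128 hold the interpolated table, and index 129 holds
-// the last stop.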
-
-/// An entry in a gradient data table representing a segment of the gradient
-/// color space.
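-/// `end_step` stores the per-entry color increment rather than the end color
-/// itself, so an entry can be evaluated as start + step * offset; see the
-/// comments in `GradientGpuBlockBuilder::build` below.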
-#[derive(Debug, Copy, Clone)]
-#[repr(C)]
-struct GradientDataEntry {
- start_color: PremultipliedColorF,
- end_step: PremultipliedColorF,
-}
-
-impl GradientDataEntry {
- fn white() -> Self {
- Self {
- start_color: PremultipliedColorF::WHITE,
- end_step: PremultipliedColorF::TRANSPARENT,
- }
- }
-}
-
-// TODO(gw): Tidy this up to be a free function / module?
-struct GradientGpuBlockBuilder {}
-
-impl GradientGpuBlockBuilder {
- /// Generate a color ramp filling the indices in [start_idx, end_idx) and interpolating
- /// from start_color to end_color.
- fn fill_colors(
- start_idx: usize,
- end_idx: usize,
- start_color: &PremultipliedColorF,
- end_color: &PremultipliedColorF,
- entries: &mut [GradientDataEntry; GRADIENT_DATA_SIZE],
- prev_step: &PremultipliedColorF,
- ) -> PremultipliedColorF {
- // Calculate the color difference for individual steps in the ramp.
- let inv_steps = 1.0 / (end_idx - start_idx) as f32;
- let mut step = PremultipliedColorF {
- r: (end_color.r - start_color.r) * inv_steps,
- g: (end_color.g - start_color.g) * inv_steps,
- b: (end_color.b - start_color.b) * inv_steps,
- a: (end_color.a - start_color.a) * inv_steps,
- };
- // As a subtle form of compression, we ensure that the step values for
- // each stop range are the same if and only if they belong to the same
- // stop range. However, if two different stop ranges have the same step,
- // we need to modify the steps so they compare unequally between ranges.
- // This allows us to quickly check whether two adjacent stops belong to the
- // same range by comparing their steps.
- if step == *prev_step {
- // Modify the step alpha value as if by nextafter(). The difference
- // here should be so small as to be unnoticeable, but yet allow it
- // to compare differently.
- step.a = f32::from_bits(if step.a == 0.0 { 1 } else { step.a.to_bits() + 1 });
- }
-
- let mut cur_color = *start_color;
-
- // Walk the ramp writing start and end colors for each entry.
- for index in start_idx .. end_idx {
- let entry = &mut entries[index];
- entry.start_color = cur_color;
- cur_color.r += step.r;
- cur_color.g += step.g;
- cur_color.b += step.b;
- cur_color.a += step.a;
- entry.end_step = step;
- }
-
- step
- }
-
- /// Compute an index into the gradient entry table based on a gradient stop offset. This
- /// function maps offsets from [0, 1] to indices in [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END].
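- /// For example, with the 128-entry table, offset 0.0 maps to index 1 and
- /// offset 1.0 maps to index 129.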
- #[inline]
- fn get_index(offset: f32) -> usize {
- (offset.max(0.0).min(1.0) * GRADIENT_DATA_TABLE_SIZE as f32 +
- GRADIENT_DATA_TABLE_BEGIN as f32)
- .round() as usize
- }
-
- // Build the gradient data from the supplied stops, reversing them if necessary.
- fn build(
- reverse_stops: bool,
- request: &mut GpuDataRequest,
- src_stops: &[GradientStop],
- ) {
- // Preconditions (should be ensured by DisplayListBuilder):
- // * we have at least two stops
- // * first stop has offset 0.0
- // * last stop has offset 1.0
- let mut src_stops = src_stops.iter();
- let mut cur_color = match src_stops.next() {
- Some(stop) => {
- debug_assert_eq!(stop.offset, 0.0);
- stop.color.premultiplied()
- }
- None => {
- error!("Zero gradient stops found!");
- PremultipliedColorF::BLACK
- }
- };
-
- // A table of gradient entries, with two colors per entry, that specify the start and end color
- // within the segment of the gradient space represented by that entry. To look up a gradient result,
- // first the entry index is calculated to determine which two colors to interpolate between, then
- // the offset within that entry bucket is used to interpolate between the two colors in that entry.
- // This layout is motivated by the fact that if one naively tries to store a single color per entry
- // and interpolate directly between entries, then hard stops will become softened because the end
- // color of an entry actually differs from the start color of the next entry, even though they fall
- // at the same edge offset in the gradient space. Instead, the two-color-per-entry layout preserves
- // hard stops, as the end color for a given entry can differ from the start color for the following
- // entry.
- // Colors are stored in RGBA32F format (in the GPU cache). This table requires the gradient color
- // stops to be normalized to the range [0, 1]. The first and last entries hold the first and last
- // color stop colors respectively, while the entries in between hold the interpolated color stop
- // values for the range [0, 1].
- // As a further optimization, rather than directly storing the end color, the difference of the end
- // color from the start color is stored instead, so that an entry can be evaluated more cheaply
- // with start+diff*offset instead of mix(start,end,offset). Further, the color difference in two
- // adjacent entries will always be the same if they were generated from the same set of stops/run.
- // To allow fast searching of the table, if two adjacent entries generated from different sets of
- // stops (a boundary) have the same difference, the floating-point bits of the stop will be nudged
- // so that they compare differently without perceptibly altering the interpolation result. This way,
- // one can quickly scan the table and recover runs just by comparing the color differences of the
- // current and next entry.
- // For example, a table with 2 inside entries (startR,startG,startB):(diffR,diffG,diffB) might look
- // like so:
- // first | 0.0 | 0.5 | last
- // (0,0,0):(0,0,0) | (1,0,0):(-1,1,0) | (0,0,1):(0,1,-1) | (1,1,1):(0,0,0)
- // ^ solid black ^ red to green ^ blue to green ^ solid white
- let mut entries = [GradientDataEntry::white(); GRADIENT_DATA_SIZE];
- let mut prev_step = cur_color;
- if reverse_stops {
- // Fill in the first entry (for reversed stops) with the first color stop
- prev_step = GradientGpuBlockBuilder::fill_colors(
- GRADIENT_DATA_LAST_STOP,
- GRADIENT_DATA_LAST_STOP + 1,
- &cur_color,
- &cur_color,
- &mut entries,
- &prev_step,
- );
-
- // Fill in the center of the gradient table, generating a color ramp between each consecutive pair
- // of gradient stops. Each iteration of a loop will fill the indices in [next_idx, cur_idx). The
- // loop will then fill indices in [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END).
- let mut cur_idx = GRADIENT_DATA_TABLE_END;
- for next in src_stops {
- let next_color = next.color.premultiplied();
- let next_idx = Self::get_index(1.0 - next.offset);
-
- if next_idx < cur_idx {
- prev_step = GradientGpuBlockBuilder::fill_colors(
- next_idx,
- cur_idx,
- &next_color,
- &cur_color,
- &mut entries,
- &prev_step,
- );
- cur_idx = next_idx;
- }
-
- cur_color = next_color;
- }
- if cur_idx != GRADIENT_DATA_TABLE_BEGIN {
- error!("Gradient stops abruptly at {}, auto-completing to white", cur_idx);
- }
-
- // Fill in the last entry (for reversed stops) with the last color stop
- GradientGpuBlockBuilder::fill_colors(
- GRADIENT_DATA_FIRST_STOP,
- GRADIENT_DATA_FIRST_STOP + 1,
- &cur_color,
- &cur_color,
- &mut entries,
- &prev_step,
- );
- } else {
- // Fill in the first entry with the first color stop
- prev_step = GradientGpuBlockBuilder::fill_colors(
- GRADIENT_DATA_FIRST_STOP,
- GRADIENT_DATA_FIRST_STOP + 1,
- &cur_color,
- &cur_color,
- &mut entries,
- &prev_step,
- );
-
- // Fill in the center of the gradient table, generating a color ramp between each consecutive pair
- // of gradient stops. Each iteration of a loop will fill the indices in [cur_idx, next_idx). The
- // loop will then fill indices in [GRADIENT_DATA_TABLE_BEGIN, GRADIENT_DATA_TABLE_END).
- let mut cur_idx = GRADIENT_DATA_TABLE_BEGIN;
- for next in src_stops {
- let next_color = next.color.premultiplied();
- let next_idx = Self::get_index(next.offset);
-
- if next_idx > cur_idx {
- prev_step = GradientGpuBlockBuilder::fill_colors(
- cur_idx,
- next_idx,
- &cur_color,
- &next_color,
- &mut entries,
- &prev_step,
- );
- cur_idx = next_idx;
- }
-
- cur_color = next_color;
- }
- if cur_idx != GRADIENT_DATA_TABLE_END {
- error!("Gradient stops abruptly at {}, auto-completing to white", cur_idx);
- }
-
- // Fill in the last entry with the last color stop
- GradientGpuBlockBuilder::fill_colors(
- GRADIENT_DATA_LAST_STOP,
- GRADIENT_DATA_LAST_STOP + 1,
- &cur_color,
- &cur_color,
- &mut entries,
- &prev_step,
- );
- }
-
- for entry in entries.iter() {
- request.push(entry.start_color);
- request.push(entry.end_step);
- }
- }
-}
-
-// If the gradient is not tiled we know that any content outside of the clip will not
-// be shown. Applying the clip early reduces how much of the gradient we
-// render and cache. We do this optimization separately on each axis.
-// Returns the offset between the new and old primitive rect origin, to apply to the
-// gradient parameters that are relative to the primitive origin.
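-// For example, a non-tiled primitive spanning x in [0, 100] clipped to x in [20, 80]
-// shrinks to that range and returns offset.x = -20, keeping the gradient endpoints
-// fixed relative to the new origin.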
-pub fn apply_gradient_local_clip(
- prim_rect: &mut LayoutRect,
- stretch_size: &LayoutSize,
- tile_spacing: &LayoutSize,
- clip_rect: &LayoutRect,
-) -> LayoutVector2D {
- let w = prim_rect.max_x().min(clip_rect.max_x()) - prim_rect.min_x();
- let h = prim_rect.max_y().min(clip_rect.max_y()) - prim_rect.min_y();
- let is_tiled_x = w > stretch_size.width + tile_spacing.width;
- let is_tiled_y = h > stretch_size.height + tile_spacing.height;
-
- let mut offset = LayoutVector2D::new(0.0, 0.0);
-
- if !is_tiled_x {
- let diff = (clip_rect.min_x() - prim_rect.min_x()).min(prim_rect.size.width);
- if diff > 0.0 {
- prim_rect.origin.x += diff;
- prim_rect.size.width -= diff;
- offset.x = -diff;
- }
-
- let diff = prim_rect.max_x() - clip_rect.max_x();
- if diff > 0.0 {
- prim_rect.size.width -= diff;
- }
- }
-
- if !is_tiled_y {
- let diff = (clip_rect.min_y() - prim_rect.min_y()).min(prim_rect.size.height);
- if diff > 0.0 {
- prim_rect.origin.y += diff;
- prim_rect.size.height -= diff;
- offset.y = -diff;
- }
-
- let diff = prim_rect.max_y() - clip_rect.max_y();
- if diff > 0.0 {
- prim_rect.size.height -= diff;
- }
- }
-
- offset
-}
-
-#[test]
-#[cfg(target_pointer_width = "64")]
-fn test_struct_sizes() {
- use std::mem;
- // The sizes of these structures are critical for performance on a number of
- // talos stress tests. If you get a failure here on CI, there are two possibilities:
- // (a) You made a structure smaller than it currently is. Great work! Update the
- // test expectations and move on.
- // (b) You made a structure larger. This is not necessarily a problem, but should only
- // be done with care, and after checking if talos performance regresses badly.
- assert_eq!(mem::size_of::<LinearGradient>(), 72, "LinearGradient size changed");
- assert_eq!(mem::size_of::<LinearGradientTemplate>(), 144, "LinearGradientTemplate size changed");
- assert_eq!(mem::size_of::<LinearGradientKey>(), 88, "LinearGradientKey size changed");
-
- assert_eq!(mem::size_of::<RadialGradient>(), 72, "RadialGradient size changed");
- assert_eq!(mem::size_of::<RadialGradientTemplate>(), 144, "RadialGradientTemplate size changed");
- assert_eq!(mem::size_of::<RadialGradientKey>(), 96, "RadialGradientKey size changed");
-
- assert_eq!(mem::size_of::<ConicGradient>(), 72, "ConicGradient size changed");
- assert_eq!(mem::size_of::<ConicGradientTemplate>(), 144, "ConicGradientTemplate size changed");
- assert_eq!(mem::size_of::<ConicGradientKey>(), 96, "ConicGradientKey size changed");
-}
diff --git a/third_party/webrender/webrender/src/prim_store/gradient/radial.rs b/third_party/webrender/webrender/src/prim_store/gradient/radial.rs
deleted file mode 100644
index 86051bddfa9..00000000000
--- a/third_party/webrender/webrender/src/prim_store/gradient/radial.rs
+++ /dev/null
@@ -1,536 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! Radial gradients
-//!
-//! Specification: https://drafts.csswg.org/css-images-4/#radial-gradients
-//!
-//! Radial gradients are rendered via cached render tasks and composited with the image brush.
-
-use euclid::{vec2, size2};
-use api::{ExtendMode, GradientStop, PremultipliedColorF, ColorU};
-use api::units::*;
-use crate::scene_building::IsVisible;
-use crate::frame_builder::FrameBuildingState;
-use crate::gpu_cache::{GpuCache, GpuCacheHandle};
-use crate::intern::{Internable, InternDebug, Handle as InternHandle};
-use crate::internal_types::LayoutPrimitiveInfo;
-use crate::prim_store::{BrushSegment, GradientTileRange, InternablePrimitive};
-use crate::prim_store::{PrimitiveInstanceKind, PrimitiveOpacity};
-use crate::prim_store::{PrimKeyCommonData, PrimTemplateCommonData, PrimitiveStore};
-use crate::prim_store::{NinePatchDescriptor, PointKey, SizeKey, FloatKey};
-use crate::render_task::{RenderTask, RenderTaskKind};
-use crate::render_task_graph::RenderTaskId;
-use crate::render_task_cache::{RenderTaskCacheKeyKind, RenderTaskCacheKey, RenderTaskParent};
-use crate::picture::{SurfaceIndex};
-
-use std::{hash, ops::{Deref, DerefMut}};
-use super::{
- stops_and_min_alpha, GradientStopKey, GradientGpuBlockBuilder,
- apply_gradient_local_clip,
-};
-
-/// Hashable radial gradient parameters, for use during prim interning.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Clone, MallocSizeOf, PartialEq)]
-pub struct RadialGradientParams {
- pub start_radius: f32,
- pub end_radius: f32,
- pub ratio_xy: f32,
-}
-
-impl Eq for RadialGradientParams {}
-
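-// f32 does not implement Hash, so hash the raw bit patterns. This distinguishes
-// -0.0 from 0.0, which at worst costs an extra interned entry.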
-impl hash::Hash for RadialGradientParams {
- fn hash<H: hash::Hasher>(&self, state: &mut H) {
- self.start_radius.to_bits().hash(state);
- self.end_radius.to_bits().hash(state);
- self.ratio_xy.to_bits().hash(state);
- }
-}
-
-/// Identifying key for a radial gradient.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Clone, Eq, PartialEq, Hash, MallocSizeOf)]
-pub struct RadialGradientKey {
- pub common: PrimKeyCommonData,
- pub extend_mode: ExtendMode,
- pub center: PointKey,
- pub params: RadialGradientParams,
- pub stretch_size: SizeKey,
- pub stops: Vec<GradientStopKey>,
- pub tile_spacing: SizeKey,
- pub nine_patch: Option<Box<NinePatchDescriptor>>,
-}
-
-impl RadialGradientKey {
- pub fn new(
- info: &LayoutPrimitiveInfo,
- radial_grad: RadialGradient,
- ) -> Self {
- RadialGradientKey {
- common: info.into(),
- extend_mode: radial_grad.extend_mode,
- center: radial_grad.center,
- params: radial_grad.params,
- stretch_size: radial_grad.stretch_size,
- stops: radial_grad.stops,
- tile_spacing: radial_grad.tile_spacing,
- nine_patch: radial_grad.nine_patch,
- }
- }
-}
-
-impl InternDebug for RadialGradientKey {}
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, MallocSizeOf)]
-pub struct RadialGradientTemplate {
- pub common: PrimTemplateCommonData,
- pub extend_mode: ExtendMode,
- pub params: RadialGradientParams,
- pub center: DevicePoint,
- pub task_size: DeviceIntSize,
- pub scale: DeviceVector2D,
- pub stretch_size: LayoutSize,
- pub tile_spacing: LayoutSize,
- pub brush_segments: Vec<BrushSegment>,
- pub stops_opacity: PrimitiveOpacity,
- pub stops: Vec<GradientStop>,
- pub stops_handle: GpuCacheHandle,
- pub src_color: Option<RenderTaskId>,
-}
-
-impl Deref for RadialGradientTemplate {
- type Target = PrimTemplateCommonData;
- fn deref(&self) -> &Self::Target {
- &self.common
- }
-}
-
-impl DerefMut for RadialGradientTemplate {
- fn deref_mut(&mut self) -> &mut Self::Target {
- &mut self.common
- }
-}
-
-impl From<RadialGradientKey> for RadialGradientTemplate {
- fn from(item: RadialGradientKey) -> Self {
- let common = PrimTemplateCommonData::with_key_common(item.common);
- let mut brush_segments = Vec::new();
-
- if let Some(ref nine_patch) = item.nine_patch {
- brush_segments = nine_patch.create_segments(common.prim_rect.size);
- }
-
- let (stops, min_alpha) = stops_and_min_alpha(&item.stops);
-
- // Save opacity of the stops for use in
- // selecting which pass this gradient
- // should be drawn in.
- let stops_opacity = PrimitiveOpacity::from_alpha(min_alpha);
-
- let mut stretch_size: LayoutSize = item.stretch_size.into();
- stretch_size.width = stretch_size.width.min(common.prim_rect.size.width);
- stretch_size.height = stretch_size.height.min(common.prim_rect.size.height);
-
- // Avoid rendering enormous gradients. Radial gradients are mostly made of soft transitions,
- // so it is unlikely that rendering at a higher resolution than 1024 would produce noticeable
- // differences, especially with 8 bits per channel.
- const MAX_SIZE: f32 = 1024.0;
- let mut task_size: DeviceSize = stretch_size.cast_unit();
- let mut scale = vec2(1.0, 1.0);
- if task_size.width > MAX_SIZE {
- scale.x = task_size.width / MAX_SIZE;
- task_size.width = MAX_SIZE;
- }
- if task_size.height > MAX_SIZE {
- scale.y = task_size.height / MAX_SIZE;
- task_size.height = MAX_SIZE;
- }
-
- RadialGradientTemplate {
- common,
- center: DevicePoint::new(item.center.x, item.center.y),
- extend_mode: item.extend_mode,
- params: item.params,
- stretch_size,
- task_size: task_size.ceil().to_i32(),
- scale,
- tile_spacing: item.tile_spacing.into(),
- brush_segments,
- stops_opacity,
- stops,
- stops_handle: GpuCacheHandle::new(),
- src_color: None,
- }
- }
-}
-
-impl RadialGradientTemplate {
- /// Update the GPU cache for a given primitive template. This may be called multiple
- /// times per frame, by each primitive reference that refers to this interned
- /// template. The initial request call to the GPU cache ensures that work is only
- /// done if the cache entry is invalid (due to first use or eviction).
- pub fn update(
- &mut self,
- frame_state: &mut FrameBuildingState,
- parent_surface: SurfaceIndex,
- ) {
- if let Some(mut request) =
- frame_state.gpu_cache.request(&mut self.common.gpu_cache_handle) {
- // write_prim_gpu_blocks
- request.push(PremultipliedColorF::WHITE);
- request.push(PremultipliedColorF::WHITE);
- request.push([
- self.stretch_size.width,
- self.stretch_size.height,
- 0.0,
- 0.0,
- ]);
-
- // write_segment_gpu_blocks
- for segment in &self.brush_segments {
- // has to match VECS_PER_SEGMENT
- request.write_segment(
- segment.local_rect,
- segment.extra_data,
- );
- }
- }
-
- if let Some(mut request) = frame_state.gpu_cache.request(&mut self.stops_handle) {
- GradientGpuBlockBuilder::build(
- false,
- &mut request,
- &self.stops,
- );
- }
-
- let task_size = self.task_size;
- let cache_key = RadialGradientCacheKey {
- size: task_size,
- center: PointKey { x: self.center.x, y: self.center.y },
- scale: PointKey { x: self.scale.x, y: self.scale.y },
- start_radius: FloatKey(self.params.start_radius),
- end_radius: FloatKey(self.params.end_radius),
- ratio_xy: FloatKey(self.params.ratio_xy),
- extend_mode: self.extend_mode,
- stops: self.stops.iter().map(|stop| (*stop).into()).collect(),
- };
-
- let task_id = frame_state.resource_cache.request_render_task(
- RenderTaskCacheKey {
- size: task_size,
- kind: RenderTaskCacheKeyKind::RadialGradient(cache_key),
- },
- frame_state.gpu_cache,
- frame_state.rg_builder,
- None,
- false,
- RenderTaskParent::Surface(parent_surface),
- frame_state.surfaces,
- |rg_builder| {
- rg_builder.add().init(RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::RadialGradient(RadialGradientTask {
- extend_mode: self.extend_mode,
- center: self.center,
- scale: self.scale,
- params: self.params.clone(),
- stops: self.stops_handle,
- }),
- ))
- }
- );
-
- self.src_color = Some(task_id);
-
- // Tile spacing is always handled by decomposing into separate draw calls, so the
- // primitive opacity is equivalent to the stops' opacity. This might change to
- // non-opaque in the presence of tile spacing if/when tile spacing is handled the
- // same way as in the image primitive.
- self.opacity = self.stops_opacity;
- }
-}
-
-pub type RadialGradientDataHandle = InternHandle<RadialGradient>;
-
-#[derive(Debug, MallocSizeOf)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct RadialGradient {
- pub extend_mode: ExtendMode,
- pub center: PointKey,
- pub params: RadialGradientParams,
- pub stretch_size: SizeKey,
- pub stops: Vec<GradientStopKey>,
- pub tile_spacing: SizeKey,
- pub nine_patch: Option<Box<NinePatchDescriptor>>,
-}
-
-impl Internable for RadialGradient {
- type Key = RadialGradientKey;
- type StoreData = RadialGradientTemplate;
- type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_RADIAL_GRADIENTS;
-}
-
-impl InternablePrimitive for RadialGradient {
- fn into_key(
- self,
- info: &LayoutPrimitiveInfo,
- ) -> RadialGradientKey {
- RadialGradientKey::new(info, self)
- }
-
- fn make_instance_kind(
- _key: RadialGradientKey,
- data_handle: RadialGradientDataHandle,
- _prim_store: &mut PrimitiveStore,
- _reference_frame_relative_offset: LayoutVector2D,
- ) -> PrimitiveInstanceKind {
- PrimitiveInstanceKind::RadialGradient {
- data_handle,
- visible_tiles_range: GradientTileRange::empty(),
- }
- }
-}
-
-impl IsVisible for RadialGradient {
- fn is_visible(&self) -> bool {
- true
- }
-}
-
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct RadialGradientTask {
- pub extend_mode: ExtendMode,
- pub center: DevicePoint,
- pub scale: DeviceVector2D,
- pub params: RadialGradientParams,
- pub stops: GpuCacheHandle,
-}
-
-impl RadialGradientTask {
- pub fn to_instance(&self, target_rect: &DeviceIntRect, gpu_cache: &mut GpuCache) -> RadialGradientInstance {
- RadialGradientInstance {
- task_rect: target_rect.to_f32(),
- center: self.center,
- scale: self.scale,
- start_radius: self.params.start_radius,
- end_radius: self.params.end_radius,
- ratio_xy: self.params.ratio_xy,
- extend_mode: self.extend_mode as i32,
- gradient_stops_address: self.stops.as_int(gpu_cache),
- }
- }
-}
-
-/// The per-instance shader input of a radial gradient render task.
-///
-/// Must match the RADIAL_GRADIENT instance description in renderer/vertex.rs.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[repr(C)]
-#[derive(Clone, Debug)]
-pub struct RadialGradientInstance {
- pub task_rect: DeviceRect,
- pub center: DevicePoint,
- pub scale: DeviceVector2D,
- pub start_radius: f32,
- pub end_radius: f32,
- pub ratio_xy: f32,
- pub extend_mode: i32,
- pub gradient_stops_address: i32,
-}
-
-#[derive(Clone, Debug, Hash, PartialEq, Eq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct RadialGradientCacheKey {
- pub size: DeviceIntSize,
- pub center: PointKey,
- pub scale: PointKey,
- pub start_radius: FloatKey,
- pub end_radius: FloatKey,
- pub ratio_xy: FloatKey,
- pub extend_mode: ExtendMode,
- pub stops: Vec<GradientStopKey>,
-}
-
-/// Avoid invoking the radial gradient shader on large areas where the color is
-/// constant.
-///
-/// If the extend mode is set to clamp, the "interesting" part
-/// of the gradient is only in the bounds of the gradient's ellipse, and the rest
-/// is the color of the last gradient stop.
-///
-/// Sometimes we run into radial gradients with a small radius compared to the
-/// primitive bounds, which means a large area of the primitive is a constant color.
-/// This function tries to detect that, potentially shrinks the gradient primitive to
-/// only the useful part, and if needed inserts solid color primitives around the
-/// gradient where parts of it have been removed.
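-/// For example, a 1000x1000 primitive whose clamped gradient ellipse only covers
-/// the top-left 200x200 corner can be shrunk to that corner, with the remaining
-/// area covered by solid rectangles in the last stop's color.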
-pub fn optimize_radial_gradient(
- prim_rect: &mut LayoutRect,
- stretch_size: &mut LayoutSize,
- center: &mut LayoutPoint,
- tile_spacing: &mut LayoutSize,
- clip_rect: &LayoutRect,
- radius: LayoutSize,
- end_offset: f32,
- extend_mode: ExtendMode,
- stops: &[GradientStopKey],
- solid_parts: &mut dyn FnMut(&LayoutRect, ColorU),
-) {
- let offset = apply_gradient_local_clip(
- prim_rect,
- stretch_size,
- tile_spacing,
- clip_rect
- );
-
- *center += offset;
-
- if extend_mode != ExtendMode::Clamp || stops.is_empty() {
- return;
- }
-
- // Bounding box of the "interesting" part of the gradient.
- let min = prim_rect.origin + center.to_vector() - radius.to_vector() * end_offset;
- let max = prim_rect.origin + center.to_vector() + radius.to_vector() * end_offset;
-
- // The (non-repeated) gradient primitive rect.
- let gradient_rect = LayoutRect {
- origin: prim_rect.origin,
- size: *stretch_size,
- };
-
- // How much internal margin between the primitive bounds and the gradient's
- // bounding rect (areas that are a constant color).
- let mut l = (min.x - gradient_rect.min_x()).max(0.0).floor();
- let mut t = (min.y - gradient_rect.min_y()).max(0.0).floor();
- let mut r = (gradient_rect.max_x() - max.x).max(0.0).floor();
- let mut b = (gradient_rect.max_y() - max.y).max(0.0).floor();
-
- let is_tiled = prim_rect.size.width > stretch_size.width + tile_spacing.width
- || prim_rect.size.height > stretch_size.height + tile_spacing.height;
-
- let bg_color = stops.last().unwrap().color;
-
- if bg_color.a != 0 && is_tiled {
- // If the primitive has repetitions, it's not enough to insert solid rects around it,
- // so bail out.
- return;
- }
-
- // If the background is fully transparent, shrinking the primitive bounds as much as possible
- // is always a win. If the background is not transparent, we have to insert solid rectangles
- // around the shrunk parts.
- // If the background is transparent and the primitive is tiled, the optimization may introduce
- // tile spacing which forces the tiling to be manually decomposed.
- // Either way, don't bother optimizing unless it saves a significant amount of pixels.
- if bg_color.a != 0 || (is_tiled && tile_spacing.is_empty()) {
- let threshold = 128.0;
- if l < threshold { l = 0.0 }
- if t < threshold { t = 0.0 }
- if r < threshold { r = 0.0 }
- if b < threshold { b = 0.0 }
- }
-
- if l + t + r + b == 0.0 {
- // No adjustment to make.
- return;
- }
-
- // Insert solid rectangles around the gradient, in the places where the primitive will be
- // shrunk.
- if bg_color.a != 0 {
- if l != 0.0 && t != 0.0 {
- let solid_rect = LayoutRect {
- origin: gradient_rect.origin,
- size: size2(l, t),
- };
- solid_parts(&solid_rect, bg_color);
- }
-
- if l != 0.0 && b != 0.0 {
- let solid_rect = LayoutRect {
- origin: gradient_rect.bottom_left() - vec2(0.0, b),
- size: size2(l, b),
- };
- solid_parts(&solid_rect, bg_color);
- }
-
- if t != 0.0 && r != 0.0 {
- let solid_rect = LayoutRect {
- origin: gradient_rect.top_right() - vec2(r, 0.0),
- size: size2(r, t),
- };
- solid_parts(&solid_rect, bg_color);
- }
-
- if r != 0.0 && b != 0.0 {
- let solid_rect = LayoutRect {
- origin: gradient_rect.bottom_right() - vec2(r, b),
- size: size2(r, b),
- };
- solid_parts(&solid_rect, bg_color);
- }
-
- if l != 0.0 {
- let solid_rect = LayoutRect {
- origin: gradient_rect.origin + vec2(0.0, t),
- size: size2(l, gradient_rect.size.height - t - b),
- };
- solid_parts(&solid_rect, bg_color);
- }
-
- if r != 0.0 {
- let solid_rect = LayoutRect {
- origin: gradient_rect.top_right() + vec2(-r, t),
- size: size2(r, gradient_rect.size.height - t - b),
- };
- solid_parts(&solid_rect, bg_color);
- }
-
- if t != 0.0 {
- let solid_rect = LayoutRect {
- origin: gradient_rect.origin + vec2(l, 0.0),
- size: size2(gradient_rect.size.width - l - r, t),
- };
- solid_parts(&solid_rect, bg_color);
- }
-
- if b != 0.0 {
- let solid_rect = LayoutRect {
- origin: gradient_rect.bottom_left() + vec2(l, -b),
- size: size2(gradient_rect.size.width - l - r, b),
- };
- solid_parts(&solid_rect, bg_color);
- }
- }
-
- // Shrink the gradient primitive.
-
- prim_rect.origin.x += l;
- prim_rect.origin.y += t;
- prim_rect.size.width -= l;
- prim_rect.size.height -= t;
-
- stretch_size.width -= l + r;
- stretch_size.height -= b + t;
-
- center.x -= l;
- center.y -= t;
-
- tile_spacing.width += l + r;
- tile_spacing.height += t + b;
-}
diff --git a/third_party/webrender/webrender/src/prim_store/image.rs b/third_party/webrender/webrender/src/prim_store/image.rs
index 3e4ed4a896f..922bd5b80cd 100644
--- a/third_party/webrender/webrender/src/prim_store/image.rs
+++ b/third_party/webrender/webrender/src/prim_store/image.rs
@@ -3,40 +3,35 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use api::{
- AlphaType, ColorDepth, ColorF, ColorU, ExternalImageData, ExternalImageType,
- ImageKey as ApiImageKey, ImageBufferKind, ImageRendering, PremultipliedColorF,
- RasterSpace, Shadow, YuvColorSpace, ColorRange, YuvFormat,
+ AlphaType, ColorDepth, ColorF, ColorU,
+ ImageKey as ApiImageKey, ImageRendering,
+ PremultipliedColorF, Shadow, YuvColorSpace, ColorRange, YuvFormat,
};
use api::units::*;
use crate::scene_building::{CreateShadow, IsVisible};
-use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, add_child_render_task};
+use crate::frame_builder::FrameBuildingState;
use crate::gpu_cache::{GpuCache, GpuDataRequest};
use crate::intern::{Internable, InternDebug, Handle as InternHandle};
use crate::internal_types::{LayoutPrimitiveInfo};
-use crate::picture::SurfaceIndex;
use crate::prim_store::{
- EdgeAaSegmentMask, PrimitiveInstanceKind,
+ EdgeAaSegmentMask, OpacityBindingIndex, PrimitiveInstanceKind,
PrimitiveOpacity, PrimKey,
PrimTemplate, PrimTemplateCommonData, PrimitiveStore, SegmentInstanceIndex,
SizeKey, InternablePrimitive,
};
use crate::render_target::RenderTargetKind;
-use crate::render_task_graph::RenderTaskId;
-use crate::render_task::RenderTask;
+use crate::render_task::{BlitSource, RenderTask};
use crate::render_task_cache::{
- RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskParent
+ RenderTaskCacheEntryHandle, RenderTaskCacheKey, RenderTaskCacheKeyKind
};
-use crate::resource_cache::{ImageRequest, ImageProperties, ResourceCache};
+use crate::resource_cache::{ImageRequest, ResourceCache};
use crate::util::pack_as_float;
-use crate::visibility::{PrimitiveVisibility, compute_conservative_visible_rect};
-use crate::spatial_tree::SpatialNodeIndex;
-use crate::image_tiling;
#[derive(Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct VisibleImageTile {
- pub src_color: RenderTaskId,
+ pub tile_offset: TileOffset,
pub edge_flags: EdgeAaSegmentMask,
pub local_rect: LayoutRect,
pub local_clip_rect: LayoutRect,
@@ -67,10 +62,10 @@ pub struct ImageCacheKey {
#[derive(Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
pub struct ImageInstance {
+ pub opacity_binding_index: OpacityBindingIndex,
pub segment_instance_index: SegmentInstanceIndex,
pub tight_local_clip_rect: LayoutRect,
pub visible_tiles: Vec<VisibleImageTile>,
- pub src_color: Option<RenderTaskId>,
}
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -101,6 +96,21 @@ impl ImageKey {
impl InternDebug for ImageKey {}
+// Where to find the texture data for an image primitive.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Debug, MallocSizeOf)]
+pub enum ImageSource {
+ // A normal image - just reference the texture cache.
+ Default,
+ // An image that is pre-rendered into the texture cache
+ // via a render task.
+ Cache {
+ size: DeviceIntSize,
+ handle: Option<RenderTaskCacheEntryHandle>,
+ },
+}
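+
+// A minimal dispatch sketch (receiver name illustrative): consumers branch on
+// the variant along the lines of
+//
+//     match image_data.source {
+//         ImageSource::Default => { /* sample the texture cache directly */ }
+//         ImageSource::Cache { size, ref handle } => { /* draw the pre-rendered task */ }
+//     }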
+
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(Debug, MallocSizeOf)]
@@ -109,6 +119,7 @@ pub struct ImageData {
pub stretch_size: LayoutSize,
pub tile_spacing: LayoutSize,
pub color: ColorF,
+ pub source: ImageSource,
pub image_rendering: ImageRendering,
pub alpha_type: AlphaType,
}
@@ -120,6 +131,7 @@ impl From<Image> for ImageData {
color: image.color.into(),
stretch_size: image.stretch_size.into(),
tile_spacing: image.tile_spacing.into(),
+ source: ImageSource::Default,
image_rendering: image.image_rendering,
alpha_type: image.alpha_type,
}
@@ -134,249 +146,113 @@ impl ImageData {
pub fn update(
&mut self,
common: &mut PrimTemplateCommonData,
- image_instance: &mut ImageInstance,
- parent_surface: SurfaceIndex,
- prim_spatial_node_index: SpatialNodeIndex,
frame_state: &mut FrameBuildingState,
- frame_context: &FrameBuildingContext,
- visibility: &mut PrimitiveVisibility,
) {
-
- let image_properties = frame_state
- .resource_cache
- .get_image_properties(self.key);
-
- common.opacity = match &image_properties {
- Some(properties) => {
- if properties.descriptor.is_opaque() {
- PrimitiveOpacity::from_alpha(self.color.a)
- } else {
- PrimitiveOpacity::translucent()
- }
- }
- None => PrimitiveOpacity::opaque(),
- };
-
- if self.stretch_size.width >= common.prim_rect.size.width &&
- self.stretch_size.height >= common.prim_rect.size.height {
-
- common.may_need_repetition = false;
+ if let Some(mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) {
+ self.write_prim_gpu_blocks(&mut request);
}
- let request = ImageRequest {
- key: self.key,
- rendering: self.image_rendering,
- tile: None,
- };
-
- match image_properties {
- // Non-tiled (most common) path.
- Some(ImageProperties { tiling: None, ref descriptor, ref external_image, .. }) => {
- let mut size = frame_state.resource_cache.request_image(
- request,
- frame_state.gpu_cache,
- );
-
- let orig_task_id = frame_state.rg_builder.add().init(
- RenderTask::new_image(size, request)
- );
-
- // On some devices we cannot render from an ImageBufferKind::TextureExternal
-            // source using most shaders, so must perform a copy to a regular texture first.
- let task_id = if frame_context.fb_config.external_images_require_copy
- && matches!(
- external_image,
- Some(ExternalImageData {
- image_type: ExternalImageType::TextureHandle(
- ImageBufferKind::TextureExternal
- ),
- ..
- })
- )
- {
- let target_kind = if descriptor.format.bytes_per_pixel() == 1 {
- RenderTargetKind::Alpha
- } else {
- RenderTargetKind::Color
- };
-
- let task_id = RenderTask::new_scaling(
- orig_task_id,
- frame_state.rg_builder,
- target_kind,
- size
- );
-
- add_child_render_task(
- parent_surface,
- task_id,
- frame_state.surfaces,
- frame_state.rg_builder,
- );
-
- task_id
- } else {
- orig_task_id
- };
-
- // Every frame, for cached items, we need to request the render
- // task cache item. The closure will be invoked on the first
- // time through, and any time the render task output has been
- // evicted from the texture cache.
- if self.tile_spacing == LayoutSize::zero() {
- // Most common case.
- image_instance.src_color = Some(task_id);
- } else {
- let padding = DeviceIntSideOffsets::new(
- 0,
- (self.tile_spacing.width * size.width as f32 / self.stretch_size.width) as i32,
- (self.tile_spacing.height * size.height as f32 / self.stretch_size.height) as i32,
- 0,
- );
-
- size.width += padding.horizontal();
- size.height += padding.vertical();
-
- if padding != DeviceIntSideOffsets::zero() {
- common.opacity = PrimitiveOpacity::translucent();
+ common.opacity = {
+ let image_properties = frame_state
+ .resource_cache
+ .get_image_properties(self.key);
+
+ match image_properties {
+ Some(image_properties) => {
+ let is_tiled = image_properties.tiling.is_some();
+
+ if self.tile_spacing != LayoutSize::zero() && !is_tiled {
+ self.source = ImageSource::Cache {
+                            // Size in device pixels we need to allocate in the render task cache.
+ size: image_properties.descriptor.size.to_i32(),
+ handle: None,
+ };
}
- let image_cache_key = ImageCacheKey {
- request,
- texel_rect: None,
- };
- let target_kind = if descriptor.format.bytes_per_pixel() == 1 {
- RenderTargetKind::Alpha
- } else {
- RenderTargetKind::Color
+ let mut is_opaque = image_properties.descriptor.is_opaque();
+ let request = ImageRequest {
+ key: self.key,
+ rendering: self.image_rendering,
+ tile: None,
};
- // Request a pre-rendered image task.
- let cached_task_handle = frame_state.resource_cache.request_render_task(
- RenderTaskCacheKey {
- size,
- kind: RenderTaskCacheKeyKind::Image(image_cache_key),
- },
- frame_state.gpu_cache,
- frame_state.rg_builder,
- None,
- descriptor.is_opaque(),
- RenderTaskParent::Surface(parent_surface),
- frame_state.surfaces,
- |rg_builder| {
- // Create a task to blit from the texture cache to
- // a normal transient render task surface.
- // TODO: figure out if/when we can do a blit instead.
- let cache_to_target_task_id = RenderTask::new_scaling_with_padding(
- task_id,
- rg_builder,
- target_kind,
- size,
- padding,
+ // Every frame, for cached items, we need to request the render
+ // task cache item. The closure will be invoked on the first
+ // time through, and any time the render task output has been
+ // evicted from the texture cache.
+ match self.source {
+ ImageSource::Cache { ref mut size, ref mut handle } => {
+ let padding = DeviceIntSideOffsets::new(
+ 0,
+ (self.tile_spacing.width * size.width as f32 / self.stretch_size.width) as i32,
+ (self.tile_spacing.height * size.height as f32 / self.stretch_size.height) as i32,
+ 0,
);
- // Create a task to blit the rect from the child render
- // task above back into the right spot in the persistent
- // render target cache.
- RenderTask::new_blit(
- size,
- cache_to_target_task_id,
- rg_builder,
- )
+ size.width += padding.horizontal();
+ size.height += padding.vertical();
+
+ is_opaque &= padding == DeviceIntSideOffsets::zero();
+
+ let image_cache_key = ImageCacheKey {
+ request,
+ texel_rect: None,
+ };
+ let target_kind = if image_properties.descriptor.format.bytes_per_pixel() == 1 {
+ RenderTargetKind::Alpha
+ } else {
+ RenderTargetKind::Color
+ };
+
+ // Request a pre-rendered image task.
+ *handle = Some(frame_state.resource_cache.request_render_task(
+ RenderTaskCacheKey {
+ size: *size,
+ kind: RenderTaskCacheKeyKind::Image(image_cache_key),
+ },
+ frame_state.gpu_cache,
+ frame_state.render_tasks,
+ None,
+ image_properties.descriptor.is_opaque(),
+ |render_tasks| {
+ // Create a task to blit from the texture cache to
+ // a normal transient render task surface. This will
+ // copy only the sub-rect, if specified.
+ // TODO: figure out if/when we can do a blit instead.
+ let cache_to_target_task_id = RenderTask::new_scaling_with_padding(
+ BlitSource::Image { key: image_cache_key },
+ render_tasks,
+ target_kind,
+ *size,
+ padding,
+ );
+
+ // Create a task to blit the rect from the child render
+ // task above back into the right spot in the persistent
+ // render target cache.
+ render_tasks.add().init(RenderTask::new_blit(
+ *size,
+ BlitSource::RenderTask {
+ task_id: cache_to_target_task_id,
+ },
+ ))
+ }
+ ));
}
- );
-
- image_instance.src_color = Some(cached_task_handle);
- }
- }
- // Tiled image path.
- Some(ImageProperties { tiling: Some(tile_size), visible_rect, .. }) => {
- // we'll have a source handle per visible tile instead.
- image_instance.src_color = None;
-
- image_instance.visible_tiles.clear();
- // TODO: rename the blob's visible_rect into something that doesn't conflict
- // with the terminology we use during culling since it's not really the same
- // thing.
- let active_rect = visible_rect;
-
- // Tighten the clip rect because decomposing the repeated image can
- // produce primitives that are partially covering the original image
- // rect and we want to clip these extra parts out.
- let tight_clip_rect = visibility
- .combined_local_clip_rect
- .intersection(&common.prim_rect).unwrap();
- image_instance.tight_local_clip_rect = tight_clip_rect;
-
- let visible_rect = compute_conservative_visible_rect(
- &visibility.clip_chain,
- frame_state.current_dirty_region().combined,
- prim_spatial_node_index,
- frame_context.spatial_tree,
- );
-
- let base_edge_flags = edge_flags_for_tile_spacing(&self.tile_spacing);
-
- let stride = self.stretch_size + self.tile_spacing;
-
- // We are performing the decomposition on the CPU here, no need to
- // have it in the shader.
- common.may_need_repetition = false;
-
- let repetitions = image_tiling::repetitions(
- &common.prim_rect,
- &visible_rect,
- stride,
- );
-
- for image_tiling::Repetition { origin, edge_flags } in repetitions {
- let edge_flags = base_edge_flags | edge_flags;
-
- let layout_image_rect = LayoutRect {
- origin,
- size: self.stretch_size,
- };
+                        ImageSource::Default => {}
+                    }
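+
+                    // A sketch of the request_render_task pattern used above,
+                    // with the signature simplified (names illustrative):
+                    //
+                    //     let handle = resource_cache.request_render_task(key, .., |render_tasks| {
+                    //         // Invoked only on a cache miss, or after the
+                    //         // cached output was evicted from the texture cache.
+                    //         build_task(render_tasks)
+                    //     });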
- let tiles = image_tiling::tiles(
- &layout_image_rect,
- &visible_rect,
- &active_rect,
- tile_size as i32,
- );
-
- for tile in tiles {
- let request = request.with_tile(tile.offset);
- let size = frame_state.resource_cache.request_image(
- request,
- frame_state.gpu_cache,
- );
-
- let task_id = frame_state.rg_builder.add().init(
- RenderTask::new_image(size, request)
- );
-
- image_instance.visible_tiles.push(VisibleImageTile {
- src_color: task_id,
- edge_flags: tile.edge_flags & edge_flags,
- local_rect: tile.rect,
- local_clip_rect: tight_clip_rect,
- });
+ if is_opaque {
+ PrimitiveOpacity::from_alpha(self.color.a)
+ } else {
+ PrimitiveOpacity::translucent()
}
}
-
- if image_instance.visible_tiles.is_empty() {
- // Mark as invisible
- visibility.reset();
+                None => {
+                    PrimitiveOpacity::opaque()
+                }
}
- None => {
- image_instance.src_color = None;
- }
- }
-
- if let Some(mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) {
- self.write_prim_gpu_blocks(&mut request);
- }
+ };
}
pub fn write_prim_gpu_blocks(&self, request: &mut GpuDataRequest) {
@@ -394,19 +270,6 @@ impl ImageData {
}
}
-fn edge_flags_for_tile_spacing(tile_spacing: &LayoutSize) -> EdgeAaSegmentMask {
- let mut flags = EdgeAaSegmentMask::empty();
-
- if tile_spacing.width > 0.0 {
- flags |= EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::RIGHT;
- }
- if tile_spacing.height > 0.0 {
- flags |= EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM;
- }
-
- flags
-}
-
pub type ImageTemplate = PrimTemplate<ImageData>;
impl From<ImageKey> for ImageTemplate {
@@ -426,7 +289,6 @@ impl Internable for Image {
type Key = ImageKey;
type StoreData = ImageTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_IMAGES;
}
impl InternablePrimitive for Image {
@@ -446,10 +308,10 @@ impl InternablePrimitive for Image {
// TODO(gw): Refactor this to not need a separate image
// instance (see ImageInstance struct).
let image_instance_index = prim_store.images.push(ImageInstance {
+ opacity_binding_index: OpacityBindingIndex::INVALID,
segment_instance_index: SegmentInstanceIndex::INVALID,
tight_local_clip_rect: LayoutRect::zero(),
visible_tiles: Vec::new(),
- src_color: None,
});
PrimitiveInstanceKind::Image {
@@ -461,12 +323,7 @@ impl InternablePrimitive for Image {
}
impl CreateShadow for Image {
- fn create_shadow(
- &self,
- shadow: &Shadow,
- _: bool,
- _: RasterSpace,
- ) -> Self {
+ fn create_shadow(&self, shadow: &Shadow) -> Self {
Image {
tile_spacing: self.tile_spacing,
stretch_size: self.stretch_size,
@@ -520,7 +377,6 @@ impl InternDebug for YuvImageKey {}
pub struct YuvImageData {
pub color_depth: ColorDepth,
pub yuv_key: [ApiImageKey; 3],
- pub src_yuv: [Option<RenderTaskId>; 3],
pub format: YuvFormat,
pub color_space: YuvColorSpace,
pub color_range: ColorRange,
@@ -532,7 +388,6 @@ impl From<YuvImage> for YuvImageData {
YuvImageData {
color_depth: image.color_depth,
yuv_key: image.yuv_key,
- src_yuv: [None, None, None],
format: image.format,
color_space: image.color_space,
color_range: image.color_range,
@@ -551,30 +406,6 @@ impl YuvImageData {
common: &mut PrimTemplateCommonData,
frame_state: &mut FrameBuildingState,
) {
-
- self.src_yuv = [ None, None, None ];
-
- let channel_num = self.format.get_plane_num();
- debug_assert!(channel_num <= 3);
- for channel in 0 .. channel_num {
- let request = ImageRequest {
- key: self.yuv_key[channel],
- rendering: self.image_rendering,
- tile: None,
- };
-
- let size = frame_state.resource_cache.request_image(
- request,
- frame_state.gpu_cache,
- );
-
- let task_id = frame_state.rg_builder.add().init(
- RenderTask::new_image(size, request)
- );
-
- self.src_yuv[channel] = Some(task_id);
- }
-
if let Some(mut request) = frame_state.gpu_cache.request(&mut common.gpu_cache_handle) {
self.write_prim_gpu_blocks(&mut request);
};
@@ -631,7 +462,6 @@ impl Internable for YuvImage {
type Key = YuvImageKey;
type StoreData = YuvImageTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_YUV_IMAGES;
}
impl InternablePrimitive for YuvImage {
@@ -673,9 +503,9 @@ fn test_struct_sizes() {
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
assert_eq!(mem::size_of::<Image>(), 32, "Image size changed");
- assert_eq!(mem::size_of::<ImageTemplate>(), 72, "ImageTemplate size changed");
+ assert_eq!(mem::size_of::<ImageTemplate>(), 92, "ImageTemplate size changed");
assert_eq!(mem::size_of::<ImageKey>(), 52, "ImageKey size changed");
assert_eq!(mem::size_of::<YuvImage>(), 32, "YuvImage size changed");
- assert_eq!(mem::size_of::<YuvImageTemplate>(), 72, "YuvImageTemplate size changed");
+ assert_eq!(mem::size_of::<YuvImageTemplate>(), 60, "YuvImageTemplate size changed");
assert_eq!(mem::size_of::<YuvImageKey>(), 52, "YuvImageKey size changed");
}
diff --git a/third_party/webrender/webrender/src/prim_store/line_dec.rs b/third_party/webrender/webrender/src/prim_store/line_dec.rs
index 496bab75691..84537454533 100644
--- a/third_party/webrender/webrender/src/prim_store/line_dec.rs
+++ b/third_party/webrender/webrender/src/prim_store/line_dec.rs
@@ -3,10 +3,10 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use api::{
- ColorF, ColorU, RasterSpace,
+ ColorF, ColorU,
LineOrientation, LineStyle, PremultipliedColorF, Shadow,
};
-use api::units::*;
+use api::units::{Au, LayoutSizeAu, LayoutVector2D};
use crate::scene_building::{CreateShadow, IsVisible};
use crate::frame_builder::{FrameBuildingState};
use crate::gpu_cache::GpuDataRequest;
@@ -126,7 +126,6 @@ impl intern::Internable for LineDecoration {
type Key = LineDecorationKey;
type StoreData = LineDecorationTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_LINE_DECORATIONS;
}
impl InternablePrimitive for LineDecoration {
@@ -148,18 +147,13 @@ impl InternablePrimitive for LineDecoration {
) -> PrimitiveInstanceKind {
PrimitiveInstanceKind::LineDecoration {
data_handle,
- render_task: None,
+ cache_handle: None,
}
}
}
impl CreateShadow for LineDecoration {
- fn create_shadow(
- &self,
- shadow: &Shadow,
- _: bool,
- _: RasterSpace,
- ) -> Self {
+ fn create_shadow(&self, shadow: &Shadow) -> Self {
LineDecoration {
color: shadow.color.into(),
cache_key: self.cache_key.clone(),
@@ -173,74 +167,6 @@ impl IsVisible for LineDecoration {
}
}
-/// Choose the decoration mask tile size for a given line.
-///
-/// Given a line with overall size `rect_size` and the given `orientation`,
-/// return the dimensions of a single mask tile for the decoration pattern
-/// described by `style` and `wavy_line_thickness`.
-///
-/// If `style` is `Solid`, no mask tile is necessary; return `None`. The other
-/// styles each have their own characteristic periods of repetition, so for each
-/// one, this function returns a `LayoutSize` with the right aspect ratio and
-/// whose specific size is convenient for the `cs_line_decoration.glsl` fragment
-/// shader to work with. The shader uses a local coordinate space in which the
-/// tile fills a rectangle with one corner at the origin, and with the size this
-/// function returns.
-///
-/// The returned size is not necessarily in pixels; device scaling and other
-/// concerns can still affect the actual task size.
-///
-/// Regardless of whether `orientation` is `Vertical` or `Horizontal`, the
-/// `width` and `height` of the returned size are always horizontal and
-/// vertical, respectively.
-pub fn get_line_decoration_size(
- rect_size: &LayoutSize,
- orientation: LineOrientation,
- style: LineStyle,
- wavy_line_thickness: f32,
-) -> Option<LayoutSize> {
- let h = match orientation {
- LineOrientation::Horizontal => rect_size.height,
- LineOrientation::Vertical => rect_size.width,
- };
-
- // TODO(gw): The formulae below are based on the existing gecko and line
- // shader code. They give reasonable results for most inputs,
- // but could definitely do with a detailed pass to get better
- // quality on a wider range of inputs!
- // See nsCSSRendering::PaintDecorationLine in Gecko.
-
- let (parallel, perpendicular) = match style {
- LineStyle::Solid => {
- return None;
- }
- LineStyle::Dashed => {
- let dash_length = (3.0 * h).min(64.0).max(1.0);
-
- (2.0 * dash_length, 4.0)
- }
- LineStyle::Dotted => {
- let diameter = h.min(64.0).max(1.0);
- let period = 2.0 * diameter;
-
- (period, diameter)
- }
- LineStyle::Wavy => {
- let line_thickness = wavy_line_thickness.max(1.0);
- let slope_length = h - line_thickness;
- let flat_length = ((line_thickness - 1.0) * 2.0).max(1.0);
- let approx_period = 2.0 * (slope_length + flat_length);
-
- (approx_period, h)
- }
- };
-
- Some(match orientation {
- LineOrientation::Horizontal => LayoutSize::new(parallel, perpendicular),
- LineOrientation::Vertical => LayoutSize::new(perpendicular, parallel),
- })
-}
-
#[test]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
diff --git a/third_party/webrender/webrender/src/prim_store/mod.rs b/third_party/webrender/webrender/src/prim_store/mod.rs
index bc1db95d568..0a1514a9b35 100644
--- a/third_party/webrender/webrender/src/prim_store/mod.rs
+++ b/third_party/webrender/webrender/src/prim_store/mod.rs
@@ -2,35 +2,60 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{BorderRadius, ClipMode, ColorF, ColorU, RasterSpace};
+use api::{BorderRadius, ClipMode, ColorF, ColorU};
use api::{ImageRendering, RepeatMode, PrimitiveFlags};
-use api::{PremultipliedColorF, PropertyBinding, Shadow};
-use api::{PrimitiveKeyKind, FillRule, POLYGON_CLIP_VERTEX_MAX};
+use api::{PremultipliedColorF, PropertyBinding, Shadow, GradientStop};
+use api::{BoxShadowClipMode, LineStyle, LineOrientation, BorderStyle};
+use api::{PrimitiveKeyKind, ExtendMode, EdgeAaSegmentMask};
+use api::image_tiling::{self, Repetition};
use api::units::*;
-use euclid::{SideOffsets2D, Size2D};
-use malloc_size_of::MallocSizeOf;
-use crate::segment::EdgeAaSegmentMask;
+use crate::border::{get_max_scale_for_border, build_border_instances};
use crate::border::BorderSegmentCacheKey;
-use crate::clip::{ClipChainId, ClipSet};
-use crate::debug_item::DebugItem;
+use crate::clip::{ClipStore};
+use crate::spatial_tree::{ROOT_SPATIAL_NODE_INDEX, SpatialTree, CoordinateSpaceMapping, SpatialNodeIndex, VisibleFace};
+use crate::clip::{ClipDataStore, ClipNodeFlags, ClipChainId, ClipChainInstance, ClipItemKind};
+use crate::debug_colors;
+use crate::debug_render::DebugItem;
use crate::scene_building::{CreateShadow, IsVisible};
-use crate::frame_builder::FrameBuildingState;
+use euclid::{SideOffsets2D, Transform3D, Rect, Scale, Size2D, Point2D, Vector2D};
+use euclid::approxeq::ApproxEq;
+use crate::frame_builder::{FrameBuildingContext, FrameBuildingState, PictureContext, PictureState};
+use crate::frame_builder::{FrameVisibilityContext, FrameVisibilityState};
use crate::glyph_rasterizer::GlyphKey;
-use crate::gpu_cache::{GpuCacheAddress, GpuCacheHandle, GpuDataRequest};
+use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle, GpuDataRequest, ToGpuBlocks};
use crate::gpu_types::{BrushFlags};
use crate::intern;
-use crate::picture::PicturePrimitive;
+use crate::internal_types::PlaneSplitAnchor;
+use malloc_size_of::MallocSizeOf;
+use crate::picture::{PictureCompositeMode, PicturePrimitive, ClusterFlags, TileCacheLogger};
+use crate::picture::{PrimitiveList, RecordedDirtyRegion, SurfaceIndex, RetainedTiles, RasterConfig};
+use crate::prim_store::backdrop::BackdropDataHandle;
+use crate::prim_store::borders::{ImageBorderDataHandle, NormalBorderDataHandle};
+use crate::prim_store::gradient::{GRADIENT_FP_STOPS, GradientCacheKey, GradientStopKey};
+use crate::prim_store::gradient::{LinearGradientPrimitive, LinearGradientDataHandle, RadialGradientDataHandle, ConicGradientDataHandle};
+use crate::prim_store::image::{ImageDataHandle, ImageInstance, VisibleImageTile, YuvImageDataHandle};
+use crate::prim_store::line_dec::{LineDecorationDataHandle, MAX_LINE_DECORATION_RESOLUTION};
+use crate::prim_store::picture::PictureDataHandle;
+use crate::prim_store::text_run::{TextRunDataHandle, TextRunPrimitive};
#[cfg(debug_assertions)]
use crate::render_backend::{FrameId};
+use crate::render_backend::DataStores;
use crate::render_task_graph::RenderTaskId;
-use crate::resource_cache::ImageProperties;
+use crate::render_task_cache::{RenderTaskCacheKeyKind, RenderTaskCacheEntryHandle, RenderTaskCacheKey, to_cache_size};
+use crate::render_task::RenderTask;
+use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH};
+use crate::resource_cache::{ImageProperties, ImageRequest};
use crate::scene::SceneProperties;
-use std::{hash, ops, u32, usize};
+use crate::segment::SegmentBuilder;
+use std::{cmp, fmt, hash, ops, u32, usize, mem};
#[cfg(debug_assertions)]
use std::sync::atomic::{AtomicUsize, Ordering};
-use crate::util::Recycler;
-use crate::internal_types::LayoutPrimitiveInfo;
-use crate::visibility::PrimitiveVisibility;
+use crate::storage;
+use crate::texture_cache::TEXTURE_REGION_DIMENSIONS;
+use crate::util::{MatrixHelpers, MaxRect, Recycler, ScaleOffset, RectHelpers, PointHelpers};
+use crate::util::{clamp_to_scale_factor, pack_as_float, project_rect, raster_rect_to_device_pixels};
+use crate::internal_types::{LayoutPrimitiveInfo, Filter};
+use smallvec::SmallVec;
pub mod backdrop;
pub mod borders;
@@ -41,18 +66,6 @@ pub mod picture;
pub mod text_run;
pub mod interned;
-mod storage;
-
-use backdrop::BackdropDataHandle;
-use borders::{ImageBorderDataHandle, NormalBorderDataHandle};
-use gradient::{LinearGradientPrimitive, LinearGradientDataHandle, RadialGradientDataHandle, ConicGradientDataHandle};
-use image::{ImageDataHandle, ImageInstance, YuvImageDataHandle};
-use line_dec::LineDecorationDataHandle;
-use picture::PictureDataHandle;
-use text_run::{TextRunDataHandle, TextRunPrimitive};
-
-pub const VECS_PER_SEGMENT: usize = 2;
-
/// Counter for unique primitive IDs for debug tracing.
#[cfg(debug_assertions)]
static NEXT_PRIM_ID: AtomicUsize = AtomicUsize::new(0);
@@ -69,6 +82,11 @@ pub fn register_prim_chase_id(id: PrimitiveDebugId) {
pub fn register_prim_chase_id(_: PrimitiveDebugId) {
}
+const MIN_BRUSH_SPLIT_AREA: f32 = 128.0 * 128.0;
+pub const VECS_PER_SEGMENT: usize = 2;
+
+const MAX_MASK_SIZE: f32 = 4096.0;
+
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(Debug, Copy, Clone, MallocSizeOf)]
@@ -98,6 +116,234 @@ impl PrimitiveOpacity {
}
}
+#[derive(Clone, Debug)]
+pub struct SpaceSnapper {
+ pub ref_spatial_node_index: SpatialNodeIndex,
+ current_target_spatial_node_index: SpatialNodeIndex,
+ snapping_transform: Option<ScaleOffset>,
+ pub device_pixel_scale: DevicePixelScale,
+}
+
+impl SpaceSnapper {
+ pub fn new(
+ ref_spatial_node_index: SpatialNodeIndex,
+ device_pixel_scale: DevicePixelScale,
+ ) -> Self {
+ SpaceSnapper {
+ ref_spatial_node_index,
+ current_target_spatial_node_index: SpatialNodeIndex::INVALID,
+ snapping_transform: None,
+ device_pixel_scale,
+ }
+ }
+
+ pub fn new_with_target(
+ ref_spatial_node_index: SpatialNodeIndex,
+ target_node_index: SpatialNodeIndex,
+ device_pixel_scale: DevicePixelScale,
+ spatial_tree: &SpatialTree,
+ ) -> Self {
+ let mut snapper = SpaceSnapper {
+ ref_spatial_node_index,
+ current_target_spatial_node_index: SpatialNodeIndex::INVALID,
+ snapping_transform: None,
+ device_pixel_scale,
+ };
+
+ snapper.set_target_spatial_node(target_node_index, spatial_tree);
+ snapper
+ }
+
+ pub fn set_target_spatial_node(
+ &mut self,
+ target_node_index: SpatialNodeIndex,
+ spatial_tree: &SpatialTree,
+ ) {
+ if target_node_index == self.current_target_spatial_node_index {
+ return
+ }
+
+ let ref_spatial_node = &spatial_tree.spatial_nodes[self.ref_spatial_node_index.0 as usize];
+ let target_spatial_node = &spatial_tree.spatial_nodes[target_node_index.0 as usize];
+
+ self.current_target_spatial_node_index = target_node_index;
+ self.snapping_transform = match (ref_spatial_node.snapping_transform, target_spatial_node.snapping_transform) {
+ (Some(ref ref_scale_offset), Some(ref target_scale_offset)) => {
+ Some(ref_scale_offset
+ .inverse()
+ .accumulate(target_scale_offset)
+ .scale(self.device_pixel_scale.0))
+ }
+ _ => None,
+ };
+ }
+
+ pub fn snap_rect<F>(&self, rect: &Rect<f32, F>) -> Rect<f32, F> where F: fmt::Debug {
+ debug_assert!(self.current_target_spatial_node_index != SpatialNodeIndex::INVALID);
+ match self.snapping_transform {
+ Some(ref scale_offset) => {
+                let snapped_device_rect: DeviceRect = scale_offset.map_rect(rect).snap();
+ scale_offset.unmap_rect(&snapped_device_rect)
+ }
+ None => *rect,
+ }
+ }
+
+ pub fn snap_point<F>(&self, point: &Point2D<f32, F>) -> Point2D<f32, F> where F: fmt::Debug {
+ debug_assert!(self.current_target_spatial_node_index != SpatialNodeIndex::INVALID);
+ match self.snapping_transform {
+ Some(ref scale_offset) => {
+                let snapped_device_point: DevicePoint = scale_offset.map_point(point).snap();
+                scale_offset.unmap_point(&snapped_device_point)
+ }
+ None => *point,
+ }
+ }
+
+ pub fn snap_size<F>(&self, size: &Size2D<f32, F>) -> Size2D<f32, F> where F: fmt::Debug {
+ debug_assert!(self.current_target_spatial_node_index != SpatialNodeIndex::INVALID);
+ match self.snapping_transform {
+ Some(ref scale_offset) => {
+ let rect = Rect::<f32, F>::new(Point2D::<f32, F>::zero(), *size);
+                let snapped_device_rect: DeviceRect = scale_offset.map_rect(&rect).snap();
+ scale_offset.unmap_rect(&snapped_device_rect).size
+ }
+ None => *size,
+ }
+ }
+}
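+
+// Snapping, in short: map into device pixels, round, and map back. As a rough
+// worked example, with a pure 2x scale as the snapping transform, a local
+// coordinate of 0.3 maps to 0.6 device px, snaps to 1.0, and unmaps to 0.5.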
+
+#[derive(Debug, Clone)]
+pub struct SpaceMapper<F, T> {
+ kind: CoordinateSpaceMapping<F, T>,
+ pub ref_spatial_node_index: SpatialNodeIndex,
+ pub current_target_spatial_node_index: SpatialNodeIndex,
+ pub bounds: Rect<f32, T>,
+ visible_face: VisibleFace,
+}
+
+impl<F, T> SpaceMapper<F, T> where F: fmt::Debug {
+ pub fn new(
+ ref_spatial_node_index: SpatialNodeIndex,
+ bounds: Rect<f32, T>,
+ ) -> Self {
+ SpaceMapper {
+ kind: CoordinateSpaceMapping::Local,
+ ref_spatial_node_index,
+ current_target_spatial_node_index: ref_spatial_node_index,
+ bounds,
+ visible_face: VisibleFace::Front,
+ }
+ }
+
+ pub fn new_with_target(
+ ref_spatial_node_index: SpatialNodeIndex,
+ target_node_index: SpatialNodeIndex,
+ bounds: Rect<f32, T>,
+ spatial_tree: &SpatialTree,
+ ) -> Self {
+ let mut mapper = Self::new(ref_spatial_node_index, bounds);
+ mapper.set_target_spatial_node(target_node_index, spatial_tree);
+ mapper
+ }
+
+ pub fn set_target_spatial_node(
+ &mut self,
+ target_node_index: SpatialNodeIndex,
+ spatial_tree: &SpatialTree,
+ ) {
+ if target_node_index == self.current_target_spatial_node_index {
+ return
+ }
+
+ let ref_spatial_node = &spatial_tree.spatial_nodes[self.ref_spatial_node_index.0 as usize];
+ let target_spatial_node = &spatial_tree.spatial_nodes[target_node_index.0 as usize];
+
+ self.kind = if self.ref_spatial_node_index == target_node_index {
+ CoordinateSpaceMapping::Local
+ } else if ref_spatial_node.coordinate_system_id == target_spatial_node.coordinate_system_id {
+ let scale_offset = ref_spatial_node.content_transform
+ .inverse()
+ .accumulate(&target_spatial_node.content_transform);
+ CoordinateSpaceMapping::ScaleOffset(scale_offset)
+ } else {
+ let transform = spatial_tree
+ .get_relative_transform(target_node_index, self.ref_spatial_node_index)
+ .into_transform()
+ .with_source::<F>()
+ .with_destination::<T>();
+ CoordinateSpaceMapping::Transform(transform)
+ };
+
+ self.visible_face = self.kind.visible_face();
+ self.current_target_spatial_node_index = target_node_index;
+ }
+
+ pub fn get_transform(&self) -> Transform3D<f32, F, T> {
+ match self.kind {
+ CoordinateSpaceMapping::Local => {
+ Transform3D::identity()
+ }
+ CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+ scale_offset.to_transform()
+ }
+ CoordinateSpaceMapping::Transform(transform) => {
+ transform
+ }
+ }
+ }
+
+ pub fn unmap(&self, rect: &Rect<f32, T>) -> Option<Rect<f32, F>> {
+ match self.kind {
+ CoordinateSpaceMapping::Local => {
+ Some(rect.cast_unit())
+ }
+ CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+ Some(scale_offset.unmap_rect(rect))
+ }
+ CoordinateSpaceMapping::Transform(ref transform) => {
+ transform.inverse_rect_footprint(rect)
+ }
+ }
+ }
+
+ pub fn map(&self, rect: &Rect<f32, F>) -> Option<Rect<f32, T>> {
+ match self.kind {
+ CoordinateSpaceMapping::Local => {
+ Some(rect.cast_unit())
+ }
+ CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+ Some(scale_offset.map_rect(rect))
+ }
+ CoordinateSpaceMapping::Transform(ref transform) => {
+ match project_rect(transform, rect, &self.bounds) {
+ Some(bounds) => {
+ Some(bounds)
+ }
+ None => {
+ warn!("parent relative transform can't transform the primitive rect for {:?}", rect);
+ None
+ }
+ }
+ }
+ }
+ }
+
+ pub fn map_vector(&self, v: Vector2D<f32, F>) -> Vector2D<f32, T> {
+ match self.kind {
+ CoordinateSpaceMapping::Local => {
+ v.cast_unit()
+ }
+ CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
+ scale_offset.map_vector(&v)
+ }
+ CoordinateSpaceMapping::Transform(ref transform) => {
+ transform.transform_vector2d(v)
+ }
+ }
+ }
+}
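+
+// Typical use, sketched (variable names illustrative): map primitive-local
+// rects into world space, falling back to None when unprojectable.
+//
+//     let map_local_to_world: SpaceMapper<LayoutPixel, WorldPixel> =
+//         SpaceMapper::new_with_target(
+//             ROOT_SPATIAL_NODE_INDEX,
+//             prim_spatial_node_index,
+//             world_bounds,
+//             spatial_tree,
+//         );
+//     let world_rect = map_local_to_world.map(&local_rect);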
+
/// For external images, it's not possible to know the
/// UV coords of the image (or the image data itself)
/// until the render thread receives the frame and issues
@@ -129,6 +375,21 @@ impl ClipTaskIndex {
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct PictureIndex(pub usize);
+impl GpuCacheHandle {
+ pub fn as_int(self, gpu_cache: &GpuCache) -> i32 {
+ gpu_cache.get_address(&self).as_int()
+ }
+}
+
+impl GpuCacheAddress {
+ pub fn as_int(self) -> i32 {
+ // TODO(gw): Temporarily encode GPU Cache addresses as a single int.
+ // In the future, we can change the PrimitiveInstanceData struct
+ // to use 2x u16 for the vertex attribute instead of an i32.
+ self.v as i32 * MAX_VERTEX_TEXTURE_WIDTH as i32 + self.u as i32
+ }
+}
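+
+// Worked example of the packing above, assuming MAX_VERTEX_TEXTURE_WIDTH is
+// 1024: the address (u: 3, v: 2) packs to 2 * 1024 + 3 = 2051, and the vertex
+// shader recovers u = 2051 % 1024 and v = 2051 / 1024.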
+
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(Copy, Debug, Clone, MallocSizeOf, PartialEq)]
@@ -210,46 +471,6 @@ impl From<WorldRect> for RectangleKey {
}
}
-/// To create a fixed-size representation of a polygon, we use a fixed
-/// number of points. Our initialization method restricts us to values
-/// <= 32. If our constant POLYGON_CLIP_VERTEX_MAX is > 32, the Rust
-/// compiler will complain.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Copy, Debug, Clone, Hash, MallocSizeOf, PartialEq)]
-pub struct PolygonKey {
- pub point_count: u8,
- pub points: [PointKey; POLYGON_CLIP_VERTEX_MAX],
- pub fill_rule: FillRule,
-}
-
-impl PolygonKey {
- pub fn new(
- points_layout: &Vec<LayoutPoint>,
- fill_rule: FillRule,
- ) -> Self {
- // We have to fill fixed-size arrays with data from a Vec.
- // We'll do this by initializing the arrays to known-good
- // values then overwriting those values as long as our
- // iterator provides values.
- let mut points: [PointKey; POLYGON_CLIP_VERTEX_MAX] = [PointKey { x: 0.0, y: 0.0}; POLYGON_CLIP_VERTEX_MAX];
-
- let mut point_count: u8 = 0;
- for (src, dest) in points_layout.iter().zip(points.iter_mut()) {
- *dest = (*src as LayoutPoint).into();
- point_count = point_count + 1;
- }
-
- PolygonKey {
- point_count,
- points,
- fill_rule,
- }
- }
-}
-
-impl Eq for PolygonKey {}
-
/// A hashable SideOffset2D that can be used in primitive keys.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -426,21 +647,6 @@ impl From<WorldPoint> for PointKey {
}
}
-/// A hashable float for using as a key during primitive interning.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Copy, Clone, MallocSizeOf, PartialEq)]
-pub struct FloatKey(f32);
-
-impl Eq for FloatKey {}
-
-impl hash::Hash for FloatKey {
- fn hash<H: hash::Hasher>(&self, state: &mut H) {
- self.0.to_bits().hash(state);
- }
-}
-
-
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(Debug, Clone, Eq, MallocSizeOf, PartialEq, Hash)]
@@ -500,25 +706,6 @@ pub enum PrimitiveTemplateKind {
Clear,
}
-impl PrimitiveTemplateKind {
- /// Write any GPU blocks for the primitive template to the given request object.
- pub fn write_prim_gpu_blocks(
- &self,
- request: &mut GpuDataRequest,
- scene_properties: &SceneProperties,
- ) {
- match *self {
- PrimitiveTemplateKind::Clear => {
- // Opaque black with operator dest out
- request.push(PremultipliedColorF::BLACK);
- }
- PrimitiveTemplateKind::Rectangle { ref color, .. } => {
- request.push(scene_properties.resolve_color(color).premultiplied())
- }
- }
- }
-}
-
/// Construct the primitive template data from a primitive key. This
/// is invoked when a primitive key is created and the interner
/// doesn't currently contain a primitive with this key.
@@ -540,7 +727,6 @@ impl From<PrimitiveKeyKind> for PrimitiveTemplateKind {
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
#[derive(MallocSizeOf)]
-#[derive(Debug)]
pub struct PrimTemplateCommonData {
pub flags: PrimitiveFlags,
pub may_need_repetition: bool,
@@ -603,6 +789,25 @@ impl From<PrimitiveKey> for PrimitiveTemplate {
}
}
+impl PrimitiveTemplateKind {
+ /// Write any GPU blocks for the primitive template to the given request object.
+ fn write_prim_gpu_blocks(
+ &self,
+ request: &mut GpuDataRequest,
+ scene_properties: &SceneProperties,
+ ) {
+ match *self {
+ PrimitiveTemplateKind::Clear => {
+ // Opaque black with operator dest out
+ request.push(PremultipliedColorF::BLACK);
+ }
+ PrimitiveTemplateKind::Rectangle { ref color, .. } => {
+ request.push(scene_properties.resolve_color(color).premultiplied())
+ }
+ }
+ }
+}
+
impl PrimitiveTemplate {
/// Update the GPU cache for a given primitive template. This may be called multiple
/// times per frame, by each primitive reference that refers to this interned
@@ -634,7 +839,6 @@ impl intern::Internable for PrimitiveKeyKind {
type Key = PrimitiveKey;
type StoreData = PrimitiveTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_PRIMITIVES;
}
impl InternablePrimitive for PrimitiveKeyKind {
@@ -666,6 +870,7 @@ impl InternablePrimitive for PrimitiveKeyKind {
};
PrimitiveInstanceKind::Rectangle {
data_handle,
+ opacity_binding_index: OpacityBindingIndex::INVALID,
segment_instance_index: SegmentInstanceIndex::INVALID,
color_binding_index,
}
@@ -674,6 +879,44 @@ impl InternablePrimitive for PrimitiveKeyKind {
}
}
+// Maintains a list of opacity bindings that have been collapsed into
+// the color of a single primitive. This is an important optimization
+// that avoids allocating an intermediate surface for most common
+// uses of opacity filters.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct OpacityBinding {
+ pub bindings: Vec<PropertyBinding<f32>>,
+ pub current: f32,
+}
+
+impl OpacityBinding {
+ pub fn new() -> OpacityBinding {
+ OpacityBinding {
+ bindings: Vec::new(),
+ current: 1.0,
+ }
+ }
+
+ // Add a new opacity value / binding to the list
+ pub fn push(&mut self, binding: PropertyBinding<f32>) {
+ self.bindings.push(binding);
+ }
+
+ // Resolve the current value of each opacity binding, and
+ // store that as a single combined opacity.
+ pub fn update(&mut self, scene_properties: &SceneProperties) {
+ let mut new_opacity = 1.0;
+
+ for binding in &self.bindings {
+ let opacity = scene_properties.resolve_float(binding);
+ new_opacity = new_opacity * opacity;
+ }
+
+ self.current = new_opacity;
+ }
+}
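+
+// A minimal usage sketch (values illustrative); resolved bindings multiply:
+//
+//     let mut binding = OpacityBinding::new();
+//     binding.push(PropertyBinding::Value(0.5));
+//     binding.push(PropertyBinding::Value(0.5));
+//     binding.update(&scene_properties);
+//     assert_eq!(binding.current, 0.25);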
+
#[derive(Debug, MallocSizeOf)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -690,6 +933,13 @@ pub struct VisibleGradientTile {
pub local_clip_rect: LayoutRect,
}
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct CachedGradientSegment {
+ pub handle: RenderTaskCacheEntryHandle,
+ pub local_rect: LayoutRect,
+}
+
/// Information about how to cache a border segment,
/// along with the current render task cache entry.
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -739,21 +989,92 @@ impl BrushSegment {
brush_flags,
}
}
+
+ /// Write out to the clip mask instances array the correct clip mask
+ /// config for this segment.
+ pub fn update_clip_task(
+ &self,
+ clip_chain: Option<&ClipChainInstance>,
+ prim_bounding_rect: WorldRect,
+ root_spatial_node_index: SpatialNodeIndex,
+ surface_index: SurfaceIndex,
+ pic_state: &mut PictureState,
+ frame_context: &FrameBuildingContext,
+ frame_state: &mut FrameBuildingState,
+ clip_data_store: &mut ClipDataStore,
+ unclipped: &DeviceRect,
+ device_pixel_scale: DevicePixelScale,
+ ) -> ClipMaskKind {
+ match clip_chain {
+ Some(clip_chain) => {
+ if !clip_chain.needs_mask ||
+ (!self.may_need_clip_mask && !clip_chain.has_non_local_clips) {
+ return ClipMaskKind::None;
+ }
+
+ let segment_world_rect = match pic_state.map_pic_to_world.map(&clip_chain.pic_clip_rect) {
+ Some(rect) => rect,
+ None => return ClipMaskKind::Clipped,
+ };
+
+ let segment_world_rect = match segment_world_rect.intersection(&prim_bounding_rect) {
+ Some(rect) => rect,
+ None => return ClipMaskKind::Clipped,
+ };
+
+                // Get a minimal device-space rect, clipped to the screen, that
+                // we need to allocate for the clip mask, as well as the
+                // interpolated snap offsets.
+ let device_rect = match get_clipped_device_rect(
+ unclipped,
+ &pic_state.map_raster_to_world,
+ segment_world_rect,
+ device_pixel_scale,
+ ) {
+ Some(info) => info,
+ None => {
+ return ClipMaskKind::Clipped;
+ }
+ };
+
+ let (device_rect, device_pixel_scale) = adjust_mask_scale_for_max_size(device_rect, device_pixel_scale);
+
+ let clip_task_id = RenderTask::new_mask(
+ device_rect.to_i32(),
+ clip_chain.clips_range,
+ root_spatial_node_index,
+ frame_state.clip_store,
+ frame_state.gpu_cache,
+ frame_state.resource_cache,
+ frame_state.render_tasks,
+ clip_data_store,
+ device_pixel_scale,
+ frame_context.fb_config,
+ );
+ let port = frame_state
+ .surfaces[surface_index.0]
+ .render_tasks
+ .unwrap_or_else(|| panic!("bug: no task for surface {:?}", surface_index))
+ .port;
+ frame_state.render_tasks.add_dependency(port, clip_task_id);
+ ClipMaskKind::Mask(clip_task_id)
+ }
+ None => {
+ ClipMaskKind::Clipped
+ }
+ }
+ }
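+
+    // Outcome summary for `update_clip_task`: `ClipMaskKind::None` when no
+    // mask is needed, `ClipMaskKind::Clipped` when the segment is culled
+    // entirely, and `ClipMaskKind::Mask(id)` when a mask render task has been
+    // allocated and registered as a dependency of the surface's port.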
}
-#[derive(Debug, Clone)]
+#[derive(Debug)]
#[repr(C)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
struct ClipRect {
rect: LayoutRect,
mode: f32,
}
-#[derive(Debug, Clone)]
+#[derive(Debug)]
#[repr(C)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
struct ClipCorner {
rect: LayoutRect,
outer_radius_x: f32,
@@ -762,7 +1083,23 @@ struct ClipCorner {
inner_radius_y: f32,
}
+impl ToGpuBlocks for ClipCorner {
+ fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
+ self.write(&mut request)
+ }
+}
+
impl ClipCorner {
+ fn write(&self, request: &mut GpuDataRequest) {
+ request.push(self.rect);
+ request.push([
+ self.outer_radius_x,
+ self.outer_radius_y,
+ self.inner_radius_x,
+ self.inner_radius_y,
+ ]);
+ }
+
fn uniform(rect: LayoutRect, outer_radius: f32, inner_radius: f32) -> ClipCorner {
ClipCorner {
rect,
@@ -774,10 +1111,25 @@ impl ClipCorner {
}
}
-#[derive(Debug, Clone)]
+#[derive(Debug)]
#[repr(C)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct ImageMaskData {
+ /// The local size of the whole masked area.
+ pub local_mask_size: LayoutSize,
+}
+
+impl ToGpuBlocks for ImageMaskData {
+ fn write_gpu_blocks(&self, mut request: GpuDataRequest) {
+ request.push([
+ self.local_mask_size.width,
+ self.local_mask_size.height,
+ 0.0,
+ 0.0,
+ ]);
+ }
+}
+
+#[derive(Debug)]
pub struct ClipData {
rect: ClipRect,
top_left: ClipCorner,
@@ -906,6 +1258,19 @@ impl ClipData {
),
}
}
+
+ pub fn write(&self, request: &mut GpuDataRequest) {
+ request.push(self.rect.rect);
+ request.push([self.rect.mode, 0.0, 0.0, 0.0]);
+ for corner in &[
+ &self.top_left,
+ &self.top_right,
+ &self.bottom_left,
+ &self.bottom_right,
+ ] {
+ corner.write(request);
+ }
+ }
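+
+    // Note on the layout emitted by `write`: two header blocks (rect, then
+    // mode) followed by two blocks per corner (rect, then radii), i.e. ten
+    // GPU blocks per clip in total.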
}
/// A hashable descriptor for nine-patches, used by image and
@@ -954,8 +1319,6 @@ impl CreateShadow for PrimitiveKeyKind {
fn create_shadow(
&self,
shadow: &Shadow,
- _: bool,
- _: RasterSpace,
) -> PrimitiveKeyKind {
match *self {
PrimitiveKeyKind::Rectangle { .. } => {
@@ -975,7 +1338,7 @@ impl CreateShadow for PrimitiveKeyKind {
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct PrimitiveDebugId(pub usize);
-#[derive(Debug)]
+#[derive(Clone, Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
pub enum PrimitiveInstanceKind {
/// Direct reference to a Picture
@@ -1005,12 +1368,12 @@ pub enum PrimitiveInstanceKind {
// the things we store here in the instance, and
// use them directly. This will remove cache_handle,
// but also the opacity, clip_task_id etc below.
- render_task: Option<RenderTaskId>,
+ cache_handle: Option<RenderTaskCacheEntryHandle>,
},
NormalBorder {
/// Handle to the common interned data for this primitive.
data_handle: NormalBorderDataHandle,
- render_task_ids: storage::Range<RenderTaskId>,
+ cache_handles: storage::Range<RenderTaskCacheEntryHandle>,
},
ImageBorder {
/// Handle to the common interned data for this primitive.
@@ -1019,6 +1382,7 @@ pub enum PrimitiveInstanceKind {
Rectangle {
/// Handle to the common interned data for this primitive.
data_handle: PrimitiveDataHandle,
+ opacity_binding_index: OpacityBindingIndex,
segment_instance_index: SegmentInstanceIndex,
color_binding_index: ColorBindingIndex,
},
@@ -1034,19 +1398,10 @@ pub enum PrimitiveInstanceKind {
image_instance_index: ImageInstanceIndex,
is_compositor_surface: bool,
},
- /// Always rendered directly into the picture. This tends to be
- /// faster with SWGL.
LinearGradient {
/// Handle to the common interned data for this primitive.
data_handle: LinearGradientDataHandle,
- visible_tiles_range: GradientTileRange,
- },
- /// Always rendered via a cached render task. Usually faster with
- /// a GPU.
- CachedLinearGradient {
- /// Handle to the common interned data for this primitive.
- data_handle: LinearGradientDataHandle,
- visible_tiles_range: GradientTileRange,
+ gradient_index: LinearGradientIndex,
},
RadialGradient {
/// Handle to the common interned data for this primitive.
@@ -1069,7 +1424,108 @@ pub enum PrimitiveInstanceKind {
},
}
-#[derive(Debug)]
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PrimitiveVisibilityIndex(pub u32);
+
+impl PrimitiveVisibilityIndex {
+ pub const INVALID: PrimitiveVisibilityIndex = PrimitiveVisibilityIndex(u32::MAX);
+}
+
+/// A bit mask describing which dirty regions a primitive is visible in.
+/// A value of 0 means not visible in any region, while a mask of 0xffff
+/// would be considered visible in all regions.
+#[derive(Debug, Copy, Clone)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct PrimitiveVisibilityMask {
+ bits: u16,
+}
+
+impl PrimitiveVisibilityMask {
+ /// Construct a default mask, where no regions are considered visible
+ pub fn empty() -> Self {
+ PrimitiveVisibilityMask {
+ bits: 0,
+ }
+ }
+
+ pub fn all() -> Self {
+ PrimitiveVisibilityMask {
+ bits: !0,
+ }
+ }
+
+ pub fn include(&mut self, other: PrimitiveVisibilityMask) {
+ self.bits |= other.bits;
+ }
+
+ pub fn intersects(&self, other: PrimitiveVisibilityMask) -> bool {
+ (self.bits & other.bits) != 0
+ }
+
+ /// Mark a given region index as visible
+ pub fn set_visible(&mut self, region_index: usize) {
+ debug_assert!(region_index < PrimitiveVisibilityMask::MAX_DIRTY_REGIONS);
+ self.bits |= 1 << region_index;
+ }
+
+ /// Returns true if there are no visible regions
+ pub fn is_empty(&self) -> bool {
+ self.bits == 0
+ }
+
+ /// The maximum number of supported dirty regions.
+ pub const MAX_DIRTY_REGIONS: usize = 8 * mem::size_of::<PrimitiveVisibilityMask>();
+}
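+
+// A minimal sketch of combining visibility across dirty regions:
+//
+//     let mut mask = PrimitiveVisibilityMask::empty();
+//     mask.set_visible(0); // seen in dirty region 0
+//     mask.set_visible(3); // and in dirty region 3
+//     assert!(!mask.is_empty());
+//     assert!(mask.intersects(PrimitiveVisibilityMask::all()));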
+
+bitflags! {
+ /// A set of bitflags that can be set in the visibility information
+ /// for a primitive instance. This can be used to control how primitives
+ /// are treated during batching.
+ // TODO(gw): We should also move `is_compositor_surface` to be part of
+ // this flags struct.
+ #[cfg_attr(feature = "capture", derive(Serialize))]
+ pub struct PrimitiveVisibilityFlags: u16 {
+ /// Implies that this primitive covers the entire picture cache slice,
+ /// and can thus be dropped during batching and drawn with clear color.
+ const IS_BACKDROP = 1;
+ }
+}
+
+/// Information stored for a visible primitive about the visible
+/// rect and associated clip information.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+pub struct PrimitiveVisibility {
+ /// The clip chain instance that was built for this primitive.
+ pub clip_chain: ClipChainInstance,
+
+ /// The current world rect, clipped to screen / dirty rect boundaries.
+ // TODO(gw): This is only used by a small number of primitives.
+ // It's probably faster to not store this and recalculate
+ // on demand in those cases?
+ pub clipped_world_rect: WorldRect,
+
+ /// An index into the clip task instances array in the primitive
+ /// store. If this is ClipTaskIndex::INVALID, then the primitive
+ /// has no clip mask. Otherwise, it may store the offset of the
+ /// global clip mask task for this primitive, or the first of
+ /// a list of clip task ids (one per segment).
+ pub clip_task_index: ClipTaskIndex,
+
+ /// A set of flags that define how this primitive should be handled
+    /// during batching of visible primitives.
+ pub flags: PrimitiveVisibilityFlags,
+
+ /// A mask defining which of the dirty regions this primitive is visible in.
+ pub visibility_mask: PrimitiveVisibilityMask,
+
+ /// The current combined local clip for this primitive, from
+ /// the primitive local clip above and the current clip chain.
+ pub combined_local_clip_rect: LayoutRect,
+}
+
+#[derive(Clone, Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
pub struct PrimitiveInstance {
/// Identifies the kind of primitive this
@@ -1078,6 +1534,9 @@ pub struct PrimitiveInstance {
/// can be found.
pub kind: PrimitiveInstanceKind,
+ /// Local space clip rect for this instance
+ pub local_clip_rect: LayoutRect,
+
#[cfg(debug_assertions)]
pub id: PrimitiveDebugId,
@@ -1086,14 +1545,12 @@ pub struct PrimitiveInstance {
#[cfg(debug_assertions)]
pub prepared_frame_id: FrameId,
- /// All information and state related to clip(s) for this primitive
- pub clip_set: ClipSet,
+ /// If this primitive is visible, an index into the instance
+ /// visibility scratch buffer. If not visible, INVALID.
+ pub visibility_info: PrimitiveVisibilityIndex,
- /// Information related to the current visibility state of this
- /// primitive.
- // TODO(gw): Currently built each frame, but can be retained.
- // TODO(gw): Remove clipped_world_rect (use tile bounds to determine vis flags)
- pub vis: PrimitiveVisibility,
+ /// ID of the clip chain that this primitive is clipped by.
+ pub clip_chain_id: ClipChainId,
}
impl PrimitiveInstance {
@@ -1103,26 +1560,20 @@ impl PrimitiveInstance {
clip_chain_id: ClipChainId,
) -> Self {
PrimitiveInstance {
+ local_clip_rect,
kind,
#[cfg(debug_assertions)]
prepared_frame_id: FrameId::INVALID,
#[cfg(debug_assertions)]
id: PrimitiveDebugId(NEXT_PRIM_ID.fetch_add(1, Ordering::Relaxed)),
- vis: PrimitiveVisibility::new(),
- clip_set: ClipSet {
- local_clip_rect,
- clip_chain_id,
- },
+ visibility_info: PrimitiveVisibilityIndex::INVALID,
+ clip_chain_id,
}
}
// Reset any pre-frame state for this primitive.
pub fn reset(&mut self) {
- self.vis.reset();
- }
-
- pub fn clear_visibility(&mut self) {
- self.vis.reset();
+ self.visibility_info = PrimitiveVisibilityIndex::INVALID;
}
#[cfg(debug_assertions)]
@@ -1153,9 +1604,6 @@ impl PrimitiveInstance {
PrimitiveInstanceKind::LinearGradient { data_handle, .. } => {
data_handle.uid()
}
- PrimitiveInstanceKind::CachedLinearGradient { data_handle, .. } => {
- data_handle.uid()
- }
PrimitiveInstanceKind::NormalBorder { data_handle, .. } => {
data_handle.uid()
}
@@ -1191,9 +1639,11 @@ pub struct SegmentedInstance {
pub type GlyphKeyStorage = storage::Storage<GlyphKey>;
pub type TextRunIndex = storage::Index<TextRunPrimitive>;
pub type TextRunStorage = storage::Storage<TextRunPrimitive>;
+pub type OpacityBindingIndex = storage::Index<OpacityBinding>;
+pub type OpacityBindingStorage = storage::Storage<OpacityBinding>;
pub type ColorBindingIndex = storage::Index<PropertyBinding<ColorU>>;
pub type ColorBindingStorage = storage::Storage<PropertyBinding<ColorU>>;
-pub type BorderHandleStorage = storage::Storage<RenderTaskId>;
+pub type BorderHandleStorage = storage::Storage<RenderTaskCacheEntryHandle>;
pub type SegmentStorage = storage::Storage<BrushSegment>;
pub type SegmentsRange = storage::Range<BrushSegment>;
pub type SegmentInstanceStorage = storage::Storage<SegmentedInstance>;
@@ -1202,6 +1652,7 @@ pub type ImageInstanceStorage = storage::Storage<ImageInstance>;
pub type ImageInstanceIndex = storage::Index<ImageInstance>;
pub type GradientTileStorage = storage::Storage<VisibleGradientTile>;
pub type GradientTileRange = storage::Range<VisibleGradientTile>;
+pub type LinearGradientIndex = storage::Index<LinearGradientPrimitive>;
pub type LinearGradientStorage = storage::Storage<LinearGradientPrimitive>;
/// Contains various vecs of data that is used only during frame building,
@@ -1234,12 +1685,19 @@ pub struct PrimitiveScratchBuffer {
/// per-tile information.
pub gradient_tiles: GradientTileStorage,
+ /// List of the visibility information for currently visible primitives.
+ pub prim_info: Vec<PrimitiveVisibility>,
+
+ /// List of dirty regions for the cached pictures in this document, used to
+ /// verify invalidation in wrench reftests. Only collected in testing.
+ pub recorded_dirty_regions: Vec<RecordedDirtyRegion>,
+
/// List of debug display items for rendering.
pub debug_items: Vec<DebugItem>,
}
-impl Default for PrimitiveScratchBuffer {
- fn default() -> Self {
+impl PrimitiveScratchBuffer {
+ pub fn new() -> Self {
PrimitiveScratchBuffer {
clip_mask_instances: Vec::new(),
glyph_keys: GlyphKeyStorage::new(0),
@@ -1247,14 +1705,15 @@ impl Default for PrimitiveScratchBuffer {
segments: SegmentStorage::new(0),
segment_instances: SegmentInstanceStorage::new(0),
gradient_tiles: GradientTileStorage::new(0),
+ recorded_dirty_regions: Vec::new(),
debug_items: Vec::new(),
+ prim_info: Vec::new(),
}
}
-}
-impl PrimitiveScratchBuffer {
pub fn recycle(&mut self, recycler: &mut Recycler) {
recycler.recycle_vec(&mut self.clip_mask_instances);
+ recycler.recycle_vec(&mut self.prim_info);
self.glyph_keys.recycle(recycler);
self.border_cache_handles.recycle(recycler);
self.segments.recycle(recycler);
@@ -1278,7 +1737,11 @@ impl PrimitiveScratchBuffer {
// should fix this in the future to retain handles.
self.gradient_tiles.clear();
+ self.prim_info.clear();
+
self.debug_items.clear();
+
+ assert!(self.recorded_dirty_regions.is_empty(), "Should have sent to Renderer");
}
#[allow(dead_code)]
@@ -1316,6 +1779,7 @@ impl PrimitiveScratchBuffer {
pub struct PrimitiveStoreStats {
picture_count: usize,
text_run_count: usize,
+ opacity_binding_count: usize,
image_count: usize,
linear_gradient_count: usize,
color_binding_count: usize,
@@ -1326,6 +1790,7 @@ impl PrimitiveStoreStats {
PrimitiveStoreStats {
picture_count: 0,
text_run_count: 0,
+ opacity_binding_count: 0,
image_count: 0,
linear_gradient_count: 0,
color_binding_count: 0,
@@ -1344,6 +1809,8 @@ pub struct PrimitiveStore {
/// for other types.
pub images: ImageInstanceStorage,
+ /// List of animated opacity bindings for a primitive.
+ pub opacity_bindings: OpacityBindingStorage,
/// Animated color bindings for this primitive.
pub color_bindings: ColorBindingStorage,
}
@@ -1354,6 +1821,7 @@ impl PrimitiveStore {
pictures: Vec::with_capacity(stats.picture_count),
text_runs: TextRunStorage::new(stats.text_run_count),
images: ImageInstanceStorage::new(stats.image_count),
+ opacity_bindings: OpacityBindingStorage::new(stats.opacity_binding_count),
color_bindings: ColorBindingStorage::new(stats.color_binding_count),
linear_gradients: LinearGradientStorage::new(stats.linear_gradient_count),
}
@@ -1364,6 +1832,7 @@ impl PrimitiveStore {
picture_count: self.pictures.len(),
text_run_count: self.text_runs.len(),
image_count: self.images.len(),
+ opacity_binding_count: self.opacity_bindings.len(),
linear_gradient_count: self.linear_gradients.len(),
color_binding_count: self.color_bindings.len(),
}
@@ -1376,14 +1845,2651 @@ impl PrimitiveStore {
self.pictures[root.0].print(&self.pictures, root, &mut pt);
}
+ /// Destroy an existing primitive store. This is called just before
+ /// a primitive store is replaced with a newly built scene.
+ pub fn destroy(
+ &mut self,
+ retained_tiles: &mut RetainedTiles,
+ ) {
+ for pic in &mut self.pictures {
+ pic.destroy(
+ retained_tiles,
+ );
+ }
+ }
+
/// Returns the total count of primitive instances contained in pictures.
pub fn prim_count(&self) -> usize {
let mut prim_count = 0;
for pic in &self.pictures {
- prim_count += pic.prim_list.prim_instances.len();
+ for cluster in &pic.prim_list.clusters {
+ prim_count += cluster.prim_instances.len();
+ }
}
prim_count
}
+
+ /// Update visibility pass - update each primitive visibility struct, and
+ /// build the clip chain instance if appropriate.
+ pub fn update_visibility(
+ &mut self,
+ pic_index: PictureIndex,
+ parent_surface_index: SurfaceIndex,
+ world_culling_rect: &WorldRect,
+ frame_context: &FrameVisibilityContext,
+ frame_state: &mut FrameVisibilityState,
+ ) -> Option<PictureRect> {
+ profile_scope!("update_visibility");
+ let (mut prim_list, surface_index, apply_local_clip_rect, world_culling_rect, is_composite) = {
+ let pic = &mut self.pictures[pic_index.0];
+ let mut world_culling_rect = *world_culling_rect;
+
+ let prim_list = mem::replace(&mut pic.prim_list, PrimitiveList::empty());
+ let (surface_index, is_composite) = match pic.raster_config {
+ Some(ref raster_config) => (raster_config.surface_index, true),
+ None => (parent_surface_index, false)
+ };
+
+ match pic.raster_config {
+ Some(RasterConfig { composite_mode: PictureCompositeMode::TileCache { .. }, .. }) => {
+ let mut tile_cache = pic.tile_cache.take().unwrap();
+ debug_assert!(frame_state.tile_cache.is_none());
+
+ // If we have a tile cache for this picture, see if any of the
+ // relative transforms have changed, which means we need to
+ // re-map the dependencies of any child primitives.
+ world_culling_rect = tile_cache.pre_update(
+ layout_rect_as_picture_rect(&pic.estimated_local_rect),
+ surface_index,
+ frame_context,
+ frame_state,
+ );
+
+ // Push a new surface, supplying the list of clips that should be
+ // ignored, since they are handled by clipping when drawing this surface.
+ frame_state.push_surface(
+ surface_index,
+ &tile_cache.shared_clips,
+ frame_context.spatial_tree,
+ );
+ frame_state.tile_cache = Some(tile_cache);
+ }
+ _ => {
+ if is_composite {
+ frame_state.push_surface(
+ surface_index,
+ &[],
+ frame_context.spatial_tree,
+ );
+ }
+ }
+ }
+
+ (prim_list, surface_index, pic.apply_local_clip_rect, world_culling_rect, is_composite)
+ };
+
+ let surface = &frame_context.surfaces[surface_index.0 as usize];
+
+ let mut map_local_to_surface = surface
+ .map_local_to_surface
+ .clone();
+
+ let map_surface_to_world = SpaceMapper::new_with_target(
+ ROOT_SPATIAL_NODE_INDEX,
+ surface.surface_spatial_node_index,
+ frame_context.global_screen_world_rect,
+ frame_context.spatial_tree,
+ );
+
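+        // Accumulates the surface-space bounds of every visible primitive in
+        // this picture; used below to update the picture's precise local rect.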
+ let mut surface_rect = PictureRect::zero();
+
+ for cluster in &mut prim_list.clusters {
+ profile_scope!("cluster");
+            // Get the cluster and see if it is visible
+ if !cluster.flags.contains(ClusterFlags::IS_VISIBLE) {
+ // Each prim instance must have reset called each frame, to clear
+ // indices into various scratch buffers. If this doesn't occur,
+ // the primitive may incorrectly be considered visible, which can
+ // cause unexpected conditions to occur later during the frame.
+ // Primitive instances are normally reset in the main loop below,
+ // but we must also reset them in the rare case that the cluster
+ // visibility has changed (due to an invalid transform and/or
+ // backface visibility changing for this cluster).
+ // TODO(gw): This is difficult to test for in CI - as a follow up,
+ // we should add a debug flag that validates the prim
+ // instance is always reset every frame to catch similar
+ // issues in future.
+ for prim_instance in &mut cluster.prim_instances {
+ prim_instance.reset();
+ }
+ continue;
+ }
+
+ map_local_to_surface.set_target_spatial_node(
+ cluster.spatial_node_index,
+ frame_context.spatial_tree,
+ );
+
+ for prim_instance in &mut cluster.prim_instances {
+ prim_instance.reset();
+
+ if prim_instance.is_chased() {
+ #[cfg(debug_assertions)] // needed for ".id" part
+ println!("\tpreparing {:?} in {:?}", prim_instance.id, pic_index);
+ println!("\t{:?}", prim_instance.kind);
+ }
+
+ let (is_passthrough, prim_local_rect, prim_shadowed_rect) = match prim_instance.kind {
+ PrimitiveInstanceKind::Picture { pic_index, .. } => {
+ if !self.pictures[pic_index.0].is_visible() {
+ continue;
+ }
+
+ frame_state.clip_chain_stack.push_clip(
+ prim_instance.clip_chain_id,
+ frame_state.clip_store,
+ );
+
+ let pic_surface_rect = self.update_visibility(
+ pic_index,
+ surface_index,
+ &world_culling_rect,
+ frame_context,
+ frame_state,
+ );
+
+ frame_state.clip_chain_stack.pop_clip();
+
+ let pic = &self.pictures[pic_index.0];
+
+ if prim_instance.is_chased() && pic.estimated_local_rect != pic.precise_local_rect {
+ println!("\testimate {:?} adjusted to {:?}", pic.estimated_local_rect, pic.precise_local_rect);
+ }
+
+ let mut shadow_rect = pic.precise_local_rect;
+ match pic.raster_config {
+ Some(ref rc) => match rc.composite_mode {
+ // If we have a drop shadow filter, we also need to include the shadow in
+ // our shadowed local rect for the purpose of calculating the size of the
+ // picture.
+ PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => {
+ for shadow in shadows {
+ shadow_rect = shadow_rect.union(&pic.precise_local_rect.translate(shadow.offset));
+ }
+ }
+ _ => {}
+ }
+ None => {
+ // If the primitive does not have its own raster config, we need to
+                            // propagate the surface rect calculation to the parent.
+ if let Some(ref rect) = pic_surface_rect {
+ surface_rect = surface_rect.union(rect);
+ }
+ }
+ }
+
+ (pic.raster_config.is_none(), pic.precise_local_rect, shadow_rect)
+ }
+ _ => {
+ let prim_data = &frame_state.data_stores.as_common_data(&prim_instance);
+
+ (false, prim_data.prim_rect, prim_data.prim_rect)
+ }
+ };
+
+ if is_passthrough {
+ let vis_index = PrimitiveVisibilityIndex(frame_state.scratch.prim_info.len() as u32);
+
+ frame_state.scratch.prim_info.push(
+ PrimitiveVisibility {
+ clipped_world_rect: WorldRect::max_rect(),
+ clip_chain: ClipChainInstance::empty(),
+ clip_task_index: ClipTaskIndex::INVALID,
+ combined_local_clip_rect: LayoutRect::zero(),
+ visibility_mask: PrimitiveVisibilityMask::empty(),
+ flags: PrimitiveVisibilityFlags::empty(),
+ }
+ );
+
+ prim_instance.visibility_info = vis_index;
+ } else {
+ if prim_local_rect.size.width <= 0.0 || prim_local_rect.size.height <= 0.0 {
+ if prim_instance.is_chased() {
+ println!("\tculled for zero local rectangle");
+ }
+ continue;
+ }
+
+ // Inflate the local rect for this primitive by the inflation factor of
+ // the picture context and include the shadow offset. This ensures that
+ // even if the primitive itself is not visible, any effects from the
+ // blur radius or shadow will be correctly taken into account.
+ let inflation_factor = surface.inflation_factor;
+ let local_rect = prim_shadowed_rect
+ .inflate(inflation_factor, inflation_factor)
+ .intersection(&prim_instance.local_clip_rect);
+ let local_rect = match local_rect {
+ Some(local_rect) => local_rect,
+ None => {
+ if prim_instance.is_chased() {
+ println!("\tculled for being out of the local clip rectangle: {:?}",
+ prim_instance.local_clip_rect);
+ }
+ continue;
+ }
+ };
+
+ // Include the clip chain for this primitive in the current stack.
+ frame_state.clip_chain_stack.push_clip(
+ prim_instance.clip_chain_id,
+ frame_state.clip_store,
+ );
+
+ frame_state.clip_store.set_active_clips(
+ prim_instance.local_clip_rect,
+ cluster.spatial_node_index,
+ frame_state.clip_chain_stack.current_clips_array(),
+ &frame_context.spatial_tree,
+ &frame_state.data_stores.clip,
+ );
+
+ let clip_chain = frame_state
+ .clip_store
+ .build_clip_chain_instance(
+ local_rect,
+ &map_local_to_surface,
+ &map_surface_to_world,
+ &frame_context.spatial_tree,
+ frame_state.gpu_cache,
+ frame_state.resource_cache,
+ surface.device_pixel_scale,
+ &world_culling_rect,
+ &mut frame_state.data_stores.clip,
+ true,
+ prim_instance.is_chased(),
+ );
+
+ // Primitive visibility flags default to empty, but may be supplied
+ // by the `update_prim_dependencies` method below when picture caching
+ // is active.
+ let mut vis_flags = PrimitiveVisibilityFlags::empty();
+
+ if let Some(ref mut tile_cache) = frame_state.tile_cache {
+ // TODO(gw): Refactor how tile_cache is stored in frame_state
+ // so that we can pass frame_state directly to
+ // update_prim_dependencies, rather than splitting borrows.
+ match tile_cache.update_prim_dependencies(
+ prim_instance,
+ cluster.spatial_node_index,
+ clip_chain.as_ref(),
+ prim_local_rect,
+ frame_context,
+ frame_state.data_stores,
+ frame_state.clip_store,
+ &self.pictures,
+ frame_state.resource_cache,
+ &self.opacity_bindings,
+ &self.color_bindings,
+ &self.images,
+ &frame_state.surface_stack,
+ &mut frame_state.composite_state,
+ ) {
+ Some(flags) => {
+ vis_flags = flags;
+ }
+ None => {
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ // Ensure the primitive clip is popped - perhaps we can use
+ // some kind of scope to do this automatically in future.
+ frame_state.clip_chain_stack.pop_clip();
+ continue;
+ }
+ }
+ }
+
+ // Ensure the primitive clip is popped
+ frame_state.clip_chain_stack.pop_clip();
+
+ let clip_chain = match clip_chain {
+ Some(clip_chain) => clip_chain,
+ None => {
+ if prim_instance.is_chased() {
+ println!("\tunable to build the clip chain, skipping");
+ }
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ continue;
+ }
+ };
+
+ if prim_instance.is_chased() {
+ println!("\teffective clip chain from {:?} {}",
+ clip_chain.clips_range,
+ if apply_local_clip_rect { "(applied)" } else { "" },
+ );
+ println!("\tpicture rect {:?} @{:?}",
+ clip_chain.pic_clip_rect,
+ clip_chain.pic_spatial_node_index,
+ );
+ }
+
+ // Check if the clip bounding rect (in pic space) is visible on screen
+ // This includes both the prim bounding rect + local prim clip rect!
+ let world_rect = match map_surface_to_world.map(&clip_chain.pic_clip_rect) {
+ Some(world_rect) => world_rect,
+ None => {
+ continue;
+ }
+ };
+
+ let clipped_world_rect = match world_rect.intersection(&world_culling_rect) {
+ Some(rect) => rect,
+ None => {
+ continue;
+ }
+ };
+
+ let combined_local_clip_rect = if apply_local_clip_rect {
+ clip_chain.local_clip_rect
+ } else {
+ prim_instance.local_clip_rect
+ };
+
+ if combined_local_clip_rect.size.is_empty() {
+ debug_assert!(combined_local_clip_rect.size.width >= 0.0 &&
+ combined_local_clip_rect.size.height >= 0.0);
+ if prim_instance.is_chased() {
+ println!("\tculled for zero local clip rectangle");
+ }
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ continue;
+ }
+
+ // Include the visible area for primitive, including any shadows, in
+ // the area affected by the surface.
+ match combined_local_clip_rect.intersection(&local_rect) {
+ Some(visible_rect) => {
+ if let Some(rect) = map_local_to_surface.map(&visible_rect) {
+ surface_rect = surface_rect.union(&rect);
+ }
+ }
+ None => {
+ if prim_instance.is_chased() {
+ println!("\tculled for zero visible rectangle");
+ }
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ continue;
+ }
+ }
+
+ // When the debug display is enabled, paint a colored rectangle around each
+ // primitive.
+ if frame_context.debug_flags.contains(::api::DebugFlags::PRIMITIVE_DBG) {
+ let debug_color = match prim_instance.kind {
+ PrimitiveInstanceKind::Picture { .. } => ColorF::TRANSPARENT,
+ PrimitiveInstanceKind::TextRun { .. } => debug_colors::RED,
+ PrimitiveInstanceKind::LineDecoration { .. } => debug_colors::PURPLE,
+ PrimitiveInstanceKind::NormalBorder { .. } |
+ PrimitiveInstanceKind::ImageBorder { .. } => debug_colors::ORANGE,
+ PrimitiveInstanceKind::Rectangle { .. } => ColorF { r: 0.8, g: 0.8, b: 0.8, a: 0.5 },
+ PrimitiveInstanceKind::YuvImage { .. } => debug_colors::BLUE,
+ PrimitiveInstanceKind::Image { .. } => debug_colors::BLUE,
+ PrimitiveInstanceKind::LinearGradient { .. } => debug_colors::PINK,
+ PrimitiveInstanceKind::RadialGradient { .. } => debug_colors::PINK,
+ PrimitiveInstanceKind::ConicGradient { .. } => debug_colors::PINK,
+ PrimitiveInstanceKind::Clear { .. } => debug_colors::CYAN,
+ PrimitiveInstanceKind::Backdrop { .. } => debug_colors::MEDIUMAQUAMARINE,
+ };
+ if debug_color.a != 0.0 {
+ let debug_rect = clipped_world_rect * frame_context.global_device_pixel_scale;
+ frame_state.scratch.push_debug_rect(debug_rect, debug_color, debug_color.scale_alpha(0.5));
+ }
+ } else if frame_context.debug_flags.contains(::api::DebugFlags::OBSCURE_IMAGES) {
+ let is_image = matches!(
+ prim_instance.kind,
+ PrimitiveInstanceKind::Image { .. } | PrimitiveInstanceKind::YuvImage { .. }
+ );
+ if is_image {
+ // We allow "small" images, since they're generally UI elements.
+ let rect = clipped_world_rect * frame_context.global_device_pixel_scale;
+ if rect.size.width > 70.0 && rect.size.height > 70.0 {
+ frame_state.scratch.push_debug_rect(rect, debug_colors::PURPLE, debug_colors::PURPLE);
+ }
+ }
+ }
+
+ let vis_index = PrimitiveVisibilityIndex(frame_state.scratch.prim_info.len() as u32);
+ if prim_instance.is_chased() {
+ println!("\tvisible {:?} with {:?}", vis_index, combined_local_clip_rect);
+ }
+
+ frame_state.scratch.prim_info.push(
+ PrimitiveVisibility {
+ clipped_world_rect,
+ clip_chain,
+ clip_task_index: ClipTaskIndex::INVALID,
+ combined_local_clip_rect,
+ visibility_mask: PrimitiveVisibilityMask::empty(),
+ flags: vis_flags,
+ }
+ );
+
+ prim_instance.visibility_info = vis_index;
+
+ self.request_resources_for_prim(
+ prim_instance,
+ cluster.spatial_node_index,
+ clipped_world_rect,
+ frame_context,
+ frame_state,
+ );
+ }
+ }
+ }
+
+        // Counterpart to the pushes above: pop either the clip chain or root entry off the current clip stack.
+ if is_composite {
+ frame_state.pop_surface();
+ }
+
+ let pic = &mut self.pictures[pic_index.0];
+ pic.prim_list = prim_list;
+
+ // If the local rect changed (due to transforms in child primitives) then
+ // invalidate the GPU cache location to re-upload the new local rect
+ // and stretch size. Drop shadow filters also depend on the local rect
+ // size for the extra GPU cache data handle.
+ // TODO(gw): In future, if we support specifying a flag which gets the
+ // stretch size from the segment rect in the shaders, we can
+ // remove this invalidation here completely.
+ if let Some(ref rc) = pic.raster_config {
+ // Inflate the local bounding rect if required by the filter effect.
+            // This inflation factor is to be applied to the surface itself.
+ if pic.options.inflate_if_required {
+ // The picture's local rect is calculated as the union of the
+ // snapped primitive rects, which should result in a snapped
+ // local rect, unless it was inflated. This is also done during
+ // surface configuration when calculating the picture's
+ // estimated local rect.
+ let snap_pic_to_raster = SpaceSnapper::new_with_target(
+ surface.raster_spatial_node_index,
+ pic.spatial_node_index,
+ surface.device_pixel_scale,
+ frame_context.spatial_tree,
+ );
+
+ surface_rect = rc.composite_mode.inflate_picture_rect(surface_rect, surface.scale_factors);
+ surface_rect = snap_pic_to_raster.snap_rect(&surface_rect);
+ }
+
+ // Layout space for the picture is picture space from the
+ // perspective of its child primitives.
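+            // (Multiplying by a unit scale is effectively a cast between typed
+            // coordinate spaces; the rect's values are unchanged.)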
+ let pic_local_rect = surface_rect * Scale::new(1.0);
+ if pic.precise_local_rect != pic_local_rect {
+ match rc.composite_mode {
+ PictureCompositeMode::Filter(Filter::DropShadows(..)) => {
+ for handle in &pic.extra_gpu_data_handles {
+ frame_state.gpu_cache.invalidate(handle);
+ }
+ }
+ _ => {}
+ }
+ // Invalidate any segments built for this picture, since the local
+ // rect has changed.
+ pic.segments_are_valid = false;
+ pic.precise_local_rect = pic_local_rect;
+ }
+
+ if let PictureCompositeMode::TileCache { .. } = rc.composite_mode {
+ let mut tile_cache = frame_state.tile_cache.take().unwrap();
+
+ // Build the dirty region(s) for this tile cache.
+ tile_cache.post_update(
+ frame_context,
+ frame_state,
+ );
+
+ pic.tile_cache = Some(tile_cache);
+ }
+
+ None
+ } else {
+ let parent_surface = &frame_context.surfaces[parent_surface_index.0 as usize];
+ let map_surface_to_parent_surface = SpaceMapper::new_with_target(
+ parent_surface.surface_spatial_node_index,
+ surface.surface_spatial_node_index,
+ PictureRect::max_rect(),
+ frame_context.spatial_tree,
+ );
+ map_surface_to_parent_surface.map(&surface_rect)
+ }
+ }
+
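+    /// Request any resources needed to draw a primitive (image tiles, border
+    /// render tasks, etc.) now that it is known to be visible this frame.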
+ fn request_resources_for_prim(
+ &mut self,
+ prim_instance: &mut PrimitiveInstance,
+ prim_spatial_node_index: SpatialNodeIndex,
+ prim_world_rect: WorldRect,
+ frame_context: &FrameVisibilityContext,
+ frame_state: &mut FrameVisibilityState,
+ ) {
+ profile_scope!("request_resources_for_prim");
+ match prim_instance.kind {
+ PrimitiveInstanceKind::TextRun { .. } => {
+ // Text runs can't request resources early here, as we don't
+ // know until TileCache::post_update() whether we are drawing
+ // on an opaque surface.
+ // TODO(gw): We might be able to detect simple cases of this earlier,
+ // during the picture traversal. But it's probably not worth it?
+ }
+ PrimitiveInstanceKind::Image { data_handle, image_instance_index, .. } => {
+ let prim_data = &mut frame_state.data_stores.image[data_handle];
+ let common_data = &mut prim_data.common;
+ let image_data = &mut prim_data.kind;
+ let image_instance = &mut self.images[image_instance_index];
+
+ let image_properties = frame_state
+ .resource_cache
+ .get_image_properties(image_data.key);
+
+ let request = ImageRequest {
+ key: image_data.key,
+ rendering: image_data.image_rendering,
+ tile: None,
+ };
+
+ match image_properties {
+ Some(ImageProperties { tiling: None, .. }) => {
+
+ frame_state.resource_cache.request_image(
+ request,
+ frame_state.gpu_cache,
+ );
+ }
+ Some(ImageProperties { tiling: Some(tile_size), visible_rect, .. }) => {
+ image_instance.visible_tiles.clear();
+ // TODO: rename the blob's visible_rect into something that doesn't conflict
+ // with the terminology we use during culling since it's not really the same
+ // thing.
+ let active_rect = visible_rect;
+
+ // Tighten the clip rect because decomposing the repeated image can
+ // produce primitives that are partially covering the original image
+ // rect and we want to clip these extra parts out.
+ let prim_info = &frame_state.scratch.prim_info[prim_instance.visibility_info.0 as usize];
+ let tight_clip_rect = prim_info
+ .combined_local_clip_rect
+ .intersection(&common_data.prim_rect).unwrap();
+ image_instance.tight_local_clip_rect = tight_clip_rect;
+
+ let map_local_to_world = SpaceMapper::new_with_target(
+ ROOT_SPATIAL_NODE_INDEX,
+ prim_spatial_node_index,
+ frame_context.global_screen_world_rect,
+ frame_context.spatial_tree,
+ );
+
+ let visible_rect = compute_conservative_visible_rect(
+ &tight_clip_rect,
+ prim_world_rect,
+ &map_local_to_world,
+ );
+
+ let base_edge_flags = edge_flags_for_tile_spacing(&image_data.tile_spacing);
+
+ let stride = image_data.stretch_size + image_data.tile_spacing;
+
+ // We are performing the decomposition on the CPU here, no need to
+ // have it in the shader.
+ common_data.may_need_repetition = false;
+
+ let repetitions = image_tiling::repetitions(
+ &common_data.prim_rect,
+ &visible_rect,
+ stride,
+ );
+
+ for Repetition { origin, edge_flags } in repetitions {
+ let edge_flags = base_edge_flags | edge_flags;
+
+ let layout_image_rect = LayoutRect {
+ origin,
+ size: image_data.stretch_size,
+ };
+
+ let tiles = image_tiling::tiles(
+ &layout_image_rect,
+ &visible_rect,
+ &active_rect,
+ tile_size as i32,
+ );
+
+ for tile in tiles {
+ frame_state.resource_cache.request_image(
+ request.with_tile(tile.offset),
+ frame_state.gpu_cache,
+ );
+
+ image_instance.visible_tiles.push(VisibleImageTile {
+ tile_offset: tile.offset,
+ edge_flags: tile.edge_flags & edge_flags,
+ local_rect: tile.rect,
+ local_clip_rect: tight_clip_rect,
+ });
+ }
+ }
+
+ if image_instance.visible_tiles.is_empty() {
+ // Mark as invisible
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ }
+ }
+ None => {}
+ }
+ }
+ PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
+ let prim_data = &mut frame_state.data_stores.image_border[data_handle];
+ prim_data.kind.request_resources(
+ frame_state.resource_cache,
+ frame_state.gpu_cache,
+ );
+ }
+ PrimitiveInstanceKind::YuvImage { data_handle, .. } => {
+ let prim_data = &mut frame_state.data_stores.yuv_image[data_handle];
+ prim_data.kind.request_resources(
+ frame_state.resource_cache,
+ frame_state.gpu_cache,
+ );
+ }
+ _ => {}
+ }
+ }
+
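+    /// Resolve the current value of an opacity binding, treating an invalid
+    /// index as fully opaque (1.0).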
+ pub fn get_opacity_binding(
+ &self,
+ opacity_binding_index: OpacityBindingIndex,
+ ) -> f32 {
+ if opacity_binding_index == OpacityBindingIndex::INVALID {
+ 1.0
+ } else {
+ self.opacity_bindings[opacity_binding_index].current
+ }
+ }
+
+    // Internal method that retrieves the index of the picture containing a
+    // single primitive that parent opacity filters can be collapsed into.
+ fn get_opacity_collapse_prim(
+ &self,
+ pic_index: PictureIndex,
+ ) -> Option<PictureIndex> {
+ let pic = &self.pictures[pic_index.0];
+
+ // We can only collapse opacity if there is a single primitive, otherwise
+ // the opacity needs to be applied to the primitives as a group.
+ if pic.prim_list.clusters.len() != 1 {
+ return None;
+ }
+
+ let cluster = &pic.prim_list.clusters[0];
+ if cluster.prim_instances.len() != 1 {
+ return None;
+ }
+
+ let prim_instance = &cluster.prim_instances[0];
+
+ // For now, we only support opacity collapse on solid rects and images.
+ // This covers the most common types of opacity filters that can be
+ // handled by this optimization. In the future, we can easily extend
+ // this to other primitives, such as text runs and gradients.
+ match prim_instance.kind {
+ // If we find a single rect or image, we can use that
+ // as the primitive to collapse the opacity into.
+ PrimitiveInstanceKind::Rectangle { .. } |
+ PrimitiveInstanceKind::Image { .. } => {
+ return Some(pic_index);
+ }
+ PrimitiveInstanceKind::Clear { .. } |
+ PrimitiveInstanceKind::TextRun { .. } |
+ PrimitiveInstanceKind::NormalBorder { .. } |
+ PrimitiveInstanceKind::ImageBorder { .. } |
+ PrimitiveInstanceKind::YuvImage { .. } |
+ PrimitiveInstanceKind::LinearGradient { .. } |
+ PrimitiveInstanceKind::RadialGradient { .. } |
+ PrimitiveInstanceKind::ConicGradient { .. } |
+ PrimitiveInstanceKind::LineDecoration { .. } |
+ PrimitiveInstanceKind::Backdrop { .. } => {
+ // These prims don't support opacity collapse
+ }
+ PrimitiveInstanceKind::Picture { pic_index, .. } => {
+ let pic = &self.pictures[pic_index.0];
+
+ // If we encounter a picture that is a pass-through
+ // (i.e. no composite mode), then we can recurse into
+ // that to try and find a primitive to collapse to.
+ if pic.requested_composite_mode.is_none() {
+ return self.get_opacity_collapse_prim(pic_index);
+ }
+ }
+ }
+
+ None
+ }
+
+ // Apply any optimizations to drawing this picture. Currently,
+ // we just support collapsing pictures with an opacity filter
+ // by pushing that opacity value into the color of a primitive
+ // if that picture contains one compatible primitive.
+ pub fn optimize_picture_if_possible(
+ &mut self,
+ pic_index: PictureIndex,
+ ) {
+ // Only handle opacity filters for now.
+ let binding = match self.pictures[pic_index.0].requested_composite_mode {
+ Some(PictureCompositeMode::Filter(Filter::Opacity(binding, _))) => {
+ binding
+ }
+ _ => {
+ return;
+ }
+ };
+
+ // See if this picture contains a single primitive that supports
+ // opacity collapse.
+ match self.get_opacity_collapse_prim(pic_index) {
+ Some(pic_index) => {
+ let pic = &mut self.pictures[pic_index.0];
+ let prim_instance = &mut pic.prim_list.clusters[0].prim_instances[0];
+ match prim_instance.kind {
+ PrimitiveInstanceKind::Image { image_instance_index, .. } => {
+ let image_instance = &mut self.images[image_instance_index];
+ // By this point, we know we should only have found a primitive
+ // that supports opacity collapse.
+ if image_instance.opacity_binding_index == OpacityBindingIndex::INVALID {
+ image_instance.opacity_binding_index = self.opacity_bindings.push(OpacityBinding::new());
+ }
+ let opacity_binding = &mut self.opacity_bindings[image_instance.opacity_binding_index];
+ opacity_binding.push(binding);
+ }
+ PrimitiveInstanceKind::Rectangle { ref mut opacity_binding_index, .. } => {
+ // By this point, we know we should only have found a primitive
+ // that supports opacity collapse.
+ if *opacity_binding_index == OpacityBindingIndex::INVALID {
+ *opacity_binding_index = self.opacity_bindings.push(OpacityBinding::new());
+ }
+ let opacity_binding = &mut self.opacity_bindings[*opacity_binding_index];
+ opacity_binding.push(binding);
+ }
+ _ => {
+ unreachable!();
+ }
+ }
+ }
+ None => {
+ return;
+ }
+ }
+
+ // The opacity filter has been collapsed, so mark this picture
+        // as a pass through. This means it will no longer allocate an
+ // intermediate surface or incur an extra blend / blit. Instead,
+ // the collapsed primitive will be drawn directly into the
+ // parent picture.
+ self.pictures[pic_index.0].requested_composite_mode = None;
+ }
+
+ fn prepare_prim_for_render(
+ &mut self,
+ prim_instance: &mut PrimitiveInstance,
+ prim_spatial_node_index: SpatialNodeIndex,
+ pic_context: &PictureContext,
+ pic_state: &mut PictureState,
+ frame_context: &FrameBuildingContext,
+ frame_state: &mut FrameBuildingState,
+ plane_split_anchor: PlaneSplitAnchor,
+ data_stores: &mut DataStores,
+ scratch: &mut PrimitiveScratchBuffer,
+ tile_cache_log: &mut TileCacheLogger,
+ ) -> bool {
+ profile_scope!("prepare_prim_for_render");
+ // If we have dependencies, we need to prepare them first, in order
+ // to know the actual rect of this primitive.
+ // For example, scrolling may affect the location of an item in
+ // local space, which may force us to render this item on a larger
+ // picture target, if being composited.
+ let pic_info = {
+ match prim_instance.kind {
+ PrimitiveInstanceKind::Picture { pic_index ,.. } => {
+ let pic = &mut self.pictures[pic_index.0];
+
+ let clipped_prim_bounding_rect = scratch
+ .prim_info[prim_instance.visibility_info.0 as usize]
+ .clipped_world_rect;
+
+ match pic.take_context(
+ pic_index,
+ clipped_prim_bounding_rect,
+ pic_context.surface_spatial_node_index,
+ pic_context.raster_spatial_node_index,
+ pic_context.surface_index,
+ &pic_context.subpixel_mode,
+ frame_state,
+ frame_context,
+ scratch,
+ tile_cache_log,
+ ) {
+ Some(info) => Some(info),
+ None => {
+ if prim_instance.is_chased() {
+ println!("\tculled for carrying an invisible composite filter");
+ }
+
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+
+ return false;
+ }
+ }
+ }
+ PrimitiveInstanceKind::TextRun { .. } |
+ PrimitiveInstanceKind::Rectangle { .. } |
+ PrimitiveInstanceKind::LineDecoration { .. } |
+ PrimitiveInstanceKind::NormalBorder { .. } |
+ PrimitiveInstanceKind::ImageBorder { .. } |
+ PrimitiveInstanceKind::YuvImage { .. } |
+ PrimitiveInstanceKind::Image { .. } |
+ PrimitiveInstanceKind::LinearGradient { .. } |
+ PrimitiveInstanceKind::RadialGradient { .. } |
+ PrimitiveInstanceKind::ConicGradient { .. } |
+ PrimitiveInstanceKind::Clear { .. } |
+ PrimitiveInstanceKind::Backdrop { .. } => {
+ None
+ }
+ }
+ };
+
+ let is_passthrough = match pic_info {
+ Some((pic_context_for_children, mut pic_state_for_children, mut prim_list)) => {
+ let is_passthrough = pic_context_for_children.is_passthrough;
+
+ self.prepare_primitives(
+ &mut prim_list,
+ &pic_context_for_children,
+ &mut pic_state_for_children,
+ frame_context,
+ frame_state,
+ data_stores,
+ scratch,
+ tile_cache_log,
+ );
+
+ // Restore the dependencies (borrow check dance)
+ self.pictures[pic_context_for_children.pic_index.0]
+ .restore_context(
+ pic_context.surface_index,
+ prim_list,
+ pic_context_for_children,
+ pic_state_for_children,
+ frame_state,
+ );
+
+ is_passthrough
+ }
+ None => {
+ false
+ }
+ };
+
+ let prim_rect = data_stores.get_local_prim_rect(
+ prim_instance,
+ self,
+ );
+
+ if !is_passthrough {
+ prim_instance.update_clip_task(
+ &prim_rect.origin,
+ prim_spatial_node_index,
+ pic_context.raster_spatial_node_index,
+ pic_context,
+ pic_state,
+ frame_context,
+ frame_state,
+ self,
+ data_stores,
+ scratch,
+ );
+
+ if prim_instance.is_chased() {
+ println!("\tconsidered visible and ready with local pos {:?}", prim_rect.origin);
+ }
+ }
+
+ #[cfg(debug_assertions)]
+ {
+ prim_instance.prepared_frame_id = frame_state.render_tasks.frame_id();
+ }
+
+ self.prepare_interned_prim_for_render(
+ prim_instance,
+ prim_spatial_node_index,
+ plane_split_anchor,
+ pic_context,
+ pic_state,
+ frame_context,
+ frame_state,
+ data_stores,
+ scratch,
+ );
+
+ true
+ }
+
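+    /// Prepare all visible primitives in a list: intersect each with the
+    /// current dirty region, then prepare it for rendering if still visible.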
+ pub fn prepare_primitives(
+ &mut self,
+ prim_list: &mut PrimitiveList,
+ pic_context: &PictureContext,
+ pic_state: &mut PictureState,
+ frame_context: &FrameBuildingContext,
+ frame_state: &mut FrameBuildingState,
+ data_stores: &mut DataStores,
+ scratch: &mut PrimitiveScratchBuffer,
+ tile_cache_log: &mut TileCacheLogger,
+ ) {
+ profile_scope!("prepare_primitives");
+ for (cluster_index, cluster) in prim_list.clusters.iter_mut().enumerate() {
+ profile_scope!("cluster");
+ pic_state.map_local_to_pic.set_target_spatial_node(
+ cluster.spatial_node_index,
+ frame_context.spatial_tree,
+ );
+
+ for (prim_instance_index, prim_instance) in cluster.prim_instances.iter_mut().enumerate() {
+ if prim_instance.visibility_info == PrimitiveVisibilityIndex::INVALID {
+ continue;
+ }
+
+ // The original clipped world rect was calculated during the initial visibility pass.
+                // However, it's possible that the dirty rect has shrunk, if some tiles were
+                // not dirty. Intersecting with the dirty rect here avoids preparing primitives
+ // outside the dirty rect, and reduces the size of any off-screen surface allocations
+ // for clip masks / render tasks that we make.
+ {
+ let visibility_info = &mut scratch.prim_info[prim_instance.visibility_info.0 as usize];
+ let dirty_region = frame_state.current_dirty_region();
+
+ for dirty_region in &dirty_region.dirty_rects {
+ if visibility_info.clipped_world_rect.intersects(&dirty_region.world_rect) {
+ visibility_info.visibility_mask.include(dirty_region.visibility_mask);
+ }
+ }
+
+ if visibility_info.visibility_mask.is_empty() {
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ continue;
+ }
+ }
+
+ let plane_split_anchor = PlaneSplitAnchor::new(cluster_index, prim_instance_index);
+
+ if self.prepare_prim_for_render(
+ prim_instance,
+ cluster.spatial_node_index,
+ pic_context,
+ pic_state,
+ frame_context,
+ frame_state,
+ plane_split_anchor,
+ data_stores,
+ scratch,
+ tile_cache_log,
+ ) {
+ frame_state.profile_counters.visible_primitives.inc();
+ }
+ }
+ }
+ }
+
+ /// Prepare an interned primitive for rendering, by requesting
+ /// resources, render tasks etc. This is equivalent to the
+ /// prepare_prim_for_render_inner call for old style primitives.
+ fn prepare_interned_prim_for_render(
+ &mut self,
+ prim_instance: &mut PrimitiveInstance,
+ prim_spatial_node_index: SpatialNodeIndex,
+ plane_split_anchor: PlaneSplitAnchor,
+ pic_context: &PictureContext,
+ pic_state: &mut PictureState,
+ frame_context: &FrameBuildingContext,
+ frame_state: &mut FrameBuildingState,
+ data_stores: &mut DataStores,
+ scratch: &mut PrimitiveScratchBuffer,
+ ) {
+ let is_chased = prim_instance.is_chased();
+ let device_pixel_scale = frame_state.surfaces[pic_context.surface_index.0].device_pixel_scale;
+
+ match &mut prim_instance.kind {
+ PrimitiveInstanceKind::LineDecoration { data_handle, ref mut cache_handle, .. } => {
+ profile_scope!("LineDecoration");
+ let prim_data = &mut data_stores.line_decoration[*data_handle];
+ let common_data = &mut prim_data.common;
+ let line_dec_data = &mut prim_data.kind;
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ line_dec_data.update(common_data, frame_state);
+
+ // Work out the device pixel size to be used to cache this line decoration.
+ if is_chased {
+ println!("\tline decoration key={:?}", line_dec_data.cache_key);
+ }
+
+ // If we have a cache key, it's a wavy / dashed / dotted line. Otherwise, it's
+ // a simple solid line.
+ if let Some(cache_key) = line_dec_data.cache_key.as_ref() {
+ // TODO(gw): Do we ever need / want to support scales for text decorations
+ // based on the current transform?
+ let scale_factor = Scale::new(1.0) * device_pixel_scale;
+ let mut task_size = (LayoutSize::from_au(cache_key.size) * scale_factor).ceil().to_i32();
+ if task_size.width > MAX_LINE_DECORATION_RESOLUTION as i32 ||
+ task_size.height > MAX_LINE_DECORATION_RESOLUTION as i32 {
+ let max_extent = cmp::max(task_size.width, task_size.height);
+ let task_scale_factor = Scale::new(MAX_LINE_DECORATION_RESOLUTION as f32 / max_extent as f32);
+ task_size = (LayoutSize::from_au(cache_key.size) * scale_factor * task_scale_factor)
+ .ceil().to_i32();
+ }
+
+ // Request a pre-rendered image task.
+ // TODO(gw): This match is a bit untidy, but it should disappear completely
+ // once the prepare_prims and batching are unified. When that
+ // happens, we can use the cache handle immediately, and not need
+ // to temporarily store it in the primitive instance.
+ *cache_handle = Some(frame_state.resource_cache.request_render_task(
+ RenderTaskCacheKey {
+ size: task_size,
+ kind: RenderTaskCacheKeyKind::LineDecoration(cache_key.clone()),
+ },
+ frame_state.gpu_cache,
+ frame_state.render_tasks,
+ None,
+ false,
+ |render_tasks| {
+ render_tasks.add().init(RenderTask::new_line_decoration(
+ task_size,
+ cache_key.style,
+ cache_key.orientation,
+ cache_key.wavy_line_thickness.to_f32_px(),
+ LayoutSize::from_au(cache_key.size),
+ ))
+ }
+ ));
+ }
+ }
+ PrimitiveInstanceKind::TextRun { run_index, data_handle, .. } => {
+ profile_scope!("TextRun");
+ let prim_data = &mut data_stores.text_run[*data_handle];
+ let run = &mut self.text_runs[*run_index];
+
+ prim_data.common.may_need_repetition = false;
+
+ // The glyph transform has to match `glyph_transform` in "ps_text_run" shader.
+ // It's relative to the rasterizing space of a glyph.
+ let transform = frame_context.spatial_tree
+ .get_relative_transform(
+ prim_spatial_node_index,
+ pic_context.raster_spatial_node_index,
+ )
+ .into_fast_transform();
+ let prim_offset = prim_data.common.prim_rect.origin.to_vector() - run.reference_frame_relative_offset;
+
+ let pic = &self.pictures[pic_context.pic_index.0];
+ let raster_space = pic.get_raster_space(frame_context.spatial_tree);
+ let surface = &frame_state.surfaces[pic_context.surface_index.0];
+ let prim_info = &scratch.prim_info[prim_instance.visibility_info.0 as usize];
+ let root_scaling_factor = match pic.raster_config {
+ Some(ref raster_config) => raster_config.root_scaling_factor,
+ None => 1.0
+ };
+
+ run.request_resources(
+ prim_offset,
+ prim_info.clip_chain.pic_clip_rect,
+ &prim_data.font,
+ &prim_data.glyphs,
+ &transform.to_transform().with_destination::<_>(),
+ surface,
+ prim_spatial_node_index,
+ raster_space,
+ root_scaling_factor,
+ &pic_context.subpixel_mode,
+ frame_state.resource_cache,
+ frame_state.gpu_cache,
+ frame_state.render_tasks,
+ frame_context.spatial_tree,
+ scratch,
+ );
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ prim_data.update(frame_state);
+ }
+ PrimitiveInstanceKind::Clear { data_handle, .. } => {
+ profile_scope!("Clear");
+ let prim_data = &mut data_stores.prim[*data_handle];
+
+ prim_data.common.may_need_repetition = false;
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ prim_data.update(frame_state, frame_context.scene_properties);
+ }
+ PrimitiveInstanceKind::NormalBorder { data_handle, ref mut cache_handles, .. } => {
+ profile_scope!("NormalBorder");
+ let prim_data = &mut data_stores.normal_border[*data_handle];
+ let common_data = &mut prim_data.common;
+ let border_data = &mut prim_data.kind;
+
+ common_data.may_need_repetition =
+ matches!(border_data.border.top.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
+ matches!(border_data.border.right.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
+ matches!(border_data.border.bottom.style, BorderStyle::Dotted | BorderStyle::Dashed) ||
+ matches!(border_data.border.left.style, BorderStyle::Dotted | BorderStyle::Dashed);
+
+
+ // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ border_data.update(common_data, frame_state);
+
+ // TODO(gw): For now, the scale factors to rasterize borders at are
+ // based on the true world transform of the primitive. When
+ // raster roots with local scale are supported in future,
+ // that will need to be accounted for here.
+ let scale = frame_context
+ .spatial_tree
+ .get_world_transform(prim_spatial_node_index)
+ .scale_factors();
+
+ // Scale factors are normalized to a power of 2 to reduce the number of
+ // resolution changes.
+ // For frames with a changing scale transform round scale factors up to
+ // nearest power-of-2 boundary so that we don't keep having to redraw
+ // the content as it scales up and down. Rounding up to nearest
+ // power-of-2 boundary ensures we never scale up, only down --- avoiding
+ // jaggies. It also ensures we never scale down by more than a factor of
+ // 2, avoiding bad downscaling quality.
+ let scale_width = clamp_to_scale_factor(scale.0, false);
+ let scale_height = clamp_to_scale_factor(scale.1, false);
+ // Pick the maximum dimension as scale
+ let world_scale = LayoutToWorldScale::new(scale_width.max(scale_height));
+ let mut scale = world_scale * device_pixel_scale;
+ let max_scale = get_max_scale_for_border(border_data);
+ scale.0 = scale.0.min(max_scale.0);
+
+ // For each edge and corner, request the render task by content key
+ // from the render task cache. This ensures that the render task for
+ // this segment will be available for batching later in the frame.
+ let mut handles: SmallVec<[RenderTaskCacheEntryHandle; 8]> = SmallVec::new();
+
+ for segment in &border_data.border_segments {
+ // Update the cache key device size based on requested scale.
+ let cache_size = to_cache_size(segment.local_task_size * scale);
+ let cache_key = RenderTaskCacheKey {
+ kind: RenderTaskCacheKeyKind::BorderSegment(segment.cache_key.clone()),
+ size: cache_size,
+ };
+
+ handles.push(frame_state.resource_cache.request_render_task(
+ cache_key,
+ frame_state.gpu_cache,
+ frame_state.render_tasks,
+ None,
+ false, // TODO(gw): We don't calculate opacity for borders yet!
+ |render_tasks| {
+ render_tasks.add().init(RenderTask::new_border_segment(
+ cache_size,
+ build_border_instances(
+ &segment.cache_key,
+ cache_size,
+ &border_data.border,
+ scale,
+ ),
+ ))
+ }
+ ));
+ }
+
+ *cache_handles = scratch
+ .border_cache_handles
+ .extend(handles);
+ }
+ PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
+ profile_scope!("ImageBorder");
+ let prim_data = &mut data_stores.image_border[*data_handle];
+
+                // TODO: get access to the ninepatch and check whether we need support
+ // for repetitions in the shader.
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ prim_data.kind.update(&mut prim_data.common, frame_state);
+ }
+ PrimitiveInstanceKind::Rectangle { data_handle, segment_instance_index, opacity_binding_index, color_binding_index, .. } => {
+ profile_scope!("Rectangle");
+ let prim_data = &mut data_stores.prim[*data_handle];
+ prim_data.common.may_need_repetition = false;
+
+ if *color_binding_index != ColorBindingIndex::INVALID {
+ match self.color_bindings[*color_binding_index] {
+ PropertyBinding::Binding(..) => {
+ // We explicitly invalidate the gpu cache
+ // if the color is animating.
+ let gpu_cache_handle =
+ if *segment_instance_index == SegmentInstanceIndex::INVALID {
+ None
+ } else if *segment_instance_index == SegmentInstanceIndex::UNUSED {
+ Some(&prim_data.common.gpu_cache_handle)
+ } else {
+ Some(&scratch.segment_instances[*segment_instance_index].gpu_cache_handle)
+ };
+ if let Some(gpu_cache_handle) = gpu_cache_handle {
+ frame_state.gpu_cache.invalidate(gpu_cache_handle);
+ }
+ }
+ PropertyBinding::Value(..) => {},
+ }
+ }
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ prim_data.update(
+ frame_state,
+ frame_context.scene_properties,
+ );
+
+ update_opacity_binding(
+ &mut self.opacity_bindings,
+ *opacity_binding_index,
+ frame_context.scene_properties,
+ );
+
+ write_segment(
+ *segment_instance_index,
+ frame_state,
+ &mut scratch.segments,
+ &mut scratch.segment_instances,
+ |request| {
+ prim_data.kind.write_prim_gpu_blocks(
+ request,
+ frame_context.scene_properties,
+ );
+ }
+ );
+ }
+ PrimitiveInstanceKind::YuvImage { data_handle, segment_instance_index, .. } => {
+ profile_scope!("YuvImage");
+ let prim_data = &mut data_stores.yuv_image[*data_handle];
+ let common_data = &mut prim_data.common;
+ let yuv_image_data = &mut prim_data.kind;
+
+ common_data.may_need_repetition = false;
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ yuv_image_data.update(common_data, frame_state);
+
+ write_segment(
+ *segment_instance_index,
+ frame_state,
+ &mut scratch.segments,
+ &mut scratch.segment_instances,
+ |request| {
+ yuv_image_data.write_prim_gpu_blocks(request);
+ }
+ );
+ }
+ PrimitiveInstanceKind::Image { data_handle, image_instance_index, .. } => {
+ profile_scope!("Image");
+ let prim_data = &mut data_stores.image[*data_handle];
+ let common_data = &mut prim_data.common;
+ let image_data = &mut prim_data.kind;
+
+ if image_data.stretch_size.width >= common_data.prim_rect.size.width &&
+ image_data.stretch_size.height >= common_data.prim_rect.size.height {
+
+ common_data.may_need_repetition = false;
+ }
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ image_data.update(common_data, frame_state);
+
+ let image_instance = &mut self.images[*image_instance_index];
+
+ update_opacity_binding(
+ &mut self.opacity_bindings,
+ image_instance.opacity_binding_index,
+ frame_context.scene_properties,
+ );
+
+ write_segment(
+ image_instance.segment_instance_index,
+ frame_state,
+ &mut scratch.segments,
+ &mut scratch.segment_instances,
+ |request| {
+ image_data.write_prim_gpu_blocks(request);
+ },
+ );
+ }
+ PrimitiveInstanceKind::LinearGradient { data_handle, gradient_index, .. } => {
+ profile_scope!("LinearGradient");
+ let prim_data = &mut data_stores.linear_grad[*data_handle];
+ let gradient = &mut self.linear_gradients[*gradient_index];
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ prim_data.update(frame_state);
+
+ if prim_data.stretch_size.width >= prim_data.common.prim_rect.size.width &&
+ prim_data.stretch_size.height >= prim_data.common.prim_rect.size.height {
+
+ prim_data.common.may_need_repetition = false;
+ }
+
+ if prim_data.supports_caching {
+ let gradient_size = (prim_data.end_point - prim_data.start_point).to_size();
+
+ // Calculate what the range of the gradient is that covers this
+ // primitive. These values are included in the cache key. The
+ // size of the gradient task is the length of a texture cache
+ // region, for maximum accuracy, and a minimal size on the
+ // axis that doesn't matter.
+ let (size, orientation, prim_start_offset, prim_end_offset) =
+ if prim_data.start_point.x.approx_eq(&prim_data.end_point.x) {
+ let prim_start_offset = -prim_data.start_point.y / gradient_size.height;
+ let prim_end_offset = (prim_data.common.prim_rect.size.height - prim_data.start_point.y)
+ / gradient_size.height;
+ let size = DeviceIntSize::new(16, TEXTURE_REGION_DIMENSIONS);
+ (size, LineOrientation::Vertical, prim_start_offset, prim_end_offset)
+ } else {
+ let prim_start_offset = -prim_data.start_point.x / gradient_size.width;
+ let prim_end_offset = (prim_data.common.prim_rect.size.width - prim_data.start_point.x)
+ / gradient_size.width;
+ let size = DeviceIntSize::new(TEXTURE_REGION_DIMENSIONS, 16);
+ (size, LineOrientation::Horizontal, prim_start_offset, prim_end_offset)
+ };
+
+ // Build the cache key, including information about the stops.
+ let mut stops = vec![GradientStopKey::empty(); prim_data.stops.len()];
+
+ // Reverse the stops as required, same as the gradient builder does
+ // for the slow path.
+ if prim_data.reverse_stops {
+ for (src, dest) in prim_data.stops.iter().rev().zip(stops.iter_mut()) {
+ let stop = GradientStop {
+ offset: 1.0 - src.offset,
+ color: src.color,
+ };
+ *dest = stop.into();
+ }
+ } else {
+ for (src, dest) in prim_data.stops.iter().zip(stops.iter_mut()) {
+ *dest = (*src).into();
+ }
+ }
+
+ gradient.cache_segments.clear();
+
+ // emit render task caches and image rectangles to draw a gradient
+ // with offsets from start_offset to end_offset.
+ //
+ // the primitive is covered by a gradient that ranges from
+ // prim_start_offset to prim_end_offset.
+ //
+ // when clamping, these two pairs of offsets will always be the same.
+ // when repeating, however, we march across the primitive, blitting
+ // copies of the gradient along the way. each copy has a range from
+ // 0.0 to 1.0 (assuming it's fully visible), but where it appears on
+ // the primitive changes as we go. this position is also expressed
+ // as an offset: gradient_offset_base. that is, in terms of stops,
+ // we draw a gradient from start_offset to end_offset. its actual
+ // location on the primitive is at start_offset + gradient_offset_base.
+ //
+ // either way, we need a while-loop to draw the gradient as well
+ // because it might have more than 4 stops (the maximum of a cached
+ // segment) and/or hard stops. so we have a walk-within-the-walk from
+ // start_offset to end_offset caching up to GRADIENT_FP_STOPS stops at a
+ // time.
+ fn emit_segments(start_offset: f32, // start and end offset together are
+ end_offset: f32, // always a subrange of 0..1
+ gradient_offset_base: f32,
+ prim_start_offset: f32, // the offsets of the entire gradient as it
+ prim_end_offset: f32, // covers the entire primitive.
+ prim_origin_in: LayoutPoint,
+ prim_size_in: LayoutSize,
+ task_size: DeviceIntSize,
+ is_opaque: bool,
+ stops: &[GradientStopKey],
+ orientation: LineOrientation,
+ frame_state: &mut FrameBuildingState,
+ gradient: &mut LinearGradientPrimitive)
+ {
+ // these prints are used to generate documentation examples, so
+ // leaving them in but commented out:
+ //println!("emit_segments call:");
+ //println!("\tstart_offset: {}, end_offset: {}", start_offset, end_offset);
+ //println!("\tprim_start_offset: {}, prim_end_offset: {}", prim_start_offset, prim_end_offset);
+ //println!("\tgradient_offset_base: {}", gradient_offset_base);
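+                        // For example: with stops at offsets [0.0, 0.5, 0.5, 1.0], the
+                        // hard stop at 0.5 (two stops sharing an offset) splits the
+                        // gradient into two cached segments, [0.0, 0.5] and [0.5, 1.0].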
+ let mut first_stop = 0;
+ // look for an inclusive range of stops [first_stop, last_stop].
+ // once first_stop points at (or past) the last stop, we're done.
+ while first_stop < stops.len()-1 {
+
+ // if the entire sub-gradient starts at an offset that's past the
+ // segment's end offset, we're done.
+ if stops[first_stop].offset > end_offset {
+ return;
+ }
+
+ // accumulate stops until we have GRADIENT_FP_STOPS of them, or we hit
+ // a hard stop:
+ let mut last_stop = first_stop;
+ let mut hard_stop = false; // did we stop on a hard stop?
+ while last_stop < stops.len()-1 &&
+ last_stop - first_stop + 1 < GRADIENT_FP_STOPS
+ {
+ if stops[last_stop+1].offset == stops[last_stop].offset {
+ hard_stop = true;
+ break;
+ }
+
+ last_stop = last_stop + 1;
+ }
+
+ let num_stops = last_stop - first_stop + 1;
+
+ // repeated hard stops at the same offset, skip
+ if num_stops == 0 {
+ first_stop = last_stop + 1;
+ continue;
+ }
+
+ // if the last_stop offset is before start_offset, the segment's not visible:
+ if stops[last_stop].offset < start_offset {
+ first_stop = if hard_stop { last_stop+1 } else { last_stop };
+ continue;
+ }
+
+ let segment_start_point = start_offset.max(stops[first_stop].offset);
+ let segment_end_point = end_offset .min(stops[last_stop ].offset);
+
+ let mut segment_stops = [GradientStopKey::empty(); GRADIENT_FP_STOPS];
+ for i in 0..num_stops {
+ segment_stops[i] = stops[first_stop + i];
+ }
+
+ let cache_key = GradientCacheKey {
+ orientation,
+ start_stop_point: VectorKey {
+ x: segment_start_point,
+ y: segment_end_point,
+ },
+ stops: segment_stops,
+ };
+
+ let mut prim_origin = prim_origin_in;
+ let mut prim_size = prim_size_in;
+
+ // the primitive is covered by a segment from overall_start to
+ // overall_end; scale and shift based on the length of the actual
+ // segment that we're drawing:
+ let inv_length = 1.0 / ( prim_end_offset - prim_start_offset );
+ if orientation == LineOrientation::Horizontal {
+ prim_origin.x += ( segment_start_point + gradient_offset_base - prim_start_offset )
+ * inv_length * prim_size.width;
+ prim_size.width *= ( segment_end_point - segment_start_point )
+ * inv_length; // 2 gradient_offset_bases cancel out
+ } else {
+ prim_origin.y += ( segment_start_point + gradient_offset_base - prim_start_offset )
+ * inv_length * prim_size.height;
+ prim_size.height *= ( segment_end_point - segment_start_point )
+ * inv_length; // 2 gradient_offset_bases cancel out
+ }
+
+                            // <= 0 can happen if a hard stop lands exactly on an edge
+ if prim_size.area() > 0.0 {
+ let local_rect = LayoutRect::new( prim_origin, prim_size );
+
+ // documentation example traces:
+ //println!("\t\tcaching from offset {} to {}", segment_start_point, segment_end_point);
+ //println!("\t\tand blitting to {:?}", local_rect);
+
+ // Request the render task each frame.
+ gradient.cache_segments.push(
+ CachedGradientSegment {
+ handle: frame_state.resource_cache.request_render_task(
+ RenderTaskCacheKey {
+ size: task_size,
+ kind: RenderTaskCacheKeyKind::Gradient(cache_key),
+ },
+ frame_state.gpu_cache,
+ frame_state.render_tasks,
+ None,
+ is_opaque,
+ |render_tasks| {
+ render_tasks.add().init(RenderTask::new_gradient(
+ task_size,
+ segment_stops,
+ orientation,
+ segment_start_point,
+ segment_end_point,
+ ))
+ }),
+                                    local_rect,
+ }
+ );
+ }
+
+ // if ending on a hardstop, skip past it for the start of the next run:
+ first_stop = if hard_stop { last_stop + 1 } else { last_stop };
+ }
+ }
+
+ if prim_data.extend_mode == ExtendMode::Clamp ||
+ ( prim_start_offset >= 0.0 && prim_end_offset <= 1.0 ) // repeat doesn't matter
+ {
+ // To support clamping, we need to make sure that quads are emitted for the
+ // segments before and after the 0.0...1.0 range of offsets. emit_segments
+ // can handle that by duplicating the first and last point if necessary:
+ if prim_start_offset < 0.0 {
+ stops.insert(0, GradientStopKey {
+ offset: prim_start_offset,
+ color : stops[0].color
+ });
+ }
+
+ if prim_end_offset > 1.0 {
+ stops.push( GradientStopKey {
+ offset: prim_end_offset,
+ color : stops[stops.len()-1].color
+ });
+ }
+
+ emit_segments(prim_start_offset, prim_end_offset,
+ 0.0,
+ prim_start_offset, prim_end_offset,
+ prim_data.common.prim_rect.origin,
+ prim_data.common.prim_rect.size,
+ size,
+ prim_data.stops_opacity.is_opaque,
+ &stops,
+ orientation,
+ frame_state,
+ gradient);
+ }
+ else
+ {
+ let mut segment_start_point = prim_start_offset;
+ while segment_start_point < prim_end_offset {
+
+ // gradient stops are expressed in the range 0.0 ... 1.0, so to blit
+ // a copy of the gradient, snap to the integer just before the offset
+ // we want ...
+ let gradient_offset_base = segment_start_point.floor();
+ // .. and then draw from a start offset in range 0 to 1 ...
+ let repeat_start = segment_start_point - gradient_offset_base;
+ // .. up to the next integer, but clamped to the primitive's real
+ // end offset:
+ let repeat_end = (gradient_offset_base + 1.0).min(prim_end_offset) - gradient_offset_base;
+
+ emit_segments(repeat_start, repeat_end,
+ gradient_offset_base,
+ prim_start_offset, prim_end_offset,
+ prim_data.common.prim_rect.origin,
+ prim_data.common.prim_rect.size,
+ size,
+ prim_data.stops_opacity.is_opaque,
+ &stops,
+ orientation,
+ frame_state,
+ gradient);
+
+ segment_start_point = repeat_end + gradient_offset_base;
+ }
+ }
+ }
+
+ if prim_data.tile_spacing != LayoutSize::zero() {
+ // We are performing the decomposition on the CPU here, no need to
+ // have it in the shader.
+ prim_data.common.may_need_repetition = false;
+
+ let prim_info = &scratch.prim_info[prim_instance.visibility_info.0 as usize];
+
+ let map_local_to_world = SpaceMapper::new_with_target(
+ ROOT_SPATIAL_NODE_INDEX,
+ prim_spatial_node_index,
+ frame_context.global_screen_world_rect,
+ frame_context.spatial_tree,
+ );
+
+ gradient.visible_tiles_range = decompose_repeated_primitive(
+ &prim_info.combined_local_clip_rect,
+ &prim_data.common.prim_rect,
+ prim_info.clipped_world_rect,
+ &prim_data.stretch_size,
+ &prim_data.tile_spacing,
+ frame_state,
+ &mut scratch.gradient_tiles,
+ &map_local_to_world,
+ &mut |_, mut request| {
+ request.push([
+ prim_data.start_point.x,
+ prim_data.start_point.y,
+ prim_data.end_point.x,
+ prim_data.end_point.y,
+ ]);
+ request.push([
+ pack_as_float(prim_data.extend_mode as u32),
+ prim_data.stretch_size.width,
+ prim_data.stretch_size.height,
+ 0.0,
+ ]);
+ }
+ );
+
+ if gradient.visible_tiles_range.is_empty() {
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ }
+ }
+
+ // TODO(gw): Consider whether it's worth doing segment building
+ // for gradient primitives.
+ }
+ PrimitiveInstanceKind::RadialGradient { data_handle, ref mut visible_tiles_range, .. } => {
+ profile_scope!("RadialGradient");
+ let prim_data = &mut data_stores.radial_grad[*data_handle];
+
+ if prim_data.stretch_size.width >= prim_data.common.prim_rect.size.width &&
+ prim_data.stretch_size.height >= prim_data.common.prim_rect.size.height {
+
+ // We are performing the decomposition on the CPU here, no need to
+ // have it in the shader.
+ prim_data.common.may_need_repetition = false;
+ }
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ prim_data.update(frame_state);
+
+ if prim_data.tile_spacing != LayoutSize::zero() {
+ let prim_info = &scratch.prim_info[prim_instance.visibility_info.0 as usize];
+
+ let map_local_to_world = SpaceMapper::new_with_target(
+ ROOT_SPATIAL_NODE_INDEX,
+ prim_spatial_node_index,
+ frame_context.global_screen_world_rect,
+ frame_context.spatial_tree,
+ );
+
+ prim_data.common.may_need_repetition = false;
+
+ *visible_tiles_range = decompose_repeated_primitive(
+ &prim_info.combined_local_clip_rect,
+ &prim_data.common.prim_rect,
+ prim_info.clipped_world_rect,
+ &prim_data.stretch_size,
+ &prim_data.tile_spacing,
+ frame_state,
+ &mut scratch.gradient_tiles,
+ &map_local_to_world,
+ &mut |_, mut request| {
+ request.push([
+ prim_data.center.x,
+ prim_data.center.y,
+ prim_data.params.start_radius,
+ prim_data.params.end_radius,
+ ]);
+ request.push([
+ prim_data.params.ratio_xy,
+ pack_as_float(prim_data.extend_mode as u32),
+ prim_data.stretch_size.width,
+ prim_data.stretch_size.height,
+ ]);
+ },
+ );
+
+ if visible_tiles_range.is_empty() {
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ }
+ }
+
+ // TODO(gw): Consider whether it's worth doing segment building
+ // for gradient primitives.
+ }
+ PrimitiveInstanceKind::ConicGradient { data_handle, ref mut visible_tiles_range, .. } => {
+ profile_scope!("ConicGradient");
+ let prim_data = &mut data_stores.conic_grad[*data_handle];
+
+ if prim_data.stretch_size.width >= prim_data.common.prim_rect.size.width &&
+ prim_data.stretch_size.height >= prim_data.common.prim_rect.size.height {
+
+ // We are performing the decomposition on the CPU here, no need to
+ // have it in the shader.
+ prim_data.common.may_need_repetition = false;
+ }
+
+                // Update the template this instance references, which may refresh the GPU
+ // cache with any shared template data.
+ prim_data.update(frame_state);
+
+ if prim_data.tile_spacing != LayoutSize::zero() {
+ let prim_info = &scratch.prim_info[prim_instance.visibility_info.0 as usize];
+
+ let map_local_to_world = SpaceMapper::new_with_target(
+ ROOT_SPATIAL_NODE_INDEX,
+ prim_spatial_node_index,
+ frame_context.global_screen_world_rect,
+ frame_context.spatial_tree,
+ );
+
+ prim_data.common.may_need_repetition = false;
+
+ *visible_tiles_range = decompose_repeated_primitive(
+ &prim_info.combined_local_clip_rect,
+ &prim_data.common.prim_rect,
+ prim_info.clipped_world_rect,
+ &prim_data.stretch_size,
+ &prim_data.tile_spacing,
+ frame_state,
+ &mut scratch.gradient_tiles,
+ &map_local_to_world,
+ &mut |_, mut request| {
+ request.push([
+ prim_data.center.x,
+ prim_data.center.y,
+ prim_data.params.start_offset,
+ prim_data.params.end_offset,
+ ]);
+ request.push([
+ prim_data.params.angle,
+ pack_as_float(prim_data.extend_mode as u32),
+ prim_data.stretch_size.width,
+ prim_data.stretch_size.height,
+ ]);
+ },
+ );
+
+ if visible_tiles_range.is_empty() {
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ }
+ }
+
+ // TODO(gw): Consider whether it's worth doing segment building
+ // for gradient primitives.
+ }
+ PrimitiveInstanceKind::Picture { pic_index, segment_instance_index, .. } => {
+ profile_scope!("Picture");
+ let pic = &mut self.pictures[pic_index.0];
+ let prim_info = &scratch.prim_info[prim_instance.visibility_info.0 as usize];
+
+ if pic.prepare_for_render(
+ frame_context,
+ frame_state,
+ data_stores,
+ ) {
+ if let Some(ref mut splitter) = pic_state.plane_splitter {
+ PicturePrimitive::add_split_plane(
+ splitter,
+ frame_context.spatial_tree,
+ prim_spatial_node_index,
+ pic.precise_local_rect,
+ &prim_info.combined_local_clip_rect,
+ frame_state.current_dirty_region().combined,
+ plane_split_anchor,
+ );
+ }
+
+ // If this picture uses segments, ensure the GPU cache is
+ // up to date with segment local rects.
+ // TODO(gw): This entire match statement above can now be
+ // refactored into prepare_interned_prim_for_render.
+ if pic.can_use_segments() {
+ write_segment(
+ *segment_instance_index,
+ frame_state,
+ &mut scratch.segments,
+ &mut scratch.segment_instances,
+ |request| {
+ request.push(PremultipliedColorF::WHITE);
+ request.push(PremultipliedColorF::WHITE);
+ request.push([
+ -1.0, // -ve means use prim rect for stretch size
+ 0.0,
+ 0.0,
+ 0.0,
+ ]);
+ }
+ );
+ }
+ } else {
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ }
+ }
+ PrimitiveInstanceKind::Backdrop { data_handle } => {
+ profile_scope!("Backdrop");
+ let backdrop_pic_index = data_stores.backdrop[*data_handle].kind.pic_index;
+
+                    // Set up a dependency on the backdrop picture to ensure it is rendered prior to rendering this primitive.
+ let backdrop_surface_index = self.pictures[backdrop_pic_index.0].raster_config.as_ref().unwrap().surface_index;
+ if let Some(backdrop_tasks) = frame_state.surfaces[backdrop_surface_index.0].render_tasks {
+ let picture_task_id = frame_state.surfaces[pic_context.surface_index.0].render_tasks.as_ref().unwrap().port;
+ frame_state.render_tasks.add_dependency(picture_task_id, backdrop_tasks.root);
+ } else {
+ if prim_instance.is_chased() {
+ println!("\tBackdrop primitive culled because backdrop task was not assigned render tasks");
+ }
+ prim_instance.visibility_info = PrimitiveVisibilityIndex::INVALID;
+ }
+ }
+ };
+ }
+}
+
+fn write_segment<F>(
+ segment_instance_index: SegmentInstanceIndex,
+ frame_state: &mut FrameBuildingState,
+ segments: &mut SegmentStorage,
+ segment_instances: &mut SegmentInstanceStorage,
+ f: F,
+) where F: Fn(&mut GpuDataRequest) {
+ debug_assert_ne!(segment_instance_index, SegmentInstanceIndex::INVALID);
+ if segment_instance_index != SegmentInstanceIndex::UNUSED {
+ let segment_instance = &mut segment_instances[segment_instance_index];
+
+ if let Some(mut request) = frame_state.gpu_cache.request(&mut segment_instance.gpu_cache_handle) {
+ let segments = &segments[segment_instance.segments_range];
+
+ f(&mut request);
+
+ for segment in segments {
+ request.write_segment(
+ segment.local_rect,
+ [0.0; 4],
+ );
+ }
+ }
+ }
+}
+
+fn decompose_repeated_primitive(
+ combined_local_clip_rect: &LayoutRect,
+ prim_local_rect: &LayoutRect,
+ prim_world_rect: WorldRect,
+ stretch_size: &LayoutSize,
+ tile_spacing: &LayoutSize,
+ frame_state: &mut FrameBuildingState,
+ gradient_tiles: &mut GradientTileStorage,
+ map_local_to_world: &SpaceMapper<LayoutPixel, WorldPixel>,
+ callback: &mut dyn FnMut(&LayoutRect, GpuDataRequest),
+) -> GradientTileRange {
+ let mut visible_tiles = Vec::new();
+
+ // Tighten the clip rect because decomposing the repeated image can
+ // produce primitives that are partially covering the original image
+ // rect and we want to clip these extra parts out.
+ let tight_clip_rect = combined_local_clip_rect
+ .intersection(prim_local_rect).unwrap();
+
+ let visible_rect = compute_conservative_visible_rect(
+ &tight_clip_rect,
+ prim_world_rect,
+ map_local_to_world,
+ );
+ let stride = *stretch_size + *tile_spacing;
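+    // e.g. (illustrative): a stretch_size of 50x50 with tile_spacing 10x10
+    // gives a 60x60 stride, so tile origins step by 60 on each axis across
+    // the visible rect while each tile keeps the 50x50 stretch size.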
+
+ let repetitions = image_tiling::repetitions(prim_local_rect, &visible_rect, stride);
+ for Repetition { origin, .. } in repetitions {
+ let mut handle = GpuCacheHandle::new();
+ let rect = LayoutRect {
+ origin,
+ size: *stretch_size,
+ };
+
+ if let Some(request) = frame_state.gpu_cache.request(&mut handle) {
+ callback(&rect, request);
+ }
+
+ visible_tiles.push(VisibleGradientTile {
+ local_rect: rect,
+ local_clip_rect: tight_clip_rect,
+ handle
+ });
+ }
+
+    // At this point, if we don't have tiles to show, it means we could probably
+    // have done a better job of culling during an earlier stage.
+ // Clearing the screen rect has the effect of "culling out" the primitive
+ // from the point of view of the batch builder, and ensures we don't hit
+ // assertions later on because we didn't request any image.
+ if visible_tiles.is_empty() {
+ GradientTileRange::empty()
+ } else {
+ gradient_tiles.extend(visible_tiles)
+ }
+}
+
+fn compute_conservative_visible_rect(
+ local_clip_rect: &LayoutRect,
+ world_culling_rect: WorldRect,
+ map_local_to_world: &SpaceMapper<LayoutPixel, WorldPixel>,
+) -> LayoutRect {
+ if let Some(local_bounds) = map_local_to_world.unmap(&world_culling_rect) {
+ return local_clip_rect.intersection(&local_bounds).unwrap_or_else(LayoutRect::zero)
+ }
+
+ *local_clip_rect
+}
+
+fn edge_flags_for_tile_spacing(tile_spacing: &LayoutSize) -> EdgeAaSegmentMask {
+ let mut flags = EdgeAaSegmentMask::empty();
+
+ if tile_spacing.width > 0.0 {
+ flags |= EdgeAaSegmentMask::LEFT | EdgeAaSegmentMask::RIGHT;
+ }
+ if tile_spacing.height > 0.0 {
+ flags |= EdgeAaSegmentMask::TOP | EdgeAaSegmentMask::BOTTOM;
+ }
+
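+    // e.g. (illustrative): a tile_spacing of (8.0, 0.0) yields LEFT | RIGHT,
+    // requesting anti-aliased edges where the horizontal spacing exposes
+    // gaps between tiles.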
+ flags
+}
+
+impl<'a> GpuDataRequest<'a> {
+ // Write the GPU cache data for an individual segment.
+ fn write_segment(
+ &mut self,
+ local_rect: LayoutRect,
+ extra_data: [f32; 4],
+ ) {
+        // Keep VECS_PER_SEGMENT referenced so the constant isn't reported as
+        // unused; the pushes below write one segment's worth of GPU blocks.
+        let _ = VECS_PER_SEGMENT;
+ self.push(local_rect);
+ self.push(extra_data);
+ }
+}
+
+ fn write_brush_segment_description(
+ prim_local_rect: LayoutRect,
+ prim_local_clip_rect: LayoutRect,
+ clip_chain: &ClipChainInstance,
+ segment_builder: &mut SegmentBuilder,
+ clip_store: &ClipStore,
+ data_stores: &DataStores,
+ ) -> bool {
+        // If the brush is small, we want to skip building segments
+        // and just draw it as a single primitive with a clip mask.
+ if prim_local_rect.size.area() < MIN_BRUSH_SPLIT_AREA {
+ return false;
+ }
+
+ segment_builder.initialize(
+ prim_local_rect,
+ None,
+ prim_local_clip_rect
+ );
+
+ // Segment the primitive on all the local-space clip sources that we can.
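+        // e.g. (illustrative): a large rect clipped by a same-space rounded
+        // rect typically splits into a nine-patch, where the four corner
+        // segments need a clip mask for the radii and the edge/center
+        // segments can draw without one.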
+ for i in 0 .. clip_chain.clips_range.count {
+ let clip_instance = clip_store
+ .get_instance_from_range(&clip_chain.clips_range, i);
+ let clip_node = &data_stores.clip[clip_instance.handle];
+
+ // If this clip item is positioned by another positioning node, its relative position
+ // could change during scrolling. This means that we would need to resegment. Instead
+ // of doing that, only segment with clips that have the same positioning node.
+ // TODO(mrobinson, #2858): It may make sense to include these nodes, resegmenting only
+ // when necessary while scrolling.
+ if !clip_instance.flags.contains(ClipNodeFlags::SAME_SPATIAL_NODE) {
+ continue;
+ }
+
+ let (local_clip_rect, radius, mode) = match clip_node.item.kind {
+ ClipItemKind::RoundedRectangle { rect, radius, mode } => {
+ (rect, Some(radius), mode)
+ }
+ ClipItemKind::Rectangle { rect, mode } => {
+ (rect, None, mode)
+ }
+ ClipItemKind::BoxShadow { ref source } => {
+ // For inset box shadows, we can clip out any
+ // pixels that are inside the shadow region
+ // and are beyond the inner rect, as they can't
+ // be affected by the blur radius.
+ let inner_clip_mode = match source.clip_mode {
+ BoxShadowClipMode::Outset => None,
+ BoxShadowClipMode::Inset => Some(ClipMode::ClipOut),
+ };
+
+ // Push a region into the segment builder where the
+ // box-shadow can have an effect on the result. This
+ // ensures clip-mask tasks get allocated for these
+ // pixel regions, even if no other clips affect them.
+ segment_builder.push_mask_region(
+ source.prim_shadow_rect,
+ source.prim_shadow_rect.inflate(
+ -0.5 * source.original_alloc_size.width,
+ -0.5 * source.original_alloc_size.height,
+ ),
+ inner_clip_mode,
+ );
+
+ continue;
+ }
+ ClipItemKind::Image { .. } => {
+ // If we encounter an image mask, bail out from segment building.
+ // It's not possible to know which parts of the primitive are affected
+ // by the mask (without inspecting the pixels). We could do something
+ // better here in the future if it ever shows up as a performance issue
+ // (for instance, at least segment based on the bounding rect of the
+ // image mask if it's non-repeating).
+ return false;
+ }
+ };
+
+ segment_builder.push_clip_rect(local_clip_rect, radius, mode);
+ }
+
+ true
+ }
+
+impl PrimitiveInstance {
+ fn build_segments_if_needed(
+ &mut self,
+ prim_info: &PrimitiveVisibility,
+ frame_state: &mut FrameBuildingState,
+ prim_store: &mut PrimitiveStore,
+ data_stores: &DataStores,
+ segments_store: &mut SegmentStorage,
+ segment_instances_store: &mut SegmentInstanceStorage,
+ ) {
+ let prim_clip_chain = &prim_info.clip_chain;
+
+ // Usually, the primitive rect can be found from information
+ // in the instance and primitive template.
+ let prim_local_rect = data_stores.get_local_prim_rect(
+ self,
+ prim_store,
+ );
+
+ let segment_instance_index = match self.kind {
+ PrimitiveInstanceKind::Rectangle { ref mut segment_instance_index, .. } |
+ PrimitiveInstanceKind::YuvImage { ref mut segment_instance_index, .. } => {
+ segment_instance_index
+ }
+ PrimitiveInstanceKind::Image { data_handle, image_instance_index, .. } => {
+ let image_data = &data_stores.image[data_handle].kind;
+ let image_instance = &mut prim_store.images[image_instance_index];
+                // Note: tiled images don't support automatic segmentation,
+ // they strictly produce one segment per visible tile instead.
+ if frame_state
+ .resource_cache
+ .get_image_properties(image_data.key)
+ .and_then(|properties| properties.tiling)
+ .is_some()
+ {
+ image_instance.segment_instance_index = SegmentInstanceIndex::UNUSED;
+ return;
+ }
+ &mut image_instance.segment_instance_index
+ }
+ PrimitiveInstanceKind::Picture { ref mut segment_instance_index, pic_index, .. } => {
+ let pic = &mut prim_store.pictures[pic_index.0];
+
+ // If this picture supports segment rendering
+ if pic.can_use_segments() {
+ // If the segments have been invalidated, ensure the current
+ // index of segments is invalid. This ensures that the segment
+ // building logic below will be run.
+ if !pic.segments_are_valid {
+ *segment_instance_index = SegmentInstanceIndex::INVALID;
+ pic.segments_are_valid = true;
+ }
+
+ segment_instance_index
+ } else {
+ return;
+ }
+ }
+ PrimitiveInstanceKind::TextRun { .. } |
+ PrimitiveInstanceKind::NormalBorder { .. } |
+ PrimitiveInstanceKind::ImageBorder { .. } |
+ PrimitiveInstanceKind::Clear { .. } |
+ PrimitiveInstanceKind::LinearGradient { .. } |
+ PrimitiveInstanceKind::RadialGradient { .. } |
+ PrimitiveInstanceKind::ConicGradient { .. } |
+ PrimitiveInstanceKind::LineDecoration { .. } |
+ PrimitiveInstanceKind::Backdrop { .. } => {
+ // These primitives don't support / need segments.
+ return;
+ }
+ };
+
+ if *segment_instance_index == SegmentInstanceIndex::INVALID {
+ let mut segments: SmallVec<[BrushSegment; 8]> = SmallVec::new();
+
+ if write_brush_segment_description(
+ prim_local_rect,
+ self.local_clip_rect,
+ prim_clip_chain,
+ &mut frame_state.segment_builder,
+ frame_state.clip_store,
+ data_stores,
+ ) {
+ frame_state.segment_builder.build(|segment| {
+ segments.push(
+ BrushSegment::new(
+ segment.rect.translate(-prim_local_rect.origin.to_vector()),
+ segment.has_mask,
+ segment.edge_flags,
+ [0.0; 4],
+ BrushFlags::PERSPECTIVE_INTERPOLATION,
+ ),
+ );
+ });
+ }
+
+ // If only a single segment is produced, there is no benefit to writing
+ // a segment instance array. Instead, just use the main primitive rect
+ // written into the GPU cache.
+            // TODO(gw): This is (sort of) a band-aid - due to a limitation in the current
+ // brush encoding, we can only support a total of up to 2^16 segments.
+ // This should be (more than) enough for any real world case, so for
+ // now we can handle this by skipping cases where we were generating
+ // segments where there is no benefit. The long term / robust fix
+ // for this is to move the segment building to be done as a more
+ // limited nine-patch system during scene building, removing arbitrary
+ // segmentation during frame-building (see bug #1617491).
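+            // Example (illustrative): a rect whose only clip is a same-space
+            // rect fully containing it yields at most one segment, so we fall
+            // back to the main primitive rect (UNUSED) below.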
+ if segments.len() <= 1 {
+ *segment_instance_index = SegmentInstanceIndex::UNUSED;
+ } else {
+ let segments_range = segments_store.extend(segments);
+
+ let instance = SegmentedInstance {
+ segments_range,
+ gpu_cache_handle: GpuCacheHandle::new(),
+ };
+
+ *segment_instance_index = segment_instances_store.push(instance);
+ };
+ }
+ }
+
+ fn update_clip_task_for_brush(
+ &self,
+ prim_origin: &LayoutPoint,
+ prim_info: &mut PrimitiveVisibility,
+ prim_spatial_node_index: SpatialNodeIndex,
+ root_spatial_node_index: SpatialNodeIndex,
+ pic_context: &PictureContext,
+ pic_state: &mut PictureState,
+ frame_context: &FrameBuildingContext,
+ frame_state: &mut FrameBuildingState,
+ prim_store: &PrimitiveStore,
+ data_stores: &mut DataStores,
+ segments_store: &mut SegmentStorage,
+ segment_instances_store: &mut SegmentInstanceStorage,
+ clip_mask_instances: &mut Vec<ClipMaskKind>,
+ unclipped: &DeviceRect,
+ device_pixel_scale: DevicePixelScale,
+ ) -> bool {
+ let segments = match self.kind {
+ PrimitiveInstanceKind::TextRun { .. } |
+ PrimitiveInstanceKind::Clear { .. } |
+ PrimitiveInstanceKind::LineDecoration { .. } |
+ PrimitiveInstanceKind::Backdrop { .. } => {
+ return false;
+ }
+ PrimitiveInstanceKind::Image { image_instance_index, .. } => {
+ let segment_instance_index = prim_store
+ .images[image_instance_index]
+ .segment_instance_index;
+
+ if segment_instance_index == SegmentInstanceIndex::UNUSED {
+ return false;
+ }
+
+ let segment_instance = &segment_instances_store[segment_instance_index];
+
+ &segments_store[segment_instance.segments_range]
+ }
+ PrimitiveInstanceKind::Picture { segment_instance_index, .. } => {
+ // Pictures may not support segment rendering at all (INVALID)
+ // or support segment rendering but choose not to due to size
+ // or some other factor (UNUSED).
+ if segment_instance_index == SegmentInstanceIndex::UNUSED ||
+ segment_instance_index == SegmentInstanceIndex::INVALID {
+ return false;
+ }
+
+ let segment_instance = &segment_instances_store[segment_instance_index];
+ &segments_store[segment_instance.segments_range]
+ }
+ PrimitiveInstanceKind::YuvImage { segment_instance_index, .. } |
+ PrimitiveInstanceKind::Rectangle { segment_instance_index, .. } => {
+ debug_assert!(segment_instance_index != SegmentInstanceIndex::INVALID);
+
+ if segment_instance_index == SegmentInstanceIndex::UNUSED {
+ return false;
+ }
+
+ let segment_instance = &segment_instances_store[segment_instance_index];
+
+ &segments_store[segment_instance.segments_range]
+ }
+ PrimitiveInstanceKind::ImageBorder { data_handle, .. } => {
+ let border_data = &data_stores.image_border[data_handle].kind;
+
+ // TODO: This is quite messy - once we remove legacy primitives we
+ // can change this to be a tuple match on (instance, template)
+ border_data.brush_segments.as_slice()
+ }
+ PrimitiveInstanceKind::NormalBorder { data_handle, .. } => {
+ let border_data = &data_stores.normal_border[data_handle].kind;
+
+ // TODO: This is quite messy - once we remove legacy primitives we
+ // can change this to be a tuple match on (instance, template)
+ border_data.brush_segments.as_slice()
+ }
+ PrimitiveInstanceKind::LinearGradient { data_handle, .. } => {
+ let prim_data = &data_stores.linear_grad[data_handle];
+
+ // TODO: This is quite messy - once we remove legacy primitives we
+ // can change this to be a tuple match on (instance, template)
+ if prim_data.brush_segments.is_empty() {
+ return false;
+ }
+
+ prim_data.brush_segments.as_slice()
+ }
+ PrimitiveInstanceKind::RadialGradient { data_handle, .. } => {
+ let prim_data = &data_stores.radial_grad[data_handle];
+
+ // TODO: This is quite messy - once we remove legacy primitives we
+ // can change this to be a tuple match on (instance, template)
+ if prim_data.brush_segments.is_empty() {
+ return false;
+ }
+
+ prim_data.brush_segments.as_slice()
+ }
+ PrimitiveInstanceKind::ConicGradient { data_handle, .. } => {
+ let prim_data = &data_stores.conic_grad[data_handle];
+
+ // TODO: This is quite messy - once we remove legacy primitives we
+ // can change this to be a tuple match on (instance, template)
+ if prim_data.brush_segments.is_empty() {
+ return false;
+ }
+
+ prim_data.brush_segments.as_slice()
+ }
+ };
+
+ // If there are no segments, early out to avoid setting a valid
+ // clip task instance location below.
+ if segments.is_empty() {
+ return true;
+ }
+
+ // Set where in the clip mask instances array the clip mask info
+ // can be found for this primitive. Each segment will push the
+ // clip mask information for itself in update_clip_task below.
+ prim_info.clip_task_index = ClipTaskIndex(clip_mask_instances.len() as _);
+
+ // If we only built 1 segment, there is no point in re-running
+ // the clip chain builder. Instead, just use the clip chain
+ // instance that was built for the main primitive. This is a
+ // significant optimization for the common case.
+ if segments.len() == 1 {
+ let clip_mask_kind = segments[0].update_clip_task(
+ Some(&prim_info.clip_chain),
+ prim_info.clipped_world_rect,
+ root_spatial_node_index,
+ pic_context.surface_index,
+ pic_state,
+ frame_context,
+ frame_state,
+ &mut data_stores.clip,
+ unclipped,
+ device_pixel_scale,
+ );
+ clip_mask_instances.push(clip_mask_kind);
+ } else {
+ let dirty_world_rect = frame_state.current_dirty_region().combined;
+
+ for segment in segments {
+ // Build a clip chain for the smaller segment rect. This will
+ // often manage to eliminate most/all clips, and sometimes
+ // clip the segment completely.
+ frame_state.clip_store.set_active_clips_from_clip_chain(
+ &prim_info.clip_chain,
+ prim_spatial_node_index,
+ &frame_context.spatial_tree,
+ );
+
+ let segment_clip_chain = frame_state
+ .clip_store
+ .build_clip_chain_instance(
+ segment.local_rect.translate(prim_origin.to_vector()),
+ &pic_state.map_local_to_pic,
+ &pic_state.map_pic_to_world,
+ &frame_context.spatial_tree,
+ frame_state.gpu_cache,
+ frame_state.resource_cache,
+ device_pixel_scale,
+ &dirty_world_rect,
+ &mut data_stores.clip,
+ false,
+ self.is_chased(),
+ );
+
+ let clip_mask_kind = segment.update_clip_task(
+ segment_clip_chain.as_ref(),
+ prim_info.clipped_world_rect,
+ root_spatial_node_index,
+ pic_context.surface_index,
+ pic_state,
+ frame_context,
+ frame_state,
+ &mut data_stores.clip,
+ unclipped,
+ device_pixel_scale,
+ );
+ clip_mask_instances.push(clip_mask_kind);
+ }
+ }
+
+ true
+ }
+
+ fn update_clip_task(
+ &mut self,
+ prim_origin: &LayoutPoint,
+ prim_spatial_node_index: SpatialNodeIndex,
+ root_spatial_node_index: SpatialNodeIndex,
+ pic_context: &PictureContext,
+ pic_state: &mut PictureState,
+ frame_context: &FrameBuildingContext,
+ frame_state: &mut FrameBuildingState,
+ prim_store: &mut PrimitiveStore,
+ data_stores: &mut DataStores,
+ scratch: &mut PrimitiveScratchBuffer,
+ ) {
+ let prim_info = &mut scratch.prim_info[self.visibility_info.0 as usize];
+ let device_pixel_scale = frame_state.surfaces[pic_context.surface_index.0].device_pixel_scale;
+
+ if self.is_chased() {
+ println!("\tupdating clip task with pic rect {:?}", prim_info.clip_chain.pic_clip_rect);
+ }
+
+ // Get the device space rect for the primitive if it was unclipped.
+ let unclipped = match get_unclipped_device_rect(
+ prim_info.clip_chain.pic_clip_rect,
+ &pic_state.map_pic_to_raster,
+ device_pixel_scale,
+ ) {
+ Some(rect) => rect,
+ None => return,
+ };
+
+ self.build_segments_if_needed(
+ &prim_info,
+ frame_state,
+ prim_store,
+ data_stores,
+ &mut scratch.segments,
+ &mut scratch.segment_instances,
+ );
+
+ // First try to render this primitive's mask using optimized brush rendering.
+ if self.update_clip_task_for_brush(
+ prim_origin,
+ prim_info,
+ prim_spatial_node_index,
+ root_spatial_node_index,
+ pic_context,
+ pic_state,
+ frame_context,
+ frame_state,
+ prim_store,
+ data_stores,
+ &mut scratch.segments,
+ &mut scratch.segment_instances,
+ &mut scratch.clip_mask_instances,
+ &unclipped,
+ device_pixel_scale,
+ ) {
+ if self.is_chased() {
+ println!("\tsegment tasks have been created for clipping");
+ }
+ return;
+ }
+
+ if prim_info.clip_chain.needs_mask {
+            // Get a minimal device space rect, clipped to the screen, that
+            // we need to allocate for the clip mask.
+ if let Some(device_rect) = get_clipped_device_rect(
+ &unclipped,
+ &pic_state.map_raster_to_world,
+ prim_info.clipped_world_rect,
+ device_pixel_scale,
+ ) {
+ let (device_rect, device_pixel_scale) = adjust_mask_scale_for_max_size(device_rect, device_pixel_scale);
+
+ let clip_task_id = RenderTask::new_mask(
+ device_rect,
+ prim_info.clip_chain.clips_range,
+ root_spatial_node_index,
+ frame_state.clip_store,
+ frame_state.gpu_cache,
+ frame_state.resource_cache,
+ frame_state.render_tasks,
+ &mut data_stores.clip,
+ device_pixel_scale,
+ frame_context.fb_config,
+ );
+ if self.is_chased() {
+ println!("\tcreated task {:?} with device rect {:?}",
+ clip_task_id, device_rect);
+ }
+ // Set the global clip mask instance for this primitive.
+ let clip_task_index = ClipTaskIndex(scratch.clip_mask_instances.len() as _);
+ scratch.clip_mask_instances.push(ClipMaskKind::Mask(clip_task_id));
+ prim_info.clip_task_index = clip_task_index;
+ frame_state.render_tasks.add_dependency(
+ frame_state.surfaces[pic_context.surface_index.0].render_tasks.unwrap().port,
+ clip_task_id,
+ );
+ }
+ }
+ }
+}
+
+// Ensures that the size of mask render tasks stays within MAX_MASK_SIZE.
+fn adjust_mask_scale_for_max_size(device_rect: DeviceRect, device_pixel_scale: DevicePixelScale) -> (DeviceIntRect, DevicePixelScale) {
+ if device_rect.width() > MAX_MASK_SIZE || device_rect.height() > MAX_MASK_SIZE {
+        // round_out can grow the rect by 1 integer pixel if the origin is at
+        // a fractional position, so subtract 1 to keep that margin for error:
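+        // e.g. (illustrative, assuming MAX_MASK_SIZE were 4096.0): a
+        // 6000x2000 rect gives scale = 4095.0 / 6000.0 ~= 0.68, so the task
+        // is allocated at about 4095x1365 with a proportionally reduced
+        // device pixel scale.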
+ let scale = (MAX_MASK_SIZE - 1.0) /
+ f32::max(device_rect.width(), device_rect.height());
+ let new_device_pixel_scale = device_pixel_scale * Scale::new(scale);
+ let new_device_rect = (device_rect.to_f32() * Scale::new(scale))
+ .round_out()
+ .to_i32();
+ (new_device_rect, new_device_pixel_scale)
+ } else {
+ (device_rect.to_i32(), device_pixel_scale)
+ }
+}
+
+/// Retrieve the exact unsnapped device space rectangle for a primitive.
+fn get_unclipped_device_rect(
+ prim_rect: PictureRect,
+ map_to_raster: &SpaceMapper<PicturePixel, RasterPixel>,
+ device_pixel_scale: DevicePixelScale,
+) -> Option<DeviceRect> {
+ let raster_rect = map_to_raster.map(&prim_rect)?;
+    // Raster space is currently always at an identity scale to world space,
+    // so this multiply only converts between the two unit types.
+    let world_rect = raster_rect * Scale::new(1.0);
+ Some(world_rect * device_pixel_scale)
+}
+
+/// Given an unclipped device rect, try to find a minimal device space
+/// rect to allocate a clip mask for, by clipping to the screen. This
+/// function is very similar to get_raster_rects below. It is far from
+/// ideal, and should be refactored as part of the support for setting
+/// scale per-raster-root.
+fn get_clipped_device_rect(
+ unclipped: &DeviceRect,
+ map_to_world: &SpaceMapper<RasterPixel, WorldPixel>,
+ prim_bounding_rect: WorldRect,
+ device_pixel_scale: DevicePixelScale,
+) -> Option<DeviceRect> {
+ let unclipped_raster_rect = {
+ let world_rect = *unclipped * Scale::new(1.0);
+ let raster_rect = world_rect * device_pixel_scale.inverse();
+
+ raster_rect.cast_unit()
+ };
+
+ let unclipped_world_rect = map_to_world.map(&unclipped_raster_rect)?;
+
+ let clipped_world_rect = unclipped_world_rect.intersection(&prim_bounding_rect)?;
+
+ let clipped_raster_rect = map_to_world.unmap(&clipped_world_rect)?;
+
+ let clipped_raster_rect = clipped_raster_rect.intersection(&unclipped_raster_rect)?;
+
+ // Ensure that we won't try to allocate a zero-sized clip render task.
+ if clipped_raster_rect.is_empty() {
+ return None;
+ }
+
+ let clipped = raster_rect_to_device_pixels(
+ clipped_raster_rect,
+ device_pixel_scale,
+ );
+
+ Some(clipped)
+}
+
+pub fn get_raster_rects(
+ pic_rect: PictureRect,
+ map_to_raster: &SpaceMapper<PicturePixel, RasterPixel>,
+ map_to_world: &SpaceMapper<RasterPixel, WorldPixel>,
+ prim_bounding_rect: WorldRect,
+ device_pixel_scale: DevicePixelScale,
+) -> Option<(DeviceRect, DeviceRect)> {
+ let unclipped_raster_rect = map_to_raster.map(&pic_rect)?;
+
+ let unclipped = raster_rect_to_device_pixels(
+ unclipped_raster_rect,
+ device_pixel_scale,
+ );
+
+ let unclipped_world_rect = map_to_world.map(&unclipped_raster_rect)?;
+
+ let clipped_world_rect = unclipped_world_rect.intersection(&prim_bounding_rect)?;
+
+ let clipped_raster_rect = map_to_world.unmap(&clipped_world_rect)?;
+
+ let clipped_raster_rect = clipped_raster_rect.intersection(&unclipped_raster_rect)?;
+
+ let clipped = raster_rect_to_device_pixels(
+ clipped_raster_rect,
+ device_pixel_scale,
+ );
+
+ // Ensure that we won't try to allocate a zero-sized clip render task.
+ if clipped.is_empty() {
+ return None;
+ }
+
+ Some((clipped, unclipped))
+}
+
+/// Choose the decoration mask tile size for a given line.
+///
+/// Given a line with overall size `rect_size` and the given `orientation`,
+/// return the dimensions of a single mask tile for the decoration pattern
+/// described by `style` and `wavy_line_thickness`.
+///
+/// If `style` is `Solid`, no mask tile is necessary; return `None`. The other
+/// styles each have their own characteristic periods of repetition, so for each
+/// one, this function returns a `LayoutSize` with the right aspect ratio and
+/// whose specific size is convenient for the `cs_line_decoration.glsl` fragment
+/// shader to work with. The shader uses a local coordinate space in which the
+/// tile fills a rectangle with one corner at the origin, and with the size this
+/// function returns.
+///
+/// The returned size is not necessarily in pixels; device scaling and other
+/// concerns can still affect the actual task size.
+///
+/// Regardless of whether `orientation` is `Vertical` or `Horizontal`, the
+/// `width` and `height` of the returned size are always horizontal and
+/// vertical, respectively.
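+///
+/// For example (illustrative): a horizontal dotted line of height 6.0
+/// yields `Some(LayoutSize::new(12.0, 6.0))`, since the dot diameter is
+/// clamped to [1.0, 64.0] (here 6.0) and the period is twice the diameter;
+/// a `Solid` style returns `None`.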
+pub fn get_line_decoration_size(
+ rect_size: &LayoutSize,
+ orientation: LineOrientation,
+ style: LineStyle,
+ wavy_line_thickness: f32,
+) -> Option<LayoutSize> {
+ let h = match orientation {
+ LineOrientation::Horizontal => rect_size.height,
+ LineOrientation::Vertical => rect_size.width,
+ };
+
+ // TODO(gw): The formulae below are based on the existing gecko and line
+ // shader code. They give reasonable results for most inputs,
+ // but could definitely do with a detailed pass to get better
+ // quality on a wider range of inputs!
+ // See nsCSSRendering::PaintDecorationLine in Gecko.
+
+ let (parallel, perpendicular) = match style {
+ LineStyle::Solid => {
+ return None;
+ }
+ LineStyle::Dashed => {
+ let dash_length = (3.0 * h).min(64.0).max(1.0);
+
+ (2.0 * dash_length, 4.0)
+ }
+ LineStyle::Dotted => {
+ let diameter = h.min(64.0).max(1.0);
+ let period = 2.0 * diameter;
+
+ (period, diameter)
+ }
+ LineStyle::Wavy => {
+ let line_thickness = wavy_line_thickness.max(1.0);
+ let slope_length = h - line_thickness;
+ let flat_length = ((line_thickness - 1.0) * 2.0).max(1.0);
+ let approx_period = 2.0 * (slope_length + flat_length);
+
+ (approx_period, h)
+ }
+ };
+
+ Some(match orientation {
+ LineOrientation::Horizontal => LayoutSize::new(parallel, perpendicular),
+ LineOrientation::Vertical => LayoutSize::new(perpendicular, parallel),
+ })
+}
+
+fn update_opacity_binding(
+ opacity_bindings: &mut OpacityBindingStorage,
+ opacity_binding_index: OpacityBindingIndex,
+ scene_properties: &SceneProperties,
+) {
+ if opacity_binding_index != OpacityBindingIndex::INVALID {
+ let binding = &mut opacity_bindings[opacity_binding_index];
+ binding.update(scene_properties);
+ }
}
/// Trait for primitives that are directly internable.
@@ -1414,8 +4520,8 @@ fn test_struct_sizes() {
// test expectations and move on.
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
- assert_eq!(mem::size_of::<PrimitiveInstance>(), 152, "PrimitiveInstance size changed");
- assert_eq!(mem::size_of::<PrimitiveInstanceKind>(), 24, "PrimitiveInstanceKind size changed");
+ assert_eq!(mem::size_of::<PrimitiveInstance>(), 80, "PrimitiveInstance size changed");
+ assert_eq!(mem::size_of::<PrimitiveInstanceKind>(), 40, "PrimitiveInstanceKind size changed");
assert_eq!(mem::size_of::<PrimitiveTemplate>(), 56, "PrimitiveTemplate size changed");
assert_eq!(mem::size_of::<PrimitiveTemplateKind>(), 28, "PrimitiveTemplateKind size changed");
assert_eq!(mem::size_of::<PrimitiveKey>(), 36, "PrimitiveKey size changed");
diff --git a/third_party/webrender/webrender/src/prim_store/picture.rs b/third_party/webrender/webrender/src/prim_store/picture.rs
index d0815cdac87..46a52e0a807 100644
--- a/third_party/webrender/webrender/src/prim_store/picture.rs
+++ b/third_party/webrender/webrender/src/prim_store/picture.rs
@@ -60,7 +60,7 @@ pub enum FilterPrimitiveKey {
Identity(ColorSpace, FilterPrimitiveInput),
Flood(ColorSpace, ColorU),
Blend(ColorSpace, MixBlendMode, FilterPrimitiveInput, FilterPrimitiveInput),
- Blur(ColorSpace, Au, Au, FilterPrimitiveInput),
+ Blur(ColorSpace, Au, FilterPrimitiveInput),
Opacity(ColorSpace, Au, FilterPrimitiveInput),
ColorMatrix(ColorSpace, [Au; 20], FilterPrimitiveInput),
DropShadow(ColorSpace, (VectorKey, Au, ColorU), FilterPrimitiveInput),
@@ -79,7 +79,7 @@ pub enum PictureCompositeKey {
Identity,
// FilterOp
- Blur(Au, Au),
+ Blur(Au),
Brightness(Au),
Contrast(Au),
Grayscale(Au),
@@ -140,8 +140,7 @@ impl From<Option<PictureCompositeMode>> for PictureCompositeKey {
}
Some(PictureCompositeMode::Filter(op)) => {
match op {
- Filter::Blur(width, height) =>
- PictureCompositeKey::Blur(Au::from_f32_px(width), Au::from_f32_px(height)),
+ Filter::Blur(value) => PictureCompositeKey::Blur(Au::from_f32_px(value)),
Filter::Brightness(value) => PictureCompositeKey::Brightness(Au::from_f32_px(value)),
Filter::Contrast(value) => PictureCompositeKey::Contrast(Au::from_f32_px(value)),
Filter::Grayscale(value) => PictureCompositeKey::Grayscale(Au::from_f32_px(value)),
@@ -189,8 +188,7 @@ impl From<Option<PictureCompositeMode>> for PictureCompositeKey {
FilterPrimitiveKind::Identity(identity) => FilterPrimitiveKey::Identity(primitive.color_space, identity.input),
FilterPrimitiveKind::Blend(blend) => FilterPrimitiveKey::Blend(primitive.color_space, blend.mode, blend.input1, blend.input2),
FilterPrimitiveKind::Flood(flood) => FilterPrimitiveKey::Flood(primitive.color_space, flood.color.into()),
- FilterPrimitiveKind::Blur(blur) =>
- FilterPrimitiveKey::Blur(primitive.color_space, Au::from_f32_px(blur.width), Au::from_f32_px(blur.height), blur.input),
+ FilterPrimitiveKind::Blur(blur) => FilterPrimitiveKey::Blur(primitive.color_space, Au::from_f32_px(blur.radius), blur.input),
FilterPrimitiveKind::Opacity(opacity) =>
FilterPrimitiveKey::Opacity(primitive.color_space, Au::from_f32_px(opacity.opacity), opacity.input),
FilterPrimitiveKind::ColorMatrix(color_matrix) => {
@@ -277,7 +275,6 @@ impl Internable for Picture {
type Key = PictureKey;
type StoreData = PictureTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_PICTURES;
}
impl InternablePrimitive for Picture {
diff --git a/third_party/webrender/webrender/src/prim_store/text_run.rs b/third_party/webrender/webrender/src/prim_store/text_run.rs
index 2affe657b11..a5c96e3c11d 100644
--- a/third_party/webrender/webrender/src/prim_store/text_run.rs
+++ b/third_party/webrender/webrender/src/prim_store/text_run.rs
@@ -3,28 +3,26 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use api::{ColorF, FontInstanceFlags, GlyphInstance, RasterSpace, Shadow};
-use api::units::{LayoutToWorldTransform, LayoutVector2D};
+use api::units::{LayoutToWorldTransform, LayoutVector2D, PictureRect};
use crate::scene_building::{CreateShadow, IsVisible};
use crate::frame_builder::FrameBuildingState;
use crate::glyph_rasterizer::{FontInstance, FontTransform, GlyphKey, FONT_SIZE_LIMIT};
use crate::gpu_cache::GpuCache;
use crate::intern;
use crate::internal_types::LayoutPrimitiveInfo;
-use crate::picture::SurfaceInfo;
+use crate::picture::{SubpixelMode, SurfaceInfo};
use crate::prim_store::{PrimitiveOpacity, PrimitiveScratchBuffer};
use crate::prim_store::{PrimitiveStore, PrimKeyCommonData, PrimTemplateCommonData};
+use crate::render_task_graph::RenderTaskGraph;
use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH};
use crate::resource_cache::{ResourceCache};
use crate::util::{MatrixHelpers};
-use crate::prim_store::{InternablePrimitive, PrimitiveInstanceKind};
-use crate::spatial_tree::{SpatialTree, SpatialNodeIndex, ROOT_SPATIAL_NODE_INDEX};
-use crate::space::SpaceSnapper;
-use crate::util::PrimaryArc;
-
+use crate::prim_store::{InternablePrimitive, PrimitiveInstanceKind, SpaceSnapper};
+use crate::spatial_tree::{SpatialTree, SpatialNodeIndex};
use std::ops;
use std::sync::Arc;
-
-use super::storage;
+use crate::storage;
+use crate::util::PrimaryArc;
/// A run of glyphs, with associated font information.
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -35,7 +33,6 @@ pub struct TextRunKey {
pub font: FontInstance,
pub glyphs: PrimaryArc<Vec<GlyphInstance>>,
pub shadow: bool,
- pub requested_raster_space: RasterSpace,
}
impl TextRunKey {
@@ -48,7 +45,6 @@ impl TextRunKey {
font: text_run.font,
glyphs: PrimaryArc(text_run.glyphs),
shadow: text_run.shadow,
- requested_raster_space: text_run.requested_raster_space,
}
}
}
@@ -148,14 +144,12 @@ pub struct TextRun {
#[ignore_malloc_size_of = "Measured via PrimaryArc"]
pub glyphs: Arc<Vec<GlyphInstance>>,
pub shadow: bool,
- pub requested_raster_space: RasterSpace,
}
impl intern::Internable for TextRun {
type Key = TextRunKey;
type StoreData = TextRunTemplate;
type InternData = ();
- const PROFILE_COUNTER: usize = crate::profiler::INTERNED_TEXT_RUNS;
}
impl InternablePrimitive for TextRun {
@@ -181,8 +175,7 @@ impl InternablePrimitive for TextRun {
reference_frame_relative_offset,
snapped_reference_frame_relative_offset: reference_frame_relative_offset,
shadow: key.shadow,
- raster_scale: 1.0,
- requested_raster_space: key.requested_raster_space,
+ raster_space: RasterSpace::Screen,
});
PrimitiveInstanceKind::TextRun{ data_handle, run_index }
@@ -190,12 +183,7 @@ impl InternablePrimitive for TextRun {
}
impl CreateShadow for TextRun {
- fn create_shadow(
- &self,
- shadow: &Shadow,
- blur_is_noop: bool,
- current_raster_space: RasterSpace,
- ) -> Self {
+ fn create_shadow(&self, shadow: &Shadow) -> Self {
let mut font = FontInstance {
color: shadow.color.into(),
..self.font.clone()
@@ -204,17 +192,10 @@ impl CreateShadow for TextRun {
font.disable_subpixel_aa();
}
- let requested_raster_space = if blur_is_noop {
- current_raster_space
- } else {
- RasterSpace::Local(1.0)
- };
-
TextRun {
font,
glyphs: self.glyphs.clone(),
shadow: true,
- requested_raster_space,
}
}
}
@@ -233,8 +214,7 @@ pub struct TextRunPrimitive {
pub reference_frame_relative_offset: LayoutVector2D,
pub snapped_reference_frame_relative_offset: LayoutVector2D,
pub shadow: bool,
- pub raster_scale: f32,
- pub requested_raster_space: RasterSpace,
+ pub raster_space: RasterSpace,
}
impl TextRunPrimitive {
@@ -244,8 +224,9 @@ impl TextRunPrimitive {
surface: &SurfaceInfo,
spatial_node_index: SpatialNodeIndex,
transform: &LayoutToWorldTransform,
- mut allow_subpixel: bool,
+ subpixel_mode: &SubpixelMode,
raster_space: RasterSpace,
+ prim_rect: PictureRect,
root_scaling_factor: f32,
spatial_tree: &SpatialTree,
) -> bool {
@@ -255,8 +236,8 @@ impl TextRunPrimitive {
// will implicitly be part of the device pixel ratio for
// the (cached) local space surface, and so this code
// will no longer be required.
- let raster_scale = raster_space.local_scale().unwrap_or(1.0).max(0.001);
+ let raster_scale = raster_space.local_scale().unwrap_or(1.0).max(0.001);
// root_scaling_factor is used to scale very large pictures that establish
// a raster root back to something sane, thus scale the device size accordingly.
// to the shader it looks like a change in DPI which it already supports.
@@ -282,7 +263,7 @@ impl TextRunPrimitive {
let font_transform = if transform_glyphs {
// Get the font transform matrix (skew / scale) from the complete transform.
// Fold in the device pixel scale.
- self.raster_scale = 1.0;
+ self.raster_space = RasterSpace::Screen;
FontTransform::from(transform)
} else {
if oversized {
@@ -294,12 +275,12 @@ impl TextRunPrimitive {
// Record the raster space the text needs to be snapped in. The original raster
// scale would have been too big.
- self.raster_scale = limited_raster_scale;
+ self.raster_space = RasterSpace::Local(limited_raster_scale);
} else {
// Record the raster space the text needs to be snapped in. We may have changed
// from RasterSpace::Screen due to a transform with perspective or without a 2d
// inverse, or it may have been RasterSpace::Local all along.
- self.raster_scale = raster_scale;
+ self.raster_space = RasterSpace::Local(raster_scale);
}
// Rasterize the glyph without any transform
@@ -353,6 +334,21 @@ impl TextRunPrimitive {
..specified_font.clone()
};
+        // If subpixel AA is disallowed by the backing surface the glyphs
+        // are being drawn onto, disable it (unless we are using the
+        // special subpixel mode that estimates the background color).
+ let mut allow_subpixel = match subpixel_mode {
+ SubpixelMode::Allow => true,
+ SubpixelMode::Deny => false,
+ SubpixelMode::Conditional { allowed_rect, excluded_rects } => {
+ // Conditional mode allows subpixel AA to be enabled for this
+ // text run, so long as it doesn't intersect with any of the
+ // cutout rectangles in the list, and it's inside the allowed rect.
+ allowed_rect.contains_rect(&prim_rect) &&
+ excluded_rects.iter().all(|rect| !rect.intersects(&prim_rect))
+ }
+ };
+
// If we are using special estimated background subpixel blending, then
// we can allow it regardless of what the surface says.
allow_subpixel |= self.used_font.bg_color.a != 0;
@@ -374,59 +370,32 @@ impl TextRunPrimitive {
cache_dirty
}
- /// Gets the raster space to use when rendering this primitive.
- /// Usually this would be the requested raster space. However, if
- /// the primitive's spatial node or one of its ancestors is being pinch zoomed
- /// then we round it. This prevents us rasterizing glyphs for every minor
- /// change in zoom level, as that would be too expensive.
- fn get_raster_space_for_prim(
- &self,
- prim_spatial_node_index: SpatialNodeIndex,
- spatial_tree: &SpatialTree,
- ) -> RasterSpace {
- let prim_spatial_node = &spatial_tree.spatial_nodes[prim_spatial_node_index.0 as usize];
- if prim_spatial_node.is_ancestor_or_self_zooming {
- let scale_factors = spatial_tree
- .get_relative_transform(prim_spatial_node_index, ROOT_SPATIAL_NODE_INDEX)
- .scale_factors();
-
- // Round the scale up to the nearest power of 2, but don't exceed 8.
- let scale = scale_factors.0.max(scale_factors.1).min(8.0);
- let rounded_up = 2.0f32.powf(scale.log2().ceil());
-
- RasterSpace::Local(rounded_up)
- } else {
- self.requested_raster_space
- }
- }
-
pub fn request_resources(
&mut self,
prim_offset: LayoutVector2D,
+ prim_rect: PictureRect,
specified_font: &FontInstance,
glyphs: &[GlyphInstance],
transform: &LayoutToWorldTransform,
surface: &SurfaceInfo,
spatial_node_index: SpatialNodeIndex,
+ raster_space: RasterSpace,
root_scaling_factor: f32,
- allow_subpixel: bool,
+ subpixel_mode: &SubpixelMode,
resource_cache: &mut ResourceCache,
gpu_cache: &mut GpuCache,
+ render_tasks: &mut RenderTaskGraph,
spatial_tree: &SpatialTree,
scratch: &mut PrimitiveScratchBuffer,
) {
- let raster_space = self.get_raster_space_for_prim(
- spatial_node_index,
- spatial_tree,
- );
-
let cache_dirty = self.update_font_instance(
specified_font,
surface,
spatial_node_index,
transform,
- allow_subpixel,
+ subpixel_mode,
raster_space,
+ prim_rect,
root_scaling_factor,
spatial_tree,
);
@@ -435,7 +404,7 @@ impl TextRunPrimitive {
let subpx_dir = self.used_font.get_subpx_dir();
let dps = surface.device_pixel_scale.0 * root_scaling_factor;
- let transform = match raster_space {
+ let transform = match self.raster_space {
RasterSpace::Local(scale) => FontTransform::new(scale * dps, 0.0, 0.0, scale * dps),
RasterSpace::Screen => self.used_font.transform.scale(dps),
};
@@ -452,6 +421,7 @@ impl TextRunPrimitive {
self.used_font.clone(),
&scratch.glyph_keys[self.glyph_keys_range],
gpu_cache,
+ render_tasks,
);
}
}
@@ -467,8 +437,8 @@ fn test_struct_sizes() {
// test expectations and move on.
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
- assert_eq!(mem::size_of::<TextRun>(), 64, "TextRun size changed");
+ assert_eq!(mem::size_of::<TextRun>(), 56, "TextRun size changed");
assert_eq!(mem::size_of::<TextRunTemplate>(), 80, "TextRunTemplate size changed");
- assert_eq!(mem::size_of::<TextRunKey>(), 80, "TextRunKey size changed");
+ assert_eq!(mem::size_of::<TextRunKey>(), 72, "TextRunKey size changed");
assert_eq!(mem::size_of::<TextRunPrimitive>(), 80, "TextRunPrimitive size changed");
}
diff --git a/third_party/webrender/webrender/src/profiler.rs b/third_party/webrender/webrender/src/profiler.rs
index 702fa634ffb..cd4d6f5069c 100644
--- a/third_party/webrender/webrender/src/profiler.rs
+++ b/third_party/webrender/webrender/src/profiler.rs
@@ -2,772 +2,1093 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-//! # Overlay profiler
-//!
-//! ## Profiler UI string syntax
-//!
-//! Comma-separated list of of tokens with trailing and leading spaces trimmed.
-//! Each tokens can be:
-//! - A counter name with an optional prefix. The name corresponds to the displayed name (see the
-//! counters vector below.
-//! - By default (no prefix) the counter is shown as average + max over half a second.
-//! - With a '#' prefix the counter is shown as a graph.
-//! - With a '*' prefix the counter is shown as a change indicator.
-//! - Some special counters such as GPU time queries have specific visualizations ignoring prefixes.
-//! - A preset name to append the preset to the UI (see PROFILER_PRESETS).
-//! - An empty token to insert a bit of vertical space.
-//! - A '|' token to start a new column.
-//! - A '_' token to start a new row.
-
use api::{ColorF, ColorU};
-use crate::renderer::DebugRenderer;
-use crate::device::query::GpuTimer;
+use crate::debug_render::DebugRenderer;
+use crate::device::query::{GpuSampler, GpuTimer, NamedTag};
use euclid::{Point2D, Rect, Size2D, vec2, default};
use crate::internal_types::FastHashMap;
-use crate::renderer::{FullFrameStats, MAX_VERTEX_TEXTURE_WIDTH, wr_has_been_initialized};
-use api::units::DeviceIntSize;
+use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH, wr_has_been_initialized};
use std::collections::vec_deque::VecDeque;
-use std::fmt::{Write, Debug};
-use std::f32;
+use std::{f32, mem};
use std::ffi::CStr;
use std::ops::Range;
use std::time::Duration;
use time::precise_time_ns;
-macro_rules! set_text {
- ($dst:expr, $($arg:tt)*) => {
- $dst.clear();
- write!($dst, $($arg)*).unwrap();
- };
+pub mod expected {
+ use std::ops::Range;
+ pub const AVG_BACKEND_CPU_TIME: Range<f64> = 0.0..3.0;
+ pub const MAX_BACKEND_CPU_TIME: Range<f64> = 0.0..6.0;
+ pub const AVG_RENDERER_CPU_TIME: Range<f64> = 0.0..5.0;
+ pub const MAX_RENDERER_CPU_TIME: Range<f64> = 0.0..10.0;
+ pub const AVG_IPC_TIME: Range<f64> = 0.0..2.0;
+ pub const MAX_IPC_TIME: Range<f64> = 0.0..4.0;
+ pub const AVG_GPU_TIME: Range<f64> = 0.0..8.0;
+ pub const MAX_GPU_TIME: Range<f64> = 0.0..15.0;
+ pub const DRAW_CALLS: Range<u64> = 1..100;
+ pub const VERTICES: Range<u64> = 10..25_000;
+ pub const TOTAL_PRIMITIVES: Range<u64> = 1..5000;
+ pub const VISIBLE_PRIMITIVES: Range<u64> = 1..5000;
+ pub const USED_TARGETS: Range<u64> = 1..4;
+ pub const COLOR_PASSES: Range<u64> = 1..4;
+ pub const ALPHA_PASSES: Range<u64> = 0..3;
+ pub const RENDERED_PICTURE_CACHE_TILES: Range<u64> = 0..5;
+ pub const TOTAL_PICTURE_CACHE_TILES: Range<u64> = 0..15;
+ pub const CREATED_TARGETS: Range<u64> = 0..3;
+ pub const CHANGED_TARGETS: Range<u64> = 0..3;
+ pub const TEXTURE_DATA_UPLOADED: Range<u64> = 0..10;
+ pub const GPU_CACHE_ROWS_TOTAL: Range<u64> = 1..50;
+ pub const GPU_CACHE_ROWS_UPDATED: Range<u64> = 0..25;
+ pub const GPU_CACHE_BLOCKS_TOTAL: Range<u64> = 1..65_000;
+ pub const GPU_CACHE_BLOCKS_UPDATED: Range<u64> = 0..1000;
+ pub const GPU_CACHE_BLOCKS_SAVED: Range<u64> = 0..50_000;
+ pub const DISPLAY_LIST_BUILD_TIME: Range<f64> = 0.0..3.0;
+ pub const MAX_SCENE_BUILD_TIME: Range<f64> = 0.0..3.0;
+ pub const DISPLAY_LIST_SEND_TIME: Range<f64> = 0.0..1.0;
+ pub const DISPLAY_LIST_TOTAL_TIME: Range<f64> = 0.0..4.0;
+ pub const NUM_FONT_TEMPLATES: Range<usize> = 0..50;
+ pub const FONT_TEMPLATES_MB: Range<f32> = 0.0..40.0;
+ pub const NUM_IMAGE_TEMPLATES: Range<usize> = 0..20;
+ pub const IMAGE_TEMPLATES_MB: Range<f32> = 0.0..10.0;
+ pub const DISPLAY_LIST_MB: Range<f32> = 0.0..0.2;
+ pub const NUM_RASTERIZED_BLOBS: Range<usize> = 0..25; // in tiles
+ pub const RASTERIZED_BLOBS_MB: Range<f32> = 0.0..4.0;
}
const GRAPH_WIDTH: f32 = 1024.0;
const GRAPH_HEIGHT: f32 = 320.0;
const GRAPH_PADDING: f32 = 8.0;
const GRAPH_FRAME_HEIGHT: f32 = 16.0;
-const PROFILE_SPACING: f32 = 15.0;
-const PROFILE_PADDING: f32 = 10.0;
-const BACKGROUND_COLOR: ColorU = ColorU { r: 20, g: 20, b: 20, a: 220 };
-
-const ONE_SECOND_NS: u64 = 1_000_000_000;
-
-/// Profiler UI string presets. Defined in the profiler UI string syntax, can contain other presets.
-static PROFILER_PRESETS: &'static[(&'static str, &'static str)] = &[
- // Default view, doesn't show everything, but still shows quite a bit.
- (&"Default", &"FPS,|,Slow indicators,_,Time graphs,|,Frame times, ,Transaction times, ,Frame stats, ,Memory, ,Interners,_,GPU time queries,_,Paint phase graph"),
- // Smaller, less intrusive overview
- (&"Compact", &"FPS, ,Frame times, ,Frame stats"),
- // Even less intrusive, only slow transactions and frame indicators.
- (&"Slow indicators", &"*Slow transaction,*Slow frame"),
-
- // Counters:
-
- // Timing information for per layout transaction stages.
- (&"Transaction times", &"DisplayList,Scene building,Content send,API send"),
- // Timing information for per-frame stages.
- (&"Frame times", &"Frame CPU total,Frame building,Visibility,Prepare,Batching,Glyph resolve,Texture cache update,Renderer,GPU"),
- // Stats about the content of the frame.
- (&"Frame stats", &"Primitives,Visible primitives,Draw calls,Vertices,Color passes,Alpha passes,Rendered picture tiles,Rasterized glyphs"),
- // Texture cache allocation stats.
- (&"Texture cache stats", &"Texture cache RGBA8 linear textures, Texture cache RGBA8 linear pixels, Texture cache RGBA8 linear pressure,
- , ,Texture cache RGBA8 glyphs textures, Texture cache RGBA8 glyphs pixels, Texture cache RGBA8 glyphs pressure,
- , ,Texture cache A8 glyphs textures, Texture cache A8 glyphs pixels, Texture cache A8 glyphs pressure,
- , ,Texture cache A8 textures, Texture cache A8 pixels, Texture cache A8 pressure,
- , ,Texture cache A16 textures, Texture cache A16 pixels, Texture cache A16 pressure,
- , ,Texture cache RGBA8 nearest textures, Texture cache RGBA8 nearest pixels, Texture cache RGBA8 nearest pressure,
- , ,Texture cache shared mem, Texture cache standalone mem, Texture cache standalone pressure,
- , ,Texture cache eviction count, Texture cache youngest evicted"
- ),
- // Graphs to investigate driver overhead of texture cache updates.
- (&"Texture upload perf", &"#Texture cache update,#Texture cache upload, ,#Staging CPU allocation,#Staging GPU allocation,#Staging CPU copy,#Staging GPU copy,#Upload time, ,#Upload copy batches,#Rasterized glyphs, ,#Cache texture creation,#Cache texture deletion"),
-
- // Graphs:
-
- // Graph overview of time spent in WebRender's main stages.
- (&"Time graphs", &"#DisplayList,#Scene building,#Blob rasterization, ,#Frame CPU total,#Frame building,#Renderer,#Texture cache update, ,#GPU,"),
- // Useful when investigating render backend bottlenecks.
- (&"Backend graphs", &"#Frame building, #Visibility, #Prepare, #Batching, #Glyph resolve"),
- // Useful when investigating renderer bottlenecks.
- (&"Renderer graphs", &"#Rendered picture tiles,#Draw calls,#Rasterized glyphs,#Texture uploads,#Texture uploads mem, ,#Texture cache update,#Renderer,"),
-
- // Misc:
-
- (&"Memory", &"Image templates,Image templates mem,Font templates,Font templates mem,DisplayList mem,Picture tiles mem"),
- (&"Interners", "Interned primitives,Interned clips,Interned pictures,Interned text runs,Interned normal borders,Interned image borders,Interned images,Interned YUV images,Interned line decorations,Interned linear gradients,Interned radial gradients,Interned conic gradients,Interned filter data,Interned backdrops"),
- // Gpu sampler queries (need the pref gfx.webrender.debug.gpu-sampler-queries).
- (&"GPU samplers", &"Alpha targets samplers,Transparent pass samplers,Opaque pass samplers,Total samplers"),
-];
-
-fn find_preset(name: &str) -> Option<&'static str> {
- for preset in PROFILER_PRESETS {
- if preset.0 == name {
- return Some(preset.1);
- }
- }
+const PROFILE_PADDING: f32 = 8.0;
- None
+const ONE_SECOND_NS: u64 = 1_000_000_000;
+const AVERAGE_OVER_NS: u64 = ONE_SECOND_NS / 2;
+
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum ProfileStyle {
+ Full,
+ Compact,
+ Smart,
}
-// The indices here must match the PROFILE_COUNTERS array (checked at runtime).
-pub const FRAME_BUILDING_TIME: usize = 0;
-pub const FRAME_VISIBILITY_TIME: usize = 1;
-pub const FRAME_PREPARE_TIME: usize = 2;
-pub const FRAME_BATCHING_TIME: usize = 3;
-
-pub const RENDERER_TIME: usize = 4;
-pub const TOTAL_FRAME_CPU_TIME: usize = 5;
-pub const GPU_TIME: usize = 6;
-
-pub const CONTENT_SEND_TIME: usize = 7;
-pub const API_SEND_TIME: usize = 8;
-
-pub const DISPLAY_LIST_BUILD_TIME: usize = 9;
-pub const DISPLAY_LIST_MEM: usize = 10;
-
-pub const SCENE_BUILD_TIME: usize = 11;
-
-pub const RASTERIZED_BLOBS: usize = 12;
-pub const RASTERIZED_BLOB_TILES: usize = 13;
-pub const RASTERIZED_BLOBS_PX: usize = 14;
-pub const BLOB_RASTERIZATION_TIME: usize = 15;
-
-pub const RASTERIZED_GLYPHS: usize = 16;
-pub const GLYPH_RESOLVE_TIME: usize = 17;
-
-pub const DRAW_CALLS: usize = 18;
-pub const VERTICES: usize = 19;
-pub const PRIMITIVES: usize = 20;
-pub const VISIBLE_PRIMITIVES: usize = 21;
-
-pub const USED_TARGETS: usize = 22;
-pub const CREATED_TARGETS: usize = 23;
-pub const PICTURE_CACHE_SLICES: usize = 24;
-
-pub const COLOR_PASSES: usize = 25;
-pub const ALPHA_PASSES: usize = 26;
-pub const PICTURE_TILES: usize = 27;
-pub const PICTURE_TILES_MEM: usize = 28;
-pub const RENDERED_PICTURE_TILES: usize = 29;
-pub const TEXTURE_UPLOADS: usize = 30;
-pub const TEXTURE_UPLOADS_MEM: usize = 31;
-
-pub const FONT_TEMPLATES: usize = 32;
-pub const FONT_TEMPLATES_MEM: usize = 33;
-pub const IMAGE_TEMPLATES: usize = 34;
-pub const IMAGE_TEMPLATES_MEM: usize = 35;
-
-pub const GPU_CACHE_ROWS_TOTAL: usize = 36;
-pub const GPU_CACHE_ROWS_UPDATED: usize = 37;
-pub const GPU_CACHE_BLOCKS_TOTAL: usize = 38;
-pub const GPU_CACHE_BLOCKS_UPDATED: usize = 39;
-pub const GPU_CACHE_BLOCKS_SAVED: usize = 40;
-
-pub const TEXTURE_CACHE_A8_PIXELS: usize = 41;
-pub const TEXTURE_CACHE_A8_TEXTURES: usize = 42;
-pub const TEXTURE_CACHE_A16_PIXELS: usize = 43;
-pub const TEXTURE_CACHE_A16_TEXTURES: usize = 44;
-pub const TEXTURE_CACHE_RGBA8_LINEAR_PIXELS: usize = 45;
-pub const TEXTURE_CACHE_RGBA8_LINEAR_TEXTURES: usize = 46;
-pub const TEXTURE_CACHE_RGBA8_NEAREST_PIXELS: usize = 47;
-pub const TEXTURE_CACHE_RGBA8_NEAREST_TEXTURES: usize = 48;
-pub const TEXTURE_CACHE_SHARED_MEM: usize = 49;
-pub const TEXTURE_CACHE_STANDALONE_MEM: usize = 50;
-
-pub const SLOW_FRAME: usize = 51;
-pub const SLOW_TXN: usize = 52;
-
-pub const GPU_CACHE_UPLOAD_TIME: usize = 53;
-pub const TEXTURE_CACHE_UPDATE_TIME: usize = 54;
-
-pub const FRAME_TIME: usize = 55;
-
-pub const ALPHA_TARGETS_SAMPLERS: usize = 56;
-pub const TRANSPARENT_PASS_SAMPLERS: usize = 57;
-pub const OPAQUE_PASS_SAMPLERS: usize = 58;
-pub const TOTAL_SAMPLERS: usize = 59;
-
-pub const INTERNED_PRIMITIVES: usize = 60;
-pub const INTERNED_CLIPS: usize = 61;
-pub const INTERNED_TEXT_RUNS: usize = 62;
-pub const INTERNED_NORMAL_BORDERS: usize = 63;
-pub const INTERNED_IMAGE_BORDERS: usize = 64;
-pub const INTERNED_IMAGES: usize = 65;
-pub const INTERNED_YUV_IMAGES: usize = 66;
-pub const INTERNED_LINE_DECORATIONS: usize = 67;
-pub const INTERNED_LINEAR_GRADIENTS: usize = 68;
-pub const INTERNED_RADIAL_GRADIENTS: usize = 69;
-pub const INTERNED_CONIC_GRADIENTS: usize = 70;
-pub const INTERNED_PICTURES: usize = 71;
-pub const INTERNED_FILTER_DATA: usize = 72;
-pub const INTERNED_BACKDROPS: usize = 73;
-pub const INTERNED_POLYGONS: usize = 74;
-
-pub const TEXTURE_CACHE_RGBA8_GLYPHS_PIXELS: usize = 75;
-pub const TEXTURE_CACHE_RGBA8_GLYPHS_TEXTURES: usize = 76;
-pub const TEXTURE_CACHE_A8_GLYPHS_PIXELS: usize = 77;
-pub const TEXTURE_CACHE_A8_GLYPHS_TEXTURES: usize = 78;
-
-pub const CPU_TEXTURE_ALLOCATION_TIME: usize = 79;
-pub const STAGING_TEXTURE_ALLOCATION_TIME: usize = 80;
-pub const UPLOAD_CPU_COPY_TIME: usize = 81;
-pub const UPLOAD_GPU_COPY_TIME: usize = 82;
-pub const UPLOAD_TIME: usize = 83;
-pub const UPLOAD_NUM_COPY_BATCHES: usize = 84;
-pub const TOTAL_UPLOAD_TIME: usize = 85;
-pub const CREATE_CACHE_TEXTURE_TIME: usize = 86;
-pub const DELETE_CACHE_TEXTURE_TIME: usize = 87;
-
-pub const TEXTURE_CACHE_COLOR8_LINEAR_PRESSURE: usize = 88;
-pub const TEXTURE_CACHE_COLOR8_NEAREST_PRESSURE: usize = 89;
-pub const TEXTURE_CACHE_COLOR8_GLYPHS_PRESSURE: usize = 90;
-pub const TEXTURE_CACHE_ALPHA8_PRESSURE: usize = 91;
-pub const TEXTURE_CACHE_ALPHA8_GLYPHS_PRESSURE: usize = 92;
-pub const TEXTURE_CACHE_ALPHA16_PRESSURE: usize = 93;
-pub const TEXTURE_CACHE_STANDALONE_PRESSURE: usize = 94;
-pub const TEXTURE_CACHE_EVICTION_COUNT: usize = 95;
-pub const TEXTURE_CACHE_YOUNGEST_EVICTION: usize = 96;
-
-pub const NUM_PROFILER_EVENTS: usize = 97;
+/// Defines the interface for hooking up an external profiler to WR.
+pub trait ProfilerHooks : Send + Sync {
+ /// Called at the beginning of a profile scope. The label must
+ /// be a C string (null terminated).
+ fn begin_marker(&self, label: &CStr);
-pub struct Profiler {
- counters: Vec<Counter>,
- gpu_frames: ProfilerFrameCollection,
- frame_stats: ProfilerFrameCollection,
+ /// Called at the end of a profile scope. The label must
+ /// be a C string (null terminated).
+ fn end_marker(&self, label: &CStr);
- start: u64,
- avg_over_period: u64,
- num_graph_samples: usize,
+ /// Called to mark an event happening. The label must
+ /// be a C string (null terminated).
+ fn event_marker(&self, label: &CStr);
- // For FPS computation. Updated in update().
- frame_timestamps_within_last_second: Vec<u64>,
+ /// Called with a duration to indicate a text marker that just ended. Text
+ /// markers allow different types of entries to be recorded on the same row
+ /// in the timeline, by adding labels to the entry.
+ ///
+ /// This variant is also useful when the caller only wants to record events
+ /// longer than a certain threshold, and thus they don't know in advance
+ /// whether the event will qualify.
+ fn add_text_marker(&self, label: &CStr, text: &str, duration: Duration);
- ui: Vec<Item>,
+ /// Returns true if the current thread is being profiled.
+ fn thread_is_being_profiled(&self) -> bool;
}
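+
+// Illustrative sketch (not part of the original change): a minimal embedder
+// implementation of the ProfilerHooks trait above. `StderrHooks` and
+// `register_example_hooks` are hypothetical names used only for this
+// example.
+#[allow(dead_code)]
+struct StderrHooks;
+
+impl ProfilerHooks for StderrHooks {
+    fn begin_marker(&self, label: &CStr) { eprintln!("begin {:?}", label); }
+    fn end_marker(&self, label: &CStr) { eprintln!("end {:?}", label); }
+    fn event_marker(&self, label: &CStr) { eprintln!("event {:?}", label); }
+    fn add_text_marker(&self, label: &CStr, text: &str, duration: Duration) {
+        eprintln!("{:?}: {} (took {:?})", label, text, duration);
+    }
+    fn thread_is_being_profiled(&self) -> bool { false }
+}
+
+// Hooks must be registered before any WR instance is created, or
+// set_profiler_hooks (below) silently ignores them.
+#[allow(dead_code)]
+fn register_example_hooks() {
+    static HOOKS: StderrHooks = StderrHooks;
+    set_profiler_hooks(Some(&HOOKS));
+}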
-impl Profiler {
- pub fn new() -> Self {
+/// The current global profiler callbacks, if set by embedder.
+pub static mut PROFILER_HOOKS: Option<&'static dyn ProfilerHooks> = None;
- fn float(name: &'static str, unit: &'static str, index: usize, expected: Expected<f64>) -> CounterDescriptor {
- CounterDescriptor { name, unit, show_as: ShowAs::Float, index, expected }
+/// Set the profiler callbacks, or None to disable the profiler.
+/// This function must only ever be called before any WR instances
+/// have been created, or the hooks will not be set.
+pub fn set_profiler_hooks(hooks: Option<&'static dyn ProfilerHooks>) {
+ if !wr_has_been_initialized() {
+ unsafe {
+ PROFILER_HOOKS = hooks;
}
+ }
+}
- fn int(name: &'static str, unit: &'static str, index: usize, expected: Expected<i64>) -> CounterDescriptor {
- CounterDescriptor { name, unit, show_as: ShowAs::Int, index, expected: expected.into_float() }
+/// A simple RAII style struct to manage a profile scope.
+pub struct ProfileScope {
+ name: &'static CStr,
+}
+
+/// Records a marker of the given duration that just ended.
+pub fn add_text_marker(label: &CStr, text: &str, duration: Duration) {
+ unsafe {
+ if let Some(ref hooks) = PROFILER_HOOKS {
+ hooks.add_text_marker(label, text, duration);
}
+ }
+}
- // Not in the list below:
- // - "GPU time queries" shows the details of the GPU time queries if selected as a graph.
- // - "GPU cache bars" shows some info about the GPU cache.
-
- // TODO: This should be a global variable but to keep things readable we need to be able to
- // use match in const fn which isn't supported by the current rustc version in gecko's build
- // system.
- let profile_counters = &[
- float("Frame building", "ms", FRAME_BUILDING_TIME, expected(0.0..6.0).avg(0.0..3.0)),
-
- float("Visibility", "ms", FRAME_VISIBILITY_TIME, expected(0.0..3.0).avg(0.0..2.0)),
- float("Prepare", "ms", FRAME_PREPARE_TIME, expected(0.0..3.0).avg(0.0..2.0)),
- float("Batching", "ms", FRAME_BATCHING_TIME, expected(0.0..3.0).avg(0.0..2.0)),
-
- float("Renderer", "ms", RENDERER_TIME, expected(0.0..8.0).avg(0.0..5.0)),
- float("Frame CPU total", "ms", TOTAL_FRAME_CPU_TIME, expected(0.0..15.0).avg(0.0..6.0)),
- float("GPU", "ms", GPU_TIME, expected(0.0..15.0).avg(0.0..8.0)),
-
- float("Content send", "ms", CONTENT_SEND_TIME, expected(0.0..1.0).avg(0.0..1.0)),
- float("API send", "ms", API_SEND_TIME, expected(0.0..1.0).avg(0.0..0.4)),
- float("DisplayList", "ms", DISPLAY_LIST_BUILD_TIME, expected(0.0..5.0).avg(0.0..3.0)),
- float("DisplayList mem", "MB", DISPLAY_LIST_MEM, expected(0.0..20.0)),
- float("Scene building", "ms", SCENE_BUILD_TIME, expected(0.0..4.0).avg(0.0..3.0)),
-
- int("Rasterized blobs", "", RASTERIZED_BLOBS, expected(0..15)),
- int("Rasterized blob tiles", "", RASTERIZED_BLOB_TILES, expected(0..15)),
- int("Rasterized blob pixels", "px", RASTERIZED_BLOBS_PX, expected(0..300_000)),
- float("Blob rasterization", "ms", BLOB_RASTERIZATION_TIME, expected(0.0..8.0)),
-
- int("Rasterized glyphs", "", RASTERIZED_GLYPHS, expected(0..15)),
- float("Glyph resolve", "ms", GLYPH_RESOLVE_TIME, expected(0.0..4.0)),
-
- int("Draw calls", "", DRAW_CALLS, expected(1..120).avg(1..90)),
- int("Vertices", "", VERTICES, expected(10..5000)),
- int("Primitives", "", PRIMITIVES, expected(10..5000)),
- int("Visible primitives", "", VISIBLE_PRIMITIVES, expected(1..5000)),
-
- int("Used targets", "", USED_TARGETS, expected(1..4)),
- int("Created targets", "", CREATED_TARGETS, expected(0..3)),
- int("Picture cache slices", "", PICTURE_CACHE_SLICES, expected(0..5)),
-
- int("Color passes", "", COLOR_PASSES, expected(1..4)),
- int("Alpha passes", "", ALPHA_PASSES, expected(0..3)),
- int("Picture tiles", "", PICTURE_TILES, expected(0..15)),
- float("Picture tiles mem", "MB", PICTURE_TILES_MEM, expected(0.0..150.0)),
- int("Rendered picture tiles", "", RENDERED_PICTURE_TILES, expected(0..5)),
- int("Texture uploads", "", TEXTURE_UPLOADS, expected(0..10)),
- float("Texture uploads mem", "MB", TEXTURE_UPLOADS_MEM, expected(0.0..10.0)),
-
- int("Font templates", "", FONT_TEMPLATES, expected(0..40)),
- float("Font templates mem", "MB", FONT_TEMPLATES_MEM, expected(0.0..20.0)),
- int("Image templates", "", IMAGE_TEMPLATES, expected(0..100)),
- float("Image templates mem", "MB", IMAGE_TEMPLATES_MEM, expected(0.0..50.0)),
-
- int("GPU cache rows total", "", GPU_CACHE_ROWS_TOTAL, expected(1..50)),
- int("GPU cache rows updated", "", GPU_CACHE_ROWS_UPDATED, expected(0..25)),
- int("GPU blocks total", "", GPU_CACHE_BLOCKS_TOTAL, expected(1..65_000)),
- int("GPU blocks updated", "", GPU_CACHE_BLOCKS_UPDATED, expected(0..1000)),
- int("GPU blocks saved", "", GPU_CACHE_BLOCKS_SAVED, expected(0..50_000)),
-
- int("Texture cache A8 pixels", "px", TEXTURE_CACHE_A8_PIXELS, expected(0..1_000_000)),
- int("Texture cache A8 textures", "", TEXTURE_CACHE_A8_TEXTURES, expected(0..2)),
- int("Texture cache A16 pixels", "px", TEXTURE_CACHE_A16_PIXELS, expected(0..260_000)),
- int("Texture cache A16 textures", "", TEXTURE_CACHE_A16_TEXTURES, expected(0..2)),
- int("Texture cache RGBA8 linear pixels", "px", TEXTURE_CACHE_RGBA8_LINEAR_PIXELS, expected(0..8_000_000)),
- int("Texture cache RGBA8 linear textures", "", TEXTURE_CACHE_RGBA8_LINEAR_TEXTURES, expected(0..3)),
- int("Texture cache RGBA8 nearest pixels", "px", TEXTURE_CACHE_RGBA8_NEAREST_PIXELS, expected(0..260_000)),
- int("Texture cache RGBA8 nearest textures", "", TEXTURE_CACHE_RGBA8_NEAREST_TEXTURES, expected(0..2)),
- float("Texture cache shared mem", "MB", TEXTURE_CACHE_SHARED_MEM, expected(0.0..100.0)),
- float("Texture cache standalone mem", "MB", TEXTURE_CACHE_STANDALONE_MEM, expected(0.0..100.0)),
-
-
- float("Slow frame", "", SLOW_FRAME, expected(0.0..0.0)),
- float("Slow transaction", "", SLOW_TXN, expected(0.0..0.0)),
-
- float("GPU cache upload", "ms", GPU_CACHE_UPLOAD_TIME, expected(0.0..2.0)),
- float("Texture cache update", "ms", TEXTURE_CACHE_UPDATE_TIME, expected(0.0..3.0)),
-
- float("Frame", "ms", FRAME_TIME, Expected::none()),
-
- float("Alpha targets samplers", "%", ALPHA_TARGETS_SAMPLERS, Expected::none()),
- float("Transparent pass samplers", "%", TRANSPARENT_PASS_SAMPLERS, Expected::none()),
- float("Opaque pass samplers", "%", OPAQUE_PASS_SAMPLERS, Expected::none()),
- float("Total samplers", "%", TOTAL_SAMPLERS, Expected::none()),
-
- int("Interned primitives", "", INTERNED_PRIMITIVES, Expected::none()),
- int("Interned clips", "", INTERNED_CLIPS, Expected::none()),
- int("Interned text runs", "", INTERNED_TEXT_RUNS, Expected::none()),
- int("Interned normal borders", "", INTERNED_NORMAL_BORDERS, Expected::none()),
- int("Interned image borders", "", INTERNED_IMAGE_BORDERS, Expected::none()),
- int("Interned images", "", INTERNED_IMAGES, Expected::none()),
- int("Interned YUV images", "", INTERNED_YUV_IMAGES, Expected::none()),
- int("Interned line decorations", "", INTERNED_LINE_DECORATIONS, Expected::none()),
- int("Interned linear gradients", "", INTERNED_LINEAR_GRADIENTS, Expected::none()),
- int("Interned radial gradients", "", INTERNED_RADIAL_GRADIENTS, Expected::none()),
- int("Interned conic gradients", "", INTERNED_CONIC_GRADIENTS, Expected::none()),
- int("Interned pictures", "", INTERNED_PICTURES, Expected::none()),
- int("Interned filter data", "", INTERNED_FILTER_DATA, Expected::none()),
- int("Interned backdrops", "", INTERNED_BACKDROPS, Expected::none()),
- int("Interned polygons", "", INTERNED_POLYGONS, Expected::none()),
-
- int("Texture cache RGBA8 glyphs pixels", "px", TEXTURE_CACHE_RGBA8_GLYPHS_PIXELS, expected(0..4_000_000)),
- int("Texture cache RGBA8 glyphs textures", "", TEXTURE_CACHE_RGBA8_GLYPHS_TEXTURES, expected(0..2)),
- int("Texture cache A8 glyphs pixels", "px", TEXTURE_CACHE_A8_GLYPHS_PIXELS, expected(0..4_000_000)),
- int("Texture cache A8 glyphs textures", "", TEXTURE_CACHE_A8_GLYPHS_TEXTURES, expected(0..2)),
-
- float("Staging CPU allocation", "ms", CPU_TEXTURE_ALLOCATION_TIME, Expected::none()),
- float("Staging GPU allocation", "ms", STAGING_TEXTURE_ALLOCATION_TIME, Expected::none()),
- float("Staging CPU copy", "ms", UPLOAD_CPU_COPY_TIME, Expected::none()),
- float("Staging GPU copy", "ms", UPLOAD_GPU_COPY_TIME, Expected::none()),
- float("Upload time", "ms", UPLOAD_TIME, Expected::none()),
- int("Upload copy batches", "", UPLOAD_NUM_COPY_BATCHES, Expected::none()),
- float("Texture cache upload", "ms", TOTAL_UPLOAD_TIME, expected(0.0..5.0)),
- float("Cache texture creation", "ms", CREATE_CACHE_TEXTURE_TIME, expected(0.0..2.0)),
- float("Cache texture deletion", "ms", DELETE_CACHE_TEXTURE_TIME, expected(0.0..1.0)),
-
- float("Texture cache RGBA8 linear pressure", "", TEXTURE_CACHE_COLOR8_LINEAR_PRESSURE, expected(0.0..1.0)),
- float("Texture cache RGBA8 nearest pressure", "", TEXTURE_CACHE_COLOR8_NEAREST_PRESSURE, expected(0.0..1.0)),
- float("Texture cache RGBA8 glyphs pressure", "", TEXTURE_CACHE_COLOR8_GLYPHS_PRESSURE, expected(0.0..1.0)),
- float("Texture cache A8 pressure", "", TEXTURE_CACHE_ALPHA8_PRESSURE, expected(0.0..1.0)),
- float("Texture cache A8 glyphs pressure", "", TEXTURE_CACHE_ALPHA8_GLYPHS_PRESSURE, expected(0.0..1.0)),
- float("Texture cache A16 pressure", "", TEXTURE_CACHE_ALPHA16_PRESSURE, expected(0.0..1.0)),
- float("Texture cache standalone pressure", "", TEXTURE_CACHE_STANDALONE_PRESSURE, expected(0.0..1.0)),
- int("Texture cache eviction count", "items", TEXTURE_CACHE_EVICTION_COUNT, Expected::none()),
- int("Texture cache youngest evicted", "frames", TEXTURE_CACHE_YOUNGEST_EVICTION, Expected::none()),
- ];
+/// Records a marker for an event that just happened, with no duration.
+pub fn add_event_marker(label: &CStr) {
+ unsafe {
+ if let Some(ref hooks) = PROFILER_HOOKS {
+ hooks.event_marker(label);
+ }
+ }
+}
- let mut counters = Vec::with_capacity(profile_counters.len());
+/// Returns true if the current thread is being profiled.
+pub fn thread_is_being_profiled() -> bool {
+ unsafe {
+ PROFILER_HOOKS.map_or(false, |h| h.thread_is_being_profiled())
+ }
+}
- for (idx, descriptor) in profile_counters.iter().enumerate() {
- debug_assert_eq!(descriptor.index, idx);
- counters.push(Counter::new(descriptor));
+impl ProfileScope {
+ /// Begin a new profile scope
+ pub fn new(name: &'static CStr) -> Self {
+ unsafe {
+ if let Some(ref hooks) = PROFILER_HOOKS {
+ hooks.begin_marker(name);
+ }
}
- Profiler {
- gpu_frames: ProfilerFrameCollection::new(),
- frame_stats: ProfilerFrameCollection::new(),
+ ProfileScope {
+ name,
+ }
+ }
+}
- counters,
- start: precise_time_ns(),
- avg_over_period: ONE_SECOND_NS / 2,
+impl Drop for ProfileScope {
+ fn drop(&mut self) {
+ unsafe {
+ if let Some(ref hooks) = PROFILER_HOOKS {
+ hooks.end_marker(self.name);
+ }
+ }
+ }
+}
- num_graph_samples: 500, // Would it be useful to control this via a pref?
- frame_timestamps_within_last_second: Vec::new(),
- ui: Vec::new(),
+/// A helper macro to define profile scopes.
+macro_rules! profile_marker {
+ ($string:expr) => {
+ let _scope = $crate::profiler::ProfileScope::new(cstr!($string));
+ };
+}
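+
+// Usage sketch: the scope begins when the macro runs, and the end marker is
+// reported when `_scope` drops at the end of the enclosing block (assuming
+// hooks were installed; `composite_frame` is a hypothetical caller):
+//
+//     fn composite_frame() {
+//         profile_marker!("composite_frame");
+//         // ... work attributed to the "composite_frame" marker ...
+//     }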
+
+#[derive(Debug, Clone)]
+pub struct GpuProfileTag {
+ pub label: &'static str,
+ pub color: ColorF,
+}
+
+impl NamedTag for GpuProfileTag {
+ fn get_label(&self) -> &str {
+ self.label
+ }
+}
+
+trait ProfileCounter {
+ fn description(&self) -> &'static str;
+ fn value(&self) -> String;
+ fn is_expected(&self) -> bool;
+}
+
+#[derive(Clone)]
+pub struct IntProfileCounter {
+ description: &'static str,
+ value: usize,
+ expect: Option<Range<u64>>,
+}
+
+impl IntProfileCounter {
+ fn new(description: &'static str, expect: Option<Range<u64>>) -> Self {
+ IntProfileCounter {
+ description,
+ value: 0,
+ expect,
}
}
- /// Sum a few counters and if the total amount is larger than a threshold, update
- /// a specific counter.
- ///
- /// This is useful to monitor slow frame and slow transactions.
- fn update_slow_event(&mut self, dst_counter: usize, counters: &[usize], threshold: f64) {
- let mut total = 0.0;
- for &counter in counters {
- if self.counters[counter].value.is_finite() {
- total += self.counters[counter].value;
- }
+ #[inline(always)]
+ pub fn inc(&mut self) {
+ self.value += 1;
+ }
+
+ pub fn set(&mut self, value: usize) {
+ self.value = value;
+ }
+}
+
+impl ProfileCounter for IntProfileCounter {
+ fn description(&self) -> &'static str {
+ self.description
+ }
+
+ fn value(&self) -> String {
+ format!("{}", self.value)
+ }
+
+ fn is_expected(&self) -> bool {
+ self.expect.as_ref().map(|range| range.contains(&(self.value as u64))).unwrap_or(true)
+ }
+}
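+
+// Usage sketch: an `IntProfileCounter` only flags a problem when an expected
+// range was provided and the value falls outside it:
+//
+//     let mut draws = IntProfileCounter::new("Draw calls", Some(0..120));
+//     draws.inc();          // value == 1, within 0..120
+//     draws.set(200);       // outside 0..120
+//     assert!(!draws.is_expected());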
+
+/// A profile counter recording average and maximum integer values over time slices
+/// of half a second.
+#[derive(Clone)]
+pub struct AverageIntProfileCounter {
+ description: &'static str,
+ /// Start of the current time slice.
+ start_ns: u64,
+ /// Sum of the values recorded during the current time slice.
+ sum: u64,
+ /// Number of samples in the current time slice.
+ num_samples: u64,
+ /// The max value in the in-progress time slice.
+ next_max: u64,
+ /// The max value of the previous time slice (displayed).
+ max: u64,
+ /// The average value of the previous time slice (displayed).
+ avg: u64,
+ /// Intermediate accumulator for `add` and `inc`.
+ accum: u64,
+ /// Expected average range of values, if any.
+ expect_avg: Option<Range<u64>>,
+ /// Expected maximum range of values, if any.
+ expect_max: Option<Range<u64>>,
+}
+
+impl AverageIntProfileCounter {
+ pub fn new(
+ description: &'static str,
+ expect_avg: Option<Range<u64>>,
+ expect_max: Option<Range<u64>>,
+ ) -> Self {
+ AverageIntProfileCounter {
+ description,
+ start_ns: precise_time_ns(),
+ sum: 0,
+ num_samples: 0,
+ next_max: 0,
+ max: 0,
+ avg: 0,
+ accum: 0,
+ expect_avg,
+ expect_max,
}
+ }
- if total > threshold {
- self.counters[dst_counter].set(total);
+ pub fn reset(&mut self) {
+ if self.accum > 0 {
+ self.set_u64(self.accum);
+ self.accum = 0;
}
}
- // Call at the end of every frame, after setting the counter values and before drawing the counters.
- pub fn update(&mut self) {
+ pub fn set(&mut self, val: usize) {
+ self.set_u64(val as u64);
+ }
+
+ pub fn set_u64(&mut self, val: u64) {
let now = precise_time_ns();
- let update_avg = (now - self.start) > self.avg_over_period;
- if update_avg {
- self.start = now;
+ if (now - self.start_ns) > AVERAGE_OVER_NS && self.num_samples > 0 {
+ self.avg = self.sum / self.num_samples;
+ self.max = self.next_max;
+ self.start_ns = now;
+ self.sum = 0;
+ self.num_samples = 0;
+ self.next_max = 0;
}
- let one_second_ago = now - ONE_SECOND_NS;
- self.frame_timestamps_within_last_second.retain(|t| *t > one_second_ago);
- self.frame_timestamps_within_last_second.push(now);
-
- self.update_slow_event(
- SLOW_FRAME,
- &[TOTAL_FRAME_CPU_TIME],
- 15.0,
- );
- self.update_slow_event(
- SLOW_TXN,
- &[DISPLAY_LIST_BUILD_TIME, CONTENT_SEND_TIME, SCENE_BUILD_TIME],
- 80.0
- );
+ self.next_max = self.next_max.max(val);
+ self.sum += val;
+ self.num_samples += 1;
+ self.accum = 0;
+ }
- for counter in &mut self.counters {
- counter.update(update_avg);
- }
+ pub fn add(&mut self, val: usize) {
+ self.accum += val as u64;
+ }
+
+ pub fn inc(&mut self) {
+ self.accum += 1;
+ }
+
+ pub fn get_accum(&mut self) -> u64 {
+ self.accum
+ }
+
+ /// Returns either the most up-to-date value, if the counter is updated
+ /// with `add` or `inc`, or the average over the previous time slice.
+ pub fn get(&self) -> usize {
+ let result = if self.accum != 0 {
+ self.accum
+ } else {
+ self.avg
+ };
+
+ result as usize
}
+}
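+
+// Usage sketch: values can either be recorded directly with `set`, or
+// accumulated with `add`/`inc` and folded into the current time slice by
+// calling `reset` once per frame:
+//
+//     let mut prims = AverageIntProfileCounter::new("Primitives", None, None);
+//     // Per-frame accumulation style:
+//     prims.inc();
+//     prims.add(10);
+//     prims.reset();        // commits the accumulated 11 as one sample
+//     // Direct style:
+//     prims.set(42);        // records 42 as one sample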
- pub fn update_frame_stats(&mut self, stats: FullFrameStats) {
- if stats.gecko_display_list_time != 0.0 {
- self.frame_stats.push(stats.into());
+impl ProfileCounter for AverageIntProfileCounter {
+ fn description(&self) -> &'static str {
+ self.description
+ }
+
+ fn value(&self) -> String {
+ format!("{:.2} (max {:.2})", self.avg, self.max)
+ }
+
+ fn is_expected(&self) -> bool {
+ self.expect_avg.as_ref().map(|range| range.contains(&self.avg)).unwrap_or(true)
+ && self.expect_max.as_ref().map(|range| range.contains(&self.max)).unwrap_or(true)
+ }
+}
+
+pub struct PercentageProfileCounter {
+ description: &'static str,
+ value: f32,
+}
+
+impl ProfileCounter for PercentageProfileCounter {
+ fn description(&self) -> &'static str {
+ self.description
+ }
+
+ fn value(&self) -> String {
+ format!("{:.2}%", self.value * 100.0)
+ }
+
+ fn is_expected(&self) -> bool { true }
+}
+
+#[derive(Clone)]
+pub struct ResourceProfileCounter {
+ description: &'static str,
+ value: usize,
+ // in bytes.
+ size: usize,
+ expected_count: Option<Range<usize>>,
+ // in MB
+ expected_size: Option<Range<f32>>,
+}
+
+impl ResourceProfileCounter {
+ fn new(
+ description: &'static str,
+ expected_count: Option<Range<usize>>,
+ expected_size: Option<Range<f32>>
+ ) -> Self {
+ ResourceProfileCounter {
+ description,
+ value: 0,
+ size: 0,
+ expected_count,
+ expected_size,
}
}
- pub fn set_gpu_time_queries(&mut self, gpu_queries: Vec<GpuTimer>) {
- let mut gpu_time_ns = 0;
- for sample in &gpu_queries {
- gpu_time_ns += sample.time_ns;
+ #[allow(dead_code)]
+ fn reset(&mut self) {
+ self.value = 0;
+ self.size = 0;
+ }
+
+ #[inline(always)]
+ pub fn inc(&mut self, size: usize) {
+ self.value += 1;
+ self.size += size;
+ }
+
+ pub fn set(&mut self, count: usize, size: usize) {
+ self.value = count;
+ self.size = size;
+ }
+
+ pub fn size_mb(&self) -> f32 {
+ self.size as f32 / (1024.0 * 1024.0)
+ }
+}
+
+impl ProfileCounter for ResourceProfileCounter {
+ fn description(&self) -> &'static str {
+ self.description
+ }
+
+ fn value(&self) -> String {
+ format!("{} ({:.2} MB)", self.value, self.size_mb())
+ }
+
+ fn is_expected(&self) -> bool {
+ self.expected_count.as_ref().map(|range| range.contains(&self.value)).unwrap_or(true)
+ && self.expected_size.as_ref().map(|range| range.contains(&self.size_mb())).unwrap_or(true)
+ }
+}
+
+#[derive(Clone)]
+pub struct TimeProfileCounter {
+ description: &'static str,
+ nanoseconds: u64,
+ invert: bool,
+ expect_ms: Option<Range<f64>>,
+}
+
+pub struct Timer<'a> {
+ start: u64,
+ result: &'a mut u64,
+}
+
+impl<'a> Drop for Timer<'a> {
+ fn drop(&mut self) {
+ let end = precise_time_ns();
+ *self.result += end - self.start;
+ }
+}
+
+impl TimeProfileCounter {
+ pub fn new(description: &'static str, invert: bool, expect_ms: Option<Range<f64>>) -> Self {
+ TimeProfileCounter {
+ description,
+ nanoseconds: 0,
+ invert,
+ expect_ms,
}
+ }
- self.gpu_frames.push(ProfilerFrame {
- total_time: gpu_time_ns,
- samples: gpu_queries
- });
+ fn reset(&mut self) {
+ self.nanoseconds = 0;
+ }
+
+ #[allow(dead_code)]
+ pub fn set(&mut self, ns: u64) {
+ self.nanoseconds = ns;
+ }
+
+ pub fn profile<T, F>(&mut self, callback: F) -> T
+ where
+ F: FnOnce() -> T,
+ {
+ let t0 = precise_time_ns();
+ let val = callback();
+ let t1 = precise_time_ns();
+ let ns = t1 - t0;
+ self.nanoseconds += ns;
+ val
+ }
- self.counters[GPU_TIME].set_f64(ns_to_ms(gpu_time_ns));
+ pub fn timer(&mut self) -> Timer {
+ Timer {
+ start: precise_time_ns(),
+ result: &mut self.nanoseconds,
+ }
+ }
+
+ pub fn inc(&mut self, ns: u64) {
+ self.nanoseconds += ns;
}
- // Find the index of a counter by its name.
- pub fn index_of(&self, name: &str) -> Option<usize> {
- self.counters.iter().position(|counter| counter.name == name)
+ pub fn get(&self) -> u64 {
+ self.nanoseconds
}
- // Define the profiler UI, see comment about the syntax at the top of this file.
- pub fn set_ui(&mut self, names: &str) {
- let mut selection = Vec::new();
+ pub fn get_ms(&self) -> f64 {
+ self.nanoseconds as f64 / 1000000.0
+ }
+}
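+
+// Usage sketch: time can be accumulated either by wrapping work in
+// `profile`, or via the RAII `Timer` returned by `timer` (here
+// `build_frame` is a hypothetical workload):
+//
+//     let mut backend_time = TimeProfileCounter::new("Backend CPU Time", false, None);
+//     let frame = backend_time.profile(|| build_frame());
+//     {
+//         let _t = backend_time.timer();
+//         // ... this block's wall time is added when `_t` drops ...
+//     }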
- self.append_to_ui(&mut selection, names);
+impl ProfileCounter for TimeProfileCounter {
+ fn description(&self) -> &'static str {
+ self.description
+ }
- if selection == self.ui {
- return;
+ fn value(&self) -> String {
+ if self.invert {
+ format!("{:.2} fps", 1000000000.0 / self.nanoseconds as f64)
+ } else {
+ format!("{:.2} ms", self.get_ms())
}
+ }
+
+ fn is_expected(&self) -> bool {
+ self.expect_ms.as_ref()
+ .map(|range| range.contains(&(self.nanoseconds as f64 / 1000000.0)))
+ .unwrap_or(true)
+ }
+}
- for counter in &mut self.counters {
- counter.disable_graph();
+#[derive(Clone)]
+pub struct AverageTimeProfileCounter {
+ counter: AverageIntProfileCounter,
+ invert: bool,
+}
+
+impl AverageTimeProfileCounter {
+ pub fn new(
+ description: &'static str,
+ invert: bool,
+ expect_avg: Option<Range<f64>>,
+ expect_max: Option<Range<f64>>,
+ ) -> Self {
+ let expect_avg_ns = expect_avg.map(
+ |range| (range.start * 1000000.0) as u64 .. (range.end * 1000000.0) as u64
+ );
+ let expect_max_ns = expect_max.map(
+ |range| (range.start * 1000000.0) as u64 .. (range.end * 1000000.0) as u64
+ );
+
+ AverageTimeProfileCounter {
+ counter: AverageIntProfileCounter::new(
+ description,
+ expect_avg_ns,
+ expect_max_ns,
+ ),
+ invert,
}
+ }
- for item in &selection {
- if let Item::Graph(idx) = item {
- self.counters[*idx].enable_graph(self.num_graph_samples);
- }
+ pub fn set(&mut self, ns: u64) {
+ self.counter.set_u64(ns);
+ }
+
+ #[allow(dead_code)]
+ pub fn profile<T, F>(&mut self, callback: F) -> T
+ where
+ F: FnOnce() -> T,
+ {
+ let t0 = precise_time_ns();
+ let val = callback();
+ let t1 = precise_time_ns();
+ self.counter.set_u64(t1 - t0);
+ val
+ }
+
+ pub fn avg_ms(&self) -> f64 { self.counter.avg as f64 / 1000000.0 }
+
+ pub fn max_ms(&self) -> f64 { self.counter.max as f64 / 1000000.0 }
+}
+
+impl ProfileCounter for AverageTimeProfileCounter {
+ fn description(&self) -> &'static str {
+ self.counter.description
+ }
+
+ fn value(&self) -> String {
+ if self.invert {
+ format!("{:.2} fps", 1000000000.0 / self.counter.avg as f64)
+ } else {
+ format!("{:.2} ms (max {:.2} ms)", self.avg_ms(), self.max_ms())
}
+ }
- self.ui = selection;
+ fn is_expected(&self) -> bool {
+ self.counter.is_expected()
}
+}
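+
+// Usage sketch: expected ranges are given in milliseconds and converted to
+// nanoseconds internally, while `set` takes nanoseconds:
+//
+//     let mut gpu = AverageTimeProfileCounter::new("GPU:", false, None, Some(0.0..15.0));
+//     gpu.set(20_000_000);  // 20 ms, outside the expected 0..15 ms max range
+//     // Once the half-second time slice rolls over, is_expected() reports false.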
- fn append_to_ui(&mut self, selection: &mut Vec<Item>, names: &str) {
- // Group successive counters together.
- fn flush_counters(counters: &mut Vec<usize>, selection: &mut Vec<Item>) {
- if !counters.is_empty() {
- selection.push(Item::Counters(std::mem::take(counters)))
- }
+
+#[derive(Clone)]
+pub struct FrameProfileCounters {
+ pub total_primitives: AverageIntProfileCounter,
+ pub visible_primitives: AverageIntProfileCounter,
+ pub targets_used: AverageIntProfileCounter,
+ pub targets_changed: AverageIntProfileCounter,
+ pub targets_created: AverageIntProfileCounter,
+}
+
+impl FrameProfileCounters {
+ pub fn new() -> Self {
+ FrameProfileCounters {
+ total_primitives: AverageIntProfileCounter::new(
+ "Total Primitives",
+ None, Some(expected::TOTAL_PRIMITIVES),
+ ),
+ visible_primitives: AverageIntProfileCounter::new(
+ "Visible Primitives",
+ None, Some(expected::VISIBLE_PRIMITIVES),
+ ),
+ targets_used: AverageIntProfileCounter::new(
+ "Used targets",
+ None, Some(expected::USED_TARGETS),
+ ),
+ targets_changed: AverageIntProfileCounter::new(
+ "Changed targets",
+ None, Some(expected::CHANGED_TARGETS),
+ ),
+ targets_created: AverageIntProfileCounter::new(
+ "Created targets",
+ None, Some(expected::CREATED_TARGETS),
+ ),
}
+ }
- let mut counters = Vec::new();
+ pub fn reset_targets(&mut self) {
+ self.targets_used.reset();
+ self.targets_changed.reset();
+ self.targets_created.reset();
+ }
+}
- for name in names.split(",") {
- let name = name.trim();
- let is_graph = name.starts_with("#");
- let is_indicator = name.starts_with("*");
- let name = if is_graph || is_indicator {
- &name[1..]
- } else {
- name
- };
- // See comment about the ui string syntax at the top of this file.
- match name {
- "" => {
- flush_counters(&mut counters, selection);
- selection.push(Item::Space);
- }
- "|" => {
- flush_counters(&mut counters, selection);
- selection.push(Item::Column);
- }
- "_" => {
- flush_counters(&mut counters, selection);
- selection.push(Item::Row);
- }
- "FPS" => {
- flush_counters(&mut counters, selection);
- selection.push(Item::Fps);
- }
- "GPU time queries" => {
- flush_counters(&mut counters, selection);
- selection.push(Item::GpuTimeQueries);
- }
- "GPU cache bars" => {
- flush_counters(&mut counters, selection);
- selection.push(Item::GpuCacheBars);
- }
- "Paint phase graph" => {
- flush_counters(&mut counters, selection);
- selection.push(Item::PaintPhaseGraph);
- }
- _ => {
- if let Some(idx) = self.index_of(name) {
- if is_graph {
- flush_counters(&mut counters, selection);
- selection.push(Item::Graph(idx));
- } else if is_indicator {
- flush_counters(&mut counters, selection);
- selection.push(Item::ChangeIndicator(idx));
- } else {
- counters.push(idx);
- }
- } else if let Some(preset_str) = find_preset(name) {
- flush_counters(&mut counters, selection);
- self.append_to_ui(selection, preset_str);
- } else {
- selection.push(Item::Text(format!("Unknonw counter: {}", name)));
- }
- }
- }
+#[derive(Clone)]
+pub struct TextureCacheProfileCounters {
+ pub pages_alpha8_linear: ResourceProfileCounter,
+ pub pages_alpha16_linear: ResourceProfileCounter,
+ pub pages_color8_linear: ResourceProfileCounter,
+ pub pages_color8_nearest: ResourceProfileCounter,
+ pub pages_picture: ResourceProfileCounter,
+ pub rasterized_blob_pixels: ResourceProfileCounter,
+ pub standalone_bytes: IntProfileCounter,
+ pub shared_bytes: IntProfileCounter,
+}
+
+impl TextureCacheProfileCounters {
+ pub fn new() -> Self {
+ TextureCacheProfileCounters {
+ pages_alpha8_linear: ResourceProfileCounter::new("Texture A8 cached pages", None, None),
+ pages_alpha16_linear: ResourceProfileCounter::new("Texture A16 cached pages", None, None),
+ pages_color8_linear: ResourceProfileCounter::new("Texture RGBA8 cached pages (L)", None, None),
+ pages_color8_nearest: ResourceProfileCounter::new("Texture RGBA8 cached pages (N)", None, None),
+ pages_picture: ResourceProfileCounter::new("Picture cached pages", None, None),
+ rasterized_blob_pixels: ResourceProfileCounter::new(
+ "Rasterized Blob Pixels",
+ Some(expected::NUM_RASTERIZED_BLOBS),
+ Some(expected::RASTERIZED_BLOBS_MB),
+ ),
+ standalone_bytes: IntProfileCounter::new("Standalone", None),
+ shared_bytes: IntProfileCounter::new("Shared", None),
}
+ }
+}
+
+#[derive(Clone)]
+pub struct GpuCacheProfileCounters {
+ pub allocated_rows: AverageIntProfileCounter,
+ pub allocated_blocks: AverageIntProfileCounter,
+ pub updated_rows: AverageIntProfileCounter,
+ pub updated_blocks: AverageIntProfileCounter,
+ pub saved_blocks: AverageIntProfileCounter,
+}
- flush_counters(&mut counters, selection);
+impl GpuCacheProfileCounters {
+ pub fn new() -> Self {
+ GpuCacheProfileCounters {
+ allocated_rows: AverageIntProfileCounter::new(
+ "GPU cache rows: total",
+ None, Some(expected::GPU_CACHE_ROWS_TOTAL),
+ ),
+ updated_rows: AverageIntProfileCounter::new(
+ "GPU cache rows: updated",
+ None, Some(expected::GPU_CACHE_ROWS_UPDATED),
+ ),
+ allocated_blocks: AverageIntProfileCounter::new(
+ "GPU cache blocks: total",
+ None, Some(expected::GPU_CACHE_BLOCKS_TOTAL),
+ ),
+ updated_blocks: AverageIntProfileCounter::new(
+ "GPU cache blocks: updated",
+ None, Some(expected::GPU_CACHE_BLOCKS_UPDATED),
+ ),
+ saved_blocks: AverageIntProfileCounter::new(
+ "GPU cache blocks: saved",
+ None, Some(expected::GPU_CACHE_BLOCKS_SAVED),
+ ),
+ }
}
+}
+
+#[derive(Clone)]
+pub struct BackendProfileCounters {
+ pub total_time: TimeProfileCounter,
+ pub resources: ResourceProfileCounters,
+ pub txn: TransactionProfileCounters,
+ pub intern: InternProfileCounters,
+ pub scene_changed: bool,
+}
+
+#[derive(Clone)]
+pub struct ResourceProfileCounters {
+ pub font_templates: ResourceProfileCounter,
+ pub image_templates: ResourceProfileCounter,
+ pub texture_cache: TextureCacheProfileCounters,
+ pub gpu_cache: GpuCacheProfileCounters,
+ pub content_slices: IntProfileCounter,
+}
- pub fn set_counters(&mut self, counters: &mut TransactionProfile) {
- for (id, evt) in counters.events.iter_mut().enumerate() {
- if let Event::Value(val) = *evt {
- self.counters[id].set(val);
+#[derive(Clone)]
+pub struct TransactionProfileCounters {
+ pub display_list_build_time: TimeProfileCounter,
+ pub scene_build_time: TimeProfileCounter,
+ /// Time between when the display list is built and when it is sent by the API.
+ pub content_send_time: TimeProfileCounter,
+ /// Time between sending the SetDisplayList from the API and picking it up on
+ /// the render scene builder thread.
+ pub api_send_time: TimeProfileCounter,
+ /// Sum of content_send_time and api_send_time.
+ pub total_send_time: TimeProfileCounter,
+ pub display_lists: ResourceProfileCounter,
+}
+
+macro_rules! declare_intern_profile_counters {
+ ( $( $name:ident : $ty:ty, )+ ) => {
+ #[derive(Clone)]
+ pub struct InternProfileCounters {
+ $(
+ pub $name: ResourceProfileCounter,
+ )+
+ }
+
+ impl InternProfileCounters {
+ fn draw(
+ &self,
+ debug_renderer: &mut DebugRenderer,
+ draw_state: &mut DrawState,
+ ) {
+ Profiler::draw_counters(
+ &[
+ $(
+ &self.$name,
+ )+
+ ],
+ None,
+ debug_renderer,
+ false,
+ draw_state,
+ );
}
- *evt = Event::None;
}
}
+}
+
+enumerate_interners!(declare_intern_profile_counters);
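+
+// Expansion sketch: for each interner listed by `enumerate_interners!`
+// (e.g. `prim`, `clip`, `text_run`, ...), the macro above generates one
+// `ResourceProfileCounter` field, roughly:
+//
+//     pub struct InternProfileCounters {
+//         pub prim: ResourceProfileCounter,
+//         pub clip: ResourceProfileCounter,
+//         // ... one field per interner, plus the generated draw() helper ...
+//     }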
- pub fn get(&self, id: usize) -> Option<f64> {
- self.counters[id].get()
+impl TransactionProfileCounters {
+ pub fn set(
+ &mut self,
+ dl_build_start: u64,
+ dl_build_end: u64,
+ send_start: u64,
+ scene_build_start: u64,
+ scene_build_end: u64,
+ display_len: usize,
+ ) {
+ self.display_list_build_time.reset();
+ self.content_send_time.reset();
+ self.api_send_time.reset();
+ self.total_send_time.reset();
+ self.scene_build_time.reset();
+ self.display_lists.reset();
+
+ let dl_build_time = dl_build_end - dl_build_start;
+ let scene_build_time = scene_build_end - scene_build_start;
+ let content_send_time = send_start - dl_build_end;
+ let api_send_time = scene_build_start - send_start;
+ self.display_list_build_time.inc(dl_build_time);
+ self.scene_build_time.inc(scene_build_time);
+ self.content_send_time.inc(content_send_time);
+ self.api_send_time.inc(api_send_time);
+ self.total_send_time.inc(content_send_time + api_send_time);
+ self.display_lists.inc(display_len);
}
+}
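+
+// Worked example (sketch): with dl_build_start=0, dl_build_end=4,
+// send_start=5, scene_build_start=7 and scene_build_end=10 (all in ns),
+// the counters record:
+//
+//     display_list_build_time: 4   (dl_build_end - dl_build_start)
+//     content_send_time:       1   (send_start - dl_build_end)
+//     api_send_time:           2   (scene_build_start - send_start)
+//     total_send_time:         3   (content_send + api_send)
+//     scene_build_time:        3   (scene_build_end - scene_build_start)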
- fn draw_counters(
- counters: &[Counter],
- selected: &[usize],
- mut x: f32, mut y: f32,
- text_buffer: &mut String,
- debug_renderer: &mut DebugRenderer,
- ) -> default::Rect<f32> {
- let line_height = debug_renderer.line_height();
+impl BackendProfileCounters {
+ pub fn new() -> Self {
+ BackendProfileCounters {
+ total_time: TimeProfileCounter::new(
+ "Backend CPU Time", false,
+ Some(expected::MAX_BACKEND_CPU_TIME),
+ ),
+ resources: ResourceProfileCounters {
+ font_templates: ResourceProfileCounter::new(
+ "Font Templates",
+ Some(expected::NUM_FONT_TEMPLATES),
+ Some(expected::FONT_TEMPLATES_MB),
+ ),
+ image_templates: ResourceProfileCounter::new(
+ "Image Templates",
+ Some(expected::NUM_IMAGE_TEMPLATES),
+ Some(expected::IMAGE_TEMPLATES_MB),
+ ),
+ content_slices: IntProfileCounter::new(
+ "Content Slices",
+ None,
+ ),
+ texture_cache: TextureCacheProfileCounters::new(),
+ gpu_cache: GpuCacheProfileCounters::new(),
+ },
+ txn: TransactionProfileCounters {
+ display_list_build_time: TimeProfileCounter::new(
+ "DisplayList Build Time", false,
+ Some(expected::DISPLAY_LIST_BUILD_TIME)
+ ),
+ scene_build_time: TimeProfileCounter::new(
+ "Scene build time", false,
+ Some(expected::MAX_SCENE_BUILD_TIME),
+ ),
+ content_send_time: TimeProfileCounter::new(
+ "Content Send Time", false,
+ Some(expected::DISPLAY_LIST_SEND_TIME),
+ ),
+ api_send_time: TimeProfileCounter::new(
+ "API Send Time", false,
+ Some(expected::DISPLAY_LIST_SEND_TIME),
+ ),
+ total_send_time: TimeProfileCounter::new(
+ "Total IPC Time", false,
+ Some(expected::DISPLAY_LIST_TOTAL_TIME),
+ ),
+ display_lists: ResourceProfileCounter::new(
+ "DisplayLists Sent",
+ None, Some(expected::DISPLAY_LIST_MB),
+ ),
+ },
+ //TODO: generate this by a macro
+ intern: InternProfileCounters {
+ prim: ResourceProfileCounter::new("Interned primitives", None, None),
+ conic_grad: ResourceProfileCounter::new("Interned conic gradients", None, None),
+ image: ResourceProfileCounter::new("Interned images", None, None),
+ image_border: ResourceProfileCounter::new("Interned image borders", None, None),
+ line_decoration: ResourceProfileCounter::new("Interned line decorations", None, None),
+ linear_grad: ResourceProfileCounter::new("Interned linear gradients", None, None),
+ normal_border: ResourceProfileCounter::new("Interned normal borders", None, None),
+ picture: ResourceProfileCounter::new("Interned pictures", None, None),
+ radial_grad: ResourceProfileCounter::new("Interned radial gradients", None, None),
+ text_run: ResourceProfileCounter::new("Interned text runs", None, None),
+ yuv_image: ResourceProfileCounter::new("Interned YUV images", None, None),
+ clip: ResourceProfileCounter::new("Interned clips", None, None),
+ filter_data: ResourceProfileCounter::new("Interned filter data", None, None),
+ backdrop: ResourceProfileCounter::new("Interned backdrops", None, None),
+ },
+ scene_changed: false,
+ }
+ }
- x += PROFILE_PADDING;
- y += PROFILE_PADDING;
- let origin = default::Point2D::new(x, y);
- y += line_height * 0.5;
+ pub fn reset(&mut self) {
+ self.total_time.reset();
+ self.resources.texture_cache.rasterized_blob_pixels.reset();
+ self.scene_changed = false;
+ }
+}
- let mut total_rect = Rect::zero();
+pub struct RendererProfileCounters {
+ pub frame_counter: IntProfileCounter,
+ pub frame_time: AverageTimeProfileCounter,
+ pub draw_calls: AverageIntProfileCounter,
+ pub vertices: AverageIntProfileCounter,
+ pub vao_count_and_size: ResourceProfileCounter,
+ pub color_passes: AverageIntProfileCounter,
+ pub alpha_passes: AverageIntProfileCounter,
+ pub texture_data_uploaded: AverageIntProfileCounter,
+ pub rendered_picture_cache_tiles: AverageIntProfileCounter,
+ pub total_picture_cache_tiles: AverageIntProfileCounter,
+}
- let mut color_index = 0;
- let colors = [
- // Regular values,
- ColorU::new(255, 255, 255, 255),
- ColorU::new(255, 255, 0, 255),
- // Unexpected values,
- ColorU::new(255, 80, 0, 255),
- ColorU::new(255, 0, 0, 255),
- ];
+pub struct RendererProfileTimers {
+ pub cpu_time: TimeProfileCounter,
+ pub gpu_graph: TimeProfileCounter,
+ pub gpu_samples: Vec<GpuTimer<GpuProfileTag>>,
+}
- for idx in selected {
- // If The index is invalid, add some vertical space.
- let counter = &counters[*idx];
+impl RendererProfileCounters {
+ pub fn new() -> Self {
+ RendererProfileCounters {
+ frame_counter: IntProfileCounter::new("Frame", None),
+ frame_time: AverageTimeProfileCounter::new(
+ "FPS", true, None, None,
+ ),
+ draw_calls: AverageIntProfileCounter::new(
+ "Draw Calls",
+ None, Some(expected::DRAW_CALLS),
+ ),
+ vertices: AverageIntProfileCounter::new(
+ "Vertices",
+ None, Some(expected::VERTICES),
+ ),
+ vao_count_and_size: ResourceProfileCounter::new("VAO", None, None),
+ color_passes: AverageIntProfileCounter::new(
+ "Color passes",
+ None, Some(expected::COLOR_PASSES),
+ ),
+ alpha_passes: AverageIntProfileCounter::new(
+ "Alpha passes",
+ None, Some(expected::ALPHA_PASSES),
+ ),
+ texture_data_uploaded: AverageIntProfileCounter::new(
+ "Texture data, kb",
+ None, Some(expected::TEXTURE_DATA_UPLOADED),
+ ),
+ rendered_picture_cache_tiles: AverageIntProfileCounter::new(
+ "Rendered tiles",
+ None, Some(expected::RENDERED_PICTURE_CACHE_TILES),
+ ),
+ total_picture_cache_tiles: AverageIntProfileCounter::new(
+ "Total tiles",
+ None, Some(expected::TOTAL_PICTURE_CACHE_TILES),
+ ),
+ }
+ }
- let rect = debug_renderer.add_text(
- x, y,
- counter.name,
- colors[color_index],
- None,
- );
- color_index = (color_index + 1) % 2;
+ pub fn reset(&mut self) {
+ self.draw_calls.reset();
+ self.vertices.reset();
+ self.color_passes.reset();
+ self.alpha_passes.reset();
+ self.texture_data_uploaded.reset();
+ self.rendered_picture_cache_tiles.reset();
+ self.total_picture_cache_tiles.reset();
+ }
+}
- total_rect = total_rect.union(&rect);
- y += line_height;
+impl RendererProfileTimers {
+ pub fn new() -> Self {
+ RendererProfileTimers {
+ cpu_time: TimeProfileCounter::new("Renderer CPU Time", false, None),
+ gpu_samples: Vec::new(),
+ gpu_graph: TimeProfileCounter::new("GPU Time", false, None),
}
+ }
+}
- color_index = 0;
- x = total_rect.max_x() + 60.0;
- y = origin.y + line_height * 0.5;
-
- for idx in selected {
- let counter = &counters[*idx];
- let expected_offset = if counter.has_unexpected_avg_max() { 2 } else { 0 };
+struct GraphStats {
+ min_value: f32,
+ mean_value: f32,
+ max_value: f32,
+}
- counter.write_value(text_buffer);
+struct ProfileGraph {
+ max_samples: usize,
+ scale: f32,
+ values: VecDeque<f32>,
+ short_description: &'static str,
+ unit_description: &'static str,
+}
- let rect = debug_renderer.add_text(
- x,
- y,
- &text_buffer,
- colors[color_index + expected_offset],
- None,
- );
- color_index = (color_index + 1) % 2;
+impl ProfileGraph {
+ fn new(
+ max_samples: usize,
+ scale: f32,
+ short_description: &'static str,
+ unit_description: &'static str,
+ ) -> Self {
+ ProfileGraph {
+ max_samples,
+ scale,
+ values: VecDeque::new(),
+ short_description,
+ unit_description,
+ }
+ }
- total_rect = total_rect.union(&rect);
- y += line_height;
+ fn push(&mut self, ns: u64) {
+ let val = ns as f64 * self.scale as f64;
+ if self.values.len() == self.max_samples {
+ self.values.pop_back();
}
+ self.values.push_front(val as f32);
+ }
- total_rect = total_rect
- .union(&Rect { origin, size: Size2D::new(1.0, 1.0) })
- .inflate(PROFILE_PADDING, PROFILE_PADDING);
+ fn stats(&self) -> GraphStats {
+ let mut stats = GraphStats {
+ min_value: f32::MAX,
+ mean_value: 0.0,
+ max_value: -f32::MAX,
+ };
- debug_renderer.add_quad(
- total_rect.min_x(),
- total_rect.min_y(),
- total_rect.max_x(),
- total_rect.max_y(),
- BACKGROUND_COLOR,
- BACKGROUND_COLOR,
- );
+ for value in &self.values {
+ stats.min_value = stats.min_value.min(*value);
+ stats.mean_value += *value;
+ stats.max_value = stats.max_value.max(*value);
+ }
+
+ if !self.values.is_empty() {
+ stats.mean_value /= self.values.len() as f32;
+ }
- total_rect
+ stats
}
fn draw_graph(
- counter: &Counter,
+ &self,
x: f32,
y: f32,
- text_buffer: &mut String,
+ description: &'static str,
debug_renderer: &mut DebugRenderer,
) -> default::Rect<f32> {
- let graph = counter.graph.as_ref().unwrap();
-
- let max_samples = graph.values.capacity() as f32;
-
- let size = Size2D::new(max_samples, 100.0);
+ let size = Size2D::new(600.0, 100.0);
let line_height = debug_renderer.line_height();
- let graph_rect = Rect::new(Point2D::new(x + PROFILE_PADDING, y + PROFILE_PADDING), size);
- let mut rect = graph_rect.inflate(PROFILE_PADDING, PROFILE_PADDING);
+ let graph_rect = Rect::new(Point2D::new(x, y), size);
+ let mut rect = graph_rect.inflate(10.0, 10.0);
- let stats = graph.stats();
+ let stats = self.stats();
let text_color = ColorU::new(255, 255, 0, 255);
- let text_origin = rect.origin + vec2(rect.size.width, 25.0);
- set_text!(text_buffer, "{} ({})", counter.name, counter.unit);
+ let text_origin = rect.origin + vec2(rect.size.width, 20.0);
debug_renderer.add_text(
text_origin.x,
text_origin.y,
- if counter.unit == "" { counter.name } else { text_buffer },
+ description,
ColorU::new(0, 255, 0, 255),
None,
);
-
- set_text!(text_buffer, "Samples: {}", stats.samples);
-
debug_renderer.add_text(
text_origin.x,
text_origin.y + line_height,
- text_buffer,
+ &format!("Min: {:.2} {}", stats.min_value, self.unit_description),
+ text_color,
+ None,
+ );
+ debug_renderer.add_text(
+ text_origin.x,
+ text_origin.y + line_height * 2.0,
+ &format!("Mean: {:.2} {}", stats.mean_value, self.unit_description),
+ text_color,
+ None,
+ );
+ debug_renderer.add_text(
+ text_origin.x,
+ text_origin.y + line_height * 3.0,
+ &format!("Max: {:.2} {}", stats.max_value, self.unit_description),
text_color,
None,
);
- if stats.samples > 0 {
- set_text!(text_buffer, "Min: {:.2} {}", stats.min, counter.unit);
- debug_renderer.add_text(
- text_origin.x,
- text_origin.y + line_height * 2.0,
- text_buffer,
- text_color,
- None,
- );
-
- set_text!(text_buffer, "Avg: {:.2} {}", stats.avg, counter.unit);
- debug_renderer.add_text(
- text_origin.x,
- text_origin.y + line_height * 3.0,
- text_buffer,
- text_color,
- None,
- );
-
- set_text!(text_buffer, "Max: {:.2} {}", stats.max, counter.unit);
- debug_renderer.add_text(
- text_origin.x,
- text_origin.y + line_height * 4.0,
- text_buffer,
- text_color,
- None,
- );
- }
-
- rect.size.width += 220.0;
+ rect.size.width += 140.0;
debug_renderer.add_quad(
- rect.min_x(),
- rect.min_y(),
- rect.max_x(),
- rect.max_y(),
- BACKGROUND_COLOR,
- BACKGROUND_COLOR,
+ rect.origin.x,
+ rect.origin.y,
+ rect.origin.x + rect.size.width + 10.0,
+ rect.origin.y + rect.size.height,
+ ColorU::new(25, 25, 25, 200),
+ ColorU::new(51, 51, 51, 200),
);
let bx1 = graph_rect.max_x();
let by1 = graph_rect.max_y();
- let w = graph_rect.size.width / max_samples;
+ let w = graph_rect.size.width / self.max_samples as f32;
let h = graph_rect.size.height;
let color_t0 = ColorU::new(0, 255, 0, 255);
let color_b0 = ColorU::new(0, 180, 0, 255);
+ let color_t1 = ColorU::new(0, 255, 0, 255);
+ let color_b1 = ColorU::new(0, 180, 0, 255);
+
let color_t2 = ColorU::new(255, 0, 0, 255);
let color_b2 = ColorU::new(180, 0, 0, 255);
- for (index, sample) in graph.values.iter().enumerate() {
- if !sample.is_finite() {
- // NAN means no sample this frame.
- continue;
- }
- let sample = *sample as f32;
+ for (index, sample) in self.values.iter().enumerate() {
+ let sample = *sample;
let x1 = bx1 - index as f32 * w;
let x0 = x1 - w;
- let y0 = by1 - (sample / stats.max as f32) as f32 * h;
+ let y0 = by1 - (sample / stats.max_value) as f32 * h;
let y1 = by1;
- let (color_top, color_bottom) = if counter.is_unexpected_value(sample as f64) {
- (color_t2, color_b2)
- } else {
+ let (color_top, color_bottom) = if sample < 1000.0 / 60.0 {
(color_t0, color_b0)
+ } else if sample < 1000.0 / 30.0 {
+ (color_t1, color_b1)
+ } else {
+ (color_t2, color_b2)
};
debug_renderer.add_quad(x0, y0, x1, y1, color_top, color_bottom);
@@ -775,164 +1096,51 @@ impl Profiler {
rect
}
+}
-
- fn draw_change_indicator(
- counter: &Counter,
- x: f32, y: f32,
- debug_renderer: &mut DebugRenderer
- ) -> default::Rect<f32> {
- let height = 10.0;
- let width = 20.0;
-
- // Draw the indicator red instead of blue if is is not within expected ranges.
- let color = if counter.has_unexpected_value() || counter.has_unexpected_avg_max() {
- ColorU::new(255, 20, 20, 255)
- } else {
- ColorU::new(0, 100, 250, 255)
- };
-
- let tx = counter.change_indicator as f32 * width;
- debug_renderer.add_quad(
- x,
- y,
- x + 15.0 * width,
- y + height,
- ColorU::new(0, 0, 0, 150),
- ColorU::new(0, 0, 0, 150),
- );
-
- debug_renderer.add_quad(
- x + tx,
- y,
- x + tx + width,
- y + height,
- color,
- ColorU::new(25, 25, 25, 255),
- );
-
- Rect {
- origin: Point2D::new(x, y),
- size: Size2D::new(15.0 * width + 20.0, height),
- }
+impl ProfileCounter for ProfileGraph {
+ fn description(&self) -> &'static str {
+ self.short_description
}
- fn draw_bar(
- label: &str,
- label_color: ColorU,
- counters: &[(ColorU, usize)],
- x: f32, y: f32,
- debug_renderer: &mut DebugRenderer,
- ) -> default::Rect<f32> {
- let x = x + 8.0;
- let y = y + 24.0;
- let text_rect = debug_renderer.add_text(
- x, y,
- label,
- label_color,
- None,
- );
-
- let x_base = text_rect.max_x() + 10.0;
- let width = 300.0;
- let total_value = counters.last().unwrap().1;
- let scale = width / total_value as f32;
- let mut x_current = x_base;
-
- for &(color, counter) in counters {
- let x_stop = x_base + counter as f32 * scale;
- debug_renderer.add_quad(
- x_current,
- text_rect.origin.y,
- x_stop,
- text_rect.max_y(),
- color,
- color,
- );
- x_current = x_stop;
-
- }
-
- let mut total_rect = text_rect;
- total_rect.size.width += width + 10.0;
-
- total_rect
+ fn value(&self) -> String {
+ format!("{:.2}ms", self.stats().mean_value)
}
- fn draw_gpu_cache_bars(&self, x: f32, mut y: f32, text_buffer: &mut String, debug_renderer: &mut DebugRenderer) -> default::Rect<f32> {
- let color_updated = ColorU::new(0xFF, 0, 0, 0xFF);
- let color_free = ColorU::new(0, 0, 0xFF, 0xFF);
- let color_saved = ColorU::new(0, 0xFF, 0, 0xFF);
-
- let updated_blocks = self.get(GPU_CACHE_BLOCKS_UPDATED).unwrap_or(0.0) as usize;
- let saved_blocks = self.get(GPU_CACHE_BLOCKS_SAVED).unwrap_or(0.0) as usize;
- let allocated_blocks = self.get(GPU_CACHE_BLOCKS_TOTAL).unwrap_or(0.0) as usize;
- let allocated_rows = self.get(GPU_CACHE_ROWS_TOTAL).unwrap_or(0.0) as usize;
- let updated_rows = self.get(GPU_CACHE_ROWS_UPDATED).unwrap_or(0.0) as usize;
- let requested_blocks = updated_blocks + saved_blocks;
- let total_blocks = allocated_rows * MAX_VERTEX_TEXTURE_WIDTH;
-
- set_text!(text_buffer, "GPU cache rows ({}):", allocated_rows);
-
- let rect0 = Profiler::draw_bar(
- text_buffer,
- ColorU::new(0xFF, 0xFF, 0xFF, 0xFF),
- &[
- (color_updated, updated_rows),
- (color_free, allocated_rows),
- ],
- x, y,
- debug_renderer,
- );
-
- y = rect0.max_y();
-
- let rect1 = Profiler::draw_bar(
- "GPU cache blocks",
- ColorU::new(0xFF, 0xFF, 0, 0xFF),
- &[
- (color_updated, updated_blocks),
- (color_saved, requested_blocks),
- (color_free, allocated_blocks),
- (ColorU::new(0, 0, 0, 0xFF), total_blocks),
- ],
- x, y,
- debug_renderer,
- );
+ fn is_expected(&self) -> bool { true }
+}
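+
+// Usage sketch: a graph stores up to `max_samples` values, scaled on push
+// (here nanoseconds to milliseconds), and `stats` summarizes what is held:
+//
+//     let mut graph = ProfileGraph::new(600, 1.0 / 1000000.0, "Renderer:", "ms");
+//     graph.push(8_000_000);   // 8 ms
+//     graph.push(16_000_000);  // 16 ms
+//     let stats = graph.stats();
+//     // stats.min_value == 8.0, stats.mean_value == 12.0, stats.max_value == 16.0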
- let total_rect = rect0.union(&rect1).inflate(10.0, 10.0);
- debug_renderer.add_quad(
- total_rect.origin.x,
- total_rect.origin.y,
- total_rect.origin.x + total_rect.size.width,
- total_rect.origin.y + total_rect.size.height,
- ColorF::new(0.1, 0.1, 0.1, 0.8).into(),
- ColorF::new(0.2, 0.2, 0.2, 0.8).into(),
- );
+struct GpuFrame {
+ total_time: u64,
+ samples: Vec<GpuTimer<GpuProfileTag>>,
+}
- total_rect
- }
+struct GpuFrameCollection {
+ frames: VecDeque<GpuFrame>,
+}
- // Draws a frame graph for a given frame collection.
- fn draw_frame_graph(
- frame_collection: &ProfilerFrameCollection,
- x: f32, y: f32,
- debug_renderer: &mut DebugRenderer,
- ) -> default::Rect<f32> {
- let mut has_data = false;
- for frame in &frame_collection.frames {
- if !frame.samples.is_empty() {
- has_data = true;
- break;
- }
+impl GpuFrameCollection {
+ fn new() -> Self {
+ GpuFrameCollection {
+ frames: VecDeque::new(),
}
+ }
- if !has_data {
- return Rect::zero();
+ fn push(&mut self, total_time: u64, samples: Vec<GpuTimer<GpuProfileTag>>) {
+ if self.frames.len() == 20 {
+ self.frames.pop_back();
}
+ self.frames.push_front(GpuFrame {
+ total_time,
+ samples,
+ });
+ }
+}
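+
+// Usage sketch: the collection keeps only the 20 most recent frames; pushing
+// a 21st frame evicts the oldest:
+//
+//     let mut frames = GpuFrameCollection::new();
+//     for _ in 0..25 {
+//         frames.push(16_000_000, Vec::new());
+//     }
+//     // The collection now holds exactly 20 frames.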
+impl GpuFrameCollection {
+ fn draw(&self, x: f32, y: f32, debug_renderer: &mut DebugRenderer) -> default::Rect<f32> {
let graph_rect = Rect::new(
- Point2D::new(x + GRAPH_PADDING, y + GRAPH_PADDING),
+ Point2D::new(x, y),
Size2D::new(GRAPH_WIDTH, GRAPH_HEIGHT),
);
let bounding_rect = graph_rect.inflate(GRAPH_PADDING, GRAPH_PADDING);
@@ -942,27 +1150,22 @@ impl Profiler {
bounding_rect.origin.y,
bounding_rect.origin.x + bounding_rect.size.width,
bounding_rect.origin.y + bounding_rect.size.height,
- BACKGROUND_COLOR,
- BACKGROUND_COLOR,
+ ColorU::new(25, 25, 25, 200),
+ ColorU::new(51, 51, 51, 200),
);
let w = graph_rect.size.width;
let mut y0 = graph_rect.origin.y;
- let mut max_time = frame_collection.frames
+ let max_time = self.frames
.iter()
.max_by_key(|f| f.total_time)
.unwrap()
.total_time as f32;
- // If the max time is lower than 16ms, fix the scale
- // at 16ms so that the graph is easier to interpret.
- let baseline_ns = 16_000_000.0; // 16ms
- max_time = max_time.max(baseline_ns);
-
let mut tags_present = FastHashMap::default();
- for frame in &frame_collection.frames {
+ for frame in &self.frames {
let y1 = y0 + GRAPH_FRAME_HEIGHT;
let mut current_ns = 0;
@@ -988,23 +1191,6 @@ impl Profiler {
y0 = y1;
}
- // If the max time is higher than 16ms, show a vertical line at the
- // 16ms mark.
- if max_time > baseline_ns {
- let x = graph_rect.origin.x + w * baseline_ns as f32 / max_time;
- let height = frame_collection.frames.len() as f32 * GRAPH_FRAME_HEIGHT;
-
- debug_renderer.add_quad(
- x,
- graph_rect.origin.y,
- x + 4.0,
- graph_rect.origin.y + height,
- ColorU::new(120, 00, 00, 150),
- ColorU::new(120, 00, 00, 100),
- );
- }
-
-
// Add a legend to see which color correspond to what primitive.
const LEGEND_SIZE: f32 = 20.0;
const PADDED_LEGEND_SIZE: f32 = 25.0;
@@ -1014,8 +1200,8 @@ impl Profiler {
bounding_rect.origin.y,
bounding_rect.max_x() + GRAPH_PADDING + 200.0,
bounding_rect.origin.y + tags_present.len() as f32 * PADDED_LEGEND_SIZE + GRAPH_PADDING,
- BACKGROUND_COLOR,
- BACKGROUND_COLOR,
+ ColorU::new(25, 25, 25, 200),
+ ColorU::new(51, 51, 51, 200),
);
}
@@ -1040,719 +1226,676 @@ impl Profiler {
bounding_rect
}
-
- pub fn draw_profile(
- &mut self,
- _frame_index: u64,
- debug_renderer: &mut DebugRenderer,
- device_size: DeviceIntSize,
- ) {
- let x_start = 20.0;
- let mut y_start = 150.0;
- let default_column_width = 400.0;
-
- // set_text!(..) into this string instead of using format!(..) to avoid
- // unnecessary allocations.
- let mut text_buffer = String::with_capacity(32);
-
- let mut column_width = default_column_width;
- let mut max_y = y_start;
-
- let mut x = x_start;
- let mut y = y_start;
-
- for elt in &self.ui {
- let rect = match elt {
- Item::Counters(indices) => {
- Profiler::draw_counters(&self.counters, &indices, x, y, &mut text_buffer, debug_renderer)
- }
- Item::Graph(idx) => {
- Profiler::draw_graph(&self.counters[*idx], x, y, &mut text_buffer, debug_renderer)
- }
- Item::ChangeIndicator(idx) => {
- Profiler::draw_change_indicator(&self.counters[*idx], x, y, debug_renderer)
- }
- Item::GpuTimeQueries => {
- Profiler::draw_frame_graph(&self.gpu_frames, x, y, debug_renderer)
- }
- Item::GpuCacheBars => {
- self.draw_gpu_cache_bars(x, y, &mut text_buffer, debug_renderer)
- }
- Item::PaintPhaseGraph => {
- Profiler::draw_frame_graph(&self.frame_stats, x, y, debug_renderer)
- }
- Item::Text(text) => {
- let p = 10.0;
- let mut rect = debug_renderer.add_text(
- x + p,
- y + p,
- &text,
- ColorU::new(255, 255, 255, 255),
- None,
- );
- rect = rect.inflate(p, p);
-
- debug_renderer.add_quad(
- rect.origin.x,
- rect.origin.y,
- rect.max_x(),
- rect.max_y(),
- BACKGROUND_COLOR,
- BACKGROUND_COLOR,
- );
-
- rect
- }
- Item::Fps => {
- let fps = self.frame_timestamps_within_last_second.len();
- set_text!(&mut text_buffer, "{} fps", fps);
- let mut rect = debug_renderer.add_text(
- x + PROFILE_PADDING,
- y + PROFILE_PADDING + 5.0,
- &text_buffer,
- ColorU::new(255, 255, 255, 255),
- None,
- );
- rect = rect.inflate(PROFILE_PADDING, PROFILE_PADDING);
-
- debug_renderer.add_quad(
- rect.min_x(),
- rect.min_y(),
- rect.max_x(),
- rect.max_y(),
- BACKGROUND_COLOR,
- BACKGROUND_COLOR,
- );
-
- rect
- }
- Item::Space => {
- Rect { origin: Point2D::new(x, y), size: Size2D::new(0.0, PROFILE_SPACING) }
- }
- Item::Column => {
- max_y = max_y.max(y);
- x += column_width + PROFILE_SPACING;
- y = y_start;
- column_width = default_column_width;
-
- continue;
- }
- Item::Row => {
- max_y = max_y.max(y);
- y_start = max_y + PROFILE_SPACING;
- y = y_start;
- x = x_start;
- column_width = default_column_width;
-
- continue;
- }
- };
-
- column_width = column_width.max(rect.size.width);
- y = rect.max_y();
-
- if y > device_size.height as f32 - 100.0 {
- max_y = max_y.max(y);
- x += column_width + PROFILE_SPACING;
- y = y_start;
- column_width = default_column_width;
- }
- }
- }
-
- #[cfg(feature = "capture")]
- pub fn dump_stats(&self, sink: &mut dyn std::io::Write) -> std::io::Result<()> {
- for counter in &self.counters {
- if counter.value.is_finite() {
- writeln!(sink, "{} {:?}{}", counter.name, counter.value, counter.unit)?;
- }
- }
-
- Ok(())
- }
-}
-
-/// Defines the interface for hooking up an external profiler to WR.
-pub trait ProfilerHooks : Send + Sync {
- /// Called at the beginning of a profile scope. The label must
- /// be a C string (null terminated).
- fn begin_marker(&self, label: &CStr);
-
- /// Called at the end of a profile scope. The label must
- /// be a C string (null terminated).
- fn end_marker(&self, label: &CStr);
-
- /// Called to mark an event happening. The label must
- /// be a C string (null terminated).
- fn event_marker(&self, label: &CStr);
-
- /// Called with a duration to indicate a text marker that just ended. Text
- /// markers allow different types of entries to be recorded on the same row
- /// in the timeline, by adding labels to the entry.
- ///
- /// This variant is also useful when the caller only wants to record events
- /// longer than a certain threshold, and thus they don't know in advance
- /// whether the event will qualify.
- fn add_text_marker(&self, label: &CStr, text: &str, duration: Duration);
-
- /// Returns true if the current thread is being profiled.
- fn thread_is_being_profiled(&self) -> bool;
}
-/// The current global profiler callbacks, if set by embedder.
-pub static mut PROFILER_HOOKS: Option<&'static dyn ProfilerHooks> = None;
-
-/// Set the profiler callbacks, or None to disable the profiler.
-/// This function must only ever be called before any WR instances
-/// have been created, or the hooks will not be set.
-pub fn set_profiler_hooks(hooks: Option<&'static dyn ProfilerHooks>) {
- if !wr_has_been_initialized() {
- unsafe {
- PROFILER_HOOKS = hooks;
- }
- }
+struct DrawState {
+ x_left: f32,
+ y_left: f32,
+ x_right: f32,
+ y_right: f32,
}
-/// A simple RAII style struct to manage a profile scope.
-pub struct ProfileScope {
- name: &'static CStr,
+pub struct Profiler {
+ draw_state: DrawState,
+ backend_graph: ProfileGraph,
+ renderer_graph: ProfileGraph,
+ gpu_graph: ProfileGraph,
+ ipc_graph: ProfileGraph,
+ display_list_build_graph: ProfileGraph,
+ scene_build_graph: ProfileGraph,
+ blob_raster_graph: ProfileGraph,
+ backend_time: AverageTimeProfileCounter,
+ renderer_time: AverageTimeProfileCounter,
+ gpu_time: AverageTimeProfileCounter,
+ ipc_time: AverageTimeProfileCounter,
+ gpu_frames: GpuFrameCollection,
+ cooldowns: Vec<i32>,
}
-/// Records a marker of the given duration that just ended.
-pub fn add_text_marker(label: &CStr, text: &str, duration: Duration) {
- unsafe {
- if let Some(ref hooks) = PROFILER_HOOKS {
- hooks.add_text_marker(label, text, duration);
+impl Profiler {
+ pub fn new() -> Self {
+ let to_ms_scale = 1.0 / 1000000.0;
+ Profiler {
+ draw_state: DrawState {
+ x_left: 0.0,
+ y_left: 0.0,
+ x_right: 0.0,
+ y_right: 0.0,
+ },
+ backend_graph: ProfileGraph::new(600, to_ms_scale, "Backend:", "ms"),
+ renderer_graph: ProfileGraph::new(600, to_ms_scale, "Renderer:", "ms"),
+ gpu_graph: ProfileGraph::new(600, to_ms_scale, "GPU:", "ms"),
+ ipc_graph: ProfileGraph::new(600, to_ms_scale, "IPC:", "ms"),
+ display_list_build_graph: ProfileGraph::new(600, to_ms_scale, "DisplayList build", "ms"),
+ scene_build_graph: ProfileGraph::new(600, to_ms_scale, "Scene build:", "ms"),
+ blob_raster_graph: ProfileGraph::new(600, 1.0, "Rasterized blob pixels:", "px"),
+ gpu_frames: GpuFrameCollection::new(),
+ backend_time: AverageTimeProfileCounter::new(
+ "Backend:", false,
+ Some(expected::AVG_BACKEND_CPU_TIME),
+ Some(expected::MAX_BACKEND_CPU_TIME),
+ ),
+ renderer_time: AverageTimeProfileCounter::new(
+ "Renderer:", false,
+ Some(expected::AVG_RENDERER_CPU_TIME),
+ Some(expected::MAX_RENDERER_CPU_TIME),
+ ),
+ ipc_time: AverageTimeProfileCounter::new(
+ "IPC:", false,
+ Some(expected::AVG_IPC_TIME),
+ Some(expected::MAX_IPC_TIME),
+ ),
+ gpu_time: AverageTimeProfileCounter::new(
+ "GPU:", false,
+ Some(expected::AVG_GPU_TIME),
+ Some(expected::MAX_GPU_TIME),
+ ),
+ cooldowns: Vec::new(),
}
}
-}
-/// Records a marker of the given duration that just ended.
-pub fn add_event_marker(label: &CStr) {
- unsafe {
- if let Some(ref hooks) = PROFILER_HOOKS {
- hooks.event_marker(label);
- }
- }
-}
+ // If an array of "cooldown" counters is provided, only display counters whose
+ // values are out of the ordinary, and keep displaying them until their
+ // cooldown expires.
+ fn draw_counters<T: ProfileCounter + ?Sized>(
+ counters: &[&T],
+ mut cooldowns: Option<&mut [i32]>,
+ debug_renderer: &mut DebugRenderer,
+ left: bool,
+ draw_state: &mut DrawState,
+ ) {
+ let mut label_rect = Rect::zero();
+ let mut value_rect = Rect::zero();
+ let (mut current_x, mut current_y) = if left {
+ (draw_state.x_left, draw_state.y_left)
+ } else {
+ (draw_state.x_right, draw_state.y_right)
+ };
+ let mut color_index = 0;
+ let line_height = debug_renderer.line_height();
-/// Returns true if the current thread is being profiled.
-pub fn thread_is_being_profiled() -> bool {
- unsafe {
- PROFILER_HOOKS.map_or(false, |h| h.thread_is_being_profiled())
- }
-}
+ let colors = [
+ // Regular values,
+ ColorU::new(255, 255, 255, 255),
+ ColorU::new(255, 255, 0, 255),
+ // Unexpected values,
+ ColorU::new(255, 80, 0, 255),
+ ColorU::new(255, 0, 0, 255),
+ ];
-impl ProfileScope {
- /// Begin a new profile scope
- pub fn new(name: &'static CStr) -> Self {
- unsafe {
- if let Some(ref hooks) = PROFILER_HOOKS {
- hooks.begin_marker(name);
+ for (idx, counter) in counters.iter().enumerate() {
+ if let Some(cooldowns) = cooldowns.as_mut() {
+ if !counter.is_expected() {
+ cooldowns[idx] = 40;
+ }
+ if cooldowns[idx] == 0 {
+ continue;
+ }
}
- }
+ let rect = debug_renderer.add_text(
+ current_x,
+ current_y,
+ counter.description(),
+ colors[color_index],
+ None,
+ );
+ color_index = (color_index + 1) % 2;
- ProfileScope {
- name,
+ label_rect = label_rect.union(&rect);
+ current_y += line_height;
}
- }
-}
-impl Drop for ProfileScope {
- fn drop(&mut self) {
- unsafe {
- if let Some(ref hooks) = PROFILER_HOOKS {
- hooks.end_marker(self.name);
+ color_index = 0;
+ current_x = label_rect.origin.x + label_rect.size.width + 60.0;
+ current_y = if left { draw_state.y_left } else { draw_state.y_right };
+
+ for (idx, counter) in counters.iter().enumerate() {
+ let expected_offset = if counter.is_expected() || cooldowns.is_some() { 0 } else { 2 };
+ if let Some(cooldowns) = cooldowns.as_mut() {
+ if cooldowns[idx] > 0 {
+ cooldowns[idx] -= 1;
+ } else {
+ continue;
+ }
}
- }
- }
-}
-
-/// A helper macro to define profile scopes.
-macro_rules! profile_marker {
- ($string:expr) => {
- let _scope = $crate::profiler::ProfileScope::new(cstr!($string));
- };
-}
-
-#[derive(Debug, Clone)]
-pub struct GpuProfileTag {
- pub label: &'static str,
- pub color: ColorF,
-}
-
-/// Ranges of expected value for a profile counter.
-#[derive(Clone, Debug)]
-pub struct Expected<T> {
- pub range: Option<Range<T>>,
- pub avg: Option<Range<T>>,
-}
-
-impl<T> Expected<T> {
- const fn none() -> Self {
- Expected {
- range: None,
- avg: None,
- }
- }
-}
-
-const fn expected<T>(range: Range<T>) -> Expected<T> {
- Expected {
- range: Some(range),
- avg: None,
- }
-}
-
-impl Expected<f64> {
- const fn avg(mut self, avg: Range<f64>) -> Self {
- self.avg = Some(avg);
- self
- }
-}
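-
-// Illustrative use of the builders above (hypothetical constant and ranges):
-//   const FRAME_TIME: Expected<f64> = expected(0.0 .. 16.0).avg(0.0 .. 8.0);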
-
-impl Expected<i64> {
- const fn avg(mut self, avg: Range<i64>) -> Self {
- self.avg = Some(avg);
- self
- }
-
- fn into_float(self) -> Expected<f64> {
- Expected {
- range: match self.range {
- Some(r) => Some(r.start as f64 .. r.end as f64),
- None => None,
- },
- avg: match self.avg {
- Some(r) => Some(r.start as f64 .. r.end as f64),
- None => None,
- },
- }
- }
-}
-
-pub struct CounterDescriptor {
- pub name: &'static str,
- pub unit: &'static str,
- pub index: usize,
- pub show_as: ShowAs,
- pub expected: Expected<f64>,
-}
-
-#[derive(Debug)]
-pub struct Counter {
- pub name: &'static str,
- pub unit: &'static str,
- pub show_as: ShowAs,
- pub expected: Expected<f64>,
-
- /// The value set since the last update, or NAN when unset.
- value: f64,
- /// Number of samples in the current time slice.
- num_samples: u64,
- /// Sum of the values recorded during the current time slice.
- sum: f64,
- /// The max value in the in-progress time slice.
- next_max: f64,
- /// The max value of the previous time slice (displayed).
- max: f64,
- /// The average value of the previous time slice (displayed).
- avg: f64,
- /// Incremented when the counter changes.
- change_indicator: u8,
-
- /// Only used to check that the constants match the real index.
- #[allow(dead_code)]
- index: usize,
-
- graph: Option<Graph>,
-}
+ let rect = debug_renderer.add_text(
+ current_x,
+ current_y,
+ &counter.value(),
+ colors[color_index + expected_offset],
+ None,
+ );
+ color_index = (color_index + 1) % 2;
-impl Counter {
- pub fn new(descriptor: &CounterDescriptor) -> Self {
- Counter {
- name: descriptor.name,
- unit: descriptor.unit,
- show_as: descriptor.show_as,
- expected: descriptor.expected.clone(),
- index: descriptor.index,
- value: std::f64::NAN,
- num_samples: 0,
- sum: 0.0,
- next_max: 0.0,
- max: 0.0,
- avg: 0.0,
- change_indicator: 0,
- graph: None,
+ value_rect = value_rect.union(&rect);
+ current_y += line_height;
}
- }
- pub fn set_f64(&mut self, val: f64) {
- self.value = val;
- }
-
- pub fn set<T>(&mut self, val: T) where T: Into<f64> {
- self.set_f64(val.into());
- }
- pub fn get(&self) -> Option<f64> {
- if self.value.is_finite() {
- Some(self.value)
+ let total_rect = label_rect.union(&value_rect).inflate(10.0, 10.0);
+ debug_renderer.add_quad(
+ total_rect.origin.x,
+ total_rect.origin.y,
+ total_rect.origin.x + total_rect.size.width,
+ total_rect.origin.y + total_rect.size.height,
+ ColorF::new(0.1, 0.1, 0.1, 0.8).into(),
+ ColorF::new(0.2, 0.2, 0.2, 0.8).into(),
+ );
+ let new_y = total_rect.origin.y + total_rect.size.height + 30.0;
+ if left {
+ draw_state.y_left = new_y;
} else {
- None
+ draw_state.y_right = new_y;
}
}
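+ // Illustrative call shape (names hypothetical): with a cooldown array, only
+ // counters reporting unexpected values stay visible until their cooldown decays:
+ //
+ //   let mut cooldowns = vec![0i32; counters.len()];
+ //   Profiler::draw_counters(&counters, Some(&mut cooldowns), debug_renderer, true, &mut state);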
- pub fn write_value(&self, output: &mut String) {
- match self.show_as {
- ShowAs::Float => {
- set_text!(output, "{:.2} {} (max: {:.2})", self.avg, self.unit, self.max);
- }
- ShowAs::Int => {
- set_text!(output, "{:.0} {} (max: {:.0})", self.avg.round(), self.unit, self.max.round());
- }
- }
- }
-
- pub fn enable_graph(&mut self, max_samples: usize) {
- if self.graph.is_some() {
- return;
- }
-
- self.graph = Some(Graph::new(max_samples));
- }
-
- pub fn disable_graph(&mut self) {
- self.graph = None;
- }
-
- pub fn is_unexpected_value(&self, value: f64) -> bool {
- if let Some(range) = &self.expected.range {
- return value.is_finite() && value >= range.end;
- }
-
- false
- }
+ fn draw_bar(
+ &mut self,
+ label: &str,
+ label_color: ColorU,
+ counters: &[(ColorU, &AverageIntProfileCounter)],
+ debug_renderer: &mut DebugRenderer,
+ ) -> default::Rect<f32> {
+ let mut rect = debug_renderer.add_text(
+ self.draw_state.x_left,
+ self.draw_state.y_left,
+ label,
+ label_color,
+ None,
+ );
- pub fn has_unexpected_value(&self) -> bool {
- self.is_unexpected_value(self.value)
- }
+ let x_base = rect.origin.x + rect.size.width + 10.0;
+ let height = debug_renderer.line_height();
+ let width = (self.draw_state.x_right - 30.0 - x_base).max(0.0);
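+ // By convention, the last entry in `counters` holds the total; each counter's
+ // value marks a cumulative stop position scaled against that total.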
+ let total_value = counters.last().unwrap().1.get();
+ let scale = width / total_value as f32;
+ let mut x_current = x_base;
- pub fn has_unexpected_avg_max(&self) -> bool {
- if let Some(range) = &self.expected.range {
- if self.max.is_finite() && self.max >= range.end {
- return true;
- }
+ for &(color, counter) in counters {
+ let x_stop = x_base + counter.get() as f32 * scale;
+ debug_renderer.add_quad(
+ x_current,
+ rect.origin.y,
+ x_stop,
+ rect.origin.y + height,
+ color,
+ color,
+ );
+ x_current = x_stop;
}
- if let Some(range) = &self.expected.avg {
- if self.avg < range.start || self.avg >= range.end {
- return true;
- }
- }
+ self.draw_state.y_left += height;
- false
+ rect.size.width += width + 10.0;
+ rect
}
- fn update(&mut self, update_avg: bool) {
- let updated = self.value.is_finite();
- if updated {
- self.next_max = self.next_max.max(self.value);
- self.sum += self.value;
- self.num_samples += 1;
- self.change_indicator = (self.change_indicator + 1) % 15;
- }
-
- if let Some(graph) = &mut self.graph {
- graph.set(self.value);
- }
-
- self.value = std::f64::NAN;
-
- if update_avg && self.num_samples > 0 {
- self.avg = self.sum / self.num_samples as f64;
- self.max = self.next_max;
- self.sum = 0.0;
- self.num_samples = 0;
- self.next_max = std::f64::MIN;
- }
- }
-}
+ fn draw_gpu_cache_bars(
+ &mut self,
+ counters: &GpuCacheProfileCounters,
+ debug_renderer: &mut DebugRenderer,
+ ) {
+ let color_updated = ColorU::new(0xFF, 0, 0, 0xFF);
+ let color_free = ColorU::new(0, 0, 0xFF, 0xFF);
+ let color_saved = ColorU::new(0, 0xFF, 0, 0xFF);
-#[derive(Copy, Clone, Debug)]
-pub enum Event {
- Start(f64),
- Value(f64),
- None,
-}
+ let mut requested_blocks = AverageIntProfileCounter::new("", None, None);
+ requested_blocks.set(counters.updated_blocks.get() + counters.saved_blocks.get());
-// std::convert::From/TryFrom can't deal with integer to f64 so we roll our own...
-pub trait EventValue {
- fn into_f64(self) -> f64;
-}
+ let mut total_blocks = AverageIntProfileCounter::new("", None, None);
+ total_blocks.set(counters.allocated_rows.get() * MAX_VERTEX_TEXTURE_WIDTH);
-impl EventValue for f64 { fn into_f64(self) -> f64 { self } }
-impl EventValue for f32 { fn into_f64(self) -> f64 { self as f64 } }
-impl EventValue for u32 { fn into_f64(self) -> f64 { self as f64 } }
-impl EventValue for i32 { fn into_f64(self) -> f64 { self as f64 } }
-impl EventValue for u64 { fn into_f64(self) -> f64 { self as f64 } }
-impl EventValue for usize { fn into_f64(self) -> f64 { self as f64 } }
-
-/// A container for profiling information that moves along the rendering pipeline
-/// and is handed off to the profiler at the end.
-pub struct TransactionProfile {
- pub events: Vec<Event>,
-}
+ let rect0 = self.draw_bar(
+ &format!("GPU cache rows ({}):", counters.allocated_rows.get()),
+ ColorU::new(0xFF, 0xFF, 0xFF, 0xFF),
+ &[
+ (color_updated, &counters.updated_rows),
+ (color_free, &counters.allocated_rows),
+ ],
+ debug_renderer,
+ );
-impl TransactionProfile {
- pub fn new() -> Self {
- TransactionProfile {
- events: vec![Event::None; NUM_PROFILER_EVENTS],
- }
- }
+ let rect1 = self.draw_bar(
+ "GPU cache blocks",
+ ColorU::new(0xFF, 0xFF, 0, 0xFF),
+ &[
+ (color_updated, &counters.updated_blocks),
+ (color_saved, &requested_blocks),
+ (color_free, &counters.allocated_blocks),
+ (ColorU::new(0, 0, 0, 0xFF), &total_blocks),
+ ],
+ debug_renderer,
+ );
- pub fn start_time(&mut self, id: usize) {
- let ms = ns_to_ms(precise_time_ns());
- self.events[id] = Event::Start(ms);
- }
+ let total_rect = rect0.union(&rect1).inflate(10.0, 10.0);
+ debug_renderer.add_quad(
+ total_rect.origin.x,
+ total_rect.origin.y,
+ total_rect.origin.x + total_rect.size.width,
+ total_rect.origin.y + total_rect.size.height,
+ ColorF::new(0.1, 0.1, 0.1, 0.8).into(),
+ ColorF::new(0.2, 0.2, 0.2, 0.8).into(),
+ );
- pub fn end_time(&mut self, id: usize) -> f64 {
- self.end_time_if_started(id).unwrap()
+ self.draw_state.y_left = total_rect.origin.y + total_rect.size.height + 30.0;
}
- /// Similar to end_time, but doesn't panic if not matched with start_time.
- pub fn end_time_if_started(&mut self, id: usize) -> Option<f64> {
- if let Event::Start(start) = self.events[id] {
- let time = ns_to_ms(precise_time_ns()) - start;
- self.events[id] = Event::Value(time);
-
- Some(time)
- } else {
- None
- }
- }
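-
- // Illustrative use of the start/end pair, where `id` is some profiler event
- // index (hypothetical here):
- //
- //   profile.start_time(id);
- //   /* timed work */
- //   let ms = profile.end_time(id);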
+ fn draw_frame_bars(
+ &mut self,
+ counters: &FrameProfileCounters,
+ debug_renderer: &mut DebugRenderer,
+ ) {
+ let rect0 = self.draw_bar(
+ &format!("primitives ({}):", counters.total_primitives.get()),
+ ColorU::new(0xFF, 0xFF, 0xFF, 0xFF),
+ &[
+ (ColorU::new(0, 0, 0xFF, 0xFF), &counters.visible_primitives),
+ (ColorU::new(0, 0, 0, 0xFF), &counters.total_primitives),
+ ],
+ debug_renderer,
+ );
- pub fn set<T>(&mut self, id: usize, value: T) where T: EventValue {
- self.set_f64(id, value.into_f64());
- }
+ let rect1 = self.draw_bar(
+ &format!("GPU targets ({}):", &counters.targets_used.get()),
+ ColorU::new(0xFF, 0xFF, 0, 0xFF),
+ &[
+ (ColorU::new(0, 0, 0xFF, 0xFF), &counters.targets_created),
+ (ColorU::new(0xFF, 0, 0, 0xFF), &counters.targets_changed),
+ (ColorU::new(0, 0xFF, 0, 0xFF), &counters.targets_used),
+ ],
+ debug_renderer,
+ );
+ let total_rect = rect0.union(&rect1).inflate(10.0, 10.0);
+ debug_renderer.add_quad(
+ total_rect.origin.x,
+ total_rect.origin.y,
+ total_rect.origin.x + total_rect.size.width,
+ total_rect.origin.y + total_rect.size.height,
+ ColorF::new(0.1, 0.1, 0.1, 0.8).into(),
+ ColorF::new(0.2, 0.2, 0.2, 0.8).into(),
+ );
- pub fn set_f64(&mut self, id: usize, value: f64) {
- self.events[id] = Event::Value(value);
+ self.draw_state.y_left = total_rect.origin.y + total_rect.size.height + 30.0;
}
- pub fn get(&self, id: usize) -> Option<f64> {
- if let Event::Value(val) = self.events[id] {
- Some(val)
- } else {
- None
- }
+ fn draw_compact_profile(
+ &mut self,
+ backend_profile: &BackendProfileCounters,
+ renderer_profile: &RendererProfileCounters,
+ debug_renderer: &mut DebugRenderer,
+ ) {
+ Profiler::draw_counters(
+ &[
+ &renderer_profile.frame_time as &dyn ProfileCounter,
+ &renderer_profile.color_passes,
+ &renderer_profile.alpha_passes,
+ &renderer_profile.draw_calls,
+ &renderer_profile.vertices,
+ &renderer_profile.rendered_picture_cache_tiles,
+ &renderer_profile.texture_data_uploaded,
+ &backend_profile.resources.content_slices,
+ &self.ipc_time,
+ &self.backend_time,
+ &self.renderer_time,
+ &self.gpu_time,
+ ],
+ None,
+ debug_renderer,
+ true,
+ &mut self.draw_state,
+ );
}
- pub fn get_or(&self, id: usize, or: f64) -> f64 {
- self.get(id).unwrap_or(or)
- }
+ fn draw_full_profile(
+ &mut self,
+ frame_profiles: &[FrameProfileCounters],
+ backend_profile: &BackendProfileCounters,
+ renderer_profile: &RendererProfileCounters,
+ renderer_timers: &mut RendererProfileTimers,
+ gpu_samplers: &[GpuSampler<GpuProfileTag>],
+ screen_fraction: f32,
+ debug_renderer: &mut DebugRenderer,
+ ) {
+ Profiler::draw_counters(
+ &[
+ &renderer_profile.frame_time as &dyn ProfileCounter,
+ &renderer_profile.frame_counter,
+ &renderer_profile.color_passes,
+ &renderer_profile.alpha_passes,
+ &renderer_profile.rendered_picture_cache_tiles,
+ &renderer_profile.total_picture_cache_tiles,
+ &renderer_profile.texture_data_uploaded,
+ &backend_profile.resources.content_slices,
+ &backend_profile.resources.texture_cache.shared_bytes,
+ &backend_profile.resources.texture_cache.standalone_bytes,
+ ],
+ None,
+ debug_renderer,
+ true,
+ &mut self.draw_state
+ );
- pub fn add<T>(&mut self, id: usize, n: T) where T: EventValue {
- let n = n.into_f64();
+ self.draw_gpu_cache_bars(
+ &backend_profile.resources.gpu_cache,
+ debug_renderer,
+ );
- let evt = &mut self.events[id];
+ Profiler::draw_counters(
+ &[
+ &backend_profile.resources.font_templates,
+ &backend_profile.resources.image_templates,
+ ],
+ None,
+ debug_renderer,
+ true,
+ &mut self.draw_state
+ );
- let val = match *evt {
- Event::Value(v) => v + n,
- Event::None => n,
- Event::Start(..) => { panic!(); }
- };
+ backend_profile.intern.draw(debug_renderer, &mut self.draw_state);
- *evt = Event::Value(val);
- }
+ Profiler::draw_counters(
+ &[
+ &backend_profile.resources.texture_cache.pages_alpha8_linear,
+ &backend_profile.resources.texture_cache.pages_color8_linear,
+ &backend_profile.resources.texture_cache.pages_color8_nearest,
+ &backend_profile.txn.display_lists,
+ ],
+ None,
+ debug_renderer,
+ true,
+ &mut self.draw_state
+ );
- pub fn inc(&mut self, id: usize) {
- self.add(id, 1.0);
- }
+ Profiler::draw_counters(
+ &[
+ &backend_profile.txn.display_list_build_time,
+ &backend_profile.txn.scene_build_time,
+ &backend_profile.txn.content_send_time,
+ &backend_profile.txn.api_send_time,
+ &backend_profile.txn.total_send_time,
+ ],
+ None,
+ debug_renderer,
+ true,
+ &mut self.draw_state
+ );
- pub fn take(&mut self) -> Self {
- TransactionProfile {
- events: std::mem::take(&mut self.events),
+ for frame_profile in frame_profiles {
+ self.draw_frame_bars(frame_profile, debug_renderer);
}
- }
- pub fn take_and_reset(&mut self) -> Self {
- let events = std::mem::take(&mut self.events);
-
- *self = TransactionProfile::new();
+ Profiler::draw_counters(
+ &[&renderer_profile.draw_calls, &renderer_profile.vertices],
+ None,
+ debug_renderer,
+ true,
+ &mut self.draw_state
+ );
- TransactionProfile { events }
- }
+ Profiler::draw_counters(
+ &[
+ &backend_profile.total_time,
+ &renderer_timers.cpu_time,
+ &renderer_timers.gpu_graph,
+ ],
+ None,
+ debug_renderer,
+ false,
+ &mut self.draw_state
+ );
- pub fn merge(&mut self, other: &mut Self) {
- for i in 0..self.events.len() {
- match (self.events[i], other.events[i]) {
- (Event::Value(v1), Event::Value(v2)) => {
- self.events[i] = Event::Value(v1.max(v2));
- }
- (Event::Value(_), _) => {}
- (_, Event::Value(v2)) => {
- self.events[i] = Event::Value(v2);
- }
- (Event::None, evt) => {
- self.events[i] = evt;
+ if !gpu_samplers.is_empty() {
+ let mut samplers = Vec::<PercentageProfileCounter>::new();
+ // Gathering unique GPU samplers. This has O(N^2) complexity,
+ // but we only have a few samplers per target.
+ let mut total = 0.0;
+ for sampler in gpu_samplers {
+ let value = sampler.count as f32 * screen_fraction;
+ total += value;
+ match samplers.iter().position(|s| {
+ s.description as *const _ == sampler.tag.label as *const _
+ }) {
+ Some(pos) => samplers[pos].value += value,
+ None => samplers.push(PercentageProfileCounter {
+ description: sampler.tag.label,
+ value,
+ }),
}
- (Event::Start(..), Event::Start(s)) => {
- self.events[i] = Event::Start(s);
- }
- _=> {}
}
- other.events[i] = Event::None;
+ samplers.push(PercentageProfileCounter {
+ description: "Total",
+ value: total,
+ });
+ let samplers: Vec<&dyn ProfileCounter> = samplers.iter().map(|sampler| {
+ sampler as &dyn ProfileCounter
+ }).collect();
+ Profiler::draw_counters(
+ &samplers,
+ None,
+ debug_renderer,
+ false,
+ &mut self.draw_state,
+ );
}
- }
- pub fn clear(&mut self) {
- for evt in &mut self.events {
- *evt = Event::None;
- }
+ let rect =
+ self.backend_graph
+ .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "CPU (backend)", debug_renderer);
+ self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+ let rect = self.renderer_graph.draw_graph(
+ self.draw_state.x_right,
+ self.draw_state.y_right,
+ "CPU (renderer)",
+ debug_renderer,
+ );
+ self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+ let rect =
+ self.ipc_graph
+ .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList IPC", debug_renderer);
+ self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+
+ let rect = self.display_list_build_graph
+ .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList build", debug_renderer);
+ self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+
+ let rect = self.scene_build_graph
+ .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "Scene build", debug_renderer);
+ self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+
+ let rect = self.gpu_graph
+ .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "GPU", debug_renderer);
+ self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+
+ let rect = self.blob_raster_graph
+ .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "Blob pixels", debug_renderer);
+ self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
+
+ let rect = self.gpu_frames
+ .draw(self.draw_state.x_left, f32::max(self.draw_state.y_left, self.draw_state.y_right), debug_renderer);
+ self.draw_state.y_right += rect.size.height + PROFILE_PADDING;
}
-}
-#[derive(Debug)]
-pub struct GraphStats {
- pub min: f64,
- pub avg: f64,
- pub max: f64,
- pub sum: f64,
- pub samples: usize,
-}
-
-#[derive(Debug)]
-pub struct Graph {
- values: VecDeque<f64>,
-}
+ fn draw_smart_profile(
+ &mut self,
+ backend_profile: &BackendProfileCounters,
+ renderer_profile: &RendererProfileCounters,
+ debug_renderer: &mut DebugRenderer,
+ ) {
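+ // One cooldown slot per counter drawn below (3 + 6 + 5 + 4 = 18 across the four groups).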
+ while self.cooldowns.len() < 18 {
+ self.cooldowns.push(0);
+ }
-impl Graph {
- fn new(max_samples: usize) -> Self {
- let mut values = VecDeque::new();
- values.reserve(max_samples);
+ // Always show the fps counter.
+ Profiler::draw_counters(
+ &[
+ &renderer_profile.frame_time,
+ ],
+ None,
+ debug_renderer,
+ true,
+ &mut self.draw_state,
+ );
- Graph { values }
- }
+ let mut start = 0;
+ let counters: &[&[&dyn ProfileCounter]] = &[
+ &[
+ &self.backend_time,
+ &self.renderer_time,
+ &self.gpu_time,
+ ],
+ &[
+ &renderer_profile.color_passes,
+ &renderer_profile.alpha_passes,
+ &renderer_profile.draw_calls,
+ &renderer_profile.vertices,
+ &renderer_profile.rendered_picture_cache_tiles,
+ &renderer_profile.total_picture_cache_tiles,
+ ],
+ &[
+ &backend_profile.resources.gpu_cache.allocated_rows,
+ &backend_profile.resources.gpu_cache.updated_rows,
+ &backend_profile.resources.gpu_cache.allocated_blocks,
+ &backend_profile.resources.gpu_cache.updated_blocks,
+ &backend_profile.resources.gpu_cache.saved_blocks,
+ ],
+ &[
+ &backend_profile.resources.image_templates,
+ &backend_profile.resources.font_templates,
+ &backend_profile.resources.texture_cache.rasterized_blob_pixels,
+ &backend_profile.txn.display_lists,
+ ],
+ ];
- fn set(&mut self, val: f64) {
- if self.values.len() == self.values.capacity() {
- self.values.pop_back();
+ for group in counters {
+ let end = start + group.len();
+ Profiler::draw_counters(
+ &group[..],
+ Some(&mut self.cooldowns[start..end]),
+ debug_renderer,
+ true,
+ &mut self.draw_state,
+ );
+ start = end;
}
- self.values.push_front(val);
}
- pub fn stats(&self) -> GraphStats {
- let mut stats = GraphStats {
- min: f64::MAX,
- avg: 0.0,
- max: -f64::MAX,
- sum: 0.0,
- samples: 0,
- };
-
- let mut samples = 0;
- for value in &self.values {
- if value.is_finite() {
- stats.min = stats.min.min(*value);
- stats.max = stats.max.max(*value);
- stats.sum += *value;
- samples += 1;
- }
+ pub fn draw_profile(
+ &mut self,
+ frame_profiles: &[FrameProfileCounters],
+ backend_profile: &BackendProfileCounters,
+ renderer_profile: &RendererProfileCounters,
+ renderer_timers: &mut RendererProfileTimers,
+ gpu_samplers: &[GpuSampler<GpuProfileTag>],
+ screen_fraction: f32,
+ debug_renderer: &mut DebugRenderer,
+ style: ProfileStyle,
+ ) {
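+ // Two-column layout: counter text starts on the left, graphs and GPU frames on the right.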
+ self.draw_state.x_left = 20.0;
+ self.draw_state.y_left = 50.0;
+ self.draw_state.x_right = 450.0;
+ self.draw_state.y_right = 40.0;
+
+ let mut gpu_graph = 0;
+ let gpu_graphrs = mem::replace(&mut renderer_timers.gpu_samples, Vec::new());
+ for sample in &gpu_graphrs {
+ gpu_graph += sample.time_ns;
}
-
- if samples > 0 {
- stats.avg = stats.sum / samples as f64;
- stats.samples = samples;
+ renderer_timers.gpu_graph.set(gpu_graph);
+
+ self.backend_graph
+ .push(backend_profile.total_time.nanoseconds);
+ self.backend_time.set(backend_profile.total_time.nanoseconds);
+ self.renderer_graph
+ .push(renderer_timers.cpu_time.nanoseconds);
+ self.renderer_time.set(renderer_timers.cpu_time.nanoseconds);
+ self.ipc_graph
+ .push(backend_profile.txn.total_send_time.nanoseconds);
+ self.display_list_build_graph
+ .push(backend_profile.txn.display_list_build_time.nanoseconds);
+ self.scene_build_graph
+ .push(backend_profile.txn.scene_build_time.nanoseconds);
+ self.blob_raster_graph
+ .push(backend_profile.resources.texture_cache.rasterized_blob_pixels.size as u64);
+ self.ipc_time.set(backend_profile.txn.total_send_time.nanoseconds);
+ self.gpu_graph.push(gpu_graph);
+ self.gpu_time.set(gpu_graph);
+ self.gpu_frames.push(gpu_graph, gpu_graphrs);
+
+ match style {
+ ProfileStyle::Full => {
+ self.draw_full_profile(
+ frame_profiles,
+ backend_profile,
+ renderer_profile,
+ renderer_timers,
+ gpu_samplers,
+ screen_fraction,
+ debug_renderer,
+ );
+ }
+ ProfileStyle::Compact => {
+ self.draw_compact_profile(
+ backend_profile,
+ renderer_profile,
+ debug_renderer,
+ );
+ }
+ ProfileStyle::Smart => {
+ self.draw_smart_profile(
+ backend_profile,
+ renderer_profile,
+ debug_renderer,
+ );
+ }
}
-
- stats
}
}
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum ShowAs {
- Float,
- Int,
-}
-
-struct ProfilerFrame {
- total_time: u64,
- samples: Vec<GpuTimer>,
-}
-
-struct ProfilerFrameCollection {
- frames: VecDeque<ProfilerFrame>,
+pub struct ChangeIndicator {
+ counter: u32,
}
-impl ProfilerFrameCollection {
- fn new() -> Self {
- ProfilerFrameCollection {
- frames: VecDeque::new(),
+impl ChangeIndicator {
+ pub fn new() -> Self {
+ ChangeIndicator {
+ counter: 0
}
}
- fn push(&mut self, frame: ProfilerFrame) {
- if self.frames.len() == 20 {
- self.frames.pop_back();
- }
- self.frames.push_front(frame);
+ pub fn changed(&mut self) {
+ self.counter = (self.counter + 1) % 15;
}
-}
-
-impl From<FullFrameStats> for ProfilerFrame {
- fn from(stats: FullFrameStats) -> ProfilerFrame {
- let new_sample = |time, label, color| -> GpuTimer {
- let tag = GpuProfileTag {
- label,
- color
- };
-
- let time_ns = ms_to_ns(time);
-
- GpuTimer {
- tag, time_ns
- }
- };
- let samples = vec![
- new_sample(stats.gecko_display_list_time, "Gecko DL", ColorF { r: 0.0, g: 1.0, b: 0.0, a: 1.0 }),
- new_sample(stats.wr_display_list_time, "WR DL", ColorF { r: 0.0, g: 1.0, b: 1.0, a: 1.0 }),
- new_sample(stats.scene_build_time, "Scene Build", ColorF { r: 1.0, g: 0.0, b: 1.0, a: 1.0 }),
- new_sample(stats.frame_build_time, "Frame Build", ColorF { r: 1.0, g: 0.0, b: 0.0, a: 1.0 }),
- ];
+ const WIDTH: f32 = 20.0;
+ const HEIGHT: f32 = 10.0;
- ProfilerFrame {
- total_time: ms_to_ns(stats.total()),
- samples
+ pub fn width() -> f32 {
+ ChangeIndicator::WIDTH * 16.0
}
- }
-}
-
-pub fn ns_to_ms(ns: u64) -> f64 {
- ns as f64 / 1_000_000.0
-}
-pub fn ms_to_ns(ms: f64) -> u64 {
- (ms * 1_000_000.0) as u64
-}
-
-pub fn bytes_to_mb(bytes: usize) -> f64 {
- bytes as f64 / 1_000_000.0
-}
+ pub fn draw(
+ &self,
+ x: f32, y: f32,
+ color: ColorU,
+ debug_renderer: &mut DebugRenderer
+ ) {
+ let margin = 0.0;
+ let tx = self.counter as f32 * ChangeIndicator::WIDTH;
+ debug_renderer.add_quad(
+ x - margin,
+ y - margin,
+ x + 15.0 * ChangeIndicator::WIDTH + margin,
+ y + ChangeIndicator::HEIGHT + margin,
+ ColorU::new(0, 0, 0, 150),
+ ColorU::new(0, 0, 0, 150),
+ );
-#[derive(Debug, PartialEq)]
-enum Item {
- Counters(Vec<usize>),
- Graph(usize),
- ChangeIndicator(usize),
- Fps,
- GpuTimeQueries,
- GpuCacheBars,
- PaintPhaseGraph,
- Text(String),
- Space,
- Column,
- Row,
+ debug_renderer.add_quad(
+ x + tx,
+ y,
+ x + tx + ChangeIndicator::WIDTH,
+ y + ChangeIndicator::HEIGHT,
+ color,
+ ColorU::new(25, 25, 25, 255),
+ );
+ }
}
-
diff --git a/third_party/webrender/webrender/src/rectangle_occlusion.rs b/third_party/webrender/webrender/src/rectangle_occlusion.rs
deleted file mode 100644
index a79e4ba0261..00000000000
--- a/third_party/webrender/webrender/src/rectangle_occlusion.rs
+++ /dev/null
@@ -1,208 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! A simple occlusion culling algorithm for axis-aligned rectangles.
-//!
-//! ## Output
-//!
-//! Occlusion culling results in two lists of rectangles:
-//!
-//! - The opaque list should be rendered first. None of its rectangles overlap so order doesn't matter
-//! within the opaque pass.
-//! - The non-opaque list (or alpha list) which should be rendered in back-to-front order after the opaque pass.
-//!
-//! The output has minimal overdraw (no overdraw at all for opaque items and as little as possible for alpha ones).
-//!
-//! ## Algorithm overview
-//!
-//! The occlusion culling algorithm works in front-to-back order, accumulating rectangles into the opaque and non-opaque lists.
-//! Each time a rectangle is added, it is first tested against existing opaque rectangles and potentially split into visible
-//! sub-rectangles, or even discarded completely. The front-to-back order ensures that once a rectangle is added it does not
-//! have to be modified again, making the underlying data structure trivial (append-only).
-//!
-//! ## Splitting
-//!
-//! Partially visible rectangles are split into up to 4 visible sub-rectangles by each intersecting occluder.
-//!
-//! ```ascii
-//! +----------------------+ +----------------------+
-//! | rectangle | | |
-//! | | | |
-//! | +-----------+ | +--+-----------+-------+
-//! | |occluder | | --> | |\\\\\\\\\\\| |
-//! | +-----------+ | +--+-----------+-------+
-//! | | | |
-//! +----------------------+ +----------------------+
-//! ```
-//!
-//! In the example above the rectangle is split into 4 visible parts with the central occluded part left out.
-//!
-//! This implementation favors longer horizontal bands instead of creating nine-patches to deal with the corners.
-//! The advantage is that it produces fewer rectangles, which is good for the performance of the algorithm and
-//! for SWGL which likes long horizontal spans, however it would cause artifacts if the resulting rectangles
-//! were to be drawn with a non-axis-aligned transformation.
-//!
-//! ## Performance
-//!
-//! The cost of the algorithm grows with the number of opaque rectangles, as each new rectangle is tested against
-//! all previously added opaque rectangles.
-//!
-//! Note that opaque rectangles can either be added as opaque or non-opaque. This means a trade-off between
-//! overdraw and number of rectangles can be explored to adjust performance: Small opaque rectangles, especially
-//! towards the front of the scene, could be added as non-opaque to avoid causing many splits while adding only
-//! a small amount of overdraw.
-//!
-//! This implementation is intended to be used with a small number of (opaque) items. A similar implementation
-//! could use a spatial acceleration structure for opaque rectangles to perform better with a large amount of
-//! occluders.
-//!
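-//! ## Example
-//!
-//! A minimal usage sketch (illustrative only; not compiled as a doc-test):
-//!
-//! ```ignore
-//! use euclid::point2;
-//! use api::units::DeviceBox2D;
-//!
-//! let mut builder = FrontToBackBuilder::with_capacity(8, 8);
-//! // The front-most opaque rectangle is added first.
-//! let front = DeviceBox2D::new(point2(0.0, 0.0), point2(100.0, 100.0));
-//! builder.add(&front, true, 0);
-//! // A rectangle behind it is split into its visible parts (right and bottom bands).
-//! let back = DeviceBox2D::new(point2(50.0, 50.0), point2(150.0, 150.0));
-//! builder.add(&back, true, 1);
-//! assert_eq!(builder.opaque_items().len(), 3);
-//! ```
-//!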
-
-use euclid::point2;
-use smallvec::SmallVec;
-use api::units::*;
-
-/// A visible part of a rectangle after occlusion culling.
-#[derive(Debug, PartialEq)]
-pub struct Item {
- pub rectangle: DeviceBox2D,
- pub key: usize,
-}
-
-/// A builder that applies occlusion culling with rectangles provided in front-to-back order.
-pub struct FrontToBackBuilder {
- opaque_items: Vec<Item>,
- alpha_items: Vec<Item>,
-}
-
-impl FrontToBackBuilder {
-
- /// Pre-allocating constructor.
- pub fn with_capacity(opaque: usize, alpha: usize) -> Self {
- FrontToBackBuilder {
- opaque_items: Vec::with_capacity(opaque),
- alpha_items: Vec::with_capacity(alpha),
- }
- }
-
- /// Add a rectangle, potentially splitting it and discarding the occluded parts if any.
- ///
- /// Returns true if the rectangle is at least partially visible.
- pub fn add(&mut self, rect: &DeviceBox2D, is_opaque: bool, key: usize) -> bool {
- let mut fragments: SmallVec<[DeviceBox2D; 16]> = SmallVec::new();
- fragments.push(*rect);
-
- for item in &self.opaque_items {
- if fragments.is_empty() {
- break;
- }
- if item.rectangle.intersects(rect) {
- apply_occluder(&item.rectangle, &mut fragments);
- }
- }
-
- let list = if is_opaque {
- &mut self.opaque_items
- } else {
- &mut self.alpha_items
- };
-
- for rect in &fragments {
- list.push(Item {
- rectangle: *rect,
- key,
- });
- }
-
- !fragments.is_empty()
- }
-
- /// Returns true if the provided rect is at least partially visible, without adding it.
- pub fn test(&self, rect: &DeviceBox2D) -> bool {
- let mut fragments: SmallVec<[DeviceBox2D; 16]> = SmallVec::new();
- fragments.push(*rect);
-
- for item in &self.opaque_items {
- if item.rectangle.intersects(rect) {
- apply_occluder(&item.rectangle, &mut fragments);
- }
- }
-
- !fragments.is_empty()
- }
-
- /// The visible opaque rectangles (front-to-back order).
- pub fn opaque_items(&self) -> &[Item] {
- &self.opaque_items
- }
-
- /// The visible non-opaque rectangles (front-to-back order).
- pub fn alpha_items(&self) -> &[Item] {
- &self.alpha_items
- }
-}
-
-
-// Split the rects in the provided vector against the occluder, keeping only the visible parts.
-fn apply_occluder(occluder: &DeviceBox2D, rects: &mut SmallVec<[DeviceBox2D; 16]>) {
- // Iterate in reverse order so that we can push new rects at the back without
- // visiting them again.
- let mut i = rects.len() - 1;
- loop {
- let r = rects[i];
-
- if r.intersects(occluder) {
- let top = r.min.y < occluder.min.y;
- let bottom = r.max.y > occluder.max.y;
- let left = r.min.x < occluder.min.x;
- let right = r.max.x > occluder.max.x;
-
- if top {
- rects.push(DeviceBox2D {
- min: r.min,
- max: point2(r.max.x, occluder.min.y),
- });
- }
-
- if bottom {
- rects.push(DeviceBox2D {
- min: point2(r.min.x, occluder.max.y),
- max: r.max,
- });
- }
-
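- // The left and right fragments only span the vertical overlap with the
- // occluder, which favors long horizontal bands (see the module docs above).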
- if left {
- let min_y = r.min.y.max(occluder.min.y);
- let max_y = r.max.y.min(occluder.max.y);
- rects.push(DeviceBox2D {
- min: point2(r.min.x, min_y),
- max: point2(occluder.min.x, max_y),
- });
- }
-
- if right {
- let min_y = r.min.y.max(occluder.min.y);
- let max_y = r.max.y.min(occluder.max.y);
- rects.push(DeviceBox2D {
- min: point2(occluder.max.x, min_y),
- max: point2(r.max.x, max_y),
- });
- }
-
- // Remove the original rectangle, replacing it with
- // one of the new ones we just added, or popping it
- // if it is the last item.
- if i == rects.len() {
- rects.pop();
- } else {
- rects.swap_remove(i);
- }
- }
-
- if i == 0 {
- break;
- }
-
- i -= 1;
- }
-}
diff --git a/third_party/webrender/webrender/src/render_api.rs b/third_party/webrender/webrender/src/render_api.rs
deleted file mode 100644
index b2c8a64e888..00000000000
--- a/third_party/webrender/webrender/src/render_api.rs
+++ /dev/null
@@ -1,1447 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#![deny(missing_docs)]
-
-use std::cell::Cell;
-use std::fmt;
-use std::marker::PhantomData;
-use std::path::PathBuf;
-use std::sync::Arc;
-use std::u32;
-use api::HitTestFlags;
-use time::precise_time_ns;
-//use crate::api::peek_poke::PeekPoke;
-use crate::api::channel::{Sender, single_msg_channel, unbounded_channel};
-use crate::api::{ColorF, BuiltDisplayList, IdNamespace, ExternalScrollId};
-use crate::api::{SharedFontInstanceMap, FontKey, FontInstanceKey, NativeFontHandle, ZoomFactor};
-use crate::api::{BlobImageData, BlobImageKey, ImageData, ImageDescriptor, ImageKey, Epoch, QualitySettings};
-use crate::api::{BlobImageParams, BlobImageRequest, BlobImageResult, AsyncBlobImageRasterizer, BlobImageHandler};
-use crate::api::{DocumentId, PipelineId, PropertyBindingId, PropertyBindingKey, ExternalEvent};
-use crate::api::{HitTestResult, HitTesterRequest, ApiHitTester, PropertyValue, DynamicProperties};
-use crate::api::{ScrollClamping, TileSize, NotificationRequest, DebugFlags, ScrollNodeState};
-use crate::api::{GlyphDimensionRequest, GlyphIndexRequest, GlyphIndex, GlyphDimensions};
-use crate::api::{FontInstanceOptions, FontInstancePlatformOptions, FontVariation};
-use crate::api::DEFAULT_TILE_SIZE;
-use crate::api::units::*;
-use crate::api_resources::ApiResources;
-use crate::scene_builder_thread::{SceneBuilderRequest, SceneBuilderResult};
-use crate::intern::InterningMemoryReport;
-use crate::profiler::{self, TransactionProfile};
-
-#[repr(C)]
-#[derive(Clone, Copy, Debug)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-struct ResourceId(pub u32);
-
-/// Update of a persistent resource in WebRender.
-///
-/// ResourceUpdate changes keep their effect across display list changes.
-#[derive(Clone)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub enum ResourceUpdate {
- /// See `AddImage`.
- AddImage(AddImage),
- /// See `UpdateImage`.
- UpdateImage(UpdateImage),
- /// Delete an existing image resource.
- ///
- /// It is invalid to continue referring to the image key in any display list
- /// in the transaction that contains the `DeleteImage` message and subsequent
- /// transactions.
- DeleteImage(ImageKey),
- /// See `AddBlobImage`.
- AddBlobImage(AddBlobImage),
- /// See `UpdateBlobImage`.
- UpdateBlobImage(UpdateBlobImage),
- /// Delete an existing blob-image resource.
- DeleteBlobImage(BlobImageKey),
- /// See `AddBlobImage::visible_area`.
- SetBlobImageVisibleArea(BlobImageKey, DeviceIntRect),
- /// See `AddFont`.
- AddFont(AddFont),
- /// Deletes an already existing font resource.
- ///
- /// It is invalid to continue referring to the font key in any display list
- /// in the transaction that contains the `DeleteFont` message and subsequent
- /// transactions.
- DeleteFont(FontKey),
- /// See `AddFontInstance`.
- AddFontInstance(AddFontInstance),
- /// Deletes an already existing font instance resource.
- ///
- /// It is invalid to continue referring to the font instance in any display
- /// list in the transaction that contains the `DeleteFontInstance` message and
- /// subsequent transactions.
- DeleteFontInstance(FontInstanceKey),
-}
-
-impl fmt::Debug for ResourceUpdate {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match self {
- ResourceUpdate::AddImage(ref i) => f.write_fmt(format_args!(
- "ResourceUpdate::AddImage size({:?})",
- &i.descriptor.size
- )),
- ResourceUpdate::UpdateImage(ref i) => f.write_fmt(format_args!(
- "ResourceUpdate::UpdateImage size({:?})",
- &i.descriptor.size
- )),
- ResourceUpdate::AddBlobImage(ref i) => f.write_fmt(format_args!(
- "ResourceUFpdate::AddBlobImage size({:?})",
- &i.descriptor.size
- )),
- ResourceUpdate::UpdateBlobImage(i) => f.write_fmt(format_args!(
- "ResourceUpdate::UpdateBlobImage size({:?})",
- &i.descriptor.size
- )),
- ResourceUpdate::DeleteImage(..) => f.write_str("ResourceUpdate::DeleteImage"),
- ResourceUpdate::DeleteBlobImage(..) => f.write_str("ResourceUpdate::DeleteBlobImage"),
- ResourceUpdate::SetBlobImageVisibleArea(..) => f.write_str("ResourceUpdate::SetBlobImageVisibleArea"),
- ResourceUpdate::AddFont(..) => f.write_str("ResourceUpdate::AddFont"),
- ResourceUpdate::DeleteFont(..) => f.write_str("ResourceUpdate::DeleteFont"),
- ResourceUpdate::AddFontInstance(..) => f.write_str("ResourceUpdate::AddFontInstance"),
- ResourceUpdate::DeleteFontInstance(..) => f.write_str("ResourceUpdate::DeleteFontInstance"),
- }
- }
-}
-
-/// Whether to generate a frame, and if so, an id that allows tracking this
-/// transaction through the various frame stages.
-#[derive(Clone, Debug)]
-pub enum GenerateFrame {
- /// Generate a frame if something changed.
- Yes {
- /// An id that allows tracking the frame transaction through the various
- /// frame stages. Specified by the caller of generate_frame().
- id: u64,
- },
- /// Don't generate a frame even if something has changed.
- No,
-}
-
-impl GenerateFrame {
- ///
- pub fn as_bool(&self) -> bool {
- match self {
- GenerateFrame::Yes { .. } => true,
- GenerateFrame::No => false,
- }
- }
-
- /// Return the frame ID, if a frame is generated.
- pub fn id(&self) -> Option<u64> {
- match self {
- GenerateFrame::Yes { id } => Some(*id),
- GenerateFrame::No => None,
- }
- }
-}
-
-/// A Transaction is a group of commands to apply atomically to a document.
-///
-/// This mechanism ensures that:
-/// - no other message can be interleaved between two commands that need to be applied together.
-/// - no redundant work is performed if two commands in the same transaction cause the scene or
-/// the frame to be rebuilt.
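-///
-/// # Example
-///
-/// A minimal sketch of building and populating a transaction (mirroring the
-/// methods defined below):
-///
-/// ```
-/// # use webrender::api::PipelineId;
-/// # use webrender::render_api::Transaction;
-/// # fn example() {
-/// let mut txn = Transaction::new();
-/// txn.set_root_pipeline(PipelineId(0, 0));
-/// txn.generate_frame(0);
-/// assert!(!txn.is_empty());
-/// # }
-/// ```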
-pub struct Transaction {
- /// Operations affecting the scene (applied before scene building).
- scene_ops: Vec<SceneMsg>,
- /// Operations affecting the generation of frames (applied after scene building).
- frame_ops: Vec<FrameMsg>,
-
- notifications: Vec<NotificationRequest>,
-
- /// Persistent resource updates to apply as part of this transaction.
- pub resource_updates: Vec<ResourceUpdate>,
-
- /// True if the transaction needs the scene building thread's attention.
- /// False for things that can skip the scene builder, like APZ changes and
- /// async images.
- ///
- /// Before this `Transaction` is converted to a `TransactionMsg`, we look
- /// over its contents and set this if we're doing anything the scene builder
- /// needs to know about, so this is only a default.
- use_scene_builder_thread: bool,
-
- /// Whether to generate a frame, and if so, an id that allows tracking this
- /// transaction through the various frame stages. Specified by the caller of
- /// generate_frame().
- generate_frame: GenerateFrame,
-
- /// Set to true in order to force re-rendering even if WebRender can't internally
- /// detect that something has changed.
- pub invalidate_rendered_frame: bool,
-
- low_priority: bool,
-}
-
-impl Transaction {
- /// Constructor.
- pub fn new() -> Self {
- Transaction {
- scene_ops: Vec::new(),
- frame_ops: Vec::new(),
- resource_updates: Vec::new(),
- notifications: Vec::new(),
- use_scene_builder_thread: true,
- generate_frame: GenerateFrame::No,
- invalidate_rendered_frame: false,
- low_priority: false,
- }
- }
-
- /// Marks this transaction to allow it to skip going through the scene builder
- /// thread.
- ///
- /// This is useful to avoid jank in transactions associated with animated
- /// property updates, panning, and zooming.
- ///
- /// Note that transactions that skip the scene builder thread can race ahead of
- /// transactions that don't skip it.
- pub fn skip_scene_builder(&mut self) {
- self.use_scene_builder_thread = false;
- }
-
- /// Marks this transaction to enforce going through the scene builder thread.
- pub fn use_scene_builder_thread(&mut self) {
- self.use_scene_builder_thread = true;
- }
-
- /// Returns true if the transaction has no effect.
- pub fn is_empty(&self) -> bool {
- !self.generate_frame.as_bool() &&
- !self.invalidate_rendered_frame &&
- self.scene_ops.is_empty() &&
- self.frame_ops.is_empty() &&
- self.resource_updates.is_empty() &&
- self.notifications.is_empty()
- }
-
- /// Update a pipeline's epoch.
- pub fn update_epoch(&mut self, pipeline_id: PipelineId, epoch: Epoch) {
- // We track epochs before and after scene building.
- // This one will be applied to the pending scene right away:
- self.scene_ops.push(SceneMsg::UpdateEpoch(pipeline_id, epoch));
- // And this one will be applied to the currently built scene at the end
- // of the transaction (potentially long after the scene_ops one).
- self.frame_ops.push(FrameMsg::UpdateEpoch(pipeline_id, epoch));
- // We could avoid the duplication here by storing the epoch updates in a
- // separate array and let the render backend schedule the updates at the
- // proper times, but it wouldn't make things simpler.
- }
-
- /// Sets the root pipeline.
- ///
- /// # Examples
- ///
- /// ```
- /// # use webrender::api::{PipelineId};
- /// # use webrender::api::units::{DeviceIntSize};
- /// # use webrender::render_api::{RenderApiSender, Transaction};
- /// # fn example() {
- /// let pipeline_id = PipelineId(0, 0);
- /// let mut txn = Transaction::new();
- /// txn.set_root_pipeline(pipeline_id);
- /// # }
- /// ```
- pub fn set_root_pipeline(&mut self, pipeline_id: PipelineId) {
- self.scene_ops.push(SceneMsg::SetRootPipeline(pipeline_id));
- }
-
- /// Removes data associated with a pipeline from the internal data structures.
- /// If the specified `pipeline_id` is for the root pipeline, the root pipeline
- /// is reset back to `None`.
- pub fn remove_pipeline(&mut self, pipeline_id: PipelineId) {
- self.scene_ops.push(SceneMsg::RemovePipeline(pipeline_id));
- }
-
- /// Supplies a new frame to WebRender.
- ///
- /// Non-blocking, it notifies a worker process which processes the display list.
- ///
- /// Note: Scrolling does not require its own frame.
- ///
- /// Arguments:
- ///
- /// * `epoch`: The unique Frame ID, monotonically increasing.
- /// * `background`: The background color of this pipeline.
- /// * `viewport_size`: The size of the viewport for this frame.
- /// * `pipeline_id`: The ID of the pipeline that is supplying this display list.
- /// * `display_list`: The root Display list used in this frame.
- /// * `preserve_frame_state`: If a previous frame exists which matches this pipeline
- /// id, this setting determines if frame state (such as scrolling
- /// position) should be preserved for this new display list.
- pub fn set_display_list(
- &mut self,
- epoch: Epoch,
- background: Option<ColorF>,
- viewport_size: LayoutSize,
- (pipeline_id, mut display_list): (PipelineId, BuiltDisplayList),
- preserve_frame_state: bool,
- ) {
- display_list.set_send_time_ns(precise_time_ns());
- self.scene_ops.push(
- SceneMsg::SetDisplayList {
- display_list,
- epoch,
- pipeline_id,
- background,
- viewport_size,
- preserve_frame_state,
- }
- );
- }
-
- /// Add a set of persistent resource updates to apply as part of this transaction.
- pub fn update_resources(&mut self, mut resources: Vec<ResourceUpdate>) {
- self.resource_updates.append(&mut resources);
- }
-
- // Note: Gecko uses this to get notified when a transaction that contains
- // potentially long blob rasterization or scene build is ready to be rendered.
- // so that the tab-switching integration can react adequately when tab
- // switching takes too long. For this use case, what matters is that the
- // notification doesn't fire before scene building and blob rasterization.
-
- /// Trigger a notification at a certain stage of the rendering pipeline.
- ///
- /// Note that notification requests are skipped during serialization, so it is
- /// best to use them for synchronization purposes and not for things that could
- /// affect WebRender's state.
- pub fn notify(&mut self, event: NotificationRequest) {
- self.notifications.push(event);
- }
-
- /// Set up the output region in the framebuffer for a given document.
- pub fn set_document_view(
- &mut self,
- device_rect: DeviceIntRect,
- device_pixel_ratio: f32,
- ) {
- assert!(device_pixel_ratio > 0.0);
- window_size_sanity_check(device_rect.size);
- self.scene_ops.push(
- SceneMsg::SetDocumentView {
- device_rect,
- device_pixel_ratio,
- },
- );
- }
-
- /// Scrolls the node identified by the given external scroll id to the
- /// given scroll position, relative to the pre-scrolled offset for the
- /// scrolling layer. That is, providing an origin of (0,0) will reset
- /// any WR-side scrolling and just render the display items at the
- /// pre-scrolled offsets as provided in the display list. Larger `origin`
- /// values will cause the layer to be scrolled further towards the end of
- /// the scroll range.
- /// If the ScrollClamping argument is set to clamp, the scroll position
- /// is clamped to what WebRender understands to be the bounds of the
- /// scroll range, based on the sizes of the scrollable content and the
- /// scroll port.
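- ///
- /// For example, resetting a node's WR-side scrolling (a sketch; `id` is a
- /// caller-provided external scroll id):
- ///
- /// ```
- /// # use webrender::api::{ExternalScrollId, ScrollClamping};
- /// # use webrender::api::units::LayoutPoint;
- /// # use webrender::render_api::Transaction;
- /// # fn example(id: ExternalScrollId) {
- /// let mut txn = Transaction::new();
- /// txn.scroll_node_with_id(LayoutPoint::zero(), id, ScrollClamping::ToContentBounds);
- /// # }
- /// ```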
- pub fn scroll_node_with_id(
- &mut self,
- origin: LayoutPoint,
- id: ExternalScrollId,
- clamp: ScrollClamping,
- ) {
- self.frame_ops.push(FrameMsg::ScrollNodeWithId(origin, id, clamp));
- }
-
- /// Set the current quality / performance settings for this document.
- pub fn set_quality_settings(&mut self, settings: QualitySettings) {
- self.scene_ops.push(SceneMsg::SetQualitySettings { settings });
- }
-
- ///
- pub fn set_page_zoom(&mut self, page_zoom: ZoomFactor) {
- self.scene_ops.push(SceneMsg::SetPageZoom(page_zoom));
- }
-
- ///
- pub fn set_pinch_zoom(&mut self, pinch_zoom: ZoomFactor) {
- self.frame_ops.push(FrameMsg::SetPinchZoom(pinch_zoom));
- }
-
- ///
- pub fn set_is_transform_async_zooming(&mut self, is_zooming: bool, animation_id: PropertyBindingId) {
- self.frame_ops.push(FrameMsg::SetIsTransformAsyncZooming(is_zooming, animation_id));
- }
-
- ///
- pub fn set_pan(&mut self, pan: DeviceIntPoint) {
- self.frame_ops.push(FrameMsg::SetPan(pan));
- }
-
- /// Generate a new frame. When it's done and a RenderNotifier has been set
- /// in `webrender::Renderer`, [new_frame_ready()][notifier] gets called.
- /// Note that the notifier is called even if the frame generation was a
- /// no-op; the arguments passed to `new_frame_ready` will provide information
- /// as to what happened.
- ///
- /// [notifier]: trait.RenderNotifier.html#tymethod.new_frame_ready
- pub fn generate_frame(&mut self, id: u64) {
- self.generate_frame = GenerateFrame::Yes { id };
- }
-
- /// Invalidate the rendered frame. This ensures the frame will be re-rendered
- /// during the next frame generation. WebRender may skip frame rendering if
- /// there is no update, but some cases need to force rendering:
- /// - Content of image is updated by reusing same ExternalImageId.
- /// - Platform requests it if pixels become stale (like wakeup from standby).
- pub fn invalidate_rendered_frame(&mut self) {
- self.invalidate_rendered_frame = true;
- }
-
- /// Supply a list of animated property bindings that should be used to resolve
- /// bindings in the current display list.
- pub fn update_dynamic_properties(&mut self, properties: DynamicProperties) {
- self.frame_ops.push(FrameMsg::UpdateDynamicProperties(properties));
- }
-
- /// Add to the list of animated property bindings that should be used to
- /// resolve bindings in the current display list. This is a convenience method
- /// so the caller doesn't have to figure out all the dynamic properties before
- /// setting them on the transaction, but can supply them incrementally.
- pub fn append_dynamic_transform_properties(&mut self, transforms: Vec<PropertyValue<LayoutTransform>>) {
- self.frame_ops.push(FrameMsg::AppendDynamicTransformProperties(transforms));
- }
-
- /// Consumes this object and just returns the frame ops.
- pub fn get_frame_ops(self) -> Vec<FrameMsg> {
- self.frame_ops
- }
-
- fn finalize(self, document_id: DocumentId) -> Box<TransactionMsg> {
- Box::new(TransactionMsg {
- document_id,
- scene_ops: self.scene_ops,
- frame_ops: self.frame_ops,
- resource_updates: self.resource_updates,
- notifications: self.notifications,
- use_scene_builder_thread: self.use_scene_builder_thread,
- generate_frame: self.generate_frame,
- invalidate_rendered_frame: self.invalidate_rendered_frame,
- low_priority: self.low_priority,
- blob_rasterizer: None,
- blob_requests: Vec::new(),
- rasterized_blobs: Vec::new(),
- profile: TransactionProfile::new(),
- })
- }
-
- /// See `ResourceUpdate::AddImage`.
- pub fn add_image(
- &mut self,
- key: ImageKey,
- descriptor: ImageDescriptor,
- data: ImageData,
- tiling: Option<TileSize>,
- ) {
- self.resource_updates.push(ResourceUpdate::AddImage(AddImage {
- key,
- descriptor,
- data,
- tiling,
- }));
- }
-
- /// See `ResourceUpdate::UpdateImage`.
- pub fn update_image(
- &mut self,
- key: ImageKey,
- descriptor: ImageDescriptor,
- data: ImageData,
- dirty_rect: &ImageDirtyRect,
- ) {
- self.resource_updates.push(ResourceUpdate::UpdateImage(UpdateImage {
- key,
- descriptor,
- data,
- dirty_rect: *dirty_rect,
- }));
- }
-
- /// See `ResourceUpdate::DeleteImage`.
- pub fn delete_image(&mut self, key: ImageKey) {
- self.resource_updates.push(ResourceUpdate::DeleteImage(key));
- }
-
- /// See `ResourceUpdate::AddBlobImage`.
- pub fn add_blob_image(
- &mut self,
- key: BlobImageKey,
- descriptor: ImageDescriptor,
- data: Arc<BlobImageData>,
- visible_rect: DeviceIntRect,
- tile_size: Option<TileSize>,
- ) {
- self.resource_updates.push(
- ResourceUpdate::AddBlobImage(AddBlobImage {
- key,
- descriptor,
- data,
- visible_rect,
- tile_size: tile_size.unwrap_or(DEFAULT_TILE_SIZE),
- })
- );
- }
-
- /// See `ResourceUpdate::UpdateBlobImage`.
- pub fn update_blob_image(
- &mut self,
- key: BlobImageKey,
- descriptor: ImageDescriptor,
- data: Arc<BlobImageData>,
- visible_rect: DeviceIntRect,
- dirty_rect: &BlobDirtyRect,
- ) {
- self.resource_updates.push(
- ResourceUpdate::UpdateBlobImage(UpdateBlobImage {
- key,
- descriptor,
- data,
- visible_rect,
- dirty_rect: *dirty_rect,
- })
- );
- }
-
- /// See `ResourceUpdate::DeleteBlobImage`.
- pub fn delete_blob_image(&mut self, key: BlobImageKey) {
- self.resource_updates.push(ResourceUpdate::DeleteBlobImage(key));
- }
-
- /// See `ResourceUpdate::SetBlobImageVisibleArea`.
- pub fn set_blob_image_visible_area(&mut self, key: BlobImageKey, area: DeviceIntRect) {
- self.resource_updates.push(ResourceUpdate::SetBlobImageVisibleArea(key, area));
- }
-
- /// See `ResourceUpdate::AddFont`.
- pub fn add_raw_font(&mut self, key: FontKey, bytes: Vec<u8>, index: u32) {
- self.resource_updates
- .push(ResourceUpdate::AddFont(AddFont::Raw(key, Arc::new(bytes), index)));
- }
-
- /// See `ResourceUpdate::AddFont`.
- pub fn add_native_font(&mut self, key: FontKey, native_handle: NativeFontHandle) {
- self.resource_updates
- .push(ResourceUpdate::AddFont(AddFont::Native(key, native_handle)));
- }
-
- /// See `ResourceUpdate::DeleteFont`.
- pub fn delete_font(&mut self, key: FontKey) {
- self.resource_updates.push(ResourceUpdate::DeleteFont(key));
- }
-
- /// See `ResourceUpdate::AddFontInstance`.
- pub fn add_font_instance(
- &mut self,
- key: FontInstanceKey,
- font_key: FontKey,
- glyph_size: f32,
- options: Option<FontInstanceOptions>,
- platform_options: Option<FontInstancePlatformOptions>,
- variations: Vec<FontVariation>,
- ) {
- self.resource_updates
- .push(ResourceUpdate::AddFontInstance(AddFontInstance {
- key,
- font_key,
- glyph_size,
- options,
- platform_options,
- variations,
- }));
- }
-
- /// See `ResourceUpdate::DeleteFontInstance`.
- pub fn delete_font_instance(&mut self, key: FontInstanceKey) {
- self.resource_updates.push(ResourceUpdate::DeleteFontInstance(key));
- }
-
- /// A hint that this transaction can be processed at a lower priority. High-
- /// priority transactions can jump ahead of regular-priority transactions,
- /// but both high- and regular-priority transactions are processed in order
- /// relative to other transactions of the same priority.
- pub fn set_low_priority(&mut self, low_priority: bool) {
- self.low_priority = low_priority;
- }
-
- /// Returns whether this transaction is marked as low priority.
- pub fn is_low_priority(&self) -> bool {
- self.low_priority
- }
-}
-
-///
-pub struct DocumentTransaction {
- ///
- pub document_id: DocumentId,
- ///
- pub transaction: Transaction,
-}
-
-/// Represents a transaction in the format sent through the channel.
-pub struct TransactionMsg {
- ///
- pub document_id: DocumentId,
- /// Changes that require re-building the scene.
- pub scene_ops: Vec<SceneMsg>,
- /// Changes to animated properties that do not require re-building the scene.
- pub frame_ops: Vec<FrameMsg>,
- /// Updates to resources that persist across display lists.
- pub resource_updates: Vec<ResourceUpdate>,
- /// Whether to trigger frame building and rendering if something has changed.
- pub generate_frame: GenerateFrame,
- /// Whether to force frame building and rendering even if no changes are internally
- /// observed.
- pub invalidate_rendered_frame: bool,
- /// Whether to enforce that this transaction go through the scene builder.
- pub use_scene_builder_thread: bool,
- ///
- pub low_priority: bool,
-
- /// Handlers to notify at certain points of the pipeline.
- pub notifications: Vec<NotificationRequest>,
- ///
- pub blob_rasterizer: Option<Box<dyn AsyncBlobImageRasterizer>>,
- ///
- pub blob_requests: Vec<BlobImageParams>,
- ///
- pub rasterized_blobs: Vec<(BlobImageRequest, BlobImageResult)>,
- /// Collect various data along the rendering pipeline to display it in the embedded profiler.
- pub profile: TransactionProfile,
-}
-
-impl fmt::Debug for TransactionMsg {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- writeln!(f, "threaded={}, genframe={:?}, invalidate={}, low_priority={}",
- self.use_scene_builder_thread,
- self.generate_frame,
- self.invalidate_rendered_frame,
- self.low_priority,
- ).unwrap();
- for scene_op in &self.scene_ops {
- writeln!(f, "\t\t{:?}", scene_op).unwrap();
- }
-
- for frame_op in &self.frame_ops {
- writeln!(f, "\t\t{:?}", frame_op).unwrap();
- }
-
- for resource_update in &self.resource_updates {
- writeln!(f, "\t\t{:?}", resource_update).unwrap();
- }
- Ok(())
- }
-}
-
-impl TransactionMsg {
- /// Returns true if this transaction has no effect.
- pub fn is_empty(&self) -> bool {
- !self.generate_frame.as_bool() &&
- !self.invalidate_rendered_frame &&
- self.scene_ops.is_empty() &&
- self.frame_ops.is_empty() &&
- self.resource_updates.is_empty() &&
- self.notifications.is_empty()
- }
-}
-
-/// Creates an image resource with provided parameters.
-///
-/// Must be matched with a `DeleteImage` at some point to prevent memory leaks.
-#[derive(Clone)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub struct AddImage {
- /// A key to identify the image resource.
- pub key: ImageKey,
- /// Properties of the image.
- pub descriptor: ImageDescriptor,
- /// The pixels of the image.
- pub data: ImageData,
- /// An optional tiling scheme to apply when storing the image's data
- /// on the GPU. Applies to both width and heights of the tiles.
- ///
- /// Note that WebRender may internally choose to tile large images
- /// even if this member is set to `None`.
- pub tiling: Option<TileSize>,
-}
-
-/// Updates an already existing image resource.
-#[derive(Clone)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub struct UpdateImage {
- /// The key identifying the image resource to update.
- pub key: ImageKey,
- /// Properties of the image.
- pub descriptor: ImageDescriptor,
- /// The pixels of the image.
- pub data: ImageData,
- /// An optional dirty rect that lets WebRender optimize the amount of
- /// data to transfer to the GPU.
- ///
- /// The data provided must still represent the entire image.
- pub dirty_rect: ImageDirtyRect,
-}
-
-/// Creates a blob-image resource with provided parameters.
-///
-/// Must be matched with a `DeleteImage` at some point to prevent memory leaks.
-#[derive(Clone)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub struct AddBlobImage {
- /// A key to identify the blob-image resource.
- pub key: BlobImageKey,
- /// Properties of the image.
- pub descriptor: ImageDescriptor,
- /// The blob-image's serialized commands.
- pub data: Arc<BlobImageData>,
- /// The portion of the plane in the blob-image's internal coordinate
- /// system that is stretched to fill the image display item.
- ///
- /// Unlike regular images, blob images are not limited in size. The
- /// top-left corner of their internal coordinate system is also not
- /// necessarily at (0, 0).
- /// This means that blob images can be updated to insert/remove content
- /// in any direction to support panning and zooming.
- pub visible_rect: DeviceIntRect,
- /// The blob image's tile size to apply when rasterizing the blob-image
- /// and when storing its rasterized data on the GPU.
- /// Applies to both width and heights of the tiles.
- ///
- /// All blob images are tiled.
- pub tile_size: TileSize,
-}
-
-/// Updates an already existing blob-image resource.
-#[derive(Clone)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub struct UpdateBlobImage {
- /// The key identifying the blob-image resource to update.
- pub key: BlobImageKey,
- /// Properties of the image.
- pub descriptor: ImageDescriptor,
- /// The blob-image's serialized commands.
- pub data: Arc<BlobImageData>,
- /// See `AddBlobImage::visible_rect`.
- pub visible_rect: DeviceIntRect,
- /// An optional dirty rect that lets WebRender optimize the amount of
- /// data to rasterize and transfer to the GPU.
- pub dirty_rect: BlobDirtyRect,
-}
-
-/// Creates a font resource.
-///
-/// Must be matched with a corresponding `ResourceUpdate::DeleteFont` at some point to prevent
-/// memory leaks.
-#[derive(Clone)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub enum AddFont {
- ///
- Raw(FontKey, Arc<Vec<u8>>, u32),
- ///
- Native(FontKey, NativeFontHandle),
-}
-
-/// Creates a font instance resource.
-///
-/// Must be matched with a corresponding `DeleteFontInstance` at some point
-/// to prevent memory leaks.
-#[derive(Clone)]
-#[cfg_attr(any(feature = "serde"), derive(Deserialize, Serialize))]
-pub struct AddFontInstance {
- /// A key to identify the font instance.
- pub key: FontInstanceKey,
- /// The font resource's key.
- pub font_key: FontKey,
- /// Glyph size in app units.
- pub glyph_size: f32,
- ///
- pub options: Option<FontInstanceOptions>,
- ///
- pub platform_options: Option<FontInstancePlatformOptions>,
- ///
- pub variations: Vec<FontVariation>,
-}
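// Editor's sketch (not part of the diff): registering a font and an instance
// with the structures above. `font_bytes` is hypothetical embedder data; the
// key-generation helpers appear later in this file.
let font_key = api.generate_font_key();
let font = AddFont::Raw(font_key, Arc::new(font_bytes), 0);
let instance = AddFontInstance {
    key: api.generate_font_instance_key(),
    font_key,
    glyph_size: 16.0,
    options: None,
    platform_options: None,
    variations: Vec::new(),
};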
-
-/// Frame messages affect building the scene.
-pub enum SceneMsg {
- ///
- UpdateEpoch(PipelineId, Epoch),
- ///
- SetPageZoom(ZoomFactor),
- ///
- SetRootPipeline(PipelineId),
- ///
- RemovePipeline(PipelineId),
- ///
- SetDisplayList {
- ///
- display_list: BuiltDisplayList,
- ///
- epoch: Epoch,
- ///
- pipeline_id: PipelineId,
- ///
- background: Option<ColorF>,
- ///
- viewport_size: LayoutSize,
- ///
- preserve_frame_state: bool,
- },
- ///
- SetDocumentView {
- ///
- device_rect: DeviceIntRect,
- ///
- device_pixel_ratio: f32,
- },
- /// Set the current quality / performance configuration for this document.
- SetQualitySettings {
- /// The quality / performance settings to apply.
- settings: QualitySettings,
- },
-}
-
-/// Frame messages affect frame generation (applied after building the scene).
-pub enum FrameMsg {
- ///
- UpdateEpoch(PipelineId, Epoch),
- ///
- HitTest(Option<PipelineId>, WorldPoint, HitTestFlags, Sender<HitTestResult>),
- ///
- RequestHitTester(Sender<Arc<dyn ApiHitTester>>),
- ///
- SetPan(DeviceIntPoint),
- ///
- ScrollNodeWithId(LayoutPoint, ExternalScrollId, ScrollClamping),
- ///
- GetScrollNodeState(Sender<Vec<ScrollNodeState>>),
- ///
- UpdateDynamicProperties(DynamicProperties),
- ///
- AppendDynamicTransformProperties(Vec<PropertyValue<LayoutTransform>>),
- ///
- SetPinchZoom(ZoomFactor),
- ///
- SetIsTransformAsyncZooming(bool, PropertyBindingId),
-}
-
-impl fmt::Debug for SceneMsg {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- f.write_str(match *self {
- SceneMsg::UpdateEpoch(..) => "SceneMsg::UpdateEpoch",
- SceneMsg::SetDisplayList { .. } => "SceneMsg::SetDisplayList",
- SceneMsg::SetPageZoom(..) => "SceneMsg::SetPageZoom",
- SceneMsg::RemovePipeline(..) => "SceneMsg::RemovePipeline",
- SceneMsg::SetDocumentView { .. } => "SceneMsg::SetDocumentView",
- SceneMsg::SetRootPipeline(..) => "SceneMsg::SetRootPipeline",
- SceneMsg::SetQualitySettings { .. } => "SceneMsg::SetQualitySettings",
- })
- }
-}
-
-impl fmt::Debug for FrameMsg {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- f.write_str(match *self {
- FrameMsg::UpdateEpoch(..) => "FrameMsg::UpdateEpoch",
- FrameMsg::HitTest(..) => "FrameMsg::HitTest",
- FrameMsg::RequestHitTester(..) => "FrameMsg::RequestHitTester",
- FrameMsg::SetPan(..) => "FrameMsg::SetPan",
- FrameMsg::ScrollNodeWithId(..) => "FrameMsg::ScrollNodeWithId",
- FrameMsg::GetScrollNodeState(..) => "FrameMsg::GetScrollNodeState",
- FrameMsg::UpdateDynamicProperties(..) => "FrameMsg::UpdateDynamicProperties",
- FrameMsg::AppendDynamicTransformProperties(..) => "FrameMsg::AppendDynamicTransformProperties",
- FrameMsg::SetPinchZoom(..) => "FrameMsg::SetPinchZoom",
- FrameMsg::SetIsTransformAsyncZooming(..) => "FrameMsg::SetIsTransformAsyncZooming",
- })
- }
-}
-
-bitflags!{
- /// Bit flags for WR stages to store in a capture.
- // Note: capturing `FRAME` without `SCENE` is not currently supported.
- pub struct CaptureBits: u8 {
- ///
- const SCENE = 0x1;
- ///
- const FRAME = 0x2;
- ///
- const TILE_CACHE = 0x4;
- ///
- const EXTERNAL_RESOURCES = 0x8;
- }
-}
-
-bitflags!{
- /// Mask for clearing caches in debug commands.
- pub struct ClearCache: u8 {
- ///
- const IMAGES = 0b1;
- ///
- const GLYPHS = 0b10;
- ///
- const GLYPH_DIMENSIONS = 0b100;
- ///
- const RENDER_TASKS = 0b1000;
- ///
- const TEXTURE_CACHE = 0b10000;
- /// Clear render target pool
- const RENDER_TARGETS = 0b100000;
- }
-}
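// Editor's sketch (not part of the diff): bitflags values combine with `|`,
// so a debug command clearing both the image and glyph caches (sent through
// `RenderApi::send_debug_cmd`, defined later in this file) would be:
let mask = ClearCache::IMAGES | ClearCache::GLYPHS;
api.send_debug_cmd(DebugCommand::ClearCaches(mask));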
-
-/// Information about a loaded capture of each document
-/// that is returned by `RenderBackend`.
-#[derive(Clone, Debug)]
-pub struct CapturedDocument {
- ///
- pub document_id: DocumentId,
- ///
- pub root_pipeline_id: Option<PipelineId>,
-}
-
-/// Update of the state of built-in debugging facilities.
-#[derive(Clone)]
-pub enum DebugCommand {
- /// Sets the provided debug flags.
- SetFlags(DebugFlags),
- /// Configure whether dual-source blending is used, if available.
- EnableDualSourceBlending(bool),
- /// Save a capture of every document's state.
- SaveCapture(PathBuf, CaptureBits),
- /// Load a capture of every document's state.
- LoadCapture(PathBuf, Option<(u32, u32)>, Sender<CapturedDocument>),
- /// Start capturing a sequence of scene/frame changes.
- StartCaptureSequence(PathBuf, CaptureBits),
- /// Stop capturing a sequence of scene/frame changes.
- StopCaptureSequence,
- /// Clear cached resources, forcing them to be re-uploaded from templates.
- ClearCaches(ClearCache),
- /// Enable/disable native compositor usage.
- EnableNativeCompositor(bool),
- /// Enable/disable parallel job execution with rayon.
- EnableMultithreading(bool),
- /// Sets the maximum number of existing batches to visit before creating a new one.
- SetBatchingLookback(u32),
- /// Invalidate GPU cache, forcing the update from the CPU mirror.
- InvalidateGpuCache,
- /// Causes the scene builder to pause for a given number of milliseconds each time it
- /// processes a transaction.
- SimulateLongSceneBuild(u32),
- /// Set an override tile size to use for picture caches
- SetPictureTileSize(Option<DeviceIntSize>),
-}
-
-/// Message sent by the `RenderApi` to the render backend thread.
-pub enum ApiMsg {
- /// Adds a new document namespace.
- CloneApi(Sender<IdNamespace>),
- /// Adds a new document namespace, with the id allocated by the client.
- CloneApiByClient(IdNamespace),
- /// Adds a new document with the given initial size.
- AddDocument(DocumentId, DeviceIntSize),
- /// A message targeted at a particular document.
- UpdateDocuments(Vec<Box<TransactionMsg>>),
- /// Flush from the caches anything that isn't necessary, to free some memory.
- MemoryPressure,
- /// Collects a memory report.
- ReportMemory(Sender<Box<MemoryReport>>),
- /// Change debugging options.
- DebugCommand(DebugCommand),
- /// Message from the scene builder thread.
- SceneBuilderResult(SceneBuilderResult),
-}
-
-impl fmt::Debug for ApiMsg {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- f.write_str(match *self {
- ApiMsg::CloneApi(..) => "ApiMsg::CloneApi",
- ApiMsg::CloneApiByClient(..) => "ApiMsg::CloneApiByClient",
- ApiMsg::AddDocument(..) => "ApiMsg::AddDocument",
- ApiMsg::UpdateDocuments(..) => "ApiMsg::UpdateDocuments",
- ApiMsg::MemoryPressure => "ApiMsg::MemoryPressure",
- ApiMsg::ReportMemory(..) => "ApiMsg::ReportMemory",
- ApiMsg::DebugCommand(..) => "ApiMsg::DebugCommand",
- ApiMsg::SceneBuilderResult(..) => "ApiMsg::SceneBuilderResult",
- })
- }
-}
-
-/// Allows the API to communicate with WebRender.
-///
- /// This object is created along with the `Renderer`, and its main use from a
- /// user's perspective is to create one or several `RenderApi` objects.
-pub struct RenderApiSender {
- api_sender: Sender<ApiMsg>,
- scene_sender: Sender<SceneBuilderRequest>,
- low_priority_scene_sender: Sender<SceneBuilderRequest>,
- blob_image_handler: Option<Box<dyn BlobImageHandler>>,
- shared_font_instances: SharedFontInstanceMap,
-}
-
-impl RenderApiSender {
- /// Used internally by the `Renderer`.
- pub fn new(
- api_sender: Sender<ApiMsg>,
- scene_sender: Sender<SceneBuilderRequest>,
- low_priority_scene_sender: Sender<SceneBuilderRequest>,
- blob_image_handler: Option<Box<dyn BlobImageHandler>>,
- shared_font_instances: SharedFontInstanceMap,
- ) -> Self {
- RenderApiSender {
- api_sender,
- scene_sender,
- low_priority_scene_sender,
- blob_image_handler,
- shared_font_instances,
- }
- }
-
- /// Creates a new resource API object with a dedicated namespace.
- pub fn create_api(&self) -> RenderApi {
- let (sync_tx, sync_rx) = single_msg_channel();
- let msg = ApiMsg::CloneApi(sync_tx);
- self.api_sender.send(msg).expect("Failed to send CloneApi message");
- let namespace_id = sync_rx.recv().expect("Failed to receive CloneApi reply");
- RenderApi {
- api_sender: self.api_sender.clone(),
- scene_sender: self.scene_sender.clone(),
- low_priority_scene_sender: self.low_priority_scene_sender.clone(),
- namespace_id,
- next_id: Cell::new(ResourceId(0)),
- resources: ApiResources::new(
- self.blob_image_handler.as_ref().map(|handler| handler.create_similar()),
- self.shared_font_instances.clone(),
- ),
- }
- }
-
- /// Creates a new resource API object with a dedicated namespace.
- /// The namespace id is allocated by the client.
- ///
- /// This function may only be used when `RendererOptions::namespace_alloc_by_client`
- /// is true, in which case `create_api()` must not be used, to avoid namespace id conflicts.
- pub fn create_api_by_client(&self, namespace_id: IdNamespace) -> RenderApi {
- let msg = ApiMsg::CloneApiByClient(namespace_id);
- self.api_sender.send(msg).expect("Failed to send CloneApiByClient message");
- RenderApi {
- api_sender: self.api_sender.clone(),
- scene_sender: self.scene_sender.clone(),
- low_priority_scene_sender: self.low_priority_scene_sender.clone(),
- namespace_id,
- next_id: Cell::new(ResourceId(0)),
- resources: ApiResources::new(
- self.blob_image_handler.as_ref().map(|handler| handler.create_similar()),
- self.shared_font_instances.clone(),
- ),
- }
- }
-}
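// Editor's sketch (not part of the diff): `sender` is the `RenderApiSender`
// returned when the `Renderer` is created; each `create_api` call allocates
// a fresh namespace on the backend thread.
let api = sender.create_api();
let document_id = api.add_document(DeviceIntSize::new(1920, 1080));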
-
-/// The main entry point to interact with WebRender.
-pub struct RenderApi {
- api_sender: Sender<ApiMsg>,
- scene_sender: Sender<SceneBuilderRequest>,
- low_priority_scene_sender: Sender<SceneBuilderRequest>,
- namespace_id: IdNamespace,
- next_id: Cell<ResourceId>,
- resources: ApiResources,
-}
-
-impl RenderApi {
- /// Returns the namespace ID used by this API object.
- pub fn get_namespace_id(&self) -> IdNamespace {
- self.namespace_id
- }
-
- /// Creates a new `RenderApiSender` based on this object's channels, allowing
- /// additional `RenderApi` objects to be created from other threads.
- pub fn create_sender(&self) -> RenderApiSender {
- RenderApiSender::new(
- self.api_sender.clone(),
- self.scene_sender.clone(),
- self.low_priority_scene_sender.clone(),
- self.resources.blob_image_handler.as_ref().map(|handler| handler.create_similar()),
- self.resources.get_shared_font_instances(),
- )
- }
-
- /// Add a document to the WebRender instance.
- ///
- /// Instances can manage one or several documents (using the same render backend thread).
- /// Each document will internally correspond to a single scene, and scenes are made of
- /// one or several pipelines.
- pub fn add_document(&self, initial_size: DeviceIntSize) -> DocumentId {
- let new_id = self.next_unique_id();
- self.add_document_with_id(initial_size, new_id)
- }
-
- /// See `add_document`
- pub fn add_document_with_id(&self,
- initial_size: DeviceIntSize,
- id: u32) -> DocumentId {
- window_size_sanity_check(initial_size);
-
- let document_id = DocumentId::new(self.namespace_id, id);
-
- // We send this message to both the render backend and the scene builder instead of having
- // the scene builder thread forward it to the render backend as we do elsewhere. This is because
- // some transactions can skip the scene builder thread and we want to avoid them arriving before
- // the render backend knows about the existence of the corresponding document id.
- // It may not be necessary, though.
- self.api_sender.send(
- ApiMsg::AddDocument(document_id, initial_size)
- ).unwrap();
- self.scene_sender.send(
- SceneBuilderRequest::AddDocument(document_id, initial_size)
- ).unwrap();
-
- document_id
- }
-
- /// Delete a document.
- pub fn delete_document(&self, document_id: DocumentId) {
- self.low_priority_scene_sender.send(
- SceneBuilderRequest::DeleteDocument(document_id)
- ).unwrap();
- }
-
- /// Generate a new font key
- pub fn generate_font_key(&self) -> FontKey {
- let new_id = self.next_unique_id();
- FontKey::new(self.namespace_id, new_id)
- }
-
- /// Generate a new font instance key
- pub fn generate_font_instance_key(&self) -> FontInstanceKey {
- let new_id = self.next_unique_id();
- FontInstanceKey::new(self.namespace_id, new_id)
- }
-
- /// Gets the dimensions for the supplied glyph keys.
- ///
- /// Note: the internal texture cache doesn't store 'empty' textures
- /// (height or width = 0). This means that glyph dimensions, e.g. for
- /// spaces (' '), will mostly be None.
- pub fn get_glyph_dimensions(
- &self,
- key: FontInstanceKey,
- glyph_indices: Vec<GlyphIndex>,
- ) -> Vec<Option<GlyphDimensions>> {
- let (sender, rx) = single_msg_channel();
- let msg = SceneBuilderRequest::GetGlyphDimensions(GlyphDimensionRequest {
- key,
- glyph_indices,
- sender
- });
- self.low_priority_scene_sender.send(msg).unwrap();
- rx.recv().unwrap()
- }
-
- /// Gets the glyph indices for the supplied string. These
- /// can be used to construct GlyphKeys.
- pub fn get_glyph_indices(&self, key: FontKey, text: &str) -> Vec<Option<u32>> {
- let (sender, rx) = single_msg_channel();
- let msg = SceneBuilderRequest::GetGlyphIndices(GlyphIndexRequest {
- key,
- text: text.to_string(),
- sender,
- });
- self.low_priority_scene_sender.send(msg).unwrap();
- rx.recv().unwrap()
- }
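// Editor's sketch (not part of the diff): the usual round-trip through the two
// glyph queries above. `font_key` and `instance_key` are assumed to be already
// registered; unmapped characters come back as `None` and are dropped here.
let indices: Vec<GlyphIndex> = api
    .get_glyph_indices(font_key, "Hello")
    .into_iter()
    .flatten()
    .collect();
let dims = api.get_glyph_dimensions(instance_key, indices);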
-
- /// Creates an `ImageKey`.
- pub fn generate_image_key(&self) -> ImageKey {
- let new_id = self.next_unique_id();
- ImageKey::new(self.namespace_id, new_id)
- }
-
- /// Creates a `BlobImageKey`.
- pub fn generate_blob_image_key(&self) -> BlobImageKey {
- BlobImageKey(self.generate_image_key())
- }
-
- /// A Gecko-specific notification mechanism to get some code executed on the
- /// `Renderer`'s thread, mostly replaced by `NotificationHandler`. You should
- /// probably use the latter instead.
- pub fn send_external_event(&self, evt: ExternalEvent) {
- let msg = SceneBuilderRequest::ExternalEvent(evt);
- self.low_priority_scene_sender.send(msg).unwrap();
- }
-
- /// Notify WebRender that now is a good time to flush caches and release
- /// as much memory as possible.
- pub fn notify_memory_pressure(&self) {
- self.api_sender.send(ApiMsg::MemoryPressure).unwrap();
- }
-
- /// Synchronously requests memory report.
- pub fn report_memory(&self, _ops: malloc_size_of::MallocSizeOfOps) -> MemoryReport {
- let (tx, rx) = single_msg_channel();
- self.api_sender.send(ApiMsg::ReportMemory(tx)).unwrap();
- *rx.recv().unwrap()
- }
-
- /// Update debugging flags.
- pub fn set_debug_flags(&self, flags: DebugFlags) {
- let cmd = DebugCommand::SetFlags(flags);
- self.api_sender.send(ApiMsg::DebugCommand(cmd)).unwrap();
- }
-
- /// Stops the `RenderBackend`'s task, leaving it idle until it is shut down.
- pub fn stop_render_backend(&self) {
- self.low_priority_scene_sender.send(SceneBuilderRequest::StopRenderBackend).unwrap();
- }
-
- /// Shut the WebRender instance down.
- pub fn shut_down(&self, synchronously: bool) {
- if synchronously {
- let (tx, rx) = single_msg_channel();
- self.low_priority_scene_sender.send(SceneBuilderRequest::ShutDown(Some(tx))).unwrap();
- rx.recv().unwrap();
- } else {
- self.low_priority_scene_sender.send(SceneBuilderRequest::ShutDown(None)).unwrap();
- }
- }
-
- /// Create a new unique key that can be used for
- /// animated property bindings.
- pub fn generate_property_binding_key<T: Copy>(&self) -> PropertyBindingKey<T> {
- let new_id = self.next_unique_id();
- PropertyBindingKey {
- id: PropertyBindingId {
- namespace: self.namespace_id,
- uid: new_id,
- },
- _phantom: PhantomData,
- }
- }
-
- #[inline]
- fn next_unique_id(&self) -> u32 {
- let ResourceId(id) = self.next_id.get();
- self.next_id.set(ResourceId(id + 1));
- id
- }
-
- // For use in Wrench only
- #[doc(hidden)]
- pub fn send_message(&self, msg: ApiMsg) {
- self.api_sender.send(msg).unwrap();
- }
-
- /// Creates a transaction message from a single frame message.
- fn frame_message(&self, msg: FrameMsg, document_id: DocumentId) -> Box<TransactionMsg> {
- Box::new(TransactionMsg {
- document_id,
- scene_ops: Vec::new(),
- frame_ops: vec![msg],
- resource_updates: Vec::new(),
- notifications: Vec::new(),
- generate_frame: GenerateFrame::No,
- invalidate_rendered_frame: false,
- use_scene_builder_thread: false,
- low_priority: false,
- blob_rasterizer: None,
- blob_requests: Vec::new(),
- rasterized_blobs: Vec::new(),
- profile: TransactionProfile::new(),
- })
- }
-
- /// A helper method to send document messages.
- fn send_frame_msg(&self, document_id: DocumentId, msg: FrameMsg) {
- // This assertion fails on Servo use-cases, because it creates different
- // `RenderApi` instances for layout and compositor.
- //assert_eq!(document_id.0, self.namespace_id);
- self.api_sender
- .send(ApiMsg::UpdateDocuments(vec![self.frame_message(msg, document_id)]))
- .unwrap()
- }
-
- /// Send a transaction to WebRender.
- pub fn send_transaction(&mut self, document_id: DocumentId, transaction: Transaction) {
- let mut transaction = transaction.finalize(document_id);
-
- self.resources.update(&mut transaction);
-
- if transaction.generate_frame.as_bool() {
- transaction.profile.start_time(profiler::API_SEND_TIME);
- transaction.profile.start_time(profiler::TOTAL_FRAME_CPU_TIME);
- }
-
- if transaction.use_scene_builder_thread {
- let sender = if transaction.low_priority {
- &mut self.low_priority_scene_sender
- } else {
- &mut self.scene_sender
- };
-
- sender.send(SceneBuilderRequest::Transactions(vec![transaction])).unwrap();
- } else {
- self.api_sender.send(ApiMsg::UpdateDocuments(vec![transaction])).unwrap();
- }
- }
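// Editor's sketch (not part of the diff): the `Transaction` builder API is
// assumed here; only `send_transaction` itself appears in this patch.
let mut txn = Transaction::new();
// ... queue display-list and resource updates on `txn` ...
api.send_transaction(document_id, txn);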
-
- /// Does a hit test on display items in the specified document, at the given
- /// point. If a pipeline_id is specified, it is used to further restrict the
- /// hit results so that only items inside that pipeline are matched. The vector
- /// of hit results will contain all display items that match, ordered from
- /// front to back.
- pub fn hit_test(&self,
- document_id: DocumentId,
- pipeline_id: Option<PipelineId>,
- point: WorldPoint,
- flags: HitTestFlags,
- ) -> HitTestResult {
- let (tx, rx) = single_msg_channel();
-
- self.send_frame_msg(
- document_id,
- FrameMsg::HitTest(pipeline_id, point, flags, tx)
- );
- rx.recv().unwrap()
- }
-
- /// Synchronously request an object that can perform fast hit testing queries.
- pub fn request_hit_tester(&self, document_id: DocumentId) -> HitTesterRequest {
- let (tx, rx) = single_msg_channel();
- self.send_frame_msg(
- document_id,
- FrameMsg::RequestHitTester(tx)
- );
-
- HitTesterRequest { rx }
- }
-
- /// Synchronously retrieves the state of all scroll nodes in the given document.
- pub fn get_scroll_node_state(&self, document_id: DocumentId) -> Vec<ScrollNodeState> {
- let (tx, rx) = single_msg_channel();
- self.send_frame_msg(document_id, FrameMsg::GetScrollNodeState(tx));
- rx.recv().unwrap()
- }
-
- // Some internal scheduling magic that leaked into the API.
- // Buckle up and see APZUpdater.cpp for the details.
- #[doc(hidden)]
- pub fn wake_scene_builder(&self) {
- self.scene_sender.send(SceneBuilderRequest::WakeUp).unwrap();
- }
-
- /// Block until a round-trip to the scene builder thread has completed. This
- /// ensures that any transactions (including ones deferred to the scene
- /// builder thread) have been processed.
- pub fn flush_scene_builder(&self) {
- let (tx, rx) = single_msg_channel();
- self.low_priority_scene_sender.send(SceneBuilderRequest::Flush(tx)).unwrap();
- rx.recv().unwrap(); // Block until done.
- }
-
- /// Save a capture of the current frame state for debugging.
- pub fn save_capture(&self, path: PathBuf, bits: CaptureBits) {
- let msg = ApiMsg::DebugCommand(DebugCommand::SaveCapture(path, bits));
- self.send_message(msg);
- }
-
- /// Load a capture of the current frame state for debugging.
- pub fn load_capture(&self, path: PathBuf, ids: Option<(u32, u32)>) -> Vec<CapturedDocument> {
- // First flush the scene builder otherwise async scenes might clobber
- // the capture we are about to load.
- self.flush_scene_builder();
-
- let (tx, rx) = unbounded_channel();
- let msg = ApiMsg::DebugCommand(DebugCommand::LoadCapture(path, ids, tx));
- self.send_message(msg);
-
- let mut documents = Vec::new();
- while let Ok(captured_doc) = rx.recv() {
- documents.push(captured_doc);
- }
- documents
- }
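// Editor's sketch (not part of the diff): saving a capture and loading it back
// for offline debugging; the path is illustrative.
api.save_capture(PathBuf::from("/tmp/wr-capture"), CaptureBits::all());
let docs = api.load_capture(PathBuf::from("/tmp/wr-capture"), None);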
-
- /// Start capturing a sequence of frames.
- pub fn start_capture_sequence(&self, path: PathBuf, bits: CaptureBits) {
- let msg = ApiMsg::DebugCommand(DebugCommand::StartCaptureSequence(path, bits));
- self.send_message(msg);
- }
-
- /// Stop capturing sequences of frames.
- pub fn stop_capture_sequence(&self) {
- let msg = ApiMsg::DebugCommand(DebugCommand::StopCaptureSequence);
- self.send_message(msg);
- }
-
- /// Update the state of builtin debugging facilities.
- pub fn send_debug_cmd(&mut self, cmd: DebugCommand) {
- if let DebugCommand::EnableMultithreading(enable) = cmd {
- // TODO(nical) we should enable it for all RenderApis.
- self.resources.enable_multithreading(enable);
- }
- let msg = ApiMsg::DebugCommand(cmd);
- self.send_message(msg);
- }
-}
-
-impl Drop for RenderApi {
- fn drop(&mut self) {
- let msg = SceneBuilderRequest::ClearNamespace(self.namespace_id);
- let _ = self.low_priority_scene_sender.send(msg);
- }
-}
-
-
-fn window_size_sanity_check(size: DeviceIntSize) {
- // Anything bigger than this will crash later when attempting to create
- // a render task.
- use crate::render_task::MAX_RENDER_TASK_SIZE;
- if size.width > MAX_RENDER_TASK_SIZE || size.height > MAX_RENDER_TASK_SIZE {
- panic!("Attempting to create a {}x{} window/document", size.width, size.height);
- }
-}
-
-/// Collection of heap sizes, in bytes.
-/// cbindgen:derive-eq=false
-/// cbindgen:derive-ostream=false
-#[repr(C)]
-#[allow(missing_docs)]
-#[derive(AddAssign, Clone, Debug, Default)]
-pub struct MemoryReport {
- //
- // CPU Memory.
- //
- pub clip_stores: usize,
- pub gpu_cache_metadata: usize,
- pub gpu_cache_cpu_mirror: usize,
- pub render_tasks: usize,
- pub hit_testers: usize,
- pub fonts: usize,
- pub weak_fonts: usize,
- pub images: usize,
- pub rasterized_blobs: usize,
- pub shader_cache: usize,
- pub interning: InterningMemoryReport,
- pub display_list: usize,
- pub upload_staging_memory: usize,
- pub swgl: usize,
-
- //
- // GPU memory.
- //
- pub gpu_cache_textures: usize,
- pub vertex_data_textures: usize,
- pub render_target_textures: usize,
- pub texture_cache_textures: usize,
- pub texture_cache_structures: usize,
- pub depth_target_textures: usize,
- pub texture_upload_pbos: usize,
- pub swap_chain: usize,
- pub render_texture_hosts: usize,
- pub upload_staging_textures: usize,
-}
diff --git a/third_party/webrender/webrender/src/render_backend.rs b/third_party/webrender/webrender/src/render_backend.rs
index 825e981b5cf..e22596a1bab 100644
--- a/third_party/webrender/webrender/src/render_backend.rs
+++ b/third_party/webrender/webrender/src/render_backend.rs
@@ -8,39 +8,37 @@
//! See the comment at the top of the `renderer` module for a description of
//! how these two pieces interact.
-use api::{DebugFlags, BlobImageHandler};
-use api::{DocumentId, ExternalScrollId, HitTestResult};
-use api::{IdNamespace, PipelineId, RenderNotifier, ScrollClamping};
+use api::{ApiMsg, ClearCache, DebugCommand, DebugFlags, BlobImageHandler};
+use api::{DocumentId, DocumentLayer, ExternalScrollId, FrameMsg, HitTestFlags, HitTestResult};
+use api::{IdNamespace, MemoryReport, PipelineId, RenderNotifier, ScrollClamping};
+use api::{ScrollLocation, TransactionMsg, ResourceUpdate};
use api::{NotificationRequest, Checkpoint, QualitySettings};
-use api::{PrimitiveKeyKind};
+use api::{ClipIntern, FilterDataIntern, PrimitiveKeyKind};
use api::units::*;
-use api::channel::{single_msg_channel, Sender, Receiver};
#[cfg(any(feature = "capture", feature = "replay"))]
-use crate::render_api::CaptureBits;
+use api::CaptureBits;
#[cfg(feature = "replay")]
-use crate::render_api::CapturedDocument;
-use crate::render_api::{MemoryReport, TransactionMsg, ResourceUpdate, ApiMsg, FrameMsg, ClearCache, DebugCommand};
-use crate::clip::{ClipIntern, PolygonIntern, ClipStoreScratchBuffer};
-use crate::filterdata::FilterDataIntern;
+use api::CapturedDocument;
+use crate::spatial_tree::SpatialNodeIndex;
#[cfg(any(feature = "capture", feature = "replay"))]
use crate::capture::CaptureConfig;
use crate::composite::{CompositorKind, CompositeDescriptor};
-use crate::frame_builder::{FrameBuilder, FrameBuilderConfig, FrameScratchBuffer};
+#[cfg(feature = "debugger")]
+use crate::debug_server;
+use crate::frame_builder::{FrameBuilder, FrameBuilderConfig};
use crate::glyph_rasterizer::{FontInstance};
use crate::gpu_cache::GpuCache;
use crate::hit_test::{HitTest, HitTester, SharedHitTester};
use crate::intern::DataStore;
-#[cfg(any(feature = "capture", feature = "replay"))]
-use crate::internal_types::DebugOutput;
-use crate::internal_types::{FastHashMap, RenderedDocument, ResultMsg};
+use crate::internal_types::{DebugOutput, FastHashMap, RenderedDocument, ResultMsg};
use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
-use crate::picture::{TileCacheLogger, PictureScratchBuffer, SliceId, TileCacheInstance, TileCacheParams};
+use crate::picture::{RetainedTiles, TileCacheLogger};
use crate::prim_store::{PrimitiveScratchBuffer, PrimitiveInstance};
use crate::prim_store::{PrimitiveInstanceKind, PrimTemplateCommonData, PrimitiveStore};
use crate::prim_store::interned::*;
-use crate::profiler::{self, TransactionProfile};
-use crate::render_task_graph::RenderTaskGraphBuilder;
-use crate::renderer::{AsyncPropertySampler, FullFrameStats, PipelineInfo};
+use crate::profiler::{BackendProfileCounters, ResourceProfileCounters};
+use crate::render_task_graph::RenderTaskGraphCounters;
+use crate::renderer::{AsyncPropertySampler, PipelineInfo};
use crate::resource_cache::ResourceCache;
#[cfg(feature = "replay")]
use crate::resource_cache::PlainCacheOwn;
@@ -52,12 +50,15 @@ use crate::scene::{BuiltScene, SceneProperties};
use crate::scene_builder_thread::*;
#[cfg(feature = "serialize")]
use serde::{Serialize, Deserialize};
+#[cfg(feature = "debugger")]
+use serde_json;
#[cfg(feature = "replay")]
use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::mpsc::{channel, Sender, Receiver};
use std::time::{UNIX_EPOCH, SystemTime};
-use std::{mem, u32};
+use std::u32;
#[cfg(feature = "capture")]
use std::path::PathBuf;
#[cfg(feature = "replay")]
@@ -79,6 +80,7 @@ pub struct DocumentView {
#[derive(Copy, Clone)]
pub struct SceneView {
pub device_rect: DeviceIntRect,
+ pub layer: DocumentLayer,
pub device_pixel_ratio: f32,
pub page_zoom_factor: f32,
pub quality_settings: QualitySettings,
@@ -164,9 +166,9 @@ impl ::std::ops::Sub<usize> for FrameId {
FrameId(self.0 - other)
}
}
+
enum RenderBackendStatus {
Continue,
- StopRenderBackend,
ShutDown(Option<Sender<()>>),
}
@@ -272,12 +274,12 @@ macro_rules! declare_data_stores {
fn apply_updates(
&mut self,
updates: InternerUpdates,
- profile: &mut TransactionProfile,
+ profile_counters: &mut BackendProfileCounters,
) {
$(
self.$name.apply_updates(
updates.$name,
- profile,
+ &mut profile_counters.intern.$name,
);
)+
}
@@ -285,7 +287,7 @@ macro_rules! declare_data_stores {
}
}
-crate::enumerate_interners!(declare_data_stores);
+enumerate_interners!(declare_data_stores);
impl DataStores {
/// Returns the local rect for a primitive. For most primitives, this is
@@ -346,8 +348,7 @@ impl DataStores {
let prim_data = &self.line_decoration[data_handle];
&prim_data.common
}
- PrimitiveInstanceKind::LinearGradient { data_handle, .. }
- | PrimitiveInstanceKind::CachedLinearGradient { data_handle, .. } => {
+ PrimitiveInstanceKind::LinearGradient { data_handle, .. } => {
let prim_data = &self.linear_grad[data_handle];
&prim_data.common
}
@@ -382,36 +383,6 @@ impl DataStores {
}
}
-#[derive(Default)]
-pub struct ScratchBuffer {
- pub primitive: PrimitiveScratchBuffer,
- pub picture: PictureScratchBuffer,
- pub frame: FrameScratchBuffer,
- pub clip_store: ClipStoreScratchBuffer,
-}
-
-impl ScratchBuffer {
- pub fn begin_frame(&mut self) {
- self.primitive.begin_frame();
- self.picture.begin_frame();
- self.frame.begin_frame();
- }
-
- pub fn recycle(&mut self, recycler: &mut Recycler) {
- self.primitive.recycle(recycler);
- self.picture.recycle(recycler);
- self.frame.recycle(recycler);
- }
-
- pub fn memory_pressure(&mut self) {
- // TODO: causes browser chrome test crashes on windows.
- //self.primitive = Default::default();
- self.picture = Default::default();
- self.frame = Default::default();
- self.clip_store = Default::default();
- }
-}
-
struct Document {
/// The id of this document
id: DocumentId,
@@ -432,9 +403,6 @@ struct Document {
/// The builder object that produces frames, kept around to preserve some retained state.
frame_builder: FrameBuilder,
- /// Allows graphs of render tasks to be created, and then built into an immutable graph output.
- rg_builder: RenderTaskGraphBuilder,
-
/// A data structure to allow hit testing against rendered frames. This is updated
/// every time we produce a fully rendered frame.
hit_tester: Option<Arc<HitTester>>,
@@ -460,26 +428,23 @@ struct Document {
/// Contains various vecs of data that is used only during frame building,
/// where we want to recycle the memory for each new display list, to avoid constantly
/// re-allocating and moving memory around.
- scratch: ScratchBuffer,
+ scratch: PrimitiveScratchBuffer,
+ /// Keep track of the size of the render task graph, to pre-allocate memory up-front
+ /// for the next frame.
+ render_task_counters: RenderTaskGraphCounters,
#[cfg(feature = "replay")]
loaded_scene: Scene,
/// Tracks the state of the picture cache tiles that were composited on the previous frame.
prev_composite_descriptor: CompositeDescriptor,
-
- /// Tracks if we need to invalidate dirty rects for this document, due to the picture
- /// cache slice configuration having changed when a new scene is swapped in.
- dirty_rects_are_valid: bool,
-
- profile: TransactionProfile,
- frame_stats: Option<FullFrameStats>,
}
impl Document {
pub fn new(
id: DocumentId,
size: DeviceIntSize,
+ layer: DocumentLayer,
default_device_pixel_ratio: f32,
) -> Self {
Document {
@@ -488,6 +453,7 @@ impl Document {
view: DocumentView {
scene: SceneView {
device_rect: size.into(),
+ layer,
page_zoom_factor: 1.0,
device_pixel_ratio: default_device_pixel_ratio,
quality_settings: QualitySettings::default(),
@@ -508,14 +474,11 @@ impl Document {
rendered_frame_is_valid: false,
has_built_scene: false,
data_stores: DataStores::default(),
- scratch: ScratchBuffer::default(),
+ scratch: PrimitiveScratchBuffer::new(),
+ render_task_counters: RenderTaskGraphCounters::new(),
#[cfg(feature = "replay")]
loaded_scene: Scene::new(),
prev_composite_descriptor: CompositeDescriptor::empty(),
- dirty_rects_are_valid: true,
- profile: TransactionProfile::new(),
- rg_builder: RenderTaskGraphBuilder::new(),
- frame_stats: None,
}
}
@@ -535,6 +498,34 @@ impl Document {
FrameMsg::UpdateEpoch(pipeline_id, epoch) => {
self.scene.pipeline_epochs.insert(pipeline_id, epoch);
}
+ FrameMsg::Scroll(delta, cursor) => {
+ profile_scope!("Scroll");
+
+ let node_index = match self.hit_tester {
+ Some(ref hit_tester) => {
+ // Ideally we would call self.scroll_nearest_scrolling_ancestor here, but
+ // we have to avoid a double-borrow.
+ let test = HitTest::new(None, cursor, HitTestFlags::empty());
+ hit_tester.find_node_under_point(test)
+ }
+ None => {
+ None
+ }
+ };
+
+ if self.hit_tester.is_some()
+ && self.scroll_nearest_scrolling_ancestor(delta, node_index) {
+ self.hit_tester_is_valid = false;
+ self.frame_is_valid = false;
+ }
+
+ return DocumentOps {
+ // TODO: Does it make sense to track this as a scroll even if we
+ // ended up not scrolling anything?
+ scroll: true,
+ ..DocumentOps::nop()
+ };
+ }
FrameMsg::HitTest(pipeline_id, point, flags, tx) => {
if !self.hit_tester_is_valid {
self.rebuild_hit_tester();
@@ -607,13 +598,10 @@ impl Document {
&mut self,
resource_cache: &mut ResourceCache,
gpu_cache: &mut GpuCache,
+ resource_profile: &mut ResourceProfileCounters,
debug_flags: DebugFlags,
tile_cache_logger: &mut TileCacheLogger,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
- frame_stats: Option<FullFrameStats>
) -> RenderedDocument {
- let frame_build_start_time = precise_time_ns();
-
let accumulated_scale_factor = self.view.accumulated_scale_factor();
let pan = self.view.frame.pan.to_f32() / accumulated_scale_factor;
@@ -628,44 +616,31 @@ impl Document {
&mut self.scene,
resource_cache,
gpu_cache,
- &mut self.rg_builder,
self.stamp,
accumulated_scale_factor,
+ self.view.scene.layer,
self.view.scene.device_rect.origin,
pan,
+ resource_profile,
&self.dynamic_properties,
&mut self.data_stores,
&mut self.scratch,
+ &mut self.render_task_counters,
debug_flags,
tile_cache_logger,
- tile_caches,
- self.dirty_rects_are_valid,
- &mut self.profile,
);
frame
};
self.frame_is_valid = true;
- self.dirty_rects_are_valid = true;
let is_new_scene = self.has_built_scene;
self.has_built_scene = false;
- let frame_build_time_ms =
- profiler::ns_to_ms(precise_time_ns() - frame_build_start_time);
- self.profile.set(profiler::FRAME_BUILDING_TIME, frame_build_time_ms);
-
- let frame_stats = frame_stats.map(|mut stats| {
- stats.frame_build_time += frame_build_time_ms;
- stats
- });
-
RenderedDocument {
frame,
is_new_scene,
- profile: self.profile.take_and_reset(),
- frame_stats: frame_stats
}
}
@@ -679,7 +654,7 @@ impl Document {
&self.dynamic_properties,
);
- let hit_tester = Arc::new(self.scene.create_hit_tester());
+ let hit_tester = Arc::new(self.scene.create_hit_tester(&self.data_stores.clip));
self.hit_tester = Some(Arc::clone(&hit_tester));
self.shared_hit_tester.update(hit_tester);
self.hit_tester_is_valid = true;
@@ -694,6 +669,15 @@ impl Document {
}
}
+ /// Returns true if any nodes actually changed position or false otherwise.
+ pub fn scroll_nearest_scrolling_ancestor(
+ &mut self,
+ scroll_location: ScrollLocation,
+ scroll_node_index: Option<SpatialNodeIndex>,
+ ) -> bool {
+ self.scene.spatial_tree.scroll_nearest_scrolling_ancestor(scroll_location, scroll_node_index)
+ }
+
/// Returns true if the node actually changed position or false otherwise.
pub fn scroll_node(
&mut self,
@@ -704,77 +688,33 @@ impl Document {
self.scene.spatial_tree.scroll_node(origin, id, clamp)
}
- /// Update the state of tile caches when a new scene is being swapped in to
- /// the render backend. Retain / reuse existing caches if possible, and
- /// destroy any now unused caches.
- fn update_tile_caches_for_new_scene(
- &mut self,
- mut requested_tile_caches: FastHashMap<SliceId, TileCacheParams>,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
- resource_cache: &mut ResourceCache,
- ) {
- let mut new_tile_caches = FastHashMap::default();
- new_tile_caches.reserve(requested_tile_caches.len());
-
- // Step through the tile caches that are needed for the new scene, and see
- // if we have an existing cache that can be reused.
- for (slice_id, params) in requested_tile_caches.drain() {
- let tile_cache = match tile_caches.remove(&slice_id) {
- Some(mut existing_tile_cache) => {
- // Found an existing cache - update the cache params and reuse it
- existing_tile_cache.prepare_for_new_scene(
- params,
- resource_cache,
- );
- existing_tile_cache
- }
- None => {
- // No cache exists so create a new one
- Box::new(TileCacheInstance::new(params))
- }
- };
-
- new_tile_caches.insert(slice_id, tile_cache);
- }
-
- // Replace current tile cache map, and return what was left over,
- // which are now unused.
- let unused_tile_caches = mem::replace(
- tile_caches,
- new_tile_caches,
- );
-
- if !unused_tile_caches.is_empty() {
- // If the slice configuration changed, assume we can't rely on the
- // current dirty rects for next composite
- self.dirty_rects_are_valid = false;
-
- // Destroy any native surfaces allocated by these unused caches
- for (_, tile_cache) in unused_tile_caches {
- tile_cache.destroy(resource_cache);
- }
- }
- }
-
pub fn new_async_scene_ready(
&mut self,
- mut built_scene: BuiltScene,
+ built_scene: BuiltScene,
recycler: &mut Recycler,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
- resource_cache: &mut ResourceCache,
) {
self.frame_is_valid = false;
self.hit_tester_is_valid = false;
- self.update_tile_caches_for_new_scene(
- mem::replace(&mut built_scene.tile_cache_config.tile_caches, FastHashMap::default()),
- tile_caches,
- resource_cache,
- );
-
+ // Give the old scene a chance to destroy any resources.
+ // Right now, all this does is build a hash map of any cached
+ // surface tiles that can be provided to the next scene.
+ // TODO(nical) - It's a bit awkward how these retained tiles live
+ // in the scene's prim store then temporarily in the frame builder
+ // and then presumably back in the prim store during the next frame
+ // build.
+ let mut retained_tiles = RetainedTiles::new();
+ self.scene.prim_store.destroy(&mut retained_tiles);
let old_scrolling_states = self.scene.spatial_tree.drain();
+
self.scene = built_scene;
+
+ // Provide any cached tiles from the previous scene to
+ // the newly built one.
+ self.frame_builder.set_retained_resources(retained_tiles);
+
self.scratch.recycle(recycler);
+
self.scene.spatial_tree.finalize_and_apply_pending_scroll_offsets(old_scrolling_states);
}
}
@@ -813,6 +753,9 @@ pub struct RenderBackend {
api_rx: Receiver<ApiMsg>,
result_tx: Sender<ResultMsg>,
scene_tx: Sender<SceneBuilderRequest>,
+ low_priority_scene_tx: Sender<SceneBuilderRequest>,
+ backend_scene_tx: Sender<BackendSceneBuilderRequest>,
+ scene_rx: Receiver<SceneBuilderResult>,
default_device_pixel_ratio: f32,
@@ -836,19 +779,10 @@ pub struct RenderBackend {
blob_image_handler: Option<Box<dyn BlobImageHandler>>,
recycler: Recycler,
-
#[cfg(feature = "capture")]
- /// If `Some`, do 'sequence capture' logging, recording updated documents,
- /// frames, etc. This is set only through messages from the scene builder,
- /// so all control of sequence capture goes through there.
capture_config: Option<CaptureConfig>,
-
#[cfg(feature = "replay")]
loaded_resource_sequence_id: u32,
-
- /// A map of tile caches. These are stored in the backend as they are
- /// persisted between both frame and scenes.
- tile_caches: FastHashMap<SliceId, Box<TileCacheInstance>>,
}
impl RenderBackend {
@@ -856,6 +790,9 @@ impl RenderBackend {
api_rx: Receiver<ApiMsg>,
result_tx: Sender<ResultMsg>,
scene_tx: Sender<SceneBuilderRequest>,
+ low_priority_scene_tx: Sender<SceneBuilderRequest>,
+ backend_scene_tx: Sender<BackendSceneBuilderRequest>,
+ scene_rx: Receiver<SceneBuilderResult>,
default_device_pixel_ratio: f32,
resource_cache: ResourceCache,
notifier: Box<dyn RenderNotifier>,
@@ -870,6 +807,9 @@ impl RenderBackend {
api_rx,
result_tx,
scene_tx,
+ low_priority_scene_tx,
+ backend_scene_tx,
+ scene_rx,
default_device_pixel_ratio,
resource_cache,
gpu_cache: GpuCache::new(),
@@ -888,7 +828,6 @@ impl RenderBackend {
capture_config: None,
#[cfg(feature = "replay")]
loaded_resource_sequence_id: 0,
- tile_caches: FastHashMap::default(),
}
}
@@ -896,7 +835,7 @@ impl RenderBackend {
IdNamespace(NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed) as u32)
}
- pub fn run(&mut self) {
+ pub fn run(&mut self, mut profile_counters: BackendProfileCounters) {
let mut frame_counter: u32 = 0;
let mut status = RenderBackendStatus::Continue;
@@ -905,49 +844,109 @@ impl RenderBackend {
}
while let RenderBackendStatus::Continue = status {
- status = match self.api_rx.recv() {
- Ok(msg) => {
- self.process_api_msg(msg, &mut frame_counter)
- }
- Err(..) => { RenderBackendStatus::ShutDown(None) }
- };
- }
+ while let Ok(msg) = self.scene_rx.try_recv() {
+ profile_scope!("rb_msg");
- if let RenderBackendStatus::StopRenderBackend = status {
- while let Ok(msg) = self.api_rx.recv() {
match msg {
- ApiMsg::SceneBuilderResult(SceneBuilderResult::ExternalEvent(evt)) => {
- self.notifier.external_event(evt);
+ SceneBuilderResult::Transactions(txns, result_tx) => {
+ self.process_transaction(
+ txns,
+ result_tx,
+ &mut frame_counter,
+ &mut profile_counters,
+ );
+ self.bookkeep_after_frames();
+ },
+ #[cfg(feature = "capture")]
+ SceneBuilderResult::CapturedTransactions(txns, capture_config, result_tx) => {
+ if let Some(ref mut old_config) = self.capture_config {
+ assert!(old_config.scene_id <= capture_config.scene_id);
+ if old_config.scene_id < capture_config.scene_id {
+ old_config.scene_id = capture_config.scene_id;
+ old_config.frame_id = 0;
+ }
+ } else {
+ self.capture_config = Some(capture_config);
+ }
+
+ let built_frame = self.process_transaction(
+ txns,
+ result_tx,
+ &mut frame_counter,
+ &mut profile_counters,
+ );
+
+ if built_frame {
+ self.save_capture_sequence();
+ }
+
+ self.bookkeep_after_frames();
+ },
+ SceneBuilderResult::GetGlyphDimensions(request) => {
+ let mut glyph_dimensions = Vec::with_capacity(request.glyph_indices.len());
+ if let Some(base) = self.resource_cache.get_font_instance(request.key) {
+ let font = FontInstance::from_base(Arc::clone(&base));
+ for glyph_index in &request.glyph_indices {
+ let glyph_dim = self.resource_cache.get_glyph_dimensions(&font, *glyph_index);
+ glyph_dimensions.push(glyph_dim);
+ }
+ }
+ request.sender.send(glyph_dimensions).unwrap();
+ }
+ SceneBuilderResult::GetGlyphIndices(request) => {
+ let mut glyph_indices = Vec::with_capacity(request.text.len());
+ for ch in request.text.chars() {
+ let index = self.resource_cache.get_glyph_index(request.key, ch);
+ glyph_indices.push(index);
+ }
+ request.sender.send(glyph_indices).unwrap();
}
- ApiMsg::SceneBuilderResult(SceneBuilderResult::FlushComplete(tx)) => {
- // If somebody's blocked waiting for a flush, how did they
- // trigger the RB thread to shut down? This shouldn't happen
- // but handle it gracefully anyway.
- debug_assert!(false);
+ SceneBuilderResult::FlushComplete(tx) => {
tx.send(()).ok();
}
- ApiMsg::SceneBuilderResult(SceneBuilderResult::ShutDown(sender)) => {
- info!("Recycling stats: {:?}", self.recycler);
- status = RenderBackendStatus::ShutDown(sender);
- break;
- }
- _ => {},
+ SceneBuilderResult::ExternalEvent(evt) => {
+ self.notifier.external_event(evt);
+ }
+ SceneBuilderResult::ClearNamespace(id) => {
+ self.resource_cache.clear_namespace(id);
+ self.documents.retain(|doc_id, _doc| doc_id.namespace_id != id);
+ if let Some(handler) = &mut self.blob_image_handler {
+ handler.clear_namespace(id);
+ }
+ }
+ SceneBuilderResult::Stopped => {
+ panic!("We haven't sent a Stop yet, how did we get a Stopped back?");
+ }
+ SceneBuilderResult::DocumentsForDebugger(json) => {
+ let msg = ResultMsg::DebugOutput(DebugOutput::FetchDocuments(json));
+ self.result_tx.send(msg).unwrap();
+ self.notifier.wake_up();
+ }
}
}
+
+ status = match self.api_rx.recv() {
+ Ok(msg) => {
+ self.process_api_msg(msg, &mut profile_counters, &mut frame_counter)
+ }
+ Err(..) => { RenderBackendStatus::ShutDown(None) }
+ };
}
+ let _ = self.low_priority_scene_tx.send(SceneBuilderRequest::Stop);
// Ensure we read every in-flight message the scene builder sends us,
// otherwise the scene builder might panic.
- while let Ok(msg) = self.api_rx.try_recv() {
+ while let Ok(msg) = self.scene_rx.recv() {
match msg {
- ApiMsg::SceneBuilderResult(SceneBuilderResult::FlushComplete(tx)) => {
+ SceneBuilderResult::FlushComplete(tx) => {
// If somebody's blocked waiting for a flush, how did they
// trigger the RB thread to shut down? This shouldn't happen
// but handle it gracefully anyway.
debug_assert!(false);
tx.send(()).ok();
}
- _ => {},
+ SceneBuilderResult::Stopped => break,
+ _ => continue,
}
}
@@ -970,33 +969,41 @@ impl RenderBackend {
mut txns: Vec<Box<BuiltTransaction>>,
result_tx: Option<Sender<SceneSwapResult>>,
frame_counter: &mut u32,
+ profile_counters: &mut BackendProfileCounters,
) -> bool {
self.prepare_for_frames();
self.maybe_force_nop_documents(
frame_counter,
+ profile_counters,
|document_id| txns.iter().any(|txn| txn.document_id == document_id));
let mut built_frame = false;
for mut txn in txns.drain(..) {
let has_built_scene = txn.built_scene.is_some();
+ if let Some(timings) = txn.timings {
+ if has_built_scene {
+ profile_counters.scene_changed = true;
+ }
+
+ profile_counters.txn.set(
+ timings.builder_start_time_ns,
+ timings.builder_end_time_ns,
+ timings.send_time_ns,
+ timings.scene_build_start_time_ns,
+ timings.scene_build_end_time_ns,
+ timings.display_list_len,
+ );
+ }
+
if let Some(doc) = self.documents.get_mut(&txn.document_id) {
doc.removed_pipelines.append(&mut txn.removed_pipelines);
doc.view.scene = txn.view;
- doc.profile.merge(&mut txn.profile);
-
- doc.frame_stats = if let Some(stats) = &doc.frame_stats {
- Some(stats.merge(&txn.frame_stats))
- } else {
- Some(txn.frame_stats)
- };
if let Some(built_scene) = txn.built_scene.take() {
doc.new_async_scene_ready(
built_scene,
&mut self.recycler,
- &mut self.tile_caches,
- &mut self.resource_cache,
);
}
@@ -1010,7 +1017,7 @@ impl RenderBackend {
self.tile_cache_logger.serialize_updates(&updates);
}
}
- doc.data_stores.apply_updates(updates, &mut doc.profile);
+ doc.data_stores.apply_updates(updates, profile_counters);
}
// Build the hit tester while the APZ lock is held so that its content
@@ -1020,7 +1027,7 @@ impl RenderBackend {
}
if let Some(ref tx) = result_tx {
- let (resume_tx, resume_rx) = single_msg_channel();
+ let (resume_tx, resume_rx) = channel();
tx.send(SceneSwapResult::Complete(resume_tx)).unwrap();
// Block until the post-swap hook has completed on
// the scene builder thread. We need to do this before
@@ -1034,12 +1041,6 @@ impl RenderBackend {
.spatial_tree
.discard_frame_state_for_pipeline(*pipeline_id);
}
-
- self.resource_cache.add_rasterized_blob_images(
- txn.rasterized_blobs.take(),
- &mut doc.profile,
- );
-
} else {
// The document was removed while we were building it, skip it.
// TODO: we might want to just ensure that removed documents are
@@ -1050,15 +1051,20 @@ impl RenderBackend {
continue;
}
+ self.resource_cache.add_rasterized_blob_images(
+ txn.rasterized_blobs.take(),
+ &mut profile_counters.resources.texture_cache,
+ );
+
built_frame |= self.update_document(
txn.document_id,
txn.resource_updates.take(),
txn.frame_ops.take(),
txn.notifications.take(),
txn.render_frame,
- None,
txn.invalidate_rendered_frame,
frame_counter,
+ profile_counters,
has_built_scene,
);
}
@@ -1069,9 +1075,23 @@ impl RenderBackend {
fn process_api_msg(
&mut self,
msg: ApiMsg,
+ profile_counters: &mut BackendProfileCounters,
frame_counter: &mut u32,
) -> RenderBackendStatus {
match msg {
+ ApiMsg::WakeUp => {}
+ ApiMsg::WakeSceneBuilder => {
+ self.scene_tx.send(SceneBuilderRequest::WakeUp).unwrap();
+ }
+ ApiMsg::FlushSceneBuilder(tx) => {
+ self.low_priority_scene_tx.send(SceneBuilderRequest::Flush(tx)).unwrap();
+ }
+ ApiMsg::GetGlyphDimensions(request) => {
+ self.scene_tx.send(SceneBuilderRequest::GetGlyphDimensions(request)).unwrap();
+ }
+ ApiMsg::GetGlyphIndices(request) => {
+ self.scene_tx.send(SceneBuilderRequest::GetGlyphIndices(request)).unwrap();
+ }
ApiMsg::CloneApi(sender) => {
assert!(!self.namespace_alloc_by_client);
sender.send(self.next_namespace_id()).unwrap();
@@ -1080,14 +1100,32 @@ impl RenderBackend {
assert!(self.namespace_alloc_by_client);
debug_assert!(!self.documents.iter().any(|(did, _doc)| did.namespace_id == namespace_id));
}
- ApiMsg::AddDocument(document_id, initial_size) => {
+ ApiMsg::AddDocument(document_id, initial_size, layer) => {
let document = Document::new(
document_id,
initial_size,
+ layer,
self.default_device_pixel_ratio,
);
let old = self.documents.insert(document_id, document);
debug_assert!(old.is_none());
+
+ self.scene_tx.send(
+ SceneBuilderRequest::AddDocument(document_id, initial_size, layer)
+ ).unwrap();
+
+ }
+ ApiMsg::DeleteDocument(document_id) => {
+ self.documents.remove(&document_id);
+ self.low_priority_scene_tx.send(
+ SceneBuilderRequest::DeleteDocument(document_id)
+ ).unwrap();
+ }
+ ApiMsg::ExternalEvent(evt) => {
+ self.low_priority_scene_tx.send(SceneBuilderRequest::ExternalEvent(evt)).unwrap();
+ }
+ ApiMsg::ClearNamespace(id) => {
+ self.low_priority_scene_tx.send(SceneBuilderRequest::ClearNamespace(id)).unwrap();
}
ApiMsg::MemoryPressure => {
// This is drastic. It will basically flush everything out of the cache,
@@ -1102,17 +1140,13 @@ impl RenderBackend {
self.gpu_cache.clear();
- for (_, doc) in &mut self.documents {
- doc.scratch.memory_pressure();
- }
-
let resource_updates = self.resource_cache.pending_updates();
let msg = ResultMsg::UpdateResources {
resource_updates,
memory_pressure: true,
};
self.result_tx.send(msg).unwrap();
- self.notifier.wake_up(false);
+ self.notifier.wake_up();
}
ApiMsg::ReportMemory(tx) => {
self.report_memory(tx);
@@ -1135,9 +1169,19 @@ impl RenderBackend {
return RenderBackendStatus::Continue;
}
+ DebugCommand::FetchDocuments => {
+ // Ask the SceneBuilderThread to send a JSON representation of the documents,
+ // which will be forwarded to the Renderer.
+ self.send_backend_message(BackendSceneBuilderRequest::DocumentsForDebugger);
+ return RenderBackendStatus::Continue;
+ }
+ DebugCommand::FetchClipScrollTree => {
+ let json = self.get_spatial_tree_for_debugger();
+ ResultMsg::DebugOutput(DebugOutput::FetchClipScrollTree(json))
+ }
#[cfg(feature = "capture")]
DebugCommand::SaveCapture(root, bits) => {
- let output = self.save_capture(root, bits);
+ let output = self.save_capture(root, bits, profile_counters);
ResultMsg::DebugOutput(output)
},
#[cfg(feature = "capture")]
@@ -1161,7 +1205,7 @@ impl RenderBackend {
config.frame_id = frame_id;
}
- self.load_capture(config);
+ self.load_capture(config, profile_counters);
for (id, doc) in &self.documents {
let captured = CapturedDocument {
@@ -1213,19 +1257,19 @@ impl RenderBackend {
return RenderBackendStatus::Continue;
}
DebugCommand::SimulateLongSceneBuild(time_ms) => {
- let _ = self.scene_tx.send(SceneBuilderRequest::SimulateLongSceneBuild(time_ms));
+ self.scene_tx.send(SceneBuilderRequest::SimulateLongSceneBuild(time_ms)).unwrap();
+ return RenderBackendStatus::Continue;
+ }
+ DebugCommand::SimulateLongLowPrioritySceneBuild(time_ms) => {
+ self.low_priority_scene_tx.send(
+ SceneBuilderRequest::SimulateLongLowPrioritySceneBuild(time_ms)
+ ).unwrap();
return RenderBackendStatus::Continue;
}
DebugCommand::SetFlags(flags) => {
self.resource_cache.set_debug_flags(flags);
self.gpu_cache.set_debug_flags(flags);
- let force_invalidation = flags.contains(DebugFlags::FORCE_PICTURE_INVALIDATION);
- if self.frame_config.force_invalidation != force_invalidation {
- self.frame_config.force_invalidation = force_invalidation;
- self.update_frame_builder_config();
- }
-
// If we're toggling on the GPU cache debug display, we
// need to blow away the cache. This is because we only
// send allocation/free notifications to the renderer
@@ -1246,108 +1290,19 @@ impl RenderBackend {
_ => ResultMsg::DebugCommand(option),
};
self.result_tx.send(msg).unwrap();
- self.notifier.wake_up(true);
+ self.notifier.wake_up();
+ }
+ ApiMsg::ShutDown(sender) => {
+ info!("Recycling stats: {:?}", self.recycler);
+ return RenderBackendStatus::ShutDown(sender);
}
ApiMsg::UpdateDocuments(transaction_msgs) => {
self.prepare_transactions(
transaction_msgs,
frame_counter,
+ profile_counters,
);
}
- ApiMsg::SceneBuilderResult(msg) => {
- return self.process_scene_builder_result(msg, frame_counter);
- }
- }
-
- RenderBackendStatus::Continue
- }
-
- fn process_scene_builder_result(
- &mut self,
- msg: SceneBuilderResult,
- frame_counter: &mut u32,
- ) -> RenderBackendStatus {
- profile_scope!("sb_msg");
-
- match msg {
- SceneBuilderResult::Transactions(txns, result_tx) => {
- self.process_transaction(
- txns,
- result_tx,
- frame_counter,
- );
- self.bookkeep_after_frames();
- },
- #[cfg(feature = "capture")]
- SceneBuilderResult::CapturedTransactions(txns, capture_config, result_tx) => {
- if let Some(ref mut old_config) = self.capture_config {
- assert!(old_config.scene_id <= capture_config.scene_id);
- if old_config.scene_id < capture_config.scene_id {
- old_config.scene_id = capture_config.scene_id;
- old_config.frame_id = 0;
- }
- } else {
- self.capture_config = Some(capture_config);
- }
-
- let built_frame = self.process_transaction(
- txns,
- result_tx,
- frame_counter,
- );
-
- if built_frame {
- self.save_capture_sequence();
- }
-
- self.bookkeep_after_frames();
- },
- #[cfg(feature = "capture")]
- SceneBuilderResult::StopCaptureSequence => {
- self.capture_config = None;
- }
- SceneBuilderResult::GetGlyphDimensions(request) => {
- let mut glyph_dimensions = Vec::with_capacity(request.glyph_indices.len());
- if let Some(base) = self.resource_cache.get_font_instance(request.key) {
- let font = FontInstance::from_base(Arc::clone(&base));
- for glyph_index in &request.glyph_indices {
- let glyph_dim = self.resource_cache.get_glyph_dimensions(&font, *glyph_index);
- glyph_dimensions.push(glyph_dim);
- }
- }
- request.sender.send(glyph_dimensions).unwrap();
- }
- SceneBuilderResult::GetGlyphIndices(request) => {
- let mut glyph_indices = Vec::with_capacity(request.text.len());
- for ch in request.text.chars() {
- let index = self.resource_cache.get_glyph_index(request.key, ch);
- glyph_indices.push(index);
- }
- request.sender.send(glyph_indices).unwrap();
- }
- SceneBuilderResult::FlushComplete(tx) => {
- tx.send(()).ok();
- }
- SceneBuilderResult::ExternalEvent(evt) => {
- self.notifier.external_event(evt);
- }
- SceneBuilderResult::ClearNamespace(id) => {
- self.resource_cache.clear_namespace(id);
- self.documents.retain(|doc_id, _doc| doc_id.namespace_id != id);
- if let Some(handler) = &mut self.blob_image_handler {
- handler.clear_namespace(id);
- }
- }
- SceneBuilderResult::DeleteDocument(document_id) => {
- self.documents.remove(&document_id);
- }
- SceneBuilderResult::StopRenderBackend => {
- return RenderBackendStatus::StopRenderBackend;
- }
- SceneBuilderResult::ShutDown(sender) => {
- info!("Recycling stats: {:?}", self.recycler);
- return RenderBackendStatus::ShutDown(sender);
- }
}
RenderBackendStatus::Continue
@@ -1355,7 +1310,7 @@ impl RenderBackend {
fn update_frame_builder_config(&self) {
self.send_backend_message(
- SceneBuilderRequest::SetFrameBuilderConfig(
+ BackendSceneBuilderRequest::SetFrameBuilderConfig(
self.frame_config.clone()
)
);
@@ -1377,37 +1332,55 @@ impl RenderBackend {
&mut self,
txns: Vec<Box<TransactionMsg>>,
frame_counter: &mut u32,
+ profile_counters: &mut BackendProfileCounters,
) {
- self.prepare_for_frames();
- self.maybe_force_nop_documents(
- frame_counter,
- |document_id| txns.iter().any(|txn| txn.document_id == document_id));
+ let mut use_scene_builder = txns.iter()
+ .any(|transaction_msg| transaction_msg.use_scene_builder_thread);
+ let use_high_priority = txns.iter()
+ .any(|transaction_msg| !transaction_msg.low_priority);
+
+ use_scene_builder = use_scene_builder || txns.iter().any(|txn| {
+ !txn.scene_ops.is_empty()
+ || !txn.blob_requests.is_empty()
+ || txn.blob_rasterizer.is_some()
+ });
- let mut built_frame = false;
- for mut txn in txns {
- if txn.generate_frame.as_bool() {
- txn.profile.end_time(profiler::API_SEND_TIME);
+ if !use_scene_builder {
+ self.prepare_for_frames();
+ self.maybe_force_nop_documents(
+ frame_counter,
+ profile_counters,
+ |document_id| txns.iter().any(|txn| txn.document_id == document_id));
+
+ let mut built_frame = false;
+ for mut txn in txns {
+ built_frame |= self.update_document(
+ txn.document_id,
+ txn.resource_updates.take(),
+ txn.frame_ops.take(),
+ txn.notifications.take(),
+ txn.generate_frame,
+ txn.invalidate_rendered_frame,
+ frame_counter,
+ profile_counters,
+ false
+ );
+ }
+ if built_frame {
+ #[cfg(feature = "capture")]
+ self.save_capture_sequence();
}
+ self.bookkeep_after_frames();
+ return;
+ }
- self.documents.get_mut(&txn.document_id).unwrap().profile.merge(&mut txn.profile);
+ let tx = if use_high_priority {
+ &self.scene_tx
+ } else {
+ &self.low_priority_scene_tx
+ };
- built_frame |= self.update_document(
- txn.document_id,
- txn.resource_updates.take(),
- txn.frame_ops.take(),
- txn.notifications.take(),
- txn.generate_frame.as_bool(),
- txn.generate_frame.id(),
- txn.invalidate_rendered_frame,
- frame_counter,
- false
- );
- }
- if built_frame {
- #[cfg(feature = "capture")]
- self.save_capture_sequence();
- }
- self.bookkeep_after_frames();
+ tx.send(SceneBuilderRequest::Transactions(txns)).unwrap();
}
/// In certain cases, resources shared by multiple documents have to run
@@ -1418,6 +1391,7 @@ impl RenderBackend {
/// to force a frame build.
fn maybe_force_nop_documents<F>(&mut self,
frame_counter: &mut u32,
+ profile_counters: &mut BackendProfileCounters,
document_already_present: F) where
F: Fn(DocumentId) -> bool {
if self.requires_frame_build() {
@@ -1434,9 +1408,9 @@ impl RenderBackend {
Vec::default(),
Vec::default(),
false,
- None,
false,
frame_counter,
+ profile_counters,
false);
}
#[cfg(feature = "capture")]
@@ -1454,24 +1428,24 @@ impl RenderBackend {
mut frame_ops: Vec<FrameMsg>,
mut notifications: Vec<NotificationRequest>,
mut render_frame: bool,
- generated_frame_id: Option<u64>,
invalidate_rendered_frame: bool,
frame_counter: &mut u32,
+ profile_counters: &mut BackendProfileCounters,
has_built_scene: bool,
) -> bool {
let requested_frame = render_frame;
let requires_frame_build = self.requires_frame_build();
let doc = self.documents.get_mut(&document_id).unwrap();
-
// If we have a sampler, get more frame ops from it and add them
// to the transaction. This is a hook to allow the WR user code to
// fiddle with things after a potentially long scene build, but just
// before rendering. This is useful for rendering with the latest
// async transforms.
- if requested_frame {
+ if requested_frame || has_built_scene {
if let Some(ref sampler) = self.sampler {
- frame_ops.append(&mut sampler.sample(document_id, generated_frame_id));
+ frame_ops.append(&mut sampler.sample(document_id,
+ &doc.scene.pipeline_epochs));
}
}
@@ -1481,6 +1455,7 @@ impl RenderBackend {
// for something wrench specific and we should remove it.
let mut scroll = false;
for frame_msg in frame_ops {
+ let _timer = profile_counters.total_time.timer();
let op = doc.process_frame_msg(frame_msg);
scroll |= op.scroll;
}
@@ -1493,7 +1468,7 @@ impl RenderBackend {
self.resource_cache.post_scene_building_update(
resource_updates,
- &mut doc.profile,
+ &mut profile_counters.resources,
);
if doc.dynamic_properties.flush_pending_updates() {
@@ -1521,9 +1496,13 @@ impl RenderBackend {
// external image with NativeTexture or when platform requested to composite frame.
if invalidate_rendered_frame {
doc.rendered_frame_is_valid = false;
- if doc.scene.config.compositor_kind.should_redraw_on_invalidation() {
- let msg = ResultMsg::ForceRedraw;
- self.result_tx.send(msg).unwrap();
+ if let CompositorKind::Draw { max_partial_present_rects, .. } = doc.scene.config.compositor_kind {
+
+                // When partial present is enabled, we need to force a redraw.
+ if max_partial_present_rects > 0 {
+ let msg = ResultMsg::ForceRedraw;
+ self.result_tx.send(msg).unwrap();
+ }
}
}
@@ -1535,17 +1514,15 @@ impl RenderBackend {
// borrow ck hack for profile_counters
let (pending_update, rendered_document) = {
+ let _timer = profile_counters.total_time.timer();
let frame_build_start_time = precise_time_ns();
- let frame_stats = doc.frame_stats.take();
-
let rendered_document = doc.build_frame(
&mut self.resource_cache,
&mut self.gpu_cache,
+ &mut profile_counters.resources,
self.debug_flags,
&mut self.tile_cache_logger,
- &mut self.tile_caches,
- frame_stats
);
debug!("generated frame for document {:?} with {} passes",
@@ -1605,8 +1582,10 @@ impl RenderBackend {
document_id,
rendered_document,
pending_update,
+ profile_counters.clone()
);
self.result_tx.send(msg).unwrap();
+ profile_counters.reset();
} else if requested_frame {
// WR-internal optimization to avoid doing a bunch of render work if
// there's no pixels. We still want to pretend to render and request
@@ -1646,8 +1625,32 @@ impl RenderBackend {
build_frame
}
- fn send_backend_message(&self, msg: SceneBuilderRequest) {
- self.scene_tx.send(msg).unwrap();
+ fn send_backend_message(&self, msg: BackendSceneBuilderRequest) {
+ self.backend_scene_tx.send(msg).unwrap();
+ self.low_priority_scene_tx.send(SceneBuilderRequest::BackendMessage).unwrap();
+ }
+
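send_backend_message now pairs the payload with a wake-up: the real message travels on backend_scene_tx while a lightweight BackendMessage marker is pushed onto the low-priority scene-builder queue, so the thread blocked on that queue knows to drain the backend channel. The same two-channel wake-up pattern in miniature, using std::sync::mpsc (channel names are illustrative):

    use std::sync::mpsc;

    fn main() {
        let (backend_tx, backend_rx) = mpsc::channel::<&str>();
        let (wake_tx, wake_rx) = mpsc::channel::<()>();

        // Mirrors send_backend_message: payload first, then the wake-up
        // marker on the queue the scene builder actually blocks on.
        backend_tx.send("SetFrameBuilderConfig").unwrap();
        wake_tx.send(()).unwrap();

        // Scene-builder side: the marker says "go drain the backend queue".
        wake_rx.recv().unwrap();
        assert_eq!(backend_rx.try_recv().unwrap(), "SetFrameBuilderConfig");
    }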
+ #[cfg(not(feature = "debugger"))]
+ fn get_spatial_tree_for_debugger(&self) -> String {
+ String::new()
+ }
+
+ #[cfg(feature = "debugger")]
+ fn get_spatial_tree_for_debugger(&self) -> String {
+ use crate::print_tree::PrintableTree;
+
+ let mut debug_root = debug_server::SpatialTreeList::new();
+
+ for (_, doc) in &self.documents {
+ let debug_node = debug_server::TreeNode::new("document spatial tree");
+ let mut builder = debug_server::TreeNodeBuilder::new(debug_node);
+
+ doc.scene.spatial_tree.print_with(&mut builder);
+
+ debug_root.add(builder.build());
+ }
+
+ serde_json::to_string(&debug_root).unwrap()
}
fn report_memory(&mut self, tx: Sender<Box<MemoryReport>>) {
@@ -1666,15 +1669,12 @@ impl RenderBackend {
}
(*report) += self.resource_cache.report_memory(op);
- report.texture_cache_structures = self.resource_cache
- .texture_cache
- .report_memory(ops);
// Send a message to report memory on the scene-builder thread, which
// will add its report to this one and send the result back to the original
// thread waiting on the request.
self.send_backend_message(
- SceneBuilderRequest::ReportMemory(report, tx)
+ BackendSceneBuilderRequest::ReportMemory(report, tx)
);
}
@@ -1709,6 +1709,7 @@ impl RenderBackend {
&mut self,
root: PathBuf,
bits: CaptureBits,
+ profile_counters: &mut BackendProfileCounters,
) -> DebugOutput {
use std::fs;
use crate::render_task_graph::dump_render_tasks_as_svg;
@@ -1731,10 +1732,9 @@ impl RenderBackend {
let rendered_document = doc.build_frame(
&mut self.resource_cache,
&mut self.gpu_cache,
+ &mut profile_counters.resources,
self.debug_flags,
&mut self.tile_cache_logger,
- &mut self.tile_caches,
- None,
);
// After we rendered the frames, there are pending updates to both
// GPU cache and resources. Instead of serializing them, we are going to make sure
@@ -1753,34 +1753,15 @@ impl RenderBackend {
let file_name = format!("built-clips-{}-{}", id.namespace_id.0, id.id);
config.serialize_for_frame(&doc.scene.clip_store, file_name);
let file_name = format!("scratch-{}-{}", id.namespace_id.0, id.id);
- config.serialize_for_frame(&doc.scratch.primitive, file_name);
+ config.serialize_for_frame(&doc.scratch, file_name);
let file_name = format!("render-tasks-{}-{}.svg", id.namespace_id.0, id.id);
- let mut render_tasks_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
+ let mut svg_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
.expect("Failed to open the SVG file.");
dump_render_tasks_as_svg(
&rendered_document.frame.render_tasks,
- &mut render_tasks_file
+ &rendered_document.frame.passes,
+ &mut svg_file
).unwrap();
-
- let file_name = format!("texture-cache-color-linear-{}-{}.svg", id.namespace_id.0, id.id);
- let mut texture_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
- .expect("Failed to open the SVG file.");
- self.resource_cache.texture_cache.dump_color8_linear_as_svg(&mut texture_file).unwrap();
-
- let file_name = format!("texture-cache-color8-glyphs-{}-{}.svg", id.namespace_id.0, id.id);
- let mut texture_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
- .expect("Failed to open the SVG file.");
- self.resource_cache.texture_cache.dump_color8_glyphs_as_svg(&mut texture_file).unwrap();
-
- let file_name = format!("texture-cache-alpha8-glyphs-{}-{}.svg", id.namespace_id.0, id.id);
- let mut texture_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
- .expect("Failed to open the SVG file.");
- self.resource_cache.texture_cache.dump_alpha8_glyphs_as_svg(&mut texture_file).unwrap();
-
- let file_name = format!("texture-cache-alpha8-linear-{}-{}.svg", id.namespace_id.0, id.id);
- let mut texture_file = fs::File::create(&config.file_path_for_frame(file_name, "svg"))
- .expect("Failed to open the SVG file.");
- self.resource_cache.texture_cache.dump_alpha8_linear_as_svg(&mut texture_file).unwrap();
}
let data_stores_name = format!("data-stores-{}-{}", id.namespace_id.0, id.id);
@@ -1800,7 +1781,7 @@ impl RenderBackend {
debug!("\tscene builder");
self.send_backend_message(
- SceneBuilderRequest::SaveScene(config.clone())
+ BackendSceneBuilderRequest::SaveScene(config.clone())
);
debug!("\tresource cache");
@@ -1849,7 +1830,7 @@ impl RenderBackend {
bits: CaptureBits,
) {
self.send_backend_message(
- SceneBuilderRequest::StartCaptureSequence(CaptureConfig::new(root, bits))
+ BackendSceneBuilderRequest::StartCaptureSequence(CaptureConfig::new(root, bits))
);
}
@@ -1858,7 +1839,7 @@ impl RenderBackend {
&mut self,
) {
self.send_backend_message(
- SceneBuilderRequest::StopCaptureSequence
+ BackendSceneBuilderRequest::StopCaptureSequence
);
}
@@ -1866,6 +1847,7 @@ impl RenderBackend {
fn load_capture(
&mut self,
mut config: CaptureConfig,
+ profile_counters: &mut BackendProfileCounters,
) {
debug!("capture: loading {:?}", config.frame_root());
let backend = config.deserialize_for_frame::<PlainRenderBackend, _>("backend")
@@ -1971,13 +1953,10 @@ impl RenderBackend {
rendered_frame_is_valid: false,
has_built_scene: false,
data_stores,
- scratch: ScratchBuffer::default(),
+ scratch: PrimitiveScratchBuffer::new(),
+ render_task_counters: RenderTaskGraphCounters::new(),
loaded_scene: scene.clone(),
prev_composite_descriptor: CompositeDescriptor::empty(),
- dirty_rects_are_valid: false,
- profile: TransactionProfile::new(),
- rg_builder: RenderTaskGraphBuilder::new(),
- frame_stats: None,
};
entry.insert(doc);
}
@@ -1994,10 +1973,12 @@ impl RenderBackend {
let msg_publish = ResultMsg::PublishDocument(
id,
- RenderedDocument { frame, is_new_scene: true, profile: TransactionProfile::new(), frame_stats: None },
+ RenderedDocument { frame, is_new_scene: true },
self.resource_cache.pending_updates(),
+ profile_counters.clone(),
);
self.result_tx.send(msg_publish).unwrap();
+ profile_counters.reset();
self.notifier.new_frame_ready(id, false, true, None);
@@ -2021,7 +2002,7 @@ impl RenderBackend {
if !scenes_to_build.is_empty() {
self.send_backend_message(
- SceneBuilderRequest::LoadScenes(scenes_to_build)
+ BackendSceneBuilderRequest::LoadScenes(scenes_to_build)
);
}
}
diff --git a/third_party/webrender/webrender/src/render_target.rs b/third_party/webrender/webrender/src/render_target.rs
index 301ce4ec4d8..9c62297c929 100644
--- a/third_party/webrender/webrender/src/render_target.rs
+++ b/third_party/webrender/webrender/src/render_target.rs
@@ -4,32 +4,42 @@
use api::units::*;
-use api::{ColorF, ImageFormat, LineOrientation, BorderStyle};
-use crate::batch::{AlphaBatchBuilder, AlphaBatchContainer, BatchTextures};
+use api::{ColorF, PremultipliedColorF, ImageFormat, LineOrientation, BorderStyle, PipelineId};
+use crate::batch::{AlphaBatchBuilder, AlphaBatchContainer, BatchTextures, resolve_image};
use crate::batch::{ClipBatcher, BatchBuilder};
use crate::spatial_tree::{SpatialTree, ROOT_SPATIAL_NODE_INDEX};
use crate::clip::ClipStore;
use crate::composite::CompositeState;
-use crate::frame_builder::FrameGlobalResources;
+use crate::device::Texture;
+use crate::frame_builder::{FrameGlobalResources};
use crate::gpu_cache::{GpuCache, GpuCacheAddress};
use crate::gpu_types::{BorderInstance, SvgFilterInstance, BlurDirection, BlurInstance, PrimitiveHeaders, ScalingInstance};
use crate::gpu_types::{TransformPalette, ZBufferIdGenerator};
-use crate::internal_types::{FastHashMap, TextureSource, CacheTextureId};
-use crate::picture::{SliceId, SurfaceInfo, ResolvedSurfaceTexture, TileCacheInstance};
-use crate::prim_store::{PrimitiveStore, DeferredResolve, PrimitiveScratchBuffer};
-use crate::prim_store::gradient::{
- FastLinearGradientInstance, LinearGradientInstance, RadialGradientInstance,
- ConicGradientInstance,
-};
+use crate::internal_types::{FastHashMap, TextureSource, LayerIndex, Swizzle, SavedTargetIndex};
+use crate::picture::{SurfaceInfo, ResolvedSurfaceTexture};
+use crate::prim_store::{PrimitiveStore, DeferredResolve, PrimitiveScratchBuffer, PrimitiveVisibilityMask};
+use crate::prim_store::gradient::GRADIENT_FP_STOPS;
use crate::render_backend::DataStores;
-use crate::render_task::{RenderTaskKind, RenderTaskAddress};
+use crate::render_task::{RenderTaskKind, RenderTaskAddress, ClearMode, BlitSource};
use crate::render_task::{RenderTask, ScalingTask, SvgFilterInfo};
use crate::render_task_graph::{RenderTaskGraph, RenderTaskId};
use crate::resource_cache::ResourceCache;
+use crate::texture_allocator::{ArrayAllocationTracker, FreeRectSlice};
+use std::{cmp, mem};
+
const STYLE_SOLID: i32 = ((BorderStyle::Solid as i32) << 8) | ((BorderStyle::Solid as i32) << 16);
const STYLE_MASK: i32 = 0x00FF_FF00;
+/// According to apitrace, textures larger than 2048 break fast clear
+/// optimizations on some Intel drivers. We sometimes need to go larger, but
+/// we try to avoid it. This can go away when proper tiling support lands,
+/// since we can then split large primitives across multiple textures.
+const IDEAL_MAX_TEXTURE_DIMENSION: i32 = 2048;
+/// If we ever need a larger texture than the ideal, we'd better round it up to a
+/// reasonable number in order to have a bit of leeway in placing things inside.
+const TEXTURE_DIMENSION_MASK: i32 = 0xFF;
+
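The mask arithmetic implied by TEXTURE_DIMENSION_MASK is the standard round-up trick: adding the mask and then clearing its bits rounds a dimension up to the next multiple of mask + 1 (here 256). A self-contained check of that arithmetic:

    const TEXTURE_DIMENSION_MASK: i32 = 0xFF;

    fn round_up(dim: i32) -> i32 {
        // Adding the mask then clearing the low 8 bits rounds `dim`
        // up to the next multiple of 256.
        (dim + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK
    }

    fn main() {
        assert_eq!(round_up(2048), 2048); // already aligned, unchanged
        assert_eq!(round_up(2049), 2304); // bumped to the next multiple of 256
        assert_eq!(round_up(2500), 2560);
    }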
/// A tag used to identify the output format of a `RenderTarget`.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -59,7 +69,6 @@ pub struct RenderTargetContext<'a, 'rc> {
pub scratch: &'a PrimitiveScratchBuffer,
pub screen_world_rect: WorldRect,
pub globals: &'a FrameGlobalResources,
- pub tile_caches: &'a FastHashMap<SliceId, Box<TileCacheInstance>>,
}
/// Represents a number of rendering operations on a surface.
@@ -78,10 +87,8 @@ pub struct RenderTargetContext<'a, 'rc> {
pub trait RenderTarget {
/// Creates a new RenderTarget of the given type.
fn new(
- texture_id: CacheTextureId,
screen_size: DeviceIntSize,
gpu_supports_fast_clears: bool,
- used_rect: DeviceIntRect,
) -> Self;
/// Optional hook to provide additional processing for the target at the
@@ -90,7 +97,7 @@ pub trait RenderTarget {
&mut self,
_ctx: &mut RenderTargetContext,
_gpu_cache: &mut GpuCache,
- _render_tasks: &RenderTaskGraph,
+ _render_tasks: &mut RenderTaskGraph,
_deferred_resolves: &mut Vec<DeferredResolve>,
_prim_headers: &mut PrimitiveHeaders,
_transforms: &mut TransformPalette,
@@ -116,10 +123,13 @@ pub trait RenderTarget {
render_tasks: &RenderTaskGraph,
clip_store: &ClipStore,
transforms: &mut TransformPalette,
+ deferred_resolves: &mut Vec<DeferredResolve>,
);
fn needs_depth(&self) -> bool;
- fn texture_id(&self) -> CacheTextureId;
+
+ fn used_rect(&self) -> DeviceIntRect;
+ fn add_used(&mut self, rect: DeviceIntRect);
}
/// A series of `RenderTarget` instances, serving as the high-level container
@@ -146,21 +156,40 @@ pub trait RenderTarget {
/// previous pass it depends on.
///
/// Note that in some cases (like drop-shadows), we can depend on the output of
-/// a pass earlier than the immediately-preceding pass.
+/// a pass earlier than the immediately-preceding pass. See `SavedTargetIndex`.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct RenderTargetList<T> {
+ screen_size: DeviceIntSize,
pub format: ImageFormat,
+ /// The maximum width and height of any single primitive we've encountered
+ /// that will be drawn to a dynamic location.
+ ///
+ /// We initially create our per-slice allocators with a width and height of
+ /// IDEAL_MAX_TEXTURE_DIMENSION. If we encounter a larger primitive, the
+ /// allocation will fail, but we'll bump max_dynamic_size, which will cause the
+    /// allocator for the next slice to be just large enough to accommodate it.
+ pub max_dynamic_size: DeviceIntSize,
pub targets: Vec<T>,
+ pub saved_index: Option<SavedTargetIndex>,
+ pub alloc_tracker: ArrayAllocationTracker,
+ gpu_supports_fast_clears: bool,
}
impl<T: RenderTarget> RenderTargetList<T> {
pub fn new(
+ screen_size: DeviceIntSize,
format: ImageFormat,
+ gpu_supports_fast_clears: bool,
) -> Self {
RenderTargetList {
+ screen_size,
format,
+ max_dynamic_size: DeviceIntSize::new(0, 0),
targets: Vec::new(),
+ saved_index: None,
+ alloc_tracker: ArrayAllocationTracker::new(),
+ gpu_supports_fast_clears,
}
}
@@ -168,16 +197,16 @@ impl<T: RenderTarget> RenderTargetList<T> {
&mut self,
ctx: &mut RenderTargetContext,
gpu_cache: &mut GpuCache,
- render_tasks: &RenderTaskGraph,
+ render_tasks: &mut RenderTaskGraph,
deferred_resolves: &mut Vec<DeferredResolve>,
+ saved_index: Option<SavedTargetIndex>,
prim_headers: &mut PrimitiveHeaders,
transforms: &mut TransformPalette,
z_generator: &mut ZBufferIdGenerator,
composite_state: &mut CompositeState,
) {
- if self.targets.is_empty() {
- return;
- }
+ debug_assert_eq!(None, self.saved_index);
+ self.saved_index = saved_index;
for target in &mut self.targets {
target.build(
@@ -193,9 +222,63 @@ impl<T: RenderTarget> RenderTargetList<T> {
}
}
+ pub fn allocate(
+ &mut self,
+ alloc_size: DeviceIntSize,
+ ) -> (RenderTargetIndex, DeviceIntPoint) {
+ let (free_rect_slice, origin) = match self.alloc_tracker.allocate(&alloc_size) {
+ Some(allocation) => allocation,
+ None => {
+ // Have the allocator restrict slice sizes to our max ideal
+ // dimensions, unless we've already gone bigger on a previous
+ // slice.
+ let rounded_dimensions = DeviceIntSize::new(
+ (self.max_dynamic_size.width + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
+ (self.max_dynamic_size.height + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
+ );
+ let allocator_dimensions = DeviceIntSize::new(
+ cmp::max(IDEAL_MAX_TEXTURE_DIMENSION, rounded_dimensions.width),
+ cmp::max(IDEAL_MAX_TEXTURE_DIMENSION, rounded_dimensions.height),
+ );
+
+ assert!(alloc_size.width <= allocator_dimensions.width &&
+ alloc_size.height <= allocator_dimensions.height);
+ let slice = FreeRectSlice(self.targets.len() as u32);
+ self.targets.push(T::new(self.screen_size, self.gpu_supports_fast_clears));
+
+ self.alloc_tracker.extend(
+ slice,
+ allocator_dimensions,
+ alloc_size,
+ );
+
+ (slice, DeviceIntPoint::zero())
+ }
+ };
+
+ if alloc_size.is_empty() && self.targets.is_empty() {
+            // Push an unused target here, but only if we don't have any yet.
+ self.targets.push(T::new(self.screen_size, self.gpu_supports_fast_clears));
+ }
+
+ self.targets[free_rect_slice.0 as usize]
+ .add_used(DeviceIntRect::new(origin, alloc_size));
+
+ (RenderTargetIndex(free_rect_slice.0 as usize), origin)
+ }
+
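allocate() follows a grow-on-miss pattern: try the existing slices first, and only when the tracker refuses do we append a fresh slice sized to the larger of IDEAL_MAX_TEXTURE_DIMENSION and the rounded-up biggest request seen so far. A sketch of that control flow with a toy first-fit allocator standing in for ArrayAllocationTracker (everything here is illustrative):

    struct ToySlice { capacity: i32, used: i32 }

    struct ToyList {
        ideal: i32,
        max_seen: i32,
        slices: Vec<ToySlice>,
    }

    impl ToyList {
        // Returns (slice index, offset) where `size` units were placed.
        fn allocate(&mut self, size: i32) -> (usize, i32) {
            self.max_seen = self.max_seen.max(size);
            // First-fit over existing slices, mirroring the tracker lookup.
            for (i, s) in self.slices.iter_mut().enumerate() {
                if s.capacity - s.used >= size {
                    let offset = s.used;
                    s.used += size;
                    return (i, offset);
                }
            }
            // Miss: open a new slice large enough for the biggest request so far.
            let capacity = self.ideal.max(self.max_seen);
            self.slices.push(ToySlice { capacity, used: size });
            (self.slices.len() - 1, 0)
        }
    }

    fn main() {
        let mut list = ToyList { ideal: 2048, max_seen: 0, slices: Vec::new() };
        assert_eq!(list.allocate(512), (0, 0));   // opens the first slice
        assert_eq!(list.allocate(512), (0, 512)); // fits in the same slice
        assert_eq!(list.allocate(4096), (1, 0));  // miss: new slice sized 4096
    }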
pub fn needs_depth(&self) -> bool {
self.targets.iter().any(|target| target.needs_depth())
}
+
+ pub fn check_ready(&self, t: &Texture) {
+ let dimensions = t.get_dimensions();
+ assert!(dimensions.width >= self.max_dynamic_size.width);
+ assert!(dimensions.height >= self.max_dynamic_size.height);
+ assert_eq!(t.get_format(), self.format);
+ assert_eq!(t.get_layer_count() as usize, self.targets.len());
+ assert!(t.supports_depth() >= self.needs_depth());
+ }
}
@@ -208,14 +291,16 @@ impl<T: RenderTarget> RenderTargetList<T> {
pub struct ColorRenderTarget {
pub alpha_batch_containers: Vec<AlphaBatchContainer>,
// List of blur operations to apply for this render target.
- pub vertical_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
- pub horizontal_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
+ pub vertical_blurs: Vec<BlurInstance>,
+ pub horizontal_blurs: Vec<BlurInstance>,
+ pub readbacks: Vec<DeviceIntRect>,
pub scalings: FastHashMap<TextureSource, Vec<ScalingInstance>>,
pub svg_filters: Vec<(BatchTextures, Vec<SvgFilterInstance>)>,
pub blits: Vec<BlitJob>,
+ // List of frame buffer outputs for this render target.
+ pub outputs: Vec<FrameOutput>,
alpha_tasks: Vec<RenderTaskId>,
screen_size: DeviceIntSize,
- pub texture_id: CacheTextureId,
// Track the used rect of the render target, so that
// we can set a scissor rect and only clear to the
// used portion of the target as an optimization.
@@ -224,22 +309,21 @@ pub struct ColorRenderTarget {
impl RenderTarget for ColorRenderTarget {
fn new(
- texture_id: CacheTextureId,
screen_size: DeviceIntSize,
_: bool,
- used_rect: DeviceIntRect,
) -> Self {
ColorRenderTarget {
alpha_batch_containers: Vec::new(),
- vertical_blurs: FastHashMap::default(),
- horizontal_blurs: FastHashMap::default(),
+ vertical_blurs: Vec::new(),
+ horizontal_blurs: Vec::new(),
+ readbacks: Vec::new(),
scalings: FastHashMap::default(),
svg_filters: Vec::new(),
blits: Vec::new(),
+ outputs: Vec::new(),
alpha_tasks: Vec::new(),
screen_size,
- texture_id,
- used_rect,
+ used_rect: DeviceIntRect::zero(),
}
}
@@ -247,7 +331,7 @@ impl RenderTarget for ColorRenderTarget {
&mut self,
ctx: &mut RenderTargetContext,
gpu_cache: &mut GpuCache,
- render_tasks: &RenderTaskGraph,
+ render_tasks: &mut RenderTaskGraph,
deferred_resolves: &mut Vec<DeferredResolve>,
prim_headers: &mut PrimitiveHeaders,
transforms: &mut TransformPalette,
@@ -261,6 +345,15 @@ impl RenderTarget for ColorRenderTarget {
profile_scope!("alpha_task");
let task = &render_tasks[*task_id];
+ match task.clear_mode {
+ ClearMode::One |
+ ClearMode::Zero => {
+ panic!("bug: invalid clear mode for color task");
+ }
+ ClearMode::DontCare |
+ ClearMode::Transparent => {}
+ }
+
match task.kind {
RenderTaskKind::Picture(ref pic_task) => {
let pic = &ctx.prim_store.pictures[pic_task.pic_index.0];
@@ -276,7 +369,7 @@ impl RenderTarget for ColorRenderTarget {
}
};
- let target_rect = task.get_target_rect();
+ let (target_rect, _) = task.get_target_rect();
let scissor_rect = if pic_task.can_merge {
None
@@ -284,18 +377,6 @@ impl RenderTarget for ColorRenderTarget {
Some(target_rect)
};
- // Typical workloads have a single or a few batch builders with a
- // large number of batches (regular pictres) and a higher number
- // of batch builders with only a single or two batches (for example
- // rendering isolated primitives to compute their shadows).
- // We can easily guess which category we are in for each picture
- // by checking whether it has multiple clusters.
- let prealloc_batch_count = if pic.prim_list.clusters.len() > 1 {
- 128
- } else {
- 0
- };
-
// TODO(gw): The type names of AlphaBatchBuilder and BatchBuilder
// are still confusing. Once more of the picture caching
// improvement code lands, the AlphaBatchBuilder and
@@ -306,9 +387,8 @@ impl RenderTarget for ColorRenderTarget {
ctx.break_advanced_blend_batches,
ctx.batch_lookback_count,
*task_id,
- (*task_id).into(),
- None,
- prealloc_batch_count,
+ render_tasks.get_task_address(*task_id),
+ PrimitiveVisibilityMask::all(),
);
let mut batch_builder = BatchBuilder::new(
@@ -351,18 +431,15 @@ impl RenderTarget for ColorRenderTarget {
}
}
- fn texture_id(&self) -> CacheTextureId {
- self.texture_id
- }
-
fn add_task(
&mut self,
task_id: RenderTaskId,
- _ctx: &RenderTargetContext,
+ ctx: &RenderTargetContext,
gpu_cache: &mut GpuCache,
render_tasks: &RenderTaskGraph,
_: &ClipStore,
_: &mut TransformPalette,
+ deferred_resolves: &mut Vec<DeferredResolve>,
) {
profile_scope!("add_task");
let task = &render_tasks[task_id];
@@ -372,22 +449,30 @@ impl RenderTarget for ColorRenderTarget {
add_blur_instances(
&mut self.vertical_blurs,
BlurDirection::Vertical,
- task_id.into(),
- task.children[0],
- render_tasks,
+ render_tasks.get_task_address(task_id),
+ render_tasks.get_task_address(task.children[0]),
);
}
RenderTaskKind::HorizontalBlur(..) => {
add_blur_instances(
&mut self.horizontal_blurs,
BlurDirection::Horizontal,
- task_id.into(),
- task.children[0],
- render_tasks,
+ render_tasks.get_task_address(task_id),
+ render_tasks.get_task_address(task.children[0]),
);
}
- RenderTaskKind::Picture(..) => {
+ RenderTaskKind::Picture(ref task_info) => {
+ let pic = &ctx.prim_store.pictures[task_info.pic_index.0];
self.alpha_tasks.push(task_id);
+
+ // If this pipeline is registered as a frame output
+ // store the information necessary to do the copy.
+ if let Some(pipeline_id) = pic.frame_output_pipeline_id {
+ self.outputs.push(FrameOutput {
+ pipeline_id,
+ task_id,
+ });
+ }
}
RenderTaskKind::SvgFilter(ref task_info) => {
add_svg_filter_instances(
@@ -400,32 +485,69 @@ impl RenderTarget for ColorRenderTarget {
task_info.extra_gpu_cache_handle.map(|handle| gpu_cache.get_address(&handle)),
)
}
- RenderTaskKind::Image(..) |
- RenderTaskKind::Cached(..) |
RenderTaskKind::ClipRegion(..) |
RenderTaskKind::Border(..) |
RenderTaskKind::CacheMask(..) |
- RenderTaskKind::FastLinearGradient(..) |
- RenderTaskKind::LinearGradient(..) |
- RenderTaskKind::RadialGradient(..) |
- RenderTaskKind::ConicGradient(..) |
+ RenderTaskKind::Gradient(..) |
RenderTaskKind::LineDecoration(..) => {
panic!("Should not be added to color target!");
}
- RenderTaskKind::Readback(..) => {}
+ RenderTaskKind::Readback(device_rect) => {
+ self.readbacks.push(device_rect);
+ }
RenderTaskKind::Scaling(ref info) => {
add_scaling_instances(
info,
&mut self.scalings,
task,
task.children.first().map(|&child| &render_tasks[child]),
+ ctx.resource_cache,
+ gpu_cache,
+ deferred_resolves,
);
}
RenderTaskKind::Blit(ref task_info) => {
+ let source = match task_info.source {
+ BlitSource::Image { key } => {
+ // Get the cache item for the source texture.
+ let cache_item = resolve_image(
+ key.request,
+ ctx.resource_cache,
+ gpu_cache,
+ deferred_resolves,
+ );
+
+ // Work out a source rect to copy from the texture, depending on whether
+ // a sub-rect is present or not.
+ let source_rect = key.texel_rect.map_or(cache_item.uv_rect.to_i32(), |sub_rect| {
+ DeviceIntRect::new(
+ DeviceIntPoint::new(
+ cache_item.uv_rect.origin.x as i32 + sub_rect.origin.x,
+ cache_item.uv_rect.origin.y as i32 + sub_rect.origin.y,
+ ),
+ sub_rect.size,
+ )
+ });
+
+ // Store the blit job for the renderer to execute, including
+ // the allocated destination rect within this target.
+ BlitJobSource::Texture(
+ cache_item.texture_id,
+ cache_item.texture_layer,
+ source_rect,
+ )
+ }
+ BlitSource::RenderTask { task_id } => {
+ BlitJobSource::RenderTask(task_id)
+ }
+ };
+
let target_rect = task
- .get_target_rect();
+ .get_target_rect()
+ .0
+ .inner_rect(task_info.padding);
self.blits.push(BlitJob {
- source: task_info.source,
+ source,
target_rect,
});
}
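Both the Blit and the Scaling paths derive their source rectangle the same way: use the cached item's full uv_rect unless the key carries a texel sub-rect, in which case translate the sub-rect by the uv_rect origin. A standalone illustration of that map_or pattern, with plain (x, y, w, h) tuples standing in for the Device* rect types:

    /// Returns the rect to copy from, given the cached item's uv_rect and an
    /// optional sub-rect expressed relative to the cached item.
    fn source_rect(
        uv_rect: (i32, i32, i32, i32),
        texel_rect: Option<(i32, i32, i32, i32)>,
    ) -> (i32, i32, i32, i32) {
        texel_rect.map_or(uv_rect, |(sx, sy, sw, sh)| {
            // Sub-rects are relative to the cached item, so translate them
            // into the texture's coordinate space.
            (uv_rect.0 + sx, uv_rect.1 + sy, sw, sh)
        })
    }

    fn main() {
        let uv = (100, 200, 64, 64);
        assert_eq!(source_rect(uv, None), uv);
        assert_eq!(source_rect(uv, Some((8, 8, 16, 16))), (108, 208, 16, 16));
    }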
@@ -439,6 +561,14 @@ impl RenderTarget for ColorRenderTarget {
!ab.opaque_batches.is_empty()
})
}
+
+ fn used_rect(&self) -> DeviceIntRect {
+ self.used_rect
+ }
+
+ fn add_used(&mut self, rect: DeviceIntRect) {
+ self.used_rect = self.used_rect.union(&rect);
+ }
}
/// Contains the work (in the form of instance arrays) needed to fill an alpha
@@ -450,36 +580,33 @@ impl RenderTarget for ColorRenderTarget {
pub struct AlphaRenderTarget {
pub clip_batcher: ClipBatcher,
// List of blur operations to apply for this render target.
- pub vertical_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
- pub horizontal_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
+ pub vertical_blurs: Vec<BlurInstance>,
+ pub horizontal_blurs: Vec<BlurInstance>,
pub scalings: FastHashMap<TextureSource, Vec<ScalingInstance>>,
pub zero_clears: Vec<RenderTaskId>,
pub one_clears: Vec<RenderTaskId>,
- pub texture_id: CacheTextureId,
+ // Track the used rect of the render target, so that
+ // we can set a scissor rect and only clear to the
+ // used portion of the target as an optimization.
+ pub used_rect: DeviceIntRect,
}
impl RenderTarget for AlphaRenderTarget {
fn new(
- texture_id: CacheTextureId,
_: DeviceIntSize,
gpu_supports_fast_clears: bool,
- _: DeviceIntRect,
) -> Self {
AlphaRenderTarget {
clip_batcher: ClipBatcher::new(gpu_supports_fast_clears),
- vertical_blurs: FastHashMap::default(),
- horizontal_blurs: FastHashMap::default(),
+ vertical_blurs: Vec::new(),
+ horizontal_blurs: Vec::new(),
scalings: FastHashMap::default(),
zero_clears: Vec::new(),
one_clears: Vec::new(),
- texture_id,
+ used_rect: DeviceIntRect::zero(),
}
}
- fn texture_id(&self) -> CacheTextureId {
- self.texture_id
- }
-
fn add_task(
&mut self,
task_id: RenderTaskId,
@@ -488,51 +615,55 @@ impl RenderTarget for AlphaRenderTarget {
render_tasks: &RenderTaskGraph,
clip_store: &ClipStore,
transforms: &mut TransformPalette,
+ deferred_resolves: &mut Vec<DeferredResolve>,
) {
profile_scope!("add_task");
let task = &render_tasks[task_id];
- let target_rect = task.get_target_rect();
+ let (target_rect, _) = task.get_target_rect();
+
+ match task.clear_mode {
+ ClearMode::Zero => {
+ self.zero_clears.push(task_id);
+ }
+ ClearMode::One => {
+ self.one_clears.push(task_id);
+ }
+ ClearMode::DontCare => {}
+ ClearMode::Transparent => {
+ panic!("bug: invalid clear mode for alpha task");
+ }
+ }
match task.kind {
- RenderTaskKind::Image(..) |
- RenderTaskKind::Cached(..) |
RenderTaskKind::Readback(..) |
RenderTaskKind::Picture(..) |
RenderTaskKind::Blit(..) |
RenderTaskKind::Border(..) |
RenderTaskKind::LineDecoration(..) |
- RenderTaskKind::FastLinearGradient(..) |
- RenderTaskKind::LinearGradient(..) |
- RenderTaskKind::RadialGradient(..) |
- RenderTaskKind::ConicGradient(..) |
+ RenderTaskKind::Gradient(..) |
RenderTaskKind::SvgFilter(..) => {
panic!("BUG: should not be added to alpha target!");
}
RenderTaskKind::VerticalBlur(..) => {
- self.zero_clears.push(task_id);
add_blur_instances(
&mut self.vertical_blurs,
BlurDirection::Vertical,
- task_id.into(),
- task.children[0],
- render_tasks,
+ render_tasks.get_task_address(task_id),
+ render_tasks.get_task_address(task.children[0]),
);
}
RenderTaskKind::HorizontalBlur(..) => {
- self.zero_clears.push(task_id);
add_blur_instances(
&mut self.horizontal_blurs,
BlurDirection::Horizontal,
- task_id.into(),
- task.children[0],
- render_tasks,
+ render_tasks.get_task_address(task_id),
+ render_tasks.get_task_address(task.children[0]),
);
}
RenderTaskKind::CacheMask(ref task_info) => {
- let clear_to_one = self.clip_batcher.add(
+ self.clip_batcher.add(
task_info.clip_node_range,
task_info.root_spatial_node_index,
- render_tasks,
ctx.resource_cache,
gpu_cache,
clip_store,
@@ -542,26 +673,19 @@ impl RenderTarget for AlphaRenderTarget {
task_info.actual_rect,
&ctx.screen_world_rect,
task_info.device_pixel_scale,
- ctx.global_device_pixel_scale,
target_rect.origin.to_f32(),
- task_info.actual_rect.origin,
+ task_info.actual_rect.origin.to_f32(),
);
- if task_info.clear_to_one || clear_to_one {
- self.one_clears.push(task_id);
- }
}
RenderTaskKind::ClipRegion(ref region_task) => {
- if region_task.clear_to_one {
- self.one_clears.push(task_id);
- }
let device_rect = DeviceRect::new(
DevicePoint::zero(),
target_rect.size.to_f32(),
);
self.clip_batcher.add_clip_region(
+ region_task.clip_data_address,
region_task.local_pos,
device_rect,
- region_task.clip_data.clone(),
target_rect.origin.to_f32(),
DevicePoint::zero(),
region_task.device_pixel_scale.0,
@@ -573,6 +697,9 @@ impl RenderTarget for AlphaRenderTarget {
&mut self.scalings,
task,
task.children.first().map(|&child| &render_tasks[child]),
+ ctx.resource_cache,
+ gpu_cache,
+ deferred_resolves,
);
}
#[cfg(test)]
@@ -583,6 +710,14 @@ impl RenderTarget for AlphaRenderTarget {
fn needs_depth(&self) -> bool {
false
}
+
+ fn used_rect(&self) -> DeviceIntRect {
+ self.used_rect
+ }
+
+ fn add_used(&mut self, rect: DeviceIntRect) {
+ self.used_rect = self.used_rect.union(&rect);
+ }
}
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -599,53 +734,49 @@ pub struct PictureCacheTarget {
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct TextureCacheRenderTarget {
pub target_kind: RenderTargetKind,
- pub horizontal_blurs: FastHashMap<TextureSource, Vec<BlurInstance>>,
+ pub horizontal_blurs: Vec<BlurInstance>,
pub blits: Vec<BlitJob>,
pub border_segments_complex: Vec<BorderInstance>,
pub border_segments_solid: Vec<BorderInstance>,
pub clears: Vec<DeviceIntRect>,
pub line_decorations: Vec<LineDecorationJob>,
- pub fast_linear_gradients: Vec<FastLinearGradientInstance>,
- pub linear_gradients: Vec<LinearGradientInstance>,
- pub radial_gradients: Vec<RadialGradientInstance>,
- pub conic_gradients: Vec<ConicGradientInstance>,
+ pub gradients: Vec<GradientJob>,
}
impl TextureCacheRenderTarget {
pub fn new(target_kind: RenderTargetKind) -> Self {
TextureCacheRenderTarget {
target_kind,
- horizontal_blurs: FastHashMap::default(),
+ horizontal_blurs: vec![],
blits: vec![],
border_segments_complex: vec![],
border_segments_solid: vec![],
clears: vec![],
line_decorations: vec![],
- fast_linear_gradients: vec![],
- linear_gradients: vec![],
- radial_gradients: vec![],
- conic_gradients: vec![],
+ gradients: vec![],
}
}
pub fn add_task(
&mut self,
task_id: RenderTaskId,
- render_tasks: &RenderTaskGraph,
- gpu_cache: &mut GpuCache,
+ render_tasks: &mut RenderTaskGraph,
) {
profile_scope!("add_task");
- let task_address = task_id.into();
+ let task_address = render_tasks.get_task_address(task_id);
+ let src_task_address = render_tasks[task_id].children.get(0).map(|src_task_id| {
+ render_tasks.get_task_address(*src_task_id)
+ });
- let task = &render_tasks[task_id];
+ let task = &mut render_tasks[task_id];
let target_rect = task.get_target_rect();
match task.kind {
RenderTaskKind::LineDecoration(ref info) => {
- self.clears.push(target_rect);
+ self.clears.push(target_rect.0);
self.line_decorations.push(LineDecorationJob {
- task_rect: target_rect.to_f32(),
+ task_rect: target_rect.0.to_f32(),
local_size: info.local_size,
style: info.style as i32,
axis_select: match info.orientation {
@@ -660,27 +791,31 @@ impl TextureCacheRenderTarget {
&mut self.horizontal_blurs,
BlurDirection::Horizontal,
task_address,
- task.children[0],
- render_tasks,
+ src_task_address.unwrap(),
);
}
RenderTaskKind::Blit(ref task_info) => {
- // Add a blit job to copy from an existing render
- // task to this target.
- self.blits.push(BlitJob {
- source: task_info.source,
- target_rect,
- });
+ match task_info.source {
+ BlitSource::Image { .. } => {
+                        // Reading from and writing to the texture cache at
+                        // the same time is undefined behavior.
+ panic!("bug: a single blit cannot be to/from texture cache");
+ }
+ BlitSource::RenderTask { task_id } => {
+ // Add a blit job to copy from an existing render
+ // task to this target.
+ self.blits.push(BlitJob {
+ source: BlitJobSource::RenderTask(task_id),
+ target_rect: target_rect.0.inner_rect(task_info.padding),
+ });
+ }
+ }
}
- RenderTaskKind::Border(ref task_info) => {
- self.clears.push(target_rect);
-
- let task_origin = target_rect.origin.to_f32();
- // TODO(gw): Clone here instead of a move of this vec, since the frame
- // graph is immutable by this point. It's rare that borders
- // are drawn since they are persisted in the texture cache,
- // but perhaps this could be improved in future.
- let instances = task_info.instances.clone();
+ RenderTaskKind::Border(ref mut task_info) => {
+ self.clears.push(target_rect.0);
+
+ let task_origin = target_rect.0.origin.to_f32();
+ let instances = mem::replace(&mut task_info.instances, Vec::new());
for mut instance in instances {
// TODO(gw): It may be better to store the task origin in
// the render task data instead of per instance.
@@ -692,20 +827,28 @@ impl TextureCacheRenderTarget {
}
}
}
- RenderTaskKind::FastLinearGradient(ref task_info) => {
- self.fast_linear_gradients.push(task_info.to_instance(&target_rect));
- }
- RenderTaskKind::LinearGradient(ref task_info) => {
- self.linear_gradients.push(task_info.to_instance(&target_rect, gpu_cache));
- }
- RenderTaskKind::RadialGradient(ref task_info) => {
- self.radial_gradients.push(task_info.to_instance(&target_rect, gpu_cache));
- }
- RenderTaskKind::ConicGradient(ref task_info) => {
- self.conic_gradients.push(task_info.to_instance(&target_rect, gpu_cache));
+ RenderTaskKind::Gradient(ref task_info) => {
+ let mut stops = [0.0; 4];
+ let mut colors = [PremultipliedColorF::BLACK; 4];
+
+ let axis_select = match task_info.orientation {
+ LineOrientation::Horizontal => 0.0,
+ LineOrientation::Vertical => 1.0,
+ };
+
+ for (stop, (offset, color)) in task_info.stops.iter().zip(stops.iter_mut().zip(colors.iter_mut())) {
+ *offset = stop.offset;
+ *color = ColorF::from(stop.color).premultiplied();
+ }
+
+ self.gradients.push(GradientJob {
+ task_rect: target_rect.0.to_f32(),
+ axis_select,
+ stops,
+ colors,
+ start_stop: [task_info.start_point, task_info.end_point],
+ });
}
- RenderTaskKind::Image(..) |
- RenderTaskKind::Cached(..) |
RenderTaskKind::VerticalBlur(..) |
RenderTaskKind::Picture(..) |
RenderTaskKind::ClipRegion(..) |
@@ -722,24 +865,18 @@ impl TextureCacheRenderTarget {
}
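The Gradient arm above packs at most GRADIENT_FP_STOPS (four) stops into fixed-size arrays so the job can be fed to the shader as flat data; zipping the stop slice against the output arrays truncates at the shorter side, leaving unused slots at their defaults. A minimal demonstration of that packing, with plain f32 offsets and grayscale values standing in for the real stop and color types:

    fn main() {
        const STOPS: usize = 4;
        let input = [(0.0f32, 1.0f32), (0.5, 0.5), (1.0, 0.0)]; // (offset, gray)

        let mut offsets = [0.0f32; STOPS];
        let mut colors = [0.0f32; STOPS];

        // zip() stops at the shorter iterator, so at most four stops are
        // copied and any unused slots keep their defaults.
        for ((offset, color), (out_off, out_col)) in input
            .iter()
            .zip(offsets.iter_mut().zip(colors.iter_mut()))
        {
            *out_off = *offset;
            *out_col = *color;
        }

        assert_eq!(offsets, [0.0, 0.5, 1.0, 0.0]);
        assert_eq!(colors, [1.0, 0.5, 0.0, 0.0]);
    }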
fn add_blur_instances(
- instances: &mut FastHashMap<TextureSource, Vec<BlurInstance>>,
+ instances: &mut Vec<BlurInstance>,
blur_direction: BlurDirection,
task_address: RenderTaskAddress,
- src_task_id: RenderTaskId,
- render_tasks: &RenderTaskGraph,
+ src_task_address: RenderTaskAddress,
) {
- let source = render_tasks[src_task_id].get_texture_source();
-
let instance = BlurInstance {
task_address,
- src_task_address: src_task_id.into(),
+ src_task_address,
blur_direction,
};
- instances
- .entry(source)
- .or_insert(Vec::new())
- .push(instance);
+ instances.push(instance);
}
fn add_scaling_instances(
@@ -747,15 +884,55 @@ fn add_scaling_instances(
instances: &mut FastHashMap<TextureSource, Vec<ScalingInstance>>,
target_task: &RenderTask,
source_task: Option<&RenderTask>,
+ resource_cache: &ResourceCache,
+ gpu_cache: &mut GpuCache,
+ deferred_resolves: &mut Vec<DeferredResolve>,
) {
let target_rect = target_task
.get_target_rect()
+ .0
.inner_rect(task.padding)
.to_f32();
- let source = source_task.unwrap().get_texture_source();
+ let (source, (source_rect, source_layer)) = match task.image {
+ Some(key) => {
+ assert!(source_task.is_none());
+
+ // Get the cache item for the source texture.
+ let cache_item = resolve_image(
+ key.request,
+ resource_cache,
+ gpu_cache,
+ deferred_resolves,
+ );
+
+ // Work out a source rect to copy from the texture, depending on whether
+ // a sub-rect is present or not.
+ let source_rect = key.texel_rect.map_or(cache_item.uv_rect, |sub_rect| {
+ DeviceIntRect::new(
+ DeviceIntPoint::new(
+ cache_item.uv_rect.origin.x + sub_rect.origin.x,
+ cache_item.uv_rect.origin.y + sub_rect.origin.y,
+ ),
+ sub_rect.size,
+ )
+ });
- let source_rect = source_task.unwrap().get_target_rect().to_f32();
+ (
+ cache_item.texture_id,
+ (source_rect, cache_item.texture_layer as LayerIndex),
+ )
+ }
+ None => {
+ (
+ match task.target_kind {
+ RenderTargetKind::Color => TextureSource::PrevPassColor,
+ RenderTargetKind::Alpha => TextureSource::PrevPassAlpha,
+ },
+ source_task.unwrap().location.to_source_rect(),
+ )
+ }
+ };
instances
.entry(source)
@@ -763,6 +940,7 @@ fn add_scaling_instances(
.push(ScalingInstance {
target_rect,
source_rect,
+ source_layer: source_layer as i32,
});
}
@@ -775,14 +953,20 @@ fn add_svg_filter_instances(
input_2_task: Option<RenderTaskId>,
extra_data_address: Option<GpuCacheAddress>,
) {
- let mut textures = BatchTextures::empty();
+ let mut textures = BatchTextures::no_texture();
- if let Some(id) = input_1_task {
- textures.input.colors[0] = render_tasks[id].get_texture_source();
+ if let Some(saved_index) = input_1_task.map(|id| &render_tasks[id].saved_index) {
+ textures.colors[0] = match saved_index {
+ Some(saved_index) => TextureSource::RenderTaskCache(*saved_index, Swizzle::default()),
+ None => TextureSource::PrevPassColor,
+ };
}
- if let Some(id) = input_2_task {
- textures.input.colors[1] = render_tasks[id].get_texture_source();
+ if let Some(saved_index) = input_2_task.map(|id| &render_tasks[id].saved_index) {
+ textures.colors[1] = match saved_index {
+ Some(saved_index) => TextureSource::RenderTaskCache(*saved_index, Swizzle::default()),
+ None => TextureSource::PrevPassColor,
+ };
}
let kind = match filter {
@@ -836,9 +1020,9 @@ fn add_svg_filter_instances(
};
let instance = SvgFilterInstance {
- task_address: task_id.into(),
- input_1_task_address: input_1_task.map(|id| id.into()).unwrap_or(RenderTaskAddress(0)),
- input_2_task_address: input_2_task.map(|id| id.into()).unwrap_or(RenderTaskAddress(0)),
+ task_address: render_tasks.get_task_address(task_id),
+ input_1_task_address: input_1_task.map(|id| render_tasks.get_task_address(id)).unwrap_or(RenderTaskAddress(0)),
+ input_2_task_address: input_2_task.map(|id| render_tasks.get_task_address(id)).unwrap_or(RenderTaskAddress(0)),
kind,
input_count,
generic_int,
@@ -857,17 +1041,25 @@ fn add_svg_filter_instances(
instances.push((textures, vec![instance]));
}
+// Defines where the source data for a blit job can be found.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum BlitJobSource {
+ Texture(TextureSource, i32, DeviceIntRect),
+ RenderTask(RenderTaskId),
+}
+
// Information required to do a blit from a source to a target.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct BlitJob {
- pub source: RenderTaskId,
+ pub source: BlitJobSource,
pub target_rect: DeviceIntRect,
}
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Clone, Debug)]
+#[derive(Debug)]
pub struct LineDecorationJob {
pub task_rect: DeviceRect,
pub local_size: LayoutSize,
@@ -875,3 +1067,25 @@ pub struct LineDecorationJob {
pub style: i32,
pub axis_select: f32,
}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[repr(C)]
+pub struct GradientJob {
+ pub task_rect: DeviceRect,
+ pub stops: [f32; GRADIENT_FP_STOPS],
+ pub colors: [PremultipliedColorF; GRADIENT_FP_STOPS],
+ pub axis_select: f32,
+ pub start_stop: [f32; 2],
+}
+
+/// Frame output information for a given pipeline ID.
+/// Storing the task ID allows the renderer to find
+/// the target rect within the render target that this
+/// pipeline exists at.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct FrameOutput {
+ pub task_id: RenderTaskId,
+ pub pipeline_id: PipelineId,
+}
diff --git a/third_party/webrender/webrender/src/render_task.rs b/third_party/webrender/webrender/src/render_task.rs
index ad8bdbe4975..acfae0ebae6 100644
--- a/third_party/webrender/webrender/src/render_task.rs
+++ b/third_party/webrender/webrender/src/render_task.rs
@@ -5,35 +5,34 @@
use api::{CompositeOperator, FilterPrimitive, FilterPrimitiveInput, FilterPrimitiveKind};
use api::{LineStyle, LineOrientation, ClipMode, MixBlendMode, ColorF, ColorSpace};
use api::units::*;
-use crate::batch::BatchFilter;
-use crate::clip::{ClipDataStore, ClipItemKind, ClipStore, ClipNodeRange};
+use crate::clip::{ClipDataStore, ClipItemKind, ClipStore, ClipNodeRange, ClipNodeFlags};
use crate::spatial_tree::SpatialNodeIndex;
use crate::filterdata::SFilterData;
use crate::frame_builder::FrameBuilderConfig;
use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
use crate::gpu_types::{BorderInstance, ImageSource, UvRectKind};
-use crate::internal_types::{CacheTextureId, FastHashMap, TextureSource, Swizzle};
-use crate::picture::{ResolvedSurfaceTexture, SurfaceInfo};
-use crate::prim_store::{ClipData, PictureIndex};
-use crate::prim_store::gradient::{
- FastLinearGradientTask, RadialGradientTask,
- ConicGradientTask, LinearGradientTask,
-};
-use crate::resource_cache::{ResourceCache, ImageRequest};
+use crate::internal_types::{CacheTextureId, FastHashMap, LayerIndex, SavedTargetIndex};
+use crate::picture::ResolvedSurfaceTexture;
+use crate::prim_store::{PictureIndex, PrimitiveVisibilityMask};
+use crate::prim_store::image::ImageCacheKey;
+use crate::prim_store::gradient::{GRADIENT_FP_STOPS, GradientStopKey};
+#[cfg(feature = "debugger")]
+use crate::print_tree::{PrintTreePrinter};
+use crate::resource_cache::ResourceCache;
use std::{usize, f32, i32, u32};
-use crate::render_target::RenderTargetKind;
-use crate::render_task_graph::{PassId, RenderTaskId, RenderTaskGraphBuilder};
-use crate::render_task_cache::{RenderTaskCacheEntryHandle, RenderTaskCacheKey, RenderTaskCacheKeyKind, RenderTaskParent};
+use crate::render_target::{RenderTargetIndex, RenderTargetKind};
+use crate::render_task_graph::{RenderTaskGraph, RenderTaskId};
+use crate::render_task_cache::{RenderTaskCacheKey, RenderTaskCacheKeyKind};
use smallvec::SmallVec;
+const RENDER_TASK_SIZE_SANITY_CHECK: i32 = 16000;
const FLOATS_PER_RENDER_TASK_INFO: usize = 8;
-pub const MAX_RENDER_TASK_SIZE: i32 = 16384;
pub const MAX_BLUR_STD_DEVIATION: f32 = 4.0;
pub const MIN_DOWNSCALING_RT_SIZE: i32 = 8;
fn render_task_sanity_check(size: &DeviceIntSize) {
- if size.width > MAX_RENDER_TASK_SIZE ||
- size.height > MAX_RENDER_TASK_SIZE {
+ if size.width > RENDER_TASK_SIZE_SANITY_CHECK ||
+ size.height > RENDER_TASK_SIZE_SANITY_CHECK {
error!("Attempting to create a render task of size {}x{}", size.width, size.height);
panic!();
}
@@ -45,122 +44,93 @@ fn render_task_sanity_check(size: &DeviceIntSize) {
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct RenderTaskAddress(pub u16);
-impl Into<RenderTaskAddress> for RenderTaskId {
- fn into(self) -> RenderTaskAddress {
- RenderTaskAddress(self.index as u16)
- }
-}
-
-/// A render task location that targets a persistent output buffer which
-/// will be retained over multiple frames.
-#[derive(Clone, Debug, Eq, PartialEq, Hash)]
+/// Identifies the output buffer location for a given `RenderTask`.
+#[derive(Clone, Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum StaticRenderTaskSurface {
+pub enum RenderTaskLocation {
+ /// The `RenderTask` should be drawn to a fixed region in a specific render
+ /// target. This is used for the root `RenderTask`, where the main
+ /// framebuffer is used as the render target.
+ Fixed(DeviceIntRect),
+ /// The `RenderTask` should be drawn to a target provided by the atlas
+ /// allocator. This is the most common case.
+ ///
+ /// The second member specifies the width and height of the task
+ /// output, and the first member is initially left as `None`. During the
+    /// build phase, we invoke `RenderTargetList::allocate()` and store the
+ /// resulting location in the first member. That location identifies the
+ /// render target and the offset of the allocated region within that target.
+ Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
/// The output of the `RenderTask` will be persisted beyond this frame, and
/// thus should be drawn into the `TextureCache`.
TextureCache {
/// Which texture in the texture cache should be drawn into.
texture: CacheTextureId,
- /// What format this texture cache surface is
- target_kind: RenderTargetKind,
- },
- /// Only used as a source for render tasks, can be any texture including an
- /// external one.
- ReadOnly {
- source: TextureSource,
+ /// The target layer in the above texture.
+ layer: LayerIndex,
+ /// The target region within the above layer.
+ rect: DeviceIntRect,
+
},
/// This render task will be drawn to a picture cache texture that is
/// persisted between both frames and scenes, if the content remains valid.
PictureCache {
/// Describes either a WR texture or a native OS compositor target
surface: ResolvedSurfaceTexture,
- },
-}
-
-/// Identifies the output buffer location for a given `RenderTask`.
-#[derive(Clone, Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum RenderTaskLocation {
- // Towards the beginning of the frame, most task locations are typically not
- // known yet, in which case they are set to one of the following variants:
-
- /// A dynamic task that has not yet been allocated a texture and rect.
- Unallocated {
- /// Requested size of this render task
- size: DeviceIntSize,
- },
- /// Will be replaced by a Static location after the texture cache update.
- CacheRequest {
+ /// Size in device pixels of this picture cache tile.
size: DeviceIntSize,
},
-
- // Before batching begins, we expect that locations have been resolved to
- // one of the following variants:
-
- /// The `RenderTask` should be drawn to a target provided by the atlas
- /// allocator. This is the most common case.
- Dynamic {
- /// Texture that this task was allocated to render on
- texture_id: CacheTextureId,
- /// Rectangle in the texture this task occupies
- rect: DeviceIntRect,
- },
- /// A task that is output to a persistent / retained target.
- Static {
- /// Target to draw to
- surface: StaticRenderTaskSurface,
- /// Rectangle in the texture this task occupies
- rect: DeviceIntRect,
- },
}
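The Dynamic variant is deliberately two-phase: a task is created with the position half set to None, and the build phase later patches in the origin and target index returned by the allocator. A sketch of that lifecycle with a simplified stand-in type (the values are made up for illustration):

    // Illustrative stand-in for the Dynamic variant: the origin starts out
    // unknown while the size is fixed at task-creation time.
    struct DynamicLocation {
        origin: Option<(i32, i32)>,
        size: (i32, i32),
    }

    fn main() {
        // Phase 1: the task is created before any target space exists.
        let mut loc = DynamicLocation { origin: None, size: (256, 128) };

        // Phase 2: the build phase allocates space and fills in the origin
        // (values made up for illustration).
        loc.origin = Some((512, 0));

        // Only now can the task be used as a source rectangle.
        let (x, y) = loc.origin.expect("Expected position to be set for the task!");
        assert_eq!((x, y, loc.size.0, loc.size.1), (512, 0, 256, 128));
    }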
impl RenderTaskLocation {
/// Returns true if this is a dynamic location.
pub fn is_dynamic(&self) -> bool {
match *self {
- RenderTaskLocation::Dynamic { .. } => true,
+ RenderTaskLocation::Dynamic(..) => true,
_ => false,
}
}
pub fn size(&self) -> DeviceIntSize {
match self {
- RenderTaskLocation::Unallocated { size } => *size,
- RenderTaskLocation::Dynamic { rect, .. } => rect.size,
- RenderTaskLocation::Static { rect, .. } => rect.size,
- RenderTaskLocation::CacheRequest { size } => *size,
+ RenderTaskLocation::Fixed(rect) => rect.size,
+ RenderTaskLocation::Dynamic(_, size) => *size,
+ RenderTaskLocation::TextureCache { rect, .. } => rect.size,
+ RenderTaskLocation::PictureCache { size, .. } => *size,
}
}
-}
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct CachedTask {
- pub target_kind: RenderTargetKind,
+ pub fn to_source_rect(&self) -> (DeviceIntRect, LayerIndex) {
+ match *self {
+ RenderTaskLocation::Fixed(rect) => (rect, 0),
+ RenderTaskLocation::Dynamic(None, _) => panic!("Expected position to be set for the task!"),
+ RenderTaskLocation::Dynamic(Some((origin, layer)), size) => (DeviceIntRect::new(origin, size), layer.0 as LayerIndex),
+ RenderTaskLocation::TextureCache { rect, layer, .. } => (rect, layer),
+ RenderTaskLocation::PictureCache { .. } => {
+ panic!("bug: picture cache tasks should never be a source!");
+ }
+ }
+ }
}
#[derive(Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct CacheMaskTask {
- pub actual_rect: DeviceRect,
+ pub actual_rect: DeviceIntRect,
pub root_spatial_node_index: SpatialNodeIndex,
pub clip_node_range: ClipNodeRange,
pub device_pixel_scale: DevicePixelScale,
- pub clear_to_one: bool,
}
#[derive(Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct ClipRegionTask {
+ pub clip_data_address: GpuCacheAddress,
pub local_pos: LayoutPoint,
pub device_pixel_scale: DevicePixelScale,
- pub clip_data: ClipData,
- pub clear_to_one: bool,
}
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -168,10 +138,14 @@ pub struct ClipRegionTask {
pub struct PictureTask {
pub pic_index: PictureIndex,
pub can_merge: bool,
- pub content_origin: DevicePoint,
+ pub content_origin: DeviceIntPoint,
+ pub uv_rect_handle: GpuCacheHandle,
pub surface_spatial_node_index: SpatialNodeIndex,
+ uv_rect_kind: UvRectKind,
pub device_pixel_scale: DevicePixelScale,
- pub batch_filter: Option<BatchFilter>,
+ /// A bitfield that describes which dirty regions should be included
+ /// in batches built for this picture task.
+ pub vis_mask: PrimitiveVisibilityMask,
pub scissor_rect: Option<DeviceIntRect>,
pub valid_rect: Option<DeviceIntRect>,
}
@@ -182,27 +156,16 @@ pub struct PictureTask {
pub struct BlurTask {
pub blur_std_deviation: f32,
pub target_kind: RenderTargetKind,
+ pub uv_rect_handle: GpuCacheHandle,
pub blur_region: DeviceIntSize,
+ uv_rect_kind: UvRectKind,
}
impl BlurTask {
- // In order to do the blur down-scaling passes without introducing errors, we need the
- // source of each down-scale pass to be a multuple of two. If need be, this inflates
- // the source size so that each down-scale pass will sample correctly.
- pub fn adjusted_blur_source_size(original_size: DeviceSize, mut std_dev: DeviceSize) -> DeviceSize {
- let mut adjusted_size = original_size;
- let mut scale_factor = 1.0;
- while std_dev.width > MAX_BLUR_STD_DEVIATION && std_dev.height > MAX_BLUR_STD_DEVIATION {
- if adjusted_size.width < MIN_DOWNSCALING_RT_SIZE as f32 ||
- adjusted_size.height < MIN_DOWNSCALING_RT_SIZE as f32 {
- break;
- }
- std_dev = std_dev * 0.5;
- scale_factor *= 2.0;
- adjusted_size = (original_size.to_f32() / scale_factor).ceil();
- }
-
- adjusted_size * scale_factor
+ #[cfg(feature = "debugger")]
+ fn print_with<T: PrintTreePrinter>(&self, pt: &mut T) {
+ pt.add_item(format!("std deviation: {}", self.blur_std_deviation));
+ pt.add_item(format!("target: {:?}", self.target_kind));
}
}
@@ -211,9 +174,24 @@ impl BlurTask {
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct ScalingTask {
pub target_kind: RenderTargetKind,
+ pub image: Option<ImageCacheKey>,
+ uv_rect_kind: UvRectKind,
pub padding: DeviceIntSideOffsets,
}
+// Where the source data for a blit task can be found.
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum BlitSource {
+ Image {
+ key: ImageCacheKey,
+ },
+ RenderTask {
+ task_id: RenderTaskId,
+ },
+}
+
#[derive(Debug)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -225,7 +203,18 @@ pub struct BorderTask {
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct BlitTask {
- pub source: RenderTaskId,
+ pub source: BlitSource,
+ pub padding: DeviceIntSideOffsets,
+}
+
+#[derive(Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct GradientTask {
+ pub stops: [GradientStopKey; GRADIENT_FP_STOPS],
+ pub orientation: LineOrientation,
+ pub start_point: f32,
+ pub end_point: f32,
}
#[derive(Debug)]
@@ -262,16 +251,8 @@ pub enum SvgFilterInfo {
pub struct SvgFilterTask {
pub info: SvgFilterInfo,
pub extra_gpu_cache_handle: Option<GpuCacheHandle>,
-}
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct ReadbackTask {
- // The offset of the rect that needs to be read back, in the
- // device space of the surface that will be read back from.
- // If this is None, there is no readback surface available
- // and this is a dummy (empty) readback.
- pub readback_origin: Option<DevicePoint>,
+ pub uv_rect_handle: GpuCacheHandle,
+ uv_rect_kind: UvRectKind,
}
#[derive(Debug)]
@@ -284,40 +265,25 @@ pub struct RenderTaskData {
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub enum RenderTaskKind {
- Image(ImageRequest),
- Cached(CachedTask),
Picture(PictureTask),
CacheMask(CacheMaskTask),
ClipRegion(ClipRegionTask),
VerticalBlur(BlurTask),
HorizontalBlur(BlurTask),
- Readback(ReadbackTask),
+ Readback(DeviceIntRect),
Scaling(ScalingTask),
Blit(BlitTask),
Border(BorderTask),
LineDecoration(LineDecorationTask),
- FastLinearGradient(FastLinearGradientTask),
- LinearGradient(LinearGradientTask),
- RadialGradient(RadialGradientTask),
- ConicGradient(ConicGradientTask),
+ Gradient(GradientTask),
SvgFilter(SvgFilterTask),
#[cfg(test)]
Test(RenderTargetKind),
}
impl RenderTaskKind {
- pub fn is_a_rendering_operation(&self) -> bool {
- match self {
- &RenderTaskKind::Image(..) => false,
- &RenderTaskKind::Cached(..) => false,
- _ => true,
- }
- }
-
pub fn as_str(&self) -> &'static str {
match *self {
- RenderTaskKind::Image(..) => "Image",
- RenderTaskKind::Cached(..) => "Cached",
RenderTaskKind::Picture(..) => "Picture",
RenderTaskKind::CacheMask(..) => "CacheMask",
RenderTaskKind::ClipRegion(..) => "ClipRegion",
@@ -328,141 +294,241 @@ impl RenderTaskKind {
RenderTaskKind::Blit(..) => "Blit",
RenderTaskKind::Border(..) => "Border",
RenderTaskKind::LineDecoration(..) => "LineDecoration",
- RenderTaskKind::FastLinearGradient(..) => "FastLinearGradient",
- RenderTaskKind::LinearGradient(..) => "LinearGradient",
- RenderTaskKind::RadialGradient(..) => "RadialGradient",
- RenderTaskKind::ConicGradient(..) => "ConicGradient",
+ RenderTaskKind::Gradient(..) => "Gradient",
RenderTaskKind::SvgFilter(..) => "SvgFilter",
#[cfg(test)]
RenderTaskKind::Test(..) => "Test",
}
}
+}
- pub fn target_kind(&self) -> RenderTargetKind {
- match *self {
- RenderTaskKind::Image(..) |
- RenderTaskKind::LineDecoration(..) |
- RenderTaskKind::Readback(..) |
- RenderTaskKind::Border(..) |
- RenderTaskKind::FastLinearGradient(..) |
- RenderTaskKind::LinearGradient(..) |
- RenderTaskKind::RadialGradient(..) |
- RenderTaskKind::ConicGradient(..) |
- RenderTaskKind::Picture(..) |
- RenderTaskKind::Blit(..) |
- RenderTaskKind::SvgFilter(..) => {
- RenderTargetKind::Color
- }
+#[derive(Debug, Copy, Clone, PartialEq)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ClearMode {
+ // Applicable to color and alpha targets.
+ Zero,
+ One,
+ /// This task doesn't care what it is cleared to - it will completely overwrite it.
+ DontCare,
+
+ // Applicable to color targets only.
+ Transparent,
+}
- RenderTaskKind::ClipRegion(..) |
- RenderTaskKind::CacheMask(..) => {
- RenderTargetKind::Alpha
- }
+/// In order to avoid duplicating the down-scaling and blur passes when a picture has several blurs,
+/// we use a local (primitive-level) cache of the render tasks generated for a single shadowed primitive
+/// in a single frame.
+pub type BlurTaskCache = FastHashMap<BlurTaskKey, RenderTaskId>;
- RenderTaskKind::VerticalBlur(ref task_info) |
- RenderTaskKind::HorizontalBlur(ref task_info) => {
- task_info.target_kind
- }
+/// Since we only use it within a single primitive, the key only needs to contain the down-scaling level
+/// and the blur std deviation.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum BlurTaskKey {
+ DownScale(u32),
+ Blur { downscale_level: u32, stddev_x: u32, stddev_y: u32 },
+}
- RenderTaskKind::Scaling(ref task_info) => {
- task_info.target_kind
- }
+impl BlurTaskKey {
+ fn downscale_and_blur(downscale_level: u32, blur_stddev: DeviceSize) -> Self {
+ // Quantise the std deviations and store them as integers to work around
+ // Eq and Hash's f32 allergy.
+ // The blur radius is rounded before RenderTask::new_blur so we don't need
+ // a lot of precision.
+ const QUANTIZATION_FACTOR: f32 = 1024.0;
+ let stddev_x = (blur_stddev.width * QUANTIZATION_FACTOR) as u32;
+ let stddev_y = (blur_stddev.height * QUANTIZATION_FACTOR) as u32;
+ BlurTaskKey::Blur { downscale_level, stddev_x, stddev_y }
+ }
+}
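// A standalone illustration of the quantization trick above: f32 implements
// neither Eq nor Hash, so the std deviations are snapped onto a 1/1024 grid
// and stored as u32 before being used as a hash-map key. Names below are
// illustrative, not WebRender's.
fn quantization_demo() {
    use std::collections::HashMap;

    let quantize = |w: f32, h: f32| ((w * 1024.0) as u32, (h * 1024.0) as u32);
    let mut cache: HashMap<(u32, u32), &str> = HashMap::new();
    cache.insert(quantize(2.5, 2.5), "blur task A");
    // A difference well below 1/1024 of a device pixel lands on the same key,
    // so the cached task is reused rather than re-rendered.
    assert!(cache.contains_key(&quantize(2.5004, 2.5)));
}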
- RenderTaskKind::Cached(ref task_info) => {
- task_info.target_kind
- }
+// The majority of render tasks have 0, 1 or 2 dependencies, except for pictures that
+// typically have dozens to hundreds of dependencies. SmallVec with 2 inline elements
+// avoids many tiny heap allocations in pages with a lot of text shadows and other
+// types of render tasks.
+pub type TaskDependencies = SmallVec<[RenderTaskId;2]>;
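// A quick standalone check of the inline-storage claim above (assumes the
// `smallvec` crate this file already uses; `u32` stands in for RenderTaskId):
fn smallvec_demo() {
    use smallvec::SmallVec;

    let mut deps: SmallVec<[u32; 2]> = SmallVec::new();
    deps.push(1);
    deps.push(2);
    assert!(!deps.spilled()); // both dependencies live inline, no heap allocation
    deps.push(3);
    assert!(deps.spilled()); // a third dependency spills to the heap
}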
- #[cfg(test)]
- RenderTaskKind::Test(kind) => kind,
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTask {
+ pub location: RenderTaskLocation,
+ pub children: TaskDependencies,
+ pub kind: RenderTaskKind,
+ pub clear_mode: ClearMode,
+ pub saved_index: Option<SavedTargetIndex>,
+}
+
+impl RenderTask {
+ #[inline]
+ pub fn with_dynamic_location(
+ size: DeviceIntSize,
+ children: TaskDependencies,
+ kind: RenderTaskKind,
+ clear_mode: ClearMode,
+ ) -> Self {
+ render_task_sanity_check(&size);
+
+ RenderTask {
+ location: RenderTaskLocation::Dynamic(None, size),
+ children,
+ kind,
+ clear_mode,
+ saved_index: None,
+ }
+ }
+
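// Usage sketch (the function name is hypothetical; the types are the ones in
// this file): the specialized constructors below all funnel through
// with_dynamic_location, which starts a task out unallocated - the graph
// assigns its target rect later.
fn example_readback_task() -> RenderTask {
    RenderTask::with_dynamic_location(
        DeviceIntSize::new(256, 256),
        TaskDependencies::new(),
        RenderTaskKind::Readback(DeviceIntRect::zero()),
        ClearMode::Transparent,
    )
}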
+ #[cfg(test)]
+ pub fn new_test(
+ target: RenderTargetKind,
+ location: RenderTaskLocation,
+ children: TaskDependencies,
+ ) -> Self {
+ RenderTask {
+ location,
+ children,
+ kind: RenderTaskKind::Test(target),
+ clear_mode: ClearMode::Transparent,
+ saved_index: None,
}
}
pub fn new_picture(
- size: DeviceIntSize,
+ location: RenderTaskLocation,
unclipped_size: DeviceSize,
pic_index: PictureIndex,
- content_origin: DevicePoint,
+ content_origin: DeviceIntPoint,
+ uv_rect_kind: UvRectKind,
surface_spatial_node_index: SpatialNodeIndex,
device_pixel_scale: DevicePixelScale,
- batch_filter: Option<BatchFilter>,
+ vis_mask: PrimitiveVisibilityMask,
scissor_rect: Option<DeviceIntRect>,
valid_rect: Option<DeviceIntRect>,
) -> Self {
+ let size = match location {
+ RenderTaskLocation::Dynamic(_, size) => size,
+ RenderTaskLocation::Fixed(rect) => rect.size,
+ RenderTaskLocation::TextureCache { rect, .. } => rect.size,
+ RenderTaskLocation::PictureCache { size, .. } => size,
+ };
+
render_task_sanity_check(&size);
let can_merge = size.width as f32 >= unclipped_size.width &&
size.height as f32 >= unclipped_size.height;
- RenderTaskKind::Picture(PictureTask {
- pic_index,
- content_origin,
- can_merge,
- surface_spatial_node_index,
- device_pixel_scale,
- batch_filter,
- scissor_rect,
- valid_rect,
- })
+ RenderTask {
+ location,
+ children: TaskDependencies::new(),
+ kind: RenderTaskKind::Picture(PictureTask {
+ pic_index,
+ content_origin,
+ can_merge,
+ uv_rect_handle: GpuCacheHandle::new(),
+ uv_rect_kind,
+ surface_spatial_node_index,
+ device_pixel_scale,
+ vis_mask,
+ scissor_rect,
+ valid_rect,
+ }),
+ clear_mode: ClearMode::Transparent,
+ saved_index: None,
+ }
}
- pub fn new_readback(
- readback_origin: Option<DevicePoint>,
+ pub fn new_gradient(
+ size: DeviceIntSize,
+ stops: [GradientStopKey; GRADIENT_FP_STOPS],
+ orientation: LineOrientation,
+ start_point: f32,
+ end_point: f32,
) -> Self {
- RenderTaskKind::Readback(
- ReadbackTask {
- readback_origin,
- }
+ RenderTask::with_dynamic_location(
+ size,
+ TaskDependencies::new(),
+ RenderTaskKind::Gradient(GradientTask {
+ stops,
+ orientation,
+ start_point,
+ end_point,
+ }),
+ ClearMode::DontCare,
)
}
- pub fn new_line_decoration(
- style: LineStyle,
- orientation: LineOrientation,
- wavy_line_thickness: f32,
- local_size: LayoutSize,
+ pub fn new_readback(screen_rect: DeviceIntRect) -> Self {
+ RenderTask::with_dynamic_location(
+ screen_rect.size,
+ TaskDependencies::new(),
+ RenderTaskKind::Readback(screen_rect),
+ ClearMode::Transparent,
+ )
+ }
+
+ pub fn new_blit(
+ size: DeviceIntSize,
+ source: BlitSource,
) -> Self {
- RenderTaskKind::LineDecoration(LineDecorationTask {
- style,
- orientation,
- wavy_line_thickness,
- local_size,
- })
+ RenderTask::new_blit_with_padding(size, DeviceIntSideOffsets::zero(), source)
}
- pub fn new_border_segment(
- instances: Vec<BorderInstance>,
+ pub fn new_blit_with_padding(
+ padded_size: DeviceIntSize,
+ padding: DeviceIntSideOffsets,
+ source: BlitSource,
) -> Self {
- RenderTaskKind::Border(BorderTask {
- instances,
- })
+ // If this blit uses a render task as a source,
+ // ensure it's added as a child task, so that it
+ // gets allocated in the correct pass
+ // and made available as an input when this task
+ // executes.
+ let children = match source {
+ BlitSource::RenderTask { task_id } => smallvec![task_id],
+ BlitSource::Image { .. } => smallvec![],
+ };
+
+ RenderTask::with_dynamic_location(
+ padded_size,
+ children,
+ RenderTaskKind::Blit(BlitTask {
+ source,
+ padding,
+ }),
+ ClearMode::Transparent,
+ )
}
- pub fn new_rounded_rect_mask(
- local_pos: LayoutPoint,
- clip_data: ClipData,
- device_pixel_scale: DevicePixelScale,
- fb_config: &FrameBuilderConfig,
+ pub fn new_line_decoration(
+ size: DeviceIntSize,
+ style: LineStyle,
+ orientation: LineOrientation,
+ wavy_line_thickness: f32,
+ local_size: LayoutSize,
) -> Self {
- RenderTaskKind::ClipRegion(ClipRegionTask {
- local_pos,
- device_pixel_scale,
- clip_data,
- clear_to_one: fb_config.gpu_supports_fast_clears,
- })
+ RenderTask::with_dynamic_location(
+ size,
+ TaskDependencies::new(),
+ RenderTaskKind::LineDecoration(LineDecorationTask {
+ style,
+ orientation,
+ wavy_line_thickness,
+ local_size,
+ }),
+ ClearMode::Transparent,
+ )
}
pub fn new_mask(
- outer_rect: DeviceRect,
+ outer_rect: DeviceIntRect,
clip_node_range: ClipNodeRange,
root_spatial_node_index: SpatialNodeIndex,
clip_store: &mut ClipStore,
gpu_cache: &mut GpuCache,
resource_cache: &mut ResourceCache,
- rg_builder: &mut RenderTaskGraphBuilder,
+ render_tasks: &mut RenderTaskGraph,
clip_data_store: &mut ClipDataStore,
device_pixel_scale: DevicePixelScale,
fb_config: &FrameBuilderConfig,
- surfaces: &[SurfaceInfo],
) -> RenderTaskId {
// Step through the clip sources that make up this mask. If we find
// any box-shadow clip sources, request that image from the render
@@ -473,24 +539,7 @@ impl RenderTaskKind {
// TODO(gw): If this ever shows up in a profile, we could pre-calculate
// whether a ClipSources contains any box-shadows and skip
// this iteration for the majority of cases.
- let task_size = outer_rect.size.to_i32();
-
- // If we have a potentially tiled clip mask, clear the mask area first. Otherwise,
- // the first (primary) clip mask will overwrite all the clip mask pixels with
- // blending disabled to set to the initial value.
-
- let clip_task_id = rg_builder.add().init(
- RenderTask::new_dynamic(
- task_size,
- RenderTaskKind::CacheMask(CacheMaskTask {
- actual_rect: outer_rect,
- clip_node_range,
- root_spatial_node_index,
- device_pixel_scale,
- clear_to_one: fb_config.gpu_supports_fast_clears,
- }),
- )
- );
+ let mut needs_clear = fb_config.gpu_supports_fast_clears;
for i in 0 .. clip_node_range.count {
let clip_instance = clip_store.get_instance_from_range(&clip_node_range, i);
@@ -502,342 +551,123 @@ impl RenderTaskKind {
.expect("bug: no cache key set")
.clone();
let blur_radius_dp = cache_key.blur_radius_dp as f32;
- let device_pixel_scale = DevicePixelScale::new(cache_key.device_pixel_scale.to_f32_px());
+ let clip_data_address = gpu_cache.get_address(&source.clip_data_handle);
// Request a cacheable render task with a blurred, minimal
// sized box-shadow rect.
- source.render_task = Some(resource_cache.request_render_task(
+ source.cache_handle = Some(resource_cache.request_render_task(
RenderTaskCacheKey {
size: cache_size,
kind: RenderTaskCacheKeyKind::BoxShadow(cache_key),
},
gpu_cache,
- rg_builder,
+ render_tasks,
None,
false,
- RenderTaskParent::RenderTask(clip_task_id),
- surfaces,
- |rg_builder| {
- let clip_data = ClipData::rounded_rect(
- source.minimal_shadow_rect.size,
- &source.shadow_radius,
- ClipMode::Clip,
- );
-
+ |render_tasks| {
// Draw the rounded rect.
- let mask_task_id = rg_builder.add().init(RenderTask::new_dynamic(
+ let mask_task_id = render_tasks.add().init(RenderTask::new_rounded_rect_mask(
cache_size,
- RenderTaskKind::new_rounded_rect_mask(
- source.minimal_shadow_rect.origin,
- clip_data,
- device_pixel_scale,
- fb_config,
- ),
+ clip_data_address,
+ source.minimal_shadow_rect.origin,
+ device_pixel_scale,
+ fb_config,
));
// Blur it
RenderTask::new_blur(
DeviceSize::new(blur_radius_dp, blur_radius_dp),
mask_task_id,
- rg_builder,
+ render_tasks,
RenderTargetKind::Alpha,
+ ClearMode::Zero,
None,
cache_size,
)
}
));
}
- ClipItemKind::Rectangle { .. } |
+ ClipItemKind::Rectangle { mode: ClipMode::Clip, .. } => {
+ if !clip_instance.flags.contains(ClipNodeFlags::SAME_COORD_SYSTEM) {
+ // This is conservative - we only actually need a clear
+ // here if this mask ends up being added via add_tiled_clip_mask,
+ // but for simplicity we clear whenever any of these are encountered,
+ // since they are rare.
+ needs_clear = true;
+ }
+ }
+ ClipItemKind::Rectangle { mode: ClipMode::ClipOut, .. } |
ClipItemKind::RoundedRectangle { .. } |
ClipItemKind::Image { .. } => {}
}
}
- clip_task_id
- }
-
- // Write (up to) 8 floats of data specific to the type
- // of render task that is provided to the GPU shaders
- // via a vertex texture.
- pub fn write_task_data(
- &self,
- target_rect: DeviceIntRect,
- ) -> RenderTaskData {
- // NOTE: The ordering and layout of these structures are
- // required to match both the GPU structures declared
- // in prim_shared.glsl, and also the uses in submit_batch()
- // in renderer.rs.
- // TODO(gw): Maybe there's a way to make this stuff a bit
- // more type-safe. Although, it will always need
- // to be kept in sync with the GLSL code anyway.
-
- let data = match self {
- RenderTaskKind::Picture(ref task) => {
- // Note: has to match `PICTURE_TYPE_*` in shaders
- [
- task.device_pixel_scale.0,
- task.content_origin.x,
- task.content_origin.y,
- 0.0,
- ]
- }
- RenderTaskKind::CacheMask(ref task) => {
- [
- task.device_pixel_scale.0,
- task.actual_rect.origin.x,
- task.actual_rect.origin.y,
- 0.0,
- ]
- }
- RenderTaskKind::ClipRegion(ref task) => {
- [
- task.device_pixel_scale.0,
- 0.0,
- 0.0,
- 0.0,
- ]
- }
- RenderTaskKind::VerticalBlur(ref task) |
- RenderTaskKind::HorizontalBlur(ref task) => {
- [
- task.blur_std_deviation,
- task.blur_region.width as f32,
- task.blur_region.height as f32,
- 0.0,
- ]
- }
- RenderTaskKind::Image(..) |
- RenderTaskKind::Cached(..) |
- RenderTaskKind::Readback(..) |
- RenderTaskKind::Scaling(..) |
- RenderTaskKind::Border(..) |
- RenderTaskKind::LineDecoration(..) |
- RenderTaskKind::FastLinearGradient(..) |
- RenderTaskKind::LinearGradient(..) |
- RenderTaskKind::RadialGradient(..) |
- RenderTaskKind::ConicGradient(..) |
- RenderTaskKind::Blit(..) => {
- [0.0; 4]
- }
-
-
- RenderTaskKind::SvgFilter(ref task) => {
- match task.info {
- SvgFilterInfo::Opacity(opacity) => [opacity, 0.0, 0.0, 0.0],
- SvgFilterInfo::Offset(offset) => [offset.x, offset.y, 0.0, 0.0],
- _ => [0.0; 4]
- }
- }
-
- #[cfg(test)]
- RenderTaskKind::Test(..) => {
- [0.0; 4]
- }
+ // If we have a potentially tiled clip mask, clear the mask area first. Otherwise,
+ // the first (primary) clip mask will overwrite all the clip mask pixels with
+ // blending disabled, setting them to the initial value.
+ let clear_mode = if needs_clear {
+ ClearMode::One
+ } else {
+ ClearMode::DontCare
};
- RenderTaskData {
- data: [
- target_rect.origin.x as f32,
- target_rect.origin.y as f32,
- target_rect.size.width as f32,
- target_rect.size.height as f32,
- data[0],
- data[1],
- data[2],
- data[3],
- ]
- }
- }
-
- pub fn write_gpu_blocks(
- &mut self,
- gpu_cache: &mut GpuCache,
- ) {
- if let RenderTaskKind::SvgFilter(ref mut filter_task) = self {
- match filter_task.info {
- SvgFilterInfo::ColorMatrix(ref matrix) => {
- let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
- if let Some(mut request) = gpu_cache.request(handle) {
- for i in 0..5 {
- request.push([matrix[i*4], matrix[i*4+1], matrix[i*4+2], matrix[i*4+3]]);
- }
- }
- }
- SvgFilterInfo::DropShadow(color) |
- SvgFilterInfo::Flood(color) => {
- let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
- if let Some(mut request) = gpu_cache.request(handle) {
- request.push(color.to_array());
- }
- }
- SvgFilterInfo::ComponentTransfer(ref data) => {
- let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
- if let Some(request) = gpu_cache.request(handle) {
- data.update(request);
- }
- }
- SvgFilterInfo::Composite(ref operator) => {
- if let CompositeOperator::Arithmetic(k_vals) = operator {
- let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
- if let Some(mut request) = gpu_cache.request(handle) {
- request.push(*k_vals);
- }
- }
- }
- _ => {},
- }
- }
- }
-}
-
-/// In order to avoid duplicating the down-scaling and blur passes when a picture has several blurs,
-/// we use a local (primitive-level) cache of the render tasks generated for a single shadowed primitive
-/// in a single frame.
-pub type BlurTaskCache = FastHashMap<BlurTaskKey, RenderTaskId>;
-
-/// Since we only use it within a single primitive, the key only needs to contain the down-scaling level
-/// and the blur std deviation.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
-pub enum BlurTaskKey {
- DownScale(u32),
- Blur { downscale_level: u32, stddev_x: u32, stddev_y: u32 },
-}
-
-impl BlurTaskKey {
- fn downscale_and_blur(downscale_level: u32, blur_stddev: DeviceSize) -> Self {
- // Quantise the std deviations and store it as integers to work around
- // Eq and Hash's f32 allergy.
- // The blur radius is rounded before RenderTask::new_blur so we don't need
- // a lot of precision.
- const QUANTIZATION_FACTOR: f32 = 1024.0;
- let stddev_x = (blur_stddev.width * QUANTIZATION_FACTOR) as u32;
- let stddev_y = (blur_stddev.height * QUANTIZATION_FACTOR) as u32;
- BlurTaskKey::Blur { downscale_level, stddev_x, stddev_y }
- }
-}
-
-// The majority of render tasks have 0, 1 or 2 dependencies, except for pictures that
-// typically have dozens to hundreds of dependencies. SmallVec with 2 inline elements
-// avoids many tiny heap allocations in pages with a lot of text shadows and other
-// types of render tasks.
-pub type TaskDependencies = SmallVec<[RenderTaskId;2]>;
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct RenderTask {
- pub location: RenderTaskLocation,
- pub children: TaskDependencies,
- pub kind: RenderTaskKind,
-
- // TODO(gw): These fields and perhaps others can become private once the
- // frame_graph / render_task source files are unified / cleaned up.
- pub free_after: PassId,
- pub render_on: PassId,
-
- /// The gpu cache handle for the render task's destination rect.
- ///
- /// Will be set to None if the render task is cached, in which case the texture cache
- /// manages the handle.
- pub uv_rect_handle: GpuCacheHandle,
- pub cache_handle: Option<RenderTaskCacheEntryHandle>,
- uv_rect_kind: UvRectKind,
-}
-
-impl RenderTask {
- pub fn new(
- location: RenderTaskLocation,
- kind: RenderTaskKind,
- ) -> Self {
- render_task_sanity_check(&location.size());
-
- RenderTask {
- location,
- children: TaskDependencies::new(),
- kind,
- free_after: PassId::MAX,
- render_on: PassId::MIN,
- uv_rect_handle: GpuCacheHandle::new(),
- uv_rect_kind: UvRectKind::Rect,
- cache_handle: None,
- }
- }
-
- pub fn new_dynamic(
- size: DeviceIntSize,
- kind: RenderTaskKind,
- ) -> Self {
- RenderTask::new(
- RenderTaskLocation::Unallocated { size },
- kind,
+ render_tasks.add().init(
+ RenderTask::with_dynamic_location(
+ outer_rect.size,
+ smallvec![],
+ RenderTaskKind::CacheMask(CacheMaskTask {
+ actual_rect: outer_rect,
+ clip_node_range,
+ root_spatial_node_index,
+ device_pixel_scale,
+ }),
+ clear_mode,
+ )
)
}
- pub fn with_uv_rect_kind(mut self, uv_rect_kind: UvRectKind) -> Self {
- self.uv_rect_kind = uv_rect_kind;
- self
- }
-
- pub fn new_image(
+ pub fn new_rounded_rect_mask(
size: DeviceIntSize,
- request: ImageRequest,
+ clip_data_address: GpuCacheAddress,
+ local_pos: LayoutPoint,
+ device_pixel_scale: DevicePixelScale,
+ fb_config: &FrameBuilderConfig,
) -> Self {
- // Note: this is a special constructor for image render tasks that does not
- // do the render task size sanity check. This is because with SWGL we purposefully
- // avoid tiling large images. There is no upload with SWGL so whatever was
- // successfully allocated earlier will be what shaders read, regardless of the size
- // and copying into tiles would only slow things down.
- // As a result we can run into very large images being added to the frame graph
- // (this is covered by a few reftests on the CI).
+ let clear_mode = if fb_config.gpu_supports_fast_clears {
+ ClearMode::One
+ } else {
+ ClearMode::DontCare
+ };
- RenderTask {
- location: RenderTaskLocation::CacheRequest { size, },
- children: TaskDependencies::new(),
- kind: RenderTaskKind::Image(request),
- free_after: PassId::MAX,
- render_on: PassId::MIN,
- uv_rect_handle: GpuCacheHandle::new(),
- uv_rect_kind: UvRectKind::Rect,
- cache_handle: None,
- }
+ RenderTask::with_dynamic_location(
+ size,
+ TaskDependencies::new(),
+ RenderTaskKind::ClipRegion(ClipRegionTask {
+ clip_data_address,
+ local_pos,
+ device_pixel_scale,
+ }),
+ clear_mode,
+ )
}
-
- #[cfg(test)]
- pub fn new_test(
- location: RenderTaskLocation,
- target: RenderTargetKind,
- ) -> Self {
- RenderTask {
- location,
- children: TaskDependencies::new(),
- kind: RenderTaskKind::Test(target),
- free_after: PassId::MAX,
- render_on: PassId::MIN,
- uv_rect_handle: GpuCacheHandle::new(),
- uv_rect_kind: UvRectKind::Rect,
- cache_handle: None,
+ // In order to do the blur down-scaling passes without introducing errors, we need the
+ // source of each down-scale pass to be a multiple of two. If need be, this inflates
+ // the source size so that each down-scale pass will sample correctly.
+ pub fn adjusted_blur_source_size(original_size: DeviceSize, mut std_dev: DeviceSize) -> DeviceSize {
+ let mut adjusted_size = original_size;
+ let mut scale_factor = 1.0;
+ while std_dev.width > MAX_BLUR_STD_DEVIATION && std_dev.height > MAX_BLUR_STD_DEVIATION {
+ if adjusted_size.width < MIN_DOWNSCALING_RT_SIZE as f32 ||
+ adjusted_size.height < MIN_DOWNSCALING_RT_SIZE as f32 {
+ break;
+ }
+ std_dev = std_dev * 0.5;
+ scale_factor *= 2.0;
+ adjusted_size = (original_size.to_f32() / scale_factor).ceil();
}
- }
- pub fn new_blit(
- size: DeviceIntSize,
- source: RenderTaskId,
- rg_builder: &mut RenderTaskGraphBuilder,
- ) -> RenderTaskId {
- // If this blit uses a render task as a source,
- // ensure it's added as a child task. This will
- // ensure it gets allocated in the correct pass
- // and made available as an input when this task
- // executes.
-
- let blit_task_id = rg_builder.add().init(RenderTask::new_dynamic(
- size,
- RenderTaskKind::Blit(BlitTask { source }),
- ));
-
- rg_builder.add_dependency(blit_task_id, source);
-
- blit_task_id
+ adjusted_size * scale_factor
}
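// A worked example of the adjustment above, written standalone. It assumes
// the constants this file uses elsewhere (MAX_BLUR_STD_DEVIATION = 4.0,
// MIN_DOWNSCALING_RT_SIZE = 8); if those differ the numbers shift, but the
// idea is the same.
fn blur_size_demo() {
    let (w, h) = (300.0f32, 200.0f32);
    let mut std_dev = 18.0f32;
    let mut scale = 1.0f32;
    while std_dev > 4.0 && (w / scale).ceil() >= 8.0 && (h / scale).ceil() >= 8.0 {
        std_dev *= 0.5;
        scale *= 2.0;
    }
    // Three halvings: 18 -> 9 -> 4.5 -> 2.25, so scale = 8. The source is
    // inflated to ceil(size / 8) * 8 = 304 x 200, which stays a whole number
    // of pixels through every down-scale pass.
    let adjusted = ((w / scale).ceil() * scale, (h / scale).ceil() * scale);
    assert_eq!(adjusted, (304.0, 200.0));
}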
// Construct a render task to apply a blur to a primitive.
@@ -861,16 +691,17 @@ impl RenderTask {
pub fn new_blur(
blur_std_deviation: DeviceSize,
src_task_id: RenderTaskId,
- rg_builder: &mut RenderTaskGraphBuilder,
+ render_tasks: &mut RenderTaskGraph,
target_kind: RenderTargetKind,
+ clear_mode: ClearMode,
mut blur_cache: Option<&mut BlurTaskCache>,
blur_region: DeviceIntSize,
) -> RenderTaskId {
// Adjust large std deviation value.
let mut adjusted_blur_std_deviation = blur_std_deviation;
let (blur_target_size, uv_rect_kind) = {
- let src_task = rg_builder.get_task(src_task_id);
- (src_task.location.size(), src_task.uv_rect_kind())
+ let src_task = &render_tasks[src_task_id];
+ (src_task.get_dynamic_size(), src_task.uv_rect_kind())
};
let mut adjusted_blur_target_size = blur_target_size;
let mut downscaling_src_task_id = src_task_id;
@@ -894,7 +725,7 @@ impl RenderTask {
downscaling_src_task_id = cached_task.unwrap_or_else(|| {
RenderTask::new_scaling(
downscaling_src_task_id,
- rg_builder,
+ render_tasks,
target_kind,
adjusted_blur_target_size,
)
@@ -918,27 +749,31 @@ impl RenderTask {
let blur_region = blur_region / (scale_factor as i32);
let blur_task_id = cached_task.unwrap_or_else(|| {
- let blur_task_v = rg_builder.add().init(RenderTask::new_dynamic(
+ let blur_task_v = render_tasks.add().init(RenderTask::with_dynamic_location(
adjusted_blur_target_size,
+ smallvec![downscaling_src_task_id],
RenderTaskKind::VerticalBlur(BlurTask {
blur_std_deviation: adjusted_blur_std_deviation.height,
target_kind,
+ uv_rect_handle: GpuCacheHandle::new(),
blur_region,
+ uv_rect_kind,
}),
- ).with_uv_rect_kind(uv_rect_kind));
- rg_builder.add_dependency(blur_task_v, downscaling_src_task_id);
+ clear_mode,
+ ));
- let task_id = rg_builder.add().init(RenderTask::new_dynamic(
+ render_tasks.add().init(RenderTask::with_dynamic_location(
adjusted_blur_target_size,
+ smallvec![blur_task_v],
RenderTaskKind::HorizontalBlur(BlurTask {
blur_std_deviation: adjusted_blur_std_deviation.width,
target_kind,
+ uv_rect_handle: GpuCacheHandle::new(),
blur_region,
+ uv_rect_kind,
}),
- ).with_uv_rect_kind(uv_rect_kind));
- rg_builder.add_dependency(task_id, blur_task_v);
-
- task_id
+ clear_mode,
+ ))
});
if let Some(ref mut cache) = blur_cache {
@@ -948,15 +783,29 @@ impl RenderTask {
blur_task_id
}
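// A standalone model (not WebRender code) of why new_blur threads each task
// through `children`: a task has to render one pass later than its deepest
// dependency, so the chain source -> downscale -> vertical -> horizontal
// comes out in the right order.
fn pass_order_demo() {
    fn pass_of(task: usize, children: &[Vec<usize>]) -> usize {
        children[task]
            .iter()
            .map(|&c| pass_of(c, children) + 1)
            .max()
            .unwrap_or(0)
    }

    // 0 = source picture, 1 = downscale, 2 = vertical blur, 3 = horizontal blur.
    let children = vec![vec![], vec![0], vec![1], vec![2]];
    let passes: Vec<usize> = (0..4).map(|t| pass_of(t, &children)).collect();
    assert_eq!(passes, vec![0, 1, 2, 3]);
}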
+ pub fn new_border_segment(
+ size: DeviceIntSize,
+ instances: Vec<BorderInstance>,
+ ) -> Self {
+ RenderTask::with_dynamic_location(
+ size,
+ TaskDependencies::new(),
+ RenderTaskKind::Border(BorderTask {
+ instances,
+ }),
+ ClearMode::Transparent,
+ )
+ }
+
pub fn new_scaling(
src_task_id: RenderTaskId,
- rg_builder: &mut RenderTaskGraphBuilder,
+ render_tasks: &mut RenderTaskGraph,
target_kind: RenderTargetKind,
size: DeviceIntSize,
) -> RenderTaskId {
Self::new_scaling_with_padding(
- src_task_id,
- rg_builder,
+ BlitSource::RenderTask { task_id: src_task_id },
+ render_tasks,
target_kind,
size,
DeviceIntSideOffsets::zero(),
@@ -964,33 +813,36 @@ impl RenderTask {
}
pub fn new_scaling_with_padding(
- source: RenderTaskId,
- rg_builder: &mut RenderTaskGraphBuilder,
+ source: BlitSource,
+ render_tasks: &mut RenderTaskGraph,
target_kind: RenderTargetKind,
padded_size: DeviceIntSize,
padding: DeviceIntSideOffsets,
) -> RenderTaskId {
- let uv_rect_kind = rg_builder.get_task(source).uv_rect_kind();
+ let (uv_rect_kind, children, image) = match source {
+ BlitSource::RenderTask { task_id } => (render_tasks[task_id].uv_rect_kind(), smallvec![task_id], None),
+ BlitSource::Image { key } => (UvRectKind::Rect, smallvec![], Some(key)),
+ };
- let task_id = rg_builder.add().init(
- RenderTask::new_dynamic(
+ render_tasks.add().init(
+ RenderTask::with_dynamic_location(
padded_size,
+ children,
RenderTaskKind::Scaling(ScalingTask {
target_kind,
+ image,
+ uv_rect_kind,
padding,
}),
- ).with_uv_rect_kind(uv_rect_kind)
- );
-
- rg_builder.add_dependency(task_id, source);
-
- task_id
+ ClearMode::DontCare,
+ )
+ )
}
pub fn new_svg_filter(
filter_primitives: &[FilterPrimitive],
filter_datas: &[SFilterData],
- rg_builder: &mut RenderTaskGraphBuilder,
+ render_tasks: &mut RenderTaskGraph,
content_size: DeviceIntSize,
uv_rect_kind: UvRectKind,
original_task_id: RenderTaskId,
@@ -1005,7 +857,7 @@ impl RenderTask {
let get_task_input = |
input: &FilterPrimitiveInput,
filter_primitives: &[FilterPrimitive],
- rg_builder: &mut RenderTaskGraphBuilder,
+ render_tasks: &mut RenderTaskGraph,
cur_index: usize,
outputs: &[RenderTaskId],
original: RenderTaskId,
@@ -1019,22 +871,20 @@ impl RenderTask {
match (input_color_space, color_space) {
(ColorSpace::Srgb, ColorSpace::LinearRgb) => {
- task_id = RenderTask::new_svg_filter_primitive(
+ task_id = render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::SrgbToLinear,
- rg_builder,
- );
+ ));
},
(ColorSpace::LinearRgb, ColorSpace::Srgb) => {
- task_id = RenderTask::new_svg_filter_primitive(
+ task_id = render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::LinearToSrgb,
- rg_builder,
- );
+ ));
},
_ => {},
}
@@ -1051,7 +901,7 @@ impl RenderTask {
get_task_input(
&identity.input,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
@@ -1062,7 +912,7 @@ impl RenderTask {
let input_1_task_id = get_task_input(
&blend.input1,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
@@ -1071,37 +921,34 @@ impl RenderTask {
let input_2_task_id = get_task_input(
&blend.input2,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
primitive.color_space
);
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![input_1_task_id, input_2_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::Blend(blend.mode),
- rg_builder,
- )
+ ))
},
FilterPrimitiveKind::Flood(ref flood) => {
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![],
content_size,
uv_rect_kind,
SvgFilterInfo::Flood(flood.color),
- rg_builder,
- )
+ ))
}
FilterPrimitiveKind::Blur(ref blur) => {
- let width_std_deviation = blur.width * device_pixel_scale.0;
- let height_std_deviation = blur.height * device_pixel_scale.0;
+ let blur_std_deviation = blur.radius * device_pixel_scale.0;
let input_task_id = get_task_input(
&blur.input,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
@@ -1109,18 +956,18 @@ impl RenderTask {
);
RenderTask::new_blur(
- DeviceSize::new(width_std_deviation, height_std_deviation),
+ DeviceSize::new(blur_std_deviation, blur_std_deviation),
// TODO: This is a hack to ensure that a blur task's input is always
// in the blur's previous pass.
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![input_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::Identity,
- rg_builder,
- ),
- rg_builder,
+ )),
+ render_tasks,
RenderTargetKind::Color,
+ ClearMode::Transparent,
None,
content_size,
)
@@ -1129,45 +976,43 @@ impl RenderTask {
let input_task_id = get_task_input(
&opacity.input,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
primitive.color_space
);
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![input_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::Opacity(opacity.opacity),
- rg_builder,
- )
+ ))
}
FilterPrimitiveKind::ColorMatrix(ref color_matrix) => {
let input_task_id = get_task_input(
&color_matrix.input,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
primitive.color_space
);
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![input_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::ColorMatrix(Box::new(color_matrix.matrix)),
- rg_builder,
- )
+ ))
}
FilterPrimitiveKind::DropShadow(ref drop_shadow) => {
let input_task_id = get_task_input(
&drop_shadow.input,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
@@ -1177,36 +1022,37 @@ impl RenderTask {
let blur_std_deviation = drop_shadow.shadow.blur_radius * device_pixel_scale.0;
let offset = drop_shadow.shadow.offset * LayoutToWorldScale::new(1.0) * device_pixel_scale;
- let offset_task_id = RenderTask::new_svg_filter_primitive(
- smallvec![input_task_id],
- content_size,
- uv_rect_kind,
- SvgFilterInfo::Offset(offset),
- rg_builder,
+ let offset_task_id = render_tasks.add().init(
+ RenderTask::new_svg_filter_primitive(
+ smallvec![input_task_id],
+ content_size,
+ uv_rect_kind,
+ SvgFilterInfo::Offset(offset),
+ )
);
let blur_task_id = RenderTask::new_blur(
DeviceSize::new(blur_std_deviation, blur_std_deviation),
offset_task_id,
- rg_builder,
+ render_tasks,
RenderTargetKind::Color,
+ ClearMode::Transparent,
None,
content_size,
);
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![input_task_id, blur_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::DropShadow(drop_shadow.shadow.color),
- rg_builder,
- )
+ ))
}
FilterPrimitiveKind::ComponentTransfer(ref component_transfer) => {
let input_task_id = get_task_input(
&component_transfer.input,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
@@ -1218,20 +1064,19 @@ impl RenderTask {
if filter_data.is_identity() {
input_task_id
} else {
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![input_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::ComponentTransfer(filter_data.clone()),
- rg_builder,
- )
+ ))
}
}
FilterPrimitiveKind::Offset(ref info) => {
let input_task_id = get_task_input(
&info.input,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
@@ -1239,19 +1084,18 @@ impl RenderTask {
);
let offset = info.offset * LayoutToWorldScale::new(1.0) * device_pixel_scale;
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![input_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::Offset(offset),
- rg_builder,
- )
+ ))
}
FilterPrimitiveKind::Composite(info) => {
let input_1_task_id = get_task_input(
&info.input1,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
@@ -1260,20 +1104,19 @@ impl RenderTask {
let input_2_task_id = get_task_input(
&info.input2,
filter_primitives,
- rg_builder,
+ render_tasks,
cur_index,
&outputs,
original_task_id,
primitive.color_space
);
- RenderTask::new_svg_filter_primitive(
+ render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![input_1_task_id, input_2_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::Composite(info.operator),
- rg_builder,
- )
+ ))
}
};
outputs.push(render_task_id);
@@ -1284,13 +1127,12 @@ impl RenderTask {
// Convert to sRGB if needed
if filter_primitives.last().unwrap().color_space == ColorSpace::LinearRgb {
- render_task_id = RenderTask::new_svg_filter_primitive(
+ render_task_id = render_tasks.add().init(RenderTask::new_svg_filter_primitive(
smallvec![render_task_id],
content_size,
uv_rect_kind,
SvgFilterInfo::LinearToSrgb,
- rg_builder,
- );
+ ));
}
render_task_id
@@ -1301,71 +1143,191 @@ impl RenderTask {
target_size: DeviceIntSize,
uv_rect_kind: UvRectKind,
info: SvgFilterInfo,
- rg_builder: &mut RenderTaskGraphBuilder,
- ) -> RenderTaskId {
- let task_id = rg_builder.add().init(RenderTask::new_dynamic(
+ ) -> Self {
+ RenderTask::with_dynamic_location(
target_size,
+ tasks,
RenderTaskKind::SvgFilter(SvgFilterTask {
extra_gpu_cache_handle: None,
+ uv_rect_handle: GpuCacheHandle::new(),
+ uv_rect_kind,
info,
}),
- ).with_uv_rect_kind(uv_rect_kind));
-
- for child_id in tasks {
- rg_builder.add_dependency(task_id, child_id);
- }
-
- task_id
+ ClearMode::Transparent,
+ )
}
pub fn uv_rect_kind(&self) -> UvRectKind {
- self.uv_rect_kind
- }
+ match self.kind {
+ RenderTaskKind::CacheMask(..) |
+ RenderTaskKind::Readback(..) => {
+ unreachable!("bug: unexpected render task");
+ }
- pub fn get_texture_address(&self, gpu_cache: &GpuCache) -> GpuCacheAddress {
- gpu_cache.get_address(&self.uv_rect_handle)
- }
+ RenderTaskKind::Picture(ref task) => {
+ task.uv_rect_kind
+ }
- pub fn get_dynamic_size(&self) -> DeviceIntSize {
- self.location.size()
+ RenderTaskKind::VerticalBlur(ref task) |
+ RenderTaskKind::HorizontalBlur(ref task) => {
+ task.uv_rect_kind
+ }
+
+ RenderTaskKind::Scaling(ref task) => {
+ task.uv_rect_kind
+ }
+
+ RenderTaskKind::SvgFilter(ref task) => {
+ task.uv_rect_kind
+ }
+
+ RenderTaskKind::ClipRegion(..) |
+ RenderTaskKind::Border(..) |
+ RenderTaskKind::Gradient(..) |
+ RenderTaskKind::LineDecoration(..) |
+ RenderTaskKind::Blit(..) => {
+ UvRectKind::Rect
+ }
+
+ #[cfg(test)]
+ RenderTaskKind::Test(..) => {
+ unreachable!("Unexpected render task");
+ }
+ }
}
- pub fn get_target_texture(&self) -> CacheTextureId {
- match self.location {
- RenderTaskLocation::Dynamic { texture_id, .. } => {
- assert_ne!(texture_id, CacheTextureId::INVALID);
- texture_id
+ // Write (up to) 8 floats of data specific to the type
+ // of render task that is provided to the GPU shaders
+ // via a vertex texture.
+ pub fn write_task_data(&self) -> RenderTaskData {
+ // NOTE: The ordering and layout of these structures are
+ // required to match both the GPU structures declared
+ // in prim_shared.glsl, and also the uses in submit_batch()
+ // in renderer.rs.
+ // TODO(gw): Maybe there's a way to make this stuff a bit
+ // more type-safe. Although, it will always need
+ // to be kept in sync with the GLSL code anyway.
+
+ let data = match self.kind {
+ RenderTaskKind::Picture(ref task) => {
+ // Note: has to match `PICTURE_TYPE_*` in shaders
+ [
+ task.device_pixel_scale.0,
+ task.content_origin.x as f32,
+ task.content_origin.y as f32,
+ ]
+ }
+ RenderTaskKind::CacheMask(ref task) => {
+ [
+ task.device_pixel_scale.0,
+ task.actual_rect.origin.x as f32,
+ task.actual_rect.origin.y as f32,
+ ]
}
- RenderTaskLocation::CacheRequest { .. } |
- RenderTaskLocation::Unallocated { .. } |
- RenderTaskLocation::Static { .. } => {
+ RenderTaskKind::ClipRegion(ref task) => {
+ [
+ task.device_pixel_scale.0,
+ 0.0,
+ 0.0,
+ ]
+ }
+ RenderTaskKind::VerticalBlur(ref task) |
+ RenderTaskKind::HorizontalBlur(ref task) => {
+ [
+ task.blur_std_deviation,
+ task.blur_region.width as f32,
+ task.blur_region.height as f32,
+ ]
+ }
+ RenderTaskKind::Readback(..) |
+ RenderTaskKind::Scaling(..) |
+ RenderTaskKind::Border(..) |
+ RenderTaskKind::LineDecoration(..) |
+ RenderTaskKind::Gradient(..) |
+ RenderTaskKind::Blit(..) => {
+ [0.0; 3]
+ }
+
+
+ RenderTaskKind::SvgFilter(ref task) => {
+ match task.info {
+ SvgFilterInfo::Opacity(opacity) => [opacity, 0.0, 0.0],
+ SvgFilterInfo::Offset(offset) => [offset.x, offset.y, 0.0],
+ _ => [0.0; 3]
+ }
+ }
+
+ #[cfg(test)]
+ RenderTaskKind::Test(..) => {
unreachable!();
}
+ };
+
+ let (mut target_rect, target_index) = self.get_target_rect();
+ // The primitives inside a fixed-location render task
+ // are already placed to their corresponding positions,
+ // so the shader doesn't need to shift by the origin.
+ if let RenderTaskLocation::Fixed(_) = self.location {
+ target_rect.origin = DeviceIntPoint::origin();
+ }
+
+ RenderTaskData {
+ data: [
+ target_rect.origin.x as f32,
+ target_rect.origin.y as f32,
+ target_rect.size.width as f32,
+ target_rect.size.height as f32,
+ target_index.0 as f32,
+ data[0],
+ data[1],
+ data[2],
+ ]
}
}
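// A standalone sketch of the packing contract above: the first five floats
// locate the target rect plus its texture layer, the remaining three carry
// the task-specific payload, and prim_shared.glsl unpacks them in the same
// order. The struct and function here are illustrative, not WebRender's.
struct UnpackedTaskData {
    rect: [f32; 4],
    layer: f32,
    user: [f32; 3],
}

fn unpack_task_data(data: [f32; 8]) -> UnpackedTaskData {
    UnpackedTaskData {
        rect: [data[0], data[1], data[2], data[3]],
        layer: data[4],
        user: [data[5], data[6], data[7]],
    }
}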
- pub fn get_texture_source(&self) -> TextureSource {
- match self.location {
- RenderTaskLocation::Dynamic { texture_id, .. } => {
- assert_ne!(texture_id, CacheTextureId::INVALID);
- TextureSource::TextureCache(texture_id, Swizzle::default())
+ pub fn get_texture_address(&self, gpu_cache: &GpuCache) -> GpuCacheAddress {
+ match self.kind {
+ RenderTaskKind::Picture(ref info) => {
+ gpu_cache.get_address(&info.uv_rect_handle)
}
- RenderTaskLocation::Static { surface: StaticRenderTaskSurface::ReadOnly { source }, .. } => {
- source
+ RenderTaskKind::VerticalBlur(ref info) |
+ RenderTaskKind::HorizontalBlur(ref info) => {
+ gpu_cache.get_address(&info.uv_rect_handle)
}
- RenderTaskLocation::Static { surface: StaticRenderTaskSurface::TextureCache { texture, .. }, .. } => {
- TextureSource::TextureCache(texture, Swizzle::default())
+ RenderTaskKind::SvgFilter(ref info) => {
+ gpu_cache.get_address(&info.uv_rect_handle)
}
- RenderTaskLocation::Static { .. } |
- RenderTaskLocation::CacheRequest { .. } |
- RenderTaskLocation::Unallocated { .. } => {
- unreachable!();
+ RenderTaskKind::ClipRegion(..) |
+ RenderTaskKind::Readback(..) |
+ RenderTaskKind::Scaling(..) |
+ RenderTaskKind::Blit(..) |
+ RenderTaskKind::Border(..) |
+ RenderTaskKind::CacheMask(..) |
+ RenderTaskKind::Gradient(..) |
+ RenderTaskKind::LineDecoration(..) => {
+ panic!("texture handle not supported for this task kind");
}
+ #[cfg(test)]
+ RenderTaskKind::Test(..) => {
+ panic!("RenderTask tests aren't expected to exercise this code");
+ }
+ }
+ }
+
+ pub fn get_dynamic_size(&self) -> DeviceIntSize {
+ match self.location {
+ RenderTaskLocation::Fixed(..) => DeviceIntSize::zero(),
+ RenderTaskLocation::Dynamic(_, size) => size,
+ RenderTaskLocation::TextureCache { rect, .. } => rect.size,
+ RenderTaskLocation::PictureCache { size, .. } => size,
}
}
- pub fn get_target_rect(&self) -> DeviceIntRect {
+ pub fn get_target_rect(&self) -> (DeviceIntRect, RenderTargetIndex) {
match self.location {
+ RenderTaskLocation::Fixed(rect) => {
+ (rect, RenderTargetIndex(0))
+ }
// Previously, we only added render tasks after the entire
// primitive chain was determined visible. This meant that
// we could assert any render task in the list was also
@@ -1380,52 +1342,223 @@ impl RenderTask {
// TODO(gw): Consider some kind of tag or other method
// to mark a task as unused explicitly. This
// would allow us to restore this debug check.
- RenderTaskLocation::Dynamic { rect, .. } => rect,
- RenderTaskLocation::Static { rect, .. } => rect,
- RenderTaskLocation::CacheRequest { .. }
- | RenderTaskLocation::Unallocated { .. } => {
- panic!("bug: get_target_rect called before allocating");
+ RenderTaskLocation::Dynamic(Some((origin, target_index)), size) => {
+ (DeviceIntRect::new(origin, size), target_index)
+ }
+ RenderTaskLocation::Dynamic(None, _) => {
+ (DeviceIntRect::zero(), RenderTargetIndex(0))
+ }
+ RenderTaskLocation::TextureCache {layer, rect, .. } => {
+ (rect, RenderTargetIndex(layer as usize))
+ }
+ RenderTaskLocation::PictureCache { ref surface, size, .. } => {
+ let layer = match surface {
+ ResolvedSurfaceTexture::TextureCache { layer, .. } => *layer,
+ ResolvedSurfaceTexture::Native { .. } => 0,
+ };
+
+ (
+ DeviceIntRect::new(
+ DeviceIntPoint::zero(),
+ size,
+ ),
+ RenderTargetIndex(layer as usize),
+ )
}
}
}
pub fn target_kind(&self) -> RenderTargetKind {
- self.kind.target_kind()
+ match self.kind {
+ RenderTaskKind::LineDecoration(..) |
+ RenderTaskKind::Readback(..) |
+ RenderTaskKind::Border(..) |
+ RenderTaskKind::Gradient(..) |
+ RenderTaskKind::Picture(..) |
+ RenderTaskKind::Blit(..) |
+ RenderTaskKind::SvgFilter(..) => {
+ RenderTargetKind::Color
+ }
+
+ RenderTaskKind::ClipRegion(..) |
+ RenderTaskKind::CacheMask(..) => {
+ RenderTargetKind::Alpha
+ }
+
+ RenderTaskKind::VerticalBlur(ref task_info) |
+ RenderTaskKind::HorizontalBlur(ref task_info) => {
+ task_info.target_kind
+ }
+
+ RenderTaskKind::Scaling(ref task_info) => {
+ task_info.target_kind
+ }
+
+ #[cfg(test)]
+ RenderTaskKind::Test(kind) => kind,
+ }
}
pub fn write_gpu_blocks(
&mut self,
- target_rect: DeviceIntRect,
gpu_cache: &mut GpuCache,
) {
profile_scope!("write_gpu_blocks");
+ let (target_rect, target_index) = self.get_target_rect();
- self.kind.write_gpu_blocks(gpu_cache);
-
- if self.cache_handle.is_some() {
- // The uv rect handle of cached render tasks is requested and set by the
- // render task cache.
- return;
- }
+ let (cache_handle, uv_rect_kind) = match self.kind {
+ RenderTaskKind::HorizontalBlur(ref mut info) |
+ RenderTaskKind::VerticalBlur(ref mut info) => {
+ (&mut info.uv_rect_handle, info.uv_rect_kind)
+ }
+ RenderTaskKind::Picture(ref mut info) => {
+ (&mut info.uv_rect_handle, info.uv_rect_kind)
+ }
+ RenderTaskKind::SvgFilter(ref mut info) => {
+ (&mut info.uv_rect_handle, info.uv_rect_kind)
+ }
+ RenderTaskKind::Readback(..) |
+ RenderTaskKind::Scaling(..) |
+ RenderTaskKind::Blit(..) |
+ RenderTaskKind::ClipRegion(..) |
+ RenderTaskKind::Border(..) |
+ RenderTaskKind::CacheMask(..) |
+ RenderTaskKind::Gradient(..) |
+ RenderTaskKind::LineDecoration(..) => {
+ return;
+ }
+ #[cfg(test)]
+ RenderTaskKind::Test(..) => {
+ panic!("RenderTask tests aren't expected to exercise this code");
+ }
+ };
- if let Some(mut request) = gpu_cache.request(&mut self.uv_rect_handle) {
+ if let Some(mut request) = gpu_cache.request(cache_handle) {
let p0 = target_rect.min().to_f32();
let p1 = target_rect.max().to_f32();
let image_source = ImageSource {
p0,
p1,
- user_data: [0.0; 4],
- uv_rect_kind: self.uv_rect_kind,
+ texture_layer: target_index.0 as f32,
+ user_data: [0.0; 3],
+ uv_rect_kind,
};
image_source.write_gpu_blocks(&mut request);
}
+
+ if let RenderTaskKind::SvgFilter(ref mut filter_task) = self.kind {
+ match filter_task.info {
+ SvgFilterInfo::ColorMatrix(ref matrix) => {
+ let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
+ if let Some(mut request) = gpu_cache.request(handle) {
+ for i in 0..5 {
+ request.push([matrix[i*4], matrix[i*4+1], matrix[i*4+2], matrix[i*4+3]]);
+ }
+ }
+ }
+ SvgFilterInfo::DropShadow(color) |
+ SvgFilterInfo::Flood(color) => {
+ let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
+ if let Some(mut request) = gpu_cache.request(handle) {
+ request.push(color.to_array());
+ }
+ }
+ SvgFilterInfo::ComponentTransfer(ref data) => {
+ let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
+ if let Some(request) = gpu_cache.request(handle) {
+ data.update(request);
+ }
+ }
+ SvgFilterInfo::Composite(ref operator) => {
+ if let CompositeOperator::Arithmetic(k_vals) = operator {
+ let handle = filter_task.extra_gpu_cache_handle.get_or_insert_with(GpuCacheHandle::new);
+ if let Some(mut request) = gpu_cache.request(handle) {
+ request.push(*k_vals);
+ }
+ }
+ }
+ _ => {},
+ }
+ }
}
- /// Called by the render task cache.
- ///
- /// Tells the render task that it is cached (which means its gpu cache
- /// handle is managed by the texture cache).
- pub fn mark_cached(&mut self, handle: RenderTaskCacheEntryHandle) {
- self.cache_handle = Some(handle);
+ #[cfg(feature = "debugger")]
+ pub fn print_with<T: PrintTreePrinter>(&self, pt: &mut T, tree: &RenderTaskGraph) -> bool {
+ match self.kind {
+ RenderTaskKind::Picture(ref task) => {
+ pt.new_level(format!("Picture of {:?}", task.pic_index));
+ }
+ RenderTaskKind::CacheMask(ref task) => {
+ pt.new_level(format!("CacheMask with {} clips", task.clip_node_range.count));
+ pt.add_item(format!("rect: {:?}", task.actual_rect));
+ }
+ RenderTaskKind::LineDecoration(..) => {
+ pt.new_level("LineDecoration".to_owned());
+ }
+ RenderTaskKind::ClipRegion(..) => {
+ pt.new_level("ClipRegion".to_owned());
+ }
+ RenderTaskKind::VerticalBlur(ref task) => {
+ pt.new_level("VerticalBlur".to_owned());
+ task.print_with(pt);
+ }
+ RenderTaskKind::HorizontalBlur(ref task) => {
+ pt.new_level("HorizontalBlur".to_owned());
+ task.print_with(pt);
+ }
+ RenderTaskKind::Readback(ref rect) => {
+ pt.new_level("Readback".to_owned());
+ pt.add_item(format!("rect: {:?}", rect));
+ }
+ RenderTaskKind::Scaling(ref kind) => {
+ pt.new_level("Scaling".to_owned());
+ pt.add_item(format!("kind: {:?}", kind));
+ }
+ RenderTaskKind::Border(..) => {
+ pt.new_level("Border".to_owned());
+ }
+ RenderTaskKind::Blit(ref task) => {
+ pt.new_level("Blit".to_owned());
+ pt.add_item(format!("source: {:?}", task.source));
+ }
+ RenderTaskKind::Gradient(..) => {
+ pt.new_level("Gradient".to_owned());
+ }
+ RenderTaskKind::SvgFilter(ref task) => {
+ pt.new_level("SvgFilter".to_owned());
+ pt.add_item(format!("primitive: {:?}", task.info));
+ }
+ #[cfg(test)]
+ RenderTaskKind::Test(..) => {
+ pt.new_level("Test".to_owned());
+ }
+ }
+
+ pt.add_item(format!("clear to: {:?}", self.clear_mode));
+ pt.add_item(format!("dimensions: {:?}", self.location.size()));
+
+ for &child_id in &self.children {
+ if tree[child_id].print_with(pt, tree) {
+ pt.add_item(format!("self: {:?}", child_id))
+ }
+ }
+
+ pt.end_level();
+ true
+ }
+
+ /// Mark this render task for keeping the results alive up until the end of the frame.
+ #[inline]
+ pub fn mark_for_saving(&mut self) {
+ match self.location {
+ RenderTaskLocation::Fixed(..) |
+ RenderTaskLocation::Dynamic(..) => {
+ self.saved_index = Some(SavedTargetIndex::PENDING);
+ }
+ RenderTaskLocation::TextureCache { .. } |
+ RenderTaskLocation::PictureCache { .. } => {
+ panic!("Unable to mark a permanently cached task for saving!");
+ }
+ }
}
}
diff --git a/third_party/webrender/webrender/src/render_task_cache.rs b/third_party/webrender/webrender/src/render_task_cache.rs
index 370897f30b0..22ea235e33e 100644
--- a/third_party/webrender/webrender/src/render_task_cache.rs
+++ b/third_party/webrender/webrender/src/render_task_cache.rs
@@ -11,34 +11,15 @@ use crate::device::TextureFilter;
use crate::freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
use crate::gpu_cache::GpuCache;
use crate::internal_types::FastHashMap;
-use crate::picture::{SurfaceIndex, SurfaceInfo};
use crate::prim_store::image::ImageCacheKey;
-use crate::prim_store::gradient::{
- FastLinearGradientCacheKey, LinearGradientCacheKey, RadialGradientCacheKey,
- ConicGradientCacheKey,
-};
+use crate::prim_store::gradient::GradientCacheKey;
use crate::prim_store::line_dec::LineDecorationCacheKey;
use crate::resource_cache::CacheItem;
use std::{mem, usize, f32, i32};
-use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction, TargetShader};
+use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction};
use crate::render_target::RenderTargetKind;
-use crate::render_task::{RenderTask, StaticRenderTaskSurface, RenderTaskLocation, RenderTaskKind, CachedTask};
-use crate::render_task_graph::{RenderTaskGraphBuilder, RenderTaskId};
-use crate::frame_builder::add_child_render_task;
-use euclid::Scale;
-
-const MAX_CACHE_TASK_SIZE: f32 = 4096.0;
-
-/// Describes a parent dependency for a render task. Render tasks
-/// may depend on a surface (e.g. when a surface uses a cached border)
-/// or an arbitrary render task (e.g. when a clip mask uses a blurred
-/// box-shadow input).
-pub enum RenderTaskParent {
- /// Parent is a surface
- Surface(SurfaceIndex),
- /// Parent is a render task
- RenderTask(RenderTaskId),
-}
+use crate::render_task::{RenderTask, RenderTaskLocation};
+use crate::render_task_graph::{RenderTaskGraph, RenderTaskId};
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -48,10 +29,7 @@ pub enum RenderTaskCacheKeyKind {
Image(ImageCacheKey),
BorderSegment(BorderSegmentCacheKey),
LineDecoration(LineDecorationCacheKey),
- FastLinearGradient(FastLinearGradientCacheKey),
- LinearGradient(LinearGradientCacheKey),
- RadialGradient(RadialGradientCacheKey),
- ConicGradient(ConicGradientCacheKey),
+ Gradient(GradientCacheKey),
}
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
@@ -66,16 +44,9 @@ pub struct RenderTaskCacheKey {
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct RenderTaskCacheEntry {
- user_data: Option<[f32; 4]>,
- target_kind: RenderTargetKind,
+ user_data: Option<[f32; 3]>,
is_opaque: bool,
- frame_id: u64,
pub handle: TextureCacheHandle,
- /// If a render task was generated for this cache entry on _this_ frame,
- /// we need to track the task id here. This allows us to hook it up as
- // a dependency of any parent tasks that make a request from the render
- /// task cache.
- pub render_task_id: Option<RenderTaskId>,
}
#[derive(Debug, MallocSizeOf)]
@@ -90,7 +61,6 @@ pub enum RenderTaskCacheMarker {}
pub struct RenderTaskCache {
map: FastHashMap<RenderTaskCacheKey, FreeListHandle<RenderTaskCacheMarker>>,
cache_entries: FreeList<RenderTaskCacheEntry, RenderTaskCacheMarker>,
- frame_id: u64,
}
pub type RenderTaskCacheEntryHandle = WeakFreeListHandle<RenderTaskCacheMarker>;
@@ -100,7 +70,6 @@ impl RenderTaskCache {
RenderTaskCache {
map: FastHashMap::default(),
cache_entries: FreeList::new(),
- frame_id: 0,
}
}
@@ -113,7 +82,6 @@ impl RenderTaskCache {
&mut self,
texture_cache: &mut TextureCache,
) {
- self.frame_id += 1;
profile_scope!("begin_frame");
// Drop any items from the cache that have been
// evicted from the texture cache.
@@ -129,35 +97,17 @@ impl RenderTaskCache {
// from here so that this hash map doesn't
// grow indefinitely!
let cache_entries = &mut self.cache_entries;
- let frame_id = self.frame_id;
self.map.retain(|_, handle| {
- let mut retain = texture_cache.is_allocated(
+ let retain = texture_cache.is_allocated(
&cache_entries.get(handle).handle,
);
- if retain {
- let entry = cache_entries.get_mut(&handle);
- if frame_id > entry.frame_id + 10 {
- texture_cache.evict_handle(&entry.handle);
- retain = false;
- }
- }
-
if !retain {
let handle = mem::replace(handle, FreeListHandle::invalid());
cache_entries.free(handle);
}
-
retain
});
-
- // Clear out the render task ID of any remaining cache entries that were drawn
- // on the previous frame, so we don't accidentally hook up stale dependencies
- // when building the frame graph.
- for (_, handle) in &self.map {
- let entry = self.cache_entries.get_mut(handle);
- entry.render_task_id = None;
- }
}
fn alloc_render_task(
@@ -167,11 +117,17 @@ impl RenderTaskCache {
texture_cache: &mut TextureCache,
) {
// Find out what size to alloc in the texture cache.
- let size = render_task.location.size();
- let target_kind = render_task.target_kind();
+ let size = match render_task.location {
+ RenderTaskLocation::Fixed(..) |
+ RenderTaskLocation::PictureCache { .. } |
+ RenderTaskLocation::TextureCache { .. } => {
+ panic!("BUG: dynamic task was expected");
+ }
+ RenderTaskLocation::Dynamic(_, size) => size,
+ };
// Select the right texture page to allocate from.
- let image_format = match target_kind {
+ let image_format = match render_task.target_kind() {
RenderTargetKind::Color => texture_cache.shared_color_expected_format(),
RenderTargetKind::Alpha => texture_cache.shared_alpha_expected_format(),
};
@@ -196,28 +152,24 @@ impl RenderTaskCache {
descriptor,
TextureFilter::Linear,
None,
- entry.user_data.unwrap_or([0.0; 4]),
+ entry.user_data.unwrap_or([0.0; 3]),
DirtyRect::All,
gpu_cache,
None,
render_task.uv_rect_kind(),
Eviction::Auto,
- TargetShader::Default,
);
// Get the allocation details in the texture cache, and store
- // this in the render task. The renderer will draw this task
- // into the appropriate rect of the texture cache on this frame.
- let (texture_id, uv_rect, _, _, _) =
+ // this in the render task. The renderer will draw this
+ // task into the appropriate layer and rect of the texture
+ // cache on this frame.
+ let (texture_id, texture_layer, uv_rect, _, _) =
texture_cache.get_cache_location(&entry.handle);
- let surface = StaticRenderTaskSurface::TextureCache {
+ render_task.location = RenderTaskLocation::TextureCache {
texture: texture_id,
- target_kind,
- };
-
- render_task.location = RenderTaskLocation::Static {
- surface,
+ layer: texture_layer,
rect: uv_rect.to_i32(),
};
}
@@ -227,18 +179,14 @@ impl RenderTaskCache {
key: RenderTaskCacheKey,
texture_cache: &mut TextureCache,
gpu_cache: &mut GpuCache,
- rg_builder: &mut RenderTaskGraphBuilder,
- user_data: Option<[f32; 4]>,
+ render_tasks: &mut RenderTaskGraph,
+ user_data: Option<[f32; 3]>,
is_opaque: bool,
- parent: RenderTaskParent,
- surfaces: &[SurfaceInfo],
f: F,
- ) -> Result<RenderTaskId, ()>
+ ) -> Result<RenderTaskCacheEntryHandle, ()>
where
- F: FnOnce(&mut RenderTaskGraphBuilder) -> Result<RenderTaskId, ()>,
+ F: FnOnce(&mut RenderTaskGraph) -> Result<RenderTaskId, ()>,
{
- let frame_id = self.frame_id;
- let size = key.size;
// Get the texture cache handle for this cache key,
// or create one.
let cache_entries = &mut self.cache_entries;
@@ -246,79 +194,31 @@ impl RenderTaskCache {
let entry = RenderTaskCacheEntry {
handle: TextureCacheHandle::invalid(),
user_data,
- target_kind: RenderTargetKind::Color, // will be set below.
is_opaque,
- frame_id,
- render_task_id: None,
};
cache_entries.insert(entry)
});
let cache_entry = cache_entries.get_mut(entry_handle);
- cache_entry.frame_id = self.frame_id;
// Check if this texture cache handle is valid.
if texture_cache.request(&cache_entry.handle, gpu_cache) {
// Invoke user closure to get render task chain
// to draw this into the texture cache.
- let render_task_id = f(rg_builder)?;
+ let render_task_id = f(render_tasks)?;
+ render_tasks.cacheable_render_tasks.push(render_task_id);
cache_entry.user_data = user_data;
cache_entry.is_opaque = is_opaque;
- cache_entry.render_task_id = Some(render_task_id);
-
- let render_task = rg_builder.get_task_mut(render_task_id);
-
- render_task.mark_cached(entry_handle.weak());
- cache_entry.target_kind = render_task.kind.target_kind();
RenderTaskCache::alloc_render_task(
- render_task,
+ &mut render_tasks[render_task_id],
cache_entry,
gpu_cache,
texture_cache,
);
}
- // If this render task cache is being drawn this frame, ensure we hook up the
- // render task for it as a dependency of any render task that uses this as
- // an input source.
- if let Some(render_task_id) = cache_entry.render_task_id {
- match parent {
- RenderTaskParent::Surface(surface_index) => {
- // If parent is a surface, use helper fn to add this dependency,
- // which correctly takes account of the render task configuration
- // of the surface.
- add_child_render_task(
- surface_index,
- render_task_id,
- surfaces,
- rg_builder
- );
- }
- RenderTaskParent::RenderTask(parent_render_task_id) => {
- // For render tasks, just add it as a direct dependency on the
- // task graph builder.
- rg_builder.add_dependency(
- parent_render_task_id,
- render_task_id,
- );
- }
- }
-
- return Ok(render_task_id);
- }
-
- let target_kind = cache_entry.target_kind;
- let mut task = RenderTask::new(
- RenderTaskLocation::CacheRequest { size, },
- RenderTaskKind::Cached(CachedTask {
- target_kind,
- }),
- );
- task.mark_cached(entry_handle.weak());
- let render_task_id = rg_builder.add().init(task);
-
- Ok(render_task_id)
+ Ok(entry_handle.weak())
}
pub fn get_cache_entry(
@@ -359,17 +259,9 @@ impl RenderTaskCache {
// Gecko tests.
// Note: zero-square tasks are prohibited in WR task graph, so
// we ensure each dimension to be at least the length of 1 after rounding.
-pub fn to_cache_size(size: LayoutSize, device_pixel_scale: &mut Scale<f32, LayoutPixel, DevicePixel>) -> DeviceIntSize {
- let mut device_size = (size * *device_pixel_scale).round();
-
- if device_size.width > MAX_CACHE_TASK_SIZE || device_size.height > MAX_CACHE_TASK_SIZE {
- let scale = MAX_CACHE_TASK_SIZE / f32::max(device_size.width, device_size.height);
- *device_pixel_scale = *device_pixel_scale * Scale::new(scale);
- device_size = (size * *device_pixel_scale).round();
- }
-
+pub fn to_cache_size(size: DeviceSize) -> DeviceIntSize {
DeviceIntSize::new(
- 1.max(device_size.width as i32),
- 1.max(device_size.height as i32),
+ 1.max(size.width.round() as i32),
+ 1.max(size.height.round() as i32),
)
}
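// A standalone check of the clamping described above: sub-pixel sizes round
// to zero and are bumped back up to 1 so the task graph never sees a
// zero-area task. Plain tuples stand in for DeviceSize/DeviceIntSize.
fn cache_size_demo() {
    fn to_cache_size(w: f32, h: f32) -> (i32, i32) {
        (1.max(w.round() as i32), 1.max(h.round() as i32))
    }

    assert_eq!(to_cache_size(0.2, 640.4), (1, 640));
}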
diff --git a/third_party/webrender/webrender/src/render_task_graph.rs b/third_party/webrender/webrender/src/render_task_graph.rs
index aa089a15d3a..3058a988386 100644
--- a/third_party/webrender/webrender/src/render_task_graph.rs
+++ b/third_party/webrender/webrender/src/render_task_graph.rs
@@ -1,808 +1,448 @@
-// This Source Code Form is subject to the terms of the Mozilla Public
-// License, v. 2.0. If a copy of the MPL was not distributed with this
-// file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
//! This module contains the render task graph.
//!
//! Code associated with creating specific render tasks is in the render_task
//! module.
-use api::units::*;
use api::ImageFormat;
-use crate::gpu_cache::{GpuCache, GpuCacheAddress};
-use crate::internal_types::{TextureSource, CacheTextureId, FastHashMap, FastHashSet};
+use api::units::*;
+use crate::internal_types::{CacheTextureId, FastHashMap, SavedTargetIndex};
use crate::render_backend::FrameId;
-use crate::render_task::{StaticRenderTaskSurface, RenderTaskLocation, RenderTask};
-use crate::render_target::RenderTargetKind;
-use crate::render_task::{RenderTaskData, RenderTaskKind};
-use crate::resource_cache::ResourceCache;
-use crate::texture_pack::GuillotineAllocator;
-use crate::prim_store::DeferredResolve;
-use crate::image_source::{resolve_image, resolve_cached_render_task};
-use crate::util::VecHelper;
-use smallvec::SmallVec;
-use std::mem;
-
-use crate::render_target::{RenderTargetList, ColorRenderTarget};
+use crate::render_target::{RenderTarget, RenderTargetKind, RenderTargetList, ColorRenderTarget};
use crate::render_target::{PictureCacheTarget, TextureCacheRenderTarget, AlphaRenderTarget};
-use crate::util::Allocation;
-use std::{usize, f32};
+use crate::render_task::{BlitSource, RenderTask, RenderTaskKind, RenderTaskAddress, RenderTaskData};
+use crate::render_task::{RenderTaskLocation};
+use crate::util::{VecHelper, Allocation};
+use std::{cmp, usize, f32, i32, u32};
-/// According to apitrace, textures larger than 2048 break fast clear
-/// optimizations on some intel drivers. We sometimes need to go larger, but
-/// we try to avoid it.
-const MAX_SHARED_SURFACE_SIZE: i32 = 2048;
-
-/// If we ever need a larger texture than the ideal, we'd better round it up to a
-/// reasonable number in order to have a bit of leeway in case the size of
-/// this target is changing each frame.
-const TEXTURE_DIMENSION_MASK: i32 = 0xFF;
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskGraph {
+ pub tasks: Vec<RenderTask>,
+ pub task_data: Vec<RenderTaskData>,
+ /// Tasks that don't have dependencies, and that may be shared between
+ /// picture tasks.
+ ///
+    /// We render these unconditionally before rendering the rest of the tree.
+ pub cacheable_render_tasks: Vec<RenderTaskId>,
+ next_saved: SavedTargetIndex,
+ frame_id: FrameId,
+}
/// Allows initializing a render task directly into the render task buffer.
///
/// See utils::VecHelpers. RenderTask is fairly large so avoiding the move when
-/// pushing into the vector can save a lot of expensive memcpys on pages with many
+/// pushing into the vector can save a lot of expensive memcpys on pages with many
/// render tasks.
pub struct RenderTaskAllocation<'a> {
- pub alloc: Allocation<'a, RenderTask>,
+ alloc: Allocation<'a, RenderTask>,
+ #[cfg(debug_assertions)]
+ frame_id: FrameId,
}
impl<'l> RenderTaskAllocation<'l> {
#[inline(always)]
pub fn init(self, value: RenderTask) -> RenderTaskId {
RenderTaskId {
- index: self.alloc.init(value) as u16,
+ index: self.alloc.init(value) as u32,
+ #[cfg(debug_assertions)]
+ frame_id: self.frame_id,
}
}
}
-#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
-#[derive(MallocSizeOf)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct RenderTaskId {
- pub index: u16,
-}
-
-impl RenderTaskId {
- pub const INVALID: RenderTaskId = RenderTaskId {
- index: u16::MAX,
- };
-}
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq, PartialOrd, Ord)]
-pub struct PassId(usize);
-
-impl PassId {
- pub const MIN: PassId = PassId(0);
- pub const MAX: PassId = PassId(!0);
-}
-
-/// An internal representation of a dynamic surface that tasks can be
-/// allocated into. Maintains some extra metadata about each surface
-/// during the graph build.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct Surface {
- /// Whether this is a color or alpha render target
- kind: RenderTargetKind,
- /// Allocator for this surface texture
- allocator: GuillotineAllocator,
- /// We can only allocate into this for reuse if it's a shared surface
- is_shared: bool,
-}
-
-impl Surface {
-    /// Allocate a rect within a shared surface. Returns None if the
- /// format doesn't match, or allocation fails.
- fn alloc_rect(
- &mut self,
- size: DeviceIntSize,
- kind: RenderTargetKind,
- is_shared: bool,
- ) -> Option<DeviceIntPoint> {
- if self.kind == kind && self.is_shared == is_shared {
- self.allocator
- .allocate(&size)
- .map(|(_slice, origin)| origin)
- } else {
- None
+impl RenderTaskGraph {
+ pub fn new(frame_id: FrameId, counters: &RenderTaskGraphCounters) -> Self {
+ // Preallocate a little more than what we needed in the previous frame so that small variations
+ // in the number of items don't cause us to constantly reallocate.
+ let extra_items = 8;
+ RenderTaskGraph {
+ tasks: Vec::with_capacity(counters.tasks_len + extra_items),
+ task_data: Vec::with_capacity(counters.task_data_len + extra_items),
+ cacheable_render_tasks: Vec::with_capacity(counters.cacheable_render_tasks_len + extra_items),
+ next_saved: SavedTargetIndex(0),
+ frame_id,
}
}
-}
-
-/// A sub-pass can draw to either a dynamic (temporary render target) surface,
-/// or a persistent surface (texture or picture cache).
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Debug)]
-pub enum SubPassSurface {
- /// A temporary (intermediate) surface.
- Dynamic {
- /// The renderer texture id
- texture_id: CacheTextureId,
- /// Color / alpha render target
- target_kind: RenderTargetKind,
- /// The rectangle occupied by tasks in this surface. Used as a clear
- /// optimization on some GPUs.
- used_rect: DeviceIntRect,
- },
- Persistent {
- /// Reference to the texture or picture cache surface being drawn to.
- surface: StaticRenderTaskSurface,
- },
-}
-
-/// A subpass is a specific render target, and a list of tasks to draw to it.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct SubPass {
- /// The surface this subpass draws to
- pub surface: SubPassSurface,
- /// The tasks assigned to this subpass.
- pub task_ids: Vec<RenderTaskId>,
-}
-
-/// A pass expresses dependencies between tasks. Each pass consists of a number
-/// of subpasses.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct Pass {
- /// The tasks assigned to this render pass
- pub task_ids: Vec<RenderTaskId>,
- /// The subpasses that make up this dependency pass
- pub sub_passes: Vec<SubPass>,
- /// A list of intermediate surfaces that can be invalidated after
- /// this pass completes.
- pub textures_to_invalidate: Vec<CacheTextureId>,
-}
-
-/// The RenderTaskGraph is the immutable representation of the render task graph. It is
-/// built by the RenderTaskGraphBuilder, and is constructed once per frame.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct RenderTaskGraph {
- /// List of tasks added to the graph
- pub tasks: Vec<RenderTask>,
-
- /// The passes that were created, based on dependencies between tasks
- pub passes: Vec<Pass>,
-
- /// Current frame id, used for debug validation
- frame_id: FrameId,
- /// GPU specific data for each task that is made available to shaders
- pub task_data: Vec<RenderTaskData>,
-
- /// Total number of intermediate surfaces that will be drawn to, used for test validation.
- #[cfg(test)]
- surface_count: usize,
-
- /// Total number of real allocated textures that will be drawn to, used for test validation.
- #[cfg(test)]
- unique_surfaces: FastHashSet<CacheTextureId>,
-}
-
-/// The persistent interface that is used during frame building to construct the
-/// frame graph.
-pub struct RenderTaskGraphBuilder {
- /// List of tasks added to the builder
- tasks: Vec<RenderTask>,
-
- /// List of task roots
- roots: FastHashSet<RenderTaskId>,
-
- /// Input dependencies where the input is a persistent target,
- /// rather than a specific render task id. Useful for expressing
- /// when a task relies on a readback of a surface that is partially
- /// drawn to.
- target_inputs: Vec<(RenderTaskId, StaticRenderTaskSurface)>,
-
- /// Current frame id, used for debug validation
- frame_id: FrameId,
-
- /// A list of texture surfaces that can be freed at the end of a pass. Retained
- /// here to reduce heap allocations.
- textures_to_free: FastHashSet<CacheTextureId>,
-
- // Keep a map of `texture_id` to metadata about surfaces that are currently
- // borrowed from the render target pool.
- active_surfaces: FastHashMap<CacheTextureId, Surface>,
-
- /// A temporary buffer used by assign_free_pass. Kept here to avoid heap reallocs
- child_task_buffer: Vec<RenderTaskId>,
-}
-
-impl RenderTaskGraphBuilder {
- /// Construct a new graph builder. Typically constructed once and maintained
- /// over many frames, to avoid extra heap allocations where possible.
- pub fn new() -> Self {
- RenderTaskGraphBuilder {
- tasks: Vec::new(),
- roots: FastHashSet::default(),
- target_inputs: Vec::new(),
- frame_id: FrameId::INVALID,
- textures_to_free: FastHashSet::default(),
- active_surfaces: FastHashMap::default(),
- child_task_buffer: Vec::new(),
+ pub fn counters(&self) -> RenderTaskGraphCounters {
+ RenderTaskGraphCounters {
+ tasks_len: self.tasks.len(),
+ task_data_len: self.task_data.len(),
+ cacheable_render_tasks_len: self.cacheable_render_tasks.len(),
}
}
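`new()` and `counters()` above form a carry-over-capacity pattern: each frame seeds its vector capacities from the previous frame's final lengths plus a small slack, so steady-state frames allocate once. A minimal self-contained sketch of the idea; `Graph` and `Counters` here are illustrative stand-ins, not WebRender API:

    struct Counters { tasks_len: usize }

    struct Graph { tasks: Vec<u32> }

    impl Graph {
        fn new(prev: &Counters) -> Self {
            const EXTRA: usize = 8; // slack absorbs small frame-to-frame variation
            Graph { tasks: Vec::with_capacity(prev.tasks_len + EXTRA) }
        }

        fn counters(&self) -> Counters {
            Counters { tasks_len: self.tasks.len() }
        }
    }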
- pub fn frame_id(&self) -> FrameId {
- self.frame_id
- }
-
- /// Begin a new frame
- pub fn begin_frame(&mut self, frame_id: FrameId) {
- self.frame_id = frame_id;
- self.roots.clear();
- }
-
- /// Get immutable access to a task
- // TODO(gw): There's only a couple of places that existing code needs to access
- // a task during the building step. Perhaps we can remove this?
- pub fn get_task(
- &self,
- task_id: RenderTaskId,
- ) -> &RenderTask {
- &self.tasks[task_id.index as usize]
- }
-
- /// Get mutable access to a task
- // TODO(gw): There's only a couple of places that existing code needs to access
- // a task during the building step. Perhaps we can remove this?
- pub fn get_task_mut(
- &mut self,
- task_id: RenderTaskId,
- ) -> &mut RenderTask {
- &mut self.tasks[task_id.index as usize]
- }
-
- /// Add a new task to the graph.
pub fn add(&mut self) -> RenderTaskAllocation {
- // Assume every task is a root to start with
- self.roots.insert(
- RenderTaskId { index: self.tasks.len() as u16 }
- );
-
RenderTaskAllocation {
alloc: self.tasks.alloc(),
+ #[cfg(debug_assertions)]
+ frame_id: self.frame_id,
}
}
- /// Express a dependency, such that `task_id` depends on `input` as a texture source.
+ /// Express a render task dependency between a parent and child task.
+ /// This is used to assign tasks to render passes.
pub fn add_dependency(
&mut self,
- task_id: RenderTaskId,
- input: RenderTaskId,
+ parent_id: RenderTaskId,
+ child_id: RenderTaskId,
) {
- self.tasks[task_id.index as usize].children.push(input);
-
- // Once a task is an input, it's no longer a root
- self.roots.remove(&input);
+ let parent = &mut self[parent_id];
+ parent.children.push(child_id);
}
- /// Register a persistent surface as an input dependency of a task (readback).
- pub fn add_target_input(
+    /// Assign this frame's render tasks to render passes, ordered so that a pass appears
+    /// earlier than the passes that depend on it.
+ pub fn generate_passes(
&mut self,
- task_id: RenderTaskId,
- target: StaticRenderTaskSurface,
- ) {
- self.target_inputs.push((task_id, target));
+ main_render_task: Option<RenderTaskId>,
+ screen_size: DeviceIntSize,
+ gpu_supports_fast_clears: bool,
+ ) -> Vec<RenderPass> {
+ profile_scope!("generate_passes");
+ let mut passes = Vec::new();
+
+ if !self.cacheable_render_tasks.is_empty() {
+ self.generate_passes_impl(
+ &self.cacheable_render_tasks[..],
+ screen_size,
+ gpu_supports_fast_clears,
+ false,
+ &mut passes,
+ );
+ }
+
+ if let Some(main_task) = main_render_task {
+ self.generate_passes_impl(
+ &[main_task],
+ screen_size,
+ gpu_supports_fast_clears,
+ true,
+ &mut passes,
+ );
+ }
+
+ self.resolve_target_conflicts(&mut passes);
+
+ passes
}
- /// End the graph building phase and produce the immutable task graph for this frame
- pub fn end_frame(
- &mut self,
- resource_cache: &mut ResourceCache,
- gpu_cache: &mut GpuCache,
- deferred_resolves: &mut Vec<DeferredResolve>,
- ) -> RenderTaskGraph {
- // Copy the render tasks over to the immutable graph output
- let task_count = self.tasks.len();
- let tasks = mem::replace(
- &mut self.tasks,
- Vec::with_capacity(task_count),
- );
-
- let mut graph = RenderTaskGraph {
- tasks,
- passes: Vec::new(),
- task_data: Vec::with_capacity(task_count),
- frame_id: self.frame_id,
- #[cfg(test)]
- surface_count: 0,
- #[cfg(test)]
- unique_surfaces: FastHashSet::default(),
- };
-
- // Handle late mapping of dependencies on a specific persistent target.
- // NOTE: This functionality isn't used by current callers of the frame graph, but
- // will be used in future (for example, to express readbacks of partially
- // rendered picture tiles for mix-blend-mode etc).
- if !self.target_inputs.is_empty() {
- // Create a mapping from persistent surface id -> render task root (used below):
- let mut roots = FastHashMap::default();
- roots.reserve(self.roots.len());
- for root_id in &self.roots {
- let task = &graph.tasks[root_id.index as usize];
- match task.location {
- RenderTaskLocation::Static { ref surface, .. } => {
- // We should never encounter a graph where the same surface is a
-                    // render root more than once.
- assert!(!roots.contains_key(surface));
- roots.insert(surface.clone(), *root_id);
- }
- RenderTaskLocation::Dynamic { .. }
- | RenderTaskLocation::CacheRequest { .. }
- | RenderTaskLocation::Unallocated { .. } => {
- // Intermediate surfaces can't be render roots, they should always
- // be a dependency of a render root.
- panic!("bug: invalid root");
- }
- }
+    /// Assign the render tasks from the trees rooted at `root_tasks` to render passes and
+ /// append them to the `passes` vector so that the passes that we depend on end up
+ /// _earlier_ in the pass list.
+ fn generate_passes_impl(
+ &self,
+ root_tasks: &[RenderTaskId],
+ screen_size: DeviceIntSize,
+ gpu_supports_fast_clears: bool,
+ for_main_framebuffer: bool,
+ passes: &mut Vec<RenderPass>,
+ ) {
+ // We recursively visit tasks from the roots (main and cached render tasks), to figure out
+ // which ones affect the frame and which passes they should be assigned to.
+ //
+ // We track the maximum depth of each task (how far it is from the roots) as well as the total
+        // maximum depth of the graph to determine each task's pass index. In a nutshell, depth 0 is
+ // for the last render pass (for example the main framebuffer), while the highest depth
+ // corresponds to the first pass.
+
+ fn assign_task_depth(
+ tasks: &[RenderTask],
+ task_id: RenderTaskId,
+ task_depth: i32,
+ task_max_depths: &mut [i32],
+ max_depth: &mut i32,
+ ) {
+ *max_depth = std::cmp::max(*max_depth, task_depth);
+
+ let task_max_depth = &mut task_max_depths[task_id.index as usize];
+ if task_depth > *task_max_depth {
+ *task_max_depth = task_depth;
+ } else {
+                // If this task has already been processed at an equal or larger depth,
+ // there is no need to process it again.
+ return;
}
- assert_eq!(roots.len(), self.roots.len());
-
- // Now resolve those dependencies on persistent targets and add them
- // as a render task dependency.
- for (task_id, target_id) in self.target_inputs.drain(..) {
- match roots.get(&target_id) {
- Some(root_task_id) => {
- graph.tasks[task_id.index as usize].children.push(*root_task_id);
- self.roots.remove(root_task_id);
- }
- None => {
- println!("WARN: {:?} depends on root {:?} but it has no tasks!",
- task_id,
- target_id,
- );
- }
- }
+
+ let task = &tasks[task_id.index as usize];
+ for child in &task.children {
+ assign_task_depth(
+ tasks,
+ *child,
+ task_depth + 1,
+ task_max_depths,
+ max_depth,
+ );
}
}
- // Two traversals of the graph are required. The first pass determines how many passes
- // are required, and assigns render tasks a pass to be drawn on. The second pass determines
- // when the last time a render task is used as an input, and assigns what pass the surface
- // backing that render task can be freed (the surface is then returned to the render target
- // pool and may be aliased / reused during subsequent passes).
-
- let mut pass_count = 0;
-
- // Traverse each root, and assign `render_on` for each task and count number of required passes
- for root_id in &self.roots {
- assign_render_pass(
- *root_id,
- PassId(0),
- &mut graph,
- &mut pass_count,
+ // The maximum depth of each task. Values that are still equal to -1 after recursively visiting
+ // the nodes correspond to tasks that don't contribute to the frame.
+ let mut task_max_depths = vec![-1; self.tasks.len()];
+ let mut max_depth = 0;
+
+ for root_task in root_tasks {
+ assign_task_depth(
+ &self.tasks,
+ *root_task,
+ 0,
+ &mut task_max_depths,
+ &mut max_depth,
);
}
- // Determine which pass each task can be freed on, which depends on which is
- // the last task that has this as an input.
- for i in 0 .. graph.tasks.len() {
- let task_id = RenderTaskId { index: i as u16 };
- assign_free_pass(
- task_id,
- &mut self.child_task_buffer,
- &mut graph,
- );
+ let offset = passes.len();
+
+ passes.reserve(max_depth as usize + 1);
+ for _ in 0..max_depth {
+ passes.alloc().init(RenderPass::new_off_screen(screen_size, gpu_supports_fast_clears));
}
- // Construct passes array for tasks to be assigned to below
- for _ in 0 .. pass_count+1 {
- graph.passes.push(Pass {
- task_ids: Vec::new(),
- sub_passes: Vec::new(),
- textures_to_invalidate: Vec::new(),
- });
+ if for_main_framebuffer {
+ passes.alloc().init(RenderPass::new_main_framebuffer(screen_size, gpu_supports_fast_clears));
+ } else {
+ passes.alloc().init(RenderPass::new_off_screen(screen_size, gpu_supports_fast_clears));
}
- // Assign tasks to each pass based on their `render_on` attribute
- for (index, task) in graph.tasks.iter().enumerate() {
- if task.kind.is_a_rendering_operation() {
- let id = RenderTaskId { index: index as u16 };
- graph.passes[task.render_on.0].task_ids.push(id);
+ // Assign tasks to their render passes.
+ for task_index in 0..self.tasks.len() {
+ if task_max_depths[task_index] < 0 {
+                // The task wasn't visited, which means it doesn't contribute to this frame.
+ continue;
}
+ let pass_index = offset + (max_depth - task_max_depths[task_index]) as usize;
+ let task_id = RenderTaskId {
+ index: task_index as u32,
+ #[cfg(debug_assertions)]
+ frame_id: self.frame_id,
+ };
+ let task = &self.tasks[task_index];
+ passes[pass_index as usize].add_render_task(
+ task_id,
+ task.get_dynamic_size(),
+ task.target_kind(),
+ &task.location,
+ );
}
+ }
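The traversal above is a longest-path (maximum depth) computation over the task DAG: a task shared by several paths is placed at its greatest distance from a root, so it renders early enough for all of its consumers, and its pass index is then `offset + (max_depth - task_max_depth)`. A standalone sketch of the same traversal over a toy adjacency list, assuming an acyclic graph as the task graph guarantees:

    // Toy version of assign_task_depth: children[n] lists the dependencies of
    // node n; depth 0 is a root (rendered last), the deepest nodes render first.
    fn assign_depth(
        children: &[Vec<usize>],
        node: usize,
        depth: i32,
        max_depths: &mut [i32],
        overall_max: &mut i32,
    ) {
        *overall_max = (*overall_max).max(depth);
        if depth <= max_depths[node] {
            return; // already visited at this depth or deeper
        }
        max_depths[node] = depth;
        for &child in &children[node] {
            assign_depth(children, child, depth + 1, max_depths, overall_max);
        }
    }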
- // At this point, tasks are assigned to each dependency pass. Now we
- // can go through each pass and create sub-passes, assigning each task
- // to a target and destination rect.
- assert!(self.active_surfaces.is_empty());
-
- for (pass_id, pass) in graph.passes.iter_mut().enumerate().rev() {
- assert!(self.textures_to_free.is_empty());
-
- for task_id in &pass.task_ids {
- let task = &mut graph.tasks[task_id.index as usize];
-
- match task.location {
- RenderTaskLocation::Unallocated { size } => {
- let mut location = None;
- let kind = task.kind.target_kind();
-
- // Allow this render task to use a shared surface target if it
- // is freed straight after this pass. Tasks that must remain
- // allocated for inputs on subsequent passes are always assigned
- // to a standalone surface, to simplify lifetime management of
- // render targets.
-
- let can_use_shared_surface =
- task.render_on == PassId(task.free_after.0 + 1);
-
- if can_use_shared_surface {
- // If we can use a shared surface, step through the existing shared
- // surfaces for this subpass, and see if we can allocate the task
- // to one of these targets.
- for sub_pass in &mut pass.sub_passes {
- if let SubPassSurface::Dynamic { texture_id, ref mut used_rect, .. } = sub_pass.surface {
- let surface = self.active_surfaces.get_mut(&texture_id).unwrap();
- if let Some(p) = surface.alloc_rect(size, kind, true) {
- location = Some((texture_id, p));
- *used_rect = used_rect.union(&DeviceIntRect::new(p, size));
- sub_pass.task_ids.push(*task_id);
- break;
- }
- }
- }
- }
-
- if location.is_none() {
- // If it wasn't possible to allocate the task to a shared surface, get a new
-                        // render target from the resource cache pool.
-
- // If this is a really large task, don't bother allocating it as a potential
- // shared surface for other tasks.
-
- let can_use_shared_surface = can_use_shared_surface &&
- size.width <= MAX_SHARED_SURFACE_SIZE &&
- size.height <= MAX_SHARED_SURFACE_SIZE;
-
- let surface_size = if can_use_shared_surface {
- DeviceIntSize::new(
- MAX_SHARED_SURFACE_SIZE,
- MAX_SHARED_SURFACE_SIZE,
- )
- } else {
- // Round up size here to avoid constant re-allocs during resizing
- DeviceIntSize::new(
- (size.width + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
- (size.height + TEXTURE_DIMENSION_MASK) & !TEXTURE_DIMENSION_MASK,
- )
- };
-
- let format = match kind {
- RenderTargetKind::Color => ImageFormat::RGBA8,
- RenderTargetKind::Alpha => ImageFormat::R8,
- };
-
- // Get render target of appropriate size and format from resource cache
- let texture_id = resource_cache.get_or_create_render_target_from_pool(
- surface_size,
- format,
- );
-
- // Allocate metadata we need about this surface while it's active
- let mut surface = Surface {
- kind,
- allocator: GuillotineAllocator::new(Some(surface_size)),
- is_shared: can_use_shared_surface,
- };
-
- // Allocation of the task must fit in this new surface!
- let p = surface.alloc_rect(
- size,
- kind,
- can_use_shared_surface,
- ).expect("bug: alloc must succeed!");
-
- location = Some((texture_id, p));
-
- // Store the metadata about this newly active surface. We should never
- // get a target surface with the same texture_id as a currently active surface.
- let _prev_surface = self.active_surfaces.insert(texture_id, surface);
- assert!(_prev_surface.is_none());
-
- // Store some information about surface allocations if in test mode
- #[cfg(test)]
- {
- graph.surface_count += 1;
- graph.unique_surfaces.insert(texture_id);
- }
-
- // Add the target as a new subpass for this render pass.
- pass.sub_passes.push(SubPass {
- surface: SubPassSurface::Dynamic {
- texture_id,
- target_kind: kind,
- used_rect: DeviceIntRect::new(p, size),
- },
- task_ids: vec![*task_id],
- });
- }
-
- // By now, we must have allocated a surface and rect for this task, so assign it!
- assert!(location.is_some());
- task.location = RenderTaskLocation::Dynamic {
- texture_id: location.unwrap().0,
- rect: DeviceIntRect::new(location.unwrap().1, size),
- };
- }
- RenderTaskLocation::Static { ref surface, .. } => {
- // No need to allocate for this surface, since it's a persistent
- // target. Instead, just create a new sub-pass for it.
- pass.sub_passes.push(SubPass {
- surface: SubPassSurface::Persistent {
- surface: surface.clone(),
- },
- task_ids: vec![*task_id],
- });
- }
- RenderTaskLocation::CacheRequest { .. } => {
- // No need to allocate nor to create a sub-path for read-only locations.
- }
- RenderTaskLocation::Dynamic { .. } => {
- // Dynamic tasks shouldn't be allocated by this point
- panic!("bug: encountered an already allocated task");
- }
- }
-
- // Return the shared surfaces from this pass
- let task = &graph.tasks[task_id.index as usize];
- for child_id in &task.children {
- let child_task = &graph.tasks[child_id.index as usize];
- match child_task.location {
- RenderTaskLocation::Unallocated { .. } => panic!("bug: must be allocated"),
- RenderTaskLocation::Dynamic { texture_id, .. } => {
- // If this task can be freed after this pass, include it in the
- // unique set of textures to be returned to the render target pool below.
- if child_task.free_after == PassId(pass_id) {
- self.textures_to_free.insert(texture_id);
- }
- }
- RenderTaskLocation::Static { .. } => {}
- RenderTaskLocation::CacheRequest { .. } => {}
- }
- }
+    /// Resolve conflicts between the generated passes and the limitations of our target
+ /// allocation scheme.
+ ///
+ /// The render task graph operates with a ping-pong target allocation scheme where
+ /// a set of targets is written to by even passes and a different set of targets is
+ /// written to by odd passes.
+ /// Since tasks cannot read and write the same target, we can run into issues if a
+    /// task in pass N + 2 reads the result of a task in pass N.
+    /// To avoid such cases we have to insert blit tasks to copy the content of the task
+    /// into pass N + 1, which is readable by pass N + 2.
+ ///
+ /// In addition, allocated rects of pass N are currently not tracked and can be
+ /// overwritten by allocations in later passes on the same target, unless the task
+    /// has been marked for saving, which preserves the allocated rect until the end of
+    /// the frame. This is a big hammer; hopefully we won't need to mark many passes
+    /// for saving. A better solution would be to track allocations through the entire
+    /// graph; there is a prototype of that in https://github.com/nical/toy-render-graph/
+ fn resolve_target_conflicts(&mut self, passes: &mut [RenderPass]) {
+ // Keep track of blit tasks we inserted to avoid adding several blits for the same
+ // task.
+ let mut task_redirects = vec![None; self.tasks.len()];
+
+ let mut task_passes = vec![-1; self.tasks.len()];
+ for pass_index in 0..passes.len() {
+ for task in &passes[pass_index].tasks {
+ task_passes[task.index as usize] = pass_index as i32;
}
+ }
- // Return no longer used textures to the pool, so that they can be reused / aliased
- // by later passes.
- for texture_id in self.textures_to_free.drain() {
- resource_cache.return_render_target_to_pool(texture_id);
- self.active_surfaces.remove(&texture_id).unwrap();
- pass.textures_to_invalidate.push(texture_id);
+ for task_index in 0..self.tasks.len() {
+ if task_passes[task_index] < 0 {
+ // The task doesn't contribute to this frame.
+ continue;
}
- }
- // By now, all surfaces that were borrowed from the render target pool must
- // be returned to the resource cache, or we are leaking intermediate surfaces!
- assert!(self.active_surfaces.is_empty());
-
- // Each task is now allocated to a surface and target rect. Write that to the
- // GPU blocks and task_data. After this point, the graph is returned and is
- // considered to be immutable for the rest of the frame building process.
-
- for task in &mut graph.tasks {
- // First check whether the render task texture and uv rects are managed
- // externally. This is the case for image tasks and cached tasks. In both
- // cases it results in a finding the information in the texture cache.
- let cache_item = if let Some(ref cache_handle) = task.cache_handle {
- Some(resolve_cached_render_task(
- cache_handle,
- resource_cache,
- ))
- } else if let RenderTaskKind::Image(request) = &task.kind {
- Some(resolve_image(
- *request,
- resource_cache,
- gpu_cache,
- deferred_resolves,
- ))
- } else {
- // General case (non-cached non-image tasks).
- None
- };
+ let pass_index = task_passes[task_index];
+
+            // Go through each dependency and check whether it belongs
+            // to a pass that uses the same targets and/or is more than
+            // one pass behind.
+ for nth_child in 0..self.tasks[task_index].children.len() {
+ let child_task_index = self.tasks[task_index].children[nth_child].index as usize;
+ let child_pass_index = task_passes[child_task_index];
+
+ if child_pass_index == pass_index - 1 {
+ // This should be the most common case.
+ continue;
+ }
- if let Some(cache_item) = cache_item {
- // Update the render task even if the item is invalid.
- // We'll handle it later and it's easier to not have to
- // deal with unexpected location variants like
- // RenderTaskLocation::CacheRequest when we do.
- let source = cache_item.texture_id;
- task.uv_rect_handle = cache_item.uv_rect_handle;
- task.location = RenderTaskLocation::Static {
- surface: StaticRenderTaskSurface::ReadOnly { source },
- rect: cache_item.uv_rect,
+ // TODO: Picture tasks don't support having their dependency tasks redirected.
+ // Pictures store their respective render task(s) on their SurfaceInfo.
+ // We cannot blit the picture task here because we would need to update the
+ // surface's render tasks, but we don't have access to that info here.
+ // Also a surface may be expecting a picture task and not a blit task, so
+ // even if we could update the surface's render task(s), it might cause other issues.
+ // For now we mark the task to be saved rather than trying to redirect to a blit task.
+ let task_is_picture = if let RenderTaskKind::Picture(..) = self.tasks[task_index].kind {
+ true
+ } else {
+ false
};
- }
- // Give the render task an opportunity to add any
- // information to the GPU cache, if appropriate.
- let target_rect = task.get_target_rect();
- task.write_gpu_blocks(
- target_rect,
- gpu_cache,
- );
+ if child_pass_index % 2 != pass_index % 2 || task_is_picture {
+                // The task and its dependency aren't on the same targets,
+ // but the dependency needs to be kept alive.
+ self.tasks[child_task_index].mark_for_saving();
+ continue;
+ }
- graph.task_data.push(
- task.kind.write_task_data(target_rect)
- );
- }
+ if let Some(blit_id) = task_redirects[child_task_index] {
+ // We already resolved a similar conflict with a blit task,
+ // reuse the same blit instead of creating a new one.
+ self.tasks[task_index].children[nth_child] = blit_id;
- graph
- }
-}
+            // Mark for saving if the blit is more than one pass apart from
+            // our task.
+ if child_pass_index < pass_index - 2 {
+ self.tasks[blit_id.index as usize].mark_for_saving();
+ }
-impl RenderTaskGraph {
- /// Print the render task graph to console
- #[allow(dead_code)]
- pub fn print(
- &self,
- ) {
- println!("-- RenderTaskGraph --");
-
- for (i, task) in self.tasks.iter().enumerate() {
- println!("Task {}: render_on={} free_after={} {:?}",
- i,
- task.render_on.0,
- task.free_after.0,
- task.kind.as_str(),
- );
- }
+ continue;
+ }
+
+            // Our dependency is an even number of passes behind; we need
+            // to insert a blit to ensure we don't read and write from
+            // the same target.
- for (p, pass) in self.passes.iter().enumerate() {
- println!("Pass {}:", p);
+ let child_task_id = RenderTaskId {
+ index: child_task_index as u32,
+ #[cfg(debug_assertions)]
+ frame_id: self.frame_id,
+ };
- for (s, sub_pass) in pass.sub_passes.iter().enumerate() {
- println!("\tSubPass {}: {:?}",
- s,
- sub_pass.surface,
+ let mut blit = RenderTask::new_blit(
+ self.tasks[child_task_index].location.size(),
+ BlitSource::RenderTask { task_id: child_task_id },
);
- for task_id in &sub_pass.task_ids {
- println!("\t\tTask {:?}", task_id.index);
+            // Mark for saving if the blit is more than one pass apart from
+            // our task.
+ if child_pass_index < pass_index - 2 {
+ blit.mark_for_saving();
}
+
+ let blit_id = RenderTaskId {
+ index: self.tasks.len() as u32,
+ #[cfg(debug_assertions)]
+ frame_id: self.frame_id,
+ };
+
+ self.tasks.alloc().init(blit);
+
+ passes[child_pass_index as usize + 1].tasks.push(blit_id);
+
+ self.tasks[task_index].children[nth_child] = blit_id;
+ task_redirects[child_task_index] = Some(blit_id);
}
}
}
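Under the ping-pong scheme described in the doc comment above, the fix chosen for each parent/child pair reduces to a parity check on their pass indices. A condensed sketch of that decision logic; the enum and helper are invented for illustration:

    // Consecutive passes always use different target sets; opposite parity only
    // needs the child's rect kept alive; same parity (an even gap) forces a
    // copy through the intermediate pass child_pass + 1.
    enum Fix {
        Nothing,          // child is in the immediately preceding pass
        Save,             // different target set: preserve the child's rect
        BlitIntoNextPass, // same target set: blit into pass child_pass + 1
    }

    fn conflict_fix(pass: i32, child_pass: i32, child_is_picture: bool) -> Fix {
        if child_pass == pass - 1 {
            Fix::Nothing
        } else if child_pass % 2 != pass % 2 || child_is_picture {
            Fix::Save
        } else {
            Fix::BlitIntoNextPass
        }
    }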
- pub fn resolve_location(
- &self,
- task_id: impl Into<Option<RenderTaskId>>,
- gpu_cache: &GpuCache,
- ) -> Option<(GpuCacheAddress, TextureSource)> {
- self.resolve_impl(task_id.into()?, gpu_cache)
+ pub fn get_task_address(&self, id: RenderTaskId) -> RenderTaskAddress {
+ #[cfg(all(debug_assertions, not(feature = "replay")))]
+ debug_assert_eq!(self.frame_id, id.frame_id);
+ RenderTaskAddress(id.index as u16)
}
- fn resolve_impl(
- &self,
- task_id: RenderTaskId,
- gpu_cache: &GpuCache,
- ) -> Option<(GpuCacheAddress, TextureSource)> {
- let task = &self[task_id];
- let texture_source = task.get_texture_source();
-
- if let TextureSource::Invalid = texture_source {
- return None;
+ pub fn write_task_data(&mut self) {
+ profile_scope!("write_task_data");
+ for task in &self.tasks {
+ self.task_data.push(task.write_task_data());
}
-
- let uv_address = task.get_texture_address(gpu_cache);
-
- Some((uv_address, texture_source))
}
-
- /// Return the surface and texture counts, used for testing
- #[cfg(test)]
- pub fn surface_counts(&self) -> (usize, usize) {
- (self.surface_count, self.unique_surfaces.len())
+ pub fn save_target(&mut self) -> SavedTargetIndex {
+ let id = self.next_saved;
+ self.next_saved.0 += 1;
+ id
}
- /// Return current frame id, used for validation
#[cfg(debug_assertions)]
pub fn frame_id(&self) -> FrameId {
self.frame_id
}
}
-/// Batching uses index access to read information about tasks
impl std::ops::Index<RenderTaskId> for RenderTaskGraph {
type Output = RenderTask;
fn index(&self, id: RenderTaskId) -> &RenderTask {
+ #[cfg(all(debug_assertions, not(feature = "replay")))]
+ debug_assert_eq!(self.frame_id, id.frame_id);
&self.tasks[id.index as usize]
}
}
-/// Recursive helper to assign pass that a task should render on
-fn assign_render_pass(
- id: RenderTaskId,
- pass: PassId,
- graph: &mut RenderTaskGraph,
- pass_count: &mut usize,
-) {
- let task = &mut graph.tasks[id.index as usize];
-
- // No point in recursing into paths in the graph if this task already
- // has been set to draw after this pass.
- if task.render_on > pass {
- return;
+impl std::ops::IndexMut<RenderTaskId> for RenderTaskGraph {
+ fn index_mut(&mut self, id: RenderTaskId) -> &mut RenderTask {
+ #[cfg(all(debug_assertions, not(feature = "replay")))]
+ debug_assert_eq!(self.frame_id, id.frame_id);
+ &mut self.tasks[id.index as usize]
}
+}
- let next_pass = if task.kind.is_a_rendering_operation() {
- // Keep count of number of passes needed
- *pass_count = pass.0.max(*pass_count);
- PassId(pass.0 + 1)
- } else {
- // If the node is not a rendering operation, it doesn't create a
- // render pass, so we don't increment the pass count.
- // For now we expect non-rendering nodes to be leafs of the graph.
- // We don't strictly depend on it but it simplifies the mental model.
- debug_assert!(task.children.is_empty());
- pass
- };
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub struct RenderTaskId {
+ pub index: u32,
- // A task should be rendered on the earliest pass in the dependency
- // graph that it's required. Using max here ensures the correct value
- // in the presence of multiple paths to this task from the root(s).
- task.render_on = task.render_on.max(pass);
-
- // TODO(gw): Work around the borrowck - maybe we could structure the dependencies
- // storage better, to avoid this?
- let mut child_task_ids: SmallVec<[RenderTaskId; 8]> = SmallVec::new();
- child_task_ids.extend_from_slice(&task.children);
-
- for child_id in child_task_ids {
- assign_render_pass(
- child_id,
- next_pass,
- graph,
- pass_count,
- );
- }
+ #[cfg(debug_assertions)]
+ #[cfg_attr(feature = "replay", serde(default = "FrameId::first"))]
+ frame_id: FrameId,
}
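The debug-only `frame_id` field is a zero-cost validation pattern: the tag and the `debug_assert_eq!` checks in the indexing impls compile away in release builds, leaving the id a bare index. A generic sketch of the pattern with invented types:

    #[derive(Copy, Clone)]
    struct Id {
        index: u32,
        #[cfg(debug_assertions)]
        frame: u64, // present only in debug builds
    }

    struct Store {
        items: Vec<f32>,
        #[cfg(debug_assertions)]
        frame: u64,
    }

    impl Store {
        fn get(&self, id: Id) -> f32 {
            // Catches ids minted for another frame; free in release builds.
            #[cfg(debug_assertions)]
            debug_assert_eq!(self.frame, id.frame, "stale id from another frame");
            self.items[id.index as usize]
        }
    }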
-fn assign_free_pass(
- id: RenderTaskId,
- child_task_buffer: &mut Vec<RenderTaskId>,
- graph: &mut RenderTaskGraph,
-) {
- let task = &graph.tasks[id.index as usize];
- let render_on = task.render_on;
- debug_assert!(child_task_buffer.is_empty());
-
- // TODO(gw): Work around the borrowck - maybe we could structure the dependencies
- // storage better, to avoid this?
- child_task_buffer.extend_from_slice(&task.children);
-
- for child_id in child_task_buffer.drain(..) {
- let child_task = &mut graph.tasks[child_id.index as usize];
-
- // Each dynamic child task can free its backing surface after the last
- // task that references it as an input. Using min here ensures the
- // safe time to free this surface in the presence of multiple paths
- // to this task from the root(s).
- match child_task.location {
- RenderTaskLocation::CacheRequest { .. } => {}
- RenderTaskLocation::Static { .. } => {
- // never get freed anyway, so can leave untouched
- // (could validate that they remain at PassId::MIN)
- }
- RenderTaskLocation::Unallocated { .. } => {
- child_task.free_after = child_task.free_after.min(render_on);
- }
- RenderTaskLocation::Dynamic { .. } => {
- panic!("bug: should not be allocated yet");
- }
+#[derive(Debug)]
+pub struct RenderTaskGraphCounters {
+ tasks_len: usize,
+ task_data_len: usize,
+ cacheable_render_tasks_len: usize,
+}
+
+impl RenderTaskGraphCounters {
+ pub fn new() -> Self {
+ RenderTaskGraphCounters {
+ tasks_len: 0,
+ task_data_len: 0,
+ cacheable_render_tasks_len: 0,
}
}
}
+impl RenderTaskId {
+ pub const INVALID: RenderTaskId = RenderTaskId {
+ index: u32::MAX,
+ #[cfg(debug_assertions)]
+ frame_id: FrameId::INVALID,
+ };
+}
+
+/// Contains the set of `RenderTarget`s specific to the kind of pass.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum RenderPassKind {
+ /// The final pass to the main frame buffer, where we have a single color
+ /// target for display to the user.
+ MainFramebuffer {
+ main_target: ColorRenderTarget,
+ },
+ /// An intermediate pass, where we may have multiple targets.
+ OffScreen {
+ alpha: RenderTargetList<AlphaRenderTarget>,
+ color: RenderTargetList<ColorRenderTarget>,
+ texture_cache: FastHashMap<(CacheTextureId, usize), TextureCacheRenderTarget>,
+ picture_cache: Vec<PictureCacheTarget>,
+ },
+}
+
/// A render pass represents a set of rendering operations that don't depend on one
/// another.
///
@@ -811,35 +451,90 @@ fn assign_free_pass(
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct RenderPass {
- /// The subpasses that describe targets being rendered to in this pass
- pub alpha: RenderTargetList<AlphaRenderTarget>,
- pub color: RenderTargetList<ColorRenderTarget>,
- pub texture_cache: FastHashMap<CacheTextureId, TextureCacheRenderTarget>,
- pub picture_cache: Vec<PictureCacheTarget>,
- pub textures_to_invalidate: Vec<CacheTextureId>,
+ /// The kind of pass, as well as the set of targets associated with that
+ /// kind of pass.
+ pub kind: RenderPassKind,
+ /// The set of tasks to be performed in this pass, as indices into the
+ /// `RenderTaskGraph`.
+ pub tasks: Vec<RenderTaskId>,
+ /// Screen size in device pixels - used for opaque alpha batch break threshold.
+ pub screen_size: DeviceIntSize,
}
impl RenderPass {
+ /// Creates a pass for the main framebuffer. There is only one of these, and
+ /// it is always the last pass.
+ pub fn new_main_framebuffer(
+ screen_size: DeviceIntSize,
+ gpu_supports_fast_clears: bool,
+ ) -> Self {
+ let main_target = ColorRenderTarget::new(screen_size, gpu_supports_fast_clears);
+ RenderPass {
+ kind: RenderPassKind::MainFramebuffer {
+ main_target,
+ },
+ tasks: vec![],
+ screen_size,
+ }
+ }
+
/// Creates an intermediate off-screen pass.
- pub fn new(src: &Pass) -> Self {
+ pub fn new_off_screen(
+ screen_size: DeviceIntSize,
+ gpu_supports_fast_clears: bool,
+ ) -> Self {
RenderPass {
- color: RenderTargetList::new(
- ImageFormat::RGBA8,
- ),
- alpha: RenderTargetList::new(
- ImageFormat::R8,
- ),
- texture_cache: FastHashMap::default(),
- picture_cache: Vec::new(),
- textures_to_invalidate: src.textures_to_invalidate.clone(),
+ kind: RenderPassKind::OffScreen {
+ color: RenderTargetList::new(
+ screen_size,
+ ImageFormat::RGBA8,
+ gpu_supports_fast_clears,
+ ),
+ alpha: RenderTargetList::new(
+ screen_size,
+ ImageFormat::R8,
+ gpu_supports_fast_clears,
+ ),
+ texture_cache: FastHashMap::default(),
+ picture_cache: Vec::new(),
+ },
+ tasks: vec![],
+ screen_size,
+ }
+ }
+
+ /// Adds a task to this pass.
+ pub fn add_render_task(
+ &mut self,
+ task_id: RenderTaskId,
+ size: DeviceIntSize,
+ target_kind: RenderTargetKind,
+ location: &RenderTaskLocation,
+ ) {
+ if let RenderPassKind::OffScreen { ref mut color, ref mut alpha, .. } = self.kind {
+ // If this will be rendered to a dynamically-allocated region on an
+ // off-screen render target, update the max-encountered size. We don't
+ // need to do this for things drawn to the texture cache, since those
+ // don't affect our render target allocation.
+ if location.is_dynamic() {
+ let max_size = match target_kind {
+ RenderTargetKind::Color => &mut color.max_dynamic_size,
+ RenderTargetKind::Alpha => &mut alpha.max_dynamic_size,
+ };
+ max_size.width = cmp::max(max_size.width, size.width);
+ max_size.height = cmp::max(max_size.height, size.height);
+ }
}
+
+ self.tasks.push(task_id);
}
}
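Note that `max_dynamic_size` grows per axis, so it is a bounding size over all dynamic tasks on a target list rather than the size of any single task. A small illustration with invented sizes:

    // 300x200 and 120x500 yield a max of 300x500: the per-axis maximum,
    // which may exceed every individual task's rect.
    let mut max_size = DeviceIntSize::zero();
    for &size in [DeviceIntSize::new(300, 200), DeviceIntSize::new(120, 500)].iter() {
        max_size.width = cmp::max(max_size.width, size.width);
        max_size.height = cmp::max(max_size.height, size.height);
    }
    assert_eq!(max_size, DeviceIntSize::new(300, 500));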
// Dump an SVG visualization of the render graph for debugging purposes
-#[cfg(feature = "capture")]
+#[allow(dead_code)]
pub fn dump_render_tasks_as_svg(
render_tasks: &RenderTaskGraph,
+ passes: &[RenderPass],
output: &mut dyn std::io::Write,
) -> std::io::Result<()> {
use svg_fmt::*;
@@ -864,10 +559,10 @@ pub fn dump_render_tasks_as_svg(
size: Text,
}
- for pass in render_tasks.passes.iter().rev() {
+ for pass in passes {
let mut layout = VerticalLayout::new(x, margin, node_width);
- for task_id in &pass.task_ids {
+ for task_id in &pass.tasks {
let task_index = task_id.index as usize;
let task = &render_tasks.tasks[task_index];
@@ -876,7 +571,8 @@ pub fn dump_render_tasks_as_svg(
let tx = rect.x + rect.w / 2.0;
let ty = rect.y + 10.0;
- let label = text(tx, ty, format!("{}", task.kind.as_str()));
+ let saved = if task.saved_index.is_some() { " (Saved)" } else { "" };
+ let label = text(tx, ty, format!("{}{}", task.kind.as_str(), saved));
let size = text(tx, ty + 12.0, format!("{:?}", task.location.size()));
nodes[task_index] = Some(Node { rect, label, size });
@@ -1017,266 +713,175 @@ fn dump_task_dependency_link(
}
}
-/// Construct a picture cache render task location for testing
-#[cfg(test)]
-fn pc_target(
- surface_id: u64,
- tile_x: i32,
- tile_y: i32,
-) -> RenderTaskLocation {
- use crate::{
- composite::{NativeSurfaceId, NativeTileId},
- picture::ResolvedSurfaceTexture,
- };
-
- let width = 512;
- let height = 512;
-
- RenderTaskLocation::Static {
- surface: StaticRenderTaskSurface::PictureCache {
- surface: ResolvedSurfaceTexture::Native {
- id: NativeTileId {
- surface_id: NativeSurfaceId(surface_id),
- x: tile_x,
- y: tile_y,
- },
- size: DeviceIntSize::new(width, height),
- },
- },
- rect: DeviceIntSize::new(width, height).into(),
- }
-}
-
#[cfg(test)]
-impl RenderTaskGraphBuilder {
- fn test_expect(
- mut self,
- pass_count: usize,
- total_surface_count: usize,
- unique_surfaces: &[(i32, i32, ImageFormat)],
- ) {
- use crate::render_backend::FrameStamp;
- use api::{DocumentId, IdNamespace};
-
- let mut rc = ResourceCache::new_for_testing();
- let mut gc = GpuCache::new();
-
- let mut frame_stamp = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
- frame_stamp.advance();
- gc.prepare_for_frames();
- gc.begin_frame(frame_stamp);
-
- let g = self.end_frame(&mut rc, &mut gc, &mut Vec::new());
- g.print();
-
- assert_eq!(g.passes.len(), pass_count);
- assert_eq!(g.surface_counts(), (total_surface_count, unique_surfaces.len()));
-
- rc.validate_surfaces(unique_surfaces);
- }
-}
-
-/// Construct a testing render task with given location
+use euclid::{size2, rect};
#[cfg(test)]
-fn task_location(location: RenderTaskLocation) -> RenderTask {
- RenderTask::new_test(
- location,
- RenderTargetKind::Color,
- )
-}
+use smallvec::SmallVec;
-/// Construct a dynamic render task location for testing
#[cfg(test)]
-fn task_dynamic(size: i32) -> RenderTask {
- RenderTask::new_test(
- RenderTaskLocation::Unallocated { size: DeviceIntSize::new(size, size) },
- RenderTargetKind::Color,
- )
+fn dyn_location(w: i32, h: i32) -> RenderTaskLocation {
+ RenderTaskLocation::Dynamic(None, size2(w, h))
}
#[test]
-fn fg_test_1() {
- // Test that a root target can be used as an input for readbacks
- // This functionality isn't currently used, but will be in future.
+fn diamond_task_graph() {
+    // A simple diamond-shaped task graph.
+ //
+ // [b1]
+ // / \
+ // [a] [main_pic]
+ // \ /
+ // [b2]
- let mut gb = RenderTaskGraphBuilder::new();
+ let color = RenderTargetKind::Color;
- let root_target = pc_target(0, 0, 0);
+ let counters = RenderTaskGraphCounters::new();
+ let mut tasks = RenderTaskGraph::new(FrameId::first(), &counters);
- let root = gb.add().init(task_location(root_target.clone()));
+ let a = tasks.add().init(RenderTask::new_test(color, dyn_location(640, 640), SmallVec::new()));
+ let b1 = tasks.add().init(RenderTask::new_test(color, dyn_location(320, 320), smallvec![a]));
+ let b2 = tasks.add().init(RenderTask::new_test(color, dyn_location(320, 320), smallvec![a]));
- let readback = gb.add().init(task_dynamic(100));
- gb.add_dependency(readback, root);
+ let main_pic = tasks.add().init(RenderTask::new_test(
+ color,
+ RenderTaskLocation::Fixed(rect(0, 0, 3200, 1800)),
+ smallvec![b1, b2],
+ ));
- let mix_blend_content = gb.add().init(task_dynamic(50));
+ let initial_number_of_tasks = tasks.tasks.len();
- let content = gb.add().init(task_location(root_target));
- gb.add_dependency(content, readback);
- gb.add_dependency(content, mix_blend_content);
+ let passes = tasks.generate_passes(Some(main_pic), size2(3200, 1800), true);
- gb.test_expect(3, 1, &[
- (2048, 2048, ImageFormat::RGBA8),
- ]);
-}
+ // We should not have added any blits.
+ assert_eq!(tasks.tasks.len(), initial_number_of_tasks);
-#[test]
-fn fg_test_2() {
- // Test that texture cache tasks can be added and scheduled correctly as inputs
- // to picture cache tasks. Ensure that no dynamic surfaces are allocated from the
- // target pool in this case.
+ assert_eq!(passes.len(), 3);
+ assert_eq!(passes[0].tasks, vec![a]);
- let mut gb = RenderTaskGraphBuilder::new();
+ assert_eq!(passes[1].tasks.len(), 2);
+ assert!(passes[1].tasks.contains(&b1));
+ assert!(passes[1].tasks.contains(&b2));
- let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));
+ assert_eq!(passes[2].tasks, vec![main_pic]);
+}
- let tc_0 = StaticRenderTaskSurface::TextureCache {
- texture: CacheTextureId(0),
- target_kind: RenderTargetKind::Color,
- };
+#[test]
+fn blur_task_graph() {
+ // This test simulates a complicated shadow stack effect with target allocation
+ // conflicts to resolve.
- let tc_1 = StaticRenderTaskSurface::TextureCache {
- texture: CacheTextureId(1),
- target_kind: RenderTargetKind::Color,
- };
+ let color = RenderTargetKind::Color;
- gb.add_target_input(
- pc_root,
- tc_0.clone(),
- );
+ let counters = RenderTaskGraphCounters::new();
+ let mut tasks = RenderTaskGraph::new(FrameId::first(), &counters);
- gb.add_target_input(
- pc_root,
- tc_1.clone(),
- );
+ let pic = tasks.add().init(RenderTask::new_test(color, dyn_location(640, 640), SmallVec::new()));
+ let scale1 = tasks.add().init(RenderTask::new_test(color, dyn_location(320, 320), smallvec![pic]));
+ let scale2 = tasks.add().init(RenderTask::new_test(color, dyn_location(160, 160), smallvec![scale1]));
+ let scale3 = tasks.add().init(RenderTask::new_test(color, dyn_location(80, 80), smallvec![scale2]));
+ let scale4 = tasks.add().init(RenderTask::new_test(color, dyn_location(40, 40), smallvec![scale3]));
- gb.add().init(
- task_location(RenderTaskLocation::Static { surface: tc_0.clone(), rect: DeviceIntSize::new(128, 128).into() }),
- );
+ let vblur1 = tasks.add().init(RenderTask::new_test(color, dyn_location(40, 40), smallvec![scale4]));
+ let hblur1 = tasks.add().init(RenderTask::new_test(color, dyn_location(40, 40), smallvec![vblur1]));
- gb.add().init(
- task_location(RenderTaskLocation::Static { surface: tc_1.clone(), rect: DeviceIntSize::new(128, 128).into() }),
- );
+ let vblur2 = tasks.add().init(RenderTask::new_test(color, dyn_location(40, 40), smallvec![scale4]));
+ let hblur2 = tasks.add().init(RenderTask::new_test(color, dyn_location(40, 40), smallvec![vblur2]));
- gb.test_expect(2, 0, &[]);
-}
+ // Insert a task that is an even number of passes away from its dependency.
+ // This means the source and destination are on the same target and we have to resolve
+ // this conflict by automatically inserting a blit task.
+ let vblur3 = tasks.add().init(RenderTask::new_test(color, dyn_location(80, 80), smallvec![scale3]));
+ let hblur3 = tasks.add().init(RenderTask::new_test(color, dyn_location(80, 80), smallvec![vblur3]));
-#[test]
-fn fg_test_3() {
- // Test that small targets are allocated in a shared surface, and that large
- // tasks are allocated in a rounded up texture size.
+ // Insert a task that is an odd number > 1 of passes away from its dependency.
+ // This should force us to mark the dependency "for saving" to keep its content valid
+ // until the task can access it.
+ let vblur4 = tasks.add().init(RenderTask::new_test(color, dyn_location(160, 160), smallvec![scale2]));
+ let hblur4 = tasks.add().init(RenderTask::new_test(color, dyn_location(160, 160), smallvec![vblur4]));
- let mut gb = RenderTaskGraphBuilder::new();
+ let main_pic = tasks.add().init(RenderTask::new_test(
+ color,
+ RenderTaskLocation::Fixed(rect(0, 0, 3200, 1800)),
+ smallvec![hblur1, hblur2, hblur3, hblur4],
+ ));
- let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));
+ let initial_number_of_tasks = tasks.tasks.len();
- let child_pic_0 = gb.add().init(task_dynamic(128));
- let child_pic_1 = gb.add().init(task_dynamic(3000));
+ let passes = tasks.generate_passes(Some(main_pic), size2(3200, 1800), true);
- gb.add_dependency(pc_root, child_pic_0);
- gb.add_dependency(pc_root, child_pic_1);
+ // We should have added a single blit task.
+ assert_eq!(tasks.tasks.len(), initial_number_of_tasks + 1);
- gb.test_expect(2, 2, &[
- (2048, 2048, ImageFormat::RGBA8),
- (3072, 3072, ImageFormat::RGBA8),
- ]);
-}
+ // vblur3's dependency to scale3 should be replaced by a blit.
+ let blit = tasks[vblur3].children[0];
+ assert!(blit != scale3);
-#[test]
-fn fg_test_4() {
- // Test that for a simple dependency chain of tasks, that render
- // target surfaces are aliased and reused between passes where possible.
+ match tasks[blit].kind {
+ RenderTaskKind::Blit(..) => {}
+ _ => { panic!("This should be a blit task."); }
+ }
- let mut gb = RenderTaskGraphBuilder::new();
+ assert_eq!(passes.len(), 8);
- let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));
+ assert_eq!(passes[0].tasks, vec![pic]);
+ assert_eq!(passes[1].tasks, vec![scale1]);
+ assert_eq!(passes[2].tasks, vec![scale2]);
+ assert_eq!(passes[3].tasks, vec![scale3]);
- let child_pic_0 = gb.add().init(task_dynamic(128));
- let child_pic_1 = gb.add().init(task_dynamic(128));
- let child_pic_2 = gb.add().init(task_dynamic(128));
+ assert_eq!(passes[4].tasks.len(), 2);
+ assert!(passes[4].tasks.contains(&scale4));
+ assert!(passes[4].tasks.contains(&blit));
- gb.add_dependency(pc_root, child_pic_0);
- gb.add_dependency(child_pic_0, child_pic_1);
- gb.add_dependency(child_pic_1, child_pic_2);
+ assert_eq!(passes[5].tasks.len(), 4);
+ assert!(passes[5].tasks.contains(&vblur1));
+ assert!(passes[5].tasks.contains(&vblur2));
+ assert!(passes[5].tasks.contains(&vblur3));
+ assert!(passes[5].tasks.contains(&vblur4));
- gb.test_expect(4, 3, &[
- (2048, 2048, ImageFormat::RGBA8),
- (2048, 2048, ImageFormat::RGBA8),
- ]);
-}
+ assert_eq!(passes[6].tasks.len(), 4);
+ assert!(passes[6].tasks.contains(&hblur1));
+ assert!(passes[6].tasks.contains(&hblur2));
+ assert!(passes[6].tasks.contains(&hblur3));
+ assert!(passes[6].tasks.contains(&hblur4));
-#[test]
-fn fg_test_5() {
- // Test that a task that is used as an input by direct parent and also
- // distance ancestor are scheduled correctly, and allocates the correct
- // number of passes, taking advantage of surface reuse / aliasing where feasible.
-
- let mut gb = RenderTaskGraphBuilder::new();
-
- let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));
-
- let child_pic_0 = gb.add().init(task_dynamic(128));
- let child_pic_1 = gb.add().init(task_dynamic(64));
- let child_pic_2 = gb.add().init(task_dynamic(32));
- let child_pic_3 = gb.add().init(task_dynamic(16));
-
- gb.add_dependency(pc_root, child_pic_0);
- gb.add_dependency(child_pic_0, child_pic_1);
- gb.add_dependency(child_pic_1, child_pic_2);
- gb.add_dependency(child_pic_2, child_pic_3);
- gb.add_dependency(pc_root, child_pic_3);
-
- gb.test_expect(5, 4, &[
- (256, 256, ImageFormat::RGBA8),
- (2048, 2048, ImageFormat::RGBA8),
- (2048, 2048, ImageFormat::RGBA8),
- ]);
+ assert_eq!(passes[7].tasks, vec![main_pic]);
+
+ // See vblur4's comment above.
+ assert!(tasks[scale2].saved_index.is_some());
}
#[test]
-fn fg_test_6() {
- // Test that a task that is used as an input dependency by two parent
- // tasks is correctly allocated and freed.
-
- let mut gb = RenderTaskGraphBuilder::new();
-
- let pc_root_1 = gb.add().init(task_location(pc_target(0, 0, 0)));
- let pc_root_2 = gb.add().init(task_location(pc_target(0, 1, 0)));
+fn culled_tasks() {
+ // This test checks that tasks that do not contribute to the frame don't appear in the
+ // generated passes.
- let child_pic = gb.add().init(task_dynamic(128));
+ let color = RenderTargetKind::Color;
- gb.add_dependency(pc_root_1, child_pic);
- gb.add_dependency(pc_root_2, child_pic);
-
- gb.test_expect(2, 1, &[
- (2048, 2048, ImageFormat::RGBA8),
- ]);
-}
-
-#[test]
-fn fg_test_7() {
- // Test that a standalone surface is not incorrectly used to
- // allocate subsequent shared task rects.
+ let counters = RenderTaskGraphCounters::new();
+ let mut tasks = RenderTaskGraph::new(FrameId::first(), &counters);
- let mut gb = RenderTaskGraphBuilder::new();
+ let a1 = tasks.add().init(RenderTask::new_test(color, dyn_location(640, 640), SmallVec::new()));
+ let _a2 = tasks.add().init(RenderTask::new_test(color, dyn_location(320, 320), smallvec![a1]));
- let pc_root = gb.add().init(task_location(pc_target(0, 0, 0)));
+ let b1 = tasks.add().init(RenderTask::new_test(color, dyn_location(640, 640), SmallVec::new()));
+ let b2 = tasks.add().init(RenderTask::new_test(color, dyn_location(320, 320), smallvec![b1]));
+ let _b3 = tasks.add().init(RenderTask::new_test(color, dyn_location(320, 320), smallvec![b2]));
- let child0 = gb.add().init(task_dynamic(16));
- let child1 = gb.add().init(task_dynamic(16));
+ let main_pic = tasks.add().init(RenderTask::new_test(
+ color,
+ RenderTaskLocation::Fixed(rect(0, 0, 3200, 1800)),
+ smallvec![b2],
+ ));
- let child2 = gb.add().init(task_dynamic(16));
- let child3 = gb.add().init(task_dynamic(16));
+ let initial_number_of_tasks = tasks.tasks.len();
- gb.add_dependency(pc_root, child0);
- gb.add_dependency(child0, child1);
- gb.add_dependency(pc_root, child1);
+ let passes = tasks.generate_passes(Some(main_pic), size2(3200, 1800), true);
- gb.add_dependency(pc_root, child2);
- gb.add_dependency(child2, child3);
+ // We should not have added any blits.
+ assert_eq!(tasks.tasks.len(), initial_number_of_tasks);
- gb.test_expect(3, 3, &[
- (256, 256, ImageFormat::RGBA8),
- (2048, 2048, ImageFormat::RGBA8),
- (2048, 2048, ImageFormat::RGBA8),
- ]);
+ assert_eq!(passes.len(), 3);
+ assert_eq!(passes[0].tasks, vec![b1]);
+ assert_eq!(passes[1].tasks, vec![b2]);
+ assert_eq!(passes[2].tasks, vec![main_pic]);
}
diff --git a/third_party/webrender/webrender/src/renderer/mod.rs b/third_party/webrender/webrender/src/renderer.rs
index b5649bb6318..3552a304f76 100644
--- a/third_party/webrender/webrender/src/renderer/mod.rs
+++ b/third_party/webrender/webrender/src/renderer.rs
@@ -34,117 +34,96 @@
//! up the scissor, are accepting already transformed coordinates, which we can get by
//! calling `DrawTarget::to_framebuffer_rect`
-use api::{BlobImageHandler, ColorF, ColorU, MixBlendMode};
-use api::{DocumentId, Epoch, ExternalImageHandler};
-use api::CrashAnnotator;
-#[cfg(feature = "replay")]
-use api::ExternalImageId;
-use api::{ExternalImageSource, ExternalImageType, FontRenderMode, ImageFormat};
-use api::{PipelineId, ImageRendering, Checkpoint, NotificationRequest};
-use api::{VoidPtrToSizeFn, PremultipliedColorF};
-use api::{RenderNotifier, ImageBufferKind, SharedFontInstanceMap};
+use api::{ApiMsg, BlobImageHandler, ColorF, ColorU, MixBlendMode};
+use api::{DocumentId, Epoch, ExternalImageHandler, ExternalImageId};
+use api::{ExternalImageSource, ExternalImageType, FontRenderMode, FrameMsg, ImageFormat};
+use api::{PipelineId, ImageRendering, Checkpoint, NotificationRequest, OutputImageHandler};
+use api::{DebugCommand, MemoryReport, VoidPtrToSizeFn, PremultipliedColorF};
+use api::{RenderApiSender, RenderNotifier, TextureTarget, SharedFontInstanceMap};
#[cfg(feature = "replay")]
use api::ExternalImage;
use api::units::*;
-use api::channel::{unbounded_channel, Receiver};
pub use api::DebugFlags;
-use core::time::Duration;
-
-use crate::render_api::{RenderApiSender, DebugCommand, FrameMsg, MemoryReport};
use crate::batch::{AlphaBatchContainer, BatchKind, BatchFeatures, BatchTextures, BrushBatchKind, ClipBatchList};
#[cfg(any(feature = "capture", feature = "replay"))]
use crate::capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
-use crate::composite::{CompositeState, CompositeTileSurface, ResolvedExternalSurface, CompositorSurfaceTransform};
-use crate::composite::{CompositorKind, Compositor, NativeTileId, CompositeFeatures, CompositeSurfaceFormat, ResolvedExternalSurfaceColorData};
+use crate::composite::{CompositeState, CompositeTileSurface, CompositeTile, ResolvedExternalSurface};
+use crate::composite::{CompositorKind, Compositor, NativeTileId, CompositeSurfaceFormat, ResolvedExternalSurfaceColorData};
use crate::composite::{CompositorConfig, NativeSurfaceOperationDetails, NativeSurfaceId, NativeSurfaceOperation};
-use crate::composite::TileKind;
-use crate::c_str;
use crate::debug_colors;
-use crate::device::{DepthFunction, Device, DrawTarget, ExternalTexture, GpuFrameId};
-use crate::device::{ProgramCache, ReadTarget, ShaderError, Texture, TextureFilter, TextureFlags, TextureSlot};
-use crate::device::{UploadMethod, UploadPBOPool, VertexUsageHint};
-use crate::device::query::{GpuSampler, GpuTimer};
-#[cfg(feature = "capture")]
-use crate::device::FBOId;
-use crate::debug_item::DebugItem;
+use crate::debug_render::{DebugItem, DebugRenderer};
+use crate::device::{DepthFunction, Device, GpuFrameId, Program, UploadMethod, Texture, PBO};
+use crate::device::{DrawTarget, ExternalTexture, FBOId, ReadTarget, TextureSlot};
+use crate::device::{ShaderError, TextureFilter, TextureFlags,
+ VertexUsageHint, VAO, VBO, CustomVAO};
+use crate::device::ProgramCache;
+use crate::device::query::GpuTimer;
+use euclid::{rect, Transform3D, Scale, default};
use crate::frame_builder::{Frame, ChasePrimitive, FrameBuilderConfig};
+use gleam::gl;
use crate::glyph_cache::GlyphCache;
use crate::glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
-use crate::gpu_cache::{GpuCacheUpdate, GpuCacheUpdateList};
+use crate::gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
use crate::gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd};
-use crate::gpu_types::{PrimitiveInstanceData, ScalingInstance, SvgFilterInstance};
-use crate::gpu_types::{BlurInstance, ClearInstance, CompositeInstance, ZBufferId};
+use crate::gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, ScalingInstance, SvgFilterInstance, TransformData};
+use crate::gpu_types::{ClearInstance, CompositeInstance, ResolveInstanceData, ZBufferId};
use crate::internal_types::{TextureSource, ResourceCacheError};
-#[cfg(any(feature = "capture", feature = "replay"))]
-use crate::internal_types::DebugOutput;
-use crate::internal_types::{CacheTextureId, FastHashMap, FastHashSet, RenderedDocument, ResultMsg};
-use crate::internal_types::{TextureCacheAllocInfo, TextureCacheAllocationKind, TextureUpdateList};
-use crate::internal_types::{RenderTargetInfo, Swizzle, DeferredResolveIndex};
-use crate::picture::{self, ResolvedSurfaceTexture};
+use crate::internal_types::{CacheTextureId, DebugOutput, FastHashMap, FastHashSet, LayerIndex, RenderedDocument, ResultMsg};
+use crate::internal_types::{TextureCacheAllocationKind, TextureCacheUpdate, TextureUpdateList, TextureUpdateSource};
+use crate::internal_types::{RenderTargetInfo, SavedTargetIndex, Swizzle};
+use malloc_size_of::MallocSizeOfOps;
+use crate::picture::{RecordedDirtyRegion, tile_cache_sizes, ResolvedSurfaceTexture};
use crate::prim_store::DeferredResolve;
-use crate::profiler::{self, GpuProfileTag, TransactionProfile};
-use crate::profiler::{Profiler, add_event_marker, add_text_marker, thread_is_being_profiled};
+use crate::profiler::{BackendProfileCounters, FrameProfileCounters, TimeProfileCounter,
+ GpuProfileTag, RendererProfileCounters, RendererProfileTimers};
+use crate::profiler::{Profiler, ChangeIndicator, ProfileStyle, add_event_marker, thread_is_being_profiled};
use crate::device::query::{GpuProfiler, GpuDebugMethod};
+use rayon::{ThreadPool, ThreadPoolBuilder};
use crate::render_backend::{FrameId, RenderBackend};
use crate::render_task_graph::RenderTaskGraph;
-use crate::render_task::{RenderTask, RenderTaskKind, ReadbackTask};
+use crate::render_task::{RenderTask, RenderTaskData, RenderTaskKind};
use crate::resource_cache::ResourceCache;
use crate::scene_builder_thread::{SceneBuilderThread, SceneBuilderThreadChannels, LowPrioritySceneBuilderThread};
use crate::screen_capture::AsyncScreenshotGrabber;
+use crate::shade::{Shaders, WrShaders};
+use smallvec::SmallVec;
+use crate::texture_cache::TextureCache;
use crate::render_target::{AlphaRenderTarget, ColorRenderTarget, PictureCacheTarget};
-use crate::render_target::{RenderTarget, TextureCacheRenderTarget};
-use crate::render_target::{RenderTargetKind, BlitJob};
-use crate::texture_cache::{TextureCache, TextureCacheConfig};
-use crate::tile_cache::PictureCacheDebugInfo;
+use crate::render_target::{RenderTarget, TextureCacheRenderTarget, RenderTargetList};
+use crate::render_target::{RenderTargetKind, BlitJob, BlitJobSource};
+use crate::render_task_graph::RenderPassKind;
use crate::util::drain_filter;
-use crate::host_utils::{thread_started, thread_stopped};
-use crate::rectangle_occlusion as occlusion;
-use upload::{upload_to_texture_cache, UploadTexturePool};
-
-use euclid::{rect, Transform3D, Scale, default};
-use gleam::gl;
-use malloc_size_of::MallocSizeOfOps;
-use rayon::{ThreadPool, ThreadPoolBuilder};
+use crate::c_str;
-use std::{
- cell::RefCell,
- collections::VecDeque,
- f32,
- mem,
- num::NonZeroUsize,
- path::PathBuf,
- rc::Rc,
- sync::Arc,
- sync::atomic::{AtomicBool, Ordering},
- thread,
-};
-#[cfg(any(feature = "capture", feature = "replay"))]
+use std;
+use std::cmp;
+use std::collections::VecDeque;
use std::collections::hash_map::Entry;
+use std::f32;
+use std::marker::PhantomData;
+use std::mem;
+use std::os::raw::c_void;
+use std::path::PathBuf;
+use std::rc::Rc;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::mpsc::{channel, Sender, Receiver};
+use std::thread;
+use std::cell::RefCell;
use tracy_rs::register_thread_with_profiler;
use time::precise_time_ns;
+use std::ffi::CString;
-mod debug;
-mod gpu_cache;
-mod shade;
-mod vertex;
-mod upload;
-
-pub use debug::DebugRenderer;
-pub use shade::{Shaders, SharedShaders};
-pub use vertex::{desc, VertexArrayKind, MAX_VERTEX_TEXTURE_WIDTH};
-
-/// Use this hint for all vertex data re-initialization. This allows
-/// the driver to better re-use RBOs internally.
-pub const ONE_TIME_USAGE_HINT: VertexUsageHint = VertexUsageHint::Stream;
-
-/// Is only false if no WR instances have ever been created.
-static HAS_BEEN_INITIALIZED: AtomicBool = AtomicBool::new(false);
-
-/// Returns true if a WR instance has ever been initialized in this process.
-pub fn wr_has_been_initialized() -> bool {
- HAS_BEEN_INITIALIZED.load(Ordering::SeqCst)
+cfg_if! {
+ if #[cfg(feature = "debugger")] {
+ use serde_json;
+ use crate::debug_server;
+ }
}
+const DEFAULT_BATCH_LOOKBACK_COUNT: usize = 10;
+const VERTEX_TEXTURE_EXTRA_ROWS: i32 = 10;
+
/// The size of the array of each type of vertex data texture that
/// is round-robin-ed each frame during bind_frame_data. Doing this
/// helps avoid driver stalls while updating the texture in some
@@ -155,7 +134,21 @@ pub fn wr_has_been_initialized() -> bool {
/// combination of UBO/SSBO usage. Although this only affects some
/// platforms, it's enabled on all platforms to reduce testing
/// differences between platforms.
-pub const VERTEX_DATA_TEXTURE_COUNT: usize = 3;
+const VERTEX_DATA_TEXTURE_COUNT: usize = 3;
+
+/// This is false only if no WR instances have ever been created.
+static HAS_BEEN_INITIALIZED: AtomicBool = AtomicBool::new(false);
+
+/// Returns true if a WR instance has ever been initialized in this process.
+pub fn wr_has_been_initialized() -> bool {
+ HAS_BEEN_INITIALIZED.load(Ordering::SeqCst)
+}
+
+pub const MAX_VERTEX_TEXTURE_WIDTH: usize = webrender_build::MAX_VERTEX_TEXTURE_WIDTH;
+/// Enabling this toggle forces the GPU cache's scattered texture to be
+/// resized every frame, which lets GPU debuggers verify that the resize
+/// is performed correctly.
+const GPU_CACHE_RESIZE_TEST: bool = false;
/// Number of GPU blocks per UV rectangle provided for an image.
pub const BLOCKS_PER_UV_RECT: usize = 2;
@@ -168,6 +161,14 @@ const GPU_TAG_BRUSH_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
label: "B_LinearGradient",
color: debug_colors::POWDERBLUE,
};
+const GPU_TAG_BRUSH_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag {
+ label: "B_RadialGradient",
+ color: debug_colors::LIGHTPINK,
+};
+const GPU_TAG_BRUSH_CONIC_GRADIENT: GpuProfileTag = GpuProfileTag {
+ label: "B_ConicGradient",
+ color: debug_colors::GREEN,
+};
const GPU_TAG_BRUSH_YUV_IMAGE: GpuProfileTag = GpuProfileTag {
label: "B_YuvImage",
color: debug_colors::DARKGREEN,
@@ -200,20 +201,8 @@ const GPU_TAG_CACHE_LINE_DECORATION: GpuProfileTag = GpuProfileTag {
label: "C_LineDecoration",
color: debug_colors::YELLOWGREEN,
};
-const GPU_TAG_CACHE_FAST_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
- label: "C_FastLinearGradient",
- color: debug_colors::BROWN,
-};
-const GPU_TAG_CACHE_LINEAR_GRADIENT: GpuProfileTag = GpuProfileTag {
- label: "C_LinearGradient",
- color: debug_colors::BROWN,
-};
-const GPU_TAG_CACHE_RADIAL_GRADIENT: GpuProfileTag = GpuProfileTag {
- label: "C_RadialGradient",
- color: debug_colors::BROWN,
-};
-const GPU_TAG_CACHE_CONIC_GRADIENT: GpuProfileTag = GpuProfileTag {
- label: "C_ConicGradient",
+const GPU_TAG_CACHE_GRADIENT: GpuProfileTag = GpuProfileTag {
+ label: "C_Gradient",
color: debug_colors::BROWN,
};
const GPU_TAG_SETUP_TARGET: GpuProfileTag = GpuProfileTag {
@@ -245,15 +234,15 @@ const GPU_TAG_SCALE: GpuProfileTag = GpuProfileTag {
color: debug_colors::GHOSTWHITE,
};
const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag {
- label: "Alpha targets",
+ label: "Alpha Targets",
color: debug_colors::BLACK,
};
const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag {
- label: "Opaque pass",
+ label: "Opaque Pass",
color: debug_colors::BLACK,
};
const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag {
- label: "Transparent pass",
+ label: "Transparent Pass",
color: debug_colors::BLACK,
};
const GPU_TAG_SVG_FILTER: GpuProfileTag = GpuProfileTag {
@@ -272,9 +261,30 @@ const GPU_TAG_CLEAR: GpuProfileTag = GpuProfileTag {
/// The clear color used for the texture cache when the debug display is enabled.
/// We use a shade of blue so that we can still identify completely blue items in
/// the texture cache.
-pub const TEXTURE_CACHE_DBG_CLEAR_COLOR: [f32; 4] = [0.0, 0.0, 0.8, 1.0];
+const TEXTURE_CACHE_DBG_CLEAR_COLOR: [f32; 4] = [0.0, 0.0, 0.8, 1.0];
impl BatchKind {
+ #[cfg(feature = "debugger")]
+ fn debug_name(&self) -> &'static str {
+ match *self {
+ BatchKind::SplitComposite => "SplitComposite",
+ BatchKind::Brush(kind) => {
+ match kind {
+ BrushBatchKind::Solid => "Brush (Solid)",
+ BrushBatchKind::Image(..) => "Brush (Image)",
+ BrushBatchKind::Blend => "Brush (Blend)",
+ BrushBatchKind::MixBlend { .. } => "Brush (Composite)",
+ BrushBatchKind::YuvImage(..) => "Brush (YuvImage)",
+ BrushBatchKind::ConicGradient => "Brush (ConicGradient)",
+ BrushBatchKind::RadialGradient => "Brush (RadialGradient)",
+ BrushBatchKind::LinearGradient => "Brush (LinearGradient)",
+ BrushBatchKind::Opacity => "Brush (Opacity)",
+ }
+ }
+ BatchKind::TextRun(_) => "TextRun",
+ }
+ }
+
fn sampler_tag(&self) -> GpuProfileTag {
match *self {
BatchKind::SplitComposite => GPU_TAG_PRIM_SPLIT_COMPOSITE,
@@ -285,6 +295,8 @@ impl BatchKind {
BrushBatchKind::Blend => GPU_TAG_BRUSH_BLEND,
BrushBatchKind::MixBlend { .. } => GPU_TAG_BRUSH_MIXBLEND,
BrushBatchKind::YuvImage(..) => GPU_TAG_BRUSH_YUV_IMAGE,
+ BrushBatchKind::ConicGradient => GPU_TAG_BRUSH_CONIC_GRADIENT,
+ BrushBatchKind::RadialGradient => GPU_TAG_BRUSH_RADIAL_GRADIENT,
BrushBatchKind::LinearGradient => GPU_TAG_BRUSH_LINEAR_GRADIENT,
BrushBatchKind::Opacity => GPU_TAG_BRUSH_OPACITY,
}
@@ -312,21 +324,19 @@ pub enum ShaderColorMode {
SubpixelWithBgColorPass1 = 4,
SubpixelWithBgColorPass2 = 5,
SubpixelDualSource = 6,
- BitmapShadow = 7,
+ Bitmap = 7,
ColorBitmap = 8,
Image = 9,
- MultiplyDualSource = 10,
}
impl From<GlyphFormat> for ShaderColorMode {
fn from(format: GlyphFormat) -> ShaderColorMode {
match format {
- GlyphFormat::Alpha |
- GlyphFormat::TransformedAlpha |
- GlyphFormat::Bitmap => ShaderColorMode::Alpha,
+ GlyphFormat::Alpha | GlyphFormat::TransformedAlpha => ShaderColorMode::Alpha,
GlyphFormat::Subpixel | GlyphFormat::TransformedSubpixel => {
panic!("Subpixel glyph formats must be handled separately.");
}
+ GlyphFormat::Bitmap => ShaderColorMode::Bitmap,
GlyphFormat::ColorBitmap => ShaderColorMode::ColorBitmap,
}
}
@@ -342,13 +352,14 @@ pub(crate) enum TextureSampler {
Color0,
Color1,
Color2,
+ PrevPassAlpha,
+ PrevPassColor,
GpuCache,
TransformPalette,
RenderTasks,
Dither,
PrimitiveHeadersF,
PrimitiveHeadersI,
- ClipMask,
}
impl TextureSampler {
@@ -370,17 +381,551 @@ impl Into<TextureSlot> for TextureSampler {
TextureSampler::Color0 => TextureSlot(0),
TextureSampler::Color1 => TextureSlot(1),
TextureSampler::Color2 => TextureSlot(2),
- TextureSampler::GpuCache => TextureSlot(3),
- TextureSampler::TransformPalette => TextureSlot(4),
- TextureSampler::RenderTasks => TextureSlot(5),
- TextureSampler::Dither => TextureSlot(6),
- TextureSampler::PrimitiveHeadersF => TextureSlot(7),
- TextureSampler::PrimitiveHeadersI => TextureSlot(8),
- TextureSampler::ClipMask => TextureSlot(9),
+ TextureSampler::PrevPassAlpha => TextureSlot(3),
+ TextureSampler::PrevPassColor => TextureSlot(4),
+ TextureSampler::GpuCache => TextureSlot(5),
+ TextureSampler::TransformPalette => TextureSlot(6),
+ TextureSampler::RenderTasks => TextureSlot(7),
+ TextureSampler::Dither => TextureSlot(8),
+ TextureSampler::PrimitiveHeadersF => TextureSlot(9),
+ TextureSampler::PrimitiveHeadersI => TextureSlot(10),
}
}
}
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+pub struct PackedVertex {
+ pub pos: [f32; 2],
+}
+
+pub(crate) mod desc {
+ use crate::device::{VertexAttribute, VertexAttributeKind, VertexDescriptor};
+
+ pub const PRIM_INSTANCES: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aData",
+ count: 4,
+ kind: VertexAttributeKind::I32,
+ },
+ ],
+ };
+
+ pub const BLUR: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aBlurRenderTaskAddress",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aBlurSourceTaskAddress",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aBlurDirection",
+ count: 1,
+ kind: VertexAttributeKind::I32,
+ },
+ ],
+ };
+
+ pub const LINE: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aTaskRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aLocalSize",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aWavyLineThickness",
+ count: 1,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aStyle",
+ count: 1,
+ kind: VertexAttributeKind::I32,
+ },
+ VertexAttribute {
+ name: "aAxisSelect",
+ count: 1,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ };
+
+ pub const GRADIENT: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aTaskRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aStops",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ // TODO(gw): We should probably pack these as u32 colors instead
+ // of passing them as full float vec4s here. It won't make much
+ // difference in the real world, since these are only invoked
+ // rarely, when creating the cache.
+ VertexAttribute {
+ name: "aColor0",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aColor1",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aColor2",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aColor3",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aAxisSelect",
+ count: 1,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aStartStop",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ };
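+
On the TODO above: packing each color into a single u32 attribute would shrink this instance layout by twelve floats. A minimal sketch of the packing side, assuming plain RGBA8 quantization (not something the current code does):

    fn pack_color(r: f32, g: f32, b: f32, a: f32) -> u32 {
        // Clamp each channel to [0, 1] and quantize to 8 bits, RGBA byte order.
        let q = |v: f32| (v.max(0.0).min(1.0) * 255.0).round() as u32;
        (q(r) << 24) | (q(g) << 16) | (q(b) << 8) | q(a)
    }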
+
+ pub const BORDER: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aTaskOrigin",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aColor0",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aColor1",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aFlags",
+ count: 1,
+ kind: VertexAttributeKind::I32,
+ },
+ VertexAttribute {
+ name: "aWidths",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aRadii",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aClipParams1",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aClipParams2",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ };
+
+ pub const SCALE: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aScaleTargetRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aScaleSourceRect",
+ count: 4,
+ kind: VertexAttributeKind::I32,
+ },
+ VertexAttribute {
+ name: "aScaleSourceLayer",
+ count: 1,
+ kind: VertexAttributeKind::I32,
+ },
+ ],
+ };
+
+ pub const CLIP: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aTransformIds",
+ count: 2,
+ kind: VertexAttributeKind::I32,
+ },
+ VertexAttribute {
+ name: "aClipDataResourceAddress",
+ count: 4,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aClipLocalPos",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aClipTileRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aClipDeviceArea",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aClipOrigins",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aDevicePixelScale",
+ count: 1,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ };
+
+ pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::U16Norm,
+ },
+ VertexAttribute {
+ name: "aValue",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[],
+ };
+
+ pub const RESOLVE: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ };
+
+ pub const SVG_FILTER: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aFilterRenderTaskAddress",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aFilterInput1TaskAddress",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aFilterInput2TaskAddress",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aFilterKind",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aFilterInputCount",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aFilterGenericInt",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aFilterExtraDataAddress",
+ count: 2,
+ kind: VertexAttributeKind::U16,
+ },
+ ],
+ };
+
+ pub const VECTOR_STENCIL: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aFromPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aCtrlPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aToPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aFromNormal",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aCtrlNormal",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aToNormal",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aPathID",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aPad",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ ],
+ };
+
+ pub const VECTOR_COVER: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aTargetRect",
+ count: 4,
+ kind: VertexAttributeKind::I32,
+ },
+ VertexAttribute {
+ name: "aStencilOrigin",
+ count: 2,
+ kind: VertexAttributeKind::I32,
+ },
+ VertexAttribute {
+ name: "aSubpixel",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ VertexAttribute {
+ name: "aPad",
+ count: 1,
+ kind: VertexAttributeKind::U16,
+ },
+ ],
+ };
+
+ pub const COMPOSITE: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aDeviceRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aDeviceClipRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aColor",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aParams",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aUvRect0",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aUvRect1",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aUvRect2",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aTextureLayers",
+ count: 3,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ };
+
+ pub const CLEAR: VertexDescriptor = VertexDescriptor {
+ vertex_attributes: &[
+ VertexAttribute {
+ name: "aPosition",
+ count: 2,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ instance_attributes: &[
+ VertexAttribute {
+ name: "aRect",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ VertexAttribute {
+ name: "aColor",
+ count: 4,
+ kind: VertexAttributeKind::F32,
+ },
+ ],
+ };
+}
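+
The split between `vertex_attributes` and `instance_attributes` in every descriptor above corresponds to the GL attribute divisor: divisor 0 advances per vertex, divisor 1 advances once per instance. A rough sketch of the binding this implies, assuming f32 attributes and illustrative location/stride values (the real binding logic lives in the `device` module):

    use gleam::gl::{self, Gl};

    fn bind_f32_attribute(
        gl: &dyn Gl,
        location: u32,     // attribute location from the linked program
        count: i32,        // components per attribute (2 for aPosition, etc.)
        stride: i32,       // byte stride of the containing stream
        offset: u32,       // byte offset within the stream
        per_instance: bool,
    ) {
        gl.enable_vertex_attrib_array(location);
        gl.vertex_attrib_pointer(location, count, gl::FLOAT, false, stride, offset);
        // Per-instance attributes advance once per instance, not per vertex.
        gl.vertex_attrib_divisor(location, if per_instance { 1 } else { 0 });
    }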
+
+#[derive(Debug, Copy, Clone)]
+pub(crate) enum VertexArrayKind {
+ Primitive,
+ Blur,
+ Clip,
+ VectorStencil,
+ VectorCover,
+ Border,
+ Scale,
+ LineDecoration,
+ Gradient,
+ Resolve,
+ SvgFilter,
+ Composite,
+ Clear,
+}
+
#[derive(Clone, Debug, PartialEq)]
pub enum GraphicsApi {
OpenGL,
@@ -393,6 +938,28 @@ pub struct GraphicsApiInfo {
pub version: String,
}
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+pub enum ImageBufferKind {
+ Texture2D = 0,
+ TextureRect = 1,
+ TextureExternal = 2,
+ Texture2DArray = 3,
+}
+
+// TODO: these types are the same, so let's merge them.
+impl From<TextureTarget> for ImageBufferKind {
+ fn from(target: TextureTarget) -> Self {
+ match target {
+ TextureTarget::Default => ImageBufferKind::Texture2D,
+ TextureTarget::Rect => ImageBufferKind::TextureRect,
+ TextureTarget::Array => ImageBufferKind::Texture2DArray,
+ TextureTarget::External => ImageBufferKind::TextureExternal,
+ }
+ }
+}
+
#[derive(Debug)]
pub struct GpuProfile {
pub frame_id: GpuFrameId,
@@ -400,7 +967,7 @@ pub struct GpuProfile {
}
impl GpuProfile {
- fn new(frame_id: GpuFrameId, timers: &[GpuTimer]) -> GpuProfile {
+ fn new<T>(frame_id: GpuFrameId, timers: &[GpuTimer<T>]) -> GpuProfile {
let mut paint_time_ns = 0;
for timer in timers {
paint_time_ns += timer.time_ns;
@@ -447,6 +1014,13 @@ enum PartialPresentMode {
},
}
+/// A Texture that has been initialized by the `device` module and is ready to
+/// be used.
+struct ActiveTexture {
+ texture: Texture,
+ saved_index: Option<SavedTargetIndex>,
+}
+
/// Helper struct for resolving device Textures for use during rendering passes.
///
/// Manages the mapping between the at-a-distance texture handles used by the
@@ -457,24 +1031,51 @@ struct TextureResolver {
texture_cache_map: FastHashMap<CacheTextureId, Texture>,
/// Map of external image IDs to native textures.
- external_images: FastHashMap<DeferredResolveIndex, ExternalTexture>,
+ external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,
/// A special 1x1 dummy texture used for shaders that expect to work with
/// the output of the previous pass but are actually running in the first
/// pass.
dummy_cache_texture: Texture,
+
+ /// The outputs of the previous pass, if applicable.
+ prev_pass_color: Option<ActiveTexture>,
+ prev_pass_alpha: Option<ActiveTexture>,
+
+ /// Saved render targets from previous passes. This is used when a pass
+ /// needs access to the result of a pass other than the immediately-preceding
+ /// one. In this case, the `RenderTask` will get a non-`None` `saved_index`,
+ /// which will cause the resulting render target to be persisted in this list
+ /// (at that index) until the end of the frame.
+ saved_targets: Vec<Texture>,
+
+ /// Pool of idle render target textures ready for re-use.
+ ///
+ /// Naively, it would seem like we only ever need two pairs of (color,
+ /// alpha) render targets: one for the output of the previous pass (serving
+ /// as input to the current pass), and one for the output of the current
+ /// pass. However, there are cases where the output of one pass is used as
+ /// the input to multiple future passes. For example, drop-shadows draw the
+ /// picture in pass X, then reference it in pass X+1 to create the blurred
+ /// shadow, and pass the results of both X and X+1 to pass X+2 draw the
+ /// actual content.
+ ///
+ /// See the comments in `allocate_target_texture` for more insight on why
+ /// reuse is a win.
+ render_target_pool: Vec<Texture>,
}
impl TextureResolver {
fn new(device: &mut Device) -> TextureResolver {
let dummy_cache_texture = device
.create_texture(
- ImageBufferKind::Texture2D,
+ TextureTarget::Array,
ImageFormat::RGBA8,
1,
1,
TextureFilter::Linear,
None,
+ 1,
);
device.upload_texture_immediate(
&dummy_cache_texture,
@@ -485,6 +1086,10 @@ impl TextureResolver {
texture_cache_map: FastHashMap::default(),
external_images: FastHashMap::default(),
dummy_cache_texture,
+ prev_pass_alpha: None,
+ prev_pass_color: None,
+ saved_targets: Vec::default(),
+ render_target_pool: Vec::new(),
}
}
@@ -494,23 +1099,141 @@ impl TextureResolver {
for (_id, texture) in self.texture_cache_map {
device.delete_texture(texture);
}
+
+ for texture in self.render_target_pool {
+ device.delete_texture(texture);
+ }
}
fn begin_frame(&mut self) {
+ assert!(self.prev_pass_color.is_none());
+ assert!(self.prev_pass_alpha.is_none());
+ assert!(self.saved_targets.is_empty());
+ }
+
+ fn end_frame(&mut self, device: &mut Device, frame_id: GpuFrameId) {
+ // return the cached targets to the pool
+ self.end_pass(device, None, None);
+ // return the saved targets as well
+ while let Some(target) = self.saved_targets.pop() {
+ self.return_to_pool(device, target);
+ }
+
+ // GC the render target pool, if it's currently > 32 MB in size.
+ //
+ // We use a simple scheme whereby we drop any texture that hasn't been used
+ // in the last 60 frames, until we are below the size threshold. This should
+ // generally prevent any sustained build-up of unused textures, unless we don't
+ // generate frames for a long period. This can happen when the window is
+ // minimized, and we probably want to flush all the WebRender caches in that case [1].
+ // There is also a second "red line" memory threshold which prevents
+ // memory exhaustion if many render targets are allocated within a small
+ // number of frames. For now this is set at 320 MB (10x the normal memory threshold).
+ //
+ // [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1494099
+ self.gc_targets(
+ device,
+ frame_id,
+ 32 * 1024 * 1024,
+ 32 * 1024 * 1024 * 10,
+ 60,
+ );
+ }
+
+ /// Transfers ownership of a render target back to the pool.
+ fn return_to_pool(&mut self, device: &mut Device, target: Texture) {
+ device.invalidate_render_target(&target);
+ self.render_target_pool.push(target);
+ }
+
+ /// Frees any memory possible, in the event of a memory pressure signal.
+ fn on_memory_pressure(
+ &mut self,
+ device: &mut Device,
+ ) {
+ // Clear all textures in the render target pool
+ for target in self.render_target_pool.drain(..) {
+ device.delete_texture(target);
+ }
+ }
+
+ /// Drops targets from the render target pool according to the given size
+ /// and age thresholds.
+ pub fn gc_targets(
+ &mut self,
+ device: &mut Device,
+ current_frame_id: GpuFrameId,
+ total_bytes_threshold: usize,
+ total_bytes_red_line_threshold: usize,
+ frames_threshold: usize,
+ ) {
+ // Get the total GPU memory size used by the current render target pool
+ let mut rt_pool_size_in_bytes: usize = self.render_target_pool
+ .iter()
+ .map(|t| t.size_in_bytes())
+ .sum();
+
+ // If the total size of the pool is less than the threshold, don't bother
+ // trying to GC any targets
+ if rt_pool_size_in_bytes <= total_bytes_threshold {
+ return;
+ }
+
+ // Sort the current pool by age, so that we remove the oldest textures first
+ self.render_target_pool.sort_by_key(|t| t.last_frame_used());
+
+ // We can't just use retain() because `Texture` requires manual cleanup.
+ let mut retained_targets = SmallVec::<[Texture; 8]>::new();
+
+ for target in self.render_target_pool.drain(..) {
+ // Drop oldest textures until we are under the allowed size threshold.
+ // However, if it has been used very recently, it is always kept around,
+ // which ensures we don't thrash texture allocations on pages that do
+ // require a very large render target pool and are regularly changing.
+ if (rt_pool_size_in_bytes > total_bytes_red_line_threshold) ||
+ (rt_pool_size_in_bytes > total_bytes_threshold &&
+ !target.used_recently(current_frame_id, frames_threshold))
+ {
+ rt_pool_size_in_bytes -= target.size_in_bytes();
+ device.delete_texture(target);
+ } else {
+ retained_targets.push(target);
+ }
+ }
+
+ self.render_target_pool.extend(retained_targets);
}
fn end_pass(
&mut self,
device: &mut Device,
- textures_to_invalidate: &[CacheTextureId],
+ a8_texture: Option<ActiveTexture>,
+ rgba8_texture: Option<ActiveTexture>,
) {
- // For any texture that is no longer needed, immediately
- // invalidate it so that tiled GPUs don't need to resolve it
- // back to memory.
- for texture_id in textures_to_invalidate {
- let render_target = &self.texture_cache_map[texture_id];
- device.invalidate_render_target(render_target);
+ // If we have cache textures from the previous pass, either persist them
+ // in `saved_targets` (when they carry a saved index) or return them to
+ // the pool, since they hold the results of the last pass.
+ // Note: the order here is important, needs to match the logic in `RenderPass::build()`.
+ if let Some(at) = self.prev_pass_color.take() {
+ if let Some(index) = at.saved_index {
+ assert_eq!(self.saved_targets.len(), index.0);
+ self.saved_targets.push(at.texture);
+ } else {
+ self.return_to_pool(device, at.texture);
+ }
}
+ if let Some(at) = self.prev_pass_alpha.take() {
+ if let Some(index) = at.saved_index {
+ assert_eq!(self.saved_targets.len(), index.0);
+ self.saved_targets.push(at.texture);
+ } else {
+ self.return_to_pool(device, at.texture);
+ }
+ }
+
+ // We have another pass to process, make these textures available
+ // as inputs to the next pass.
+ self.prev_pass_color = rgba8_texture;
+ self.prev_pass_alpha = a8_texture;
}
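
A toy model may make the ordering invariant clearer; the types below are simplified stand-ins (strings instead of `Texture`, one output retired per call), not the real API:

    struct Resolver {
        saved: Vec<&'static str>,
        pool: Vec<&'static str>,
    }

    impl Resolver {
        // Retire one pass output: persist it if the frame builder assigned it
        // a saved index, otherwise return it to the re-use pool.
        fn retire(&mut self, tex: &'static str, saved_index: Option<usize>) {
            match saved_index {
                // The push order must match the index handed out during frame
                // building, which is what the assert_eq! above enforces.
                Some(i) => {
                    assert_eq!(self.saved.len(), i);
                    self.saved.push(tex);
                }
                None => self.pool.push(tex),
            }
        }
    }

    fn main() {
        let mut r = Resolver { saved: Vec::new(), pool: Vec::new() };
        r.retire("picture", Some(0)); // pass X output, needed again in pass X+2
        r.retire("blur", None);       // pass X+1 output, only needed by X+2
        assert_eq!(r.saved, vec!["picture"]);
        assert_eq!(r.pool, vec!["blur"]);
    }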
// Bind a source texture to the device.
@@ -524,9 +1247,27 @@ impl TextureResolver {
device.bind_texture(sampler, &self.dummy_cache_texture, swizzle);
swizzle
}
- TextureSource::External(ref index, _) => {
+ TextureSource::PrevPassAlpha => {
+ let texture = match self.prev_pass_alpha {
+ Some(ref at) => &at.texture,
+ None => &self.dummy_cache_texture,
+ };
+ let swizzle = Swizzle::default();
+ device.bind_texture(sampler, texture, swizzle);
+ swizzle
+ }
+ TextureSource::PrevPassColor => {
+ let texture = match self.prev_pass_color {
+ Some(ref at) => &at.texture,
+ None => &self.dummy_cache_texture,
+ };
+ let swizzle = Swizzle::default();
+ device.bind_texture(sampler, texture, swizzle);
+ swizzle
+ }
+ TextureSource::External(external_image) => {
let texture = self.external_images
- .get(index)
+ .get(&(external_image.id, external_image.channel_index))
.expect("BUG: External image should be resolved by now");
device.bind_external_texture(sampler, texture);
Swizzle::default()
@@ -536,6 +1277,28 @@ impl TextureResolver {
device.bind_texture(sampler, texture, swizzle);
swizzle
}
+ TextureSource::RenderTaskCache(saved_index, swizzle) => {
+ if saved_index.0 < self.saved_targets.len() {
+ let texture = &self.saved_targets[saved_index.0];
+ device.bind_texture(sampler, texture, swizzle)
+ } else {
+ // Check if this saved index refers to the previous pass.
+ if Some(saved_index) == self.prev_pass_color.as_ref().and_then(|at| at.saved_index) {
+ let texture = match self.prev_pass_color {
+ Some(ref at) => &at.texture,
+ None => &self.dummy_cache_texture,
+ };
+ device.bind_texture(sampler, texture, swizzle);
+ } else if Some(saved_index) == self.prev_pass_alpha.as_ref().and_then(|at| at.saved_index) {
+ let texture = match self.prev_pass_alpha {
+ Some(ref at) => &at.texture,
+ None => &self.dummy_cache_texture,
+ };
+ device.bind_texture(sampler, texture, swizzle);
+ }
+ }
+ swizzle
+ }
}
}
@@ -548,12 +1311,29 @@ impl TextureResolver {
TextureSource::Dummy => {
Some((&self.dummy_cache_texture, Swizzle::default()))
}
+ TextureSource::PrevPassAlpha => Some((
+ match self.prev_pass_alpha {
+ Some(ref at) => &at.texture,
+ None => &self.dummy_cache_texture,
+ },
+ Swizzle::default(),
+ )),
+ TextureSource::PrevPassColor => Some((
+ match self.prev_pass_color {
+ Some(ref at) => &at.texture,
+ None => &self.dummy_cache_texture,
+ },
+ Swizzle::default(),
+ )),
TextureSource::External(..) => {
panic!("BUG: External textures cannot be resolved, they can only be bound.");
}
TextureSource::TextureCache(index, swizzle) => {
Some((&self.texture_cache_map[&index], swizzle))
}
+ TextureSource::RenderTaskCache(saved_index, swizzle) => {
+ Some((&self.saved_targets[saved_index.0], swizzle))
+ }
}
}
@@ -565,9 +1345,9 @@ impl TextureResolver {
default_value: TexelRect,
) -> TexelRect {
match source {
- TextureSource::External(ref index, _) => {
+ TextureSource::External(ref external_image) => {
let texture = self.external_images
- .get(index)
+ .get(&(external_image.id, external_image.channel_index))
.expect("BUG: External image should be resolved by now");
texture.get_uv_rect()
}
@@ -577,21 +1357,6 @@ impl TextureResolver {
}
}
- /// Returns the size of the texture in pixels
- fn get_texture_size(&self, texture: &TextureSource) -> DeviceIntSize {
- match *texture {
- TextureSource::Invalid => DeviceIntSize::zero(),
- TextureSource::TextureCache(id, _) => {
- self.texture_cache_map[&id].get_dimensions()
- },
- TextureSource::External(index, _) => {
- let uv_rect = self.external_images[&index].get_uv_rect();
- (uv_rect.uv1 - uv_rect.uv0).abs().to_size().to_i32()
- },
- TextureSource::Dummy => DeviceIntSize::new(1, 1),
- }
- }
-
fn report_memory(&self) -> MemoryReport {
let mut report = MemoryReport::default();
@@ -600,6 +1365,9 @@ impl TextureResolver {
for t in self.texture_cache_map.values() {
report.texture_cache_textures += t.size_in_bytes();
}
+ for t in self.render_target_pool.iter() {
+ report.render_target_textures += t.size_in_bytes();
+ }
report
}
@@ -617,39 +1385,533 @@ pub enum BlendMode {
SubpixelConstantTextColor(ColorF),
SubpixelWithBgColor,
Advanced(MixBlendMode),
- MultiplyDualSource,
- Screen,
- Exclusion,
}
-impl BlendMode {
- /// Decides when a given mix-blend-mode can be implemented in terms of
- /// simple blending, dual-source blending, advanced blending, or not at
- /// all based on available capabilities.
- pub fn from_mix_blend_mode(
- mode: MixBlendMode,
- advanced_blend: bool,
- coherent: bool,
- dual_source: bool,
- ) -> Option<BlendMode> {
- // If we emulate a mix-blend-mode via simple or dual-source blending,
- // care must be taken to output alpha As + Ad*(1-As) regardless of what
- // the RGB output is to comply with the mix-blend-mode spec.
- Some(match mode {
- // If we have coherent advanced blend, just use that.
- _ if advanced_blend && coherent => BlendMode::Advanced(mode),
- // Screen can be implemented as Cs + Cd - Cs*Cd => Cs + Cd*(1-Cs)
- MixBlendMode::Screen => BlendMode::Screen,
- // Exclusion can be implemented as Cs + Cd - 2*Cs*Cd => Cs*(1-Cd) + Cd*(1-Cs)
- MixBlendMode::Exclusion => BlendMode::Exclusion,
- // Multiply can be implemented as Cs*Cd + Cs*(1-Ad) + Cd*(1-As) => Cs*(1-Ad) + Cd*(1 - SRC1=(As-Cs))
- MixBlendMode::Multiply if dual_source => BlendMode::MultiplyDualSource,
- // Otherwise, use advanced blend without coherency if available.
- _ if advanced_blend => BlendMode::Advanced(mode),
- // If advanced blend is not available, then we have to use brush_mix_blend.
- _ => return None,
+/// Tracks the state of each row in the GPU cache texture.
+struct CacheRow {
+ /// Mirrored block data on CPU for this row. We store a copy of
+ /// the data on the CPU side to improve upload batching.
+ cpu_blocks: Box<[GpuBlockData; MAX_VERTEX_TEXTURE_WIDTH]>,
+ /// The first offset in this row that is dirty.
+ min_dirty: u16,
+ /// The last offset in this row that is dirty.
+ max_dirty: u16,
+}
+
+impl CacheRow {
+ fn new() -> Self {
+ CacheRow {
+ cpu_blocks: Box::new([GpuBlockData::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]),
+ min_dirty: MAX_VERTEX_TEXTURE_WIDTH as _,
+ max_dirty: 0,
+ }
+ }
+
+ fn is_dirty(&self) -> bool {
+ return self.min_dirty < self.max_dirty;
+ }
+
+ fn clear_dirty(&mut self) {
+ self.min_dirty = MAX_VERTEX_TEXTURE_WIDTH as _;
+ self.max_dirty = 0;
+ }
+
+ fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
+ self.min_dirty = self.min_dirty.min(block_offset as _);
+ self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
+ }
+
+ fn dirty_blocks(&self) -> &[GpuBlockData] {
+ return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize];
+ }
+}
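+
A quick illustration of the dirty tracking (module-internal, since `CacheRow` is private): two disjoint writes widen a single `[min, max)` span, so one upload covers both, at the cost of re-sending the clean blocks in between:

    let mut row = CacheRow::new();
    row.add_dirty(10, 4);   // blocks 10..14 become dirty
    row.add_dirty(100, 8);  // the span widens to 10..108
    assert_eq!((row.min_dirty, row.max_dirty), (10, 108));
    assert_eq!(row.dirty_blocks().len(), 98);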
+
+/// The bus over which CPU and GPU versions of the GPU cache
+/// get synchronized.
+enum GpuCacheBus {
+ /// PBO-based updates, which currently operate at row granularity and
+ /// are therefore subject to fragmentation issues.
+ PixelBuffer {
+ /// PBO used for transfers.
+ buffer: PBO,
+ /// Per-row data.
+ rows: Vec<CacheRow>,
+ },
+ /// Shader-based scattering updates. Currently rendered by a set
+ /// of points into the GPU texture, each carrying a `GpuBlockData`.
+ Scatter {
+ /// Special program to run the scattered update.
+ program: Program,
+ /// VAO containing the source vertex buffers.
+ vao: CustomVAO,
+ /// VBO for positional data, supplied as normalized `u16`.
+ buf_position: VBO<[u16; 2]>,
+ /// VBO for gpu block data.
+ buf_value: VBO<GpuBlockData>,
+ /// Currently stored block count.
+ count: usize,
+ },
+}
+
+/// The device-specific representation of the cache texture in gpu_cache.rs
+struct GpuCacheTexture {
+ texture: Option<Texture>,
+ bus: GpuCacheBus,
+}
+
+impl GpuCacheTexture {
+
+ /// Ensures that we have an appropriately-sized texture, growing it (and
+ /// blitting over the old contents) if necessary.
+ fn ensure_texture(&mut self, device: &mut Device, height: i32) {
+ // If we already have a texture that works, we're done.
+ if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
+ if GPU_CACHE_RESIZE_TEST {
+ // Special debug mode - resize the texture even though it's fine.
+ } else {
+ return;
+ }
+ }
+
+ // Take the old texture, if any.
+ let blit_source = self.texture.take();
+
+ // Create the new texture.
+ assert!(height >= 2, "Height is too small for ANGLE");
+ let new_size = DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, height);
+ // If glCopyImageSubData is supported, this texture doesn't need
+ // to be a render target. This prevents GL errors due to framebuffer
+ // incompleteness on devices that don't support RGBAF32 render targets.
+ // TODO(gw): We still need a proper solution for the subset of devices
+ // that don't support glCopyImageSubData *OR* rendering to a
+ // RGBAF32 render target. These devices will currently fail
+ // to resize the GPU cache texture.
+ let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
+ let rt_info = if supports_copy_image_sub_data {
+ None
+ } else {
+ Some(RenderTargetInfo { has_depth: false })
+ };
+ let mut texture = device.create_texture(
+ TextureTarget::Default,
+ ImageFormat::RGBAF32,
+ new_size.width,
+ new_size.height,
+ TextureFilter::Nearest,
+ rt_info,
+ 1,
+ );
+
+ // Blit the contents of the previous texture, if applicable.
+ if let Some(blit_source) = blit_source {
+ device.blit_renderable_texture(&mut texture, &blit_source);
+ device.delete_texture(blit_source);
+ }
+
+ self.texture = Some(texture);
+ }
+
+ fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
+ let bus = if use_scatter {
+ let program = device.create_program_linked(
+ "gpu_cache_update",
+ &[],
+ &desc::GPU_CACHE_UPDATE,
+ )?;
+ let buf_position = device.create_vbo();
+ let buf_value = device.create_vbo();
+ // Note: the vertex attributes have to be supplied in the same order
+ // as for program creation, but each is assigned to a different stream.
+ let vao = device.create_custom_vao(&[
+ buf_position.stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[0..1]),
+ buf_value .stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[1..2]),
+ ]);
+ GpuCacheBus::Scatter {
+ program,
+ vao,
+ buf_position,
+ buf_value,
+ count: 0,
+ }
+ } else {
+ let buffer = device.create_pbo();
+ GpuCacheBus::PixelBuffer {
+ buffer,
+ rows: Vec::new(),
+ }
+ };
+
+ Ok(GpuCacheTexture {
+ texture: None,
+ bus,
})
}
+
+ fn deinit(mut self, device: &mut Device) {
+ if let Some(t) = self.texture.take() {
+ device.delete_texture(t);
+ }
+ match self.bus {
+ GpuCacheBus::PixelBuffer { buffer, ..} => {
+ device.delete_pbo(buffer);
+ }
+ GpuCacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
+ device.delete_program(program);
+ device.delete_custom_vao(vao);
+ device.delete_vbo(buf_position);
+ device.delete_vbo(buf_value);
+ }
+ }
+ }
+
+ fn get_height(&self) -> i32 {
+ self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
+ }
+
+ fn prepare_for_updates(
+ &mut self,
+ device: &mut Device,
+ total_block_count: usize,
+ max_height: i32,
+ ) {
+ self.ensure_texture(device, max_height);
+ match self.bus {
+ GpuCacheBus::PixelBuffer { .. } => {},
+ GpuCacheBus::Scatter {
+ ref mut buf_position,
+ ref mut buf_value,
+ ref mut count,
+ ..
+ } => {
+ *count = 0;
+ if total_block_count > buf_value.allocated_count() {
+ device.allocate_vbo(buf_position, total_block_count, VertexUsageHint::Stream);
+ device.allocate_vbo(buf_value, total_block_count, VertexUsageHint::Stream);
+ }
+ }
+ }
+ }
+
+ fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
+ match self.bus {
+ GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+ for update in &updates.updates {
+ match *update {
+ GpuCacheUpdate::Copy {
+ block_index,
+ block_count,
+ address,
+ } => {
+ let row = address.v as usize;
+
+ // Ensure that the CPU-side shadow copy of the GPU cache data has enough
+ // rows to apply this patch.
+ while rows.len() <= row {
+ // Add a new row.
+ rows.push(CacheRow::new());
+ }
+
+ // Copy the blocks from the patch array into the shadow CPU copy.
+ let block_offset = address.u as usize;
+ let data = &mut rows[row].cpu_blocks;
+ for i in 0 .. block_count {
+ data[block_offset + i] = updates.blocks[block_index + i];
+ }
+
+ // This row is dirty (needs to be updated in GPU texture).
+ rows[row].add_dirty(block_offset, block_count);
+ }
+ }
+ }
+ }
+ GpuCacheBus::Scatter {
+ ref buf_position,
+ ref buf_value,
+ ref mut count,
+ ..
+ } => {
+ //TODO: re-use this heap allocation
+ // Unused positions will be left as 0xFFFF, which translates to
+ // (1.0, 1.0) in the vertex output position and gets culled out
+ let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
+ let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();
+
+ for update in &updates.updates {
+ match *update {
+ GpuCacheUpdate::Copy {
+ block_index,
+ block_count,
+ address,
+ } => {
+ // Convert the absolute texel position into normalized coordinates.
+ let y = ((2*address.v as usize + 1) << 15) / size.height;
+ for i in 0 .. block_count {
+ let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
+ position_data[block_index + i] = [x as _, y as _];
+ }
+ }
+ }
+ }
+
+ device.fill_vbo(buf_value, &updates.blocks, *count);
+ device.fill_vbo(buf_position, &position_data, *count);
+ *count += position_data.len();
+ }
+ }
+ }
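+
The shift in the scatter path above is fixed-point normalization to the texel center: `((2*v + 1) << 15) / h` equals `⌊(v + 0.5) / h * 2^16⌋`, which the `U16Norm` position attribute maps back to roughly `(v + 0.5) / h` in the shader. For example, with `h = 1024` and `v = 3`, `((2*3 + 1) << 15) / 1024 = 229376 / 1024 = 224`, and `224 / 65535 ≈ 3.5 / 1024`, the center of row 3.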
+
+ fn flush(&mut self, device: &mut Device) -> usize {
+ let texture = self.texture.as_ref().unwrap();
+ match self.bus {
+ GpuCacheBus::PixelBuffer { ref buffer, ref mut rows } => {
+ let rows_dirty = rows
+ .iter()
+ .filter(|row| row.is_dirty())
+ .count();
+ if rows_dirty == 0 {
+ return 0
+ }
+
+ let (upload_size, _) = device.required_upload_size_and_stride(
+ DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, 1),
+ texture.get_format(),
+ );
+
+ let mut uploader = device.upload_texture(
+ texture,
+ buffer,
+ rows_dirty * upload_size,
+ );
+
+ for (row_index, row) in rows.iter_mut().enumerate() {
+ if !row.is_dirty() {
+ continue;
+ }
+
+ let blocks = row.dirty_blocks();
+ let rect = DeviceIntRect::new(
+ DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
+ DeviceIntSize::new(blocks.len() as i32, 1),
+ );
+
+ uploader.upload(rect, 0, None, None, blocks.as_ptr(), blocks.len());
+
+ row.clear_dirty();
+ }
+
+ rows_dirty
+ }
+ GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
+ device.disable_depth();
+ device.set_blend(false);
+ device.bind_program(program);
+ device.bind_custom_vao(vao);
+ device.bind_draw_target(
+ DrawTarget::from_texture(
+ texture,
+ 0,
+ false,
+ ),
+ );
+ device.draw_nonindexed_points(0, count as _);
+ 0
+ }
+ }
+ }
+}
+
+struct VertexDataTexture<T> {
+ texture: Option<Texture>,
+ format: ImageFormat,
+ pbo: PBO,
+ _marker: PhantomData<T>,
+}
+
+impl<T> VertexDataTexture<T> {
+ fn new(
+ device: &mut Device,
+ format: ImageFormat,
+ ) -> Self {
+ VertexDataTexture {
+ texture: None,
+ format,
+ pbo: device.create_pbo(),
+ _marker: PhantomData,
+ }
+ }
+
+ /// Returns a borrow of the GPU texture. Panics if it hasn't been initialized.
+ fn texture(&self) -> &Texture {
+ self.texture.as_ref().unwrap()
+ }
+
+ /// Returns an estimate of the GPU memory consumed by this VertexDataTexture.
+ fn size_in_bytes(&self) -> usize {
+ self.texture.as_ref().map_or(0, |t| t.size_in_bytes())
+ }
+
+ fn update(&mut self, device: &mut Device, data: &mut Vec<T>) {
+ debug_assert!(mem::size_of::<T>() % 16 == 0);
+ let texels_per_item = mem::size_of::<T>() / 16;
+ let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / texels_per_item;
+ debug_assert_ne!(items_per_row, 0);
+
+ // Ensure we always end up with a texture when leaving this method.
+ let mut len = data.len();
+ if len == 0 {
+ if self.texture.is_some() {
+ return;
+ }
+ data.reserve(items_per_row);
+ len = items_per_row;
+ } else {
+ // Extend the data array to have enough capacity to upload at least
+ // a multiple of the row size. This ensures memory safety when the
+ // array is passed to OpenGL to upload to the GPU.
+ let extra = len % items_per_row;
+ if extra != 0 {
+ let padding = items_per_row - extra;
+ data.reserve(padding);
+ len += padding;
+ }
+ }
+
+ let needed_height = (len / items_per_row) as i32;
+ let existing_height = self.texture.as_ref().map_or(0, |t| t.get_dimensions().height);
+
+ // Create a new texture if needed.
+ //
+ // These textures are generally very small, which is why we don't bother
+ // with incremental updates and just re-upload every frame. For most pages
+ // they're one row each, and on stress tests like css-francine they end up
+ // in the 6-14 range. So we size the texture tightly to what we need (usually
+ // 1), and shrink it if the waste would be more than `VERTEX_TEXTURE_EXTRA_ROWS`
+ // rows. This helps with memory overhead, especially because there are several
+ // instances of these textures per Renderer.
+ if needed_height > existing_height || needed_height + VERTEX_TEXTURE_EXTRA_ROWS < existing_height {
+ // Drop the existing texture, if any.
+ if let Some(t) = self.texture.take() {
+ device.delete_texture(t);
+ }
+
+ let texture = device.create_texture(
+ TextureTarget::Default,
+ self.format,
+ MAX_VERTEX_TEXTURE_WIDTH as i32,
+ // Ensure height is at least two to work around
+ // https://bugs.chromium.org/p/angleproject/issues/detail?id=3039
+ needed_height.max(2),
+ TextureFilter::Nearest,
+ None,
+ 1,
+ );
+ self.texture = Some(texture);
+ }
+
+ // Note: the actual width can be larger than the logical one, with a few texels
+ // of each row unused at the tail. This is needed because there is still hardware
+ // (like Intel iGPUs) that prefers power-of-two sizes of textures ([1]).
+ //
+ // [1] https://software.intel.com/en-us/articles/opengl-performance-tips-power-of-two-textures-have-better-performance
+ let logical_width = if needed_height == 1 {
+ data.len() * texels_per_item
+ } else {
+ MAX_VERTEX_TEXTURE_WIDTH - (MAX_VERTEX_TEXTURE_WIDTH % texels_per_item)
+ };
+
+ let rect = DeviceIntRect::new(
+ DeviceIntPoint::zero(),
+ DeviceIntSize::new(logical_width as i32, needed_height),
+ );
+
+ debug_assert!(len <= data.capacity(), "CPU copy will read out of bounds");
+ let (upload_size, _) = device.required_upload_size_and_stride(
+ rect.size,
+ self.texture().get_format(),
+ );
+ if upload_size > 0 {
+ device
+ .upload_texture(self.texture(), &self.pbo, upload_size)
+ .upload(rect, 0, None, None, data.as_ptr(), len);
+ }
+ }
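+
A worked sizing example: assuming `MAX_VERTEX_TEXTURE_WIDTH` is 1024 and a 64-byte item type, `texels_per_item` is 4 and `items_per_row` is 256. Uploading 300 items pads `len` to 512 (the next multiple of 256), so `needed_height` is 2; a 1024x2 texture is (re)allocated if the old one was smaller or more than `VERTEX_TEXTURE_EXTRA_ROWS` rows taller, and the upload rect covers the full two rows even though only 300 items carry real data.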
+
+ fn deinit(mut self, device: &mut Device) {
+ device.delete_pbo(self.pbo);
+ if let Some(t) = self.texture.take() {
+ device.delete_texture(t);
+ }
+ }
+}
+
+struct FrameOutput {
+ last_access: GpuFrameId,
+ fbo_id: FBOId,
+}
+
+#[derive(PartialEq)]
+struct TargetSelector {
+ size: DeviceIntSize,
+ num_layers: usize,
+ format: ImageFormat,
+}
+
+struct LazyInitializedDebugRenderer {
+ debug_renderer: Option<DebugRenderer>,
+ failed: bool,
+}
+
+impl LazyInitializedDebugRenderer {
+ pub fn new() -> Self {
+ Self {
+ debug_renderer: None,
+ failed: false,
+ }
+ }
+
+ pub fn get_mut<'a>(&'a mut self, device: &mut Device) -> Option<&'a mut DebugRenderer> {
+ if self.failed {
+ return None;
+ }
+ if self.debug_renderer.is_none() {
+ match DebugRenderer::new(device) {
+ Ok(renderer) => { self.debug_renderer = Some(renderer); }
+ Err(_) => {
+ // The shader compilation code already logs errors.
+ self.failed = true;
+ }
+ }
+ }
+
+ self.debug_renderer.as_mut()
+ }
+
+ /// Returns mut ref to `DebugRenderer` if one already exists, otherwise returns `None`.
+ pub fn try_get_mut<'a>(&'a mut self) -> Option<&'a mut DebugRenderer> {
+ self.debug_renderer.as_mut()
+ }
+
+ pub fn deinit(self, device: &mut Device) {
+ if let Some(debug_renderer) = self.debug_renderer {
+ debug_renderer.deinit(device);
+ }
+ }
+}
+
+// NB: If you add more VAOs here, be sure to deinitialize them in
+// `Renderer::deinit()` below.
+pub struct RendererVAOs {
+ prim_vao: VAO,
+ blur_vao: VAO,
+ clip_vao: VAO,
+ border_vao: VAO,
+ line_vao: VAO,
+ scale_vao: VAO,
+ gradient_vao: VAO,
+ resolve_vao: VAO,
+ svg_filter_vao: VAO,
+ composite_vao: VAO,
+ clear_vao: VAO,
}
/// Information about the state of the debugging / profiler overlay in native compositing mode.
@@ -671,50 +1933,87 @@ impl DebugOverlayState {
}
}
-/// Tracks buffer damage rects over a series of frames.
-#[derive(Debug, Default)]
-struct BufferDamageTracker {
- damage_rects: [DeviceRect; 2],
- current_offset: usize,
+pub struct VertexDataTextures {
+ prim_header_f_texture: VertexDataTexture<PrimitiveHeaderF>,
+ prim_header_i_texture: VertexDataTexture<PrimitiveHeaderI>,
+ transforms_texture: VertexDataTexture<TransformData>,
+ render_task_texture: VertexDataTexture<RenderTaskData>,
}
-impl BufferDamageTracker {
- /// Sets the damage rect for the current frame. Should only be called *after*
- /// get_damage_rect() has been called to get the current backbuffer's damage rect.
- fn push_dirty_rect(&mut self, rect: &DeviceRect) {
- self.damage_rects[self.current_offset] = rect.clone();
- self.current_offset = match self.current_offset {
- 0 => self.damage_rects.len() - 1,
- n => n - 1,
- }
- }
-
- /// Gets the damage rect for the current backbuffer, given the backbuffer's age.
- /// (The number of frames since it was previously the backbuffer.)
- /// Returns an empty rect if the buffer is valid, and None if the entire buffer is invalid.
- fn get_damage_rect(&self, buffer_age: usize) -> Option<DeviceRect> {
- match buffer_age {
- // 0 means this is a new buffer, so is completely invalid.
- 0 => None,
- // 1 means this backbuffer was also the previous frame's backbuffer
- // (so must have been copied to the frontbuffer). It is therefore entirely valid.
- 1 => Some(DeviceRect::zero()),
- // We must calculate the union of the damage rects since this buffer was previously
- // the backbuffer.
- n if n <= self.damage_rects.len() + 1 => {
- Some(
- self.damage_rects.iter()
- .cycle()
- .skip(self.current_offset + 1)
- .take(n - 1)
- .fold(DeviceRect::zero(), |acc, r| acc.union(r))
- )
- }
- // The backbuffer is older than the number of frames for which we track,
- // so we treat it as entirely invalid.
- _ => None,
+impl VertexDataTextures {
+ fn new(
+ device: &mut Device,
+ ) -> Self {
+ VertexDataTextures {
+ prim_header_f_texture: VertexDataTexture::new(device, ImageFormat::RGBAF32),
+ prim_header_i_texture: VertexDataTexture::new(device, ImageFormat::RGBAI32),
+ transforms_texture: VertexDataTexture::new(device, ImageFormat::RGBAF32),
+ render_task_texture: VertexDataTexture::new(device, ImageFormat::RGBAF32),
}
}
+
+ fn update(
+ &mut self,
+ device: &mut Device,
+ frame: &mut Frame,
+ ) {
+ self.prim_header_f_texture.update(
+ device,
+ &mut frame.prim_headers.headers_float,
+ );
+ device.bind_texture(
+ TextureSampler::PrimitiveHeadersF,
+ &self.prim_header_f_texture.texture(),
+ Swizzle::default(),
+ );
+
+ self.prim_header_i_texture.update(
+ device,
+ &mut frame.prim_headers.headers_int,
+ );
+ device.bind_texture(
+ TextureSampler::PrimitiveHeadersI,
+ &self.prim_header_i_texture.texture(),
+ Swizzle::default(),
+ );
+
+ self.transforms_texture.update(
+ device,
+ &mut frame.transform_palette,
+ );
+ device.bind_texture(
+ TextureSampler::TransformPalette,
+ &self.transforms_texture.texture(),
+ Swizzle::default(),
+ );
+
+ self.render_task_texture.update(
+ device,
+ &mut frame.render_tasks.task_data,
+ );
+ device.bind_texture(
+ TextureSampler::RenderTasks,
+ &self.render_task_texture.texture(),
+ Swizzle::default(),
+ );
+ }
+
+ fn size_in_bytes(&self) -> usize {
+ self.prim_header_f_texture.size_in_bytes() +
+ self.prim_header_i_texture.size_in_bytes() +
+ self.transforms_texture.size_in_bytes() +
+ self.render_task_texture.size_in_bytes()
+ }
+
+ fn deinit(
+ self,
+ device: &mut Device,
+ ) {
+ self.transforms_texture.deinit(device);
+ self.prim_header_f_texture.deinit(device);
+ self.prim_header_i_texture.deinit(device);
+ self.render_task_texture.deinit(device);
+ }
}
/// The renderer is responsible for submitting to the GPU the work prepared by the
@@ -724,6 +2023,7 @@ impl BufferDamageTracker {
/// one per OS window), and all instances share the same thread.
pub struct Renderer {
result_rx: Receiver<ResultMsg>,
+ debug_server: Box<dyn DebugServer>,
pub device: Device,
pending_texture_updates: Vec<TextureUpdateList>,
/// True if there are any TextureCacheUpdate pending.
@@ -732,7 +2032,7 @@ pub struct Renderer {
pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
pending_gpu_cache_clear: bool,
pending_shader_updates: Vec<PathBuf>,
- active_documents: FastHashMap<DocumentId, RenderedDocument>,
+ active_documents: Vec<(DocumentId, RenderedDocument)>,
shaders: Rc<RefCell<Shaders>>,
@@ -742,23 +2042,26 @@ pub struct Renderer {
enable_clear_scissor: bool,
enable_advanced_blend_barriers: bool,
clear_caches_with_quads: bool,
- clear_alpha_targets_with_quads: bool,
- debug: debug::LazyInitializedDebugRenderer,
+ debug: LazyInitializedDebugRenderer,
debug_flags: DebugFlags,
- profile: TransactionProfile,
- frame_counter: u64,
- resource_upload_time: f64,
- gpu_cache_upload_time: f64,
+ backend_profile_counters: BackendProfileCounters,
+ profile_counters: RendererProfileCounters,
+ resource_upload_time: u64,
+ gpu_cache_upload_time: u64,
profiler: Profiler,
+ new_frame_indicator: ChangeIndicator,
+ new_scene_indicator: ChangeIndicator,
+ slow_frame_indicator: ChangeIndicator,
+ slow_txn_indicator: ChangeIndicator,
last_time: u64,
- pub gpu_profiler: GpuProfiler,
- vaos: vertex::RendererVAOs,
+ pub gpu_profile: GpuProfiler<GpuProfileTag>,
+ vaos: RendererVAOs,
- gpu_cache_texture: gpu_cache::GpuCacheTexture,
- vertex_data_textures: Vec<vertex::VertexDataTextures>,
+ gpu_cache_texture: GpuCacheTexture,
+ vertex_data_textures: Vec<VertexDataTextures>,
current_vertex_data_textures: usize,
/// When the GPU cache debugger is enabled, we keep track of the live blocks
@@ -774,8 +2077,8 @@ pub struct Renderer {
// Manages and resolves source textures IDs to real texture IDs.
texture_resolver: TextureResolver,
- texture_upload_pbo_pool: UploadPBOPool,
- staging_texture_pool: UploadTexturePool,
+ // A PBO used to do asynchronous texture cache uploads.
+ texture_cache_upload_pbo: PBO,
dither_matrix_texture: Option<Texture>,
@@ -783,10 +2086,18 @@ pub struct Renderer {
/// application to provide external buffers for image data.
external_image_handler: Option<Box<dyn ExternalImageHandler>>,
+ /// Optional trait object that allows the client
+ /// application to provide a texture handle to
+ /// copy the WR output to.
+ output_image_handler: Option<Box<dyn OutputImageHandler>>,
+
/// Optional function pointers for measuring memory used by a given
/// heap-allocated pointer.
size_of_ops: Option<MallocSizeOfOps>,
+ // Currently allocated FBOs for output frames.
+ output_targets: FastHashMap<u32, FrameOutput>,
+
pub renderer_errors: Vec<RendererError>,
pub(in crate) async_frame_recorder: Option<AsyncScreenshotGrabber>,
@@ -839,13 +2150,10 @@ pub struct Renderer {
/// State related to the debug / profiling overlays
debug_overlay_state: DebugOverlayState,
- /// Tracks the dirty rectangles from previous frames. Used on platforms
- /// that require keeping the front buffer fully correct when doing
+ /// The dirty rectangle from the previous frame, used on platforms that
+ /// require keeping the front buffer fully correct when doing
/// partial present (e.g. unix desktop with EGL_EXT_buffer_age).
- buffer_damage_tracker: BufferDamageTracker,
-
- max_primitive_instance_count: usize,
- enable_instancing: bool,
+ prev_dirty_rect: DeviceRect,
}
#[derive(Debug)]
@@ -896,7 +2204,8 @@ impl Renderer {
gl: Rc<dyn gl::Gl>,
notifier: Box<dyn RenderNotifier>,
mut options: RendererOptions,
- shaders: Option<&SharedShaders>,
+ shaders: Option<&mut WrShaders>,
+ start_size: DeviceIntSize,
) -> Result<(Self, RenderApiSender), RendererError> {
if !wr_has_been_initialized() {
// If the profiler feature is enabled, try to load the profiler shared library
@@ -914,17 +2223,19 @@ impl Renderer {
HAS_BEEN_INITIALIZED.store(true, Ordering::SeqCst);
- let (api_tx, api_rx) = unbounded_channel();
- let (result_tx, result_rx) = unbounded_channel();
+ let (api_tx, api_rx) = channel();
+ let (result_tx, result_rx) = channel();
let gl_type = gl.get_type();
+ let debug_server = new_debug_server(options.start_debug_server, api_tx.clone());
+
let mut device = Device::new(
gl,
- options.crash_annotator.clone(),
options.resource_override_path.clone(),
options.use_optimized_shaders,
options.upload_method.clone(),
options.cached_programs.take(),
+ options.allow_pixel_local_storage_support,
options.allow_texture_storage_support,
options.allow_texture_swizzling,
options.dump_shader_source.take(),
@@ -936,40 +2247,43 @@ impl Renderer {
let swizzle_settings = device.swizzle_settings();
let use_dual_source_blending =
device.get_capabilities().supports_dual_source_blending &&
- options.allow_dual_source_blending;
+ options.allow_dual_source_blending &&
+ // If using pixel local storage, subpixel AA isn't supported (we disable it on all
+ // mobile devices explicitly anyway).
+ !device.get_capabilities().supports_pixel_local_storage;
let ext_blend_equation_advanced =
options.allow_advanced_blend_equation &&
device.get_capabilities().supports_advanced_blend_equation;
let ext_blend_equation_advanced_coherent =
device.supports_extension("GL_KHR_blend_equation_advanced_coherent");
- // 2048 is the minimum that the texture cache can work with.
- const MIN_TEXTURE_SIZE: i32 = 2048;
- let mut max_internal_texture_size = device.max_texture_size();
- if max_internal_texture_size < MIN_TEXTURE_SIZE {
+ // 512 is the minimum that the texture cache can work with.
+ const MIN_TEXTURE_SIZE: i32 = 512;
+ if let Some(user_limit) = options.max_texture_size {
+ assert!(user_limit >= MIN_TEXTURE_SIZE);
+ device.clamp_max_texture_size(user_limit);
+ }
+ if device.max_texture_size() < MIN_TEXTURE_SIZE {
// Broken GL contexts can return a max texture size of zero (See #1260).
// Better to gracefully fail now than panic as soon as a texture is allocated.
error!(
"Device reporting insufficient max texture size ({})",
- max_internal_texture_size
+ device.max_texture_size()
);
return Err(RendererError::MaxTextureSize);
}
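+        // For example, `max_texture_size: Some(1024)` on hardware reporting
+        // 8192 clamps the device maximum to 1024, while any user limit below
+        // MIN_TEXTURE_SIZE (512) trips the assert above.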
- if let Some(internal_limit) = options.max_internal_texture_size {
- assert!(internal_limit >= MIN_TEXTURE_SIZE);
- max_internal_texture_size = max_internal_texture_size.min(internal_limit);
- }
-
- let image_tiling_threshold = options.image_tiling_threshold
- .min(max_internal_texture_size);
+ let max_texture_size = device.max_texture_size();
+ let max_texture_layers = device.max_texture_layers();
device.begin_frame();
let shaders = match shaders {
- Some(shaders) => Rc::clone(shaders),
+ Some(shaders) => Rc::clone(&shaders.shaders),
None => Rc::new(RefCell::new(Shaders::new(&mut device, gl_type, &options)?)),
};
+ let backend_profile_counters = BackendProfileCounters::new();
+
let dither_matrix_texture = if options.enable_dithering {
let dither_matrix: [u8; 64] = [
0,
@@ -1039,12 +2353,13 @@ impl Renderer {
];
let texture = device.create_texture(
- ImageBufferKind::Texture2D,
+ TextureTarget::Default,
ImageFormat::R8,
8,
8,
TextureFilter::Nearest,
None,
+ 1,
);
device.upload_texture_immediate(&texture, &dither_matrix);
@@ -1053,20 +2368,41 @@ impl Renderer {
None
};
- let max_primitive_instance_count =
- RendererOptions::MAX_INSTANCE_BUFFER_SIZE / mem::size_of::<PrimitiveInstanceData>();
- let vaos = vertex::RendererVAOs::new(
- &mut device,
- if options.enable_instancing { None } else { NonZeroUsize::new(max_primitive_instance_count) },
- );
+ let x0 = 0.0;
+ let y0 = 0.0;
+ let x1 = 1.0;
+ let y1 = 1.0;
+
+ let quad_indices: [u16; 6] = [0, 1, 2, 2, 1, 3];
+ let quad_vertices = [
+ PackedVertex { pos: [x0, y0] },
+ PackedVertex { pos: [x1, y0] },
+ PackedVertex { pos: [x0, y1] },
+ PackedVertex { pos: [x1, y1] },
+ ];
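+        // The index list [0, 1, 2, 2, 1, 3] splits this unit quad into two
+        // triangles, (v0, v1, v2) and (v2, v1, v3); per-instance data then
+        // positions and sizes the quad for each primitive.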
+
+ let prim_vao = device.create_vao(&desc::PRIM_INSTANCES);
+ device.bind_vao(&prim_vao);
+ device.update_vao_indices(&prim_vao, &quad_indices, VertexUsageHint::Static);
+ device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);
+
+ let blur_vao = device.create_vao_with_new_instances(&desc::BLUR, &prim_vao);
+ let clip_vao = device.create_vao_with_new_instances(&desc::CLIP, &prim_vao);
+ let border_vao = device.create_vao_with_new_instances(&desc::BORDER, &prim_vao);
+ let scale_vao = device.create_vao_with_new_instances(&desc::SCALE, &prim_vao);
+ let line_vao = device.create_vao_with_new_instances(&desc::LINE, &prim_vao);
+ let gradient_vao = device.create_vao_with_new_instances(&desc::GRADIENT, &prim_vao);
+ let resolve_vao = device.create_vao_with_new_instances(&desc::RESOLVE, &prim_vao);
+ let svg_filter_vao = device.create_vao_with_new_instances(&desc::SVG_FILTER, &prim_vao);
+ let composite_vao = device.create_vao_with_new_instances(&desc::COMPOSITE, &prim_vao);
+ let clear_vao = device.create_vao_with_new_instances(&desc::CLEAR, &prim_vao);
+ let texture_cache_upload_pbo = device.create_pbo();
- let texture_upload_pbo_pool = UploadPBOPool::new(&mut device, options.upload_pbo_default_size);
- let staging_texture_pool = UploadTexturePool::new();
let texture_resolver = TextureResolver::new(&mut device);
let mut vertex_data_textures = Vec::new();
for _ in 0 .. VERTEX_DATA_TEXTURE_COUNT {
- vertex_data_textures.push(vertex::VertexDataTextures::new());
+ vertex_data_textures.push(VertexDataTextures::new(&mut device));
}
// On some (mostly older, integrated) GPUs, the normal GPU texture cache update path
@@ -1077,23 +2413,17 @@ impl Renderer {
// We want a better solution long-term, but for now this is a significant performance
// improvement on HD4600 era GPUs, and shouldn't hurt performance in a noticeable
// way on other systems running under ANGLE.
- let is_software = device.get_capabilities().renderer_name.starts_with("Software");
+ let is_angle = device.get_capabilities().renderer_name.contains("ANGLE");
- // On other GL platforms, like macOS or Android, creating many PBOs is very inefficient.
- // This is what happens in GPU cache updates in PBO path. Instead, we switch everything
- // except software GL to use the GPU scattered updates.
- let supports_scatter = device.get_capabilities().supports_color_buffer_float;
- let gpu_cache_texture = gpu_cache::GpuCacheTexture::new(
+ let gpu_cache_texture = GpuCacheTexture::new(
&mut device,
- supports_scatter && !is_software,
+ is_angle,
)?;
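+        // The boolean above is the `use_scatter` flag (see `prepare_gpu_cache`
+        // below): under ANGLE the cache uses the scattered-update
+        // `GpuCacheBus::Scatter` path, everywhere else the PBO-backed
+        // `GpuCacheBus::PixelBuffer` path.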
device.end_frame();
let backend_notifier = notifier.clone();
- let clear_alpha_targets_with_quads = !device.get_capabilities().supports_alpha_target_clears;
-
let prefer_subpixel_aa = options.force_subpixel_aa || (options.enable_subpixel_aa && use_dual_source_blending);
let default_font_render_mode = match (options.enable_aa, prefer_subpixel_aa) {
(true, true) => FontRenderMode::Subpixel,
@@ -1102,7 +2432,7 @@ impl Renderer {
};
let compositor_kind = match options.compositor_config {
- CompositorConfig::Draw { max_partial_present_rects, draw_previous_partial_present_regions, .. } => {
+ CompositorConfig::Draw { max_partial_present_rects, draw_previous_partial_present_regions } => {
CompositorKind::Draw { max_partial_present_rects, draw_previous_partial_present_regions }
}
CompositorConfig::Native { ref compositor, max_update_rects, .. } => {
@@ -1110,7 +2440,7 @@ impl Renderer {
CompositorKind::Native {
max_update_rects,
- capabilities,
+ virtual_surface_size: capabilities.virtual_surface_size,
}
}
};
@@ -1120,20 +2450,17 @@ impl Renderer {
dual_source_blending_is_enabled: true,
dual_source_blending_is_supported: use_dual_source_blending,
chase_primitive: options.chase_primitive,
+ global_enable_picture_caching: options.enable_picture_caching,
testing: options.testing,
gpu_supports_fast_clears: options.gpu_supports_fast_clears,
gpu_supports_advanced_blend: ext_blend_equation_advanced,
advanced_blend_is_coherent: ext_blend_equation_advanced_coherent,
- gpu_supports_render_target_partial_update: device.get_capabilities().supports_render_target_partial_update,
- external_images_require_copy: !device.get_capabilities().supports_image_external_essl3,
- batch_lookback_count: RendererOptions::BATCH_LOOKBACK_COUNT,
+ batch_lookback_count: options.batch_lookback_count,
background_color: options.clear_color,
compositor_kind,
tile_size_override: None,
max_depth_ids: device.max_depth_ids(),
- max_target_size: max_internal_texture_size,
- force_invalidation: false,
- is_software,
+ max_target_size: max_texture_size,
};
info!("WR {:?}", config);
@@ -1143,6 +2470,9 @@ impl Renderer {
let enclosing_size_of_op = options.enclosing_size_of_op;
let make_size_of_ops =
move || size_of_op.map(|o| MallocSizeOfOps::new(o, enclosing_size_of_op));
+ let thread_listener = Arc::new(options.thread_listener);
+ let thread_listener_for_rayon_start = thread_listener.clone();
+ let thread_listener_for_rayon_end = thread_listener.clone();
let workers = options
.workers
.take()
@@ -1151,34 +2481,44 @@ impl Renderer {
.thread_name(|idx|{ format!("WRWorker#{}", idx) })
.start_handler(move |idx| {
register_thread_with_profiler(format!("WRWorker#{}", idx));
- thread_started(&format!("WRWorker#{}", idx));
+ if let Some(ref thread_listener) = *thread_listener_for_rayon_start {
+ thread_listener.thread_started(&format!("WRWorker#{}", idx));
+ }
})
- .exit_handler(move |_idx| {
- thread_stopped();
+ .exit_handler(move |idx| {
+ if let Some(ref thread_listener) = *thread_listener_for_rayon_end {
+ thread_listener.thread_stopped(&format!("WRWorker#{}", idx));
+ }
})
.build();
Arc::new(worker.unwrap())
});
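+        // A minimal sketch of a listener these hooks could drive; illustrative
+        // only, assuming `ThreadListener` exposes just the two callbacks used
+        // above:
+        //
+        //     struct LoggingListener;
+        //     impl ThreadListener for LoggingListener {
+        //         fn thread_started(&self, name: &str) { info!("started {}", name); }
+        //         fn thread_stopped(&self, name: &str) { info!("stopped {}", name); }
+        //     }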
let sampler = options.sampler;
let namespace_alloc_by_client = options.namespace_alloc_by_client;
+ let max_glyph_cache_size = options.max_glyph_cache_size.unwrap_or(GlyphCache::DEFAULT_MAX_BYTES_USED);
let font_instances = SharedFontInstanceMap::new();
let blob_image_handler = options.blob_image_handler.take();
+ let thread_listener_for_render_backend = thread_listener.clone();
+ let thread_listener_for_scene_builder = thread_listener.clone();
+ let thread_listener_for_lp_scene_builder = thread_listener.clone();
let scene_builder_hooks = options.scene_builder_hooks;
let rb_thread_name = format!("WRRenderBackend#{}", options.renderer_id.unwrap_or(0));
let scene_thread_name = format!("WRSceneBuilder#{}", options.renderer_id.unwrap_or(0));
let lp_scene_thread_name = format!("WRSceneBuilderLP#{}", options.renderer_id.unwrap_or(0));
- let glyph_rasterizer = GlyphRasterizer::new(workers, device.get_capabilities().supports_r8_texture_upload)?;
+ let glyph_rasterizer = GlyphRasterizer::new(workers)?;
- let (scene_builder_channels, scene_tx) =
+ let (scene_builder_channels, scene_tx, backend_scene_tx, scene_rx) =
SceneBuilderThreadChannels::new(api_tx.clone());
let sb_font_instances = font_instances.clone();
thread::Builder::new().name(scene_thread_name.clone()).spawn(move || {
register_thread_with_profiler(scene_thread_name.clone());
- thread_started(&scene_thread_name);
+ if let Some(ref thread_listener) = *thread_listener_for_scene_builder {
+ thread_listener.thread_started(&scene_thread_name);
+ }
let mut scene_builder = SceneBuilderThread::new(
config,
@@ -1190,24 +2530,31 @@ impl Renderer {
);
scene_builder.run();
- thread_stopped();
+ if let Some(ref thread_listener) = *thread_listener_for_scene_builder {
+ thread_listener.thread_stopped(&scene_thread_name);
+ }
})?;
let low_priority_scene_tx = if options.support_low_priority_transactions {
- let (low_priority_scene_tx, low_priority_scene_rx) = unbounded_channel();
+ let (low_priority_scene_tx, low_priority_scene_rx) = channel();
let lp_builder = LowPrioritySceneBuilderThread {
rx: low_priority_scene_rx,
tx: scene_tx.clone(),
+ simulate_slow_ms: 0,
};
thread::Builder::new().name(lp_scene_thread_name.clone()).spawn(move || {
register_thread_with_profiler(lp_scene_thread_name.clone());
- thread_started(&lp_scene_thread_name);
+ if let Some(ref thread_listener) = *thread_listener_for_lp_scene_builder {
+ thread_listener.thread_started(&lp_scene_thread_name);
+ }
let mut scene_builder = lp_builder;
scene_builder.run();
- thread_stopped();
+ if let Some(ref thread_listener) = *thread_listener_for_lp_scene_builder {
+ thread_listener.thread_stopped(&lp_scene_thread_name);
+ }
})?;
low_priority_scene_tx
@@ -1219,29 +2566,32 @@ impl Renderer {
.as_ref()
.map(|handler| handler.create_similar());
- let texture_cache_config = options.texture_cache_config.clone();
- let mut picture_tile_size = options.picture_tile_size.unwrap_or(picture::TILE_SIZE_DEFAULT);
- // Clamp the picture tile size to reasonable values.
- picture_tile_size.width = picture_tile_size.width.max(128).min(4096);
- picture_tile_size.height = picture_tile_size.height.max(128).min(4096);
-
- let rb_scene_tx = scene_tx.clone();
let rb_font_instances = font_instances.clone();
let enable_multithreading = options.enable_multithreading;
+ let texture_cache_eviction_threshold_bytes = options.texture_cache_eviction_threshold_bytes;
+ let texture_cache_max_evictions_per_frame = options.texture_cache_max_evictions_per_frame;
thread::Builder::new().name(rb_thread_name.clone()).spawn(move || {
register_thread_with_profiler(rb_thread_name.clone());
- thread_started(&rb_thread_name);
+ if let Some(ref thread_listener) = *thread_listener_for_render_backend {
+ thread_listener.thread_started(&rb_thread_name);
+ }
let texture_cache = TextureCache::new(
- max_internal_texture_size,
- image_tiling_threshold,
- picture_tile_size,
+ max_texture_size,
+ max_texture_layers,
+ if config.global_enable_picture_caching {
+ tile_cache_sizes(config.testing)
+ } else {
+ &[]
+ },
+ start_size,
color_cache_formats,
swizzle_settings,
- &texture_cache_config,
+ texture_cache_eviction_threshold_bytes,
+ texture_cache_max_evictions_per_frame,
);
- let glyph_cache = GlyphCache::new();
+ let glyph_cache = GlyphCache::new(max_glyph_cache_size);
let mut resource_cache = ResourceCache::new(
texture_cache,
@@ -1255,7 +2605,10 @@ impl Renderer {
let mut backend = RenderBackend::new(
api_rx,
result_tx,
- rb_scene_tx,
+ scene_tx,
+ low_priority_scene_tx,
+ backend_scene_tx,
+ scene_rx,
device_pixel_ratio,
resource_cache,
backend_notifier,
@@ -1266,8 +2619,10 @@ impl Renderer {
debug_flags,
namespace_alloc_by_client,
);
- backend.run();
- thread_stopped();
+ backend.run(backend_profile_counters);
+ if let Some(ref thread_listener) = *thread_listener_for_render_backend {
+ thread_listener.thread_stopped(&rb_thread_name);
+ }
})?;
let debug_method = if !options.enable_gpu_markers {
@@ -1284,14 +2639,15 @@ impl Renderer {
info!("using {:?}", debug_method);
- let gpu_profiler = GpuProfiler::new(Rc::clone(device.rc_gl()), debug_method);
+ let gpu_profile = GpuProfiler::new(Rc::clone(device.rc_gl()), debug_method);
#[cfg(feature = "capture")]
let read_fbo = device.create_fbo();
let mut renderer = Renderer {
result_rx,
+ debug_server,
device,
- active_documents: FastHashMap::default(),
+ active_documents: Vec::new(),
pending_texture_updates: Vec::new(),
pending_texture_cache_updates: false,
pending_native_surface_updates: Vec::new(),
@@ -1299,36 +2655,52 @@ impl Renderer {
pending_gpu_cache_clear: false,
pending_shader_updates: Vec::new(),
shaders,
- debug: debug::LazyInitializedDebugRenderer::new(),
+ debug: LazyInitializedDebugRenderer::new(),
debug_flags: DebugFlags::empty(),
- profile: TransactionProfile::new(),
- frame_counter: 0,
- resource_upload_time: 0.0,
- gpu_cache_upload_time: 0.0,
+ backend_profile_counters: BackendProfileCounters::new(),
+ profile_counters: RendererProfileCounters::new(),
+ resource_upload_time: 0,
+ gpu_cache_upload_time: 0,
profiler: Profiler::new(),
+ new_frame_indicator: ChangeIndicator::new(),
+ new_scene_indicator: ChangeIndicator::new(),
+ slow_frame_indicator: ChangeIndicator::new(),
+ slow_txn_indicator: ChangeIndicator::new(),
max_recorded_profiles: options.max_recorded_profiles,
clear_color: options.clear_color,
enable_clear_scissor: options.enable_clear_scissor,
enable_advanced_blend_barriers: !ext_blend_equation_advanced_coherent,
clear_caches_with_quads: options.clear_caches_with_quads,
- clear_alpha_targets_with_quads,
last_time: 0,
- gpu_profiler,
- vaos,
+ gpu_profile,
+ vaos: RendererVAOs {
+ prim_vao,
+ blur_vao,
+ clip_vao,
+ border_vao,
+ scale_vao,
+ gradient_vao,
+ resolve_vao,
+ line_vao,
+ svg_filter_vao,
+ composite_vao,
+ clear_vao,
+ },
vertex_data_textures,
current_vertex_data_textures: 0,
pipeline_info: PipelineInfo::default(),
dither_matrix_texture,
external_image_handler: None,
+ output_image_handler: None,
size_of_ops: make_size_of_ops(),
+ output_targets: FastHashMap::default(),
cpu_profiles: VecDeque::new(),
gpu_profiles: VecDeque::new(),
gpu_cache_texture,
gpu_cache_debug_chunks: Vec::new(),
gpu_cache_frame_id: FrameId::INVALID,
gpu_cache_overflow: false,
- texture_upload_pbo_pool,
- staging_texture_pool,
+ texture_cache_upload_pbo,
texture_resolver,
renderer_errors: Vec::new(),
async_frame_recorder: None,
@@ -1348,22 +2720,14 @@ impl Renderer {
current_compositor_kind: compositor_kind,
allocated_native_surfaces: FastHashSet::default(),
debug_overlay_state: DebugOverlayState::new(),
- buffer_damage_tracker: BufferDamageTracker::default(),
- max_primitive_instance_count,
- enable_instancing: options.enable_instancing,
+ prev_dirty_rect: DeviceRect::zero(),
};
        // We initially set the flags to default and now call set_debug_flags
        // to ensure any potential transition when enabling a flag is run.
renderer.set_debug_flags(debug_flags);
- let sender = RenderApiSender::new(
- api_tx,
- scene_tx,
- low_priority_scene_tx,
- blob_image_handler,
- font_instances,
- );
+ let sender = RenderApiSender::new(api_tx, blob_image_handler, font_instances);
Ok((renderer, sender))
}
@@ -1395,12 +2759,8 @@ impl Renderer {
self.device.preferred_color_formats().external
}
- pub fn required_texture_stride_alignment(&self, format: ImageFormat) -> usize {
- self.device.required_pbo_stride().num_bytes(format).get()
- }
-
- pub fn set_clear_color(&mut self, color: Option<ColorF>) {
- self.clear_color = color;
+ pub fn optimal_texture_stride_alignment(&self, format: ImageFormat) -> usize {
+ self.device.optimal_pbo_stride().num_bytes(format).get()
}
pub fn flush_pipeline_info(&mut self) -> PipelineInfo {
@@ -1417,7 +2777,6 @@ impl Renderer {
/// Should be called before `render()`, as texture cache updates are done here.
pub fn update(&mut self) {
profile_scope!("update");
-
// Pull any pending results and return the most recent.
while let Ok(msg) = self.result_rx.try_recv() {
match msg {
@@ -1429,31 +2788,30 @@ impl Renderer {
}
ResultMsg::PublishDocument(
document_id,
- mut doc,
+ doc,
resource_update_list,
+ profile_counters,
) => {
- // Add a new document to the active set
-
- // If the document we are replacing must be drawn (in order to
- // update the texture cache), issue a render just to
- // off-screen targets, ie pass None to render_impl. We do this
- // because a) we don't need to render to the main framebuffer
- // so it is cheaper not to, and b) doing so without a
- // subsequent present would break partial present.
- if let Some(mut prev_doc) = self.active_documents.remove(&document_id) {
- doc.profile.merge(&mut prev_doc.profile);
-
- if prev_doc.frame.must_be_drawn() {
- self.render_impl(
- document_id,
- &mut prev_doc,
- None,
- 0,
- ).ok();
- }
+ if doc.is_new_scene {
+ self.new_scene_indicator.changed();
}
- self.active_documents.insert(document_id, doc);
+ // Add a new document to the active set, expressed as a `Vec` in order
+ // to re-order based on `DocumentLayer` during rendering.
+ match self.active_documents.iter().position(|&(id, _)| id == document_id) {
+ Some(pos) => {
+ // If the document we are replacing must be drawn
+ // (in order to update the texture cache), issue
+ // a render just to off-screen targets.
+ if self.active_documents[pos].1.frame.must_be_drawn() {
+ let device_size = self.device_size;
+ self.render_impl(device_size).ok();
+ }
+
+ self.active_documents[pos].1 = doc;
+ }
+ None => self.active_documents.push((document_id, doc)),
+ }
// IMPORTANT: The pending texture cache updates must be applied
// *after* the previous frame has been rendered above
@@ -1469,6 +2827,7 @@ impl Renderer {
self.pending_texture_cache_updates |= !resource_update_list.texture_updates.updates.is_empty();
self.pending_texture_updates.push(resource_update_list.texture_updates);
self.pending_native_surface_updates.extend(resource_update_list.native_surface_updates);
+ self.backend_profile_counters = profile_counters;
self.documents_seen.insert(document_id);
}
ResultMsg::UpdateGpuCache(mut list) => {
@@ -1511,22 +2870,15 @@ impl Renderer {
// if any of the existing documents have not rendered yet, and
// have picture/texture cache targets, force a render so that
// those targets are updated.
- let active_documents = mem::replace(
- &mut self.active_documents,
- FastHashMap::default(),
- );
- for (doc_id, mut doc) in active_documents {
- if doc.frame.must_be_drawn() {
- // As this render will not be presented, we must pass None to
- // render_impl. This avoids interfering with partial present
- // logic, as well as being more efficient.
- self.render_impl(
- doc_id,
- &mut doc,
- None,
- 0,
- ).ok();
- }
+ let must_be_drawn = self.active_documents
+ .iter()
+ .any(|(_, doc)| {
+ doc.frame.must_be_drawn()
+ });
+
+ if must_be_drawn {
+ let device_size = self.device_size;
+ self.render_impl(device_size).ok();
}
}
@@ -1544,11 +2896,19 @@ impl Renderer {
// the device module asserts if we delete textures while
// not in a frame.
if memory_pressure {
- self.texture_upload_pbo_pool.on_memory_pressure(&mut self.device);
- self.staging_texture_pool.delete_textures(&mut self.device);
+ self.texture_resolver.on_memory_pressure(
+ &mut self.device,
+ );
}
self.device.end_frame();
+                // If we receive a `PublishDocument` message followed by this one
+                // within the same update, we must cancel the frame, because the
+                // memory pressure event may have deleted resources that are
+                // still in use by the frame.
+ if memory_pressure {
+ self.active_documents.clear();
+ }
}
ResultMsg::AppendNotificationRequests(mut notifications) => {
// We need to know specifically if there are any pending
@@ -1572,6 +2932,10 @@ impl Renderer {
self.pending_shader_updates.push(path);
}
ResultMsg::DebugOutput(output) => match output {
+ DebugOutput::FetchDocuments(string) |
+ DebugOutput::FetchClipScrollTree(string) => {
+ self.debug_server.send(string);
+ }
#[cfg(feature = "capture")]
DebugOutput::SaveCapture(config, deferred) => {
self.save_capture(config, deferred);
@@ -1589,12 +2953,234 @@ impl Renderer {
}
}
+ #[cfg(not(feature = "debugger"))]
+ fn get_screenshot_for_debugger(&mut self) -> String {
+        // Avoid an unused-field warning.
+ let _ = &self.debug_server;
+ String::new()
+ }
+
+ #[cfg(feature = "debugger")]
+ fn get_screenshot_for_debugger(&mut self) -> String {
+ use api::{ImageDescriptor, ImageDescriptorFlags};
+
+ let desc = ImageDescriptor::new(1024, 768, ImageFormat::BGRA8, ImageDescriptorFlags::IS_OPAQUE);
+ let data = self.device.read_pixels(&desc);
+ let screenshot = debug_server::Screenshot::new(desc.size, data);
+
+ serde_json::to_string(&screenshot).unwrap()
+ }
+
+ #[cfg(not(feature = "debugger"))]
+ fn get_passes_for_debugger(&self) -> String {
+        // Avoid an unused-field warning.
+ let _ = &self.debug_server;
+ String::new()
+ }
+
+ #[cfg(feature = "debugger")]
+ fn debug_alpha_target(target: &AlphaRenderTarget) -> debug_server::Target {
+ let mut debug_target = debug_server::Target::new("A8");
+
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Scalings",
+ target.scalings.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Zero Clears",
+ target.zero_clears.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "One Clears",
+ target.one_clears.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Clip,
+ "BoxShadows [p]",
+ target.clip_batcher.primary_clips.box_shadows.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Clip,
+ "BoxShadows [s]",
+ target.clip_batcher.secondary_clips.box_shadows.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Vertical Blur",
+ target.vertical_blurs.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Horizontal Blur",
+ target.horizontal_blurs.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Clip,
+ "Slow Rectangles [p]",
+ target.clip_batcher.primary_clips.slow_rectangles.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Clip,
+ "Fast Rectangles [p]",
+ target.clip_batcher.primary_clips.fast_rectangles.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Clip,
+ "Slow Rectangles [s]",
+ target.clip_batcher.secondary_clips.slow_rectangles.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Clip,
+ "Fast Rectangles [s]",
+ target.clip_batcher.secondary_clips.fast_rectangles.len(),
+ );
+ for (_, items) in target.clip_batcher.primary_clips.images.iter() {
+ debug_target.add(debug_server::BatchKind::Clip, "Image mask [p]", items.len());
+ }
+ for (_, items) in target.clip_batcher.secondary_clips.images.iter() {
+ debug_target.add(debug_server::BatchKind::Clip, "Image mask [s]", items.len());
+ }
+
+ debug_target
+ }
+
+ #[cfg(feature = "debugger")]
+ fn debug_color_target(target: &ColorRenderTarget) -> debug_server::Target {
+ let mut debug_target = debug_server::Target::new("RGBA8");
+
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Scalings",
+ target.scalings.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Readbacks",
+ target.readbacks.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Vertical Blur",
+ target.vertical_blurs.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Horizontal Blur",
+ target.horizontal_blurs.len(),
+ );
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "SVG Filters",
+ target.svg_filters.iter().map(|(_, batch)| batch.len()).sum(),
+ );
+
+ for alpha_batch_container in &target.alpha_batch_containers {
+ for batch in alpha_batch_container.opaque_batches.iter().rev() {
+ debug_target.add(
+ debug_server::BatchKind::Opaque,
+ batch.key.kind.debug_name(),
+ batch.instances.len(),
+ );
+ }
+
+ for batch in &alpha_batch_container.alpha_batches {
+ debug_target.add(
+ debug_server::BatchKind::Alpha,
+ batch.key.kind.debug_name(),
+ batch.instances.len(),
+ );
+ }
+ }
+
+ debug_target
+ }
+
+ #[cfg(feature = "debugger")]
+ fn debug_texture_cache_target(target: &TextureCacheRenderTarget) -> debug_server::Target {
+ let mut debug_target = debug_server::Target::new("Texture Cache");
+
+ debug_target.add(
+ debug_server::BatchKind::Cache,
+ "Horizontal Blur",
+ target.horizontal_blurs.len(),
+ );
+
+ debug_target
+ }
+
+ #[cfg(feature = "debugger")]
+ fn get_passes_for_debugger(&self) -> String {
+ let mut debug_passes = debug_server::PassList::new();
+
+ for &(_, ref render_doc) in &self.active_documents {
+ for pass in &render_doc.frame.passes {
+ let mut debug_targets = Vec::new();
+ match pass.kind {
+ RenderPassKind::MainFramebuffer { ref main_target, .. } => {
+ debug_targets.push(Self::debug_color_target(main_target));
+ }
+ RenderPassKind::OffScreen { ref alpha, ref color, ref texture_cache, .. } => {
+ debug_targets.extend(alpha.targets.iter().map(Self::debug_alpha_target));
+ debug_targets.extend(color.targets.iter().map(Self::debug_color_target));
+ debug_targets.extend(texture_cache.iter().map(|(_, target)| Self::debug_texture_cache_target(target)));
+ }
+ }
+
+ debug_passes.add(debug_server::Pass { targets: debug_targets });
+ }
+ }
+
+ serde_json::to_string(&debug_passes).unwrap()
+ }
+
+ #[cfg(not(feature = "debugger"))]
+ fn get_render_tasks_for_debugger(&self) -> String {
+ String::new()
+ }
+
+ #[cfg(feature = "debugger")]
+ fn get_render_tasks_for_debugger(&self) -> String {
+ let mut debug_root = debug_server::RenderTaskList::new();
+
+ for &(_, ref render_doc) in &self.active_documents {
+ let debug_node = debug_server::TreeNode::new("document render tasks");
+ let mut builder = debug_server::TreeNodeBuilder::new(debug_node);
+
+ let render_tasks = &render_doc.frame.render_tasks;
+ match render_tasks.tasks.first() {
+ Some(main_task) => main_task.print_with(&mut builder, render_tasks),
+ None => continue,
+ };
+
+ debug_root.add(builder.build());
+ }
+
+ serde_json::to_string(&debug_root).unwrap()
+ }
+
fn handle_debug_command(&mut self, command: DebugCommand) {
match command {
DebugCommand::EnableDualSourceBlending(_) |
DebugCommand::SetPictureTileSize(_) => {
panic!("Should be handled by render backend");
}
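+            // These are produced by the render backend and arrive as
+            // `ResultMsg::DebugOutput`, handled in `update()` above, so
+            // there is nothing to do here.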
+ DebugCommand::FetchDocuments |
+ DebugCommand::FetchClipScrollTree => {}
+ DebugCommand::FetchRenderTasks => {
+ let json = self.get_render_tasks_for_debugger();
+ self.debug_server.send(json);
+ }
+ DebugCommand::FetchPasses => {
+ let json = self.get_passes_for_debugger();
+ self.debug_server.send(json);
+ }
+ DebugCommand::FetchScreenshot => {
+ let json = self.get_screenshot_for_debugger();
+ self.debug_server.send(json);
+ }
DebugCommand::SaveCapture(..) |
DebugCommand::LoadCapture(..) |
DebugCommand::StartCaptureSequence(..) |
@@ -1603,11 +3189,22 @@ impl Renderer {
}
DebugCommand::ClearCaches(_)
| DebugCommand::SimulateLongSceneBuild(_)
+ | DebugCommand::SimulateLongLowPrioritySceneBuild(_)
| DebugCommand::EnableNativeCompositor(_)
| DebugCommand::SetBatchingLookback(_)
| DebugCommand::EnableMultithreading(_) => {}
DebugCommand::InvalidateGpuCache => {
- self.gpu_cache_texture.invalidate();
+ match self.gpu_cache_texture.bus {
+ GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+ info!("Invalidating GPU caches");
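+                    // Mark the full width of every row dirty so the next
+                    // flush re-uploads the entire cache texture.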
+ for row in rows {
+ row.add_dirty(0, MAX_VERTEX_TEXTURE_WIDTH);
+ }
+ }
+ GpuCacheBus::Scatter { .. } => {
+ warn!("Unable to invalidate scattered GPU cache");
+ }
+ }
}
DebugCommand::SetFlags(flags) => {
self.set_debug_flags(flags);
@@ -1620,6 +3217,11 @@ impl Renderer {
self.external_image_handler = Some(handler);
}
+ /// Set a callback for handling external outputs.
+ pub fn set_output_image_handler(&mut self, handler: Box<dyn OutputImageHandler>) {
+ self.output_image_handler = Some(handler);
+ }
+
/// Retrieve (and clear) the current list of recorded frame profiles.
pub fn get_frame_profiles(&mut self) -> (Vec<CpuProfile>, Vec<GpuProfile>) {
let cpu_profiles = self.cpu_profiles.drain(..).collect();
@@ -1636,46 +3238,13 @@ impl Renderer {
/// Renders the current frame.
///
/// A Frame is supplied by calling [`generate_frame()`][webrender_api::Transaction::generate_frame].
- /// buffer_age is the age of the current backbuffer. It is only relevant if partial present
- /// is active, otherwise 0 should be passed here.
pub fn render(
&mut self,
device_size: DeviceIntSize,
- buffer_age: usize,
) -> Result<RenderResults, Vec<RendererError>> {
self.device_size = Some(device_size);
- // TODO(gw): We want to make the active document that is
- // being rendered configurable via the public
- // API in future. For now, just select the last
- // added document as the active one to render
- // (Gecko only ever creates a single document
- // per renderer right now).
- let doc_id = self.active_documents.keys().last().cloned();
-
- let result = match doc_id {
- Some(doc_id) => {
- // Remove the doc from the map to appease the borrow checker
- let mut doc = self.active_documents
- .remove(&doc_id)
- .unwrap();
-
- let result = self.render_impl(
- doc_id,
- &mut doc,
- Some(device_size),
- buffer_age,
- );
-
- self.active_documents.insert(doc_id, doc);
-
- result
- }
- None => {
- self.last_time = precise_time_ns();
- Ok(RenderResults::default())
- }
- };
+ let result = self.render_impl(Some(device_size));
drain_filter(
&mut self.notifications,
@@ -1702,7 +3271,10 @@ impl Renderer {
DebugFlags::RENDER_TARGET_DBG |
DebugFlags::TEXTURE_CACHE_DBG |
DebugFlags::EPOCHS |
+ DebugFlags::NEW_FRAME_INDICATOR |
+ DebugFlags::NEW_SCENE_INDICATOR |
DebugFlags::GPU_CACHE_DBG |
+ DebugFlags::SLOW_FRAME_INDICATOR |
DebugFlags::PICTURE_CACHING_DBG |
DebugFlags::PRIMITIVE_DBG |
DebugFlags::ZOOM_DBG
@@ -1738,21 +3310,13 @@ impl Renderer {
}
/// Bind a draw target for the debug / profiler overlays, if required.
- fn bind_debug_overlay(&mut self, device_size: DeviceIntSize) -> Option<DrawTarget> {
+ fn bind_debug_overlay(&mut self) {
        // Debug overlay setup is only required in native compositing mode
if self.debug_overlay_state.is_enabled {
if let CompositorKind::Native { .. } = self.current_compositor_kind {
let compositor = self.compositor_config.compositor().unwrap();
let surface_size = self.debug_overlay_state.current_size.unwrap();
- // Ensure old surface is invalidated before binding
- compositor.invalidate_tile(
- NativeTileId::DEBUG_OVERLAY,
- DeviceIntRect::new(
- DeviceIntPoint::zero(),
- surface_size,
- ),
- );
// Bind the native surface
let surface_info = compositor.bind(
NativeTileId::DEBUG_OVERLAY,
@@ -1777,19 +3341,10 @@ impl Renderer {
// When native compositing, clear the debug overlay each frame.
self.device.clear_target(
Some([0.0, 0.0, 0.0, 0.0]),
- None, // debug renderer does not use depth
+ Some(1.0),
None,
);
-
- Some(draw_target)
- } else {
- // If we're not using the native compositor, then the default
- // frame buffer is already bound. Create a DrawTarget for it and
- // return it.
- Some(DrawTarget::new_default(device_size, self.device.surface_origin_is_top_left()))
}
- } else {
- None
}
}
@@ -1804,35 +3359,32 @@ impl Renderer {
compositor.add_surface(
NativeSurfaceId::DEBUG_OVERLAY,
- CompositorSurfaceTransform::identity(),
+ DeviceIntPoint::zero(),
DeviceIntRect::new(
DeviceIntPoint::zero(),
self.debug_overlay_state.current_size.unwrap(),
),
- ImageRendering::Auto,
);
}
}
}
- // If device_size is None, don't render to the main frame buffer. This is useful to
- // update texture cache render tasks but avoid doing a full frame render. If the
- // render is not going to be presented, then this must be set to None, as performing a
- // composite without a present will confuse partial present.
+    // If device_size is None, don't render to the main frame buffer.
+    // This is useful for updating texture cache render tasks while
+    // avoiding a full frame render.
fn render_impl(
&mut self,
- doc_id: DocumentId,
- active_doc: &mut RenderedDocument,
device_size: Option<DeviceIntSize>,
- buffer_age: usize,
) -> Result<RenderResults, Vec<RendererError>> {
profile_scope!("render");
let mut results = RenderResults::default();
- self.profile.start_time(profiler::RENDERER_TIME);
-
- self.staging_texture_pool.begin_frame();
+ if self.active_documents.is_empty() {
+ self.last_time = precise_time_ns();
+ return Ok(results);
+ }
- let compositor_kind = active_doc.frame.composite_state.compositor_kind;
+ let compositor_kind = self.active_documents[0].1.frame.composite_state.compositor_kind;
// CompositorKind is updated
if self.current_compositor_kind != compositor_kind {
let enable = match (self.current_compositor_kind, compositor_kind) {
@@ -1849,19 +3401,21 @@ impl Renderer {
(CompositorKind::Draw { .. }, CompositorKind::Native { .. }) => {
true
}
- (current_compositor_kind, active_doc_compositor_kind) => {
- warn!("Compositor mismatch, assuming this is Wrench running. Current {:?}, active {:?}",
- current_compositor_kind, active_doc_compositor_kind);
- false
+ (_, _) => {
+ unreachable!();
}
};
- if let Some(config) = self.compositor_config.compositor() {
- config.enable_native_compositor(enable);
- }
+ self.compositor_config
+ .compositor()
+ .unwrap()
+ .enable_native_compositor(enable);
self.current_compositor_kind = compositor_kind;
}
+ let mut frame_profiles = Vec::new();
+ let mut profile_timers = RendererProfileTimers::new();
+
// The texture resolver scope should be outside of any rendering, including
// debug rendering. This ensures that when we return render targets to the
// pool via glInvalidateFramebuffer, we don't do any debug rendering after
@@ -1870,14 +3424,28 @@ impl Renderer {
// resolve step when the debug overlay is enabled.
self.texture_resolver.begin_frame();
- if let Some(device_size) = device_size {
- self.update_gpu_profile(device_size);
- }
+ let profile_samplers = {
+ let _gm = self.gpu_profile.start_marker("build samples");
+ // Block CPU waiting for last frame's GPU profiles to arrive.
+ // In general this shouldn't block unless heavily GPU limited.
+ let (gpu_frame_id, timers, samplers) = self.gpu_profile.build_samples();
+
+ if self.max_recorded_profiles > 0 {
+ while self.gpu_profiles.len() >= self.max_recorded_profiles {
+ self.gpu_profiles.pop_front();
+ }
+ self.gpu_profiles
+ .push_back(GpuProfile::new(gpu_frame_id, &timers));
+ }
+ profile_timers.gpu_samples = timers;
+ samplers
+ };
- let cpu_frame_id = {
- let _gm = self.gpu_profiler.start_marker("begin frame");
+
+ let cpu_frame_id = profile_timers.cpu_time.profile(|| {
+ let _gm = self.gpu_profile.start_marker("begin frame");
let frame_id = self.device.begin_frame();
- self.gpu_profiler.begin_frame(frame_id);
+ self.gpu_profile.begin_frame(frame_id);
self.device.disable_scissor();
self.device.disable_depth();
@@ -1888,33 +3456,41 @@ impl Renderer {
self.update_native_surfaces();
frame_id
- };
-
- if let Some(device_size) = device_size {
- // Inform the client that we are starting a composition transaction if native
- // compositing is enabled. This needs to be done early in the frame, so that
- // we can create debug overlays after drawing the main surfaces.
- if let CompositorKind::Native { .. } = self.current_compositor_kind {
- let compositor = self.compositor_config.compositor().unwrap();
- compositor.begin_frame();
- }
+ });
- // Update the state of the debug overlay surface, ensuring that
- // the compositor mode has a suitable surface to draw to, if required.
- self.update_debug_overlay(device_size);
+ // Inform the client that we are starting a composition transaction if native
+ // compositing is enabled. This needs to be done early in the frame, so that
+ // we can create debug overlays after drawing the main surfaces.
+ if let CompositorKind::Native { .. } = self.current_compositor_kind {
+ let compositor = self.compositor_config.compositor().unwrap();
+ compositor.begin_frame();
}
- let frame = &mut active_doc.frame;
- let profile = &mut active_doc.profile;
- assert!(self.current_compositor_kind == frame.composite_state.compositor_kind);
+ profile_timers.cpu_time.profile(|| {
+        // Note: another borrowck dance
+ let mut active_documents = mem::replace(&mut self.active_documents, Vec::default());
+ // sort by the document layer id
+ active_documents.sort_by_key(|&(_, ref render_doc)| render_doc.frame.layer);
- if self.shared_texture_cache_cleared {
- assert!(self.documents_seen.contains(&doc_id),
- "Cleared texture cache without sending new document frame.");
- }
+ #[cfg(feature = "replay")]
+ self.texture_resolver.external_images.extend(
+ self.owned_external_images.iter().map(|(key, value)| (*key, value.clone()))
+ );
+
+ let last_document_index = active_documents.len() - 1;
+ for (doc_index, (document_id, RenderedDocument { ref mut frame, .. })) in active_documents.iter_mut().enumerate() {
+ assert!(self.current_compositor_kind == frame.composite_state.compositor_kind);
- match self.prepare_gpu_cache(&frame.deferred_resolves) {
- Ok(..) => {
+ if self.shared_texture_cache_cleared {
+ assert!(self.documents_seen.contains(&document_id),
+ "Cleared texture cache without sending new document frame.");
+ }
+
+ frame.profile_counters.reset_targets();
+ if let Err(e) = self.prepare_gpu_cache(frame) {
+ self.renderer_errors.push(e);
+ continue;
+ }
assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id,
"Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})",
frame.gpu_cache_frame_id, self.gpu_cache_frame_id);
@@ -1927,55 +3503,81 @@ impl Renderer {
self.draw_frame(
frame,
device_size,
- buffer_age,
+ cpu_frame_id,
&mut results,
+ doc_index == 0,
);
- // TODO(nical): do this automatically by selecting counters in the wr profiler
// Profile marker for the number of invalidated picture cache
if thread_is_being_profiled() {
- let duration = Duration::new(0,0);
- if let Some(n) = self.profile.get(profiler::RENDERED_PICTURE_TILES) {
- let message = (n as usize).to_string();
- add_text_marker(cstr!("NumPictureCacheInvalidated"), &message, duration);
- }
+ let num_invalidated = self.profile_counters.rendered_picture_cache_tiles.get_accum();
+ let message = format!("NumPictureCacheInvalidated: {}", num_invalidated);
+ add_event_marker(&(CString::new(message).unwrap()));
}
if device_size.is_some() {
self.draw_frame_debug_items(&frame.debug_items);
}
+ if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
+ frame_profiles.push(frame.profile_counters.clone());
+ }
- self.profile.merge(profile);
- }
- Err(e) => {
- self.renderer_errors.push(e);
+ let dirty_regions =
+ mem::replace(&mut frame.recorded_dirty_regions, Vec::new());
+ results.recorded_dirty_regions.extend(dirty_regions);
+
+ // If we're the last document, don't call end_pass here, because we'll
+ // be moving on to drawing the debug overlays. See the comment above
+ // the end_pass call in draw_frame about debug draw overlays
+ // for a bit more context.
+ if doc_index != last_document_index {
+ self.texture_resolver.end_pass(&mut self.device, None, None);
+ }
}
- }
- self.unlock_external_images(&frame.deferred_resolves);
+ self.unlock_external_images();
+ self.active_documents = active_documents;
- let _gm = self.gpu_profiler.start_marker("end frame");
- self.gpu_profiler.end_frame();
+ let _gm = self.gpu_profile.start_marker("end frame");
+ self.gpu_profile.end_frame();
+ });
+
+ if let Some(device_size) = device_size {
+ // Update the state of the debug overlay surface, ensuring that
+ // the compositor mode has a suitable surface to draw to, if required.
+ self.update_debug_overlay(device_size);
- let debug_overlay = device_size.and_then(|device_size| {
// Bind a surface to draw the debug / profiler information to.
- self.bind_debug_overlay(device_size).map(|draw_target| {
- self.draw_render_target_debug(&draw_target);
- self.draw_texture_cache_debug(&draw_target);
- self.draw_gpu_cache_debug(device_size);
- self.draw_zoom_debug(device_size);
- self.draw_epoch_debug();
- draw_target
- })
- });
+ self.bind_debug_overlay();
- self.profile.end_time(profiler::RENDERER_TIME);
- self.profile.end_time_if_started(profiler::TOTAL_FRAME_CPU_TIME);
+ self.draw_render_target_debug(device_size);
+ self.draw_texture_cache_debug(device_size);
+ self.draw_gpu_cache_debug(device_size);
+ self.draw_zoom_debug(device_size);
+ self.draw_epoch_debug();
+ }
let current_time = precise_time_ns();
if device_size.is_some() {
- let time = profiler::ns_to_ms(current_time - self.last_time);
- self.profile.set(profiler::FRAME_TIME, time);
+ let ns = current_time - self.last_time;
+ self.profile_counters.frame_time.set(ns);
+ }
+
+ let frame_cpu_time_ns = self.backend_profile_counters.total_time.get()
+ + profile_timers.cpu_time.get();
+ let frame_cpu_time_ms = frame_cpu_time_ns as f64 / 1000000.0;
+ if frame_cpu_time_ms > 16.0 {
+ self.slow_frame_indicator.changed();
+ }
+
+ if self.backend_profile_counters.scene_changed {
+ let txn_time_ns = self.backend_profile_counters.txn.total_send_time.get()
+ + self.backend_profile_counters.txn.display_list_build_time.get()
+ + self.backend_profile_counters.txn.scene_build_time.get();
+ let txn_time_ms = txn_time_ns as f64 / 1000000.0;
+ if txn_time_ms > 100.0 {
+ self.slow_txn_indicator.changed();
+ }
}
if self.max_recorded_profiles > 0 {
@@ -1984,80 +3586,119 @@ impl Renderer {
}
let cpu_profile = CpuProfile::new(
cpu_frame_id,
- (self.profile.get_or(profiler::FRAME_BUILDING_TIME, 0.0) * 1000000.0) as u64,
- (self.profile.get_or(profiler::RENDERER_TIME, 0.0) * 1000000.0) as u64,
- self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize,
+ self.backend_profile_counters.total_time.get(),
+ profile_timers.cpu_time.get(),
+ self.profile_counters.draw_calls.get(),
);
self.cpu_profiles.push_back(cpu_profile);
}
- if thread_is_being_profiled() {
- let duration = Duration::new(0,0);
- let message = (self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize).to_string();
- add_text_marker(cstr!("NumDrawCalls"), &message, duration);
- }
-
- results.stats.texture_upload_mb = self.profile.get_or(profiler::TEXTURE_UPLOADS_MEM, 0.0);
- self.frame_counter += 1;
- results.stats.resource_upload_time = self.resource_upload_time;
- self.resource_upload_time = 0.0;
- results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time;
- self.gpu_cache_upload_time = 0.0;
-
- if let Some(stats) = active_doc.frame_stats.take() {
- // Copy the full frame stats to RendererStats
- results.stats.merge(&stats);
-
- self.profiler.update_frame_stats(stats);
- }
-
- // Note: this clears the values in self.profile.
- self.profiler.set_counters(&mut self.profile);
-
- // Note: profile counters must be set before this or they will count for next frame.
- self.profiler.update();
-
- if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) {
+ if self.debug_flags.contains(DebugFlags::PROFILER_DBG) {
if let Some(device_size) = device_size {
//TODO: take device/pixel ratio into equation?
if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
+ let style = if self.debug_flags.contains(DebugFlags::SMART_PROFILER) {
+ ProfileStyle::Smart
+ } else if self.debug_flags.contains(DebugFlags::COMPACT_PROFILER) {
+ ProfileStyle::Compact
+ } else {
+ ProfileStyle::Full
+ };
+
+ let screen_fraction = 1.0 / device_size.to_f32().area();
self.profiler.draw_profile(
- self.frame_counter,
+ &frame_profiles,
+ &self.backend_profile_counters,
+ &self.profile_counters,
+ &mut profile_timers,
+ &profile_samplers,
+ screen_fraction,
debug_renderer,
- device_size,
+ style,
);
}
}
}
- if self.debug_flags.contains(DebugFlags::ECHO_DRIVER_MESSAGES) {
- self.device.echo_driver_messages();
+ let mut x = 0.0;
+ if self.debug_flags.contains(DebugFlags::NEW_FRAME_INDICATOR) {
+ if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
+ self.new_frame_indicator.changed();
+ self.new_frame_indicator.draw(
+ x, 0.0,
+ ColorU::new(0, 110, 220, 255),
+ debug_renderer,
+ );
+ x += ChangeIndicator::width();
+ }
}
- if let Some(debug_renderer) = self.debug.try_get_mut() {
- let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN);
- let scale = if small_screen { 1.6 } else { 1.0 };
- // TODO(gw): Tidy this up so that compositor config integrates better
- // with the (non-compositor) surface y-flip options.
- let surface_origin_is_top_left = match self.current_compositor_kind {
- CompositorKind::Native { .. } => true,
- CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(),
- };
- // If there is a debug overlay, render it. Otherwise, just clear
- // the debug renderer.
- debug_renderer.render(
- &mut self.device,
- debug_overlay.and(device_size),
- scale,
- surface_origin_is_top_left,
- );
+ if self.debug_flags.contains(DebugFlags::NEW_SCENE_INDICATOR) {
+ if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
+ self.new_scene_indicator.draw(
+ x, 0.0,
+ ColorU::new(0, 220, 110, 255),
+ debug_renderer,
+ );
+ x += ChangeIndicator::width();
+ }
}
- self.staging_texture_pool.end_frame(&mut self.device);
- self.texture_upload_pbo_pool.end_frame(&mut self.device);
- self.device.end_frame();
+ if self.debug_flags.contains(DebugFlags::SLOW_FRAME_INDICATOR) {
+ if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) {
+ self.slow_txn_indicator.draw(
+ x, 0.0,
+ ColorU::new(250, 80, 80, 255),
+ debug_renderer,
+ );
+ self.slow_frame_indicator.draw(
+ x, 10.0,
+ ColorU::new(220, 30, 10, 255),
+ debug_renderer,
+ );
+ }
+ }
- if debug_overlay.is_some() {
+ if self.debug_flags.contains(DebugFlags::ECHO_DRIVER_MESSAGES) {
+ self.device.echo_driver_messages();
+ }
+
+ results.stats.texture_upload_kb = self.profile_counters.texture_data_uploaded.get();
+ self.backend_profile_counters.reset();
+ self.profile_counters.reset();
+ self.profile_counters.frame_counter.inc();
+ results.stats.resource_upload_time = self.resource_upload_time;
+ self.resource_upload_time = 0;
+ results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time;
+ self.gpu_cache_upload_time = 0;
+
+ profile_timers.cpu_time.profile(|| {
+ if let Some(debug_renderer) = self.debug.try_get_mut() {
+ let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN);
+ let scale = if small_screen { 1.6 } else { 1.0 };
+ // TODO(gw): Tidy this up so that compositor config integrates better
+ // with the (non-compositor) surface y-flip options.
+ let surface_origin_is_top_left = match self.current_compositor_kind {
+ CompositorKind::Native { .. } => true,
+ CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(),
+ };
+ debug_renderer.render(
+ &mut self.device,
+ device_size,
+ scale,
+ surface_origin_is_top_left,
+ );
+ }
+ // See comment for texture_resolver.begin_frame() for explanation
+ // of why this must be done after all rendering, including debug
+ // overlays. The end_frame() call implicitly calls end_pass(), which
+ // should ensure any left over render targets get invalidated and
+ // returned to the pool correctly.
+ self.texture_resolver.end_frame(&mut self.device, cpu_frame_id);
+ self.device.end_frame();
+ });
+
+ if device_size.is_some() {
self.last_time = current_time;
// Unbind the target for the debug overlay. No debug or profiler drawing
@@ -2065,15 +3706,13 @@ impl Renderer {
self.unbind_debug_overlay();
}
- if device_size.is_some() {
- // Inform the client that we are finished this composition transaction if native
- // compositing is enabled. This must be called after any debug / profiling compositor
- // surfaces have been drawn and added to the visual tree.
- if let CompositorKind::Native { .. } = self.current_compositor_kind {
- profile_scope!("compositor.end_frame");
- let compositor = self.compositor_config.compositor().unwrap();
- compositor.end_frame();
- }
+ // Inform the client that we are finished this composition transaction if native
+ // compositing is enabled. This must be called after any debug / profiling compositor
+ // surfaces have been drawn and added to the visual tree.
+ if let CompositorKind::Native { .. } = self.current_compositor_kind {
+ profile_scope!("compositor.end_frame");
+ let compositor = self.compositor_config.compositor().unwrap();
+ compositor.end_frame();
}
self.documents_seen.clear();
@@ -2086,132 +3725,117 @@ impl Renderer {
}
}
- fn update_gpu_profile(&mut self, device_size: DeviceIntSize) {
- let _gm = self.gpu_profiler.start_marker("build samples");
- // Block CPU waiting for last frame's GPU profiles to arrive.
- // In general this shouldn't block unless heavily GPU limited.
- let (gpu_frame_id, timers, samplers) = self.gpu_profiler.build_samples();
+ fn update_gpu_cache(&mut self) {
+ let _gm = self.gpu_profile.start_marker("gpu cache update");
+
+ // For an artificial stress test of GPU cache resizing,
+ // always pass an extra update list with at least one block in it.
+ let gpu_cache_height = self.gpu_cache_texture.get_height();
+ if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
+ self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
+ frame_id: FrameId::INVALID,
+ clear: false,
+ height: gpu_cache_height,
+ blocks: vec![[1f32; 4].into()],
+ updates: Vec::new(),
+ debug_commands: Vec::new(),
+ });
+ }
- if self.max_recorded_profiles > 0 {
- while self.gpu_profiles.len() >= self.max_recorded_profiles {
- self.gpu_profiles.pop_front();
- }
+ let (updated_blocks, max_requested_height) = self
+ .pending_gpu_cache_updates
+ .iter()
+ .fold((0, gpu_cache_height), |(count, height), list| {
+ (count + list.blocks.len(), cmp::max(height, list.height))
+ });
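+        // e.g. two pending lists with 10 and 4 blocks and heights 512 and 768
+        // fold to (14, max(gpu_cache_height, 768)).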
- self.gpu_profiles.push_back(GpuProfile::new(gpu_frame_id, &timers));
+ if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
+ self.gpu_cache_overflow = true;
+ self.renderer_errors.push(RendererError::MaxTextureSize);
}
- self.profiler.set_gpu_time_queries(timers);
-
- if !samplers.is_empty() {
- let screen_fraction = 1.0 / device_size.to_f32().area();
+ // Note: if we decide to switch to scatter-style GPU cache update
+        // permanently, we could make this code nicer with a `BufferUploader`-style
+        // helper, similar to how the `TextureUploader` API is used.
+ self.gpu_cache_texture.prepare_for_updates(
+ &mut self.device,
+ updated_blocks,
+ max_requested_height,
+ );
- fn accumulate_sampler_value(description: &str, samplers: &[GpuSampler]) -> f32 {
- let mut accum = 0.0;
- for sampler in samplers {
- if sampler.tag.label != description {
- continue;
- }
+ for update_list in self.pending_gpu_cache_updates.drain(..) {
+ assert!(update_list.height <= max_requested_height);
+ if update_list.frame_id > self.gpu_cache_frame_id {
+ self.gpu_cache_frame_id = update_list.frame_id
+ }
+ self.gpu_cache_texture
+ .update(&mut self.device, &update_list);
+ }
- accum += sampler.count as f32;
- }
+ let mut upload_time = TimeProfileCounter::new("GPU cache upload time", false, Some(0.0..2.0));
+ let updated_rows = upload_time.profile(|| {
+ self.gpu_cache_texture.flush(&mut self.device)
+ });
+ self.gpu_cache_upload_time += upload_time.get();
- accum
- }
+ let counters = &mut self.backend_profile_counters.resources.gpu_cache;
+ counters.updated_rows.set(updated_rows);
+ counters.updated_blocks.set(updated_blocks);
+ }
- let alpha_targets = accumulate_sampler_value(&"Alpha targets", &samplers) * screen_fraction;
- let transparent_pass = accumulate_sampler_value(&"Transparent pass", &samplers) * screen_fraction;
- let opaque_pass = accumulate_sampler_value(&"Opaque pass", &samplers) * screen_fraction;
- self.profile.set(profiler::ALPHA_TARGETS_SAMPLERS, alpha_targets);
- self.profile.set(profiler::TRANSPARENT_PASS_SAMPLERS, transparent_pass);
- self.profile.set(profiler::OPAQUE_PASS_SAMPLERS, opaque_pass);
- self.profile.set(profiler::TOTAL_SAMPLERS, alpha_targets + transparent_pass + opaque_pass);
+ fn prepare_gpu_cache(&mut self, frame: &Frame) -> Result<(), RendererError> {
+ if self.pending_gpu_cache_clear {
+ let use_scatter =
+ matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
+ let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?;
+ let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
+ old_cache.deinit(&mut self.device);
+ self.pending_gpu_cache_clear = false;
}
+
+ let deferred_update_list = self.update_deferred_resolves(&frame.deferred_resolves);
+ self.pending_gpu_cache_updates.extend(deferred_update_list);
+
+ self.update_gpu_cache();
+
+ // Note: the texture might have changed during the `update`,
+ // so we need to bind it here.
+ self.device.bind_texture(
+ TextureSampler::GpuCache,
+ self.gpu_cache_texture.texture.as_ref().unwrap(),
+ Swizzle::default(),
+ );
+
+ Ok(())
}
fn update_texture_cache(&mut self) {
profile_scope!("update_texture_cache");
- let _gm = self.gpu_profiler.start_marker("texture cache update");
+ let _gm = self.gpu_profile.start_marker("texture cache update");
let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]);
self.pending_texture_cache_updates = false;
- self.profile.start_time(profiler::TEXTURE_CACHE_UPDATE_TIME);
-
- let mut create_cache_texture_time = 0;
- let mut delete_cache_texture_time = 0;
-
- for update_list in pending_texture_updates.drain(..) {
- // Find any textures that will need to be deleted in this group of allocations.
- let mut pending_deletes = Vec::new();
- for allocation in &update_list.allocations {
- let old = self.texture_resolver.texture_cache_map.remove(&allocation.id);
- match allocation.kind {
- TextureCacheAllocationKind::Alloc(_) => {
- assert!(old.is_none(), "Renderer and backend disagree!");
- }
- TextureCacheAllocationKind::Reset(_) |
- TextureCacheAllocationKind::Free => {
- assert!(old.is_some(), "Renderer and backend disagree!");
- }
- }
- if let Some(texture) = old {
- // Regenerate the cache allocation info so we can search through deletes for reuse.
- let size = texture.get_dimensions();
- let info = TextureCacheAllocInfo {
- width: size.width,
- height: size.height,
- format: texture.get_format(),
- filter: texture.get_filter(),
- target: texture.get_target(),
- is_shared_cache: texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE),
- has_depth: texture.supports_depth(),
+ let mut upload_time = TimeProfileCounter::new("Resource upload time", false, Some(0.0..2.0));
+ upload_time.profile(|| {
+ for update_list in pending_texture_updates.drain(..) {
+ for allocation in update_list.allocations {
+ match allocation.kind {
+ TextureCacheAllocationKind::Alloc(_) => add_event_marker(c_str!("TextureCacheAlloc")),
+ TextureCacheAllocationKind::Realloc(_) => add_event_marker(c_str!("TextureCacheRealloc")),
+ TextureCacheAllocationKind::Reset(_) => add_event_marker(c_str!("TextureCacheReset")),
+ TextureCacheAllocationKind::Free => add_event_marker(c_str!("TextureCacheFree")),
};
- pending_deletes.push((texture, info));
- }
- }
- // Look for any alloc or reset that has matching alloc info and save it from being deleted.
- let mut reused_textures = VecDeque::with_capacity(pending_deletes.len());
- for allocation in &update_list.allocations {
- match allocation.kind {
- TextureCacheAllocationKind::Alloc(ref info) |
- TextureCacheAllocationKind::Reset(ref info) => {
- reused_textures.push_back(
- pending_deletes.iter()
- .position(|(_, old_info)| *old_info == *info)
- .map(|index| pending_deletes.swap_remove(index).0)
- );
- }
- TextureCacheAllocationKind::Free => {}
- }
- }
- // Now that we've saved as many deletions for reuse as we can, actually delete whatever is left.
- if !pending_deletes.is_empty() {
- let delete_texture_start = precise_time_ns();
- for (texture, _) in pending_deletes {
- add_event_marker(c_str!("TextureCacheFree"));
- self.device.delete_texture(texture);
- }
- delete_cache_texture_time += precise_time_ns() - delete_texture_start;
- }
-
- for allocation in update_list.allocations {
- match allocation.kind {
- TextureCacheAllocationKind::Alloc(_) => add_event_marker(c_str!("TextureCacheAlloc")),
- TextureCacheAllocationKind::Reset(_) => add_event_marker(c_str!("TextureCacheReset")),
- TextureCacheAllocationKind::Free => {}
- };
- match allocation.kind {
- TextureCacheAllocationKind::Alloc(ref info) |
- TextureCacheAllocationKind::Reset(ref info) => {
- let create_cache_texture_start = precise_time_ns();
- // Create a new native texture, as requested by the texture cache.
- // If we managed to reuse a deleted texture, then prefer that instead.
- //
- // Ensure no PBO is bound when creating the texture storage,
- // or GL will attempt to read data from there.
- let mut texture = reused_textures.pop_front().unwrap_or(None).unwrap_or_else(|| {
- self.device.create_texture(
- info.target,
+ let old = match allocation.kind {
+ TextureCacheAllocationKind::Alloc(ref info) |
+ TextureCacheAllocationKind::Realloc(ref info) |
+ TextureCacheAllocationKind::Reset(ref info) => {
+ // Create a new native texture, as requested by the texture cache.
+ //
+ // Ensure no PBO is bound when creating the texture storage,
+ // or GL will attempt to read data from there.
+ let mut texture = self.device.create_texture(
+ TextureTarget::Array,
info.format,
info.width,
info.height,
@@ -2219,230 +3843,302 @@ impl Renderer {
// This needs to be a render target because some render
// tasks get rendered into the texture cache.
Some(RenderTargetInfo { has_depth: info.has_depth }),
- )
- });
+ info.layer_count,
+ );
- if info.is_shared_cache {
- texture.flags_mut()
- .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE);
-
- // On Mali-Gxx devices we use batched texture uploads as it performs much better.
- // However, due to another driver bug we must ensure the textures are fully cleared,
- // otherwise we get visual artefacts when blitting to the texture cache.
- if self.device.use_batched_texture_uploads() &&
- !self.device.get_capabilities().supports_render_target_partial_update
- {
- self.clear_texture(&texture, [0.0; 4]);
- }
+ if info.is_shared_cache {
+ texture.flags_mut()
+ .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE);
- // Textures in the cache generally don't need to be cleared,
- // but we do so if the debug display is active to make it
- // easier to identify unallocated regions.
- if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
- self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR);
+ // Textures in the cache generally don't need to be cleared,
+ // but we do so if the debug display is active to make it
+ // easier to identify unallocated regions.
+ if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
+ self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR);
+ }
}
+
+ self.texture_resolver.texture_cache_map.insert(allocation.id, texture)
+ }
+ TextureCacheAllocationKind::Free => {
+ self.texture_resolver.texture_cache_map.remove(&allocation.id)
}
+ };
- create_cache_texture_time += precise_time_ns() - create_cache_texture_start;
+ match allocation.kind {
+ TextureCacheAllocationKind::Alloc(_) => {
+ assert!(old.is_none(), "Renderer and backend disagree!");
+ }
+ TextureCacheAllocationKind::Realloc(_) => {
+ self.device.blit_renderable_texture(
+ self.texture_resolver.texture_cache_map.get_mut(&allocation.id).unwrap(),
+ old.as_ref().unwrap(),
+ );
+ }
+ TextureCacheAllocationKind::Reset(_) |
+ TextureCacheAllocationKind::Free => {
+ assert!(old.is_some(), "Renderer and backend disagree!");
+ }
+ }
- self.texture_resolver.texture_cache_map.insert(allocation.id, texture);
+ if let Some(old) = old {
+ self.device.delete_texture(old);
}
- TextureCacheAllocationKind::Free => {}
- };
- }
+ }
- upload_to_texture_cache(self, update_list.updates);
- }
+ for (texture_id, updates) in update_list.updates {
+ let texture = &self.texture_resolver.texture_cache_map[&texture_id];
+ let device = &mut self.device;
- if create_cache_texture_time > 0 {
- self.profile.set(
- profiler::CREATE_CACHE_TEXTURE_TIME,
- profiler::ns_to_ms(create_cache_texture_time)
- );
- }
- if delete_cache_texture_time > 0 {
- self.profile.set(
- profiler::DELETE_CACHE_TEXTURE_TIME,
- profiler::ns_to_ms(delete_cache_texture_time)
- )
- }
+ // Calculate the total size of the buffer required to upload all updates.
+ let required_size = updates.iter().map(|update| {
+ // Perform any debug clears now. As this requires a mutable borrow of device,
+ // it must be done before all the updates which require a TextureUploader.
+ if let TextureUpdateSource::DebugClear = update.source {
+ let draw_target = DrawTarget::from_texture(
+ texture,
+ update.layer_index as usize,
+ false,
+ );
+ device.bind_draw_target(draw_target);
+ device.clear_target(
+ Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
+ None,
+ Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
+ );
- let t = self.profile.end_time(profiler::TEXTURE_CACHE_UPDATE_TIME);
- self.resource_upload_time += t;
+ 0
+ } else {
+ let (upload_size, _) = device.required_upload_size_and_stride(
+ update.rect.size,
+ texture.get_format(),
+ );
+ upload_size
+ }
+ }).sum();
- drain_filter(
- &mut self.notifications,
- |n| { n.when() == Checkpoint::FrameTexturesUpdated },
- |n| { n.notify(); },
- );
+ if required_size == 0 {
+ continue;
+ }
+
+ // For best performance we use a single TextureUploader for all uploads.
+ // Using individual TextureUploaders was causing performance issues on some drivers
+ // due to allocating too many PBOs.
+ let mut uploader = device.upload_texture(
+ texture,
+ &self.texture_cache_upload_pbo,
+ required_size
+ );
+
+ for update in updates {
+ let TextureCacheUpdate { rect, stride, offset, layer_index, format_override, source } = update;
+
+ let bytes_uploaded = match source {
+ TextureUpdateSource::Bytes { data } => {
+ let data = &data[offset as usize ..];
+ uploader.upload(
+ rect,
+ layer_index,
+ stride,
+ format_override,
+ data.as_ptr(),
+ data.len(),
+ )
+ }
+ TextureUpdateSource::External { id, channel_index } => {
+ let handler = self.external_image_handler
+ .as_mut()
+ .expect("Found external image, but no handler set!");
+ // The filter is only relevant for NativeTexture external images.
+ let dummy_data;
+ let data = match handler.lock(id, channel_index, ImageRendering::Auto).source {
+ ExternalImageSource::RawData(data) => {
+ &data[offset as usize ..]
+ }
+ ExternalImageSource::Invalid => {
+ // Create a local buffer to fill the pbo.
+ let bpp = texture.get_format().bytes_per_pixel();
+ let width = stride.unwrap_or(rect.size.width * bpp);
+ let total_size = width * rect.size.height;
+ // WR doesn't support the RGBAF32 format in the texture cache, so
+ // we use the u8 type here.
+ dummy_data = vec![0xFFu8; total_size as usize];
+ &dummy_data
+ }
+ ExternalImageSource::NativeTexture(eid) => {
+ panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
+ }
+ };
+ let size = uploader.upload(
+ rect,
+ layer_index,
+ stride,
+ format_override,
+ data.as_ptr(),
+ data.len()
+ );
+ handler.unlock(id, channel_index);
+ size
+ }
+ TextureUpdateSource::DebugClear => {
+ // DebugClear updates are handled separately.
+ 0
+ }
+ };
+ self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10);
+ }
+ }
+
+ if update_list.clears_shared_cache {
+ self.shared_texture_cache_cleared = true;
+ }
+ }
+
+ drain_filter(
+ &mut self.notifications,
+ |n| { n.when() == Checkpoint::FrameTexturesUpdated },
+ |n| { n.notify(); },
+ );
+ });
+ self.resource_upload_time += upload_time.get();
}
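
The upload path above deliberately sizes one staging buffer for the whole update list before creating a single TextureUploader, since allocating a PBO per update was the cause of the driver-level performance issues noted in the comment. A minimal sketch of that pre-sizing step, assuming a hypothetical 256-byte row alignment (the real stride comes from the device's `required_upload_size_and_stride`):

    // Hypothetical row alignment; the device reports the real value.
    const ROW_ALIGN: i32 = 256;

    fn required_upload_size(width: i32, height: i32, bytes_per_pixel: i32) -> usize {
        // Round each row up to the alignment, then multiply by the row count.
        let stride = (width * bytes_per_pixel + ROW_ALIGN - 1) / ROW_ALIGN * ROW_ALIGN;
        (stride * height) as usize
    }

    fn main() {
        // Two pending rect updates into an RGBA8 texture (4 bytes per pixel).
        let updates = [(100, 20), (33, 7)];
        let total: usize = updates
            .iter()
            .map(|&(w, h)| required_upload_size(w, h, 4))
            .sum();
        // Allocate one staging buffer of `total` bytes up front and reuse it
        // for every update, instead of allocating a buffer per update.
        assert_eq!(total, 12_032);
    }
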
- fn bind_textures(&mut self, textures: &BatchTextures) {
- for i in 0 .. 3 {
- self.texture_resolver.bind(
- &textures.input.colors[i],
+ pub(crate) fn draw_instanced_batch<T>(
+ &mut self,
+ data: &[T],
+ vertex_array_kind: VertexArrayKind,
+ textures: &BatchTextures,
+ stats: &mut RendererStats,
+ ) {
+ let mut swizzles = [Swizzle::default(); 3];
+ for i in 0 .. textures.colors.len() {
+ let swizzle = self.texture_resolver.bind(
+ &textures.colors[i],
TextureSampler::color(i),
&mut self.device,
);
+ if cfg!(debug_assertions) {
+ swizzles[i] = swizzle;
+ for j in 0 .. i {
+ if textures.colors[j] == textures.colors[i] && swizzles[j] != swizzle {
+ error!("Swizzling conflict in {:?}", textures);
+ }
+ }
+ }
}
- self.texture_resolver.bind(
- &textures.clip_mask,
- TextureSampler::ClipMask,
- &mut self.device,
- );
-
// TODO: this probably isn't the best place for this.
if let Some(ref texture) = self.dither_matrix_texture {
self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
}
+
+ self.draw_instanced_batch_with_previously_bound_textures(data, vertex_array_kind, stats)
}
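
The debug-assertions block above guards against binding one texture at two color samplers with incompatible swizzles, since a single underlying binding cannot satisfy both. A self-contained sketch of the same pairwise conflict check, with illustrative texture-id and swizzle types:

    #[derive(Clone, Copy, PartialEq, Debug)]
    enum Swizzle { Rgba, Bgra }

    // Returns the id of the first texture requested with two different
    // swizzles, mirroring the nested scan in the code above.
    fn find_swizzle_conflict(bindings: &[(u32, Swizzle)]) -> Option<u32> {
        for i in 0..bindings.len() {
            for j in 0..i {
                if bindings[j].0 == bindings[i].0 && bindings[j].1 != bindings[i].1 {
                    return Some(bindings[i].0);
                }
            }
        }
        None
    }

    fn main() {
        // Texture 7 is bound once as RGBA and once as BGRA: a conflict.
        let bindings = [(7, Swizzle::Rgba), (3, Swizzle::Bgra), (7, Swizzle::Bgra)];
        assert_eq!(find_swizzle_conflict(&bindings), Some(7));
    }
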
- fn draw_instanced_batch<T: Clone>(
+ pub(crate) fn draw_instanced_batch_with_previously_bound_textures<T>(
&mut self,
data: &[T],
vertex_array_kind: VertexArrayKind,
- textures: &BatchTextures,
stats: &mut RendererStats,
) {
- self.bind_textures(textures);
-
// If we end up with an empty draw call here, that means we have
// probably introduced unnecessary batch breaks during frame
// building - so we should be catching this earlier and removing
// the batch.
debug_assert!(!data.is_empty());
- let vao = &self.vaos[vertex_array_kind];
+ let vao = get_vao(vertex_array_kind, &self.vaos);
+
self.device.bind_vao(vao);
- let chunk_size = if self.debug_flags.contains(DebugFlags::DISABLE_BATCHING) {
- 1
- } else if vertex_array_kind == VertexArrayKind::Primitive {
- self.max_primitive_instance_count
- } else {
- data.len()
- };
+ let batched = !self.debug_flags.contains(DebugFlags::DISABLE_BATCHING);
- for chunk in data.chunks(chunk_size) {
- if self.enable_instancing {
- self.device
- .update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT, None);
- self.device
- .draw_indexed_triangles_instanced_u16(6, chunk.len() as i32);
- } else {
- self.device
- .update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT, NonZeroUsize::new(4));
+ if batched {
+ self.device
+ .update_vao_instances(vao, data, VertexUsageHint::Stream);
+ self.device
+ .draw_indexed_triangles_instanced_u16(6, data.len() as i32);
+ self.profile_counters.draw_calls.inc();
+ stats.total_draw_calls += 1;
+ } else {
+ for i in 0 .. data.len() {
self.device
- .draw_indexed_triangles(6 * chunk.len() as i32);
+ .update_vao_instances(vao, &data[i .. i + 1], VertexUsageHint::Stream);
+ self.device.draw_triangles_u16(0, 6);
+ self.profile_counters.draw_calls.inc();
+ stats.total_draw_calls += 1;
}
- self.profile.inc(profiler::DRAW_CALLS);
- stats.total_draw_calls += 1;
}
- self.profile.add(profiler::VERTICES, 6 * data.len());
+ self.profile_counters.vertices.add(6 * data.len());
}
fn handle_readback_composite(
&mut self,
draw_target: DrawTarget,
uses_scissor: bool,
+ source: &RenderTask,
backdrop: &RenderTask,
readback: &RenderTask,
) {
- // Extract the rectangle in the backdrop surface's device space of where
- // we need to read from.
- let readback_origin = match readback.kind {
- RenderTaskKind::Readback(ReadbackTask { readback_origin: Some(o), .. }) => o,
- RenderTaskKind::Readback(ReadbackTask { readback_origin: None, .. }) => {
- // If this is a dummy readback, just early out. We know that the
- // clear of the target will ensure the task rect is already zero alpha,
- // so it won't affect the rendering output.
- return;
- }
- _ => unreachable!(),
- };
-
if uses_scissor {
self.device.disable_scissor();
}
- let texture_source = TextureSource::TextureCache(
- readback.get_target_texture(),
- Swizzle::default(),
- );
let (cache_texture, _) = self.texture_resolver
- .resolve(&texture_source).expect("bug: no source texture");
+ .resolve(&TextureSource::PrevPassColor)
+ .unwrap();
// Before submitting the composite batch, do the
// framebuffer readbacks that are needed for each
// composite operation in this batch.
- let readback_rect = readback.get_target_rect();
- let backdrop_rect = backdrop.get_target_rect();
- let (backdrop_screen_origin, _) = match backdrop.kind {
+ let (readback_rect, readback_layer) = readback.get_target_rect();
+ let (backdrop_rect, _) = backdrop.get_target_rect();
+ let (backdrop_screen_origin, backdrop_scale) = match backdrop.kind {
+ RenderTaskKind::Picture(ref task_info) => (task_info.content_origin, task_info.device_pixel_scale),
+ _ => panic!("bug: composite on non-picture?"),
+ };
+ let (source_screen_origin, source_scale) = match source.kind {
RenderTaskKind::Picture(ref task_info) => (task_info.content_origin, task_info.device_pixel_scale),
_ => panic!("bug: composite on non-picture?"),
};
// Bind the FBO to blit the backdrop to.
- // Called per-instance in case the FBO changes. The device will skip
- // the GL call if the requested target is already bound.
+ // Called per-instance in case the layer (and therefore FBO)
+ // changes. The device will skip the GL call if the requested
+ // target is already bound.
let cache_draw_target = DrawTarget::from_texture(
cache_texture,
+ readback_layer.0 as usize,
false,
);
- // Get the rect that we ideally want, in space of the parent surface
- let wanted_rect = DeviceRect::new(
- readback_origin,
- readback_rect.size.to_f32(),
- );
+ let source_in_backdrop_space = source_screen_origin.to_f32() * (backdrop_scale.0 / source_scale.0);
- // Get the rect that is available on the parent surface. It may be smaller
- // than desired because this is a picture cache tile covering only part of
- // the wanted rect and/or because the parent surface was clipped.
- let avail_rect = DeviceRect::new(
- backdrop_screen_origin,
- backdrop_rect.size.to_f32(),
+ let mut src = DeviceIntRect::new(
+ (source_in_backdrop_space + (backdrop_rect.origin - backdrop_screen_origin).to_f32()).to_i32(),
+ readback_rect.size,
);
+ let mut dest = readback_rect.to_i32();
+ let device_to_framebuffer = Scale::new(1i32);
- if let Some(int_rect) = wanted_rect.intersection(&avail_rect) {
- // If there is a valid intersection, work out the correct origins and
- // sizes of the copy rects, and do the blit.
- let copy_size = int_rect.size.to_i32();
-
- let src_origin = backdrop_rect.origin.to_f32() +
- int_rect.origin.to_vector() -
- backdrop_screen_origin.to_vector();
-
- let src = DeviceIntRect::new(
- src_origin.to_i32(),
- copy_size,
- );
-
- let dest_origin = readback_rect.origin.to_f32() +
- int_rect.origin.to_vector() -
- readback_origin.to_vector();
-
- let dest = DeviceIntRect::new(
- dest_origin.to_i32(),
- copy_size,
- );
-
- // Should always be drawing to picture cache tiles or off-screen surface!
- debug_assert!(!draw_target.is_default());
- let device_to_framebuffer = Scale::new(1i32);
-
- self.device.blit_render_target(
- draw_target.into(),
- src * device_to_framebuffer,
- cache_draw_target,
- dest * device_to_framebuffer,
- TextureFilter::Linear,
- );
+ // Need to invert the y coordinates and flip the image vertically when
+ // reading back from the framebuffer.
+ if draw_target.is_default() {
+ src.origin.y = draw_target.dimensions().height as i32 - src.size.height - src.origin.y;
+ dest.origin.y += dest.size.height;
+ dest.size.height = -dest.size.height;
}
- // Restore draw target to current pass render target, and reset
+ self.device.blit_render_target(
+ draw_target.into(),
+ src * device_to_framebuffer,
+ cache_draw_target,
+ dest * device_to_framebuffer,
+ TextureFilter::Linear,
+ );
+
+ // Restore draw target to current pass render target + layer, and reset
// the read target.
self.device.bind_draw_target(draw_target);
self.device.reset_read_target();
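
The y-flip above compensates for the default framebuffer's bottom-left origin: the source rect is re-expressed in GL window coordinates, and the destination is given a negative height so the blit mirrors the image vertically. A worked sketch of just that arithmetic, with a plain `Rect` standing in for DeviceIntRect:

    #[derive(Debug, PartialEq)]
    struct Rect { x: i32, y: i32, w: i32, h: i32 }

    fn flip_for_default_framebuffer(mut src: Rect, mut dest: Rect, fb_height: i32) -> (Rect, Rect) {
        // Convert the source's top-left y into GL's bottom-left convention.
        src.y = fb_height - src.h - src.y;
        // A negative destination height asks the blit to mirror vertically.
        dest.y += dest.h;
        dest.h = -dest.h;
        (src, dest)
    }

    fn main() {
        let (src, dest) = flip_for_default_framebuffer(
            Rect { x: 0, y: 10, w: 100, h: 50 },
            Rect { x: 0, y: 0, w: 100, h: 50 },
            600,
        );
        assert_eq!(src, Rect { x: 0, y: 540, w: 100, h: 50 });
        assert_eq!(dest, Rect { x: 0, y: 50, w: 100, h: -50 });
    }
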
@@ -2457,25 +4153,30 @@ impl Renderer {
blits: &[BlitJob],
render_tasks: &RenderTaskGraph,
draw_target: DrawTarget,
+ content_origin: &DeviceIntPoint,
) {
if blits.is_empty() {
return;
}
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLIT);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_BLIT);
// TODO(gw): For now, we don't bother batching these by source texture.
// If it ever shows up as an issue, we can easily batch them.
for blit in blits {
- let (source, source_rect) = {
- // A blit from the child render task into this target.
- // TODO(gw): Support R8 format here once we start
- // creating mips for alpha masks.
- let task = &render_tasks[blit.source];
- let source_rect = task.get_target_rect();
- let source_texture = task.get_texture_source();
-
- (source_texture, source_rect)
+ let (source, layer, source_rect) = match blit.source {
+ BlitJobSource::Texture(texture_id, layer, source_rect) => {
+ // A blit from a texture into this target.
+ (texture_id, layer as usize, source_rect)
+ }
+ BlitJobSource::RenderTask(task_id) => {
+ // A blit from the child render task into this target.
+ // TODO(gw): Support R8 format here once we start
+ // creating mips for alpha masks.
+ let source = &render_tasks[task_id];
+ let (source_rect, layer) = source.get_target_rect();
+ (TextureSource::PrevPassColor, layer.0, source_rect)
+ }
};
debug_assert_eq!(source_rect.size, blit.target_rect.size);
@@ -2489,6 +4190,7 @@ impl Renderer {
let read_target = DrawTarget::from_texture(
texture,
+ layer,
false,
);
@@ -2496,7 +4198,7 @@ impl Renderer {
read_target.into(),
read_target.to_framebuffer_rect(source_rect),
draw_target,
- draw_target.to_framebuffer_rect(blit.target_rect),
+ draw_target.to_framebuffer_rect(blit.target_rect.translate(-content_origin.to_vector())),
TextureFilter::Linear,
);
}
@@ -2512,25 +4214,22 @@ impl Renderer {
return
}
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_SCALE);
-
- for (source, instances) in scalings {
- let buffer_kind = source.image_buffer_kind();
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SCALE);
- self.shaders
- .borrow_mut()
- .get_scale_shader(buffer_kind)
- .bind(
- &mut self.device,
- &projection,
- Some(self.texture_resolver.get_texture_size(source).to_f32()),
- &mut self.renderer_errors,
- );
+ self.shaders
+ .borrow_mut()
+ .cs_scale
+ .bind(
+ &mut self.device,
+ &projection,
+ &mut self.renderer_errors,
+ );
+ for (source, instances) in scalings {
self.draw_instanced_batch(
instances,
VertexArrayKind::Scale,
- &BatchTextures::composite_rgb(*source),
+ &BatchTextures::color(*source),
stats,
);
}
@@ -2547,12 +4246,11 @@ impl Renderer {
return;
}
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_SVG_FILTER);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SVG_FILTER);
self.shaders.borrow_mut().cs_svg_filter.bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors
);
@@ -2568,29 +4266,25 @@ impl Renderer {
&mut self,
target: &PictureCacheTarget,
draw_target: DrawTarget,
+ content_origin: DeviceIntPoint,
projection: &default::Transform3D<f32>,
render_tasks: &RenderTaskGraph,
stats: &mut RendererStats,
) {
profile_scope!("draw_picture_cache_target");
- self.profile.inc(profiler::RENDERED_PICTURE_TILES);
- let _gm = self.gpu_profiler.start_marker("picture cache target");
+ self.profile_counters.rendered_picture_cache_tiles.inc();
+ let _gm = self.gpu_profile.start_marker("picture cache target");
let framebuffer_kind = FramebufferKind::Other;
{
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
self.device.bind_draw_target(draw_target);
self.device.enable_depth_write();
self.set_blend(false, framebuffer_kind);
let clear_color = target.clear_color.map(|c| c.to_array());
- let scissor_rect = if self.device.get_capabilities().supports_render_target_partial_update {
- target.alpha_batch_container.task_scissor_rect
- } else {
- None
- };
- match scissor_rect {
+ match target.alpha_batch_container.task_scissor_rect {
// If updating only a dirty rect within a picture cache target, the
// clear must also be scissored to that dirty region.
Some(r) if self.clear_caches_with_quads => {
@@ -2610,13 +4304,12 @@ impl Renderer {
self.shaders.borrow_mut().ps_clear.bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors,
);
self.draw_instanced_batch(
&[instance],
VertexArrayKind::Clear,
- &BatchTextures::empty(),
+ &BatchTextures::no_texture(),
stats,
);
if clear_color.is_none() {
@@ -2627,7 +4320,10 @@ impl Renderer {
}
other => {
let scissor_rect = other.map(|rect| {
- draw_target.build_scissor_rect(Some(rect))
+ draw_target.build_scissor_rect(
+ Some(rect),
+ content_origin,
+ )
});
self.device.clear_target(clear_color, Some(1.0), scissor_rect);
}
@@ -2638,13 +4334,12 @@ impl Renderer {
self.draw_alpha_batch_container(
&target.alpha_batch_container,
draw_target,
+ content_origin,
framebuffer_kind,
projection,
render_tasks,
stats,
);
-
- self.device.invalidate_depth_target();
}
/// Draw an alpha batch container into a given draw target. This is used
@@ -2653,6 +4348,7 @@ impl Renderer {
&mut self,
alpha_batch_container: &AlphaBatchContainer,
draw_target: DrawTarget,
+ content_origin: DeviceIntPoint,
framebuffer_kind: FramebufferKind,
projection: &default::Transform3D<f32>,
render_tasks: &RenderTaskGraph,
@@ -2664,14 +4360,15 @@ impl Renderer {
self.device.enable_scissor();
let scissor_rect = draw_target.build_scissor_rect(
alpha_batch_container.task_scissor_rect,
+ content_origin,
);
self.device.set_scissor_rect(scissor_rect)
}
if !alpha_batch_container.opaque_batches.is_empty()
&& !self.debug_flags.contains(DebugFlags::DISABLE_OPAQUE_PASS) {
- let _gl = self.gpu_profiler.start_marker("opaque batches");
- let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+ let _gl = self.gpu_profile.start_marker("opaque batches");
+ let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
self.set_blend(false, framebuffer_kind);
// Note: depth equality is needed for split planes
self.device.enable_depth(DepthFunction::LessEqual);
@@ -2689,13 +4386,13 @@ impl Renderer {
}
self.shaders.borrow_mut()
- .get(&batch.key, batch.features, self.debug_flags, &self.device)
+ .get(&batch.key, batch.features, self.debug_flags)
.bind(
- &mut self.device, projection, None,
+ &mut self.device, projection,
&mut self.renderer_errors,
);
- let _timer = self.gpu_profiler.start_timer(batch.key.kind.sampler_tag());
+ let _timer = self.gpu_profile.start_timer(batch.key.kind.sampler_tag());
self.draw_instanced_batch(
&batch.instances,
VertexArrayKind::Primitive,
@@ -2705,20 +4402,37 @@ impl Renderer {
}
self.device.disable_depth_write();
- self.gpu_profiler.finish_sampler(opaque_sampler);
+ self.gpu_profile.finish_sampler(opaque_sampler);
} else {
self.device.disable_depth();
}
if !alpha_batch_container.alpha_batches.is_empty()
&& !self.debug_flags.contains(DebugFlags::DISABLE_ALPHA_PASS) {
- let _gl = self.gpu_profiler.start_marker("alpha batches");
- let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+ let _gl = self.gpu_profile.start_marker("alpha batches");
+ let transparent_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
self.set_blend(true, framebuffer_kind);
let mut prev_blend_mode = BlendMode::None;
let shaders_rc = self.shaders.clone();
+ // If the device supports pixel local storage, initialize the PLS buffer for
+ // the transparent pass. This involves reading the current framebuffer value
+ // and storing that in PLS.
+ // TODO(gw): This is quite expensive and relies on framebuffer fetch being
+ // available. We can probably switch the opaque pass over to use
+ // PLS too, and remove this pass completely.
+ if self.device.get_capabilities().supports_pixel_local_storage {
+ // TODO(gw): If using PLS, the fixed function blender is disabled. It's possible
+ // we could take advantage of this by skipping batching on the blend
+ // mode in these cases.
+ self.init_pixel_local_storage(
+ alpha_batch_container.task_rect,
+ projection,
+ stats,
+ );
+ }
+
for batch in &alpha_batch_container.alpha_batches {
if should_skip_batch(&batch.key.kind, self.debug_flags) {
continue;
@@ -2729,7 +4443,6 @@ impl Renderer {
&batch.key,
batch.features | BatchFeatures::ALPHA_PASS,
self.debug_flags,
- &self.device,
);
if batch.key.blend_mode != prev_blend_mode {
@@ -2767,7 +4480,6 @@ impl Renderer {
shader.bind(
&mut self.device,
projection,
- None,
&mut self.renderer_errors,
);
self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass0 as _);
@@ -2778,37 +4490,28 @@ impl Renderer {
}
self.device.set_blend_mode_advanced(mode);
}
- BlendMode::MultiplyDualSource => {
- self.device.set_blend_mode_multiply_dual_source();
- }
- BlendMode::Screen => {
- self.device.set_blend_mode_screen();
- }
- BlendMode::Exclusion => {
- self.device.set_blend_mode_exclusion();
- }
}
prev_blend_mode = batch.key.blend_mode;
}
// Handle special case readback for composites.
- if let BatchKind::Brush(BrushBatchKind::MixBlend { task_id, backdrop_id }) = batch.key.kind {
+ if let BatchKind::Brush(BrushBatchKind::MixBlend { task_id, source_id, backdrop_id }) = batch.key.kind {
// composites can't be grouped together because
// they may overlap and affect each other.
debug_assert_eq!(batch.instances.len(), 1);
self.handle_readback_composite(
draw_target,
uses_scissor,
+ &render_tasks[source_id],
&render_tasks[task_id],
&render_tasks[backdrop_id],
);
}
- let _timer = self.gpu_profiler.start_timer(batch.key.kind.sampler_tag());
+ let _timer = self.gpu_profile.start_timer(batch.key.kind.sampler_tag());
shader.bind(
&mut self.device,
projection,
- None,
&mut self.renderer_errors,
);
@@ -2825,7 +4528,6 @@ impl Renderer {
shader.bind(
&mut self.device,
projection,
- None,
&mut self.renderer_errors,
);
self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass1 as _);
@@ -2842,7 +4544,6 @@ impl Renderer {
shader.bind(
&mut self.device,
projection,
- None,
&mut self.renderer_errors,
);
self.device.switch_mode(ShaderColorMode::SubpixelWithBgColorPass2 as _);
@@ -2856,8 +4557,19 @@ impl Renderer {
}
}
+ // If the device supports pixel local storage, resolve the PLS values.
+ // This pass reads the final PLS color value, and writes it to a normal
+ // fragment output.
+ if self.device.get_capabilities().supports_pixel_local_storage {
+ self.resolve_pixel_local_storage(
+ alpha_batch_container.task_rect,
+ projection,
+ stats,
+ );
+ }
+
self.set_blend(false, framebuffer_kind);
- self.gpu_profiler.finish_sampler(transparent_sampler);
+ self.gpu_profile.finish_sampler(transparent_sampler);
}
self.device.disable_depth();
@@ -2876,7 +4588,7 @@ impl Renderer {
return;
}
- let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+ let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
self.device.disable_depth();
self.set_blend(false, FramebufferKind::Main);
@@ -2933,19 +4645,19 @@ impl Renderer {
.get_composite_shader(
CompositeSurfaceFormat::Yuv,
surface.image_buffer_kind,
- CompositeFeatures::empty(),
).bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors
);
- let textures = BatchTextures::composite_yuv(
- planes[0].texture,
- planes[1].texture,
- planes[2].texture,
- );
+ let textures = BatchTextures {
+ colors: [
+ planes[0].texture,
+ planes[1].texture,
+ planes[2].texture,
+ ],
+ };
// When the texture is an external texture, the UV rect is not known when
// the external surface descriptor is created, because external textures
@@ -2953,9 +4665,9 @@ impl Renderer {
// the frame render. To handle this, query the texture resolver for the
// UV rect if it's an external texture, otherwise use the default UV rect.
let uv_rects = [
- self.texture_resolver.get_uv_rect(&textures.input.colors[0], planes[0].uv_rect),
- self.texture_resolver.get_uv_rect(&textures.input.colors[1], planes[1].uv_rect),
- self.texture_resolver.get_uv_rect(&textures.input.colors[2], planes[2].uv_rect),
+ self.texture_resolver.get_uv_rect(&textures.colors[0], planes[0].uv_rect),
+ self.texture_resolver.get_uv_rect(&textures.colors[1], planes[1].uv_rect),
+ self.texture_resolver.get_uv_rect(&textures.colors[2], planes[2].uv_rect),
];
let instance = CompositeInstance::new_yuv(
@@ -2967,36 +4679,42 @@ impl Renderer {
color_space,
format,
rescale,
+ [
+ planes[0].texture_layer as f32,
+ planes[1].texture_layer as f32,
+ planes[2].texture_layer as f32,
+ ],
uv_rects,
);
( textures, instance )
},
ResolvedExternalSurfaceColorData::Rgb{ ref plane, flip_y, .. } => {
+
self.shaders
.borrow_mut()
.get_composite_shader(
CompositeSurfaceFormat::Rgba,
surface.image_buffer_kind,
- CompositeFeatures::empty(),
).bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors
);
- let textures = BatchTextures::composite_rgb(plane.texture);
- let mut uv_rect = self.texture_resolver.get_uv_rect(&textures.input.colors[0], plane.uv_rect);
+ let textures = BatchTextures::color(plane.texture);
+ let mut uv_rect = self.texture_resolver.get_uv_rect(&textures.colors[0], plane.uv_rect);
if flip_y {
let y = uv_rect.uv0.y;
uv_rect.uv0.y = uv_rect.uv1.y;
uv_rect.uv1.y = y;
}
+
let instance = CompositeInstance::new_rgb(
surface_rect.to_f32(),
surface_rect.to_f32(),
PremultipliedColorF::WHITE,
+ plane.texture_layer as f32,
ZBufferId(0),
uv_rect,
);
@@ -3018,80 +4736,96 @@ impl Renderer {
.unbind();
}
- self.gpu_profiler.finish_sampler(opaque_sampler);
+ self.gpu_profile.finish_sampler(opaque_sampler);
}
/// Draw a list of tiles to the framebuffer
- fn draw_tile_list<'a, I: Iterator<Item = &'a occlusion::Item>>(
+ fn draw_tile_list<'a, I: Iterator<Item = &'a CompositeTile>>(
&mut self,
tiles_iter: I,
- composite_state: &CompositeState,
external_surfaces: &[ResolvedExternalSurface],
projection: &default::Transform3D<f32>,
+ partial_present_mode: Option<PartialPresentMode>,
stats: &mut RendererStats,
) {
- let mut current_shader_params = (
- CompositeSurfaceFormat::Rgba,
- ImageBufferKind::Texture2D,
- CompositeFeatures::empty(),
- None,
- );
- let mut current_textures = BatchTextures::empty();
- let mut instances = Vec::new();
-
self.shaders
.borrow_mut()
.get_composite_shader(
- current_shader_params.0,
- current_shader_params.1,
- current_shader_params.2,
+ CompositeSurfaceFormat::Rgba,
+ ImageBufferKind::Texture2DArray,
).bind(
&mut self.device,
projection,
- None,
&mut self.renderer_errors
);
- for item in tiles_iter {
- let tile = &composite_state.tiles[item.key];
+ let mut current_shader_params = (CompositeSurfaceFormat::Rgba, ImageBufferKind::Texture2DArray);
+ let mut current_textures = BatchTextures::no_texture();
+ let mut instances = Vec::new();
+
+ for tile in tiles_iter {
+ // Determine a clip rect to apply to this tile, depending on what
+ // the partial present mode is.
+ let partial_clip_rect = match partial_present_mode {
+ Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect,
+ None => tile.rect,
+ };
- let clip_rect = item.rectangle.to_rect();
+ let clip_rect = match partial_clip_rect.intersection(&tile.clip_rect) {
+ Some(rect) => rect,
+ None => continue,
+ };
+
+ // Simple compositor needs the valid rect in device space to match clip rect
+ let valid_device_rect = tile.valid_rect.translate(
+ tile.rect.origin.to_vector()
+ );
+
+ // Only composite the part of the tile that contains valid pixels
+ let clip_rect = match clip_rect.intersection(&valid_device_rect) {
+ Some(rect) => rect,
+ None => continue,
+ };
// Work out the draw params based on the tile surface
let (instance, textures, shader_params) = match tile.surface {
CompositeTileSurface::Color { color } => {
- let dummy = TextureSource::Dummy;
- let image_buffer_kind = dummy.image_buffer_kind();
- let instance = CompositeInstance::new(
- tile.rect,
- clip_rect,
- color.premultiplied(),
- tile.z_id,
- );
- let features = instance.get_rgb_features();
(
- instance,
- BatchTextures::composite_rgb(dummy),
- (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None),
+ CompositeInstance::new(
+ tile.rect,
+ clip_rect,
+ color.premultiplied(),
+ 0.0,
+ tile.z_id,
+ ),
+ BatchTextures::color(TextureSource::Dummy),
+ (CompositeSurfaceFormat::Rgba, ImageBufferKind::Texture2DArray),
)
}
- CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::TextureCache { texture } } => {
- let instance = CompositeInstance::new(
- tile.rect,
- clip_rect,
- PremultipliedColorF::WHITE,
- tile.z_id,
- );
- let features = instance.get_rgb_features();
+ CompositeTileSurface::Clear => {
(
- instance,
- BatchTextures::composite_rgb(texture),
- (
- CompositeSurfaceFormat::Rgba,
- ImageBufferKind::Texture2D,
- features,
- None,
+ CompositeInstance::new(
+ tile.rect,
+ clip_rect,
+ PremultipliedColorF::BLACK,
+ 0.0,
+ tile.z_id,
),
+ BatchTextures::color(TextureSource::Dummy),
+ (CompositeSurfaceFormat::Rgba, ImageBufferKind::Texture2DArray),
+ )
+ }
+ CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::TextureCache { texture, layer } } => {
+ (
+ CompositeInstance::new(
+ tile.rect,
+ clip_rect,
+ PremultipliedColorF::WHITE,
+ layer as f32,
+ tile.z_id,
+ ),
+ BatchTextures::color(texture),
+ (CompositeSurfaceFormat::Rgba, ImageBufferKind::Texture2DArray),
)
}
CompositeTileSurface::ExternalSurface { external_surface_index } => {
@@ -3099,11 +4833,14 @@ impl Renderer {
match surface.color_data {
ResolvedExternalSurfaceColorData::Yuv{ ref planes, color_space, format, rescale, .. } => {
- let textures = BatchTextures::composite_yuv(
- planes[0].texture,
- planes[1].texture,
- planes[2].texture,
- );
+
+ let textures = BatchTextures {
+ colors: [
+ planes[0].texture,
+ planes[1].texture,
+ planes[2].texture,
+ ],
+ };
// When the texture is an external texture, the UV rect is not known when
// the external surface descriptor is created, because external textures
@@ -3111,9 +4848,9 @@ impl Renderer {
// the frame render. To handle this, query the texture resolver for the
// UV rect if it's an external texture, otherwise use the default UV rect.
let uv_rects = [
- self.texture_resolver.get_uv_rect(&textures.input.colors[0], planes[0].uv_rect),
- self.texture_resolver.get_uv_rect(&textures.input.colors[1], planes[1].uv_rect),
- self.texture_resolver.get_uv_rect(&textures.input.colors[2], planes[2].uv_rect),
+ self.texture_resolver.get_uv_rect(&textures.colors[0], planes[0].uv_rect),
+ self.texture_resolver.get_uv_rect(&textures.colors[1], planes[1].uv_rect),
+ self.texture_resolver.get_uv_rect(&textures.colors[2], planes[2].uv_rect),
];
(
@@ -3124,15 +4861,15 @@ impl Renderer {
color_space,
format,
rescale,
+ [
+ planes[0].texture_layer as f32,
+ planes[1].texture_layer as f32,
+ planes[2].texture_layer as f32,
+ ],
uv_rects,
),
textures,
- (
- CompositeSurfaceFormat::Yuv,
- surface.image_buffer_kind,
- CompositeFeatures::empty(),
- None
- ),
+ (CompositeSurfaceFormat::Yuv, surface.image_buffer_kind),
)
},
ResolvedExternalSurfaceColorData::Rgb{ ref plane, flip_y, .. } => {
@@ -3143,43 +4880,22 @@ impl Renderer {
uv_rect.uv0.y = uv_rect.uv1.y;
uv_rect.uv1.y = y;
}
- let instance = CompositeInstance::new_rgb(
- tile.rect,
- clip_rect,
- PremultipliedColorF::WHITE,
- tile.z_id,
- uv_rect,
- );
- let features = instance.get_rgb_features();
+
(
- instance,
- BatchTextures::composite_rgb(plane.texture),
- (
- CompositeSurfaceFormat::Rgba,
- surface.image_buffer_kind,
- features,
- Some(self.texture_resolver.get_texture_size(&plane.texture).to_f32()),
+ CompositeInstance::new_rgb(
+ tile.rect,
+ clip_rect,
+ PremultipliedColorF::WHITE,
+ plane.texture_layer as f32,
+ tile.z_id,
+ uv_rect,
),
+ BatchTextures::color(plane.texture),
+ (CompositeSurfaceFormat::Rgba, surface.image_buffer_kind),
)
},
}
}
- CompositeTileSurface::Clear => {
- let dummy = TextureSource::Dummy;
- let image_buffer_kind = dummy.image_buffer_kind();
- let instance = CompositeInstance::new(
- tile.rect,
- clip_rect,
- PremultipliedColorF::BLACK,
- tile.z_id,
- );
- let features = instance.get_rgb_features();
- (
- instance,
- BatchTextures::composite_rgb(dummy),
- (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None),
- )
- }
CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { .. } } => {
unreachable!("bug: found native surface in simple composite path");
}
@@ -3204,11 +4920,10 @@ impl Renderer {
if shader_params != current_shader_params {
self.shaders
.borrow_mut()
- .get_composite_shader(shader_params.0, shader_params.1, shader_params.2)
+ .get_composite_shader(shader_params.0, shader_params.1)
.bind(
&mut self.device,
projection,
- shader_params.3,
&mut self.renderer_errors
);
@@ -3239,142 +4954,145 @@ impl Renderer {
fn composite_simple(
&mut self,
composite_state: &CompositeState,
+ clear_framebuffer: bool,
draw_target: DrawTarget,
projection: &default::Transform3D<f32>,
results: &mut RenderResults,
- partial_present_mode: Option<PartialPresentMode>,
+ max_partial_present_rects: usize,
+ draw_previous_partial_present_regions: bool,
) {
- let _gm = self.gpu_profiler.start_marker("framebuffer");
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_COMPOSITE);
+ let _gm = self.gpu_profile.start_marker("framebuffer");
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_COMPOSITE);
self.device.bind_draw_target(draw_target);
- self.device.disable_depth_write();
- self.device.disable_depth();
-
- // If using KHR_partial_update, call eglSetDamageRegion.
- // This must be called exactly once per frame, and prior to any rendering to the main
- // framebuffer. Additionally, on Mali-G77 we encountered rendering issues when calling
- // this earlier in the frame, during offscreen render passes. So call it now, immediately
- // before rendering to the main framebuffer. See bug 1685276 for details.
- if let Some(partial_present) = self.compositor_config.partial_present() {
- if let Some(PartialPresentMode::Single { dirty_rect }) = partial_present_mode {
- partial_present.set_buffer_damage_region(&[dirty_rect.to_i32()]);
- }
- }
-
- let cap = composite_state.tiles.len();
+ self.device.enable_depth(DepthFunction::LessEqual);
+ self.device.enable_depth_write();
- let mut occlusion = occlusion::FrontToBackBuilder::with_capacity(cap, cap);
- let mut clear_tiles = Vec::new();
+ // Determine the partial present mode for this frame, which is used during
+ // framebuffer clears and calculating the clip rect for each tile that is drawn.
+ let mut partial_present_mode = None;
- for (idx, tile) in composite_state.tiles.iter().enumerate() {
- // Clear tiles overwrite whatever is under them, so they are treated as opaque.
- let is_opaque = tile.kind != TileKind::Alpha;
+ if max_partial_present_rects > 0 {
+ // We can only use partial present if we have valid dirty rects and the
+ // client hasn't reset partial present state since last frame.
+ if composite_state.dirty_rects_are_valid && !self.force_redraw {
+ let mut combined_dirty_rect = DeviceRect::zero();
- // Determine a clip rect to apply to this tile, depending on what
- // the partial present mode is.
- let partial_clip_rect = match partial_present_mode {
- Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect.to_box2d(),
- None => tile.rect.to_box2d(),
- };
+ // Combine the dirty rects that WR produced for every tile into a
+ // single device-space dirty rect for this frame.
+ for tile in composite_state.opaque_tiles.iter().chain(composite_state.alpha_tiles.iter()) {
+ let dirty_rect = tile.dirty_rect.translate(tile.rect.origin.to_vector());
+ combined_dirty_rect = combined_dirty_rect.union(&dirty_rect);
+ }
- // Simple compositor needs the valid rect in device space to match clip rect
- let valid_device_rect = tile.valid_rect.translate(
- tile.rect.origin.to_vector()
- ).to_box2d();
+ let combined_dirty_rect = combined_dirty_rect.round();
+ let combined_dirty_rect_i32 = combined_dirty_rect.to_i32();
+ // If nothing has changed, don't return any dirty rects at all (the client
+ // can use this as a signal to skip present completely).
+ if !combined_dirty_rect.is_empty() {
+ results.dirty_rects.push(combined_dirty_rect_i32);
+ }
- let rect = tile.rect.to_box2d()
- .intersection_unchecked(&tile.clip_rect.to_box2d())
- .intersection_unchecked(&partial_clip_rect)
- .intersection_unchecked(&valid_device_rect);
+ // If the implementation requires manually keeping the buffer consistent,
+ // combine the previous frame's damage for tile clipping.
+ // (Not for the returned region though, that should be from this frame only)
+ partial_present_mode = Some(PartialPresentMode::Single {
+ dirty_rect: if draw_previous_partial_present_regions {
+ combined_dirty_rect.union(&self.prev_dirty_rect)
+ } else { combined_dirty_rect },
+ });
- if rect.is_empty() {
- continue;
- }
+ if draw_previous_partial_present_regions {
+ self.prev_dirty_rect = combined_dirty_rect;
+ }
+ } else {
+ // If we don't have a valid partial present scenario, return a single
+ // dirty rect to the client that covers the entire framebuffer.
+ let fb_rect = DeviceIntRect::new(
+ DeviceIntPoint::zero(),
+ draw_target.dimensions(),
+ );
+ results.dirty_rects.push(fb_rect);
- if tile.kind == TileKind::Clear {
- // Clear tiles are specific to how we render the window buttons on
- // Windows 8. We can get away with drawing them at the end on top
- // of everything else, which we do to avoid having to juggle with
- // the blend state.
- clear_tiles.push(occlusion::Item { rectangle: rect, key: idx });
- continue;
+ if draw_previous_partial_present_regions {
+ self.prev_dirty_rect = fb_rect.to_f32();
+ }
}
- occlusion.add(&rect, is_opaque, idx);
+ self.force_redraw = false;
}
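
Each tile's dirty rect is tile-local, so the loop above translates it by the tile origin before unioning everything into the single device-space rect handed to partial present. A minimal sketch of that combining step, with a hand-rolled `Rect` standing in for DeviceRect:

    #[derive(Clone, Copy, Debug, PartialEq)]
    struct Rect { x0: f32, y0: f32, x1: f32, y1: f32 }

    impl Rect {
        fn is_empty(&self) -> bool { self.x1 <= self.x0 || self.y1 <= self.y0 }
        fn union(&self, o: &Rect) -> Rect {
            if self.is_empty() { return *o; }
            if o.is_empty() { return *self; }
            Rect {
                x0: self.x0.min(o.x0), y0: self.y0.min(o.y0),
                x1: self.x1.max(o.x1), y1: self.y1.max(o.y1),
            }
        }
    }

    fn main() {
        // (tile origin, tile-local dirty rect) pairs.
        let tiles = [
            ((0.0, 0.0), Rect { x0: 8.0, y0: 8.0, x1: 16.0, y1: 16.0 }),
            ((256.0, 0.0), Rect { x0: 0.0, y0: 0.0, x1: 4.0, y1: 4.0 }),
        ];
        let mut combined = Rect { x0: 0.0, y0: 0.0, x1: 0.0, y1: 0.0 };
        for ((ox, oy), d) in tiles {
            // Translate the tile-local dirty rect into device space.
            let device = Rect { x0: d.x0 + ox, y0: d.y0 + oy, x1: d.x1 + ox, y1: d.y1 + oy };
            combined = combined.union(&device);
        }
        assert_eq!(combined, Rect { x0: 8.0, y0: 0.0, x1: 260.0, y1: 16.0 });
    }
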
- // Clear the framebuffer
- let clear_color = self.clear_color.map(|color| color.to_array());
+ // Clear the framebuffer, if required
+ if clear_framebuffer {
+ let clear_color = self.clear_color.map(|color| color.to_array());
- match partial_present_mode {
- Some(PartialPresentMode::Single { dirty_rect }) => {
- // There is no need to clear if the dirty rect is occluded. Additionally,
- // on Mali-G77 we have observed artefacts when calling glClear (even with
- // the empty scissor rect set) after calling eglSetDamageRegion with an
- // empty damage region. So avoid clearing in that case. See bug 1709548.
- if !dirty_rect.is_empty() && occlusion.test(&dirty_rect.to_box2d()) {
+ match partial_present_mode {
+ Some(PartialPresentMode::Single { dirty_rect }) => {
// We have a single dirty rect, so clear only that
self.device.clear_target(clear_color,
- None,
+ Some(1.0),
Some(draw_target.to_framebuffer_rect(dirty_rect.to_i32())));
}
- }
- None => {
- // Partial present is disabled, so clear the entire framebuffer
- self.device.clear_target(clear_color,
- None,
- None);
+ None => {
+ // Partial present is disabled, so clear the entire framebuffer
+ self.device.clear_target(clear_color,
+ Some(1.0),
+ None);
+ }
}
}
// We are only interested in tiles backed by actual cached pixels, so we don't
// count clear tiles here.
- let num_tiles = composite_state.tiles
- .iter()
- .filter(|tile| tile.kind != TileKind::Clear).count();
- self.profile.set(profiler::PICTURE_TILES, num_tiles);
-
- if !occlusion.opaque_items().is_empty() {
- let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+ let num_tiles = composite_state.opaque_tiles.len()
+ + composite_state.alpha_tiles.len();
+ self.profile_counters.total_picture_cache_tiles.set(num_tiles);
+
+ // Draw opaque tiles first, front-to-back to get maximum
+ // z-reject efficiency.
+ if !composite_state.opaque_tiles.is_empty() {
+ let opaque_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
+ self.device.enable_depth_write();
self.set_blend(false, FramebufferKind::Main);
self.draw_tile_list(
- occlusion.opaque_items().iter(),
- &composite_state,
+ composite_state.opaque_tiles.iter().rev(),
&composite_state.external_surfaces,
projection,
+ partial_present_mode,
&mut results.stats,
);
- self.gpu_profiler.finish_sampler(opaque_sampler);
+ self.gpu_profile.finish_sampler(opaque_sampler);
}
- // Draw alpha tiles
- if !occlusion.alpha_items().is_empty() {
- let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+ if !composite_state.clear_tiles.is_empty() {
+ let transparent_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+ self.device.disable_depth_write();
self.set_blend(true, FramebufferKind::Main);
- self.set_blend_mode_premultiplied_alpha(FramebufferKind::Main);
+ self.device.set_blend_mode_premultiplied_dest_out();
self.draw_tile_list(
- occlusion.alpha_items().iter().rev(),
- &composite_state,
+ composite_state.clear_tiles.iter(),
&composite_state.external_surfaces,
projection,
+ partial_present_mode,
&mut results.stats,
);
- self.gpu_profiler.finish_sampler(transparent_sampler);
+ self.gpu_profile.finish_sampler(transparent_sampler);
}
- if !clear_tiles.is_empty() {
- let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+ // Draw alpha tiles
+ if !composite_state.alpha_tiles.is_empty() {
+ let transparent_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
+ self.device.disable_depth_write();
self.set_blend(true, FramebufferKind::Main);
- self.device.set_blend_mode_premultiplied_dest_out();
+ self.set_blend_mode_premultiplied_alpha(FramebufferKind::Main);
self.draw_tile_list(
- clear_tiles.iter(),
- &composite_state,
+ composite_state.alpha_tiles.iter(),
&composite_state.external_surfaces,
projection,
+ partial_present_mode,
&mut results.stats,
);
- self.gpu_profiler.finish_sampler(transparent_sampler);
+ self.gpu_profile.finish_sampler(transparent_sampler);
}
}
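
The pass ordering in this function follows the usual depth-based compositing scheme: opaque tiles are drawn first, front-to-back with depth writes enabled so the z-buffer rejects hidden pixels, and clear and alpha tiles are then blended without depth writes. A toy sketch of that ordering decision only, with illustrative types (larger z assumed nearer the viewer; in WebRender the lists arrive pre-sorted):

    struct Tile { z: i32, opaque: bool }

    // Returns the z values in submission order: opaque front-to-back,
    // then alpha back-to-front.
    fn submission_order(tiles: &[Tile]) -> Vec<i32> {
        let mut opaque: Vec<i32> = tiles.iter().filter(|t| t.opaque).map(|t| t.z).collect();
        opaque.sort_by(|a, b| b.cmp(a)); // front-to-back: nearest first
        let mut alpha: Vec<i32> = tiles.iter().filter(|t| !t.opaque).map(|t| t.z).collect();
        alpha.sort(); // back-to-front: farthest first
        opaque.extend(alpha);
        opaque
    }

    fn main() {
        let tiles = [
            Tile { z: 1, opaque: true },
            Tile { z: 3, opaque: false },
            Tile { z: 2, opaque: true },
            Tile { z: 0, opaque: false },
        ];
        assert_eq!(submission_order(&tiles), vec![2, 1, 0, 3]);
    }
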
@@ -3382,16 +5100,18 @@ impl Renderer {
&mut self,
draw_target: DrawTarget,
target: &ColorRenderTarget,
+ content_origin: DeviceIntPoint,
clear_color: Option<[f32; 4]>,
clear_depth: Option<f32>,
render_tasks: &RenderTaskGraph,
projection: &default::Transform3D<f32>,
+ frame_id: GpuFrameId,
stats: &mut RendererStats,
) {
profile_scope!("draw_color_target");
- self.profile.inc(profiler::COLOR_PASSES);
- let _gm = self.gpu_profiler.start_marker("color target");
+ self.profile_counters.color_passes.inc();
+ let _gm = self.gpu_profile.start_marker("color target");
// sanity check for the depth buffer
if let DrawTarget::Texture { with_depth, .. } = draw_target {
@@ -3405,7 +5125,7 @@ impl Renderer {
};
{
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
self.device.bind_draw_target(draw_target);
self.device.disable_depth();
self.set_blend(false, framebuffer_kind);
@@ -3436,7 +5156,7 @@ impl Renderer {
// target slices were minimum 2048x2048. Now that we size
// them adaptively, this may be less of a win (except perhaps
// on a mostly-unused last slice of a large texture array).
- Some(draw_target.to_framebuffer_rect(target.used_rect))
+ Some(draw_target.to_framebuffer_rect(target.used_rect()))
}
DrawTarget::Texture { .. } | DrawTarget::External { .. } => {
None
@@ -3456,9 +5176,7 @@ impl Renderer {
// Handle any blits from the texture cache to this target.
self.handle_blits(
- &target.blits,
- render_tasks,
- draw_target,
+ &target.blits, render_tasks, draw_target, &content_origin,
);
// Draw any blurs for this target.
@@ -3468,22 +5186,26 @@ impl Renderer {
// fast path blur shaders for common
// blur radii with fixed weights.
if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
self.set_blend(false, framebuffer_kind);
self.shaders.borrow_mut().cs_blur_rgba8
- .bind(&mut self.device, projection, None, &mut self.renderer_errors);
+ .bind(&mut self.device, projection, &mut self.renderer_errors);
if !target.vertical_blurs.is_empty() {
- self.draw_blurs(
+ self.draw_instanced_batch(
&target.vertical_blurs,
+ VertexArrayKind::Blur,
+ &BatchTextures::no_texture(),
stats,
);
}
if !target.horizontal_blurs.is_empty() {
- self.draw_blurs(
+ self.draw_instanced_batch(
&target.horizontal_blurs,
+ VertexArrayKind::Blur,
+ &BatchTextures::no_texture(),
stats,
);
}
@@ -3508,6 +5230,7 @@ impl Renderer {
self.draw_alpha_batch_container(
alpha_batch_container,
draw_target,
+ content_origin,
framebuffer_kind,
projection,
render_tasks,
@@ -3515,27 +5238,47 @@ impl Renderer {
);
}
- if clear_depth.is_some() {
- self.device.invalidate_depth_target();
- }
- }
-
- fn draw_blurs(
- &mut self,
- blurs: &FastHashMap<TextureSource, Vec<BlurInstance>>,
- stats: &mut RendererStats,
- ) {
- for (texture, blurs) in blurs {
- let textures = BatchTextures::composite_rgb(
- *texture,
- );
-
- self.draw_instanced_batch(
- blurs,
- VertexArrayKind::Blur,
- &textures,
- stats,
- );
+ // For any registered image outputs on this render target,
+ // get the texture from caller and blit it.
+ for output in &target.outputs {
+ let handler = self.output_image_handler
+ .as_mut()
+ .expect("Found output image, but no handler set!");
+ if let Some((texture_id, output_size)) = handler.lock(output.pipeline_id) {
+ let fbo_id = match self.output_targets.entry(texture_id) {
+ Entry::Vacant(entry) => {
+ let fbo_id = self.device.create_fbo_for_external_texture(texture_id);
+ entry.insert(FrameOutput {
+ fbo_id,
+ last_access: frame_id,
+ });
+ fbo_id
+ }
+ Entry::Occupied(mut entry) => {
+ let target = entry.get_mut();
+ target.last_access = frame_id;
+ target.fbo_id
+ }
+ };
+ let (src_rect, _) = render_tasks[output.task_id].get_target_rect();
+ if !self.device.surface_origin_is_top_left() {
+ self.device.blit_render_target_invert_y(
+ draw_target.into(),
+ draw_target.to_framebuffer_rect(src_rect.translate(-content_origin.to_vector())),
+ DrawTarget::External { fbo: fbo_id, size: output_size },
+ output_size.into(),
+ );
+ } else {
+ self.device.blit_render_target(
+ draw_target.into(),
+ draw_target.to_framebuffer_rect(src_rect.translate(-content_origin.to_vector())),
+ DrawTarget::External { fbo: fbo_id, size: output_size },
+ output_size.into(),
+ TextureFilter::Linear,
+ );
+ }
+ handler.unlock(output.pipeline_id);
+ }
}
}
@@ -3543,7 +5286,6 @@ impl Renderer {
fn draw_clip_batch_list(
&mut self,
list: &ClipBatchList,
- draw_target: &DrawTarget,
projection: &default::Transform3D<f32>,
stats: &mut RendererStats,
) {
@@ -3553,85 +5295,72 @@ impl Renderer {
// draw rounded cornered rectangles
if !list.slow_rectangles.is_empty() {
- let _gm2 = self.gpu_profiler.start_marker("slow clip rectangles");
+ let _gm2 = self.gpu_profile.start_marker("slow clip rectangles");
self.shaders.borrow_mut().cs_clip_rectangle_slow.bind(
&mut self.device,
projection,
- None,
&mut self.renderer_errors,
);
self.draw_instanced_batch(
&list.slow_rectangles,
- VertexArrayKind::ClipRect,
- &BatchTextures::empty(),
+ VertexArrayKind::Clip,
+ &BatchTextures::no_texture(),
stats,
);
}
if !list.fast_rectangles.is_empty() {
- let _gm2 = self.gpu_profiler.start_marker("fast clip rectangles");
+ let _gm2 = self.gpu_profile.start_marker("fast clip rectangles");
self.shaders.borrow_mut().cs_clip_rectangle_fast.bind(
&mut self.device,
projection,
- None,
&mut self.renderer_errors,
);
self.draw_instanced_batch(
&list.fast_rectangles,
- VertexArrayKind::ClipRect,
- &BatchTextures::empty(),
+ VertexArrayKind::Clip,
+ &BatchTextures::no_texture(),
stats,
);
}
-
// draw box-shadow clips
for (mask_texture_id, items) in list.box_shadows.iter() {
- let _gm2 = self.gpu_profiler.start_marker("box-shadows");
- let textures = BatchTextures::composite_rgb(*mask_texture_id);
+ let _gm2 = self.gpu_profile.start_marker("box-shadows");
+ let textures = BatchTextures {
+ colors: [
+ *mask_texture_id,
+ TextureSource::Invalid,
+ TextureSource::Invalid,
+ ],
+ };
self.shaders.borrow_mut().cs_clip_box_shadow
- .bind(&mut self.device, projection, None, &mut self.renderer_errors);
+ .bind(&mut self.device, projection, &mut self.renderer_errors);
self.draw_instanced_batch(
items,
- VertexArrayKind::ClipBoxShadow,
+ VertexArrayKind::Clip,
&textures,
stats,
);
}
// draw image masks
- let mut using_scissor = false;
- for ((mask_texture_id, clip_rect), items) in list.images.iter() {
- let _gm2 = self.gpu_profiler.start_marker("clip images");
- // Some image masks may require scissoring to ensure they don't draw
- // outside their task's target bounds. Axis-aligned primitives will
- // be clamped inside the shader and should not require scissoring.
- // TODO: We currently assume scissor state is off by default for
- // alpha targets here, but in the future we may want to track the
- // current scissor state so that this can be properly saved and
- // restored here.
- if let Some(clip_rect) = clip_rect {
- if !using_scissor {
- self.device.enable_scissor();
- using_scissor = true;
- }
- let scissor_rect = draw_target.build_scissor_rect(Some(*clip_rect));
- self.device.set_scissor_rect(scissor_rect);
- } else if using_scissor {
- self.device.disable_scissor();
- using_scissor = false;
- }
- let textures = BatchTextures::composite_rgb(*mask_texture_id);
+ for (mask_texture_id, items) in list.images.iter() {
+ let _gm2 = self.gpu_profile.start_marker("clip images");
+ let textures = BatchTextures {
+ colors: [
+ *mask_texture_id,
+ TextureSource::Invalid,
+ TextureSource::Invalid,
+ ],
+ };
self.shaders.borrow_mut().cs_clip_image
- .bind(&mut self.device, projection, None, &mut self.renderer_errors);
+ .bind(&mut self.device, projection, &mut self.renderer_errors);
self.draw_instanced_batch(
items,
- VertexArrayKind::ClipImage,
+ VertexArrayKind::Clip,
&textures,
stats,
);
}
- if using_scissor {
- self.device.disable_scissor();
- }
}
fn draw_alpha_target(
@@ -3644,88 +5373,41 @@ impl Renderer {
) {
profile_scope!("draw_alpha_target");
- self.profile.inc(profiler::ALPHA_PASSES);
- let _gm = self.gpu_profiler.start_marker("alpha target");
- let alpha_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_ALPHA);
+ self.profile_counters.alpha_passes.inc();
+ let _gm = self.gpu_profile.start_marker("alpha target");
+ let alpha_sampler = self.gpu_profile.start_sampler(GPU_SAMPLER_TAG_ALPHA);
{
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_TARGET);
self.device.bind_draw_target(draw_target);
self.device.disable_depth();
self.device.disable_depth_write();
self.set_blend(false, FramebufferKind::Other);
- let zero_color = [0.0, 0.0, 0.0, 0.0];
- let one_color = [1.0, 1.0, 1.0, 1.0];
+ // TODO(gw): Applying a scissor rect and minimal clear here
+ // is a very large performance win on the Intel and nVidia
+ // GPUs that I have tested with. It's possible it may be a
+ // performance penalty on other GPU types - we should test this
+ // and consider different code paths.
- // On some Mali-T devices we have observed crashes in subsequent draw calls
- // immediately after clearing the alpha render target regions with glClear().
- // Using the shader to clear the regions avoids the crash. See bug 1638593.
- if self.clear_alpha_targets_with_quads
- && !(target.zero_clears.is_empty() && target.one_clears.is_empty())
- {
- let zeroes = target.zero_clears
- .iter()
- .map(|task_id| {
- let rect = render_tasks[*task_id].get_target_rect().to_f32();
- ClearInstance {
- rect: [
- rect.origin.x, rect.origin.y,
- rect.size.width, rect.size.height,
- ],
- color: zero_color,
- }
- });
-
- let ones = target.one_clears
- .iter()
- .map(|task_id| {
- let rect = render_tasks[*task_id].get_target_rect().to_f32();
- ClearInstance {
- rect: [
- rect.origin.x, rect.origin.y,
- rect.size.width, rect.size.height,
- ],
- color: one_color,
- }
- });
-
- let instances = zeroes.chain(ones).collect::<Vec<_>>();
- self.shaders.borrow_mut().ps_clear.bind(
- &mut self.device,
- &projection,
+ let zero_color = [0.0, 0.0, 0.0, 0.0];
+ for &task_id in &target.zero_clears {
+ let (rect, _) = render_tasks[task_id].get_target_rect();
+ self.device.clear_target(
+ Some(zero_color),
None,
- &mut self.renderer_errors,
+ Some(draw_target.to_framebuffer_rect(rect)),
);
- self.draw_instanced_batch(
- &instances,
- VertexArrayKind::Clear,
- &BatchTextures::empty(),
- stats,
- );
- } else {
- // TODO(gw): Applying a scissor rect and minimal clear here
- // is a very large performance win on the Intel and nVidia
- // GPUs that I have tested with. It's possible it may be a
- // performance penalty on other GPU types - we should test this
- // and consider different code paths.
- for &task_id in &target.zero_clears {
- let rect = render_tasks[task_id].get_target_rect();
- self.device.clear_target(
- Some(zero_color),
- None,
- Some(draw_target.to_framebuffer_rect(rect)),
- );
- }
+ }
- for &task_id in &target.one_clears {
- let rect = render_tasks[task_id].get_target_rect();
- self.device.clear_target(
- Some(one_color),
- None,
- Some(draw_target.to_framebuffer_rect(rect)),
- );
- }
+ let one_color = [1.0, 1.0, 1.0, 1.0];
+ for &task_id in &target.one_clears {
+ let (rect, _) = render_tasks[task_id].get_target_rect();
+ self.device.clear_target(
+ Some(one_color),
+ None,
+ Some(draw_target.to_framebuffer_rect(rect)),
+ );
}
}
@@ -3736,21 +5418,25 @@ impl Renderer {
// fast path blur shaders for common
// blur radii with fixed weights.
if !target.vertical_blurs.is_empty() || !target.horizontal_blurs.is_empty() {
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
self.shaders.borrow_mut().cs_blur_a8
- .bind(&mut self.device, projection, None, &mut self.renderer_errors);
+ .bind(&mut self.device, projection, &mut self.renderer_errors);
if !target.vertical_blurs.is_empty() {
- self.draw_blurs(
+ self.draw_instanced_batch(
&target.vertical_blurs,
+ VertexArrayKind::Blur,
+ &BatchTextures::no_texture(),
stats,
);
}
if !target.horizontal_blurs.is_empty() {
- self.draw_blurs(
+ self.draw_instanced_batch(
&target.horizontal_blurs,
+ VertexArrayKind::Blur,
+ &BatchTextures::no_texture(),
stats,
);
}
@@ -3764,7 +5450,7 @@ impl Renderer {
// Draw the clip items into the tiled alpha mask.
{
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_CLIP);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_CLIP);
// TODO(gw): Consider grouping multiple clip masks per shader
// invocation here to reduce memory bandwidth further?
@@ -3775,7 +5461,6 @@ impl Renderer {
self.set_blend(false, FramebufferKind::Other);
self.draw_clip_batch_list(
&target.clip_batcher.primary_clips,
- &draw_target,
projection,
stats,
);
@@ -3786,49 +5471,58 @@ impl Renderer {
self.set_blend_mode_multiply(FramebufferKind::Other);
self.draw_clip_batch_list(
&target.clip_batcher.secondary_clips,
- &draw_target,
projection,
stats,
);
}
- self.gpu_profiler.finish_sampler(alpha_sampler);
+ self.gpu_profile.finish_sampler(alpha_sampler);
}
fn draw_texture_cache_target(
&mut self,
texture: &CacheTextureId,
+ layer: LayerIndex,
target: &TextureCacheRenderTarget,
render_tasks: &RenderTaskGraph,
stats: &mut RendererStats,
) {
profile_scope!("draw_texture_cache_target");
+ let texture_source = TextureSource::TextureCache(*texture, Swizzle::default());
+ let projection = {
+ let (texture, _) = self.texture_resolver
+ .resolve(&texture_source)
+ .expect("BUG: invalid target texture");
+ let target_size = texture.get_dimensions();
+
+ Transform3D::ortho(
+ 0.0,
+ target_size.width as f32,
+ 0.0,
+ target_size.height as f32,
+ self.device.ortho_near_plane(),
+ self.device.ortho_far_plane(),
+ )
+ };
+
self.device.disable_depth();
self.device.disable_depth_write();
self.set_blend(false, FramebufferKind::Other);
- let texture = &self.texture_resolver.texture_cache_map[texture];
- let target_size = texture.get_dimensions();
-
- let projection = Transform3D::ortho(
- 0.0,
- target_size.width as f32,
- 0.0,
- target_size.height as f32,
- self.device.ortho_near_plane(),
- self.device.ortho_far_plane(),
- );
-
- let draw_target = DrawTarget::from_texture(
- texture,
- false,
- );
- self.device.bind_draw_target(draw_target);
-
{
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_CLEAR);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_CLEAR);
+
+ let (texture, _) = self.texture_resolver
+ .resolve(&texture_source)
+ .expect("BUG: invalid target texture");
+ let draw_target = DrawTarget::from_texture(
+ texture,
+ layer,
+ false,
+ );
+ self.device.bind_draw_target(draw_target);
self.device.disable_depth();
self.device.disable_depth_write();
@@ -3849,13 +5543,12 @@ impl Renderer {
self.shaders.borrow_mut().ps_clear.bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors,
);
self.draw_instanced_batch(
&instances,
VertexArrayKind::Clear,
- &BatchTextures::empty(),
+ &BatchTextures::no_texture(),
stats,
);
} else {
@@ -3870,9 +5563,7 @@ impl Renderer {
// Handle any blits to this texture from child tasks.
self.handle_blits(
- &target.blits,
- render_tasks,
- draw_target,
+ &target.blits, render_tasks, draw_target, &DeviceIntPoint::zero(),
);
}
@@ -3880,7 +5571,7 @@ impl Renderer {
if !target.border_segments_solid.is_empty() ||
!target.border_segments_complex.is_empty()
{
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_BORDER);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_BORDER);
self.set_blend(true, FramebufferKind::Other);
self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other);
@@ -3889,14 +5580,13 @@ impl Renderer {
self.shaders.borrow_mut().cs_border_solid.bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors,
);
self.draw_instanced_batch(
&target.border_segments_solid,
VertexArrayKind::Border,
- &BatchTextures::empty(),
+ &BatchTextures::no_texture(),
stats,
);
}
@@ -3905,14 +5595,13 @@ impl Renderer {
self.shaders.borrow_mut().cs_border_segment.bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors,
);
self.draw_instanced_batch(
&target.border_segments_complex,
VertexArrayKind::Border,
- &BatchTextures::empty(),
+ &BatchTextures::no_texture(),
stats,
);
}
@@ -3922,7 +5611,7 @@ impl Renderer {
// Draw any line decorations for this target.
if !target.line_decorations.is_empty() {
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_LINE_DECORATION);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_LINE_DECORATION);
self.set_blend(true, FramebufferKind::Other);
self.set_blend_mode_premultiplied_alpha(FramebufferKind::Other);
@@ -3930,130 +5619,55 @@ impl Renderer {
self.shaders.borrow_mut().cs_line_decoration.bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors,
);
self.draw_instanced_batch(
&target.line_decorations,
VertexArrayKind::LineDecoration,
- &BatchTextures::empty(),
- stats,
- );
-
- self.set_blend(false, FramebufferKind::Other);
- }
-
- // Draw any fast path linear gradients for this target.
- if !target.fast_linear_gradients.is_empty() {
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_FAST_LINEAR_GRADIENT);
-
- self.set_blend(false, FramebufferKind::Other);
-
- self.shaders.borrow_mut().cs_fast_linear_gradient.bind(
- &mut self.device,
- &projection,
- None,
- &mut self.renderer_errors,
- );
-
- self.draw_instanced_batch(
- &target.fast_linear_gradients,
- VertexArrayKind::FastLinearGradient,
- &BatchTextures::empty(),
- stats,
- );
- }
-
- // Draw any linear gradients for this target.
- if !target.linear_gradients.is_empty() {
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_LINEAR_GRADIENT);
-
- self.set_blend(false, FramebufferKind::Other);
-
- self.shaders.borrow_mut().cs_linear_gradient.bind(
- &mut self.device,
- &projection,
- None,
- &mut self.renderer_errors,
- );
-
- if let Some(ref texture) = self.dither_matrix_texture {
- self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
- }
-
- self.draw_instanced_batch(
- &target.linear_gradients,
- VertexArrayKind::LinearGradient,
- &BatchTextures::empty(),
+ &BatchTextures::no_texture(),
stats,
);
- }
-
- // Draw any radial gradients for this target.
- if !target.radial_gradients.is_empty() {
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_RADIAL_GRADIENT);
self.set_blend(false, FramebufferKind::Other);
-
- self.shaders.borrow_mut().cs_radial_gradient.bind(
- &mut self.device,
- &projection,
- None,
- &mut self.renderer_errors,
- );
-
- if let Some(ref texture) = self.dither_matrix_texture {
- self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
- }
-
- self.draw_instanced_batch(
- &target.radial_gradients,
- VertexArrayKind::RadialGradient,
- &BatchTextures::empty(),
- stats,
- );
}
- // Draw any conic gradients for this target.
- if !target.conic_gradients.is_empty() {
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_CACHE_CONIC_GRADIENT);
+ // Draw any gradients for this target.
+ if !target.gradients.is_empty() {
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_CACHE_GRADIENT);
self.set_blend(false, FramebufferKind::Other);
- self.shaders.borrow_mut().cs_conic_gradient.bind(
+ self.shaders.borrow_mut().cs_gradient.bind(
&mut self.device,
&projection,
- None,
&mut self.renderer_errors,
);
- if let Some(ref texture) = self.dither_matrix_texture {
- self.device.bind_texture(TextureSampler::Dither, texture, Swizzle::default());
- }
-
self.draw_instanced_batch(
- &target.conic_gradients,
- VertexArrayKind::ConicGradient,
- &BatchTextures::empty(),
+ &target.gradients,
+ VertexArrayKind::Gradient,
+ &BatchTextures::no_texture(),
stats,
);
}
// Draw any blurs for this target.
if !target.horizontal_blurs.is_empty() {
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_BLUR);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_BLUR);
{
let mut shaders = self.shaders.borrow_mut();
match target.target_kind {
RenderTargetKind::Alpha => &mut shaders.cs_blur_a8,
RenderTargetKind::Color => &mut shaders.cs_blur_rgba8,
- }.bind(&mut self.device, &projection, None, &mut self.renderer_errors);
+ }.bind(&mut self.device, &projection, &mut self.renderer_errors);
}
- self.draw_blurs(
+ self.draw_instanced_batch(
&target.horizontal_blurs,
+ VertexArrayKind::Blur,
+ &BatchTextures::no_texture(),
stats,
);
}
@@ -4081,8 +5695,8 @@ impl Renderer {
debug_commands: Vec::new(),
};
- for (i, deferred_resolve) in deferred_resolves.iter().enumerate() {
- self.gpu_profiler.place_marker("deferred resolve");
+ for deferred_resolve in deferred_resolves {
+ self.gpu_profile.place_marker("deferred resolve");
let props = &deferred_resolve.image_properties;
let ext_image = props
.external_image
@@ -4131,7 +5745,7 @@ impl Renderer {
self.texture_resolver
.external_images
- .insert(DeferredResolveIndex(i as u32), texture);
+ .insert((ext_image.id, ext_image.channel_index), texture);
list.updates.push(GpuCacheUpdate::Copy {
block_index: list.blocks.len(),
@@ -4145,145 +5759,112 @@ impl Renderer {
Some(list)
}
- fn unlock_external_images(
- &mut self,
- deferred_resolves: &[DeferredResolve],
- ) {
+ fn unlock_external_images(&mut self) {
if !self.texture_resolver.external_images.is_empty() {
let handler = self.external_image_handler
.as_mut()
.expect("Found external image, but no handler set!");
- for (index, _) in self.texture_resolver.external_images.drain() {
- let props = &deferred_resolves[index.0 as usize].image_properties;
- let ext_image = props
- .external_image
- .expect("BUG: Deferred resolves must be external images!");
- handler.unlock(ext_image.id, ext_image.channel_index);
+ for (ext_data, _) in self.texture_resolver.external_images.drain() {
+ handler.unlock(ext_data.0, ext_data.1); // key is (ExternalImageId, channel index)
}
}
}
- /// Update the dirty rects based on current compositing mode and config
- // TODO(gw): This can be tidied up significantly once the Draw compositor
- // is implemented in terms of the compositor trait.
- fn calculate_dirty_rects(
+ /// Allocates a texture to be used as the output for a rendering pass.
+ ///
+ /// We make an effort to reuse render target textures across passes and
+ /// across frames when the format and dimensions match. Because we use
+ /// immutable storage, we can't resize textures.
+ ///
+ /// We could consider approaches to re-use part of a larger target, if
+ /// available. However, we'd need to be careful about eviction. Currently,
+ /// render targets are freed if they haven't been used in 30 frames. If we
+ /// used partial targets, we'd need to track how _much_ of the target has
+ /// been used in the last 30 frames, since we could otherwise end up
+ /// keeping an enormous target alive indefinitely by constantly using it
+ /// in situations where a much smaller target would suffice.
+ fn allocate_target_texture<T: RenderTarget>(
&mut self,
- buffer_age: usize,
- composite_state: &CompositeState,
- draw_target_dimensions: DeviceIntSize,
- results: &mut RenderResults,
- ) -> Option<PartialPresentMode> {
- let mut partial_present_mode = None;
-
- let (max_partial_present_rects, draw_previous_partial_present_regions) = match self.current_compositor_kind {
- CompositorKind::Native { .. } => {
- // Assume that we can return a single dirty rect for native
- // compositor for now, and that there is no buffer-age functionality.
- // These params can be exposed by the compositor capabilities struct
- // as the Draw compositor is ported to use it.
- (1, false)
- }
- CompositorKind::Draw { draw_previous_partial_present_regions, max_partial_present_rects } => {
- (max_partial_present_rects, draw_previous_partial_present_regions)
- }
- };
-
- if max_partial_present_rects > 0 {
- let prev_frames_damage_rect = if let Some(..) = self.compositor_config.partial_present() {
- self.buffer_damage_tracker
- .get_damage_rect(buffer_age)
- .or_else(|| Some(DeviceRect::from_size(draw_target_dimensions.to_f32())))
- } else {
- None
- };
-
- let can_use_partial_present =
- composite_state.dirty_rects_are_valid &&
- !self.force_redraw &&
- !(prev_frames_damage_rect.is_none() && draw_previous_partial_present_regions) &&
- !self.debug_overlay_state.is_enabled;
-
- if can_use_partial_present {
- let mut combined_dirty_rect = DeviceRect::zero();
-
- // Work out how many dirty rects WR produced, and if that's more than
- // what the device supports.
- for tile in &composite_state.tiles {
- if tile.kind == TileKind::Clear {
- continue;
- }
- let tile_dirty_rect = tile.dirty_rect.translate(tile.rect.origin.to_vector());
- let transformed_dirty_rect = if let Some(transform) = tile.transform {
- transform.outer_transformed_rect(&tile_dirty_rect)
- } else {
- Some(tile_dirty_rect)
- };
-
- if let Some(dirty_rect) = transformed_dirty_rect {
- combined_dirty_rect = combined_dirty_rect.union(&dirty_rect);
- }
- }
-
- let combined_dirty_rect = combined_dirty_rect.round();
- let combined_dirty_rect_i32 = combined_dirty_rect.to_i32();
- // Return this frame's dirty region. If nothing has changed, don't return any dirty
- // rects at all (the client can use this as a signal to skip present completely).
- if !combined_dirty_rect.is_empty() {
- results.dirty_rects.push(combined_dirty_rect_i32);
- }
-
- // Track this frame's dirty region, for calculating subsequent frames' damage.
- if draw_previous_partial_present_regions {
- self.buffer_damage_tracker.push_dirty_rect(&combined_dirty_rect);
- }
+ list: &mut RenderTargetList<T>,
+ counters: &mut FrameProfileCounters,
+ ) -> Option<ActiveTexture> {
+ if list.targets.is_empty() {
+ return None
+ }
+
+ // Get a bounding rect of all the layers, and round it up to a multiple
+ // of 256. This improves render target reuse when resizing the window,
+ // since we don't need to create a new render target for each slightly-
+ // larger frame.
+ let mut bounding_rect = DeviceIntRect::zero();
+ for t in list.targets.iter() {
+ bounding_rect = t.used_rect().union(&bounding_rect);
+ }
+ debug_assert_eq!(bounding_rect.origin, DeviceIntPoint::zero());
+ let dimensions = DeviceIntSize::new(
+ (bounding_rect.size.width + 255) & !255,
+ (bounding_rect.size.height + 255) & !255,
+ );
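+ // Worked example (sketch): a 1027x93 bounding rect rounds up to
+ // 1280x256 here, so a run of slightly different frame sizes keeps
+ // hitting the same pooled texture instead of allocating fresh ones.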
- // If the implementation requires manually keeping the buffer consistent,
- // then we must combine this frame's dirty region with that of previous frames
- // to determine the total_dirty_rect. This is used to determine what region we
- // render to, and is what we send to the compositor as the buffer damage region
- // (eg for KHR_partial_update).
- let total_dirty_rect = if draw_previous_partial_present_regions {
- combined_dirty_rect.union(&prev_frames_damage_rect.unwrap())
- } else {
- combined_dirty_rect
- };
+ counters.targets_used.inc();
- partial_present_mode = Some(PartialPresentMode::Single {
- dirty_rect: total_dirty_rect,
- });
- } else {
- // If we don't have a valid partial present scenario, return a single
- // dirty rect to the client that covers the entire framebuffer.
- let fb_rect = DeviceIntRect::new(
- DeviceIntPoint::zero(),
- draw_target_dimensions,
- );
- results.dirty_rects.push(fb_rect);
-
- if draw_previous_partial_present_regions {
- self.buffer_damage_tracker.push_dirty_rect(&fb_rect.to_f32());
+ // Try finding a match in the existing pool. If there's no match, we'll
+ // create a new texture.
+ let selector = TargetSelector {
+ size: dimensions,
+ num_layers: list.targets.len(),
+ format: list.format,
+ };
+ let index = self.texture_resolver.render_target_pool
+ .iter()
+ .position(|texture| {
+ selector == TargetSelector {
+ size: texture.get_dimensions(),
+ num_layers: texture.get_layer_count() as usize,
+ format: texture.get_format(),
}
- }
+ });
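+ // The match must be exact (size, layer count, format): render targets
+ // use immutable storage, so a pooled texture can't be resized to fit.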
- self.force_redraw = false;
- }
+ let rt_info = RenderTargetInfo { has_depth: list.needs_depth() };
+ let texture = if let Some(idx) = index {
+ let mut t = self.texture_resolver.render_target_pool.swap_remove(idx);
+ self.device.reuse_render_target::<u8>(&mut t, rt_info);
+ t
+ } else {
+ counters.targets_created.inc();
+ self.device.create_texture(
+ TextureTarget::Array,
+ list.format,
+ dimensions.width,
+ dimensions.height,
+ TextureFilter::Linear,
+ Some(rt_info),
+ list.targets.len() as _,
+ )
+ };
- partial_present_mode
+ list.check_ready(&texture);
+ Some(ActiveTexture {
+ texture,
+ saved_index: list.saved_index.clone(),
+ })
}
fn bind_frame_data(&mut self, frame: &mut Frame) {
profile_scope!("bind_frame_data");
- let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_DATA);
+ let _timer = self.gpu_profile.start_timer(GPU_TAG_SETUP_DATA);
self.vertex_data_textures[self.current_vertex_data_textures].update(
&mut self.device,
- &mut self.texture_upload_pbo_pool,
frame,
);
self.current_vertex_data_textures =
(self.current_vertex_data_textures + 1) % VERTEX_DATA_TEXTURE_COUNT;
+
+ debug_assert!(self.texture_resolver.prev_pass_alpha.is_none());
+ debug_assert!(self.texture_resolver.prev_pass_color.is_none());
}
fn update_native_surfaces(&mut self) {
@@ -4297,17 +5878,9 @@ impl Renderer {
let _inserted = self.allocated_native_surfaces.insert(id);
debug_assert!(_inserted, "bug: creating existing surface");
compositor.create_surface(
- id,
- virtual_offset,
- tile_size,
- is_opaque,
- );
- }
- NativeSurfaceOperationDetails::CreateExternalSurface { id, is_opaque } => {
- let _inserted = self.allocated_native_surfaces.insert(id);
- debug_assert!(_inserted, "bug: creating existing surface");
- compositor.create_external_surface(
id,
+ virtual_offset,
+ tile_size,
is_opaque,
);
}
@@ -4322,9 +5895,6 @@ impl Renderer {
NativeSurfaceOperationDetails::DestroyTile { id } => {
compositor.destroy_tile(id);
}
- NativeSurfaceOperationDetails::AttachExternalImage { id, external_image } => {
- compositor.attach_external_image(id, external_image);
- }
}
}
}
@@ -4340,14 +5910,15 @@ impl Renderer {
&mut self,
frame: &mut Frame,
device_size: Option<DeviceIntSize>,
- buffer_age: usize,
+ frame_id: GpuFrameId,
results: &mut RenderResults,
+ clear_framebuffer: bool,
) {
profile_scope!("draw_frame");
// These markers seem to crash a lot on Android, see bug 1559834
#[cfg(not(target_os = "android"))]
- let _gm = self.gpu_profiler.start_marker("draw frame");
+ let _gm = self.gpu_profile.start_marker("draw frame");
if frame.passes.is_empty() {
frame.has_been_rendered = true;
@@ -4360,341 +5931,382 @@ impl Renderer {
self.bind_frame_data(frame);
- // Determine the present mode and dirty rects, if device_size
- // is Some(..). If it's None, no composite will occur and only
- // picture cache and texture cache targets will be updated.
- // TODO(gw): Split Frame so that it's clearer when a composite
- // is occurring.
- let present_mode = device_size.and_then(|device_size| {
- self.calculate_dirty_rects(
- buffer_age,
- &frame.composite_state,
- device_size,
- results,
- )
- });
-
- // If we have a native OS compositor, then make use of that interface to
- // specify how to composite each of the picture cache surfaces. First, we
- // need to find each tile that may be bound and updated later in the frame
- // and invalidate it so that the native render compositor knows that these
- // tiles can't be composited early. Next, after all such tiles have been
- // invalidated, then we queue surfaces for native composition by the render
- // compositor before we actually update the tiles. This allows the render
- // compositor to start early composition while the tiles are updating.
- if let CompositorKind::Native { .. } = self.current_compositor_kind {
- let compositor = self.compositor_config.compositor().unwrap();
- // Invalidate any native surface tiles that might be updated by passes.
- if !frame.has_been_rendered {
- for tile in &frame.composite_state.tiles {
- if tile.kind == TileKind::Clear {
- continue;
- }
- if !tile.dirty_rect.is_empty() {
- if let CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { id, .. } } =
- tile.surface {
- let valid_rect = tile.valid_rect
- .round()
- .to_i32();
- compositor.invalidate_tile(id, valid_rect);
- }
- }
- }
- }
- // Ensure any external surfaces that might be used during early composition
- // are invalidated first so that the native compositor can properly schedule
- // composition to happen only when the external surface is updated.
- // See update_external_native_surfaces for more details.
- for surface in &frame.composite_state.external_surfaces {
- if let Some((native_surface_id, size)) = surface.update_params {
- let surface_rect = size.into();
- compositor.invalidate_tile(NativeTileId { surface_id: native_surface_id, x: 0, y: 0 }, surface_rect);
- }
- }
- // Finally queue native surfaces for early composition, if applicable. By now,
- // we have already invalidated any tiles that such surfaces may depend upon, so
- // the native render compositor can keep track of when to actually schedule
- // composition as surfaces are updated.
- if device_size.is_some() {
- frame.composite_state.composite_native(
- &results.dirty_rects,
- &mut **compositor,
- );
- }
- }
-
for (_pass_index, pass) in frame.passes.iter_mut().enumerate() {
#[cfg(not(target_os = "android"))]
- let _gm = self.gpu_profiler.start_marker(&format!("pass {}", _pass_index));
-
- profile_scope!("offscreen target");
-
- // If this frame has already been drawn, then any texture
- // cache targets have already been updated and can be
- // skipped this time.
- if !frame.has_been_rendered {
- for (&texture_id, target) in &pass.texture_cache {
- self.draw_texture_cache_target(
- &texture_id,
- target,
- &frame.render_tasks,
- &mut results.stats,
- );
- }
+ let _gm = self.gpu_profile.start_marker(&format!("pass {}", _pass_index));
- if !pass.picture_cache.is_empty() {
- self.profile.inc(profiler::COLOR_PASSES);
- }
+ self.texture_resolver.bind(
+ &TextureSource::PrevPassAlpha,
+ TextureSampler::PrevPassAlpha,
+ &mut self.device,
+ );
+ self.texture_resolver.bind(
+ &TextureSource::PrevPassColor,
+ TextureSampler::PrevPassColor,
+ &mut self.device,
+ );
- // Draw picture caching tiles for this pass.
- for picture_target in &pass.picture_cache {
- results.stats.color_target_count += 1;
+ match pass.kind {
+ RenderPassKind::MainFramebuffer { ref main_target, .. } => {
+ profile_scope!("main target");
- let draw_target = match picture_target.surface {
- ResolvedSurfaceTexture::TextureCache { ref texture } => {
- let (texture, _) = self.texture_resolver
- .resolve(texture)
- .expect("bug");
+ if let Some(device_size) = device_size {
+ results.stats.color_target_count += 1;
- DrawTarget::from_texture(
- texture,
- true,
- )
+ let offset = frame.content_origin.to_f32();
+ let size = frame.device_rect.size.to_f32();
+ let surface_origin_is_top_left = self.device.surface_origin_is_top_left();
+ let (bottom, top) = if surface_origin_is_top_left {
+ (offset.y, offset.y + size.height)
+ } else {
+ (offset.y + size.height, offset.y)
+ };
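+ // Swapping bottom and top here yields a Y-flipped projection for
+ // bottom-left-origin surfaces (e.g. plain GL framebuffers), so the
+ // frame still renders upright on screen.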
+
+ let projection = Transform3D::ortho(
+ offset.x,
+ offset.x + size.width,
+ bottom,
+ top,
+ self.device.ortho_near_plane(),
+ self.device.ortho_far_plane(),
+ );
+
+ let fb_scale = Scale::<_, _, FramebufferPixel>::new(1i32);
+ let mut fb_rect = frame.device_rect * fb_scale;
+
+ if !surface_origin_is_top_left {
+ fb_rect.origin.y = device_size.height - fb_rect.origin.y - fb_rect.size.height;
}
- ResolvedSurfaceTexture::Native { id, size } => {
- let surface_info = match self.current_compositor_kind {
+
+ let draw_target = DrawTarget::Default {
+ rect: fb_rect,
+ total_size: device_size * fb_scale,
+ surface_origin_is_top_left,
+ };
+
+ // Picture caching can be enabled / disabled dynamically from frame to
+ // frame. This is determined by what the frame builder selected, and is
+ // passed to the renderer via the composite state.
+ if frame.composite_state.picture_caching_is_enabled {
+ // If we have a native OS compositor, then make use of that interface
+ // to specify how to composite each of the picture cache surfaces.
+ match self.current_compositor_kind {
CompositorKind::Native { .. } => {
+ self.update_external_native_surfaces(
+ &frame.composite_state.external_surfaces,
+ results,
+ );
let compositor = self.compositor_config.compositor().unwrap();
- compositor.bind(
- id,
- picture_target.dirty_rect,
- picture_target.valid_rect,
- )
+ frame.composite_state.composite_native(&mut **compositor);
}
- CompositorKind::Draw { .. } => {
- unreachable!();
+ CompositorKind::Draw { max_partial_present_rects, draw_previous_partial_present_regions, .. } => {
+ self.composite_simple(
+ &frame.composite_state,
+ clear_framebuffer,
+ draw_target,
+ &projection,
+ results,
+ max_partial_present_rects,
+ draw_previous_partial_present_regions,
+ );
}
- };
-
- DrawTarget::NativeSurface {
- offset: surface_info.origin,
- external_fbo_id: surface_info.fbo_id,
- dimensions: size,
- }
- }
- };
-
- let projection = Transform3D::ortho(
- 0.0,
- draw_target.dimensions().width as f32,
- 0.0,
- draw_target.dimensions().height as f32,
- self.device.ortho_near_plane(),
- self.device.ortho_far_plane(),
- );
-
- self.draw_picture_cache_target(
- picture_target,
- draw_target,
- &projection,
- &frame.render_tasks,
- &mut results.stats,
- );
-
- // Native OS surfaces must be unbound at the end of drawing to them
- if let ResolvedSurfaceTexture::Native { .. } = picture_target.surface {
- match self.current_compositor_kind {
- CompositorKind::Native { .. } => {
- let compositor = self.compositor_config.compositor().unwrap();
- compositor.unbind();
}
- CompositorKind::Draw { .. } => {
- unreachable!();
+ } else {
+ if clear_framebuffer {
+ let clear_color = self.clear_color.map(|color| color.to_array());
+ self.device.bind_draw_target(draw_target);
+ self.device.enable_depth_write();
+ self.device.clear_target(clear_color,
+ Some(1.0),
+ None);
}
+
+ // If picture caching is disabled, we will be drawing the entire
+ // framebuffer. In that case, we need to push a screen size dirty
+ // rect, in case partial present is enabled (an empty array of
+ // dirty rects when partial present is enabled is interpreted by
+ // Gecko as meaning nothing has changed and a swap is not required).
+ results.dirty_rects.push(frame.device_rect);
+
+ self.draw_color_target(
+ draw_target,
+ main_target,
+ frame.content_origin,
+ None,
+ None,
+ &frame.render_tasks,
+ &projection,
+ frame_id,
+ &mut results.stats,
+ );
}
}
}
- }
+ RenderPassKind::OffScreen {
+ ref mut alpha,
+ ref mut color,
+ ref mut texture_cache,
+ ref mut picture_cache,
+ } => {
+ profile_scope!("offscreen target");
+
+ let alpha_tex = self.allocate_target_texture(alpha, &mut frame.profile_counters);
+ let color_tex = self.allocate_target_texture(color, &mut frame.profile_counters);
+
+ // If this frame has already been drawn, then any texture
+ // cache targets have already been updated and can be
+ // skipped this time.
+ if !frame.has_been_rendered {
+ for (&(texture_id, target_index), target) in texture_cache {
+ self.draw_texture_cache_target(
+ &texture_id,
+ target_index,
+ target,
+ &frame.render_tasks,
+ &mut results.stats,
+ );
+ }
- for target in &pass.alpha.targets {
- results.stats.alpha_target_count += 1;
+ if !picture_cache.is_empty() {
+ self.profile_counters.color_passes.inc();
+ }
- let texture_id = target.texture_id();
+ // Draw picture caching tiles for this pass.
+ for picture_target in picture_cache {
+ results.stats.color_target_count += 1;
- let alpha_tex = self.texture_resolver
- .texture_cache_map
- .get_mut(&texture_id)
- .expect("bug: texture not allocated");
+ let draw_target = match picture_target.surface {
+ ResolvedSurfaceTexture::TextureCache { ref texture, layer } => {
+ let (texture, _) = self.texture_resolver
+ .resolve(texture)
+ .expect("bug");
- let draw_target = DrawTarget::from_texture(
- alpha_tex,
- false,
- );
+ DrawTarget::from_texture(
+ texture,
+ layer as usize,
+ true,
+ )
+ }
+ ResolvedSurfaceTexture::Native { id, size } => {
+ let surface_info = match self.current_compositor_kind {
+ CompositorKind::Native { .. } => {
+ let compositor = self.compositor_config.compositor().unwrap();
+ compositor.bind(
+ id,
+ picture_target.dirty_rect,
+ picture_target.valid_rect,
+ )
+ }
+ CompositorKind::Draw { .. } => {
+ unreachable!();
+ }
+ };
+
+ DrawTarget::NativeSurface {
+ offset: surface_info.origin,
+ external_fbo_id: surface_info.fbo_id,
+ dimensions: size,
+ }
+ }
+ };
- let projection = Transform3D::ortho(
- 0.0,
- draw_target.dimensions().width as f32,
- 0.0,
- draw_target.dimensions().height as f32,
- self.device.ortho_near_plane(),
- self.device.ortho_far_plane(),
- );
+ let projection = Transform3D::ortho(
+ 0.0,
+ draw_target.dimensions().width as f32,
+ 0.0,
+ draw_target.dimensions().height as f32,
+ self.device.ortho_near_plane(),
+ self.device.ortho_far_plane(),
+ );
- self.draw_alpha_target(
- draw_target,
- target,
- &projection,
- &frame.render_tasks,
- &mut results.stats,
- );
- }
+ self.draw_picture_cache_target(
+ picture_target,
+ draw_target,
+ frame.content_origin,
+ &projection,
+ &frame.render_tasks,
+ &mut results.stats,
+ );
- let color_rt_info = RenderTargetInfo { has_depth: pass.color.needs_depth() };
+ // Native OS surfaces must be unbound at the end of drawing to them
+ if let ResolvedSurfaceTexture::Native { .. } = picture_target.surface {
+ match self.current_compositor_kind {
+ CompositorKind::Native { .. } => {
+ let compositor = self.compositor_config.compositor().unwrap();
+ compositor.unbind();
+ }
+ CompositorKind::Draw { .. } => {
+ unreachable!();
+ }
+ }
+ }
+ }
+ }
- for target in &pass.color.targets {
- results.stats.color_target_count += 1;
+ for (target_index, target) in alpha.targets.iter().enumerate() {
+ results.stats.alpha_target_count += 1;
+ let draw_target = DrawTarget::from_texture(
+ &alpha_tex.as_ref().unwrap().texture,
+ target_index,
+ false,
+ );
- let texture_id = target.texture_id();
+ let projection = Transform3D::ortho(
+ 0.0,
+ draw_target.dimensions().width as f32,
+ 0.0,
+ draw_target.dimensions().height as f32,
+ self.device.ortho_near_plane(),
+ self.device.ortho_far_plane(),
+ );
- let color_tex = self.texture_resolver
- .texture_cache_map
- .get_mut(&texture_id)
- .expect("bug: texture not allocated");
+ self.draw_alpha_target(
+ draw_target,
+ target,
+ &projection,
+ &frame.render_tasks,
+ &mut results.stats,
+ );
+ }
- self.device.reuse_render_target::<u8>(
- color_tex,
- color_rt_info,
- );
+ for (target_index, target) in color.targets.iter().enumerate() {
+ results.stats.color_target_count += 1;
+ let draw_target = DrawTarget::from_texture(
+ &color_tex.as_ref().unwrap().texture,
+ target_index,
+ target.needs_depth(),
+ );
- let draw_target = DrawTarget::from_texture(
- color_tex,
- target.needs_depth(),
- );
+ let projection = Transform3D::ortho(
+ 0.0,
+ draw_target.dimensions().width as f32,
+ 0.0,
+ draw_target.dimensions().height as f32,
+ self.device.ortho_near_plane(),
+ self.device.ortho_far_plane(),
+ );
- let projection = Transform3D::ortho(
- 0.0,
- draw_target.dimensions().width as f32,
- 0.0,
- draw_target.dimensions().height as f32,
- self.device.ortho_near_plane(),
- self.device.ortho_far_plane(),
- );
+ let clear_depth = if target.needs_depth() {
+ Some(1.0)
+ } else {
+ None
+ };
- let clear_depth = if target.needs_depth() {
- Some(1.0)
- } else {
- None
- };
+ self.draw_color_target(
+ draw_target,
+ target,
+ frame.content_origin,
+ Some([0.0, 0.0, 0.0, 0.0]),
+ clear_depth,
+ &frame.render_tasks,
+ &projection,
+ frame_id,
+ &mut results.stats,
+ );
+ }
- self.draw_color_target(
- draw_target,
- target,
- Some([0.0, 0.0, 0.0, 0.0]),
- clear_depth,
- &frame.render_tasks,
- &projection,
- &mut results.stats,
- );
+ // Only end the pass here and invalidate previous textures for
+ // off-screen targets. Deferring return of the inputs to the
+ // frame buffer until the implicit end_pass in end_frame allows
+ // debug draw overlays to be added without triggering a copy
+ // resolve stage in mobile / tiled GPUs.
+ self.texture_resolver.end_pass(
+ &mut self.device,
+ alpha_tex,
+ color_tex,
+ );
+ }
}
-
- // Only end the pass here and invalidate previous textures for
- // off-screen targets. Deferring return of the inputs to the
- // frame buffer until the implicit end_pass in end_frame allows
- // debug draw overlays to be added without triggering a copy
- // resolve stage in mobile / tiled GPUs.
- self.texture_resolver.end_pass(
- &mut self.device,
- &pass.textures_to_invalidate,
- );
{
profile_scope!("gl.flush");
self.device.gl().flush();
}
}
- self.composite_frame(
- frame,
- device_size,
- results,
- present_mode,
- );
+ if let Some(device_size) = device_size {
+ self.draw_frame_debug_items(&frame.debug_items);
+ self.draw_render_target_debug(device_size);
+ self.draw_texture_cache_debug(device_size);
+ self.draw_gpu_cache_debug(device_size);
+ self.draw_zoom_debug(device_size);
+ }
+ self.draw_epoch_debug();
+
+ // Garbage collect any frame outputs that weren't used this frame.
+ let device = &mut self.device;
+ self.output_targets
+ .retain(|_, target| if target.last_access != frame_id {
+ device.delete_fbo(target.fbo_id);
+ false
+ } else {
+ true
+ });
frame.has_been_rendered = true;
}
- fn composite_frame(
+ /// Initialize the PLS block, by reading the current framebuffer color.
+ pub fn init_pixel_local_storage(
&mut self,
- frame: &mut Frame,
- device_size: Option<DeviceIntSize>,
- results: &mut RenderResults,
- present_mode: Option<PartialPresentMode>,
+ task_rect: DeviceIntRect,
+ projection: &default::Transform3D<f32>,
+ stats: &mut RendererStats,
) {
- profile_scope!("main target");
+ self.device.enable_pixel_local_storage(true);
- if let Some(device_size) = device_size {
- results.stats.color_target_count += 1;
- results.picture_cache_debug = mem::replace(
- &mut frame.composite_state.picture_cache_debug,
- PictureCacheDebugInfo::new(),
+ self.shaders
+ .borrow_mut()
+ .pls_init
+ .as_mut()
+ .unwrap()
+ .bind(
+ &mut self.device,
+ projection,
+ &mut self.renderer_errors,
);
- let size = frame.device_rect.size.to_f32();
- let surface_origin_is_top_left = self.device.surface_origin_is_top_left();
- let (bottom, top) = if surface_origin_is_top_left {
- (0.0, size.height)
- } else {
- (size.height, 0.0)
- };
+ let instances = [
+ ResolveInstanceData::new(task_rect),
+ ];
- let projection = Transform3D::ortho(
- 0.0,
- size.width,
- bottom,
- top,
- self.device.ortho_near_plane(),
- self.device.ortho_far_plane(),
- );
+ self.draw_instanced_batch(
+ &instances,
+ VertexArrayKind::Resolve,
+ &BatchTextures::no_texture(),
+ stats,
+ );
+ }
- let fb_scale = Scale::<_, _, FramebufferPixel>::new(1i32);
- let mut fb_rect = frame.device_rect * fb_scale;
+ /// Resolve the current PLS structure, writing it to a fragment color output.
+ pub fn resolve_pixel_local_storage(
+ &mut self,
+ task_rect: DeviceIntRect,
+ projection: &default::Transform3D<f32>,
+ stats: &mut RendererStats,
+ ) {
+ self.shaders
+ .borrow_mut()
+ .pls_resolve
+ .as_mut()
+ .unwrap()
+ .bind(
+ &mut self.device,
+ projection,
+ &mut self.renderer_errors,
+ );
- if !surface_origin_is_top_left {
- fb_rect.origin.y = device_size.height - fb_rect.origin.y - fb_rect.size.height;
- }
+ let instances = [
+ ResolveInstanceData::new(task_rect),
+ ];
- let draw_target = DrawTarget::Default {
- rect: fb_rect,
- total_size: device_size * fb_scale,
- surface_origin_is_top_left,
- };
+ self.draw_instanced_batch(
+ &instances,
+ VertexArrayKind::Resolve,
+ &BatchTextures::no_texture(),
+ stats,
+ );
- // If we have a native OS compositor, then make use of that interface
- // to specify how to composite each of the picture cache surfaces.
- match self.current_compositor_kind {
- CompositorKind::Native { .. } => {
- // We have already queued surfaces for early native composition by this point.
- // All that is left is to finally update any external native surfaces that were
- // invalidated so that composition can complete.
- self.update_external_native_surfaces(
- &frame.composite_state.external_surfaces,
- results,
- );
- }
- CompositorKind::Draw { .. } => {
- self.composite_simple(
- &frame.composite_state,
- draw_target,
- &projection,
- results,
- present_mode,
- );
- }
- }
- } else {
- // Rendering a frame without presenting it will confuse the partial
- // present logic, so force a full present for the next frame.
- self.force_redraw();
- }
+ self.device.enable_pixel_local_storage(false);
}
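+
+ // A pass that uses pixel local storage brackets its draws with the two
+ // helpers above (sketch, assuming a PLS-capable device and a bound
+ // draw target):
+ //
+ //     self.init_pixel_local_storage(task_rect, &projection, stats);
+ //     // ... draw batches that blend within the PLS block ...
+ //     self.resolve_pixel_local_storage(task_rect, &projection, stats);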
pub fn debug_renderer(&mut self) -> Option<&mut DebugRenderer> {
@@ -4708,29 +6320,22 @@ impl Renderer {
pub fn set_debug_flags(&mut self, flags: DebugFlags) {
if let Some(enabled) = flag_changed(self.debug_flags, flags, DebugFlags::GPU_TIME_QUERIES) {
if enabled {
- self.gpu_profiler.enable_timers();
+ self.gpu_profile.enable_timers();
} else {
- self.gpu_profiler.disable_timers();
+ self.gpu_profile.disable_timers();
}
}
if let Some(enabled) = flag_changed(self.debug_flags, flags, DebugFlags::GPU_SAMPLE_QUERIES) {
if enabled {
- self.gpu_profiler.enable_samplers();
+ self.gpu_profile.enable_samplers();
} else {
- self.gpu_profiler.disable_samplers();
+ self.gpu_profile.disable_samplers();
}
}
- self.device.set_use_batched_texture_uploads(flags.contains(DebugFlags::USE_BATCHED_TEXTURE_UPLOADS));
- self.device.set_use_draw_calls_for_texture_copy(flags.contains(DebugFlags::USE_DRAW_CALLS_FOR_TEXTURE_COPY));
-
self.debug_flags = flags;
}
- pub fn set_profiler_ui(&mut self, ui_str: &str) {
- self.profiler.set_ui(ui_str);
- }
-
fn draw_frame_debug_items(&mut self, items: &[DebugItem]) {
if items.is_empty() {
return;
@@ -4771,7 +6376,7 @@ impl Renderer {
}
}
- fn draw_render_target_debug(&mut self, draw_target: &DrawTarget) {
+ fn draw_render_target_debug(&mut self, device_size: DeviceIntSize) {
if !self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
return;
}
@@ -4781,17 +6386,14 @@ impl Renderer {
None => return,
};
- let textures = self.texture_resolver
- .texture_cache_map
- .values()
- .filter(|texture| { texture.is_render_target() })
- .collect::<Vec<&Texture>>();
+ let textures =
+ self.texture_resolver.render_target_pool.iter().collect::<Vec<&Texture>>();
Self::do_debug_blit(
&mut self.device,
debug_renderer,
textures,
- draw_target,
+ device_size,
0,
&|_| [0.0, 1.0, 0.0, 1.0], // Use green for all RTs.
);
@@ -4847,12 +6449,13 @@ impl Renderer {
if self.zoom_debug_texture.is_none() {
let texture = self.device.create_texture(
- ImageBufferKind::Texture2D,
+ TextureTarget::Default,
ImageFormat::BGRA8,
source_rect.size.width,
source_rect.size.height,
TextureFilter::Nearest,
Some(RenderTargetInfo { has_depth: false }),
+ 1,
);
self.zoom_debug_texture = Some(texture);
@@ -4865,6 +6468,7 @@ impl Renderer {
read_target.to_framebuffer_rect(source_rect),
DrawTarget::from_texture(
self.zoom_debug_texture.as_ref().unwrap(),
+ 0,
false,
),
texture_rect,
@@ -4875,6 +6479,7 @@ impl Renderer {
self.device.blit_render_target(
ReadTarget::from_texture(
self.zoom_debug_texture.as_ref().unwrap(),
+ 0,
),
texture_rect,
read_target,
@@ -4883,7 +6488,7 @@ impl Renderer {
);
}
- fn draw_texture_cache_debug(&mut self, draw_target: &DrawTarget) {
+ fn draw_texture_cache_debug(&mut self, device_size: DeviceIntSize) {
if !self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) {
return;
}
@@ -4908,7 +6513,7 @@ impl Renderer {
&mut self.device,
debug_renderer,
textures,
- draw_target,
+ device_size,
if self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) { 544 } else { 0 },
&select_color,
);
@@ -4918,99 +6523,99 @@ impl Renderer {
device: &mut Device,
debug_renderer: &mut DebugRenderer,
mut textures: Vec<&Texture>,
- draw_target: &DrawTarget,
+ device_size: DeviceIntSize,
bottom: i32,
select_color: &dyn Fn(&Texture) -> [f32; 4],
) {
let mut spacing = 16;
let mut size = 512;
- let device_size = draw_target.dimensions();
let fb_width = device_size.width;
let fb_height = device_size.height;
- let surface_origin_is_top_left = draw_target.surface_origin_is_top_left();
-
- let num_textures = textures.iter().filter(|t| t.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE)).count() as i32;
+ let num_layers: i32 = textures.iter()
+ .map(|texture| texture.get_layer_count())
+ .sum();
- if num_textures * (size + spacing) > fb_width {
- let factor = fb_width as f32 / (num_textures * (size + spacing)) as f32;
+ if num_layers * (size + spacing) > fb_width {
+ let factor = fb_width as f32 / (num_layers * (size + spacing)) as f32;
size = (size as f32 * factor) as i32;
spacing = (spacing as f32 * factor) as i32;
}
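+ // Sketch of the scaling math: with fb_width = 1920 and 5 layers, the
+ // ideal row would be 5 * (512 + 16) = 2640px, so factor ~= 0.727 and
+ // each tile shrinks to ~372px with ~11px spacing to fit a single row.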
- let text_height = 14; // Visually approximated.
- let text_margin = 1;
- let tag_height = text_height + text_margin * 2;
- let tag_y = fb_height - (bottom + spacing + tag_height);
- let image_y = tag_y - size;
-
- // Sort the display by size (in bytes), so that left-to-right is
+ // Sort the display by layer size (in bytes), so that left-to-right is
// largest-to-smallest.
//
// Note that the vec here is in increasing order, because the elements
// get drawn right-to-left.
- textures.sort_by_key(|t| t.size_in_bytes());
+ textures.sort_by_key(|t| t.layer_size_in_bytes());
let mut i = 0;
for texture in textures.iter() {
- if !texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) {
- continue;
- }
+ let y = spacing + bottom;
let dimensions = texture.get_dimensions();
let src_rect = FramebufferIntRect::new(
FramebufferIntPoint::zero(),
FramebufferIntSize::new(dimensions.width as i32, dimensions.height as i32),
);
- let x = fb_width - (spacing + size) * (i as i32 + 1);
+ let layer_count = texture.get_layer_count() as usize;
+ for layer in 0 .. layer_count {
+ let x = fb_width - (spacing + size) * (i as i32 + 1);
- // If we have more targets than fit on one row in screen, just early exit.
- if x > fb_width {
- return;
- }
-
- // Draw the info tag.
- let tag_rect = rect(x, tag_y, size, tag_height);
- let tag_color = select_color(texture);
- device.clear_target(
- Some(tag_color),
- None,
- Some(draw_target.to_framebuffer_rect(tag_rect)),
- );
+ // If we have more targets than fit on one row in screen, just early exit.
+ if x > fb_width {
+ return;
+ }
- // Draw the dimensions onto the tag.
- let dim = texture.get_dimensions();
- let text_rect = tag_rect.inflate(-text_margin, -text_margin);
- debug_renderer.add_text(
- text_rect.min_x() as f32,
- text_rect.max_y() as f32, // Top-relative.
- &format!("{}x{}", dim.width, dim.height),
- ColorU::new(0, 0, 0, 255),
- Some(tag_rect.to_f32())
- );
+ //TODO: properly use FramebufferPixel coordinates
- // Blit the contents of the texture.
- let dest_rect = draw_target.to_framebuffer_rect(rect(x, image_y, size, size));
- let read_target = ReadTarget::from_texture(texture);
-
- if surface_origin_is_top_left {
- device.blit_render_target(
- read_target,
- src_rect,
- *draw_target,
- dest_rect,
- TextureFilter::Linear,
+ // Draw the info tag.
+ let text_margin = 1;
+ let text_height = 14; // Visually approximated.
+ let tag_height = text_height + text_margin * 2;
+ let tag_rect = rect(x, y, size, tag_height);
+ let tag_color = select_color(texture);
+ device.clear_target(
+ Some(tag_color),
+ None,
+ Some(tag_rect.cast_unit()),
);
- } else {
- // Invert y.
- device.blit_render_target_invert_y(
- read_target,
- src_rect,
- *draw_target,
- dest_rect,
+
+ // Draw the dimensions onto the tag.
+ let dim = texture.get_dimensions();
+ let mut text_rect = tag_rect;
+ text_rect.origin.y =
+ fb_height - text_rect.origin.y - text_rect.size.height; // Top-relative.
+ debug_renderer.add_text(
+ (x + text_margin) as f32,
+ (fb_height - y - text_margin) as f32, // Top-relative.
+ &format!("{}x{}", dim.width, dim.height),
+ ColorU::new(0, 0, 0, 255),
+ Some(text_rect.to_f32())
);
+
+ // Blit the contents of the layer. We need to invert Y because
+ // we're blitting from a texture to the main framebuffer, which
+ // use different conventions.
+ let dest_rect = rect(x, y + tag_height, size, size);
+ if !device.surface_origin_is_top_left() {
+ device.blit_render_target_invert_y(
+ ReadTarget::from_texture(texture, layer),
+ src_rect,
+ DrawTarget::new_default(device_size, device.surface_origin_is_top_left()),
+ FramebufferIntRect::from_untyped(&dest_rect),
+ );
+ } else {
+ device.blit_render_target(
+ ReadTarget::from_texture(texture, layer),
+ src_rect,
+ DrawTarget::new_default(device_size, device.surface_origin_is_top_left()),
+ FramebufferIntRect::from_untyped(&dest_rect),
+ TextureFilter::Linear,
+ );
+ }
+ i += 1;
}
- i += 1;
}
}
@@ -5062,7 +6667,8 @@ impl Renderer {
};
let (x_off, y_off) = (30f32, 30f32);
- let height = self.gpu_cache_texture.get_height()
+ let height = self.gpu_cache_texture.texture
+ .as_ref().map_or(0, |t| t.get_dimensions().height)
.min(device_size.height - (y_off as i32) * 2) as usize;
debug_renderer.add_quad(
x_off,
@@ -5098,6 +6704,22 @@ impl Renderer {
pixels
}
+ pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
+ let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
+ let size = device_size_as_framebuffer_size(texture.get_dimensions());
+ let mut texels = vec![0; (size.width * size.height * 16) as usize]; // 16 bytes per RGBAF32 texel
+ self.device.begin_frame();
+ self.device.bind_read_target(ReadTarget::from_texture(texture, 0));
+ self.device.read_pixels_into(
+ size.into(),
+ ImageFormat::RGBAF32,
+ &mut texels,
+ );
+ self.device.reset_read_target();
+ self.device.end_frame();
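+ // Each texel of the returned buffer is one GPU cache block of four
+ // f32s; block (x, y) starts at byte offset (y * width + x) * 16 in
+ // the tightly packed readback above (sketch of the layout).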
+ (texture.get_dimensions(), texels)
+ }
+
// De-initialize the Renderer safely, assuming the GL is still alive and active.
pub fn deinit(mut self) {
//Note: this is a fake frame, only needed because texture deletion is require to happen inside a frame
@@ -5124,12 +6746,25 @@ impl Renderer {
for textures in self.vertex_data_textures.drain(..) {
textures.deinit(&mut self.device);
}
- self.texture_upload_pbo_pool.deinit(&mut self.device);
- self.staging_texture_pool.delete_textures(&mut self.device);
+ self.device.delete_pbo(self.texture_cache_upload_pbo);
self.texture_resolver.deinit(&mut self.device);
- self.vaos.deinit(&mut self.device);
+ self.device.delete_vao(self.vaos.prim_vao);
+ self.device.delete_vao(self.vaos.resolve_vao);
+ self.device.delete_vao(self.vaos.clip_vao);
+ self.device.delete_vao(self.vaos.gradient_vao);
+ self.device.delete_vao(self.vaos.blur_vao);
+ self.device.delete_vao(self.vaos.line_vao);
+ self.device.delete_vao(self.vaos.border_vao);
+ self.device.delete_vao(self.vaos.scale_vao);
+ self.device.delete_vao(self.vaos.svg_filter_vao);
+ self.device.delete_vao(self.vaos.composite_vao);
+ self.device.delete_vao(self.vaos.clear_vao);
+
self.debug.deinit(&mut self.device);
+ for (_, target) in self.output_targets {
+ self.device.delete_fbo(target.fbo_id);
+ }
if let Ok(shaders) = Rc::try_unwrap(self.shaders) {
shaders.into_inner().deinit(&mut self.device);
}
@@ -5152,8 +6787,8 @@ impl Renderer {
}
fn size_of<T>(&self, ptr: *const T) -> usize {
- let ops = self.size_of_ops.as_ref().unwrap();
- unsafe { ops.malloc_size_of(ptr) }
+ let op = self.size_of_ops.as_ref().unwrap().size_of_op;
+ unsafe { op(ptr as *const c_void) }
}
/// Collects a memory report.
@@ -5161,9 +6796,15 @@ impl Renderer {
let mut report = MemoryReport::default();
// GPU cache CPU memory.
- self.gpu_cache_texture.report_memory_to(&mut report, self.size_of_ops.as_ref().unwrap());
+ if let GpuCacheBus::PixelBuffer{ref rows, ..} = self.gpu_cache_texture.bus {
+ for row in rows.iter() {
+ report.gpu_cache_cpu_mirror += self.size_of(&*row.cpu_blocks as *const _);
+ }
+ }
- self.staging_texture_pool.report_memory_to(&mut report, self.size_of_ops.as_ref().unwrap());
+ // GPU cache GPU memory.
+ report.gpu_cache_textures +=
+ self.gpu_cache_texture.texture.as_ref().map_or(0, |t| t.size_in_bytes());
// Render task CPU memory.
for (_id, doc) in &self.active_documents {
@@ -5179,11 +6820,8 @@ impl Renderer {
// Texture cache and render target GPU memory.
report += self.texture_resolver.report_memory();
- // Texture upload PBO memory.
- report += self.texture_upload_pbo_pool.report_memory();
-
// Textures held internally within the device layer.
- report += self.device.report_memory(self.size_of_ops.as_ref().unwrap());
+ report += self.device.report_memory();
report
}
@@ -5234,16 +6872,24 @@ impl Renderer {
}
}
- /// Clears the texture with a given color.
+ /// Clears all the layers of a texture with a given color.
fn clear_texture(&mut self, texture: &Texture, color: [f32; 4]) {
- self.device.bind_draw_target(DrawTarget::from_texture(
- &texture,
- false,
- ));
- self.device.clear_target(Some(color), None, None);
+ for i in 0..texture.get_layer_count() {
+ self.device.bind_draw_target(DrawTarget::from_texture(
+ &texture,
+ i as usize,
+ false,
+ ));
+ self.device.clear_target(Some(color), None, None);
+ }
}
}
+pub trait ThreadListener {
+ fn thread_started(&self, thread_name: &str);
+ fn thread_stopped(&self, thread_name: &str);
+}
+
/// Allows callers to hook in at certain points of the async scene build. These
/// functions are all called from the scene builder thread.
pub trait SceneBuilderHooks {
@@ -5287,7 +6933,8 @@ pub trait AsyncPropertySampler {
/// This is called for each transaction with the generate_frame flag set
/// (i.e. that will trigger a render). The list of frame messages returned
/// are processed as though they were part of the original transaction.
- fn sample(&self, document_id: DocumentId, generated_frame_id: Option<u64>) -> Vec<FrameMsg>;
+ fn sample(&self, document_id: DocumentId,
+ doc: &FastHashMap<PipelineId, Epoch>) -> Vec<FrameMsg>;
/// This is called exactly once, when the render backend thread is about to
/// terminate.
fn deregister(&self);
@@ -5323,15 +6970,13 @@ pub struct RendererOptions {
pub force_subpixel_aa: bool,
pub clear_color: Option<ColorF>,
pub enable_clear_scissor: bool,
- pub max_internal_texture_size: Option<i32>,
- pub image_tiling_threshold: i32,
+ pub max_texture_size: Option<i32>,
+ pub max_glyph_cache_size: Option<usize>,
pub upload_method: UploadMethod,
- /// The default size in bytes for PBOs used to upload texture data.
- pub upload_pbo_default_size: usize,
pub workers: Option<Arc<ThreadPool>>,
pub enable_multithreading: bool,
pub blob_image_handler: Option<Box<dyn BlobImageHandler>>,
- pub crash_annotator: Option<Box<dyn CrashAnnotator>>,
+ pub thread_listener: Option<Box<dyn ThreadListener + Send + Sync>>,
pub size_of_op: Option<VoidPtrToSizeFn>,
pub enclosing_size_of_op: Option<VoidPtrToSizeFn>,
pub cached_programs: Option<Rc<ProgramCache>>,
@@ -5342,6 +6987,7 @@ pub struct RendererOptions {
pub chase_primitive: ChasePrimitive,
pub support_low_priority_transactions: bool,
pub namespace_alloc_by_client: bool,
+ pub enable_picture_caching: bool,
pub testing: bool,
/// Set to true if this GPU supports hardware fast clears as a performance
/// optimization. Likely requires benchmarking on various GPUs to see if
@@ -5350,6 +6996,11 @@ pub struct RendererOptions {
pub gpu_supports_fast_clears: bool,
pub allow_dual_source_blending: bool,
pub allow_advanced_blend_equation: bool,
+ /// If true, allow WR to use pixel local storage if the device supports it.
+ /// For now, this defaults to false since the code is still experimental
+ /// and not complete. This option will probably be removed once support is
+ /// complete, and WR can implicitly choose whether to make use of PLS.
+ pub allow_pixel_local_storage_support: bool,
/// If true, allow textures to be initialized with glTexStorage.
/// This affects VRAM consumption and data upload paths.
pub allow_texture_storage_support: bool,
@@ -5357,11 +7008,16 @@ pub struct RendererOptions {
/// one expected by the driver, pretending the format is matching, and
/// swizzling the components on all the shader sampling.
pub allow_texture_swizzling: bool,
+ /// Number of batches to look back in history for adding the current
+ /// transparent instance into.
+ pub batch_lookback_count: usize,
/// Use `ps_clear` shader with batched quad rendering to clear the rects
/// in texture cache and picture cache tasks.
/// This helps to work around some Intel drivers
/// that incorrectly synchronize clears to following draws.
pub clear_caches_with_quads: bool,
+ /// Start the debug server for this renderer.
+ pub start_debug_server: bool,
/// Output the source of the shader with the given name.
pub dump_shader_source: Option<String>,
pub surface_origin_is_top_left: bool,
@@ -5371,26 +7027,14 @@ pub struct RendererOptions {
/// If true, panic whenever a GL error occurs. This has a significant
/// performance impact, so only use when debugging specific problems!
pub panic_on_gl_error: bool,
- pub picture_tile_size: Option<DeviceIntSize>,
- pub texture_cache_config: TextureCacheConfig,
- /// If true, we'll use instanced vertex attributes. Each instance is a quad.
- /// If false, we'll duplicate the instance attributes per vertex and issue
- /// regular indexed draws instead.
- pub enable_instancing: bool,
-}
-
-impl RendererOptions {
- /// Number of previous batches to search when adding the current
- /// transparent instance.
- const BATCH_LOOKBACK_COUNT: usize = 10;
-
- /// Since we are re-initializing the instance buffers on every draw call,
- /// the driver has to internally manage PBOs in flight.
- /// It's typically done by bucketing up to a specific limit, and then
- /// just individually managing the largest buffers.
- /// Having a limit here allows the drivers to more easily manage
- /// the PBOs for us.
- const MAX_INSTANCE_BUFFER_SIZE: usize = 0x20000; // actual threshold in macOS GL drivers
+ /// If the total bytes allocated in shared / standalone cache is less
+ /// than this, then allow the cache to grow without forcing an eviction.
+ pub texture_cache_eviction_threshold_bytes: usize,
+ /// The maximum number of items that will be evicted per frame. This limit helps avoid jank
+ /// on frames where we want to evict a large number of items. Instead, we'd prefer to drop
+ /// the items incrementally over a number of frames, even if that means the total allocated
+ /// size of the cache is above the desired threshold for a small number of frames.
+ pub texture_cache_max_evictions_per_frame: usize,
}
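The interplay between these last two options can be sketched as follows; `total_allocated_bytes` and `evict_one_lru_item` are hypothetical helpers used only for illustration:

    // Hedged sketch: evict until under the threshold, but never more than
    // the per-frame cap, spreading large evictions over several frames.
    fn run_evictions(cache: &mut TextureCache, opts: &RendererOptions) {
        let mut evicted = 0;
        while cache.total_allocated_bytes() > opts.texture_cache_eviction_threshold_bytes
            && evicted < opts.texture_cache_max_evictions_per_frame
        {
            cache.evict_one_lru_item();
            evicted += 1;
        }
    }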
impl Default for RendererOptions {
@@ -5408,16 +7052,15 @@ impl Default for RendererOptions {
force_subpixel_aa: false,
clear_color: Some(ColorF::new(1.0, 1.0, 1.0, 1.0)),
enable_clear_scissor: true,
- max_internal_texture_size: None,
- image_tiling_threshold: 4096,
+ max_texture_size: None,
+ max_glyph_cache_size: None,
// This is best as `Immediate` on Angle, or `Pixelbuffer(Dynamic)` on GL,
// but we are unable to make this decision here, so we pick a reasonable middle ground.
- upload_method: UploadMethod::PixelBuffer(ONE_TIME_USAGE_HINT),
- upload_pbo_default_size: 512 * 512 * 4,
+ upload_method: UploadMethod::PixelBuffer(VertexUsageHint::Stream),
workers: None,
enable_multithreading: true,
blob_image_handler: None,
- crash_annotator: None,
+ thread_listener: None,
size_of_op: None,
enclosing_size_of_op: None,
renderer_id: None,
@@ -5427,53 +7070,62 @@ impl Default for RendererOptions {
chase_primitive: ChasePrimitive::Nothing,
support_low_priority_transactions: false,
namespace_alloc_by_client: false,
+ enable_picture_caching: false,
testing: false,
gpu_supports_fast_clears: false,
allow_dual_source_blending: true,
allow_advanced_blend_equation: false,
+ allow_pixel_local_storage_support: false,
allow_texture_storage_support: true,
allow_texture_swizzling: true,
+ batch_lookback_count: DEFAULT_BATCH_LOOKBACK_COUNT,
clear_caches_with_quads: true,
+ // For backwards compatibility we set this to true by default, so
+ // that if the debugger feature is enabled, the debug server will
+ // be started automatically. Users can explicitly disable this as
+ // needed.
+ start_debug_server: true,
dump_shader_source: None,
surface_origin_is_top_left: false,
compositor_config: CompositorConfig::default(),
enable_gpu_markers: true,
panic_on_gl_error: false,
- picture_tile_size: None,
- texture_cache_config: TextureCacheConfig::DEFAULT,
- // Disabling instancing means more vertex data to upload and potentially
- // process by the vertex shaders.
- enable_instancing: true,
+ texture_cache_eviction_threshold_bytes: 64 * 1024 * 1024,
+ texture_cache_max_evictions_per_frame: 32,
}
}
}
-/// The cumulative times spent in each painting phase to generate this frame.
-#[derive(Debug, Default)]
-pub struct FullFrameStats {
- pub full_display_list: bool,
- pub gecko_display_list_time: f64,
- pub wr_display_list_time: f64,
- pub scene_build_time: f64,
- pub frame_build_time: f64,
+pub trait DebugServer {
+ fn send(&mut self, _message: String);
}
-impl FullFrameStats {
- pub fn merge(&self, other: &FullFrameStats) -> Self {
- Self {
- full_display_list: self.full_display_list || other.full_display_list,
- gecko_display_list_time: self.gecko_display_list_time + other.gecko_display_list_time,
- wr_display_list_time: self.wr_display_list_time + other.wr_display_list_time,
- scene_build_time: self.scene_build_time + other.scene_build_time,
- frame_build_time: self.frame_build_time + other.frame_build_time
- }
+struct NoopDebugServer;
+
+impl NoopDebugServer {
+ fn new(_: Sender<ApiMsg>) -> Self {
+ NoopDebugServer
}
+}
+
+impl DebugServer for NoopDebugServer {
+ fn send(&mut self, _: String) {}
+}
- pub fn total(&self) -> f64 {
- self.gecko_display_list_time + self.wr_display_list_time + self.scene_build_time + self.frame_build_time
+#[cfg(feature = "debugger")]
+fn new_debug_server(enable: bool, api_tx: Sender<ApiMsg>) -> Box<dyn DebugServer> {
+ if enable {
+ Box::new(debug_server::DebugServerImpl::new(api_tx))
+ } else {
+ Box::new(NoopDebugServer::new(api_tx))
}
}
+#[cfg(not(feature = "debugger"))]
+fn new_debug_server(_enable: bool, api_tx: Sender<ApiMsg>) -> Box<dyn DebugServer> {
+ Box::new(NoopDebugServer::new(api_tx))
+}
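For illustration only, a caller would go through this helper so that both the compile-time `debugger` feature and the runtime `start_debug_server` option are respected (a sketch; the surrounding renderer state is assumed):

    // Either a real server or the no-op stub ends up behind the same
    // trait object, so call sites need no cfg() checks of their own.
    let mut debug_server = new_debug_server(options.start_debug_server, api_tx.clone());
    debug_server.send(String::from("ping"));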
+
/// Some basic statistics about the rendered scene, used in Gecko, as
/// well as in wrench reftests to ensure that tests are batching and/or
/// allocating on render targets as we expect them to.
@@ -5483,26 +7135,9 @@ pub struct RendererStats {
pub total_draw_calls: usize,
pub alpha_target_count: usize,
pub color_target_count: usize,
- pub texture_upload_mb: f64,
- pub resource_upload_time: f64,
- pub gpu_cache_upload_time: f64,
- pub gecko_display_list_time: f64,
- pub wr_display_list_time: f64,
- pub scene_build_time: f64,
- pub frame_build_time: f64,
- pub full_display_list: bool,
- pub full_paint: bool,
-}
-
-impl RendererStats {
- pub fn merge(&mut self, stats: &FullFrameStats) {
- self.gecko_display_list_time = stats.gecko_display_list_time;
- self.wr_display_list_time = stats.wr_display_list_time;
- self.scene_build_time = stats.scene_build_time;
- self.frame_build_time = stats.frame_build_time;
- self.full_display_list = stats.full_display_list;
- self.full_paint = true;
- }
+ pub texture_upload_kb: usize,
+ pub resource_upload_time: u64,
+ pub gpu_cache_upload_time: u64,
}
/// Return type from render(), which contains some repr(C) statistics as well as
@@ -5512,6 +7147,11 @@ pub struct RenderResults {
/// Statistics about the frame that was rendered.
pub stats: RendererStats,
+ /// A list of dirty world rects. This is only currently
+ /// useful to test infrastructure.
+ /// TODO(gw): This needs to be refactored / removed.
+ pub recorded_dirty_regions: Vec<RecordedDirtyRegion>,
+
/// A list of the device dirty rects that were updated
/// this frame.
/// TODO(gw): This is an initial interface, likely to change in future.
@@ -5521,10 +7161,6 @@ pub struct RenderResults {
/// OS compositor support where the dirty rects apply to a
/// specific picture cache slice / OS compositor surface).
pub dirty_rects: Vec<DeviceIntRect>,
-
- /// Information about the state of picture cache tiles. This is only
- /// allocated and stored if config.testing is true (such as wrench)
- pub picture_cache_debug: PictureCacheDebugInfo,
}
#[cfg(any(feature = "capture", feature = "replay"))]
@@ -5532,7 +7168,7 @@ pub struct RenderResults {
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct PlainTexture {
data: String,
- size: DeviceIntSize,
+ size: (DeviceIntSize, i32),
format: ImageFormat,
filter: TextureFilter,
has_depth: bool,
@@ -5582,6 +7218,19 @@ impl ExternalImageHandler for DummyExternalImageHandler {
fn unlock(&mut self, _key: ExternalImageId, _channel_index: u8) {}
}
+#[cfg(feature = "replay")]
+struct VoidHandler;
+
+#[cfg(feature = "replay")]
+impl OutputImageHandler for VoidHandler {
+ fn lock(&mut self, _: PipelineId) -> Option<(u32, FramebufferIntSize)> {
+ None
+ }
+ fn unlock(&mut self, _: PipelineId) {
+ unreachable!()
+ }
+}
+
#[derive(Default)]
pub struct PipelineInfo {
pub epochs: FastHashMap<(PipelineId, DocumentId), Epoch>,
@@ -5604,40 +7253,42 @@ impl Renderer {
let mut file = fs::File::create(root.join(&short_path))
.expect(&format!("Unable to create {}", short_path));
- let bytes_per_texture = (rect_size.width * rect_size.height * bytes_per_pixel) as usize;
- let mut data = vec![0; bytes_per_texture];
+ let bytes_per_layer = (rect_size.width * rect_size.height * bytes_per_pixel) as usize;
+ let mut data = vec![0; bytes_per_layer];
//TODO: instead of reading from an FBO with `read_pixels*`, we could
// read from textures directly with `get_tex_image*`.
- let rect = device_size_as_framebuffer_size(rect_size).into();
+ for layer_id in 0 .. texture.get_layer_count() {
+ let rect = device_size_as_framebuffer_size(rect_size).into();
- device.attach_read_texture(texture);
- #[cfg(feature = "png")]
- {
- let mut png_data;
- let (data_ref, format) = match texture.get_format() {
- ImageFormat::RGBAF32 => {
- png_data = vec![0; (rect_size.width * rect_size.height * 4) as usize];
- device.read_pixels_into(rect, ImageFormat::RGBA8, &mut png_data);
- (&png_data, ImageFormat::RGBA8)
- }
- fm => (&data, fm),
- };
- CaptureConfig::save_png(
- root.join(format!("textures/{}-{}.png", name, 0)),
- rect_size, format,
- None,
- data_ref,
- );
+ device.attach_read_texture(texture, layer_id);
+ #[cfg(feature = "png")]
+ {
+ let mut png_data;
+ let (data_ref, format) = match texture.get_format() {
+ ImageFormat::RGBAF32 => {
+ png_data = vec![0; (rect_size.width * rect_size.height * 4) as usize];
+ device.read_pixels_into(rect, ImageFormat::RGBA8, &mut png_data);
+ (&png_data, ImageFormat::RGBA8)
+ }
+ fm => (&data, fm),
+ };
+ CaptureConfig::save_png(
+ root.join(format!("textures/{}-{}.png", name, layer_id)),
+ rect_size, format,
+ None,
+ data_ref,
+ );
+ }
+ device.read_pixels_into(rect, read_format, &mut data);
+ file.write_all(&data)
+ .unwrap();
}
- device.read_pixels_into(rect, read_format, &mut data);
- file.write_all(&data)
- .unwrap();
PlainTexture {
data: short_path,
- size: rect_size,
+ size: (rect_size, texture.get_layer_count()),
format: texture.get_format(),
filter: texture.get_filter(),
has_depth: texture.supports_depth(),
@@ -5646,7 +7297,7 @@ impl Renderer {
#[cfg(feature = "replay")]
fn load_texture(
- target: ImageBufferKind,
+ target: TextureTarget,
plain: &PlainTexture,
rt_info: Option<RenderTargetInfo>,
root: &PathBuf,
@@ -5665,10 +7316,11 @@ impl Renderer {
let texture = device.create_texture(
target,
plain.format,
- plain.size.width,
- plain.size.height,
+ plain.size.0.width,
+ plain.size.0.height,
plain.filter,
rt_info,
+ plain.size.1,
);
device.upload_texture_immediate(&texture, &texels);
@@ -5683,14 +7335,13 @@ impl Renderer {
) {
use std::fs;
use std::io::Write;
- use api::ExternalImageData;
- use crate::render_api::CaptureBits;
+ use api::{CaptureBits, ExternalImageData};
let root = config.resource_root();
self.device.begin_frame();
- let _gm = self.gpu_profiler.start_marker("read GPU data");
- self.device.bind_read_target_impl(self.read_fbo, DeviceIntPoint::zero());
+ let _gm = self.gpu_profile.start_marker("read GPU data");
+ self.device.bind_read_target_impl(self.read_fbo);
if config.bits.contains(CaptureBits::EXTERNAL_RESOURCES) && !deferred_images.is_empty() {
info!("saving external images");
@@ -5729,7 +7380,8 @@ impl Renderer {
ExternalImageType::Buffer => unreachable!(),
};
info!("\t\tnative texture of target {:?}", target);
- self.device.attach_read_texture_external(gl_id, target);
+ let layer_index = 0; //TODO: what about layered textures?
+ self.device.attach_read_texture_external(gl_id, target, layer_index);
let data = self.device.read_pixels(&def.descriptor);
let short_path = format!("externals/t{}.raw", tex_id);
(Some(data), e.insert(short_path).clone())
@@ -5782,7 +7434,7 @@ impl Renderer {
let mut plain_self = PlainRenderer {
device_size: self.device_size,
gpu_cache: Self::save_texture(
- self.gpu_cache_texture.get_texture(),
+ &self.gpu_cache_texture.texture.as_ref().unwrap(),
"gpu", &root, &mut self.device,
),
gpu_cache_frame_id: self.gpu_cache_frame_id,
@@ -5802,15 +7454,6 @@ impl Renderer {
self.device.reset_read_target();
self.device.end_frame();
-
- let mut stats_file = fs::File::create(config.root.join("profiler-stats.txt"))
- .expect(&format!("Unable to create profiler-stats.txt"));
- if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) {
- self.profiler.dump_stats(&mut stats_file).unwrap();
- } else {
- writeln!(stats_file, "Turn on PROFILER_DBG or PROFILER_CAPTURE to get stats here!").unwrap();
- }
-
info!("done.");
}
@@ -5820,7 +7463,9 @@ impl Renderer {
config: CaptureConfig,
plain_externals: Vec<PlainExternalImage>,
) {
- use std::{fs::File, io::Read};
+ use std::fs::File;
+ use std::io::Read;
+ use std::slice;
info!("loading external buffer-backed images");
assert!(self.texture_resolver.external_images.is_empty());
@@ -5867,11 +7512,13 @@ impl Renderer {
let tid = match native_map.entry(plain_ext.data) {
Entry::Occupied(e) => e.get().clone(),
Entry::Vacant(e) => {
+ //TODO: provide a way to query both the layer count and the filter from external images
+ let (layer_count, filter) = (1, TextureFilter::Linear);
let plain_tex = PlainTexture {
data: e.key().clone(),
- size: descriptor.size,
+ size: (descriptor.size, layer_count),
format: descriptor.format,
- filter: TextureFilter::Linear,
+ filter,
has_depth: false,
};
let t = Self::load_texture(
@@ -5892,21 +7539,18 @@ impl Renderer {
}
}
- self.device.begin_frame();
- self.gpu_cache_texture.remove_texture(&mut self.device);
-
if let Some(renderer) = config.deserialize_for_resource::<PlainRenderer, _>("renderer") {
info!("loading cached textures");
self.device_size = renderer.device_size;
+ self.device.begin_frame();
for (_id, texture) in self.texture_resolver.texture_cache_map.drain() {
self.device.delete_texture(texture);
}
for (id, texture) in renderer.textures {
info!("\t{}", texture.data);
- let target = ImageBufferKind::Texture2D;
let t = Self::load_texture(
- target,
+ TextureTarget::Array,
&texture,
Some(RenderTargetInfo { has_depth: texture.has_depth }),
&root,
@@ -5916,29 +7560,76 @@ impl Renderer {
}
info!("loading gpu cache");
+ if let Some(t) = self.gpu_cache_texture.texture.take() {
+ self.device.delete_texture(t);
+ }
let (t, gpu_cache_data) = Self::load_texture(
- ImageBufferKind::Texture2D,
+ TextureTarget::Default,
&renderer.gpu_cache,
Some(RenderTargetInfo { has_depth: false }),
&root,
&mut self.device,
);
- self.gpu_cache_texture.load_from_data(t, gpu_cache_data);
+ self.gpu_cache_texture.texture = Some(t);
+ match self.gpu_cache_texture.bus {
+ GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
+ let dim = self.gpu_cache_texture.texture.as_ref().unwrap().get_dimensions();
+ let blocks = unsafe {
+ slice::from_raw_parts(
+ gpu_cache_data.as_ptr() as *const GpuBlockData,
+ gpu_cache_data.len() / mem::size_of::<GpuBlockData>(),
+ )
+ };
+ // fill up the CPU cache from the contents we just loaded
+ rows.clear();
+ rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
+ let chunks = blocks.chunks(MAX_VERTEX_TEXTURE_WIDTH);
+ debug_assert_eq!(chunks.len(), rows.len());
+ for (row, chunk) in rows.iter_mut().zip(chunks) {
+ row.cpu_blocks.copy_from_slice(chunk);
+ }
+ }
+ GpuCacheBus::Scatter { .. } => {}
+ }
self.gpu_cache_frame_id = renderer.gpu_cache_frame_id;
+
+ self.device.end_frame();
} else {
info!("loading cached textures");
self.device.begin_frame();
for (_id, texture) in self.texture_resolver.texture_cache_map.drain() {
self.device.delete_texture(texture);
}
+
+ info!("loading gpu cache");
+ if let Some(t) = self.gpu_cache_texture.texture.take() {
+ self.device.delete_texture(t);
+ }
+ self.device.end_frame();
}
- self.device.end_frame();
+ self.output_image_handler = Some(Box::new(VoidHandler) as Box<_>);
self.external_image_handler = Some(Box::new(image_handler) as Box<_>);
info!("done.");
}
}
+fn get_vao(vertex_array_kind: VertexArrayKind, vaos: &RendererVAOs) -> &VAO {
+ match vertex_array_kind {
+ VertexArrayKind::Primitive => &vaos.prim_vao,
+ VertexArrayKind::Clip => &vaos.clip_vao,
+ VertexArrayKind::Blur => &vaos.blur_vao,
+ VertexArrayKind::VectorStencil | VertexArrayKind::VectorCover => unreachable!(),
+ VertexArrayKind::Border => &vaos.border_vao,
+ VertexArrayKind::Scale => &vaos.scale_vao,
+ VertexArrayKind::LineDecoration => &vaos.line_vao,
+ VertexArrayKind::Gradient => &vaos.gradient_vao,
+ VertexArrayKind::Resolve => &vaos.resolve_vao,
+ VertexArrayKind::SvgFilter => &vaos.svg_filter_vao,
+ VertexArrayKind::Composite => &vaos.composite_vao,
+ VertexArrayKind::Clear => &vaos.clear_vao,
+ }
+}
#[derive(Clone, Copy, PartialEq)]
enum FramebufferKind {
Main,
@@ -5950,6 +7641,8 @@ fn should_skip_batch(kind: &BatchKind, flags: DebugFlags) -> bool {
BatchKind::TextRun(_) => {
flags.contains(DebugFlags::DISABLE_TEXT_PRIMS)
}
+ BatchKind::Brush(BrushBatchKind::ConicGradient) |
+ BatchKind::Brush(BrushBatchKind::RadialGradient) |
BatchKind::Brush(BrushBatchKind::LinearGradient) => {
flags.contains(DebugFlags::DISABLE_GRADIENT_PRIMS)
}
@@ -5962,7 +7655,6 @@ impl CompositeState {
/// cache tiles to the OS compositor
fn composite_native(
&self,
- dirty_rects: &[DeviceIntRect],
compositor: &mut dyn Compositor,
) {
// Add each surface to the visual tree. z-order is implicit based on
@@ -5971,44 +7663,9 @@ impl CompositeState {
for surface in &self.descriptor.surfaces {
compositor.add_surface(
surface.surface_id.expect("bug: no native surface allocated"),
- surface.transform,
+ surface.offset.to_i32(),
surface.clip_rect.to_i32(),
- surface.image_rendering,
);
}
- compositor.start_compositing(dirty_rects, &[]);
- }
-}
-
-mod tests {
- #[test]
- fn test_buffer_damage_tracker() {
- use super::BufferDamageTracker;
- use api::units::{DevicePoint, DeviceRect, DeviceSize};
-
- let mut tracker = BufferDamageTracker::default();
- assert_eq!(tracker.get_damage_rect(0), None);
- assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
- assert_eq!(tracker.get_damage_rect(2), Some(DeviceRect::zero()));
- assert_eq!(tracker.get_damage_rect(3), Some(DeviceRect::zero()));
- assert_eq!(tracker.get_damage_rect(4), None);
-
- let damage1 = DeviceRect::new(DevicePoint::new(10.0, 10.0), DeviceSize::new(10.0, 10.0));
- let damage2 = DeviceRect::new(DevicePoint::new(20.0, 20.0), DeviceSize::new(10.0, 10.0));
- let combined = damage1.union(&damage2);
-
- tracker.push_dirty_rect(&damage1);
- assert_eq!(tracker.get_damage_rect(0), None);
- assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
- assert_eq!(tracker.get_damage_rect(2), Some(damage1));
- assert_eq!(tracker.get_damage_rect(3), Some(damage1));
- assert_eq!(tracker.get_damage_rect(4), None);
-
- tracker.push_dirty_rect(&damage2);
- assert_eq!(tracker.get_damage_rect(0), None);
- assert_eq!(tracker.get_damage_rect(1), Some(DeviceRect::zero()));
- assert_eq!(tracker.get_damage_rect(2), Some(damage2));
- assert_eq!(tracker.get_damage_rect(3), Some(combined));
- assert_eq!(tracker.get_damage_rect(4), None);
}
}
diff --git a/third_party/webrender/webrender/src/renderer/gpu_cache.rs b/third_party/webrender/webrender/src/renderer/gpu_cache.rs
deleted file mode 100644
index e7f16e91c5c..00000000000
--- a/third_party/webrender/webrender/src/renderer/gpu_cache.rs
+++ /dev/null
@@ -1,525 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use std::{cmp, mem};
-use api::units::*;
-use malloc_size_of::MallocSizeOfOps;
-use crate::{
- device::{CustomVAO, Device, DrawTarget, Program, ReadTarget, Texture, TextureFilter, UploadPBOPool, VBO},
- gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList},
- internal_types::{RenderTargetInfo, Swizzle},
- prim_store::DeferredResolve,
- profiler,
- render_api::MemoryReport,
- render_backend::FrameId,
-};
-
-/// Enabling this toggle would force the GPU cache scattered texture to
-/// be resized every frame, which enables GPU debuggers to see if this
-/// is performed correctly.
-const GPU_CACHE_RESIZE_TEST: bool = false;
-
-/// Tracks the state of each row in the GPU cache texture.
-struct CacheRow {
- /// Mirrored block data on CPU for this row. We store a copy of
- /// the data on the CPU side to improve upload batching.
- cpu_blocks: Box<[GpuBlockData; super::MAX_VERTEX_TEXTURE_WIDTH]>,
- /// The first offset in this row that is dirty.
- min_dirty: u16,
- /// The last offset in this row that is dirty.
- max_dirty: u16,
-}
-
-impl CacheRow {
- fn new() -> Self {
- CacheRow {
- cpu_blocks: Box::new([GpuBlockData::EMPTY; super::MAX_VERTEX_TEXTURE_WIDTH]),
- min_dirty: super::MAX_VERTEX_TEXTURE_WIDTH as _,
- max_dirty: 0,
- }
- }
-
- fn is_dirty(&self) -> bool {
- return self.min_dirty < self.max_dirty;
- }
-
- fn clear_dirty(&mut self) {
- self.min_dirty = super::MAX_VERTEX_TEXTURE_WIDTH as _;
- self.max_dirty = 0;
- }
-
- fn add_dirty(&mut self, block_offset: usize, block_count: usize) {
- self.min_dirty = self.min_dirty.min(block_offset as _);
- self.max_dirty = self.max_dirty.max((block_offset + block_count) as _);
- }
-
- fn dirty_blocks(&self) -> &[GpuBlockData] {
- return &self.cpu_blocks[self.min_dirty as usize .. self.max_dirty as usize];
- }
-}
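A small worked example of the dirty-range bookkeeping above (hedged, values illustrative):

    // A fresh row is clean: min_dirty == MAX_VERTEX_TEXTURE_WIDTH, max_dirty == 0.
    let mut row = CacheRow::new();
    assert!(!row.is_dirty());

    // Marking blocks [3, 5) and later [10, 11) dirty widens the tracked
    // range to [3, 11), so dirty_blocks() yields 8 blocks even though
    // only 3 of them actually changed.
    row.add_dirty(3, 2);
    row.add_dirty(10, 1);
    assert_eq!(row.dirty_blocks().len(), 8);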
-
-/// The bus over which CPU and GPU versions of the GPU cache
-/// get synchronized.
-enum GpuCacheBus {
- /// PBO-based updates, currently operate on a row granularity.
- /// Therefore, are subject to fragmentation issues.
- PixelBuffer {
- /// Per-row data.
- rows: Vec<CacheRow>,
- },
- /// Shader-based scattering updates. Currently rendered by a set
- /// of points into the GPU texture, each carrying a `GpuBlockData`.
- Scatter {
- /// Special program to run the scattered update.
- program: Program,
- /// VAO containing the source vertex buffers.
- vao: CustomVAO,
- /// VBO for positional data, supplied as normalized `u16`.
- buf_position: VBO<[u16; 2]>,
- /// VBO for gpu block data.
- buf_value: VBO<GpuBlockData>,
- /// Currently stored block count.
- count: usize,
- },
-}
-
-/// The device-specific representation of the cache texture in gpu_cache.rs
-pub struct GpuCacheTexture {
- texture: Option<Texture>,
- bus: GpuCacheBus,
-}
-
-impl GpuCacheTexture {
- /// Ensures that we have an appropriately-sized texture.
- fn ensure_texture(&mut self, device: &mut Device, height: i32) {
- // If we already have a texture that works, we're done.
- if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
- if GPU_CACHE_RESIZE_TEST {
- // Special debug mode - resize the texture even though it's fine.
- } else {
- return;
- }
- }
-
- // Take the old texture, if any.
- let blit_source = self.texture.take();
-
- // Create the new texture.
- assert!(height >= 2, "Height is too small for ANGLE");
- let new_size = DeviceIntSize::new(super::MAX_VERTEX_TEXTURE_WIDTH as _, height);
- // GpuCacheBus::Scatter always requires the texture to be a render target. For
- // GpuCacheBus::PixelBuffer, we only create the texture with a render target if
- // RGBAF32 render targets are actually supported, and only if glCopyImageSubData
- // is not. glCopyImageSubData does not require a render target to copy the texture
- // data, and if neither RGBAF32 render targets nor glCopyImageSubData is supported,
- // we simply re-upload the entire contents rather than copying upon resize.
- let supports_copy_image_sub_data = device.get_capabilities().supports_copy_image_sub_data;
- let supports_color_buffer_float = device.get_capabilities().supports_color_buffer_float;
- let rt_info = if matches!(self.bus, GpuCacheBus::PixelBuffer { .. })
- && (supports_copy_image_sub_data || !supports_color_buffer_float)
- {
- None
- } else {
- Some(RenderTargetInfo { has_depth: false })
- };
- let mut texture = device.create_texture(
- api::ImageBufferKind::Texture2D,
- api::ImageFormat::RGBAF32,
- new_size.width,
- new_size.height,
- TextureFilter::Nearest,
- rt_info,
- );
-
- // Copy the contents of the previous texture, if applicable.
- if let Some(blit_source) = blit_source {
- if !supports_copy_image_sub_data && !supports_color_buffer_float {
- // Cannot copy texture, so must re-upload everything.
- match self.bus {
- GpuCacheBus::PixelBuffer { ref mut rows } => {
- for row in rows {
- row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
- }
- }
- GpuCacheBus::Scatter { .. } => {
- panic!("Texture must be copyable to use scatter GPU cache bus method");
- }
- }
- } else {
- device.copy_entire_texture(&mut texture, &blit_source);
- }
- device.delete_texture(blit_source);
- }
-
- self.texture = Some(texture);
- }
-
- pub fn new(device: &mut Device, use_scatter: bool) -> Result<Self, super::RendererError> {
- use super::desc::GPU_CACHE_UPDATE;
-
- let bus = if use_scatter {
- assert!(
- device.get_capabilities().supports_color_buffer_float,
- "GpuCache scatter method requires EXT_color_buffer_float",
- );
- let program = device.create_program_linked(
- "gpu_cache_update",
- &[],
- &GPU_CACHE_UPDATE,
- )?;
- let buf_position = device.create_vbo();
- let buf_value = device.create_vbo();
- //Note: the vertex attributes have to be supplied in the same order
- // as for program creation, but each assigned to a different stream.
- let vao = device.create_custom_vao(&[
- buf_position.stream_with(&GPU_CACHE_UPDATE.vertex_attributes[0..1]),
- buf_value .stream_with(&GPU_CACHE_UPDATE.vertex_attributes[1..2]),
- ]);
- GpuCacheBus::Scatter {
- program,
- vao,
- buf_position,
- buf_value,
- count: 0,
- }
- } else {
- GpuCacheBus::PixelBuffer {
- rows: Vec::new(),
- }
- };
-
- Ok(GpuCacheTexture {
- texture: None,
- bus,
- })
- }
-
- pub fn deinit(mut self, device: &mut Device) {
- if let Some(t) = self.texture.take() {
- device.delete_texture(t);
- }
- if let GpuCacheBus::Scatter { program, vao, buf_position, buf_value, .. } = self.bus {
- device.delete_program(program);
- device.delete_custom_vao(vao);
- device.delete_vbo(buf_position);
- device.delete_vbo(buf_value);
- }
- }
-
- pub fn get_height(&self) -> i32 {
- self.texture.as_ref().map_or(0, |t| t.get_dimensions().height)
- }
-
- #[cfg(feature = "capture")]
- pub fn get_texture(&self) -> &Texture {
- self.texture.as_ref().unwrap()
- }
-
- fn prepare_for_updates(
- &mut self,
- device: &mut Device,
- total_block_count: usize,
- max_height: i32,
- ) {
- self.ensure_texture(device, max_height);
- match self.bus {
- GpuCacheBus::PixelBuffer { .. } => {},
- GpuCacheBus::Scatter {
- ref mut buf_position,
- ref mut buf_value,
- ref mut count,
- ..
- } => {
- *count = 0;
- if total_block_count > buf_value.allocated_count() {
- device.allocate_vbo(buf_position, total_block_count, super::ONE_TIME_USAGE_HINT);
- device.allocate_vbo(buf_value, total_block_count, super::ONE_TIME_USAGE_HINT);
- }
- }
- }
- }
-
- pub fn invalidate(&mut self) {
- match self.bus {
- GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
- info!("Invalidating GPU caches");
- for row in rows {
- row.add_dirty(0, super::MAX_VERTEX_TEXTURE_WIDTH);
- }
- }
- GpuCacheBus::Scatter { .. } => {
- warn!("Unable to invalidate scattered GPU cache");
- }
- }
- }
-
- fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
- match self.bus {
- GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
- for update in &updates.updates {
- match *update {
- GpuCacheUpdate::Copy {
- block_index,
- block_count,
- address,
- } => {
- let row = address.v as usize;
-
- // Ensure that the CPU-side shadow copy of the GPU cache data has enough
- // rows to apply this patch.
- while rows.len() <= row {
- // Add a new row.
- rows.push(CacheRow::new());
- }
-
- // Copy the blocks from the patch array in the shadow CPU copy.
- let block_offset = address.u as usize;
- let data = &mut rows[row].cpu_blocks;
- for i in 0 .. block_count {
- data[block_offset + i] = updates.blocks[block_index + i];
- }
-
- // This row is dirty (needs to be updated in GPU texture).
- rows[row].add_dirty(block_offset, block_count);
- }
- }
- }
- }
- GpuCacheBus::Scatter {
- ref buf_position,
- ref buf_value,
- ref mut count,
- ..
- } => {
- //TODO: re-use this heap allocation
- // Unused positions will be left as 0xFFFF, which translates to
- // (1.0, 1.0) in the vertex output position and gets culled out
- let mut position_data = vec![[!0u16; 2]; updates.blocks.len()];
- let size = self.texture.as_ref().unwrap().get_dimensions().to_usize();
-
- for update in &updates.updates {
- match *update {
- GpuCacheUpdate::Copy {
- block_index,
- block_count,
- address,
- } => {
- // Convert the absolute texel position into normalized
- let y = ((2*address.v as usize + 1) << 15) / size.height;
- for i in 0 .. block_count {
- let x = ((2*address.u as usize + 2*i + 1) << 15) / size.width;
- position_data[block_index + i] = [x as _, y as _];
- }
- }
- }
- }
-
- device.fill_vbo(buf_value, &updates.blocks, *count);
- device.fill_vbo(buf_position, &position_data, *count);
- *count += position_data.len();
- }
- }
- }
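The fixed-point math in the Scatter arm above maps each texel center to a normalized u16 coordinate; a quick sanity check (hedged, illustrative values):

    // For a texel at column u in a texture of width w, the center lies at
    // (u + 0.5) / w in [0, 1). In 1.15 fixed point:
    //   x = ((2*u + 1) << 15) / w  ==  ((u + 0.5) / w) * 65536
    let (u, w) = (0usize, 1024usize);
    let x = ((2 * u + 1) << 15) / w;
    assert_eq!(x, 32); // 0.5 / 1024 * 65536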
-
- fn flush(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool) -> usize {
- let texture = self.texture.as_ref().unwrap();
- match self.bus {
- GpuCacheBus::PixelBuffer { ref mut rows } => {
- let rows_dirty = rows
- .iter()
- .filter(|row| row.is_dirty())
- .count();
- if rows_dirty == 0 {
- return 0
- }
-
- let mut uploader = device.upload_texture(pbo_pool);
-
- for (row_index, row) in rows.iter_mut().enumerate() {
- if !row.is_dirty() {
- continue;
- }
-
- let blocks = row.dirty_blocks();
- let rect = DeviceIntRect::new(
- DeviceIntPoint::new(row.min_dirty as i32, row_index as i32),
- DeviceIntSize::new(blocks.len() as i32, 1),
- );
-
- uploader.upload(device, texture, rect, None, None, blocks.as_ptr(), blocks.len());
-
- row.clear_dirty();
- }
-
- uploader.flush(device);
-
- rows_dirty
- }
- GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
- device.disable_depth();
- device.set_blend(false);
- device.bind_program(program);
- device.bind_custom_vao(vao);
- device.bind_draw_target(
- DrawTarget::from_texture(
- texture,
- false,
- ),
- );
- device.draw_nonindexed_points(0, count as _);
- 0
- }
- }
- }
-
- #[cfg(feature = "replay")]
- pub fn remove_texture(&mut self, device: &mut Device) {
- if let Some(t) = self.texture.take() {
- device.delete_texture(t);
- }
- }
-
- #[cfg(feature = "replay")]
- pub fn load_from_data(&mut self, texture: Texture, data: Vec<u8>) {
- assert!(self.texture.is_none());
- match self.bus {
- GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
- let dim = texture.get_dimensions();
- let blocks = unsafe {
- std::slice::from_raw_parts(
- data.as_ptr() as *const GpuBlockData,
- data.len() / mem::size_of::<GpuBlockData>(),
- )
- };
- // fill up the CPU cache from the contents we just loaded
- rows.clear();
- rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
- let chunks = blocks.chunks(super::MAX_VERTEX_TEXTURE_WIDTH);
- debug_assert_eq!(chunks.len(), rows.len());
- for (row, chunk) in rows.iter_mut().zip(chunks) {
- row.cpu_blocks.copy_from_slice(chunk);
- }
- }
- GpuCacheBus::Scatter { .. } => {}
- }
- self.texture = Some(texture);
- }
-
- pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
- if let GpuCacheBus::PixelBuffer{ref rows, ..} = self.bus {
- for row in rows.iter() {
- report.gpu_cache_cpu_mirror += unsafe { (size_op_funs.size_of_op)(row.cpu_blocks.as_ptr() as *const _) };
- }
- }
-
- // GPU cache GPU memory.
- report.gpu_cache_textures +=
- self.texture.as_ref().map_or(0, |t| t.size_in_bytes());
- }
-}
-
-impl super::Renderer {
- pub fn update_gpu_cache(&mut self) {
- let _gm = self.gpu_profiler.start_marker("gpu cache update");
-
- // For an artificial stress test of GPU cache resizing,
- // always pass an extra update list with at least one block in it.
- let gpu_cache_height = self.gpu_cache_texture.get_height();
- if gpu_cache_height != 0 && GPU_CACHE_RESIZE_TEST {
- self.pending_gpu_cache_updates.push(GpuCacheUpdateList {
- frame_id: FrameId::INVALID,
- clear: false,
- height: gpu_cache_height,
- blocks: vec![[1f32; 4].into()],
- updates: Vec::new(),
- debug_commands: Vec::new(),
- });
- }
-
- let (updated_blocks, max_requested_height) = self
- .pending_gpu_cache_updates
- .iter()
- .fold((0, gpu_cache_height), |(count, height), list| {
- (count + list.blocks.len(), cmp::max(height, list.height))
- });
-
- if max_requested_height > self.get_max_texture_size() && !self.gpu_cache_overflow {
- self.gpu_cache_overflow = true;
- self.renderer_errors.push(super::RendererError::MaxTextureSize);
- }
-
- // Note: if we decide to switch to scatter-style GPU cache update
- // permanently, we can have this code nicer with `BufferUploader` kind
- // of helper, similarly to how `TextureUploader` API is used.
- self.gpu_cache_texture.prepare_for_updates(
- &mut self.device,
- updated_blocks,
- max_requested_height,
- );
-
- for update_list in self.pending_gpu_cache_updates.drain(..) {
- assert!(update_list.height <= max_requested_height);
- if update_list.frame_id > self.gpu_cache_frame_id {
- self.gpu_cache_frame_id = update_list.frame_id
- }
- self.gpu_cache_texture
- .update(&mut self.device, &update_list);
- }
-
- self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME);
- let updated_rows = self.gpu_cache_texture.flush(
- &mut self.device,
- &mut self.texture_upload_pbo_pool
- );
- self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME);
-
- self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows);
- self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks);
- }
-
- pub fn prepare_gpu_cache(
- &mut self,
- deferred_resolves: &[DeferredResolve],
- ) -> Result<(), super::RendererError> {
- if self.pending_gpu_cache_clear {
- let use_scatter =
- matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
- let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter)?;
- let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
- old_cache.deinit(&mut self.device);
- self.pending_gpu_cache_clear = false;
- }
-
- let deferred_update_list = self.update_deferred_resolves(deferred_resolves);
- self.pending_gpu_cache_updates.extend(deferred_update_list);
-
- self.update_gpu_cache();
-
- // Note: the texture might have changed during the `update`,
- // so we need to bind it here.
- self.device.bind_texture(
- super::TextureSampler::GpuCache,
- self.gpu_cache_texture.texture.as_ref().unwrap(),
- Swizzle::default(),
- );
-
- Ok(())
- }
-
- pub fn read_gpu_cache(&mut self) -> (DeviceIntSize, Vec<u8>) {
- let texture = self.gpu_cache_texture.texture.as_ref().unwrap();
- let size = device_size_as_framebuffer_size(texture.get_dimensions());
- let mut texels = vec![0; (size.width * size.height * 16) as usize];
- self.device.begin_frame();
- self.device.bind_read_target(ReadTarget::from_texture(texture));
- self.device.read_pixels_into(
- size.into(),
- api::ImageFormat::RGBAF32,
- &mut texels,
- );
- self.device.reset_read_target();
- self.device.end_frame();
- (texture.get_dimensions(), texels)
- }
-}
diff --git a/third_party/webrender/webrender/src/renderer/upload.rs b/third_party/webrender/webrender/src/renderer/upload.rs
deleted file mode 100644
index 0dac5eaefd1..00000000000
--- a/third_party/webrender/webrender/src/renderer/upload.rs
+++ /dev/null
@@ -1,792 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! This module contains the convoluted logic that goes into uploading content into
-//! the texture cache's textures.
-//!
-//! We need to support various combinations of code paths depending on the quirks of
-//! each hardware/driver configuration:
-//! - direct upload,
-//! - staged upload via a pixel buffer object,
-//! - staged upload via a direct upload to a staging texture where PBO's aren't supported,
-//! - copy from the staging to destination textures, either via blits or batched draw calls.
-//!
-//! Conceptually a lot of this logic should probably be in the device module, but some code
-//! here relies on submitting draw calls via the renderer.
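As a rough sketch, the code paths listed above could be named like this (a hypothetical enum for exposition only; the real selection logic is spread across Device capabilities and this module):

    enum UploadPath {
        Direct,        // write straight into the destination texture
        StagedPbo,     // stage in a pixel buffer object, then upload
        StagedTexture, // no usable PBOs: stage in a texture instead
        CopyBlit,      // staging -> cache copies via blits
        CopyDraw,      // staging -> cache copies via batched draw calls
    }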
-
-
-use std::mem;
-use std::collections::VecDeque;
-use euclid::Transform3D;
-use time::precise_time_ns;
-use malloc_size_of::MallocSizeOfOps;
-use api::units::*;
-use api::{ExternalImageSource, PremultipliedColorF, ImageBufferKind, ImageRendering, ImageFormat};
-use crate::renderer::{
- Renderer, VertexArrayKind, RendererStats, TextureSampler, TEXTURE_CACHE_DBG_CLEAR_COLOR
-};
-use crate::internal_types::{
- FastHashMap, TextureUpdateSource, Swizzle, TextureCacheUpdate,
- CacheTextureId, RenderTargetInfo,
-};
-use crate::device::{
- Device, UploadMethod, Texture, DrawTarget, UploadStagingBuffer, TextureFlags, TextureUploader,
- TextureFilter,
-};
-use crate::gpu_types::{ZBufferId, CompositeInstance};
-use crate::batch::BatchTextures;
-use crate::texture_pack::{GuillotineAllocator, FreeRectSlice};
-use crate::composite::{CompositeFeatures, CompositeSurfaceFormat};
-use crate::profiler;
-use crate::render_api::MemoryReport;
-
-pub const BATCH_UPLOAD_TEXTURE_SIZE: DeviceIntSize = DeviceIntSize::new(512, 512);
-
-/// Upload a number of items to texture cache textures.
-///
-/// This is the main entry point of the texture cache upload code.
-/// See also the module documentation for more information.
-pub fn upload_to_texture_cache(
- renderer: &mut Renderer,
- update_list: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
-) {
-
- let mut stats = UploadStats {
- num_draw_calls: 0,
- upload_time: 0,
- cpu_buffer_alloc_time: 0,
- texture_alloc_time: 0,
- cpu_copy_time: 0,
- gpu_copy_commands_time: 0,
- bytes_uploaded: 0,
- };
-
- let upload_total_start = precise_time_ns();
-
- let mut batch_upload_textures = Vec::new();
-
- // A list of copies that must be performed from the temporary textures to the texture cache.
- let mut batch_upload_copies = Vec::new();
-
- // For each texture format, this stores a list of staging buffers
- // and a texture allocator for packing the buffers.
- let mut batch_upload_buffers = FastHashMap::default();
-
- // For best performance we use a single TextureUploader for all uploads.
- // This allows us to fill PBOs more efficiently and therefore allocate fewer PBOs.
- let mut uploader = renderer.device.upload_texture(
- &mut renderer.texture_upload_pbo_pool,
- );
-
- let num_updates = update_list.len();
-
- for (texture_id, updates) in update_list {
- let texture = &renderer.texture_resolver.texture_cache_map[&texture_id];
- for update in updates {
- let TextureCacheUpdate { rect, stride, offset, format_override, source } = update;
-
- let dummy_data;
- let data = match source {
- TextureUpdateSource::Bytes { ref data } => {
- &data[offset as usize ..]
- }
- TextureUpdateSource::External { id, channel_index } => {
- let handler = renderer.external_image_handler
- .as_mut()
- .expect("Found external image, but no handler set!");
- // The filter is only relevant for NativeTexture external images.
- match handler.lock(id, channel_index, ImageRendering::Auto).source {
- ExternalImageSource::RawData(data) => {
- &data[offset as usize ..]
- }
- ExternalImageSource::Invalid => {
- // Create a local buffer to fill the pbo.
- let bpp = texture.get_format().bytes_per_pixel();
- let width = stride.unwrap_or(rect.size.width * bpp);
- let total_size = width * rect.size.height;
- // WR doesn't support the RGBAF32 format in the texture cache, so
- // we use the u8 type here.
- dummy_data = vec![0xFFu8; total_size as usize];
- &dummy_data
- }
- ExternalImageSource::NativeTexture(eid) => {
- panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
- }
- }
- }
- TextureUpdateSource::DebugClear => {
- let draw_target = DrawTarget::from_texture(
- texture,
- false,
- );
- renderer.device.bind_draw_target(draw_target);
- renderer.device.clear_target(
- Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
- None,
- Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
- );
-
- continue;
- }
- };
-
- let use_batch_upload = renderer.device.use_batched_texture_uploads() &&
- texture.flags().contains(TextureFlags::IS_SHARED_TEXTURE_CACHE) &&
- rect.size.width <= BATCH_UPLOAD_TEXTURE_SIZE.width &&
- rect.size.height <= BATCH_UPLOAD_TEXTURE_SIZE.height;
-
- if use_batch_upload {
- copy_into_staging_buffer(
- &mut renderer.device,
- &mut uploader,
- &mut renderer.staging_texture_pool,
- rect,
- stride,
- data,
- texture_id,
- texture,
- &mut batch_upload_buffers,
- &mut batch_upload_textures,
- &mut batch_upload_copies,
- &mut stats,
- );
- } else {
- let upload_start_time = precise_time_ns();
-
- stats.bytes_uploaded += uploader.upload(
- &mut renderer.device,
- texture,
- rect,
- stride,
- format_override,
- data.as_ptr(),
- data.len()
- );
-
- stats.upload_time += precise_time_ns() - upload_start_time;
- }
-
- if let TextureUpdateSource::External { id, channel_index } = source {
- let handler = renderer.external_image_handler
- .as_mut()
- .expect("Found external image, but no handler set!");
- handler.unlock(id, channel_index);
- }
- }
- }
-
- let upload_start_time = precise_time_ns();
- // Upload batched texture updates to their temporary textures.
- for batch_buffer in batch_upload_buffers.into_iter().map(|(_, (_, buffers))| buffers).flatten() {
- let texture = &batch_upload_textures[batch_buffer.texture_index];
- match batch_buffer.staging_buffer {
- StagingBufferKind::Pbo(pbo) => {
- stats.bytes_uploaded += uploader.upload_staged(
- &mut renderer.device,
- texture,
- DeviceIntRect::from_size(texture.get_dimensions()),
- None,
- pbo,
- );
- }
- StagingBufferKind::CpuBuffer { bytes, .. } => {
- let bpp = texture.get_format().bytes_per_pixel();
- stats.bytes_uploaded += uploader.upload(
- &mut renderer.device,
- texture,
- batch_buffer.upload_rect,
- Some(BATCH_UPLOAD_TEXTURE_SIZE.width * bpp),
- None,
- bytes.as_ptr(),
- bytes.len()
- );
- renderer.staging_texture_pool.return_temporary_buffer(bytes);
- }
- }
- }
- stats.upload_time += precise_time_ns() - upload_start_time;
-
-
- // Flush all uploads, batched or otherwise.
- let flush_start_time = precise_time_ns();
- uploader.flush(&mut renderer.device);
- stats.upload_time += precise_time_ns() - flush_start_time;
-
- if !batch_upload_copies.is_empty() {
- // Copy updates that were batch uploaded to their correct destination in the texture cache.
- // Sort them by destination and source to minimize framebuffer binding changes.
- batch_upload_copies.sort_unstable_by_key(|b| (b.dest_texture_id.0, b.src_texture_index));
-
- let gpu_copy_start = precise_time_ns();
-
- if renderer.device.use_draw_calls_for_texture_copy() {
- // Some drivers have a very high CPU overhead when submitting hundreds of small blit
- // commands (low-end Intel drivers on Windows, for example, can take 100+ ms submitting a
- // few hundred blits). In this case we do the copy with batched draw calls.
- copy_from_staging_to_cache_using_draw_calls(
- renderer,
- &mut stats,
- &batch_upload_textures,
- batch_upload_copies,
- );
- } else {
- copy_from_staging_to_cache(
- renderer,
- &batch_upload_textures,
- batch_upload_copies,
- );
- }
-
- stats.gpu_copy_commands_time += precise_time_ns() - gpu_copy_start;
- }
-
- for texture in batch_upload_textures.drain(..) {
- renderer.staging_texture_pool.return_texture(texture);
- }
-
- // Update the profile counters. We use add instead of set because
- // this function can be called several times per frame.
- // We don't update the counters when their value is zero, so that
- // the profiler can treat them as events and we can get notified
- // when they happen.
-
- let upload_total = precise_time_ns() - upload_total_start;
- renderer.profile.add(
- profiler::TOTAL_UPLOAD_TIME,
- profiler::ns_to_ms(upload_total)
- );
-
- if num_updates > 0 {
- renderer.profile.add(profiler::TEXTURE_UPLOADS, num_updates);
- }
-
- if stats.bytes_uploaded > 0 {
- renderer.profile.add(
- profiler::TEXTURE_UPLOADS_MEM,
- profiler::bytes_to_mb(stats.bytes_uploaded)
- );
- }
-
- if stats.cpu_copy_time > 0 {
- renderer.profile.add(
- profiler::UPLOAD_CPU_COPY_TIME,
- profiler::ns_to_ms(stats.cpu_copy_time)
- );
- }
- if stats.upload_time > 0 {
- renderer.profile.add(
- profiler::UPLOAD_TIME,
- profiler::ns_to_ms(stats.upload_time)
- );
- }
- if stats.texture_alloc_time > 0 {
- renderer.profile.add(
- profiler::STAGING_TEXTURE_ALLOCATION_TIME,
- profiler::ns_to_ms(stats.texture_alloc_time)
- );
- }
- if stats.cpu_buffer_alloc_time > 0 {
- renderer.profile.add(
- profiler::CPU_TEXTURE_ALLOCATION_TIME,
- profiler::ns_to_ms(stats.cpu_buffer_alloc_time)
- );
- }
- if stats.num_draw_calls > 0 {
- renderer.profile.add(
- profiler::UPLOAD_NUM_COPY_BATCHES,
- stats.num_draw_calls
- );
- }
-
- if stats.gpu_copy_commands_time > 0 {
- renderer.profile.add(
- profiler::UPLOAD_GPU_COPY_TIME,
- profiler::ns_to_ms(stats.gpu_copy_commands_time)
- );
- }
-}
-
-/// Copy an item into a batched upload staging buffer.
-fn copy_into_staging_buffer<'a>(
- device: &mut Device,
- uploader: &mut TextureUploader<'a>,
- staging_texture_pool: &mut UploadTexturePool,
- update_rect: DeviceIntRect,
- update_stride: Option<i32>,
- data: &[u8],
- dest_texture_id: CacheTextureId,
- texture: &Texture,
- batch_upload_buffers: &mut FastHashMap<ImageFormat, (GuillotineAllocator, Vec<BatchUploadBuffer<'a>>)>,
- batch_upload_textures: &mut Vec<Texture>,
- batch_upload_copies: &mut Vec<BatchUploadCopy>,
- stats: &mut UploadStats
-) {
- let (allocator, buffers) = batch_upload_buffers.entry(texture.get_format())
- .or_insert_with(|| (GuillotineAllocator::new(None), Vec::new()));
-
- // Allocate a region within the staging buffer for this update. If there is
- // no room in an existing buffer then allocate another texture and buffer.
- let (slice, origin) = match allocator.allocate(&update_rect.size) {
- Some((slice, origin)) => (slice, origin),
- None => {
- let new_slice = FreeRectSlice(buffers.len() as u32);
- allocator.extend(new_slice, BATCH_UPLOAD_TEXTURE_SIZE, update_rect.size);
-
- let texture_alloc_time_start = precise_time_ns();
- let staging_texture = staging_texture_pool.get_texture(device, texture.get_format());
- stats.texture_alloc_time = precise_time_ns() - texture_alloc_time_start;
-
- let texture_index = batch_upload_textures.len();
- batch_upload_textures.push(staging_texture);
-
- let cpu_buffer_alloc_start_time = precise_time_ns();
- let staging_buffer = match device.upload_method() {
- UploadMethod::Immediate => StagingBufferKind::CpuBuffer {
- bytes: staging_texture_pool.get_temporary_buffer(),
- },
- UploadMethod::PixelBuffer(_) => {
- let pbo = uploader.stage(
- device,
- texture.get_format(),
- BATCH_UPLOAD_TEXTURE_SIZE,
- ).unwrap();
-
- StagingBufferKind::Pbo(pbo)
- }
- };
- stats.cpu_buffer_alloc_time += precise_time_ns() - cpu_buffer_alloc_start_time;
-
- buffers.push(BatchUploadBuffer {
- staging_buffer,
- texture_index,
- upload_rect: DeviceIntRect::zero()
- });
-
- (new_slice, DeviceIntPoint::zero())
- }
- };
- let buffer = &mut buffers[slice.0 as usize];
- let allocated_rect = DeviceIntRect::new(origin, update_rect.size);
- buffer.upload_rect = buffer.upload_rect.union(&allocated_rect);
-
- batch_upload_copies.push(BatchUploadCopy {
- src_texture_index: buffer.texture_index,
- src_offset: allocated_rect.origin,
- dest_texture_id,
- dest_offset: update_rect.origin,
- size: update_rect.size,
- });
-
- unsafe {
- let memcpy_start_time = precise_time_ns();
- let bpp = texture.get_format().bytes_per_pixel() as usize;
- let width_bytes = update_rect.size.width as usize * bpp;
- let src_stride = update_stride.map_or(width_bytes, |stride| {
- assert!(stride >= 0);
- stride as usize
- });
- let src_size = (update_rect.size.height as usize - 1) * src_stride + width_bytes;
- assert!(src_size <= data.len());
-
- let src: &[mem::MaybeUninit<u8>] = std::slice::from_raw_parts(data.as_ptr() as *const _, src_size);
- let (dst_stride, dst) = match &mut buffer.staging_buffer {
- StagingBufferKind::Pbo(buffer) => (
- buffer.get_stride(),
- buffer.get_mapping(),
- ),
- StagingBufferKind::CpuBuffer { bytes } => (
- BATCH_UPLOAD_TEXTURE_SIZE.width as usize * bpp,
- &mut bytes[..],
- )
- };
-
- // copy the data line-by-line into the buffer so that we do not overwrite
- // any other region of the buffer.
- for y in 0..allocated_rect.size.height as usize {
- let src_start = y * src_stride;
- let src_end = src_start + width_bytes;
- let dst_start = (allocated_rect.origin.y as usize + y as usize) * dst_stride +
- allocated_rect.origin.x as usize * bpp;
- let dst_end = dst_start + width_bytes;
-
- dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
- }
-
- stats.cpu_copy_time += precise_time_ns() - memcpy_start_time;
- }
-}
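A quick numeric check of the stride arithmetic above (hedged, illustrative values):

    // An 8x4 RGBA8 update (bpp = 4) with a 64-byte source stride: each row
    // carries 32 payload bytes, and the source must hold three full strides
    // plus one final row.
    let (width, height, bpp, src_stride) = (8usize, 4usize, 4usize, 64usize);
    let width_bytes = width * bpp;                           // 32
    let src_size = (height - 1) * src_stride + width_bytes;  // 3 * 64 + 32 = 224
    assert_eq!(src_size, 224);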
-
-
-/// Copy from the staging PBOs or textures to texture cache textures using blit commands.
-///
-/// Using blits instead of draw calls is supposedly more efficient but some drivers have
-/// a very high per-command overhead so in some configurations we end up using
-/// copy_from_staging_to_cache_using_draw_calls instead.
-fn copy_from_staging_to_cache(
- renderer: &mut Renderer,
- batch_upload_textures: &[Texture],
- batch_upload_copies: Vec<BatchUploadCopy>,
-) {
- for copy in batch_upload_copies {
- let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id];
-
- renderer.device.copy_texture_sub_region(
- &batch_upload_textures[copy.src_texture_index],
- copy.src_offset.x as _,
- copy.src_offset.y as _,
- dest_texture,
- copy.dest_offset.x as _,
- copy.dest_offset.y as _,
- copy.size.width as _,
- copy.size.height as _,
- );
- }
-}
-
-/// Generate and submit composite shader batches to copy from
-/// the staging textures to the destination cache textures.
-///
-/// If this shows up in GPU time profiles we could replace it with
-/// a simpler shader (composite.glsl is already quite simple).
-fn copy_from_staging_to_cache_using_draw_calls(
- renderer: &mut Renderer,
- stats: &mut UploadStats,
- batch_upload_textures: &[Texture],
- batch_upload_copies: Vec<BatchUploadCopy>,
-) {
- let mut dummy_stats = RendererStats {
- total_draw_calls: 0,
- alpha_target_count: 0,
- color_target_count: 0,
- texture_upload_mb: 0.0,
- resource_upload_time: 0.0,
- gpu_cache_upload_time: 0.0,
- gecko_display_list_time: 0.0,
- wr_display_list_time: 0.0,
- scene_build_time: 0.0,
- frame_build_time: 0.0,
- full_display_list: false,
- full_paint: false,
- };
-
- let mut copy_instances = Vec::new();
- let mut prev_src = None;
- let mut prev_dst = None;
-
- for copy in batch_upload_copies {
-
- let src_changed = prev_src != Some(copy.src_texture_index);
- let dst_changed = prev_dst != Some(copy.dest_texture_id);
-
- if (src_changed || dst_changed) && !copy_instances.is_empty() {
-
- renderer.draw_instanced_batch(
- &copy_instances,
- VertexArrayKind::Composite,
- // We bind the staging texture manually because it isn't known
- // to the texture resolver.
- &BatchTextures::empty(),
- &mut dummy_stats,
- );
-
- stats.num_draw_calls += 1;
- copy_instances.clear();
- }
-
- if dst_changed {
- let dest_texture = &renderer.texture_resolver.texture_cache_map[&copy.dest_texture_id];
- let target_size = dest_texture.get_dimensions();
-
- let draw_target = DrawTarget::from_texture(
- dest_texture,
- false,
- );
- renderer.device.bind_draw_target(draw_target);
-
- let projection = Transform3D::ortho(
- 0.0,
- target_size.width as f32,
- 0.0,
- target_size.height as f32,
- renderer.device.ortho_near_plane(),
- renderer.device.ortho_far_plane(),
- );
-
- renderer.shaders
- .borrow_mut()
- .get_composite_shader(
- CompositeSurfaceFormat::Rgba,
- ImageBufferKind::Texture2D,
- CompositeFeatures::empty(),
- ).bind(
- &mut renderer.device,
- &projection,
- None,
- &mut renderer.renderer_errors
- );
-
- prev_dst = Some(copy.dest_texture_id);
- }
-
- if src_changed {
- renderer.device.bind_texture(
- TextureSampler::Color0,
- &batch_upload_textures[copy.src_texture_index],
- Swizzle::default(),
- );
-
- prev_src = Some(copy.src_texture_index)
- }
-
- let dest_rect = DeviceRect {
- origin: copy.dest_offset.to_f32(),
- size: copy.size.to_f32(),
- };
-
- let src_rect = TexelRect::new(
- copy.src_offset.x as f32,
- copy.src_offset.y as f32,
- (copy.src_offset.x + copy.size.width) as f32,
- (copy.src_offset.y + copy.size.height) as f32,
- );
-
- copy_instances.push(CompositeInstance::new_rgb(
- dest_rect,
- dest_rect,
- PremultipliedColorF::WHITE,
- ZBufferId(0),
- src_rect,
- ));
- }
-
- if !copy_instances.is_empty() {
- renderer.draw_instanced_batch(
- &copy_instances,
- VertexArrayKind::Composite,
- // We bind the staging texture manually because it isn't known
- // to the texture resolver.
- &BatchTextures::empty(),
- &mut dummy_stats,
- );
-
- stats.num_draw_calls += 1;
- }
-}
-
-/// A very basic pool to avoid reallocating staging textures as well as staging
-/// CPU side buffers.
-pub struct UploadTexturePool {
- /// The textures in the pool associated with a last used frame index.
- ///
- /// The outer array corresponds to each of the three supported texture formats.
- textures: [VecDeque<(Texture, u64)>; 3],
- // Frame at which to deallocate some textures if there are too many in the pool,
- // for each format.
- delay_texture_deallocation: [u64; 3],
- current_frame: u64,
-
- /// Temporary buffers that are used when using staging uploads + glTexImage2D.
- ///
- /// Temporary buffers aren't used asynchronously so they can be reused every frame.
- /// To keep things simple we always allocate enough memory for formats with four bytes
- /// per pixel (more than we need for alpha-only textures but it works just as well).
- temporary_buffers: Vec<Vec<mem::MaybeUninit<u8>>>,
- used_temporary_buffers: usize,
- delay_buffer_deallocation: u64,
-}
-
-impl UploadTexturePool {
- pub fn new() -> Self {
- UploadTexturePool {
- textures: [VecDeque::new(), VecDeque::new(), VecDeque::new()],
- delay_texture_deallocation: [0; 3],
- current_frame: 0,
- temporary_buffers: Vec::new(),
- used_temporary_buffers: 0,
- delay_buffer_deallocation: 0,
- }
- }
-
- fn format_index(&self, format: ImageFormat) -> usize {
- match format {
- ImageFormat::RGBA8 => 0,
- ImageFormat::BGRA8 => 1,
- ImageFormat::R8 => 2,
- _ => { panic!("unexpected format"); }
- }
- }
-
- pub fn begin_frame(&mut self) {
- self.current_frame += 1;
- }
-
- /// Create or reuse a staging texture.
- ///
- /// See also return_texture.
- pub fn get_texture(&mut self, device: &mut Device, format: ImageFormat) -> Texture {
-
- // First try to reuse a texture from the pool.
- // "available" here means hasn't been used for 2 frames to avoid stalls.
- // No need to scan the vector. Newer textures are always pushed at the back
- // of the vector so we know the first element is the least recently used.
- let format_idx = self.format_index(format);
- let can_reuse = self.textures[format_idx].get(0)
- .map(|tex| self.current_frame - tex.1 > 2)
- .unwrap_or(false);
-
- if can_reuse {
- return self.textures[format_idx].pop_front().unwrap().0;
- }
-
- // If we couldn't find an available texture, create a new one.
-
- device.create_texture(
- ImageBufferKind::Texture2D,
- format,
- BATCH_UPLOAD_TEXTURE_SIZE.width,
- BATCH_UPLOAD_TEXTURE_SIZE.height,
- TextureFilter::Nearest,
- // Currently we need render target support as we always use glBlitFramebuffer
- // to copy the texture data. Instead, we should use glCopyImageSubData on some
- // platforms, and avoid creating the FBOs in that case.
- Some(RenderTargetInfo { has_depth: false }),
- )
- }
-
- /// Hand the staging texture back to the pool after being done with uploads.
- ///
- /// The texture must have been obtained from this pool via get_texture.
- pub fn return_texture(&mut self, texture: Texture) {
- let format_idx = self.format_index(texture.get_format());
- self.textures[format_idx].push_back((texture, self.current_frame));
- }
-
- /// Create or reuse a temporary CPU buffer.
- ///
- /// These buffers are used in the batched upload path when PBOs are not supported.
- /// Content is first written to the temporary buffer and uploaded via a single
- /// glTexSubImage2D call.
- pub fn get_temporary_buffer(&mut self) -> Vec<mem::MaybeUninit<u8>> {
- self.used_temporary_buffers += 1;
- self.temporary_buffers.pop().unwrap_or_else(|| {
- vec![mem::MaybeUninit::new(0); BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4]
- })
- }
-
- /// Return memory that was obtained from this pool via get_temporary_buffer.
- pub fn return_temporary_buffer(&mut self, buffer: Vec<mem::MaybeUninit<u8>>) {
- assert_eq!(buffer.len(), BATCH_UPLOAD_TEXTURE_SIZE.area() as usize * 4);
- self.temporary_buffers.push(buffer);
- }
-
- /// Deallocate this pool's CPU and GPU memory.
- pub fn delete_textures(&mut self, device: &mut Device) {
- for format in &mut self.textures {
- while let Some(texture) = format.pop_back() {
- device.delete_texture(texture.0)
- }
- }
- self.temporary_buffers.clear();
- }
-
- /// Deallocate some textures if there are too many for a long time.
- pub fn end_frame(&mut self, device: &mut Device) {
- for format_idx in 0..self.textures.len() {
- // Count the number of reusable staging textures.
- // If it stays high for a large number of frames, truncate it back to 8-ish
- // over multiple frames.
-
- let mut num_reusable_textures = 0;
- for texture in &self.textures[format_idx] {
- if self.current_frame - texture.1 > 2 {
- num_reusable_textures += 1;
- }
- }
-
- if num_reusable_textures < 8 {
- // Don't deallocate textures for another 120 frames.
- self.delay_texture_deallocation[format_idx] = self.current_frame + 120;
- }
-
- // Deallocate up to 4 staging textures every frame.
- let to_remove = if self.current_frame > self.delay_texture_deallocation[format_idx] {
- num_reusable_textures.min(4)
- } else {
- 0
- };
-
- for _ in 0..to_remove {
- let texture = self.textures[format_idx].pop_front().unwrap().0;
- device.delete_texture(texture);
- }
- }
-
- // Similar logic for temporary CPU buffers.
- let unused_buffers = self.temporary_buffers.len() - self.used_temporary_buffers;
- if unused_buffers < 8 {
- self.delay_buffer_deallocation = self.current_frame + 120;
- }
- let to_remove = if self.current_frame > self.delay_buffer_deallocation {
- unused_buffers.min(4)
- } else {
- 0
- };
- for _ in 0..to_remove {
- // Unlike textures, it doesn't matter whether we pop from the front or the back
- // of the vector.
- self.temporary_buffers.pop();
- }
- self.used_temporary_buffers = 0;
- }
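// The deallocation hysteresis in end_frame, reduced to a free function (a
// sketch, not from the deleted file). The constants match the code above: a
// shrink deadline that is pushed 120 frames into the future whenever fewer
// than 8 reusable entries exist, and a cap of 4 frees per frame once the
// deadline has passed.
fn items_to_free(num_reusable: usize, current_frame: u64, deadline: &mut u64) -> usize {
    if num_reusable < 8 {
        // The pool is small: postpone shrinking for another 120 frames.
        *deadline = current_frame + 120;
    }
    if current_frame > *deadline {
        num_reusable.min(4) // free at most 4 entries this frame
    } else {
        0
    }
}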
-
- pub fn report_memory_to(&self, report: &mut MemoryReport, size_op_funs: &MallocSizeOfOps) {
- for buf in &self.temporary_buffers {
- report.upload_staging_memory += unsafe { (size_op_funs.size_of_op)(buf.as_ptr() as *const _) };
- }
-
- for format in &self.textures {
- for texture in format {
- report.upload_staging_textures += texture.0.size_in_bytes();
- }
- }
- }
-}
-
-struct UploadStats {
- num_draw_calls: u32,
- upload_time: u64,
- cpu_buffer_alloc_time: u64,
- texture_alloc_time: u64,
- cpu_copy_time: u64,
- gpu_copy_commands_time: u64,
- bytes_uploaded: usize,
-}
-
-#[derive(Debug)]
-enum StagingBufferKind<'a> {
- Pbo(UploadStagingBuffer<'a>),
- CpuBuffer { bytes: Vec<mem::MaybeUninit<u8>> }
-}
-#[derive(Debug)]
-struct BatchUploadBuffer<'a> {
- staging_buffer: StagingBufferKind<'a>,
- texture_index: usize,
- // A rectangle containing all items going into this staging texture, so
- // that we can avoid uploading the entire area when using glTexSubImage2D.
- upload_rect: DeviceIntRect,
-}
-
- // On some devices, performing many small texture uploads is slow, so instead we
- // batch updates into a small number of uploads to temporary staging textures, then
- // copy from those textures to the correct place in the texture cache.
- // A single copy from a staging texture to its destination in the texture cache.
-#[derive(Debug)]
-struct BatchUploadCopy {
- // Index within batch_upload_textures
- src_texture_index: usize,
- src_offset: DeviceIntPoint,
- dest_texture_id: CacheTextureId,
- dest_offset: DeviceIntPoint,
- size: DeviceIntSize,
-}
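// A sketch of how a list of `BatchUploadCopy` records would be consumed once
// the staging textures are filled: one GPU-side copy per record, from the
// staging texture into its final slot in the texture cache. `copy_region` is
// a hypothetical stand-in for whichever copy entry point the device layer
// exposes (glBlitFramebuffer or glCopyImageSubData, per the comment above).
fn flush_copies(device: &mut Device, staging: &[Texture], copies: &[BatchUploadCopy]) {
    for copy in copies {
        device.copy_region(
            &staging[copy.src_texture_index], // source staging texture
            copy.src_offset,
            copy.dest_texture_id,             // destination texture in the cache
            copy.dest_offset,
            copy.size,
        );
    }
}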
diff --git a/third_party/webrender/webrender/src/renderer/vertex.rs b/third_party/webrender/webrender/src/renderer/vertex.rs
deleted file mode 100644
index de5a08fa3c5..00000000000
--- a/third_party/webrender/webrender/src/renderer/vertex.rs
+++ /dev/null
@@ -1,1095 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! Rendering logic related to the vertex shaders and their states, including:
-//! - Vertex Array Objects
-//! - vertex layout descriptors
-//! - textures bound at vertex stage
-
-use std::{marker::PhantomData, mem, num::NonZeroUsize, ops};
-use api::units::*;
-use crate::{
- device::{
- Device, Texture, TextureFilter, TextureUploader, UploadPBOPool, VertexUsageHint, VAO,
- },
- frame_builder::Frame,
- gpu_types::{PrimitiveHeaderI, PrimitiveHeaderF, TransformData},
- internal_types::Swizzle,
- render_task::RenderTaskData,
-};
-
-pub const VERTEX_TEXTURE_EXTRA_ROWS: i32 = 10;
-
-pub const MAX_VERTEX_TEXTURE_WIDTH: usize = webrender_build::MAX_VERTEX_TEXTURE_WIDTH;
-
-pub mod desc {
- use crate::device::{VertexAttribute, VertexAttributeKind, VertexDescriptor};
-
- pub const PRIM_INSTANCES: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[VertexAttribute {
- name: "aData",
- count: 4,
- kind: VertexAttributeKind::I32,
- }],
- };
-
- pub const BLUR: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aBlurRenderTaskAddress",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aBlurSourceTaskAddress",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aBlurDirection",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- ],
- };
-
- pub const LINE: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aTaskRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aLocalSize",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aWavyLineThickness",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aStyle",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aAxisSelect",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-
- pub const FAST_LINEAR_GRADIENT: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aTaskRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aColor0",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aColor1",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aAxisSelect",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-
- pub const LINEAR_GRADIENT: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aTaskRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aStartPoint",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aEndPoint",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aScale",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aExtendMode",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aGradientStopsAddress",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- ],
- };
-
- pub const RADIAL_GRADIENT: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aTaskRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aCenter",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aScale",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aStartRadius",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aEndRadius",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aXYRatio",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aExtendMode",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aGradientStopsAddress",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- ],
- };
-
- pub const CONIC_GRADIENT: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aTaskRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aCenter",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aScale",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aStartOffset",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aEndOffset",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aAngle",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aExtendMode",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aGradientStopsAddress",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- ],
- };
-
- pub const BORDER: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aTaskOrigin",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aColor0",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aColor1",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aFlags",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aWidths",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aRadii",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipParams1",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipParams2",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-
- pub const SCALE: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aScaleTargetRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aScaleSourceRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-
- pub const CLIP_RECT: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- // common clip attributes
- VertexAttribute {
- name: "aClipDeviceArea",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipOrigins",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aDevicePixelScale",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aTransformIds",
- count: 2,
- kind: VertexAttributeKind::I32,
- },
- // specific clip attributes
- VertexAttribute {
- name: "aClipLocalPos",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipLocalRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipMode",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipRect_TL",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipRadii_TL",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipRect_TR",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipRadii_TR",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipRect_BL",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipRadii_BL",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipRect_BR",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipRadii_BR",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-
- pub const CLIP_BOX_SHADOW: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- // common clip attributes
- VertexAttribute {
- name: "aClipDeviceArea",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipOrigins",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aDevicePixelScale",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aTransformIds",
- count: 2,
- kind: VertexAttributeKind::I32,
- },
- // specific clip attributes
- VertexAttribute {
- name: "aClipDataResourceAddress",
- count: 2,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aClipSrcRectSize",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipMode",
- count: 1,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aStretchMode",
- count: 2,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aClipDestRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-
- pub const CLIP_IMAGE: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- // common clip attributes
- VertexAttribute {
- name: "aClipDeviceArea",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipOrigins",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aDevicePixelScale",
- count: 1,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aTransformIds",
- count: 2,
- kind: VertexAttributeKind::I32,
- },
- // specific clip attributes
- VertexAttribute {
- name: "aClipTileRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aClipDataResourceAddress",
- count: 2,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aClipLocalRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-
- pub const GPU_CACHE_UPDATE: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[
- VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U16Norm,
- },
- VertexAttribute {
- name: "aValue",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- ],
- instance_attributes: &[],
- };
-
- pub const RESOLVE: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[VertexAttribute {
- name: "aRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- }],
- };
-
- pub const SVG_FILTER: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aFilterRenderTaskAddress",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aFilterInput1TaskAddress",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aFilterInput2TaskAddress",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aFilterKind",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aFilterInputCount",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aFilterGenericInt",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aFilterExtraDataAddress",
- count: 2,
- kind: VertexAttributeKind::U16,
- },
- ],
- };
-
- pub const VECTOR_STENCIL: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aFromPosition",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aCtrlPosition",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aToPosition",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aFromNormal",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aCtrlNormal",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aToNormal",
- count: 2,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aPathID",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aPad",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- ],
- };
-
- pub const VECTOR_COVER: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aTargetRect",
- count: 4,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aStencilOrigin",
- count: 2,
- kind: VertexAttributeKind::I32,
- },
- VertexAttribute {
- name: "aSubpixel",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- VertexAttribute {
- name: "aPad",
- count: 1,
- kind: VertexAttributeKind::U16,
- },
- ],
- };
-
- pub const COMPOSITE: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aDeviceRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aDeviceClipRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aColor",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aParams",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aUvRect0",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aUvRect1",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aUvRect2",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-
- pub const CLEAR: VertexDescriptor = VertexDescriptor {
- vertex_attributes: &[VertexAttribute {
- name: "aPosition",
- count: 2,
- kind: VertexAttributeKind::U8Norm,
- }],
- instance_attributes: &[
- VertexAttribute {
- name: "aRect",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- VertexAttribute {
- name: "aColor",
- count: 4,
- kind: VertexAttributeKind::F32,
- },
- ],
- };
-}
-
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub enum VertexArrayKind {
- Primitive,
- Blur,
- ClipImage,
- ClipRect,
- ClipBoxShadow,
- VectorStencil,
- VectorCover,
- Border,
- Scale,
- LineDecoration,
- FastLinearGradient,
- LinearGradient,
- RadialGradient,
- ConicGradient,
- Resolve,
- SvgFilter,
- Composite,
- Clear,
-}
-
-pub struct VertexDataTexture<T> {
- texture: Option<Texture>,
- format: api::ImageFormat,
- _marker: PhantomData<T>,
-}
-
-impl<T> VertexDataTexture<T> {
- pub fn new(format: api::ImageFormat) -> Self {
- Self {
- texture: None,
- format,
- _marker: PhantomData,
- }
- }
-
- /// Returns a borrow of the GPU texture. Panics if it hasn't been initialized.
- pub fn texture(&self) -> &Texture {
- self.texture.as_ref().unwrap()
- }
-
- /// Returns an estimate of the GPU memory consumed by this VertexDataTexture.
- pub fn size_in_bytes(&self) -> usize {
- self.texture.as_ref().map_or(0, |t| t.size_in_bytes())
- }
-
- pub fn update<'a>(
- &'a mut self,
- device: &mut Device,
- texture_uploader: &mut TextureUploader<'a>,
- data: &mut Vec<T>,
- ) {
- debug_assert!(mem::size_of::<T>() % 16 == 0);
- let texels_per_item = mem::size_of::<T>() / 16;
- let items_per_row = MAX_VERTEX_TEXTURE_WIDTH / texels_per_item;
- debug_assert_ne!(items_per_row, 0);
-
- // Ensure we always end up with a texture when leaving this method.
- let mut len = data.len();
- if len == 0 {
- if self.texture.is_some() {
- return;
- }
- data.reserve(items_per_row);
- len = items_per_row;
- } else {
- // Extend the data array's capacity so that the uploaded length is a
- // multiple of the row size. This ensures memory safety when the
- // array is passed to OpenGL to upload to the GPU.
- let extra = len % items_per_row;
- if extra != 0 {
- let padding = items_per_row - extra;
- data.reserve(padding);
- len += padding;
- }
- }
-
- let needed_height = (len / items_per_row) as i32;
- let existing_height = self
- .texture
- .as_ref()
- .map_or(0, |t| t.get_dimensions().height);
-
- // Create a new texture if needed.
- //
- // These textures are generally very small, which is why we don't bother
- // with incremental updates and just re-upload every frame. For most pages
- // they're one row each, and on stress tests like css-francine they end up
- // in the 6-14 range. So we size the texture tightly to what we need (usually
- // 1), and shrink it if the waste would be more than `VERTEX_TEXTURE_EXTRA_ROWS`
- // rows. This helps with memory overhead, especially because there are several
- // instances of these textures per Renderer.
- if needed_height > existing_height
- || needed_height + VERTEX_TEXTURE_EXTRA_ROWS < existing_height
- {
- // Drop the existing texture, if any.
- if let Some(t) = self.texture.take() {
- device.delete_texture(t);
- }
-
- let texture = device.create_texture(
- api::ImageBufferKind::Texture2D,
- self.format,
- MAX_VERTEX_TEXTURE_WIDTH as i32,
- // Ensure height is at least two to work around
- // https://bugs.chromium.org/p/angleproject/issues/detail?id=3039
- needed_height.max(2),
- TextureFilter::Nearest,
- None,
- );
- self.texture = Some(texture);
- }
-
- // Note: the actual width can be larger than the logical one, with a few texels
- // of each row unused at the tail. This is needed because there is still hardware
- // (like Intel iGPUs) that prefers power-of-two sizes of textures ([1]).
- //
- // [1] https://software.intel.com/en-us/articles/opengl-performance-tips-power-of-two-textures-have-better-performance
- let logical_width = if needed_height == 1 {
- data.len() * texels_per_item
- } else {
- MAX_VERTEX_TEXTURE_WIDTH - (MAX_VERTEX_TEXTURE_WIDTH % texels_per_item)
- };
-
- let rect = DeviceIntRect::new(
- DeviceIntPoint::zero(),
- DeviceIntSize::new(logical_width as i32, needed_height),
- );
-
- debug_assert!(len <= data.capacity(), "CPU copy will read out of bounds");
- texture_uploader.upload(
- device,
- self.texture(),
- rect,
- None,
- None,
- data.as_ptr(),
- len,
- );
- }
-
- pub fn deinit(mut self, device: &mut Device) {
- if let Some(t) = self.texture.take() {
- device.delete_texture(t);
- }
- }
-}
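// The sizing arithmetic in `update` with concrete numbers (a sketch; assumes
// MAX_VERTEX_TEXTURE_WIDTH = 1024 texels, its typical value, and a
// hypothetical 128-byte item type). One RGBAF32 texel holds 16 bytes, so:
//   texels_per_item = 128 / 16 = 8, items_per_row = 1024 / 8 = 128
//   len = 300  =>  extra = 300 % 128 = 44, padding = 128 - 44 = 84
//   padded len = 384  =>  needed_height = 384 / 128 = 3 rows
fn padded_upload(len: usize, item_bytes: usize, texture_width: usize) -> (usize, i32) {
    let texels_per_item = item_bytes / 16;
    let items_per_row = texture_width / texels_per_item;
    let extra = len % items_per_row;
    let padded = if extra == 0 { len } else { len + items_per_row - extra };
    (padded, (padded / items_per_row) as i32) // (padded length, rows needed)
}
// e.g. padded_upload(300, 128, 1024) == (384, 3)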
-
-pub struct VertexDataTextures {
- prim_header_f_texture: VertexDataTexture<PrimitiveHeaderF>,
- prim_header_i_texture: VertexDataTexture<PrimitiveHeaderI>,
- transforms_texture: VertexDataTexture<TransformData>,
- render_task_texture: VertexDataTexture<RenderTaskData>,
-}
-
-impl VertexDataTextures {
- pub fn new() -> Self {
- VertexDataTextures {
- prim_header_f_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
- prim_header_i_texture: VertexDataTexture::new(api::ImageFormat::RGBAI32),
- transforms_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
- render_task_texture: VertexDataTexture::new(api::ImageFormat::RGBAF32),
- }
- }
-
- pub fn update(&mut self, device: &mut Device, pbo_pool: &mut UploadPBOPool, frame: &mut Frame) {
- let mut texture_uploader = device.upload_texture(pbo_pool);
- self.prim_header_f_texture.update(
- device,
- &mut texture_uploader,
- &mut frame.prim_headers.headers_float,
- );
- self.prim_header_i_texture.update(
- device,
- &mut texture_uploader,
- &mut frame.prim_headers.headers_int,
- );
- self.transforms_texture
- .update(device, &mut texture_uploader, &mut frame.transform_palette);
- self.render_task_texture.update(
- device,
- &mut texture_uploader,
- &mut frame.render_tasks.task_data,
- );
-
- // Flush and drop the texture uploader now, so that
- // we can borrow the textures to bind them.
- texture_uploader.flush(device);
-
- device.bind_texture(
- super::TextureSampler::PrimitiveHeadersF,
- &self.prim_header_f_texture.texture(),
- Swizzle::default(),
- );
- device.bind_texture(
- super::TextureSampler::PrimitiveHeadersI,
- &self.prim_header_i_texture.texture(),
- Swizzle::default(),
- );
- device.bind_texture(
- super::TextureSampler::TransformPalette,
- &self.transforms_texture.texture(),
- Swizzle::default(),
- );
- device.bind_texture(
- super::TextureSampler::RenderTasks,
- &self.render_task_texture.texture(),
- Swizzle::default(),
- );
- }
-
- pub fn size_in_bytes(&self) -> usize {
- self.prim_header_f_texture.size_in_bytes()
- + self.prim_header_i_texture.size_in_bytes()
- + self.transforms_texture.size_in_bytes()
- + self.render_task_texture.size_in_bytes()
- }
-
- pub fn deinit(self, device: &mut Device) {
- self.transforms_texture.deinit(device);
- self.prim_header_f_texture.deinit(device);
- self.prim_header_i_texture.deinit(device);
- self.render_task_texture.deinit(device);
- }
-}
-
-pub struct RendererVAOs {
- prim_vao: VAO,
- blur_vao: VAO,
- clip_rect_vao: VAO,
- clip_box_shadow_vao: VAO,
- clip_image_vao: VAO,
- border_vao: VAO,
- line_vao: VAO,
- scale_vao: VAO,
- fast_linear_gradient_vao: VAO,
- linear_gradient_vao: VAO,
- radial_gradient_vao: VAO,
- conic_gradient_vao: VAO,
- resolve_vao: VAO,
- svg_filter_vao: VAO,
- composite_vao: VAO,
- clear_vao: VAO,
-}
-
-impl RendererVAOs {
- pub fn new(device: &mut Device, indexed_quads: Option<NonZeroUsize>) -> Self {
- const QUAD_INDICES: [u16; 6] = [0, 1, 2, 2, 1, 3];
- const QUAD_VERTICES: [[u8; 2]; 4] = [[0, 0], [0xFF, 0], [0, 0xFF], [0xFF, 0xFF]];
-
- let instance_divisor = if indexed_quads.is_some() { 0 } else { 1 };
- let prim_vao = device.create_vao(&desc::PRIM_INSTANCES, instance_divisor);
-
- device.bind_vao(&prim_vao);
- match indexed_quads {
- Some(count) => {
- assert!(count.get() < u16::MAX as usize);
- let quad_indices = (0 .. count.get() as u16)
- .flat_map(|instance| QUAD_INDICES.iter().map(move |&index| instance * 4 + index))
- .collect::<Vec<_>>();
- device.update_vao_indices(&prim_vao, &quad_indices, VertexUsageHint::Static);
- let quad_vertices = (0 .. count.get() as u16)
- .flat_map(|_| QUAD_VERTICES.iter().cloned())
- .collect::<Vec<_>>();
- device.update_vao_main_vertices(&prim_vao, &quad_vertices, VertexUsageHint::Static);
- }
- None => {
- device.update_vao_indices(&prim_vao, &QUAD_INDICES, VertexUsageHint::Static);
- device.update_vao_main_vertices(&prim_vao, &QUAD_VERTICES, VertexUsageHint::Static);
- }
- }
-
- RendererVAOs {
- blur_vao: device.create_vao_with_new_instances(&desc::BLUR, &prim_vao),
- clip_rect_vao: device.create_vao_with_new_instances(&desc::CLIP_RECT, &prim_vao),
- clip_box_shadow_vao: device
- .create_vao_with_new_instances(&desc::CLIP_BOX_SHADOW, &prim_vao),
- clip_image_vao: device.create_vao_with_new_instances(&desc::CLIP_IMAGE, &prim_vao),
- border_vao: device.create_vao_with_new_instances(&desc::BORDER, &prim_vao),
- scale_vao: device.create_vao_with_new_instances(&desc::SCALE, &prim_vao),
- line_vao: device.create_vao_with_new_instances(&desc::LINE, &prim_vao),
- fast_linear_gradient_vao: device.create_vao_with_new_instances(&desc::FAST_LINEAR_GRADIENT, &prim_vao),
- linear_gradient_vao: device.create_vao_with_new_instances(&desc::LINEAR_GRADIENT, &prim_vao),
- radial_gradient_vao: device.create_vao_with_new_instances(&desc::RADIAL_GRADIENT, &prim_vao),
- conic_gradient_vao: device.create_vao_with_new_instances(&desc::CONIC_GRADIENT, &prim_vao),
- resolve_vao: device.create_vao_with_new_instances(&desc::RESOLVE, &prim_vao),
- svg_filter_vao: device.create_vao_with_new_instances(&desc::SVG_FILTER, &prim_vao),
- composite_vao: device.create_vao_with_new_instances(&desc::COMPOSITE, &prim_vao),
- clear_vao: device.create_vao_with_new_instances(&desc::CLEAR, &prim_vao),
- prim_vao,
- }
- }
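// What the indexed-quads path above generates (a sketch of the same
// expression as a standalone function): for each quad instance, the six
// indices of its two triangles, offset by instance * 4. For count = 2 this
// yields [0, 1, 2, 2, 1, 3, 4, 5, 6, 6, 5, 7].
fn quad_indices(count: u16) -> Vec<u16> {
    const QUAD_INDICES: [u16; 6] = [0, 1, 2, 2, 1, 3];
    (0..count)
        .flat_map(|instance| QUAD_INDICES.iter().map(move |&index| instance * 4 + index))
        .collect()
}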
-
- pub fn deinit(self, device: &mut Device) {
- device.delete_vao(self.prim_vao);
- device.delete_vao(self.resolve_vao);
- device.delete_vao(self.clip_rect_vao);
- device.delete_vao(self.clip_box_shadow_vao);
- device.delete_vao(self.clip_image_vao);
- device.delete_vao(self.fast_linear_gradient_vao);
- device.delete_vao(self.linear_gradient_vao);
- device.delete_vao(self.radial_gradient_vao);
- device.delete_vao(self.conic_gradient_vao);
- device.delete_vao(self.blur_vao);
- device.delete_vao(self.line_vao);
- device.delete_vao(self.border_vao);
- device.delete_vao(self.scale_vao);
- device.delete_vao(self.svg_filter_vao);
- device.delete_vao(self.composite_vao);
- device.delete_vao(self.clear_vao);
- }
-}
-
-impl ops::Index<VertexArrayKind> for RendererVAOs {
- type Output = VAO;
- fn index(&self, kind: VertexArrayKind) -> &VAO {
- match kind {
- VertexArrayKind::Primitive => &self.prim_vao,
- VertexArrayKind::ClipImage => &self.clip_image_vao,
- VertexArrayKind::ClipRect => &self.clip_rect_vao,
- VertexArrayKind::ClipBoxShadow => &self.clip_box_shadow_vao,
- VertexArrayKind::Blur => &self.blur_vao,
- VertexArrayKind::VectorStencil | VertexArrayKind::VectorCover => unreachable!(),
- VertexArrayKind::Border => &self.border_vao,
- VertexArrayKind::Scale => &self.scale_vao,
- VertexArrayKind::LineDecoration => &self.line_vao,
- VertexArrayKind::FastLinearGradient => &self.fast_linear_gradient_vao,
- VertexArrayKind::LinearGradient => &self.linear_gradient_vao,
- VertexArrayKind::RadialGradient => &self.radial_gradient_vao,
- VertexArrayKind::ConicGradient => &self.conic_gradient_vao,
- VertexArrayKind::Resolve => &self.resolve_vao,
- VertexArrayKind::SvgFilter => &self.svg_filter_vao,
- VertexArrayKind::Composite => &self.composite_vao,
- VertexArrayKind::Clear => &self.clear_vao,
- }
- }
-}
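// The Index impl above exists so draw code can select a VAO with plain
// indexing syntax; a sketch of a call site (`vaos` and `device` assumed in scope):
//   let vao = &vaos[VertexArrayKind::Composite]; // resolves to composite_vao
//   device.bind_vao(vao);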
diff --git a/third_party/webrender/webrender/src/resource_cache.rs b/third_party/webrender/webrender/src/resource_cache.rs
index 0c5d106bb1c..a87d179b08d 100644
--- a/third_party/webrender/webrender/src/resource_cache.rs
+++ b/third_party/webrender/webrender/src/resource_cache.rs
@@ -2,16 +2,16 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{BlobImageResources, BlobImageRequest, RasterizedBlobImage, ImageFormat};
-use api::{DebugFlags, FontInstanceKey, FontKey, FontTemplate, GlyphIndex};
-use api::{ExternalImageData, ExternalImageType, ExternalImageId, BlobImageResult, FontInstanceData};
+use api::{AddFont, BlobImageResources, ResourceUpdate};
+use api::{BlobImageRequest, RasterizedBlobImage};
+use api::{ClearCache, DebugFlags, FontInstanceKey, FontKey, FontTemplate, GlyphIndex};
+use api::{ExternalImageData, ExternalImageType, BlobImageResult, FontInstanceData};
use api::{DirtyRect, GlyphDimensions, IdNamespace, DEFAULT_TILE_SIZE};
use api::{ImageData, ImageDescriptor, ImageKey, ImageRendering, TileSize};
-use api::{BlobImageKey, VoidPtrToSizeFn};
+use api::{BlobImageKey, MemoryReport, VoidPtrToSizeFn};
use api::{SharedFontInstanceMap, BaseFontInstance};
+use api::image_tiling::{compute_tile_size, compute_tile_range};
use api::units::*;
-use crate::{render_api::{ClearCache, AddFont, ResourceUpdate, MemoryReport}, util::WeakTable};
-use crate::image_tiling::{compute_tile_size, compute_tile_range};
#[cfg(feature = "capture")]
use crate::capture::ExternalCaptureImage;
#[cfg(feature = "replay")]
@@ -25,12 +25,11 @@ use crate::glyph_cache::GlyphCacheEntry;
use crate::glyph_rasterizer::{GLYPH_FLASHING, FontInstance, GlyphFormat, GlyphKey, GlyphRasterizer};
use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
use crate::gpu_types::UvRectKind;
-use crate::internal_types::{CacheTextureId, FastHashMap, FastHashSet, TextureSource, ResourceUpdateList};
-use crate::picture::SurfaceInfo;
-use crate::profiler::{self, TransactionProfile, bytes_to_mb};
+use crate::internal_types::{FastHashMap, FastHashSet, TextureSource, ResourceUpdateList};
+use crate::profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
use crate::render_backend::{FrameId, FrameStamp};
-use crate::render_task_graph::{RenderTaskId, RenderTaskGraphBuilder};
-use crate::render_task_cache::{RenderTaskCache, RenderTaskCacheKey, RenderTaskParent};
+use crate::render_task_graph::{RenderTaskGraph, RenderTaskId};
+use crate::render_task_cache::{RenderTaskCache, RenderTaskCacheKey};
use crate::render_task_cache::{RenderTaskCacheEntry, RenderTaskCacheEntryHandle};
use euclid::point2;
use smallvec::SmallVec;
@@ -48,7 +47,7 @@ use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::u32;
-use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction, TargetShader};
+use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction};
// Counter for generating unique native surface ids
static NEXT_NATIVE_SURFACE_ID: AtomicUsize = AtomicUsize::new(0);
@@ -58,9 +57,6 @@ static NEXT_NATIVE_SURFACE_ID: AtomicUsize = AtomicUsize::new(0);
pub struct GlyphFetchResult {
pub index_in_text_run: i32,
pub uv_rect_address: GpuCacheAddress,
- pub offset: DevicePoint,
- pub size: DeviceIntSize,
- pub scale: f32,
}
// These coordinates are always in texels.
@@ -79,7 +75,7 @@ pub struct CacheItem {
pub texture_id: TextureSource,
pub uv_rect_handle: GpuCacheHandle,
pub uv_rect: DeviceIntRect,
- pub user_data: [f32; 4],
+ pub texture_layer: i32,
}
impl CacheItem {
@@ -88,13 +84,9 @@ impl CacheItem {
texture_id: TextureSource::Invalid,
uv_rect_handle: GpuCacheHandle::new(),
uv_rect: DeviceIntRect::zero(),
- user_data: [0.0; 4],
+ texture_layer: 0,
}
}
-
- pub fn is_valid(&self) -> bool {
- self.texture_id != TextureSource::Invalid
- }
}
/// Represents the backing store of an image in the cache.
@@ -228,7 +220,9 @@ struct CachedImageInfo {
impl CachedImageInfo {
fn mark_unused(&mut self, texture_cache: &mut TextureCache) {
- texture_cache.evict_handle(&self.texture_cache_handle);
+ if self.manual_eviction {
+ texture_cache.evict_manual_handle(&self.texture_cache_handle);
+ }
self.manual_eviction = false;
}
}
@@ -400,10 +394,6 @@ struct Resources {
font_templates: FastHashMap<FontKey, FontTemplate>,
font_instances: SharedFontInstanceMap,
image_templates: ImageTemplates,
- // We keep a set of Weak references to the fonts so that we're able to include them in memory
- // reports even if only the OS is holding on to the Vec<u8>. PtrWeakHashSet will periodically
- // drop any references that have gone dead.
- weak_fonts: WeakTable
}
impl BlobImageResources for Resources {
@@ -423,29 +413,6 @@ pub type GlyphDimensionsCache = FastHashMap<(FontInstanceKey, GlyphIndex), Optio
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct BlobImageRasterizerEpoch(usize);
-/// Internal information about allocated render targets in the pool
-struct RenderTarget {
- size: DeviceIntSize,
- format: ImageFormat,
- texture_id: CacheTextureId,
- /// If true, this is currently leant out, and not available to other passes
- is_active: bool,
- last_frame_used: FrameId,
-}
-
-impl RenderTarget {
- fn size_in_bytes(&self) -> usize {
- let bpp = self.format.bytes_per_pixel() as usize;
- (self.size.width * self.size.height) as usize * bpp
- }
-
- /// Returns true if this texture was used within `threshold` frames of
- /// the current frame.
- pub fn used_recently(&self, current_frame_id: FrameId, threshold: usize) -> bool {
- self.last_frame_used + threshold >= current_frame_id
- }
-}
-
/// High-level container for resources managed by the `RenderBackend`.
///
/// This includes a variety of things, including images, fonts, and glyphs,
@@ -485,12 +452,6 @@ pub struct ResourceCache {
/// A list of queued compositor surface updates to apply next frame.
pending_native_surface_updates: Vec<NativeSurfaceOperation>,
-
- image_templates_memory: usize,
- font_templates_memory: usize,
-
- /// A pool of render targets for use by the render task graph
- render_target_pool: Vec<RenderTarget>,
}
impl ResourceCache {
@@ -508,7 +469,6 @@ impl ResourceCache {
font_instances,
font_templates: FastHashMap::default(),
image_templates: ImageTemplates::default(),
- weak_fonts: WeakTable::new(),
},
cached_glyph_dimensions: FastHashMap::default(),
texture_cache,
@@ -522,44 +482,13 @@ impl ResourceCache {
pending_native_surface_updates: Vec::new(),
#[cfg(feature = "capture")]
capture_dirty: true,
- image_templates_memory: 0,
- font_templates_memory: 0,
- render_target_pool: Vec::new(),
}
}
- /// Construct a resource cache for use in unit tests.
- #[cfg(test)]
- pub fn new_for_testing() -> Self {
- use rayon::ThreadPoolBuilder;
-
- let texture_cache = TextureCache::new_for_testing(
- 4096,
- ImageFormat::RGBA8,
- );
- let workers = Arc::new(ThreadPoolBuilder::new().build().unwrap());
- let glyph_rasterizer = GlyphRasterizer::new(workers, true).unwrap();
- let cached_glyphs = GlyphCache::new();
- let font_instances = SharedFontInstanceMap::new();
-
- ResourceCache::new(
- texture_cache,
- glyph_rasterizer,
- cached_glyphs,
- font_instances,
- )
- }
-
pub fn max_texture_size(&self) -> i32 {
self.texture_cache.max_texture_size()
}
- /// Maximum texture size before we consider it preferable to break the texture
- /// into tiles.
- pub fn tiling_threshold(&self) -> i32 {
- self.texture_cache.tiling_threshold()
- }
-
pub fn enable_multithreading(&mut self, enable: bool) {
self.glyph_rasterizer.enable_multithreading(enable);
}
@@ -585,25 +514,21 @@ impl ResourceCache {
&mut self,
key: RenderTaskCacheKey,
gpu_cache: &mut GpuCache,
- rg_builder: &mut RenderTaskGraphBuilder,
- user_data: Option<[f32; 4]>,
+ render_tasks: &mut RenderTaskGraph,
+ user_data: Option<[f32; 3]>,
is_opaque: bool,
- parent: RenderTaskParent,
- surfaces: &[SurfaceInfo],
f: F,
- ) -> RenderTaskId
+ ) -> RenderTaskCacheEntryHandle
where
- F: FnOnce(&mut RenderTaskGraphBuilder) -> RenderTaskId,
+ F: FnOnce(&mut RenderTaskGraph) -> RenderTaskId,
{
self.cached_render_tasks.request_render_task(
key,
&mut self.texture_cache,
gpu_cache,
- rg_builder,
+ render_tasks,
user_data,
is_opaque,
- parent,
- surfaces,
|render_graph| Ok(f(render_graph))
).expect("Failed to request a render task from the resource cache!")
}
@@ -611,7 +536,7 @@ impl ResourceCache {
pub fn post_scene_building_update(
&mut self,
updates: Vec<ResourceUpdate>,
- profile: &mut TransactionProfile,
+ profile_counters: &mut ResourceProfileCounters,
) {
// TODO, there is potential for optimization here, by processing updates in
// bulk rather than one by one (for example by sorting allocations by size or
@@ -626,8 +551,7 @@ impl ResourceCache {
match update {
ResourceUpdate::AddImage(img) => {
if let ImageData::Raw(ref bytes) = img.data {
- self.image_templates_memory += bytes.len();
- profile.set(profiler::IMAGE_TEMPLATES_MEM, bytes_to_mb(self.image_templates_memory));
+ profile_counters.image_templates.inc(bytes.len());
}
self.add_image_template(
img.key,
@@ -636,7 +560,6 @@ impl ResourceCache {
&img.descriptor.size.into(),
img.tiling,
);
- profile.set(profiler::IMAGE_TEMPLATES, self.resources.image_templates.images.len());
}
ResourceUpdate::UpdateImage(img) => {
self.update_image_template(img.key, img.descriptor, img.data.into(), &img.dirty_rect);
@@ -664,16 +587,12 @@ impl ResourceCache {
}
ResourceUpdate::DeleteImage(img) => {
self.delete_image_template(img);
- profile.set(profiler::IMAGE_TEMPLATES, self.resources.image_templates.images.len());
- profile.set(profiler::IMAGE_TEMPLATES_MEM, bytes_to_mb(self.image_templates_memory));
}
ResourceUpdate::DeleteBlobImage(img) => {
self.delete_image_template(img.as_image());
}
ResourceUpdate::DeleteFont(font) => {
self.delete_font_template(font);
- profile.set(profiler::FONT_TEMPLATES, self.resources.font_templates.len());
- profile.set(profiler::FONT_TEMPLATES_MEM, bytes_to_mb(self.font_templates_memory));
}
ResourceUpdate::DeleteFontInstance(font) => {
self.delete_font_instance(font);
@@ -685,15 +604,13 @@ impl ResourceCache {
ResourceUpdate::AddFont(font) => {
match font {
AddFont::Raw(id, bytes, index) => {
- self.font_templates_memory += bytes.len();
- profile.set(profiler::FONT_TEMPLATES_MEM, bytes_to_mb(self.font_templates_memory));
+ profile_counters.font_templates.inc(bytes.len());
self.add_font_template(id, FontTemplate::Raw(bytes, index));
}
AddFont::Native(id, native_font_handle) => {
self.add_font_template(id, FontTemplate::Native(native_font_handle));
}
}
- profile.set(profiler::FONT_TEMPLATES, self.resources.font_templates.len());
}
ResourceUpdate::AddFontInstance(..) => {
// Already added in ApiResources.
@@ -705,7 +622,7 @@ impl ResourceCache {
pub fn add_rasterized_blob_images(
&mut self,
images: Vec<(BlobImageRequest, BlobImageResult)>,
- profile: &mut TransactionProfile,
+ texture_cache_profile: &mut TextureCacheProfileCounters,
) {
for (request, result) in images {
let data = match result {
@@ -716,7 +633,7 @@ impl ResourceCache {
}
};
- profile.add(profiler::RASTERIZED_BLOBS_PX, data.rasterized_rect.area());
+ texture_cache_profile.rasterized_blob_pixels.inc(data.rasterized_rect.area() as usize);
// First make sure we have an entry for this key (using a placeholder
// if need be).
@@ -744,18 +661,13 @@ impl ResourceCache {
pub fn add_font_template(&mut self, font_key: FontKey, template: FontTemplate) {
// Push the new font to the font renderer, and also store
// it locally for glyph metric requests.
- if let FontTemplate::Raw(ref font, _) = template {
- self.resources.weak_fonts.insert(Arc::downgrade(font));
- }
self.glyph_rasterizer.add_font(font_key, template.clone());
self.resources.font_templates.insert(font_key, template);
}
pub fn delete_font_template(&mut self, font_key: FontKey) {
self.glyph_rasterizer.delete_font(font_key);
- if let Some(FontTemplate::Raw(data, _)) = self.resources.font_templates.remove(&font_key) {
- self.font_templates_memory -= data.len();
- }
+ self.resources.font_templates.remove(&font_key);
self.cached_glyphs
.clear_fonts(|font| font.font_key == font_key);
}
@@ -780,7 +692,7 @@ impl ResourceCache {
visible_rect: &DeviceIntRect,
mut tiling: Option<TileSize>,
) {
- if tiling.is_none() && Self::should_tile(self.tiling_threshold(), &descriptor, &data) {
+ if tiling.is_none() && Self::should_tile(self.max_texture_size(), &descriptor, &data) {
// We aren't going to be able to upload a texture this big, so tile it, even
// if tiling was not requested.
tiling = Some(DEFAULT_TILE_SIZE);
@@ -804,14 +716,14 @@ impl ResourceCache {
data: CachedImageData,
dirty_rect: &ImageDirtyRect,
) {
- let tiling_threshold = self.tiling_threshold();
+ let max_texture_size = self.max_texture_size();
let image = match self.resources.image_templates.get_mut(image_key) {
Some(res) => res,
None => panic!("Attempt to update non-existent image"),
};
let mut tiling = image.tiling;
- if tiling.is_none() && Self::should_tile(tiling_threshold, &descriptor, &data) {
+ if tiling.is_none() && Self::should_tile(max_texture_size, &descriptor, &data) {
tiling = Some(DEFAULT_TILE_SIZE);
}
@@ -875,10 +787,6 @@ impl ResourceCache {
match value {
Some(image) => if image.data.is_blob() {
- if let CachedImageData::Raw(data) = image.data {
- self.image_templates_memory -= data.len();
- }
-
let blob_key = BlobImageKey(image_key);
self.deleted_blob_keys.back_mut().unwrap().push(blob_key);
self.rasterized_blob_images.remove(&blob_key);
@@ -898,14 +806,11 @@ impl ResourceCache {
.map_or(ImageGeneration::INVALID, |template| template.generation)
}
- /// Requests an image to ensure that it will be in the texture cache this frame.
- ///
- /// returns the size in device pixel of the image or tile.
pub fn request_image(
&mut self,
request: ImageRequest,
gpu_cache: &mut GpuCache,
- ) -> DeviceIntSize {
+ ) {
debug_assert_eq!(self.state, State::AddResources);
let template = match self.resources.image_templates.get(request.key) {
@@ -913,18 +818,13 @@ impl ResourceCache {
None => {
warn!("ERROR: Trying to render deleted / non-existent key");
debug!("key={:?}", request.key);
- return DeviceIntSize::zero();
+ return
}
};
- let size = match request.tile {
- Some(tile) => compute_tile_size(&template.visible_rect, template.tiling.unwrap(), tile),
- None => template.descriptor.size,
- };
-
// Images that don't use the texture cache can early out.
if !template.data.uses_texture_cache() {
- return size;
+ return;
}
let side_size =
@@ -935,7 +835,7 @@ impl ResourceCache {
warn!("Dropping image, image:(w:{},h:{}, tile:{}) is too big for hardware!",
template.descriptor.size.width, template.descriptor.size.height, template.tiling.unwrap_or(0));
self.cached_images.insert(request.key, ImageResult::Err(ImageCacheError::OverLimitSize));
- return DeviceIntSize::zero();
+ return;
}
let storage = match self.cached_images.entry(request.key) {
@@ -1000,11 +900,11 @@ impl ResourceCache {
let needs_upload = self.texture_cache.request(&entry.texture_cache_handle, gpu_cache);
if !needs_upload && entry.dirty_rect.is_empty() {
- return size;
+ return
}
if !self.pending_image_requests.insert(request) {
- return size;
+ return
}
if template.data.is_blob() {
@@ -1016,8 +916,6 @@ impl ResourceCache {
assert!(!missing);
}
-
- size
}
fn discard_tiles_outside_visible_area(
@@ -1072,6 +970,7 @@ impl ResourceCache {
mut font: FontInstance,
glyph_keys: &[GlyphKey],
gpu_cache: &mut GpuCache,
+ render_task_tree: &mut RenderTaskGraph,
) {
debug_assert_eq!(self.state, State::AddResources);
@@ -1082,6 +981,8 @@ impl ResourceCache {
glyph_keys,
&mut self.texture_cache,
gpu_cache,
+ &mut self.cached_render_tasks,
+ render_task_tree,
);
}
@@ -1130,9 +1031,6 @@ impl ResourceCache {
fetch_buffer.push(GlyphFetchResult {
index_in_text_run: loop_index as i32,
uv_rect_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
- offset: DevicePoint::new(cache_item.user_data[0], cache_item.user_data[1]),
- size: cache_item.uv_rect.size,
- scale: cache_item.user_data[2],
});
}
@@ -1213,14 +1111,15 @@ impl ResourceCache {
})
}
- pub fn begin_frame(&mut self, stamp: FrameStamp, profile: &mut TransactionProfile) {
+ pub fn begin_frame(&mut self, stamp: FrameStamp) {
profile_scope!("begin_frame");
debug_assert_eq!(self.state, State::Idle);
self.state = State::AddResources;
- self.texture_cache.begin_frame(stamp, profile);
+ self.texture_cache.begin_frame(stamp);
self.cached_glyphs.begin_frame(
stamp,
&mut self.texture_cache,
+ &self.cached_render_tasks,
&mut self.glyph_rasterizer,
);
self.cached_render_tasks.begin_frame(&mut self.texture_cache);
@@ -1234,7 +1133,8 @@ impl ResourceCache {
pub fn block_until_all_resources_added(
&mut self,
gpu_cache: &mut GpuCache,
- profile: &mut TransactionProfile,
+ render_tasks: &mut RenderTaskGraph,
+ texture_cache_profile: &mut TextureCacheProfileCounters,
) {
profile_scope!("block_until_all_resources_added");
@@ -1245,7 +1145,9 @@ impl ResourceCache {
&mut self.cached_glyphs,
&mut self.texture_cache,
gpu_cache,
- profile,
+ &mut self.cached_render_tasks,
+ render_tasks,
+ texture_cache_profile,
);
// Apply any updates of new / updated images (incl. blobs) to the texture cache.
@@ -1354,13 +1256,12 @@ impl ResourceCache {
descriptor,
filter,
Some(image_data),
- [0.0; 4],
+ [0.0; 3],
dirty_rect,
gpu_cache,
None,
UvRectKind::Rect,
eviction,
- TargetShader::Default,
);
}
}
@@ -1390,24 +1291,6 @@ impl ResourceCache {
id
}
- pub fn create_compositor_external_surface(
- &mut self,
- is_opaque: bool,
- ) -> NativeSurfaceId {
- let id = NativeSurfaceId(NEXT_NATIVE_SURFACE_ID.fetch_add(1, Ordering::Relaxed) as u64);
-
- self.pending_native_surface_updates.push(
- NativeSurfaceOperation {
- details: NativeSurfaceOperationDetails::CreateExternalSurface {
- id,
- is_opaque,
- },
- }
- );
-
- id
- }
-
/// Queue up destruction of an existing native OS surface. This is used when
/// a picture cache surface is dropped or resized.
pub fn destroy_compositor_surface(
@@ -1451,46 +1334,11 @@ impl ResourceCache {
);
}
- pub fn attach_compositor_external_image(
- &mut self,
- id: NativeSurfaceId,
- external_image: ExternalImageId,
- ) {
- self.pending_native_surface_updates.push(
- NativeSurfaceOperation {
- details: NativeSurfaceOperationDetails::AttachExternalImage {
- id,
- external_image,
- },
- }
- );
- }
-
-
- pub fn end_frame(&mut self, profile: &mut TransactionProfile) {
+ pub fn end_frame(&mut self, texture_cache_profile: &mut TextureCacheProfileCounters) {
debug_assert_eq!(self.state, State::QueryResources);
profile_scope!("end_frame");
self.state = State::Idle;
-
- // GC the render target pool, if it's currently > 64 MB in size.
- //
- // We use a simple scheme whereby we drop any texture that hasn't been used
- // in the last 60 frames, until we are below the size threshold. This should
- // generally prevent any sustained build-up of unused textures, unless we don't
- // generate frames for a long period. This can happen when the window is
- // minimized, and we probably want to flush all the WebRender caches in that case [1].
- // There is also a second "red line" memory threshold which prevents
- // memory exhaustion if many render targets are allocated within a small
- // number of frames. For now this is set at 320 MB (5x the normal 64 MB threshold).
- //
- // [1] https://bugzilla.mozilla.org/show_bug.cgi?id=1494099
- self.gc_render_targets(
- 64 * 1024 * 1024,
- 32 * 1024 * 1024 * 10,
- 60,
- );
-
- self.texture_cache.end_frame(profile);
+ self.texture_cache.end_frame(texture_cache_profile);
}
pub fn set_debug_flags(&mut self, flags: DebugFlags) {
@@ -1516,9 +1364,6 @@ impl ResourceCache {
if what.contains(ClearCache::TEXTURE_CACHE) {
self.texture_cache.clear_all();
}
- if what.contains(ClearCache::RENDER_TARGETS) {
- self.clear_render_target_pool();
- }
}
pub fn clear_namespace(&mut self, namespace: IdNamespace) {
@@ -1547,19 +1392,11 @@ impl ResourceCache {
pub fn report_memory(&self, op: VoidPtrToSizeFn) -> MemoryReport {
let mut report = MemoryReport::default();
- let mut seen_fonts = std::collections::HashSet::new();
// Measure fonts. We only need the templates here, because the instances
// don't have big buffers.
for (_, font) in self.resources.font_templates.iter() {
if let FontTemplate::Raw(ref raw, _) = font {
report.fonts += unsafe { op(raw.as_ptr() as *const c_void) };
- seen_fonts.insert(raw.as_ptr());
- }
- }
-
- for font in self.resources.weak_fonts.iter() {
- if !seen_fonts.contains(&font.as_ptr()) {
- report.weak_fonts += unsafe { op(font.as_ptr() as *const c_void) };
}
}
@@ -1610,129 +1447,6 @@ impl ResourceCache {
assert!(!self.rasterized_blob_images.keys().any(&blob_f));
}
}
-
- /// Get a render target from the pool, or allocate a new one if none are
- /// currently available that match the requested parameters.
- pub fn get_or_create_render_target_from_pool(
- &mut self,
- size: DeviceIntSize,
- format: ImageFormat,
- ) -> CacheTextureId {
- for target in &mut self.render_target_pool {
- if target.size == size &&
- target.format == format &&
- !target.is_active {
- // Found a target that's not currently in use which matches. Update
- // the last_frame_used for GC purposes.
- target.is_active = true;
- target.last_frame_used = self.current_frame_id;
- return target.texture_id;
- }
- }
-
- // Need to create a new render target and add it to the pool
-
- let texture_id = self.texture_cache.alloc_render_target(
- size,
- format,
- );
-
- self.render_target_pool.push(RenderTarget {
- size,
- format,
- texture_id,
- is_active: true,
- last_frame_used: self.current_frame_id,
- });
-
- texture_id
- }
-
- /// Return a render target to the pool.
- pub fn return_render_target_to_pool(
- &mut self,
- id: CacheTextureId,
- ) {
- let target = self.render_target_pool
- .iter_mut()
- .find(|t| t.texture_id == id)
- .expect("bug: invalid render target id");
-
- assert!(target.is_active);
- target.is_active = false;
- }
-
- /// Clear all current render targets (e.g. on memory pressure)
- fn clear_render_target_pool(
- &mut self,
- ) {
- for target in self.render_target_pool.drain(..) {
- debug_assert!(!target.is_active);
- self.texture_cache.free_render_target(target.texture_id);
- }
- }
-
- /// Garbage collect and remove old render targets from the pool that haven't
- /// been used for some time.
- fn gc_render_targets(
- &mut self,
- total_bytes_threshold: usize,
- total_bytes_red_line_threshold: usize,
- frames_threshold: usize,
- ) {
- // Get the total GPU memory size used by the current render target pool
- let mut rt_pool_size_in_bytes: usize = self.render_target_pool
- .iter()
- .map(|t| t.size_in_bytes())
- .sum();
-
- // If the total size of the pool is less than the threshold, don't bother
- // trying to GC any targets
- if rt_pool_size_in_bytes <= total_bytes_threshold {
- return;
- }
-
- // Sort the current pool by age, so that we remove oldest textures first
- self.render_target_pool.sort_by_key(|t| t.last_frame_used);
-
- // We can't just use retain() because `RenderTarget` requires manual cleanup.
- let mut retained_targets = SmallVec::<[RenderTarget; 8]>::new();
-
- for target in self.render_target_pool.drain(..) {
- assert!(!target.is_active);
-
- // Drop oldest textures until we are under the allowed size threshold.
- // However, if it's been used very recently, it is always kept around,
- // which ensures we don't thrash texture allocations on pages that do
- // require a very large render target pool and are regularly changing.
- let above_red_line = rt_pool_size_in_bytes > total_bytes_red_line_threshold;
- let above_threshold = rt_pool_size_in_bytes > total_bytes_threshold;
- let used_recently = target.used_recently(self.current_frame_id, frames_threshold);
- let used_this_frame = target.last_frame_used == self.current_frame_id;
-
- if !used_this_frame && (above_red_line || (above_threshold && !used_recently)) {
- rt_pool_size_in_bytes -= target.size_in_bytes();
- self.texture_cache.free_render_target(target.texture_id);
- } else {
- retained_targets.push(target);
- }
- }
-
- self.render_target_pool.extend(retained_targets);
- }
-
- #[cfg(test)]
- pub fn validate_surfaces(
- &self,
- expected_surfaces: &[(i32, i32, ImageFormat)],
- ) {
- assert_eq!(expected_surfaces.len(), self.render_target_pool.len());
-
- for (expected, surface) in expected_surfaces.iter().zip(self.render_target_pool.iter()) {
- assert_eq!(DeviceIntSize::new(expected.0, expected.1), surface.size);
- assert_eq!(expected.2, surface.format);
- }
- }
}
impl Drop for ResourceCache {
@@ -1993,7 +1707,6 @@ impl ResourceCache {
config: &CaptureConfig,
) -> Vec<PlainExternalImage> {
use std::{fs, path::Path};
- use crate::texture_cache::TextureCacheConfig;
info!("loading resource cache");
//TODO: instead of filling the local path to Arc<data> map as we process
@@ -2017,11 +1730,13 @@ impl ResourceCache {
self.current_frame_id = FrameId::INVALID;
self.texture_cache = TextureCache::new(
self.texture_cache.max_texture_size(),
- self.texture_cache.tiling_threshold(),
- self.texture_cache.default_picture_tile_size(),
+ self.texture_cache.max_texture_layers(),
+ &self.texture_cache.picture_tile_sizes(),
+ DeviceIntSize::zero(),
self.texture_cache.color_formats(),
self.texture_cache.swizzle_settings(),
- &TextureCacheConfig::DEFAULT,
+ self.texture_cache.eviction_threshold_bytes(),
+ self.texture_cache.max_evictions_per_frame(),
);
}
}
diff --git a/third_party/webrender/webrender/src/scene.rs b/third_party/webrender/webrender/src/scene.rs
index edacd9bd2d9..3caf68ab603 100644
--- a/third_party/webrender/webrender/src/scene.rs
+++ b/third_party/webrender/webrender/src/scene.rs
@@ -4,17 +4,16 @@
use api::{BuiltDisplayList, DisplayListWithCache, ColorF, DynamicProperties, Epoch, FontRenderMode};
use api::{PipelineId, PropertyBinding, PropertyBindingId, PropertyValue, MixBlendMode, StackingContext};
+use api::MemoryReport;
use api::units::*;
use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
-use crate::render_api::MemoryReport;
use crate::composite::CompositorKind;
-use crate::clip::{ClipStore, ClipStoreStats};
-use crate::spatial_tree::SpatialTree;
+use crate::clip::{ClipStore, ClipDataStore};
+use crate::spatial_tree::{SpatialTree, SpatialNodeIndex};
use crate::frame_builder::{ChasePrimitive, FrameBuilderConfig};
use crate::hit_test::{HitTester, HitTestingScene, HitTestingSceneStats};
-use crate::internal_types::FastHashMap;
+use crate::internal_types::{FastHashMap, FastHashSet};
use crate::prim_store::{PrimitiveStore, PrimitiveStoreStats, PictureIndex};
-use crate::tile_cache::TileCacheConfig;
use std::sync::Arc;
/// Stores a map of the animated property bindings for the current display list. These
@@ -72,7 +71,6 @@ impl SceneProperties {
if *pending_properties != self.current_properties {
self.transform_properties.clear();
self.float_properties.clear();
- self.color_properties.clear();
for property in &pending_properties.transforms {
self.transform_properties
@@ -162,6 +160,7 @@ impl SceneProperties {
pub struct ScenePipeline {
pub pipeline_id: PipelineId,
pub viewport_size: LayoutSize,
+ pub content_size: LayoutSize,
pub background_color: Option<ColorF>,
pub display_list: DisplayListWithCache,
}
@@ -196,6 +195,7 @@ impl Scene {
display_list: BuiltDisplayList,
background_color: Option<ColorF>,
viewport_size: LayoutSize,
+ content_size: LayoutSize,
) {
// Adds a cache to the given display list. If this pipeline already had
// a display list before, that display list is updated and used instead.
@@ -210,6 +210,7 @@ impl Scene {
let new_pipeline = ScenePipeline {
pipeline_id,
viewport_size,
+ content_size,
background_color,
display_list,
};
@@ -269,13 +270,14 @@ pub struct BuiltScene {
pub pipeline_epochs: FastHashMap<PipelineId, Epoch>,
pub output_rect: DeviceIntRect,
pub background_color: Option<ColorF>,
+ pub root_pic_index: PictureIndex,
pub prim_store: PrimitiveStore,
pub clip_store: ClipStore,
pub config: FrameBuilderConfig,
pub spatial_tree: SpatialTree,
pub hit_testing_scene: Arc<HitTestingScene>,
- pub tile_cache_config: TileCacheConfig,
- pub tile_cache_pictures: Vec<PictureIndex>,
+ pub content_slice_count: usize,
+ pub picture_cache_spatial_nodes: FastHashSet<SpatialNodeIndex>,
}
impl BuiltScene {
@@ -285,31 +287,29 @@ impl BuiltScene {
pipeline_epochs: FastHashMap::default(),
output_rect: DeviceIntRect::zero(),
background_color: None,
+ root_pic_index: PictureIndex(0),
prim_store: PrimitiveStore::new(&PrimitiveStoreStats::empty()),
- clip_store: ClipStore::new(&ClipStoreStats::empty()),
+ clip_store: ClipStore::new(),
spatial_tree: SpatialTree::new(),
hit_testing_scene: Arc::new(HitTestingScene::new(&HitTestingSceneStats::empty())),
- tile_cache_config: TileCacheConfig::new(0),
- tile_cache_pictures: Vec::new(),
+ content_slice_count: 0,
+ picture_cache_spatial_nodes: FastHashSet::default(),
config: FrameBuilderConfig {
default_font_render_mode: FontRenderMode::Mono,
dual_source_blending_is_enabled: true,
dual_source_blending_is_supported: false,
chase_primitive: ChasePrimitive::Nothing,
+ global_enable_picture_caching: false,
testing: false,
gpu_supports_fast_clears: false,
gpu_supports_advanced_blend: false,
advanced_blend_is_coherent: false,
- gpu_supports_render_target_partial_update: true,
- external_images_require_copy: false,
batch_lookback_count: 0,
background_color: None,
compositor_kind: CompositorKind::default(),
tile_size_override: None,
max_depth_ids: 0,
max_target_size: 0,
- force_invalidation: false,
- is_software: false,
},
}
}
@@ -319,14 +319,18 @@ impl BuiltScene {
SceneStats {
prim_store_stats: self.prim_store.get_stats(),
hit_test_stats: self.hit_testing_scene.get_stats(),
- clip_store_stats: self.clip_store.get_stats(),
}
}
- pub fn create_hit_tester(&mut self) -> HitTester {
+ pub fn create_hit_tester(
+ &mut self,
+ clip_data_store: &ClipDataStore,
+ ) -> HitTester {
HitTester::new(
Arc::clone(&self.hit_testing_scene),
&self.spatial_tree,
+ &self.clip_store,
+ clip_data_store,
)
}
}
@@ -338,7 +342,6 @@ impl BuiltScene {
pub struct SceneStats {
pub prim_store_stats: PrimitiveStoreStats,
pub hit_test_stats: HitTestingSceneStats,
- pub clip_store_stats: ClipStoreStats,
}
impl SceneStats {
@@ -346,7 +349,6 @@ impl SceneStats {
SceneStats {
prim_store_stats: PrimitiveStoreStats::empty(),
hit_test_stats: HitTestingSceneStats::empty(),
- clip_store_stats: ClipStoreStats::empty(),
}
}
}
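With the signature change above, a caller now threads the interned clip data through when building the hit tester. A hypothetical call site (the clip_data_store binding is an assumption, typically the clip interner owned by the render backend):

    // Sketch: the hit tester needs both the scene's own clip store (taken
    // from &self internally) and the interned clip data passed by the caller.
    let hit_tester = built_scene.create_hit_tester(&clip_data_store);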
diff --git a/third_party/webrender/webrender/src/scene_builder_thread.rs b/third_party/webrender/webrender/src/scene_builder_thread.rs
index c10bba793bc..dd599f45f84 100644
--- a/third_party/webrender/webrender/src/scene_builder_thread.rs
+++ b/third_party/webrender/webrender/src/scene_builder_thread.rs
@@ -2,20 +2,16 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{AsyncBlobImageRasterizer, BlobImageResult};
-use api::{DocumentId, PipelineId, ExternalEvent, BlobImageRequest};
-use api::{NotificationRequest, Checkpoint, IdNamespace, QualitySettings};
-use api::{PrimitiveKeyKind, SharedFontInstanceMap};
-use api::{GlyphDimensionRequest, GlyphIndexRequest};
-use api::channel::{unbounded_channel, single_msg_channel, Receiver, Sender};
+use api::{AsyncBlobImageRasterizer, BlobImageRequest, BlobImageResult};
+use api::{DocumentId, PipelineId, ApiMsg, FrameMsg, SceneMsg, ResourceUpdate, ExternalEvent};
+use api::{NotificationRequest, Checkpoint, IdNamespace, QualitySettings, TransactionMsg};
+use api::{ClipIntern, FilterDataIntern, MemoryReport, PrimitiveKeyKind, SharedFontInstanceMap};
+use api::{DocumentLayer, GlyphDimensionRequest, GlyphIndexRequest};
use api::units::*;
-use crate::render_api::{ApiMsg, FrameMsg, SceneMsg, ResourceUpdate, TransactionMsg, MemoryReport};
#[cfg(feature = "capture")]
use crate::capture::CaptureConfig;
use crate::frame_builder::FrameBuilderConfig;
use crate::scene_building::SceneBuilder;
-use crate::clip::{ClipIntern, PolygonIntern};
-use crate::filterdata::FilterDataIntern;
use crate::intern::{Internable, Interner, UpdateList};
use crate::internal_types::{FastHashMap, FastHashSet};
use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
@@ -26,16 +22,35 @@ use crate::prim_store::image::{Image, YuvImage};
use crate::prim_store::line_dec::LineDecoration;
use crate::prim_store::picture::Picture;
use crate::prim_store::text_run::TextRun;
-use crate::profiler::{self, TransactionProfile};
use crate::render_backend::SceneView;
-use crate::renderer::{FullFrameStats, PipelineInfo, SceneBuilderHooks};
+use crate::renderer::{PipelineInfo, SceneBuilderHooks};
use crate::scene::{Scene, BuiltScene, SceneStats};
use std::iter;
+use std::sync::mpsc::{channel, Receiver, Sender};
+use std::mem::replace;
use time::precise_time_ns;
use crate::util::drain_filter;
use std::thread;
use std::time::Duration;
+#[cfg(feature = "debugger")]
+use crate::debug_server;
+#[cfg(feature = "debugger")]
+use api::{BuiltDisplayListIter, DisplayItem};
+
+/// Various timing information that will be turned into
+/// TransactionProfileCounters later down the pipeline.
+#[derive(Clone, Debug)]
+pub struct TransactionTimings {
+ pub builder_start_time_ns: u64,
+ pub builder_end_time_ns: u64,
+ pub send_time_ns: u64,
+ pub scene_build_start_time_ns: u64,
+ pub scene_build_end_time_ns: u64,
+ pub blob_rasterization_end_time_ns: u64,
+ pub display_list_len: usize,
+}
+
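These raw nanosecond stamps are meant to be reduced to durations further down the pipeline; a sketch of the obvious derivations (the helper name is hypothetical):

    // Sketch: derive millisecond durations from a TransactionTimings value.
    fn timing_summary_ms(t: &TransactionTimings) -> (f64, f64, f64) {
        let ns_to_ms = |ns: u64| ns as f64 / 1_000_000.0;
        let dl_build = ns_to_ms(t.builder_end_time_ns - t.builder_start_time_ns);
        let scene_build =
            ns_to_ms(t.scene_build_end_time_ns - t.scene_build_start_time_ns);
        // Blobs are rasterized right after scene building (see
        // process_transaction below), so the difference is the blob time.
        let blob_raster = ns_to_ms(
            t.blob_rasterization_end_time_ns
                .saturating_sub(t.scene_build_end_time_ns),
        );
        (dl_build, scene_build, blob_raster)
    }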
fn rasterize_blobs(txn: &mut TransactionMsg, is_low_priority: bool) {
profile_scope!("rasterize_blobs");
@@ -63,11 +78,12 @@ pub struct BuiltTransaction {
pub removed_pipelines: Vec<(PipelineId, DocumentId)>,
pub notifications: Vec<NotificationRequest>,
pub interner_updates: Option<InternerUpdates>,
+ pub scene_build_start_time: u64,
+ pub scene_build_end_time: u64,
pub render_frame: bool,
pub invalidate_rendered_frame: bool,
pub discard_frame_state_for_pipelines: Vec<PipelineId>,
- pub profile: TransactionProfile,
- pub frame_stats: FullFrameStats,
+ pub timings: Option<TransactionTimings>,
}
#[cfg(feature = "replay")]
@@ -81,20 +97,27 @@ pub struct LoadScene {
pub interners: Interners,
}
-/// Message to the scene builder thread.
+// Message to the scene builder thread.
pub enum SceneBuilderRequest {
Transactions(Vec<Box<TransactionMsg>>),
- AddDocument(DocumentId, DeviceIntSize),
+ ExternalEvent(ExternalEvent),
+ AddDocument(DocumentId, DeviceIntSize, DocumentLayer),
DeleteDocument(DocumentId),
GetGlyphDimensions(GlyphDimensionRequest),
GetGlyphIndices(GlyphIndexRequest),
- ClearNamespace(IdNamespace),
- SimulateLongSceneBuild(u32),
- ExternalEvent(ExternalEvent),
WakeUp,
- StopRenderBackend,
- ShutDown(Option<Sender<()>>),
+ Stop,
Flush(Sender<()>),
+ ClearNamespace(IdNamespace),
+ SimulateLongSceneBuild(u32),
+ SimulateLongLowPrioritySceneBuild(u32),
+ /// Enqueue this to inform the scene builder to pick one message from
+ /// backend_rx.
+ BackendMessage,
+}
+
+/// Message from render backend to scene builder.
+pub enum BackendSceneBuilderRequest {
SetFrameBuilderConfig(FrameBuilderConfig),
ReportMemory(Box<MemoryReport>, Sender<Box<MemoryReport>>),
#[cfg(feature = "capture")]
@@ -105,30 +128,21 @@ pub enum SceneBuilderRequest {
StartCaptureSequence(CaptureConfig),
#[cfg(feature = "capture")]
StopCaptureSequence,
+ DocumentsForDebugger
}
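The split into two channels implies a small handshake: the render backend pushes the payload on the dedicated backend channel first, then enqueues the BackendMessage sentinel on the ordinary request channel so the scene builder knows to drain backend_rx. A sketch of the sending side, assuming the std::sync::mpsc Sender handles returned by SceneBuilderThreadChannels::new (the function name is illustrative):

    use std::sync::mpsc::Sender;

    // Sketch: how the render backend would use the split channels.
    fn send_backend_msg(
        backend_tx: &Sender<BackendSceneBuilderRequest>,
        scene_tx: &Sender<SceneBuilderRequest>,
        msg: BackendSceneBuilderRequest,
    ) {
        backend_tx.send(msg).unwrap();
        // The sentinel makes the event loop try_recv() exactly one message
        // from backend_rx (see the BackendMessage arm in the run loop below).
        scene_tx.send(SceneBuilderRequest::BackendMessage).unwrap();
    }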
// Message from scene builder to render backend.
pub enum SceneBuilderResult {
Transactions(Vec<Box<BuiltTransaction>>, Option<Sender<SceneSwapResult>>),
+ #[cfg(feature = "capture")]
+ CapturedTransactions(Vec<Box<BuiltTransaction>>, CaptureConfig, Option<Sender<SceneSwapResult>>),
ExternalEvent(ExternalEvent),
FlushComplete(Sender<()>),
- DeleteDocument(DocumentId),
ClearNamespace(IdNamespace),
GetGlyphDimensions(GlyphDimensionRequest),
GetGlyphIndices(GlyphIndexRequest),
- StopRenderBackend,
- ShutDown(Option<Sender<()>>),
-
- #[cfg(feature = "capture")]
- /// The same as `Transactions`, but also supplies a `CaptureConfig` that the
- /// render backend should use for sequence capture, until the next
- /// `CapturedTransactions` or `StopCaptureSequence` result.
- CapturedTransactions(Vec<Box<BuiltTransaction>>, CaptureConfig, Option<Sender<SceneSwapResult>>),
-
- #[cfg(feature = "capture")]
- /// The scene builder has stopped sequence capture, so the render backend
- /// should do the same.
- StopCaptureSequence,
+ Stopped,
+ DocumentsForDebugger(String)
}
// Message from render backend to scene builder to indicate the
@@ -193,7 +207,7 @@ macro_rules! declare_interners {
}
}
-crate::enumerate_interners!(declare_interners);
+enumerate_interners!(declare_interners);
// A document in the scene builder contains the current scene,
// as well as a persistent clip interner. This allows clips
@@ -204,16 +218,21 @@ struct Document {
interners: Interners,
stats: SceneStats,
view: SceneView,
+ /// A set of pipelines that the caller has requested be
+ /// made available as output textures.
+ output_pipelines: FastHashSet<PipelineId>,
}
impl Document {
- fn new(device_rect: DeviceIntRect, device_pixel_ratio: f32) -> Self {
+ fn new(device_rect: DeviceIntRect, layer: DocumentLayer, device_pixel_ratio: f32) -> Self {
Document {
scene: Scene::new(),
interners: Interners::default(),
stats: SceneStats::empty(),
+ output_pipelines: FastHashSet::default(),
view: SceneView {
device_rect,
+ layer,
device_pixel_ratio,
page_zoom_factor: 1.0,
quality_settings: QualitySettings::default(),
@@ -225,7 +244,9 @@ impl Document {
pub struct SceneBuilderThread {
documents: FastHashMap<DocumentId, Document>,
rx: Receiver<SceneBuilderRequest>,
- tx: Sender<ApiMsg>,
+ backend_rx: Receiver<BackendSceneBuilderRequest>,
+ tx: Sender<SceneBuilderResult>,
+ api_tx: Sender<ApiMsg>,
config: FrameBuilderConfig,
default_device_pixel_ratio: f32,
font_instances: SharedFontInstanceMap,
@@ -239,20 +260,28 @@ pub struct SceneBuilderThread {
pub struct SceneBuilderThreadChannels {
rx: Receiver<SceneBuilderRequest>,
- tx: Sender<ApiMsg>,
+ backend_rx: Receiver<BackendSceneBuilderRequest>,
+ tx: Sender<SceneBuilderResult>,
+ api_tx: Sender<ApiMsg>,
}
impl SceneBuilderThreadChannels {
pub fn new(
- tx: Sender<ApiMsg>
- ) -> (Self, Sender<SceneBuilderRequest>) {
- let (in_tx, in_rx) = unbounded_channel();
+ api_tx: Sender<ApiMsg>
+ ) -> (Self, Sender<SceneBuilderRequest>, Sender<BackendSceneBuilderRequest>, Receiver<SceneBuilderResult>) {
+ let (in_tx, in_rx) = channel();
+ let (out_tx, out_rx) = channel();
+ let (backend_tx, backend_rx) = channel();
(
Self {
rx: in_rx,
- tx,
+ backend_rx,
+ tx: out_tx,
+ api_tx,
},
in_tx,
+ backend_tx,
+ out_rx,
)
}
}
@@ -266,12 +295,14 @@ impl SceneBuilderThread {
hooks: Option<Box<dyn SceneBuilderHooks + Send>>,
channels: SceneBuilderThreadChannels,
) -> Self {
- let SceneBuilderThreadChannels { rx, tx } = channels;
+ let SceneBuilderThreadChannels { rx, backend_rx, tx, api_tx } = channels;
Self {
documents: Default::default(),
rx,
+ backend_rx,
tx,
+ api_tx,
config,
default_device_pixel_ratio,
font_instances,
@@ -289,7 +320,8 @@ impl SceneBuilderThread {
/// We first put something in the result queue and then send a wake-up
/// message to the api queue that the render backend is blocking on.
pub fn send(&self, msg: SceneBuilderResult) {
- self.tx.send(ApiMsg::SceneBuilderResult(msg)).unwrap();
+ self.tx.send(msg).unwrap();
+ let _ = self.api_tx.send(ApiMsg::WakeUp);
}
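On the receiving side, the render backend keeps blocking on its api queue and only polls the result queue once it sees the wake-up; a sketch of that loop (the handler functions and channel bindings are hypothetical):

    // Sketch: render-backend side of the result/wake-up handshake.
    loop {
        match api_rx.recv().unwrap() {
            ApiMsg::WakeUp => {
                // A result is queued before the WakeUp is sent, so a
                // non-blocking try_recv normally succeeds here.
                if let Ok(result) = result_rx.try_recv() {
                    handle_scene_builder_result(result);
                }
            }
            other => handle_api_msg(other),
        }
    }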
/// The scene builder thread's event loop.
@@ -306,9 +338,9 @@ impl SceneBuilderThread {
Ok(SceneBuilderRequest::Flush(tx)) => {
self.send(SceneBuilderResult::FlushComplete(tx));
}
- Ok(SceneBuilderRequest::Transactions(txns)) => {
- let built_txns : Vec<Box<BuiltTransaction>> = txns.into_iter()
- .map(|txn| self.process_transaction(*txn))
+ Ok(SceneBuilderRequest::Transactions(mut txns)) => {
+ let built_txns : Vec<Box<BuiltTransaction>> = txns.iter_mut()
+ .map(|txn| self.process_transaction(txn))
.collect();
#[cfg(feature = "capture")]
match built_txns.iter().any(|txn| txn.built_scene.is_some()) {
@@ -317,16 +349,16 @@ impl SceneBuilderThread {
}
self.forward_built_transactions(built_txns);
}
- Ok(SceneBuilderRequest::AddDocument(document_id, initial_size)) => {
+ Ok(SceneBuilderRequest::AddDocument(document_id, initial_size, layer)) => {
let old = self.documents.insert(document_id, Document::new(
initial_size.into(),
+ layer,
self.default_device_pixel_ratio,
));
debug_assert!(old.is_none());
}
Ok(SceneBuilderRequest::DeleteDocument(document_id)) => {
self.documents.remove(&document_id);
- self.send(SceneBuilderResult::DeleteDocument(document_id));
}
Ok(SceneBuilderRequest::ClearNamespace(id)) => {
self.documents.retain(|doc_id, _doc| doc_id.namespace_id != id);
@@ -336,46 +368,54 @@ impl SceneBuilderThread {
self.send(SceneBuilderResult::ExternalEvent(evt));
}
Ok(SceneBuilderRequest::GetGlyphDimensions(request)) => {
- self.send(SceneBuilderResult::GetGlyphDimensions(request));
+ self.send(SceneBuilderResult::GetGlyphDimensions(request))
}
Ok(SceneBuilderRequest::GetGlyphIndices(request)) => {
- self.send(SceneBuilderResult::GetGlyphIndices(request));
+ self.send(SceneBuilderResult::GetGlyphIndices(request))
}
- Ok(SceneBuilderRequest::StopRenderBackend) => {
- self.send(SceneBuilderResult::StopRenderBackend);
- }
- Ok(SceneBuilderRequest::ShutDown(sync)) => {
- self.send(SceneBuilderResult::ShutDown(sync));
+ Ok(SceneBuilderRequest::Stop) => {
+ self.tx.send(SceneBuilderResult::Stopped).unwrap();
+ // We don't need to send a WakeUp to api_tx because we only
+ // get the Stop when the RenderBackend loop is exiting.
break;
}
Ok(SceneBuilderRequest::SimulateLongSceneBuild(time_ms)) => {
self.simulate_slow_ms = time_ms
}
- Ok(SceneBuilderRequest::ReportMemory(mut report, tx)) => {
- (*report) += self.report_memory();
- tx.send(report).unwrap();
- }
- Ok(SceneBuilderRequest::SetFrameBuilderConfig(cfg)) => {
- self.config = cfg;
- }
- #[cfg(feature = "replay")]
- Ok(SceneBuilderRequest::LoadScenes(msg)) => {
- self.load_scenes(msg);
- }
- #[cfg(feature = "capture")]
- Ok(SceneBuilderRequest::SaveScene(config)) => {
- self.save_scene(config);
- }
- #[cfg(feature = "capture")]
- Ok(SceneBuilderRequest::StartCaptureSequence(config)) => {
- self.start_capture_sequence(config);
- }
- #[cfg(feature = "capture")]
- Ok(SceneBuilderRequest::StopCaptureSequence) => {
- // FIXME(aosmond): clear config for frames and resource cache without scene
- // rebuild?
- self.capture_config = None;
- self.send(SceneBuilderResult::StopCaptureSequence);
+ Ok(SceneBuilderRequest::SimulateLongLowPrioritySceneBuild(_)) => {}
+ Ok(SceneBuilderRequest::BackendMessage) => {
+ let msg = self.backend_rx.try_recv().unwrap();
+ match msg {
+ BackendSceneBuilderRequest::ReportMemory(mut report, tx) => {
+ (*report) += self.report_memory();
+ tx.send(report).unwrap();
+ }
+ BackendSceneBuilderRequest::SetFrameBuilderConfig(cfg) => {
+ self.config = cfg;
+ }
+ #[cfg(feature = "replay")]
+ BackendSceneBuilderRequest::LoadScenes(msg) => {
+ self.load_scenes(msg);
+ }
+ #[cfg(feature = "capture")]
+ BackendSceneBuilderRequest::SaveScene(config) => {
+ self.save_scene(config);
+ }
+ #[cfg(feature = "capture")]
+ BackendSceneBuilderRequest::StartCaptureSequence(config) => {
+ self.start_capture_sequence(config);
+ }
+ #[cfg(feature = "capture")]
+ BackendSceneBuilderRequest::StopCaptureSequence => {
+ // FIXME(aosmond): clear config for frames and resource cache without scene
+ // rebuild?
+ self.capture_config = None;
+ }
+ BackendSceneBuilderRequest::DocumentsForDebugger => {
+ let json = self.get_docs_for_debugger();
+ self.send(SceneBuilderResult::DocumentsForDebugger(json));
+ }
+ }
}
Err(_) => {
break;
@@ -400,8 +440,7 @@ impl SceneBuilderThread {
let interners_name = format!("interners-{}-{}", id.namespace_id.0, id.id);
config.serialize_for_scene(&doc.interners, interners_name);
- use crate::render_api::CaptureBits;
- if config.bits.contains(CaptureBits::SCENE) {
+ if config.bits.contains(api::CaptureBits::SCENE) {
let file_name = format!("scene-{}-{}", id.namespace_id.0, id.id);
config.serialize_for_scene(&doc.scene, file_name);
}
@@ -413,14 +452,19 @@ impl SceneBuilderThread {
for mut item in scenes {
self.config = item.config;
+ let scene_build_start_time = precise_time_ns();
+
let mut built_scene = None;
let mut interner_updates = None;
+ let output_pipelines = FastHashSet::default();
+
if item.scene.has_root_pipeline() {
built_scene = Some(SceneBuilder::build(
&item.scene,
item.font_instances,
&item.view,
+ &output_pipelines,
&self.config,
&mut item.interners,
&SceneStats::empty(),
@@ -438,6 +482,7 @@ impl SceneBuilderThread {
interners: item.interners,
stats: SceneStats::empty(),
view: item.view.clone(),
+ output_pipelines,
},
);
@@ -454,9 +499,10 @@ impl SceneBuilderThread {
removed_pipelines: Vec::new(),
discard_frame_state_for_pipelines: Vec::new(),
notifications: Vec::new(),
+ scene_build_start_time,
+ scene_build_end_time: precise_time_ns(),
interner_updates,
- profile: TransactionProfile::new(),
- frame_stats: FullFrameStats::default(),
+ timings: None,
})];
self.forward_built_transactions(txns);
@@ -473,8 +519,7 @@ impl SceneBuilderThread {
let interners_name = format!("interners-{}-{}", id.namespace_id.0, id.id);
config.serialize_for_scene(&doc.interners, interners_name);
- use crate::render_api::CaptureBits;
- if config.bits.contains(CaptureBits::SCENE) {
+ if config.bits.contains(api::CaptureBits::SCENE) {
let file_name = format!("scene-{}-{}", id.namespace_id.0, id.id);
config.serialize_for_scene(&doc.scene, file_name);
}
@@ -491,25 +536,88 @@ impl SceneBuilderThread {
self.save_capture_sequence();
}
+ #[cfg(feature = "debugger")]
+ fn traverse_items<'a>(
+ &self,
+ traversal: &mut BuiltDisplayListIter<'a>,
+ node: &mut debug_server::TreeNode,
+ ) {
+ loop {
+ let subtraversal = {
+ let item = match traversal.next() {
+ Some(item) => item,
+ None => break,
+ };
+
+ match *item.item() {
+ display_item @ DisplayItem::PushStackingContext(..) => {
+ let mut subtraversal = item.sub_iter();
+ let mut child_node =
+ debug_server::TreeNode::new(&display_item.debug_name().to_string());
+ self.traverse_items(&mut subtraversal, &mut child_node);
+ node.add_child(child_node);
+ Some(subtraversal)
+ }
+ DisplayItem::PopStackingContext => {
+ return;
+ }
+ display_item => {
+ node.add_item(&display_item.debug_name().to_string());
+ None
+ }
+ }
+ };
+
+ // If the item produced a sub-traversal, we need `traversal` to have the
+ // same state as the completed subtraversal, so we reinitialize it here.
+ if let Some(subtraversal) = subtraversal {
+ *traversal = subtraversal;
+ }
+ }
+ }
+
+ #[cfg(not(feature = "debugger"))]
+ fn get_docs_for_debugger(&self) -> String {
+ String::new()
+ }
+
+ #[cfg(feature = "debugger")]
+ fn get_docs_for_debugger(&self) -> String {
+ let mut docs = debug_server::DocumentList::new();
+
+ for (_, doc) in &self.documents {
+ let mut debug_doc = debug_server::TreeNode::new("document");
+
+ for (_, pipeline) in &doc.scene.pipelines {
+ let mut debug_dl = debug_server::TreeNode::new("display-list");
+ self.traverse_items(&mut pipeline.display_list.iter(), &mut debug_dl);
+ debug_doc.add_child(debug_dl);
+ }
+
+ docs.add(debug_doc);
+ }
+
+ serde_json::to_string(&docs).unwrap()
+ }
+
/// Do the bulk of the work of the scene builder thread.
- fn process_transaction(&mut self, mut txn: TransactionMsg) -> Box<BuiltTransaction> {
+ fn process_transaction(&mut self, txn: &mut TransactionMsg) -> Box<BuiltTransaction> {
profile_scope!("process_transaction");
if let Some(ref hooks) = self.hooks {
hooks.pre_scene_build();
}
+ let scene_build_start_time = precise_time_ns();
+
let doc = self.documents.get_mut(&txn.document_id).unwrap();
let scene = &mut doc.scene;
- let mut profile = txn.profile.take();
+ let mut timings = None;
- let scene_build_start = precise_time_ns();
let mut discard_frame_state_for_pipelines = Vec::new();
let mut removed_pipelines = Vec::new();
let mut rebuild_scene = false;
- let mut frame_stats = FullFrameStats::default();
-
for message in txn.scene_ops.drain(..) {
match message {
SceneMsg::UpdateEpoch(pipeline_id, epoch) => {
@@ -530,21 +638,14 @@ impl SceneBuilderThread {
pipeline_id,
background,
viewport_size,
+ content_size,
display_list,
preserve_frame_state,
} => {
+ let display_list_len = display_list.data().len();
+
let (builder_start_time_ns, builder_end_time_ns, send_time_ns) =
- display_list.times();
- let content_send_time = profiler::ns_to_ms(precise_time_ns() - send_time_ns);
- let dl_build_time = profiler::ns_to_ms(builder_end_time_ns - builder_start_time_ns);
- profile.set(profiler::CONTENT_SEND_TIME, content_send_time);
- profile.set(profiler::DISPLAY_LIST_BUILD_TIME, dl_build_time);
- profile.set(profiler::DISPLAY_LIST_MEM, profiler::bytes_to_mb(display_list.data().len()));
-
- let (gecko_display_list_time, full_display_list) = display_list.gecko_display_list_stats();
- frame_stats.full_display_list = full_display_list;
- frame_stats.gecko_display_list_time = gecko_display_list_time;
- frame_stats.wr_display_list_time += dl_build_time;
+ display_list.times();
if self.removed_pipelines.contains(&pipeline_id) {
continue;
@@ -561,8 +662,19 @@ impl SceneBuilderThread {
display_list,
background,
viewport_size,
+ content_size,
);
+ timings = Some(TransactionTimings {
+ builder_start_time_ns,
+ builder_end_time_ns,
+ send_time_ns,
+ scene_build_start_time_ns: 0,
+ scene_build_end_time_ns: 0,
+ blob_rasterization_end_time_ns: 0,
+ display_list_len,
+ });
+
if !preserve_frame_state {
discard_frame_state_for_pipelines.push(pipeline_id);
}
@@ -578,6 +690,13 @@ impl SceneBuilderThread {
self.removed_pipelines.insert(pipeline_id);
removed_pipelines.push((pipeline_id, txn.document_id));
}
+ SceneMsg::EnableFrameOutput(pipeline_id, enable) => {
+ if enable {
+ doc.output_pipelines.insert(pipeline_id);
+ } else {
+ doc.output_pipelines.remove(&pipeline_id);
+ }
+ }
}
}
@@ -591,6 +710,7 @@ impl SceneBuilderThread {
&scene,
self.font_instances.clone(),
&doc.view,
+ &doc.output_pipelines,
&self.config,
&mut doc.interners,
&doc.stats,
@@ -607,19 +727,15 @@ impl SceneBuilderThread {
built_scene = Some(built);
}
- let scene_build_time_ms =
- profiler::ns_to_ms(precise_time_ns() - scene_build_start);
- profile.set(profiler::SCENE_BUILD_TIME, scene_build_time_ms);
-
- frame_stats.scene_build_time += scene_build_time_ms;
+ let scene_build_end_time = precise_time_ns();
- if !txn.blob_requests.is_empty() {
- profile.start_time(profiler::BLOB_RASTERIZATION_TIME);
+ let is_low_priority = false;
+ rasterize_blobs(txn, is_low_priority);
- let is_low_priority = false;
- rasterize_blobs(&mut txn, is_low_priority);
-
- profile.end_time(profiler::BLOB_RASTERIZATION_TIME);
+ if let Some(timings) = timings.as_mut() {
+ timings.blob_rasterization_end_time_ns = precise_time_ns();
+ timings.scene_build_start_time_ns = scene_build_start_time;
+ timings.scene_build_end_time_ns = scene_build_end_time;
}
drain_filter(
@@ -634,20 +750,21 @@ impl SceneBuilderThread {
Box::new(BuiltTransaction {
document_id: txn.document_id,
- render_frame: txn.generate_frame.as_bool(),
+ render_frame: txn.generate_frame,
invalidate_rendered_frame: txn.invalidate_rendered_frame,
built_scene,
view: doc.view,
- rasterized_blobs: txn.rasterized_blobs,
- resource_updates: txn.resource_updates,
- blob_rasterizer: txn.blob_rasterizer,
- frame_ops: txn.frame_ops,
+ rasterized_blobs: replace(&mut txn.rasterized_blobs, Vec::new()),
+ resource_updates: replace(&mut txn.resource_updates, Vec::new()),
+ blob_rasterizer: replace(&mut txn.blob_rasterizer, None),
+ frame_ops: replace(&mut txn.frame_ops, Vec::new()),
removed_pipelines,
discard_frame_state_for_pipelines,
- notifications: txn.notifications,
+ notifications: replace(&mut txn.notifications, Vec::new()),
interner_updates,
- profile,
- frame_stats,
+ scene_build_start_time,
+ scene_build_end_time,
+ timings,
})
}
@@ -670,9 +787,9 @@ impl SceneBuilderThread {
.flatten().collect(),
};
- let (tx, rx) = single_msg_channel();
+ let (tx, rx) = channel();
let txn = txns.iter().find(|txn| txn.built_scene.is_some()).unwrap();
- hooks.pre_scene_swap((txn.profile.get(profiler::SCENE_BUILD_TIME).unwrap() * 1000000.0) as u64);
+ hooks.pre_scene_swap(txn.scene_build_end_time - txn.scene_build_start_time);
(Some(info), Some(tx), Some(rx))
} else {
@@ -695,12 +812,14 @@ impl SceneBuilderThread {
#[cfg(feature = "capture")]
match self.capture_config {
- Some(ref config) => self.send(SceneBuilderResult::CapturedTransactions(txns, config.clone(), result_tx)),
- None => self.send(SceneBuilderResult::Transactions(txns, result_tx)),
- };
+ Some(ref config) => self.tx.send(SceneBuilderResult::CapturedTransactions(txns, config.clone(), result_tx)).unwrap(),
+ None => self.tx.send(SceneBuilderResult::Transactions(txns, result_tx)).unwrap(),
+ }
#[cfg(not(feature = "capture"))]
- self.send(SceneBuilderResult::Transactions(txns, result_tx));
+ self.tx.send(SceneBuilderResult::Transactions(txns, result_tx)).unwrap();
+
+ let _ = self.api_tx.send(ApiMsg::WakeUp);
if let Some(pipeline_info) = pipeline_info {
// Block until the swap is done, then invoke the hook.
@@ -742,6 +861,7 @@ impl SceneBuilderThread {
pub struct LowPrioritySceneBuilderThread {
pub rx: Receiver<SceneBuilderRequest>,
pub tx: Sender<SceneBuilderRequest>,
+ pub simulate_slow_ms: u32,
}
impl LowPrioritySceneBuilderThread {
@@ -754,10 +874,19 @@ impl LowPrioritySceneBuilderThread {
.collect();
self.tx.send(SceneBuilderRequest::Transactions(txns)).unwrap();
}
- Ok(SceneBuilderRequest::ShutDown(sync)) => {
- self.tx.send(SceneBuilderRequest::ShutDown(sync)).unwrap();
+ Ok(SceneBuilderRequest::AddDocument(id, size, layer)) => {
+ self.tx.send(SceneBuilderRequest::AddDocument(id, size, layer)).unwrap();
+ }
+ Ok(SceneBuilderRequest::DeleteDocument(document_id)) => {
+ self.tx.send(SceneBuilderRequest::DeleteDocument(document_id)).unwrap();
+ }
+ Ok(SceneBuilderRequest::Stop) => {
+ self.tx.send(SceneBuilderRequest::Stop).unwrap();
break;
}
+ Ok(SceneBuilderRequest::SimulateLongLowPrioritySceneBuild(time_ms)) => {
+ self.simulate_slow_ms = time_ms;
+ }
Ok(other) => {
self.tx.send(other).unwrap();
}
@@ -773,6 +902,10 @@ impl LowPrioritySceneBuilderThread {
rasterize_blobs(&mut txn, is_low_priority);
txn.blob_requests = Vec::new();
+ if self.simulate_slow_ms > 0 {
+ thread::sleep(Duration::from_millis(self.simulate_slow_ms as u64));
+ }
+
txn
}
}
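With the new field, a test harness can now throttle low-priority scene builds explicitly; a hypothetical use (the low_priority_tx binding is illustrative):

    // Sketch: ask the low-priority builder to sleep 32ms per transaction.
    low_priority_tx
        .send(SceneBuilderRequest::SimulateLongLowPrioritySceneBuild(32))
        .unwrap();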
diff --git a/third_party/webrender/webrender/src/scene_building.rs b/third_party/webrender/webrender/src/scene_building.rs
index 44b75e6d847..bfc466640bc 100644
--- a/third_party/webrender/webrender/src/scene_building.rs
+++ b/third_party/webrender/webrender/src/scene_building.rs
@@ -2,86 +2,46 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-//! # Scene building
-//!
-//! Scene building is the phase during which display lists, a representation built for
-//! serialization, are turned into a scene, webrender's internal representation that is
-//! suited for rendering frames.
-//!
-//! This phase is happening asynchronously on the scene builder thread.
-//!
-//! # General algorithm
-//!
-//! The important aspects of scene building are:
-//! - Building up primitive lists (much of the cost of scene building goes here).
-//! - Creating pictures for content that needs to be rendered into a surface, be it so that
-//! filters can be applied or for caching purposes.
-//! - Maintaining a temporary stack of stacking contexts to keep track of some of the
-//! drawing states.
-//! - Stitching multiple display lists which reference each other (without cycles) into
-//! a single scene (see build_reference_frame).
-//! - Interning, which detects when some of the retained state stays the same between display
-//! lists.
-//!
-//! The scene builder linearly traverses the serialized display list which is naturally
-//! ordered back-to-front, accumulating primitives in the top-most stacking context's
-//! primitive list.
-//! At the end of each stacking context (see pop_stacking_context), its primitive list is
-//! either handed over to a picture if one is created, or it is concatenated into the parent
-//! stacking context's primitive list.
-//!
-//! The flow of the algorithm is mostly linear except when handling:
-//! - shadow stacks (see push_shadow and pop_all_shadows),
-//! - backdrop filters (see add_backdrop_filter)
-//!
-
use api::{AlphaType, BorderDetails, BorderDisplayItem, BuiltDisplayListIter, PrimitiveFlags};
use api::{ClipId, ColorF, CommonItemProperties, ComplexClipRegion, ComponentTransferFuncType, RasterSpace};
use api::{DisplayItem, DisplayItemRef, ExtendMode, ExternalScrollId, FilterData, SharedFontInstanceMap};
use api::{FilterOp, FilterPrimitive, FontInstanceKey, FontSize, GlyphInstance, GlyphOptions, GradientStop};
use api::{IframeDisplayItem, ImageKey, ImageRendering, ItemRange, ColorDepth, QualitySettings};
use api::{LineOrientation, LineStyle, NinePatchBorderSource, PipelineId, MixBlendMode, StackingContextFlags};
-use api::{PropertyBinding, ReferenceFrameKind, ScrollFrameDisplayItem, ScrollSensitivity};
-use api::{Shadow, SpaceAndClipInfo, SpatialId, StickyFrameDisplayItem, ImageMask, ItemTag};
+use api::{PropertyBinding, ReferenceFrame, ReferenceFrameKind, ScrollFrameDisplayItem, ScrollSensitivity};
+use api::{Shadow, SpaceAndClipInfo, SpatialId, StackingContext, StickyFrameDisplayItem, ImageMask};
use api::{ClipMode, PrimitiveKeyKind, TransformStyle, YuvColorSpace, ColorRange, YuvData, TempFilterData};
-use api::{ReferenceTransformBinding, Rotation, FillRule};
+use api::image_tiling::simplify_repeated_primitive;
use api::units::*;
-use crate::image_tiling::simplify_repeated_primitive;
use crate::clip::{ClipChainId, ClipRegion, ClipItemKey, ClipStore, ClipItemKeyKind};
-use crate::clip::{ClipInternData, ClipNodeKind, ClipInstance, SceneClipInstance};
-use crate::clip::{PolygonDataHandle};
-use crate::spatial_tree::{ROOT_SPATIAL_NODE_INDEX, SpatialTree, SpatialNodeIndex, StaticCoordinateSystemId};
+use crate::clip::{ClipInternData, ClipNodeKind, ClipInstance};
+use crate::spatial_tree::{ROOT_SPATIAL_NODE_INDEX, SpatialTree, SpatialNodeIndex};
use crate::frame_builder::{ChasePrimitive, FrameBuilderConfig};
use crate::glyph_rasterizer::FontInstance;
-use crate::hit_test::HitTestingScene;
+use crate::hit_test::{HitTestingItem, HitTestingScene};
use crate::intern::Interner;
-use crate::internal_types::{FastHashMap, LayoutPrimitiveInfo, Filter};
+use crate::internal_types::{FastHashMap, FastHashSet, LayoutPrimitiveInfo, Filter};
use crate::picture::{Picture3DContext, PictureCompositeMode, PicturePrimitive, PictureOptions};
-use crate::picture::{BlitReason, OrderedPictureChild, PrimitiveList};
-use crate::prim_store::{PrimitiveInstance, register_prim_chase_id};
+use crate::picture::{BlitReason, OrderedPictureChild, PrimitiveList, TileCacheInstance, ClusterFlags};
+use crate::prim_store::PrimitiveInstance;
use crate::prim_store::{PrimitiveInstanceKind, NinePatchDescriptor, PrimitiveStore};
use crate::prim_store::{InternablePrimitive, SegmentInstanceIndex, PictureIndex};
-use crate::prim_store::PolygonKey;
+use crate::prim_store::{register_prim_chase_id, get_line_decoration_size};
+use crate::prim_store::{SpaceSnapper};
use crate::prim_store::backdrop::Backdrop;
use crate::prim_store::borders::{ImageBorder, NormalBorderPrim};
-use crate::prim_store::gradient::{
- GradientStopKey, LinearGradient, RadialGradient, RadialGradientParams, ConicGradient,
- ConicGradientParams, optimize_radial_gradient, apply_gradient_local_clip,
- optimize_linear_gradient,
-};
+use crate::prim_store::gradient::{GradientStopKey, LinearGradient, RadialGradient, RadialGradientParams, ConicGradient, ConicGradientParams};
use crate::prim_store::image::{Image, YuvImage};
-use crate::prim_store::line_dec::{LineDecoration, LineDecorationCacheKey, get_line_decoration_size};
+use crate::prim_store::line_dec::{LineDecoration, LineDecorationCacheKey};
use crate::prim_store::picture::{Picture, PictureCompositeKey, PictureKey};
use crate::prim_store::text_run::TextRun;
use crate::render_backend::SceneView;
use crate::resource_cache::ImageRequest;
-use crate::scene::{Scene, ScenePipeline, BuiltScene, SceneStats, StackingContextHelpers};
+use crate::scene::{Scene, BuiltScene, SceneStats, StackingContextHelpers};
use crate::scene_builder_thread::Interners;
-use crate::space::SpaceSnapper;
use crate::spatial_node::{StickyFrameInfo, ScrollFrameKind};
-use crate::tile_cache::TileCacheBuilder;
use euclid::approxeq::ApproxEq;
-use std::{f32, mem, usize};
+use std::{f32, mem, usize, ops};
use std::collections::vec_deque::VecDeque;
use std::sync::Arc;
use crate::util::{MaxRect, VecHelper};
@@ -242,204 +202,31 @@ impl CompositeOps {
self.filter_primitives.is_empty() &&
self.mix_blend_mode.is_none()
}
-
- /// Returns true if this CompositeOps contains any filters that affect
- /// the content (false if no filters, or filters are all no-ops).
- fn has_valid_filters(&self) -> bool {
- // For each filter, create a new image with that composite mode.
- let mut current_filter_data_index = 0;
- for filter in &self.filters {
- match filter {
- Filter::ComponentTransfer => {
- let filter_data =
- &self.filter_datas[current_filter_data_index];
- let filter_data = filter_data.sanitize();
- current_filter_data_index = current_filter_data_index + 1;
- if filter_data.is_identity() {
- continue
- } else {
- return true;
- }
- }
- _ => {
- if filter.is_noop() {
- continue;
- } else {
- return true;
- }
- }
- }
- }
-
- if !self.filter_primitives.is_empty() {
- return true;
- }
-
- false
- }
-}
-
-/// Represents the current input for a picture chain builder (either a
-/// prim list from the stacking context, or a wrapped picture instance).
-enum PictureSource {
- PrimitiveList {
- prim_list: PrimitiveList,
- },
- WrappedPicture {
- instance: PrimitiveInstance,
- },
-}
-
-/// Helper struct to build picture chains during scene building from
-/// a flattened stacking context struct.
-struct PictureChainBuilder {
- /// The current input source for the next picture
- current: PictureSource,
-
- /// Positioning node for this picture chain
- spatial_node_index: SpatialNodeIndex,
- /// Prim flags for any pictures in this chain
- flags: PrimitiveFlags,
-}
-
-impl PictureChainBuilder {
- /// Create a new picture chain builder, from a primitive list
- fn from_prim_list(
- prim_list: PrimitiveList,
- flags: PrimitiveFlags,
- spatial_node_index: SpatialNodeIndex,
- ) -> Self {
- PictureChainBuilder {
- current: PictureSource::PrimitiveList {
- prim_list,
- },
- spatial_node_index,
- flags,
- }
- }
-
- /// Create a new picture chain builder, from a picture wrapper instance
- fn from_instance(
- instance: PrimitiveInstance,
- flags: PrimitiveFlags,
- spatial_node_index: SpatialNodeIndex,
- ) -> Self {
- PictureChainBuilder {
- current: PictureSource::WrappedPicture {
- instance,
- },
- flags,
- spatial_node_index,
- }
- }
-
- /// Wrap the existing content with a new picture with the given parameters
- #[must_use]
- fn add_picture(
- self,
- composite_mode: PictureCompositeMode,
- context_3d: Picture3DContext<OrderedPictureChild>,
- options: PictureOptions,
- interners: &mut Interners,
- prim_store: &mut PrimitiveStore,
- ) -> PictureChainBuilder {
- let prim_list = match self.current {
- PictureSource::PrimitiveList { prim_list } => {
- prim_list
- }
- PictureSource::WrappedPicture { instance } => {
- let mut prim_list = PrimitiveList::empty();
-
- prim_list.add_prim(
- instance,
- LayoutRect::zero(),
- self.spatial_node_index,
- self.flags,
- );
-
- prim_list
- }
- };
-
- let pic_index = PictureIndex(prim_store.pictures
- .alloc()
- .init(PicturePrimitive::new_image(
- Some(composite_mode.clone()),
- context_3d,
- true,
- self.flags,
- prim_list,
- self.spatial_node_index,
- options,
- ))
- );
-
- let instance = create_prim_instance(
- pic_index,
- Some(composite_mode).into(),
- ClipChainId::NONE,
- interners,
- );
-
- PictureChainBuilder {
- current: PictureSource::WrappedPicture {
- instance,
- },
- spatial_node_index: self.spatial_node_index,
- flags: self.flags,
- }
- }
-
- /// Finish building this picture chain. Set the clip chain on the outermost picture
- fn finalize(
- self,
- clip_chain_id: ClipChainId,
- interners: &mut Interners,
- prim_store: &mut PrimitiveStore,
- ) -> PrimitiveInstance {
- match self.current {
- PictureSource::WrappedPicture { mut instance } => {
- instance.clip_set.clip_chain_id = clip_chain_id;
- instance
- }
- PictureSource::PrimitiveList { prim_list } => {
- // If no picture was created for this stacking context, create a
- // pass-through wrapper now. This is only needed in 1-2 edge cases
- // now, and will be removed as a follow up.
- let pic_index = PictureIndex(prim_store.pictures
- .alloc()
- .init(PicturePrimitive::new_image(
- None,
- Picture3DContext::Out,
- true,
- self.flags,
- prim_list,
- self.spatial_node_index,
- PictureOptions::default(),
- ))
- );
-
- create_prim_instance(
- pic_index,
- None.into(),
- clip_chain_id,
- interners,
- )
- }
- }
- }
}
bitflags! {
/// Slice flags
pub struct SliceFlags : u8 {
- /// Slice created by a prim that has PrimitiveFlags::IS_SCROLLBAR_CONTAINER
+ /// Slice created by a cluster that has ClusterFlags::SCROLLBAR_CONTAINER
const IS_SCROLLBAR = 1;
- /// Represents a mix-blend container (can't split out compositor surfaces in this slice)
- const IS_BLEND_CONTAINER = 2;
}
}
+/// Information about a set of primitive clusters that will form a picture cache slice.
+struct Slice {
+ /// The spatial node root of the picture cache. If this is None, the slice
+ /// will not be cached and instead drawn directly to the parent surface. This
+ /// is a temporary measure until we enable caching all slices.
+ cache_scroll_root: Option<SpatialNodeIndex>,
+ /// List of primitive clusters that make up this slice
+ prim_list: PrimitiveList,
+ /// A list of clips that are shared by all primitives in the slice. These can be
+ /// filtered out and applied when the tile cache is composited rather than per-item.
+ shared_clips: Option<Vec<ClipInstance>>,
+ /// Various flags describing properties of this slice
+ pub flags: SliceFlags,
+}
+
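The setup_picture_caching pass below is essentially a partition of the top-level cluster list at these marker flags; stripped of the shared-clip bookkeeping, the slicing behaves like this sketch over simplified types (Cluster stands in for the real primitive cluster type):

    // Sketch: split a flat cluster list into slices at the
    // CREATE_PICTURE_CACHE_PRE/POST markers.
    fn split_into_slices(clusters: Vec<Cluster>) -> Vec<Vec<Cluster>> {
        let mut slices: Vec<Vec<Cluster>> = Vec::new();
        // The first cluster always opens a slice.
        let mut create_slice = true;
        for cluster in clusters {
            create_slice |= cluster
                .flags
                .contains(ClusterFlags::CREATE_PICTURE_CACHE_PRE);
            if create_slice {
                slices.push(Vec::new());
                create_slice = false;
            }
            // A POST marker means the *next* cluster starts a new slice.
            create_slice |= cluster
                .flags
                .contains(ClusterFlags::CREATE_PICTURE_CACHE_POST);
            slices.last_mut().unwrap().push(cluster);
        }
        slices
    }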
/// A structure that converts a serialized display list into a form that WebRender
/// can use to later build a frame. This structure produces a BuiltScene. Public
/// members are typically those that are destructured into the BuiltScene.
@@ -450,6 +237,10 @@ pub struct SceneBuilder<'a> {
/// The map of all font instances.
font_instances: SharedFontInstanceMap,
+ /// A set of pipelines that the caller has requested be made available as
+ /// output textures.
+ output_pipelines: &'a FastHashSet<PipelineId>,
+
/// The data structure that converts between ClipId/SpatialId and the various
/// index types that the SpatialTree uses.
id_to_index_mapper: NodeIdToIndexMapper,
@@ -457,12 +248,6 @@ pub struct SceneBuilder<'a> {
/// A stack of stacking context properties.
sc_stack: Vec<FlattenedStackingContext>,
- /// Stack of spatial node indices forming containing block for 3d contexts
- containing_block_stack: Vec<SpatialNodeIndex>,
-
- /// Stack of requested raster spaces for stacking contexts
- raster_space_stack: Vec<RasterSpace>,
-
/// Maintains state for any currently active shadows
pending_shadow_items: VecDeque<ShadowItem>,
@@ -485,31 +270,36 @@ pub struct SceneBuilder<'a> {
/// Reference to the set of data that is interned across display lists.
interners: &'a mut Interners,
+ /// The root picture index for this builder. This is the picture
+ /// to start the culling phase from.
+ pub root_pic_index: PictureIndex,
+
/// Helper struct to map stacking context coords <-> reference frame coords.
rf_mapper: ReferenceFrameMapper,
/// Helper struct to map spatial nodes to external scroll offsets.
external_scroll_mapper: ScrollOffsetMapper,
+ /// If true, picture caching setup has already been completed.
+ picture_caching_initialized: bool,
+
/// The current recursion depth of iframes encountered. Used to restrict picture
/// caching slices to only the top-level content frame.
- iframe_size: Vec<LayoutSize>,
+ iframe_depth: usize,
- /// Clip-chain for root iframes applied to any tile caches created within this iframe
- root_iframe_clip: Option<ClipChainId>,
+ /// The number of picture cache slices that were created for content.
+ content_slice_count: usize,
+
+ /// A set of any spatial nodes that are attached to either a picture cache
+ /// root, or a clip node on the picture cache primitive. These are used
+ /// to detect cases where picture caching must be disabled. This is mostly
+ /// a temporary workaround for some existing wrench tests. I don't think
+ /// Gecko ever produces picture cache slices with complex transforms, so
+ /// in future we should prevent this in the public API and remove this hack.
+ picture_cache_spatial_nodes: FastHashSet<SpatialNodeIndex>,
/// The current quality / performance settings for this scene.
quality_settings: QualitySettings,
-
- /// Maintains state about the list of tile caches being built for this scene.
- tile_cache_builder: TileCacheBuilder,
-
- /// A helper struct to snap local rects in device space. During frame
- /// building we may establish new raster roots, however typically that is in
- /// cases where we won't be applying snapping (e.g. has perspective), or in
- /// edge cases (e.g. SVG filter) where we can accept slightly incorrect
- /// behaviour in favour of getting the common case right.
- snap_to_device: SpaceSnapper,
}
impl<'a> SceneBuilder<'a> {
@@ -517,6 +307,7 @@ impl<'a> SceneBuilder<'a> {
scene: &Scene,
font_instances: SharedFontInstanceMap,
view: &SceneView,
+ output_pipelines: &FastHashSet<PipelineId>,
frame_builder_config: &FrameBuilderConfig,
interners: &mut Interners,
stats: &SceneStats,
@@ -531,47 +322,82 @@ impl<'a> SceneBuilder<'a> {
.background_color
.and_then(|color| if color.a > 0.0 { Some(color) } else { None });
- let device_pixel_scale = view.accumulated_scale_factor_for_snapping();
- let spatial_tree = SpatialTree::new();
-
- let snap_to_device = SpaceSnapper::new(
- ROOT_SPATIAL_NODE_INDEX,
- device_pixel_scale,
- );
-
let mut builder = SceneBuilder {
scene,
- spatial_tree,
+ spatial_tree: SpatialTree::new(),
font_instances,
config: *frame_builder_config,
+ output_pipelines,
id_to_index_mapper: NodeIdToIndexMapper::default(),
hit_testing_scene: HitTestingScene::new(&stats.hit_test_stats),
pending_shadow_items: VecDeque::new(),
sc_stack: Vec::new(),
- containing_block_stack: Vec::new(),
- raster_space_stack: vec![RasterSpace::Screen],
prim_store: PrimitiveStore::new(&stats.prim_store_stats),
- clip_store: ClipStore::new(&stats.clip_store_stats),
+ clip_store: ClipStore::new(),
interners,
+ root_pic_index: PictureIndex(0),
rf_mapper: ReferenceFrameMapper::new(),
external_scroll_mapper: ScrollOffsetMapper::new(),
- iframe_size: Vec::new(),
- root_iframe_clip: None,
+ picture_caching_initialized: false,
+ iframe_depth: 0,
+ content_slice_count: 0,
+ picture_cache_spatial_nodes: FastHashSet::default(),
quality_settings: view.quality_settings,
- tile_cache_builder: TileCacheBuilder::new(),
- snap_to_device,
};
- builder.build_all(&root_pipeline);
+ let device_pixel_scale = view.accumulated_scale_factor_for_snapping();
- // Construct the picture cache primitive instance(s) from the tile cache builder
- let (tile_cache_config, tile_cache_pictures) = builder.tile_cache_builder.build(
- &builder.config,
- &mut builder.clip_store,
- &mut builder.prim_store,
- builder.interners,
+ builder.clip_store.register_clip_template(
+ ClipId::root(root_pipeline_id),
+ ClipId::root(root_pipeline_id),
+ &[],
);
+ builder.clip_store.push_clip_root(
+ Some(ClipId::root(root_pipeline_id)),
+ false,
+ );
+
+ builder.push_root(
+ root_pipeline_id,
+ &root_pipeline.viewport_size,
+ &root_pipeline.content_size,
+ device_pixel_scale,
+ );
+
+ // In order to ensure we have a single root stacking context for the
+ // entire display list, we push one here. Gecko _almost_ wraps its
+ // entire display list within a single stacking context, but sometimes
+ // appends a few extra items in AddWindowOverlayWebRenderCommands. We
+ // could fix it there, but it's easier and more robust for WebRender
+ // to just ensure there's a context on the stack whenever we append
+ // primitives (since otherwise we'd panic).
+ //
+ // Note that we don't do this for iframes, even if they're pipeline
+ // roots, because they should be entirely contained within a stacking
+ // context, and we probably wouldn't crash if they weren't.
+ builder.push_stacking_context(
+ root_pipeline.pipeline_id,
+ CompositeOps::default(),
+ TransformStyle::Flat,
+ /* prim_flags = */ PrimitiveFlags::IS_BACKFACE_VISIBLE,
+ ROOT_SPATIAL_NODE_INDEX,
+ None,
+ RasterSpace::Screen,
+ StackingContextFlags::IS_BACKDROP_ROOT,
+ device_pixel_scale,
+ );
+
+ builder.build_items(
+ &mut root_pipeline.display_list.iter(),
+ root_pipeline.pipeline_id,
+ );
+
+ builder.pop_stacking_context();
+ builder.clip_store.pop_clip_root();
+
+ debug_assert!(builder.sc_stack.is_empty());
+
BuiltScene {
has_root_pipeline: scene.has_root_pipeline(),
pipeline_epochs: scene.pipeline_epochs.clone(),
@@ -581,9 +407,10 @@ impl<'a> SceneBuilder<'a> {
spatial_tree: builder.spatial_tree,
prim_store: builder.prim_store,
clip_store: builder.clip_store,
+ root_pic_index: builder.root_pic_index,
config: builder.config,
- tile_cache_config,
- tile_cache_pictures,
+ content_slice_count: builder.content_slice_count,
+ picture_cache_spatial_nodes: builder.picture_cache_spatial_nodes,
}
}
@@ -609,250 +436,219 @@ impl<'a> SceneBuilder<'a> {
rf_offset + scroll_offset
}
- fn build_all(&mut self, root_pipeline: &ScenePipeline) {
- enum ContextKind<'a> {
- Root,
- StackingContext {
- sc_info: StackingContextInfo,
- },
- ReferenceFrame,
- Iframe {
- parent_traversal: BuiltDisplayListIter<'a>,
- }
- }
- struct BuildContext<'a> {
- pipeline_id: PipelineId,
- kind: ContextKind<'a>,
+ /// Figure out the shape of the display list, and wrap various primitive clusters
+ /// into tile cache primitive instances.
+ fn setup_picture_caching(
+ &mut self,
+ main_prim_list: &mut PrimitiveList,
+ ) {
+ if !self.config.global_enable_picture_caching {
+ return;
}
- let root_clip_id = ClipId::root(root_pipeline.pipeline_id);
- self.clip_store.register_clip_template(root_clip_id, root_clip_id, &[]);
- self.clip_store.push_clip_root(Some(root_clip_id), false);
- self.push_root(
- root_pipeline.pipeline_id,
- &root_pipeline.viewport_size,
- );
-
- let mut stack = vec![BuildContext {
- pipeline_id: root_pipeline.pipeline_id,
- kind: ContextKind::Root,
- }];
- let mut traversal = root_pipeline.display_list.iter();
+ // Ensure that picture caching setup has already been completed.
+ debug_assert!(self.picture_caching_initialized);
- 'outer: while let Some(bc) = stack.pop() {
- loop {
- let item = match traversal.next() {
- Some(item) => item,
- None => break,
- };
+ // Unconditionally insert a marker to create a picture cache slice on the
+ // first cluster. This handles implicit picture caches, and also the common
+ // case, by allowing the root / background primitives to be cached in a slice.
+ if let Some(cluster) = main_prim_list.clusters.first_mut() {
+ cluster.flags.insert(ClusterFlags::CREATE_PICTURE_CACHE_PRE);
+ }
- match item.item() {
- DisplayItem::PushStackingContext(ref info) => {
- profile_scope!("build_stacking_context");
- let spatial_node_index = self.get_space(info.spatial_id);
- let mut subtraversal = item.sub_iter();
- // Avoid doing unnecessary work for empty stacking contexts.
- if subtraversal.current_stacking_context_empty() {
- subtraversal.skip_current_stacking_context();
- traversal = subtraversal;
- continue;
- }
+ // List of slices that have been found
+ let mut slices: Vec<Slice> = Vec::new();
+ // Tracker for whether a new slice should be created
+ let mut create_slice = true;
+ // The clips found the last time we traversed a set of clip chains. Stored and cleared
+ // here to avoid constant allocations.
+ let mut prim_clips = Vec::new();
+ // If true, the cache is out of date and needs to be rebuilt.
+ let mut update_shared_clips = true;
+ // The last prim clip chain we built prim_clips for.
+ let mut last_prim_clip_chain_id = ClipChainId::NONE;
+
+ // Walk the supplied top level of clusters, slicing into slices as appropriate
+ for cluster in main_prim_list.clusters.drain(..) {
+ // Check if this cluster requires a new slice
+ create_slice |= cluster.flags.intersects(
+ ClusterFlags::CREATE_PICTURE_CACHE_PRE | ClusterFlags::IS_CLEAR_PRIMITIVE
+ );
- let composition_operations = CompositeOps::new(
- filter_ops_for_compositing(item.filters()),
- filter_datas_for_compositing(item.filter_datas()),
- filter_primitives_for_compositing(item.filter_primitives()),
- info.stacking_context.mix_blend_mode_for_compositing(),
- );
+ if create_slice {
+ let slice_flags = if cluster.flags.contains(ClusterFlags::SCROLLBAR_CONTAINER) {
+ SliceFlags::IS_SCROLLBAR
+ } else {
+ SliceFlags::empty()
+ };
+ let slice = Slice {
+ cache_scroll_root: cluster.cache_scroll_root,
+ prim_list: PrimitiveList::empty(),
+ shared_clips: None,
+ flags: slice_flags
+ };
- let sc_info = self.push_stacking_context(
- composition_operations,
- info.stacking_context.transform_style,
- info.prim_flags,
- spatial_node_index,
- info.stacking_context.clip_id,
- info.stacking_context.raster_space,
- info.stacking_context.flags,
- bc.pipeline_id,
- );
+ // Open up clip chains on the stack on the new slice
+ slices.push(slice);
+ create_slice = false;
+ }
- self.rf_mapper.push_offset(info.origin.to_vector());
- let new_context = BuildContext {
- pipeline_id: bc.pipeline_id,
- kind: ContextKind::StackingContext {
- sc_info,
- },
- };
- stack.push(bc);
- stack.push(new_context);
+ // Step through each prim instance, in order to collect shared clips for the slice.
+ for instance in &cluster.prim_instances {
+ // If the primitive clip chain is different, then we need to rebuild prim_clips.
+ update_shared_clips |= last_prim_clip_chain_id != instance.clip_chain_id;
+ last_prim_clip_chain_id = instance.clip_chain_id;
+
+ if update_shared_clips {
+ prim_clips.clear();
+ // Update the list of clips that apply to this primitive instance
+ add_clips(
+ instance.clip_chain_id,
+ &mut prim_clips,
+ &self.clip_store,
+ &self.interners,
+ );
+ }
- subtraversal.merge_debug_stats_from(&mut traversal);
- traversal = subtraversal;
- continue 'outer;
+ // If there are no shared clips set for this slice, the shared clips are just
+ // the current clips set. Otherwise, the shared clips are those that are
+ // in both the current shared list and the clips list for this primitive.
+ match slices.last_mut().unwrap().shared_clips {
+ Some(ref mut shared_clips) => {
+ if update_shared_clips {
+ shared_clips.retain(|h1: &ClipInstance| {
+ let uid = h1.handle.uid();
+ prim_clips.iter().any(|h2| {
+ uid == h2.handle.uid() &&
+ h1.spatial_node_index == h2.spatial_node_index
+ })
+ });
+ }
}
- DisplayItem::PushReferenceFrame(ref info) => {
- profile_scope!("build_reference_frame");
- let parent_space = self.get_space(info.parent_spatial_id);
- let mut subtraversal = item.sub_iter();
- let current_offset = self.current_offset(parent_space);
-
- let transform = match info.reference_frame.transform {
- ReferenceTransformBinding::Static { binding } => binding,
- ReferenceTransformBinding::Computed { scale_from, vertical_flip, rotation } => {
- let content_size = &self.iframe_size.last().unwrap();
-
- let mut transform = if let Some(scale_from) = scale_from {
- // If we have a 90/270 degree rotation, then scale_from
- // and content_size are in different coordinate spaces and
- // we need to swap width/height for them to be correct.
- match rotation {
- Rotation::Degree0 |
- Rotation::Degree180 => {
- LayoutTransform::scale(
- content_size.width / scale_from.width,
- content_size.height / scale_from.height,
- 1.0
- )
- },
- Rotation::Degree90 |
- Rotation::Degree270 => {
- LayoutTransform::scale(
- content_size.height / scale_from.width,
- content_size.width / scale_from.height,
- 1.0
- )
-
- }
- }
- } else {
- LayoutTransform::identity()
- };
-
- if vertical_flip {
- let content_size = &self.iframe_size.last().unwrap();
- transform = transform
- .then_translate(LayoutVector3D::new(0.0, content_size.height, 0.0))
- .pre_scale(1.0, -1.0, 1.0);
- }
-
- let rotate = rotation.to_matrix(**content_size);
- let transform = transform.then(&rotate);
+ ref mut shared_clips @ None => {
+ *shared_clips = Some(prim_clips.clone());
+ }
+ }
- PropertyBinding::Value(transform)
- },
- };
+ update_shared_clips = false;
+ }
- self.push_reference_frame(
- info.reference_frame.id,
- Some(parent_space),
- bc.pipeline_id,
- info.reference_frame.transform_style,
- transform,
- info.reference_frame.kind,
- current_offset + info.origin.to_vector(),
- );
+ // If this cluster creates a slice after, then note that for next cluster
+ create_slice |= cluster.flags.intersects(
+ ClusterFlags::CREATE_PICTURE_CACHE_POST | ClusterFlags::IS_CLEAR_PRIMITIVE
+ );
- self.rf_mapper.push_scope();
- let new_context = BuildContext {
- pipeline_id: bc.pipeline_id,
- kind: ContextKind::ReferenceFrame,
- };
- stack.push(bc);
- stack.push(new_context);
+ // Finally, add this cluster to the current slice
+ slices.last_mut().unwrap().prim_list.add_cluster(cluster);
+ }
- subtraversal.merge_debug_stats_from(&mut traversal);
- traversal = subtraversal;
- continue 'outer;
- }
- DisplayItem::PopReferenceFrame |
- DisplayItem::PopStackingContext => break,
- DisplayItem::Iframe(ref info) => {
- profile_scope!("iframe");
-
- let space = self.get_space(info.space_and_clip.spatial_id);
- let (size, subtraversal) = match self.push_iframe(info, space) {
- Some(pair) => pair,
- None => continue,
- };
+ // Step through the slices, creating picture cache wrapper instances.
+ for (slice_index, slice) in slices.drain(..).enumerate() {
+ let background_color = if slice_index == 0 {
+ self.config.background_color
+ } else {
+ None
+ };
- // Get a clip-chain id for the root clip for this pipeline. We will
- // add that as an unconditional clip to any tile cache created within
- // this iframe. This ensures these clips are handled by the tile cache
- // compositing code, which is more efficient and accurate than applying
- // these clips individually to each primitive.
- let clip_id = ClipId::root(info.pipeline_id);
- let clip_chain_id = self.get_clip_chain(clip_id);
-
- // If this is a root iframe, force a new tile cache both before and after
- // adding primitives for this iframe.
- if self.iframe_size.is_empty() {
- self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
- assert!(self.root_iframe_clip.is_none());
- self.root_iframe_clip = Some(clip_chain_id);
- }
+ // If the cluster specifies a scroll root, use it. Otherwise,
+ // just cache assuming no scrolling takes place. Even if that's
+ // not true, we still get caching benefits for any changes that
+ // occur while not scrolling (such as animation, video, etc.).
+ let scroll_root = slice.cache_scroll_root.unwrap_or(ROOT_SPATIAL_NODE_INDEX);
+
+ let instance = create_tile_cache(
+ slice_index,
+ slice.flags,
+ scroll_root,
+ slice.prim_list,
+ background_color,
+ slice.shared_clips.unwrap_or_else(Vec::new),
+ &mut self.interners,
+ &mut self.prim_store,
+ &mut self.clip_store,
+ &mut self.picture_cache_spatial_nodes,
+ &self.config,
+ );
- self.rf_mapper.push_scope();
- self.iframe_size.push(size);
+ main_prim_list.add_prim(
+ instance,
+ LayoutRect::zero(),
+ scroll_root,
+ PrimitiveFlags::IS_BACKFACE_VISIBLE,
+ );
+ }
+ }
- let new_context = BuildContext {
- pipeline_id: info.pipeline_id,
- kind: ContextKind::Iframe {
- parent_traversal: mem::replace(&mut traversal, subtraversal),
- },
- };
- stack.push(bc);
- stack.push(new_context);
- continue 'outer;
- }
- _ => {
- self.build_item(item, bc.pipeline_id);
- }
- };
- }
+ fn build_items(
+ &mut self,
+ traversal: &mut BuiltDisplayListIter<'a>,
+ pipeline_id: PipelineId,
+ ) {
+ loop {
+ let item = match traversal.next() {
+ Some(item) => item,
+ None => break,
+ };
- match bc.kind {
- ContextKind::Root => {}
- ContextKind::StackingContext { sc_info } => {
- self.rf_mapper.pop_offset();
- self.pop_stacking_context(sc_info);
+ let subtraversal = match item.item() {
+ DisplayItem::PushStackingContext(ref info) => {
+ let space = self.get_space(info.spatial_id);
+ let mut subtraversal = item.sub_iter();
+ self.build_stacking_context(
+ &mut subtraversal,
+ pipeline_id,
+ &info.stacking_context,
+ space,
+ info.origin,
+ item.filters(),
+ &item.filter_datas(),
+ item.filter_primitives(),
+ info.prim_flags,
+ );
+ Some(subtraversal)
}
- ContextKind::ReferenceFrame => {
- self.rf_mapper.pop_scope();
+ DisplayItem::PushReferenceFrame(ref info) => {
+ let parent_space = self.get_space(info.parent_spatial_id);
+ let mut subtraversal = item.sub_iter();
+ self.build_reference_frame(
+ &mut subtraversal,
+ pipeline_id,
+ parent_space,
+ info.origin,
+ &info.reference_frame,
+ );
+ Some(subtraversal)
}
- ContextKind::Iframe { parent_traversal } => {
- self.iframe_size.pop();
- self.rf_mapper.pop_scope();
-
- self.clip_store.pop_clip_root();
- if self.iframe_size.is_empty() {
- assert!(self.root_iframe_clip.is_some());
- self.root_iframe_clip = None;
- self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
- }
+ DisplayItem::PopReferenceFrame |
+ DisplayItem::PopStackingContext => return,
+ _ => None,
+ };
- traversal = parent_traversal;
- }
+ // If build_item created a sub-traversal, we need `traversal` to have the
+ // same state as the completed subtraversal, so we reinitialize it here.
+ if let Some(mut subtraversal) = subtraversal {
+ subtraversal.merge_debug_stats_from(traversal);
+ *traversal = subtraversal;
+ } else {
+ self.build_item(item, pipeline_id);
}
+ }
- // TODO: factor this out to be part of capture
- if cfg!(feature = "display_list_stats") {
- let stats = traversal.debug_stats();
- let total_bytes: usize = stats.iter().map(|(_, stats)| stats.num_bytes).sum();
- println!("item, total count, total bytes, % of DL bytes, bytes per item");
- for (label, stats) in stats {
- println!("{}, {}, {}kb, {}%, {}",
- label,
- stats.total_count,
- stats.num_bytes / 1000,
- ((stats.num_bytes as f32 / total_bytes.max(1) as f32) * 100.0) as usize,
- stats.num_bytes / stats.total_count.max(1));
- }
- println!();
+ // TODO: factor this out to be part of capture
+ if cfg!(feature = "display_list_stats") {
+ let stats = traversal.debug_stats();
+ let total_bytes: usize = stats.iter().map(|(_, stats)| stats.num_bytes).sum();
+ println!("item, total count, total bytes, % of DL bytes, bytes per item");
+ for (label, stats) in stats {
+ println!("{}, {}, {}kb, {}%, {}",
+ label,
+ stats.total_count,
+ stats.num_bytes / 1000,
+ ((stats.num_bytes as f32 / total_bytes.max(1) as f32) * 100.0) as usize,
+ stats.num_bytes / stats.total_count.max(1));
}
+ println!();
}
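// Illustrative output of the stats dump above (the values are made up, the
// column layout follows the println! format string):
//
//   item, total count, total bytes, % of DL bytes, bytes per item
//   Rectangle, 1200, 48kb, 35%, 40
//   Text, 300, 21kb, 15%, 70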
-
- self.clip_store.pop_clip_root();
- debug_assert!(self.sc_stack.is_empty());
}
fn build_sticky_frame(
@@ -885,20 +681,18 @@ impl<'a> SceneBuilder<'a> {
pipeline_id: PipelineId,
) {
let current_offset = self.current_offset(parent_node_index);
- let clip_rect = info.clip_rect.translate(current_offset);
-
+ let clip_region = ClipRegion::create_for_clip_node_with_local_clip(
+ &info.clip_rect,
+ &current_offset,
+ );
// Just use clip rectangle as the frame rect for this scroll frame.
// This is useful when calculating scroll extents for the
// SpatialNode::scroll(..) API as well as for properly setting sticky
// positioning offsets.
- let frame_rect = clip_rect;
+ let frame_rect = clip_region.main;
let content_size = info.content_rect.size;
- self.add_rect_clip_node(
- info.clip_id,
- &info.parent_space_and_clip,
- &clip_rect,
- );
+ self.add_clip_node(info.clip_id, &info.parent_space_and_clip, clip_region);
self.add_scroll_frame(
info.scroll_frame_id,
@@ -913,27 +707,107 @@ impl<'a> SceneBuilder<'a> {
);
}
- fn push_iframe(
+ fn build_reference_frame(
+ &mut self,
+ traversal: &mut BuiltDisplayListIter<'a>,
+ pipeline_id: PipelineId,
+ parent_spatial_node: SpatialNodeIndex,
+ origin: LayoutPoint,
+ reference_frame: &ReferenceFrame,
+ ) {
+ profile_scope!("build_reference_frame");
+ let current_offset = self.current_offset(parent_spatial_node);
+ self.push_reference_frame(
+ reference_frame.id,
+ Some(parent_spatial_node),
+ pipeline_id,
+ reference_frame.transform_style,
+ reference_frame.transform,
+ reference_frame.kind,
+ current_offset + origin.to_vector(),
+ );
+
+ self.rf_mapper.push_scope();
+ self.build_items(
+ traversal,
+ pipeline_id,
+ );
+ self.rf_mapper.pop_scope();
+ }
+
+
+ fn build_stacking_context(
+ &mut self,
+ traversal: &mut BuiltDisplayListIter<'a>,
+ pipeline_id: PipelineId,
+ stacking_context: &StackingContext,
+ spatial_node_index: SpatialNodeIndex,
+ origin: LayoutPoint,
+ filters: ItemRange<FilterOp>,
+ filter_datas: &[TempFilterData],
+ filter_primitives: ItemRange<FilterPrimitive>,
+ prim_flags: PrimitiveFlags,
+ ) {
+ profile_scope!("build_stacking_context");
+ // Avoid doing unnecessary work for empty stacking contexts.
+ if traversal.current_stacking_context_empty() {
+ traversal.skip_current_stacking_context();
+ return;
+ }
+
+ let composition_operations = {
+ CompositeOps::new(
+ filter_ops_for_compositing(filters),
+ filter_datas_for_compositing(filter_datas),
+ filter_primitives_for_compositing(filter_primitives),
+ stacking_context.mix_blend_mode_for_compositing(),
+ )
+ };
+
+ self.push_stacking_context(
+ pipeline_id,
+ composition_operations,
+ stacking_context.transform_style,
+ prim_flags,
+ spatial_node_index,
+ stacking_context.clip_id,
+ stacking_context.raster_space,
+ stacking_context.flags,
+ self.sc_stack.last().unwrap().snap_to_device.device_pixel_scale,
+ );
+
+ self.rf_mapper.push_offset(origin.to_vector());
+ self.build_items(
+ traversal,
+ pipeline_id,
+ );
+ self.rf_mapper.pop_offset();
+
+ self.pop_stacking_context();
+ }
+
+ fn build_iframe(
&mut self,
info: &IframeDisplayItem,
spatial_node_index: SpatialNodeIndex,
- ) -> Option<(LayoutSize, BuiltDisplayListIter<'a>)> {
+ ) {
let iframe_pipeline_id = info.pipeline_id;
let pipeline = match self.scene.pipelines.get(&iframe_pipeline_id) {
Some(pipeline) => pipeline,
None => {
debug_assert!(info.ignore_missing_pipeline);
- return None
+ return
},
};
let current_offset = self.current_offset(spatial_node_index);
- let clip_rect = info.clip_rect.translate(current_offset);
-
- self.add_rect_clip_node(
+ self.add_clip_node(
ClipId::root(iframe_pipeline_id),
&info.space_and_clip,
- &clip_rect,
+ ClipRegion::create_for_clip_node_with_local_clip(
+ &info.clip_rect,
+ &current_offset,
+ ),
);
self.clip_store.push_clip_root(
@@ -941,42 +815,52 @@ impl<'a> SceneBuilder<'a> {
true,
);
- let bounds = self.snap_rect(
- &info.bounds.translate(current_offset),
+ let snap_to_device = &mut self.sc_stack.last_mut().unwrap().snap_to_device;
+ snap_to_device.set_target_spatial_node(
spatial_node_index,
+ &self.spatial_tree,
);
+ let bounds = snap_to_device.snap_rect(
+ &info.bounds.translate(current_offset),
+ );
+
+ let content_size = snap_to_device.snap_size(&pipeline.content_size);
+
let spatial_node_index = self.push_reference_frame(
SpatialId::root_reference_frame(iframe_pipeline_id),
Some(spatial_node_index),
iframe_pipeline_id,
TransformStyle::Flat,
PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false
- },
+ ReferenceFrameKind::Transform,
bounds.origin.to_vector(),
);
let iframe_rect = LayoutRect::new(LayoutPoint::zero(), bounds.size);
- let is_root_pipeline = self.iframe_size.is_empty();
-
self.add_scroll_frame(
SpatialId::root_scroll_node(iframe_pipeline_id),
spatial_node_index,
- ExternalScrollId(0, iframe_pipeline_id),
+ Some(ExternalScrollId(0, iframe_pipeline_id)),
iframe_pipeline_id,
&iframe_rect,
- &bounds.size,
+ &content_size,
ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::PipelineRoot {
- is_root_pipeline,
- },
+ ScrollFrameKind::PipelineRoot,
LayoutVector2D::zero(),
);
- Some((bounds.size, pipeline.display_list.iter()))
+ self.rf_mapper.push_scope();
+ self.iframe_depth += 1;
+
+ self.build_items(
+ &mut pipeline.display_list.iter(),
+ pipeline.pipeline_id,
+ );
+ self.iframe_depth -= 1;
+ self.rf_mapper.pop_scope();
+
+ self.clip_store.pop_clip_root();
}
fn get_space(
@@ -1003,28 +887,29 @@ impl<'a> SceneBuilder<'a> {
let current_offset = self.current_offset(spatial_node_index);
- let unsnapped_clip_rect = common.clip_rect.translate(current_offset);
- let clip_rect = self.snap_rect(
- &unsnapped_clip_rect,
+ let snap_to_device = &mut self.sc_stack.last_mut().unwrap().snap_to_device;
+ snap_to_device.set_target_spatial_node(
spatial_node_index,
+ &self.spatial_tree
);
+ let unsnapped_clip_rect = common.clip_rect.translate(current_offset);
+ let clip_rect = snap_to_device.snap_rect(&unsnapped_clip_rect);
+
let unsnapped_rect = bounds.map(|bounds| {
bounds.translate(current_offset)
});
// If no bounds rect is given, default to clip rect.
let rect = unsnapped_rect.map_or(clip_rect, |bounds| {
- self.snap_rect(
- &bounds,
- spatial_node_index,
- )
+ snap_to_device.snap_rect(&bounds)
});
let layout = LayoutPrimitiveInfo {
rect,
clip_rect,
flags: common.flags,
+ hit_info: common.hit_info,
};
(layout, unsnapped_rect.unwrap_or(unsnapped_clip_rect), spatial_node_index, clip_chain_id)
@@ -1046,11 +931,12 @@ impl<'a> SceneBuilder<'a> {
rect: &LayoutRect,
target_spatial_node: SpatialNodeIndex,
) -> LayoutRect {
- self.snap_to_device.set_target_spatial_node(
+ let snap_to_device = &mut self.sc_stack.last_mut().unwrap().snap_to_device;
+ snap_to_device.set_target_spatial_node(
target_spatial_node,
&self.spatial_tree
);
- self.snap_to_device.snap_rect(rect)
+ snap_to_device.snap_rect(rect)
}
fn build_item<'b>(
@@ -1060,8 +946,6 @@ impl<'a> SceneBuilder<'a> {
) {
match *item.item() {
DisplayItem::Image(ref info) => {
- profile_scope!("image");
-
let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
@@ -1080,8 +964,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::RepeatingImage(ref info) => {
- profile_scope!("repeating_image");
-
let (layout, unsnapped_rect, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
@@ -1106,8 +988,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::YuvImage(ref info) => {
- profile_scope!("yuv_image");
-
let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
@@ -1125,8 +1005,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::Text(ref info) => {
- profile_scope!("text");
-
// TODO(aosmond): Snapping text primitives does not make much sense, given the
// primitive bounds and clip are supposed to be conservative, not definitive.
// E.g. they should be able to grow and not impact the output. However there
@@ -1149,46 +1027,32 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::Rectangle(ref info) => {
- profile_scope!("rect");
-
let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
);
- self.add_primitive(
+ self.add_solid_rectangle(
spatial_node_index,
clip_chain_id,
&layout,
- Vec::new(),
- PrimitiveKeyKind::Rectangle {
- color: info.color.into(),
- },
+ info.color,
);
}
DisplayItem::HitTest(ref info) => {
- profile_scope!("hit_test");
-
- // TODO(gw): We could skip building the clip-chain here completely, as it's not used by
- // hit-test items.
- let (layout, _, spatial_node_index, _) = self.process_common_properties(
+ let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties(
&info.common,
None,
);
- // Don't add transparent rectangles to the draw list,
- // but do consider them for hit testing. This allows
- // specifying invisible hit testing areas.
- self.add_primitive_to_hit_testing_list(
- &layout,
+ self.add_solid_rectangle(
spatial_node_index,
- info.common.clip_id,
- info.tag,
+ clip_chain_id,
+ &layout,
+ PropertyBinding::Value(ColorF::TRANSPARENT),
);
}
DisplayItem::ClearRectangle(ref info) => {
- profile_scope!("clear");
-
let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
@@ -1201,8 +1065,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::Line(ref info) => {
- profile_scope!("line");
-
let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.area,
@@ -1219,167 +1081,71 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::Gradient(ref info) => {
- profile_scope!("gradient");
-
- if !info.gradient.is_valid() {
- return;
- }
-
- let (mut layout, unsnapped_rect, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
+ let (layout, unsnapped_rect, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
);
- let mut tile_size = process_repeat_size(
+ let tile_size = process_repeat_size(
&layout.rect,
&unsnapped_rect,
info.tile_size,
);
- let mut stops = read_gradient_stops(item.gradient_stops());
- let mut start = info.gradient.start_point;
- let mut end = info.gradient.end_point;
- let flags = layout.flags;
-
- let optimized = optimize_linear_gradient(
- &mut layout.rect,
- &mut tile_size,
- info.tile_spacing,
- &layout.clip_rect,
- &mut start,
- &mut end,
+ if let Some(prim_key_kind) = self.create_linear_gradient_prim(
+ &layout,
+ info.gradient.start_point,
+ info.gradient.end_point,
+ item.gradient_stops(),
info.gradient.extend_mode,
- &mut stops,
- &mut |rect, start, end, stops| {
- let layout = LayoutPrimitiveInfo { rect: *rect, clip_rect: *rect, flags };
- if let Some(prim_key_kind) = self.create_linear_gradient_prim(
- &layout,
- start,
- end,
- stops.to_vec(),
- ExtendMode::Clamp,
- rect.size,
- LayoutSize::zero(),
- None,
- ) {
- self.add_nonshadowable_primitive(
- spatial_node_index,
- clip_chain_id,
- &layout,
- Vec::new(),
- prim_key_kind,
- );
- }
- }
- );
-
- if !optimized && !tile_size.ceil().is_empty() {
- if let Some(prim_key_kind) = self.create_linear_gradient_prim(
+ tile_size,
+ info.tile_spacing,
+ None,
+ ) {
+ self.add_nonshadowable_primitive(
+ spatial_node_index,
+ clip_chain_id,
&layout,
- start,
- end,
- stops,
- info.gradient.extend_mode,
- tile_size,
- info.tile_spacing,
- None,
- ) {
- self.add_nonshadowable_primitive(
- spatial_node_index,
- clip_chain_id,
- &layout,
- Vec::new(),
- prim_key_kind,
- );
- }
+ Vec::new(),
+ prim_key_kind,
+ );
}
}
DisplayItem::RadialGradient(ref info) => {
- profile_scope!("radial");
-
- if !info.gradient.is_valid() {
- return;
- }
-
- let (mut layout, unsnapped_rect, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
+ let (layout, unsnapped_rect, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
);
- let mut center = info.gradient.center;
-
- let stops = read_gradient_stops(item.gradient_stops());
-
- let mut tile_size = process_repeat_size(
+ let tile_size = process_repeat_size(
&layout.rect,
&unsnapped_rect,
info.tile_size,
);
- let mut prim_rect = layout.rect;
- let mut tile_spacing = info.tile_spacing;
- optimize_radial_gradient(
- &mut prim_rect,
- &mut tile_size,
- &mut center,
- &mut tile_spacing,
- &layout.clip_rect,
- info.gradient.radius,
- info.gradient.end_offset,
+ let prim_key_kind = self.create_radial_gradient_prim(
+ &layout,
+ info.gradient.center,
+ info.gradient.start_offset * info.gradient.radius.width,
+ info.gradient.end_offset * info.gradient.radius.width,
+ info.gradient.radius.width / info.gradient.radius.height,
+ item.gradient_stops(),
info.gradient.extend_mode,
- &stops,
- &mut |solid_rect, color| {
- self.add_nonshadowable_primitive(
- spatial_node_index,
- clip_chain_id,
- &LayoutPrimitiveInfo {
- rect: *solid_rect,
- .. layout
- },
- Vec::new(),
- PrimitiveKeyKind::Rectangle { color: PropertyBinding::Value(color) },
- );
- }
+ tile_size,
+ info.tile_spacing,
+ None,
);
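// Note (inferred from the arguments above, not stated in this patch): the
// elliptical radius is passed as a circular radius of `radius.width` (the
// start/end offsets are pre-multiplied by the width) plus the aspect ratio
// `radius.width / radius.height`, so the gradient can be rendered as a
// circle with one axis scaled by that ratio.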
- // TODO: create_radial_gradient_prim already calls
- // this, but it leaves the info variable that is
- // passed to add_nonshadowable_primitive unmodified
- // which can cause issues.
- simplify_repeated_primitive(&tile_size, &mut tile_spacing, &mut prim_rect);
-
- if !tile_size.ceil().is_empty() {
- layout.rect = prim_rect;
- let prim_key_kind = self.create_radial_gradient_prim(
- &layout,
- center,
- info.gradient.start_offset * info.gradient.radius.width,
- info.gradient.end_offset * info.gradient.radius.width,
- info.gradient.radius.width / info.gradient.radius.height,
- stops,
- info.gradient.extend_mode,
- tile_size,
- tile_spacing,
- None,
- );
-
- self.add_nonshadowable_primitive(
- spatial_node_index,
- clip_chain_id,
- &layout,
- Vec::new(),
- prim_key_kind,
- );
- }
+ self.add_nonshadowable_primitive(
+ spatial_node_index,
+ clip_chain_id,
+ &layout,
+ Vec::new(),
+ prim_key_kind,
+ );
}
DisplayItem::ConicGradient(ref info) => {
- profile_scope!("conic");
-
- if !info.gradient.is_valid() {
- return;
- }
-
- let (mut layout, unsnapped_rect, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
+ let (layout, unsnapped_rect, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
);
@@ -1390,40 +1156,28 @@ impl<'a> SceneBuilder<'a> {
info.tile_size,
);
- let offset = apply_gradient_local_clip(
- &mut layout.rect,
- &tile_size,
- &info.tile_spacing,
- &layout.clip_rect,
+ let prim_key_kind = self.create_conic_gradient_prim(
+ &layout,
+ info.gradient.center,
+ info.gradient.angle,
+ info.gradient.start_offset,
+ info.gradient.end_offset,
+ item.gradient_stops(),
+ info.gradient.extend_mode,
+ tile_size,
+ info.tile_spacing,
+ None,
);
- let center = info.gradient.center + offset;
- if !tile_size.ceil().is_empty() {
- let prim_key_kind = self.create_conic_gradient_prim(
- &layout,
- center,
- info.gradient.angle,
- info.gradient.start_offset,
- info.gradient.end_offset,
- item.gradient_stops(),
- info.gradient.extend_mode,
- tile_size,
- info.tile_spacing,
- None,
- );
-
- self.add_nonshadowable_primitive(
- spatial_node_index,
- clip_chain_id,
- &layout,
- Vec::new(),
- prim_key_kind,
- );
- }
+ self.add_nonshadowable_primitive(
+ spatial_node_index,
+ clip_chain_id,
+ &layout,
+ Vec::new(),
+ prim_key_kind,
+ );
}
DisplayItem::BoxShadow(ref info) => {
- profile_scope!("box_shadow");
-
let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.box_bounds,
@@ -1442,8 +1196,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::Border(ref info) => {
- profile_scope!("border");
-
let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties_with_bounds(
&info.common,
&info.bounds,
@@ -1457,9 +1209,14 @@ impl<'a> SceneBuilder<'a> {
item.gradient_stops(),
);
}
+ DisplayItem::Iframe(ref info) => {
+ let space = self.get_space(info.space_and_clip.spatial_id);
+ self.build_iframe(
+ info,
+ space,
+ );
+ }
DisplayItem::ImageMaskClip(ref info) => {
- profile_scope!("image_clip");
-
let parent_space = self.get_space(info.parent_space_and_clip.spatial_id);
let current_offset = self.current_offset(parent_space);
@@ -1472,13 +1229,9 @@ impl<'a> SceneBuilder<'a> {
info.id,
&info.parent_space_and_clip,
&image_mask,
- info.fill_rule,
- item.points(),
);
}
DisplayItem::RoundedRectClip(ref info) => {
- profile_scope!("rounded_clip");
-
let parent_space = self.get_space(info.parent_space_and_clip.spatial_id);
let current_offset = self.current_offset(parent_space);
@@ -1490,8 +1243,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::RectClip(ref info) => {
- profile_scope!("rect_clip");
-
let parent_space = self.get_space(info.parent_space_and_clip.spatial_id);
let current_offset = self.current_offset(parent_space);
let clip_rect = info.clip_rect.translate(current_offset);
@@ -1503,8 +1254,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::Clip(ref info) => {
- profile_scope!("clip");
-
let parent_space = self.get_space(info.parent_space_and_clip.spatial_id);
let current_offset = self.current_offset(parent_space);
let clip_region = ClipRegion::create_for_clip_node(
@@ -1515,26 +1264,21 @@ impl<'a> SceneBuilder<'a> {
self.add_clip_node(info.id, &info.parent_space_and_clip, clip_region);
}
DisplayItem::ClipChain(ref info) => {
- profile_scope!("clip_chain");
-
let parent = info.parent.map_or(ClipId::root(pipeline_id), |id| ClipId::ClipChain(id));
- let mut clips: SmallVec<[SceneClipInstance; 4]> = SmallVec::new();
+ let mut instances: SmallVec<[ClipInstance; 4]> = SmallVec::new();
for clip_item in item.clip_chain_items() {
let template = self.clip_store.get_template(clip_item);
- let instances = &self.clip_store.instances[template.clips.start as usize .. template.clips.end as usize];
- clips.extend_from_slice(instances);
+ instances.extend_from_slice(&template.instances);
}
self.clip_store.register_clip_template(
ClipId::ClipChain(info.id),
parent,
- &clips,
+ &instances,
);
},
DisplayItem::ScrollFrame(ref info) => {
- profile_scope!("scrollframe");
-
let parent_space = self.get_space(info.parent_space_and_clip.spatial_id);
self.build_scroll_frame(
info,
@@ -1543,8 +1287,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::StickyFrame(ref info) => {
- profile_scope!("stickyframe");
-
let parent_space = self.get_space(info.parent_spatial_id);
self.build_sticky_frame(
info,
@@ -1552,8 +1294,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::BackdropFilter(ref info) => {
- profile_scope!("backdrop");
-
let (layout, _, spatial_node_index, clip_chain_id) = self.process_common_properties(
&info.common,
None,
@@ -1577,16 +1317,14 @@ impl<'a> SceneBuilder<'a> {
DisplayItem::SetGradientStops |
DisplayItem::SetFilterOps |
DisplayItem::SetFilterData |
- DisplayItem::SetFilterPrimitives |
- DisplayItem::SetPoints => {}
+ DisplayItem::SetFilterPrimitives => {}
// Special items that are handled in the parent method
DisplayItem::PushStackingContext(..) |
DisplayItem::PushReferenceFrame(..) |
DisplayItem::PopReferenceFrame |
- DisplayItem::PopStackingContext |
- DisplayItem::Iframe(_) => {
- unreachable!("Handled in `build_all`")
+ DisplayItem::PopStackingContext => {
+ unreachable!("Should have returned in parent method.")
}
DisplayItem::ReuseItems(key) |
@@ -1595,8 +1333,6 @@ impl<'a> SceneBuilder<'a> {
}
DisplayItem::PushShadow(info) => {
- profile_scope!("push_shadow");
-
let spatial_node_index = self.get_space(info.space_and_clip.spatial_id);
let clip_chain_id = self.get_clip_chain(
info.space_and_clip.clip_id,
@@ -1610,8 +1346,6 @@ impl<'a> SceneBuilder<'a> {
);
}
DisplayItem::PopAllShadows => {
- profile_scope!("pop_all_shadows");
-
self.pop_all_shadows();
}
}
@@ -1696,17 +1430,41 @@ impl<'a> SceneBuilder<'a> {
&mut self,
info: &LayoutPrimitiveInfo,
spatial_node_index: SpatialNodeIndex,
- clip_id: ClipId,
- tag: ItemTag,
+ clip_chain_id: ClipChainId,
) {
- self.hit_testing_scene.add_item(
+ let tag = match info.hit_info {
+ Some(tag) => tag,
+ None => return,
+ };
+
+ // We want to get a range of clip chain roots that apply to this
+ // hit testing primitive.
+
+ // Get the start index for the clip chain root range for this primitive.
+ let start = self.hit_testing_scene.next_clip_chain_index();
+
+ // Add the clip chain root for the primitive itself.
+ self.hit_testing_scene.add_clip_chain(clip_chain_id);
+
+ // Append any clip chain roots from enclosing stacking contexts.
+ for sc in &self.sc_stack {
+ self.hit_testing_scene.add_clip_chain(sc.clip_chain_id);
+ }
+
+ // Construct a clip chain roots range to be stored with the item.
+ let clip_chain_range = ops::Range {
+ start,
+ end: self.hit_testing_scene.next_clip_chain_index(),
+ };
+
+ // Create and store the hit testing primitive itself.
+ let new_item = HitTestingItem::new(
tag,
info,
spatial_node_index,
- clip_id,
- &self.clip_store,
- self.interners,
+ clip_chain_range,
);
+ self.hit_testing_scene.add_item(new_item);
}
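// Sketch of how the stored range is meant to be consumed at hit-test time
// (assumed semantics; `clip_chain_roots` and `chain_contains_point` are
// hypothetical names, not part of this patch): a point matches the item only
// if it passes every clip chain root recorded in `clip_chain_range`.
//
//   let roots = &hit_testing_scene.clip_chain_roots[item.clip_chain_range.clone()];
//   let hit = roots.iter().all(|chain_id| chain_contains_point(*chain_id, point));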
/// Add an already created primitive to the draw lists.
@@ -1722,33 +1480,13 @@ impl<'a> SceneBuilder<'a> {
println!("\tadded to stacking context at {}", self.sc_stack.len());
}
- // If we have a valid stacking context, the primitive gets added to that.
- // Otherwise, it gets added to a top-level picture cache slice.
-
- match self.sc_stack.last_mut() {
- Some(stacking_context) => {
- stacking_context.prim_list.add_prim(
- prim_instance,
- prim_rect,
- spatial_node_index,
- flags,
- );
- }
- None => {
- self.tile_cache_builder.add_prim(
- prim_instance,
- prim_rect,
- spatial_node_index,
- flags,
- &self.spatial_tree,
- &self.clip_store,
- self.interners,
- &self.config,
- &self.quality_settings,
- self.root_iframe_clip,
- );
- }
- }
+ let stacking_context = self.sc_stack.last_mut().unwrap();
+ stacking_context.prim_list.add_prim(
+ prim_instance,
+ prim_rect,
+ spatial_node_index,
+ flags,
+ );
}
/// Convenience interface that creates a primitive entry and adds it
@@ -1838,6 +1576,11 @@ impl<'a> SceneBuilder<'a> {
&info.rect,
&prim_instance,
);
+ self.add_primitive_to_hit_testing_list(
+ info,
+ spatial_node_index,
+ clip_chain_id,
+ );
self.add_primitive_to_draw_list(
prim_instance,
info.rect,
@@ -1846,23 +1589,9 @@ impl<'a> SceneBuilder<'a> {
);
}
- /// If no stacking contexts are present (i.e. we are adding prims to a tile
- /// cache), set a barrier to force creation of a slice before the next prim
- fn add_tile_cache_barrier_if_needed(
- &mut self,
- slice_flags: SliceFlags,
- ) {
- if self.sc_stack.is_empty() {
- // Shadows can only exist within a stacking context
- assert!(self.pending_shadow_items.is_empty());
-
- self.tile_cache_builder.add_tile_cache_barrier(slice_flags);
- }
- }
-
- /// Push a new stacking context. Returns context that must be passed to pop_stacking_context().
- fn push_stacking_context(
+ pub fn push_stacking_context(
&mut self,
+ pipeline_id: PipelineId,
composite_ops: CompositeOps,
transform_style: TransformStyle,
prim_flags: PrimitiveFlags,
@@ -1870,12 +1599,22 @@ impl<'a> SceneBuilder<'a> {
clip_id: Option<ClipId>,
requested_raster_space: RasterSpace,
flags: StackingContextFlags,
- pipeline_id: PipelineId,
- ) -> StackingContextInfo {
- profile_scope!("push_stacking_context");
+ device_pixel_scale: DevicePixelScale,
+ ) {
+ // Check if this stacking context is the root of a pipeline, and the caller
+ // has requested it as an output frame.
+ let is_pipeline_root =
+ self.sc_stack.last().map_or(true, |sc| sc.pipeline_id != pipeline_id);
+ let frame_output_pipeline_id = if is_pipeline_root && self.output_pipelines.contains(&pipeline_id) {
+ Some(pipeline_id)
+ } else {
+ None
+ };
- // Push current requested raster space on stack for prims to access
- self.raster_space_stack.push(requested_raster_space);
+ let clip_chain_id = match clip_id {
+ Some(clip_id) => self.clip_store.get_or_build_clip_chain_id(clip_id),
+ None => ClipChainId::NONE,
+ };
// Get the transform-style of the parent stacking context,
// which determines if we *might* need to draw this on
@@ -1923,20 +1662,21 @@ impl<'a> SceneBuilder<'a> {
(parent_is_3d || transform_style == TransformStyle::Preserve3D);
let context_3d = if participating_in_3d_context {
- // Get the spatial node index of the containing block, which
+ // Find the spatial node index of the containing block, which
// defines the context of backface-visibility.
- let ancestor_index = self.containing_block_stack
- .last()
- .cloned()
- .unwrap_or(ROOT_SPATIAL_NODE_INDEX);
-
+ let ancestor_context = self.sc_stack
+ .iter()
+ .rfind(|sc| !sc.is_3d());
Picture3DContext::In {
root_data: if parent_is_3d {
None
} else {
Some(Vec::new())
},
- ancestor_index,
+ ancestor_index: match ancestor_context {
+ Some(sc) => sc.spatial_node_index,
+ None => ROOT_SPATIAL_NODE_INDEX,
+ },
}
} else {
Picture3DContext::Out
@@ -1947,171 +1687,165 @@ impl<'a> SceneBuilder<'a> {
// prepare step to skip the intermediate surface if the
// clip node doesn't affect the stacking context rect.
let mut blit_reason = BlitReason::empty();
+ let mut current_clip_chain_id = clip_chain_id;
if flags.contains(StackingContextFlags::IS_BLEND_CONTAINER) {
blit_reason |= BlitReason::ISOLATE;
}
- // If this stacking context has any complex clips, we need to draw it
- // to an off-screen surface.
- if let Some(clip_id) = clip_id {
- if self.clip_store.has_complex_clips(clip_id) {
- blit_reason |= BlitReason::CLIP;
- }
- }
+ // Walk each clip in this chain, to see whether any of the clips
+ // require that we draw this to an intermediate surface.
+ while current_clip_chain_id != ClipChainId::NONE {
+ let clip_chain_node = &self
+ .clip_store
+ .clip_chain_nodes[current_clip_chain_id.0 as usize];
- let is_redundant = FlattenedStackingContext::is_redundant(
- flags,
- &context_3d,
- &composite_ops,
- blit_reason,
- self.sc_stack.last(),
- prim_flags,
- );
+ let clip_node_data = &self.interners.clip[clip_chain_node.handle];
- // If stacking context is a scrollbar, force a new slice for the primitives
- // within. The stacking context will be redundant and removed by above check.
- let set_tile_cache_barrier = prim_flags.contains(PrimitiveFlags::IS_SCROLLBAR_CONTAINER);
+ if let ClipNodeKind::Complex = clip_node_data.clip_node_kind {
+ blit_reason = BlitReason::CLIP;
+ break;
+ }
- if set_tile_cache_barrier {
- self.add_tile_cache_barrier_if_needed(SliceFlags::IS_SCROLLBAR);
+ current_clip_chain_id = clip_chain_node.parent_clip_chain_id;
}
- let mut sc_info = StackingContextInfo {
- pop_hit_testing_clip: false,
- pop_stacking_context: false,
- pop_containing_block: false,
- set_tile_cache_barrier,
- };
-
- // If this is not 3d, then it establishes an ancestor root for child 3d contexts.
- if !participating_in_3d_context {
- sc_info.pop_containing_block = true;
- self.containing_block_stack.push(spatial_node_index);
- }
+ let snap_to_device = self.sc_stack.last().map_or(
+ SpaceSnapper::new(
+ ROOT_SPATIAL_NODE_INDEX,
+ device_pixel_scale,
+ ),
+ |sc| sc.snap_to_device.clone(),
+ );
- // If this stacking context is redundant, we don't care about getting a clip-chain for it.
- // However, if we _do_ have a clip, we must build it here before the `push_clip_root`
- // calls below, to ensure we get the clips for drawing this stacking context itself.
- let clip_chain_id = if is_redundant {
- ClipChainId::NONE
- } else {
- // Get a clip-chain for this stacking context - even if the stacking context
- // itself has no clips, it's possible that there are clips to collect from
- // the previous clip-chain builder.
- let clip_id = clip_id.unwrap_or(ClipId::root(pipeline_id));
- self.clip_store.get_or_build_clip_chain_id(clip_id)
+ let is_redundant = match self.sc_stack.last() {
+ Some(parent) => {
+ FlattenedStackingContext::is_redundant(
+ &context_3d,
+ &composite_ops,
+ prim_flags,
+ blit_reason,
+ requested_raster_space,
+ parent,
+ )
+ }
+ None => {
+ false
+ }
};
- // If this has a valid clip, register with the hit-testing scene
if let Some(clip_id) = clip_id {
- self.hit_testing_scene.push_clip(clip_id);
- sc_info.pop_hit_testing_clip = true;
- }
-
- // If this stacking context is redundant (prims will be pushed into
- // the parent during pop) but it has a valid clip, then we need to
- // add that clip to the current clip chain builder, so it's correctly
- // applied to any primitives within this redundant stacking context.
- // For the normal case, we start a new clip root, knowing that the
- // clip on this stacking context will be pushed onto the stack during
- // frame building.
- if is_redundant {
- self.clip_store.push_clip_root(clip_id, true);
- } else {
- self.clip_store.push_clip_root(None, false);
- }
-
- // If not redundant, create a stacking context to hold primitive clusters
- if !is_redundant {
- sc_info.pop_stacking_context = true;
-
- // Push the SC onto the stack, so we know how to handle things in
- // pop_stacking_context.
- self.sc_stack.push(FlattenedStackingContext {
- prim_list: PrimitiveList::empty(),
- prim_flags,
- spatial_node_index,
- clip_chain_id,
- composite_ops,
- blit_reason,
- transform_style,
- context_3d,
- is_redundant,
- is_backdrop_root: flags.contains(StackingContextFlags::IS_BACKDROP_ROOT),
- flags,
- });
+ // If this stacking context is redundant (prims will be pushed into
+ // the parent during pop) but it has a valid clip, then we need to
+ // add that clip to the current clip chain builder, so it's correctly
+ // applied to any primitives within this redundant stacking context.
+ // For the normal case, we start a new clip root, knowing that the
+ // clip on this stacking context will be pushed onto the stack during
+ // frame building.
+ if is_redundant {
+ self.clip_store.push_clip_root(Some(clip_id), true);
+ } else {
+ self.clip_store.push_clip_root(None, false);
+ }
}
- sc_info
+ // Push the SC onto the stack, so we know how to handle things in
+ // pop_stacking_context.
+ self.sc_stack.push(FlattenedStackingContext {
+ prim_list: PrimitiveList::empty(),
+ pipeline_id,
+ prim_flags,
+ requested_raster_space,
+ spatial_node_index,
+ clip_id,
+ clip_chain_id,
+ frame_output_pipeline_id,
+ composite_ops,
+ blit_reason,
+ transform_style,
+ context_3d,
+ is_redundant,
+ is_backdrop_root: flags.contains(StackingContextFlags::IS_BACKDROP_ROOT),
+ snap_to_device,
+ });
}
- fn pop_stacking_context(
- &mut self,
- info: StackingContextInfo,
- ) {
- profile_scope!("pop_stacking_context");
-
- // Pop off current raster space (pushed unconditionally in push_stacking_context)
- self.raster_space_stack.pop().unwrap();
-
- // Pop off clip builder root (pushed unconditionally in push_stacking_context)
- self.clip_store.pop_clip_root();
-
- // If the stacking context formed a containing block, pop off the stack
- if info.pop_containing_block {
- self.containing_block_stack.pop().unwrap();
- }
-
- if info.set_tile_cache_barrier {
- self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
- }
+ pub fn pop_stacking_context(&mut self) {
+ let mut stacking_context = self.sc_stack.pop().unwrap();
- // If the stacking context established a clip root, pop off the stack
- if info.pop_hit_testing_clip {
- self.hit_testing_scene.pop_clip();
+ if stacking_context.clip_id.is_some() {
+ self.clip_store.pop_clip_root();
}
- // If the stacking context was otherwise redundant, early exit
- if !info.pop_stacking_context {
- return;
- }
+ // If we encounter a stacking context that is effectively a no-op, then instead
+ // of creating a picture, just append the primitive list to the parent stacking
+ // context as a short cut. This serves two purposes:
+ // (a) It's an optimization to reduce picture count and allocations, as display lists
+ // often contain a lot of these stacking contexts that don't require pictures or
+ // off-screen surfaces.
+ // (b) It's useful for the initial version of picture caching in gecko, by enabling
+ // us to just look for interesting scroll roots on the root stacking context,
+ // without having to consider cuts at stacking context boundaries.
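+ // For reference (assumed from the arguments FlattenedStackingContext::
+ // is_redundant receives in push_stacking_context): a stacking context counts
+ // as a no-op here when it has no composite ops (filters / mix-blend-mode),
+ // an empty blit reason, does not participate in a 3D rendering context, and
+ // its requested raster space is compatible with the parent - i.e. nothing
+ // that forces an off-screen surface of its own.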
+ let parent_is_empty = match self.sc_stack.last_mut() {
+ Some(parent_sc) => {
+ if stacking_context.is_redundant {
+ if !stacking_context.prim_list.is_empty() {
+ // If popping a redundant stacking context that is from a different pipeline,
+ // we want to insert flags where the picture cache slices should be created
+ // for this iframe. For now, we want to match existing behavior, that is:
+ // - Only cache content that is within the main scroll root, and:
+ // - Skip caching fixed position content before / after the scroll root.
+ // This means that we don't add scrollbars, which cause lots of extra
+ // invalidations. There is ongoing work to add tags to primitives that
+ // are scrollbars. Once this lands, we can simplify this logic considerably
+ // (and add a separate picture cache slice / OS layer for scroll bars).
+ if parent_sc.pipeline_id != stacking_context.pipeline_id && self.iframe_depth == 1 {
+ self.content_slice_count = stacking_context.init_picture_caching(
+ &self.spatial_tree,
+ &self.clip_store,
+ &self.quality_settings,
+ );
- let stacking_context = self.sc_stack.pop().unwrap();
-
- // If the stacking context is a blend container, and if we're at the top level
- // of the stacking context tree, we can make this blend container into a tile
- // cache. This means that we get caching and correct scrolling invalidation for
- // root level blend containers. For these cases, the readbacks of the backdrop
- // are handled by doing partial reads of the picture cache tiles during rendering.
- if stacking_context.flags.contains(StackingContextFlags::IS_BLEND_CONTAINER) &&
- self.sc_stack.is_empty() &&
- self.tile_cache_builder.can_add_container_tile_cache() &&
- self.spatial_tree.get_static_coordinate_system_id(stacking_context.spatial_node_index) == StaticCoordinateSystemId::ROOT
- {
- self.tile_cache_builder.add_tile_cache(
- stacking_context.prim_list,
- stacking_context.clip_chain_id,
- &self.spatial_tree,
- &self.clip_store,
- self.interners,
- &self.config,
- self.root_iframe_clip,
- SliceFlags::IS_BLEND_CONTAINER,
- );
+ // Mark that a user supplied tile cache was specified.
+ self.picture_caching_initialized = true;
+ }
- return;
- }
+ // If the parent context primitives list is empty, it's faster
+ // to assign the storage of the popped context instead of paying
+ // the copying cost for extend.
+ if parent_sc.prim_list.is_empty() {
+ parent_sc.prim_list = stacking_context.prim_list;
+ } else {
+ parent_sc.prim_list.extend(stacking_context.prim_list);
+ }
+ }
- let parent_is_empty = match self.sc_stack.last() {
- Some(parent_sc) => {
- assert!(!stacking_context.is_redundant);
+ return;
+ }
parent_sc.prim_list.is_empty()
},
None => true,
};
- let mut source = match stacking_context.context_3d {
+ if self.sc_stack.is_empty() {
+ // If we didn't encounter a content iframe, then set up picture caching slice markers
+ // on the root stacking context. This can happen in Gecko when the parent process
+ // provides the content display list (e.g. about:support, about:config, etc.).
+ if !self.picture_caching_initialized {
+ self.content_slice_count = stacking_context.init_picture_caching(
+ &self.spatial_tree,
+ &self.clip_store,
+ &self.quality_settings,
+ );
+ self.picture_caching_initialized = true;
+ }
+
+ self.setup_picture_caching(
+ &mut stacking_context.prim_list,
+ );
+ }
+
+ let (leaf_context_3d, leaf_composite_mode, leaf_output_pipeline_id) = match stacking_context.context_3d {
// TODO(gw): For now, as soon as this picture is in
// a 3D context, we draw it to an intermediate
// surface and apply plane splitting. However,
@@ -2119,92 +1853,63 @@ impl<'a> SceneBuilder<'a> {
// During culling, we can check if there is actually
// perspective present, and skip the plane splitting
// completely when that is not the case.
- Picture3DContext::In { ancestor_index, .. } => {
- let composite_mode = Some(
- PictureCompositeMode::Blit(BlitReason::PRESERVE3D | stacking_context.blit_reason)
- );
-
- // Add picture for this actual stacking context contents to render into.
- let pic_index = PictureIndex(self.prim_store.pictures
- .alloc()
- .init(PicturePrimitive::new_image(
- composite_mode.clone(),
- Picture3DContext::In { root_data: None, ancestor_index },
- true,
- stacking_context.prim_flags,
- stacking_context.prim_list,
- stacking_context.spatial_node_index,
- PictureOptions::default(),
- ))
- );
-
- let instance = create_prim_instance(
- pic_index,
- composite_mode.into(),
- ClipChainId::NONE,
- &mut self.interners,
- );
-
- PictureChainBuilder::from_instance(
- instance,
- stacking_context.prim_flags,
- stacking_context.spatial_node_index,
- )
- }
- Picture3DContext::Out => {
+ Picture3DContext::In { ancestor_index, .. } => (
+ Picture3DContext::In { root_data: None, ancestor_index },
+ Some(PictureCompositeMode::Blit(BlitReason::PRESERVE3D | stacking_context.blit_reason)),
+ None,
+ ),
+ Picture3DContext::Out => (
+ Picture3DContext::Out,
if stacking_context.blit_reason.is_empty() {
- PictureChainBuilder::from_prim_list(
- stacking_context.prim_list,
- stacking_context.prim_flags,
- stacking_context.spatial_node_index,
- )
+ // By default, this picture will be collapsed into
+ // the owning target.
+ None
} else {
- let composite_mode = Some(
- PictureCompositeMode::Blit(stacking_context.blit_reason)
- );
+ // Add a dummy composite filter if the SC has to be isolated.
+ Some(PictureCompositeMode::Blit(stacking_context.blit_reason))
+ },
+ stacking_context.frame_output_pipeline_id
+ ),
+ };
- // Add picture for this actual stacking context contents to render into.
- let pic_index = PictureIndex(self.prim_store.pictures
- .alloc()
- .init(PicturePrimitive::new_image(
- composite_mode.clone(),
- Picture3DContext::Out,
- true,
- stacking_context.prim_flags,
- stacking_context.prim_list,
- stacking_context.spatial_node_index,
- PictureOptions::default(),
- ))
- );
+ // Add picture for this actual stacking context contents to render into.
+ let leaf_pic_index = PictureIndex(self.prim_store.pictures
+ .alloc()
+ .init(PicturePrimitive::new_image(
+ leaf_composite_mode.clone(),
+ leaf_context_3d,
+ leaf_output_pipeline_id,
+ true,
+ stacking_context.prim_flags,
+ stacking_context.requested_raster_space,
+ stacking_context.prim_list,
+ stacking_context.spatial_node_index,
+ None,
+ PictureOptions::default(),
+ ))
+ );
- let instance = create_prim_instance(
- pic_index,
- composite_mode.into(),
- ClipChainId::NONE,
- &mut self.interners,
- );
+ // Create a chain of pictures based on presence of filters,
+ // mix-blend-mode and/or 3d rendering context containers.
- PictureChainBuilder::from_instance(
- instance,
- stacking_context.prim_flags,
- stacking_context.spatial_node_index,
- )
- }
- }
- };
+ let mut current_pic_index = leaf_pic_index;
+ let mut cur_instance = create_prim_instance(
+ leaf_pic_index,
+ leaf_composite_mode.into(),
+ ClipChainId::NONE,
+ &mut self.interners,
+ );
+
+ if cur_instance.is_chased() {
+ println!("\tis a leaf primitive for a stacking context");
+ }
// If establishing a 3d context, the `cur_instance` represents
// a picture with all the *trailing* immediate children elements.
// We append this to the preserve-3D picture set and make a container picture of them.
if let Picture3DContext::In { root_data: Some(mut prims), ancestor_index } = stacking_context.context_3d {
- let instance = source.finalize(
- ClipChainId::NONE,
- &mut self.interners,
- &mut self.prim_store,
- );
-
prims.push(ExtendedPrimitiveInstance {
- instance,
+ instance: cur_instance,
spatial_node_index: stacking_context.spatial_node_index,
flags: stacking_context.prim_flags,
});
@@ -2220,7 +1925,7 @@ impl<'a> SceneBuilder<'a> {
}
// This is the actual picture representing our 3D hierarchy root.
- let pic_index = PictureIndex(self.prim_store.pictures
+ current_pic_index = PictureIndex(self.prim_store.pictures
.alloc()
.init(PicturePrimitive::new_image(
None,
@@ -2228,38 +1933,42 @@ impl<'a> SceneBuilder<'a> {
root_data: Some(Vec::new()),
ancestor_index,
},
+ stacking_context.frame_output_pipeline_id,
true,
stacking_context.prim_flags,
+ stacking_context.requested_raster_space,
prim_list,
stacking_context.spatial_node_index,
+ None,
PictureOptions::default(),
))
);
- let instance = create_prim_instance(
- pic_index,
+ cur_instance = create_prim_instance(
+ current_pic_index,
PictureCompositeKey::Identity,
ClipChainId::NONE,
&mut self.interners,
);
-
- source = PictureChainBuilder::from_instance(
- instance,
- stacking_context.prim_flags,
- stacking_context.spatial_node_index,
- );
}
- let has_filters = stacking_context.composite_ops.has_valid_filters();
-
- source = self.wrap_prim_with_filters(
- source,
+ let (filtered_pic_index, filtered_instance) = self.wrap_prim_with_filters(
+ cur_instance,
+ current_pic_index,
stacking_context.composite_ops.filters,
stacking_context.composite_ops.filter_primitives,
stacking_context.composite_ops.filter_datas,
+ stacking_context.prim_flags,
+ stacking_context.requested_raster_space,
+ stacking_context.spatial_node_index,
true,
);
+ let has_filters = current_pic_index != filtered_pic_index;
+
+ current_pic_index = filtered_pic_index;
+ cur_instance = filtered_instance;
+
// Same for mix-blend-mode, except we can skip if this primitive is the first in the parent
// stacking context.
// From https://drafts.fxtf.org/compositing-1/#generalformula, the formula for blending is:
@@ -2273,20 +1982,44 @@ impl<'a> SceneBuilder<'a> {
// backdrop alpha will be 0, and then the blend equation collapses to just
// Cs = Cs, and the blend mode isn't taken into account at all.
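// For reference, the general formula from the spec linked above is
//   Cs = (1 - ab) x Cs + ab x B(Cb, Cs)
// where Cs is the source color, Cb / ab the backdrop color and alpha, and B
// the blend function - so with an empty backdrop (ab = 0) it collapses to
// Cs = Cs, as noted above.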
if let (Some(mix_blend_mode), false) = (stacking_context.composite_ops.mix_blend_mode, parent_is_empty) {
- let parent_is_isolated = match self.sc_stack.last() {
- Some(parent_sc) => parent_sc.blit_reason.contains(BlitReason::ISOLATE),
- None => false,
- };
- if parent_is_isolated {
- let composite_mode = PictureCompositeMode::MixBlend(mix_blend_mode);
+ if self.sc_stack.last().unwrap().blit_reason.contains(BlitReason::ISOLATE) {
+ let composite_mode = Some(PictureCompositeMode::MixBlend(mix_blend_mode));
- source = source.add_picture(
- composite_mode,
- Picture3DContext::Out,
- PictureOptions::default(),
+ let mut prim_list = PrimitiveList::empty();
+ prim_list.add_prim(
+ cur_instance.clone(),
+ LayoutRect::zero(),
+ stacking_context.spatial_node_index,
+ stacking_context.prim_flags,
+ );
+
+ let blend_pic_index = PictureIndex(self.prim_store.pictures
+ .alloc()
+ .init(PicturePrimitive::new_image(
+ composite_mode.clone(),
+ Picture3DContext::Out,
+ None,
+ true,
+ stacking_context.prim_flags,
+ stacking_context.requested_raster_space,
+ prim_list,
+ stacking_context.spatial_node_index,
+ None,
+ PictureOptions::default(),
+ ))
+ );
+
+ current_pic_index = blend_pic_index;
+ cur_instance = create_prim_instance(
+ blend_pic_index,
+ composite_mode.into(),
+ ClipChainId::NONE,
&mut self.interners,
- &mut self.prim_store,
);
+
+ if cur_instance.is_chased() {
+ println!("\tis a mix-blend picture for a stacking context with {:?}", mix_blend_mode);
+ }
} else {
// If we have a mix-blend-mode, the stacking context needs to be isolated
// to blend correctly as per the CSS spec.
@@ -2297,11 +2030,7 @@ impl<'a> SceneBuilder<'a> {
// Set the stacking context clip on the outermost picture in the chain,
// unless we already set it on the leaf picture.
- let cur_instance = source.finalize(
- stacking_context.clip_chain_id,
- &mut self.interners,
- &mut self.prim_store,
- );
+ cur_instance.clip_chain_id = stacking_context.clip_chain_id;
// The primitive instance for the remainder of flat children of this SC
// if it's a part of 3D hierarchy but not the root of it.
@@ -2322,13 +2051,7 @@ impl<'a> SceneBuilder<'a> {
}
// This must be the root stacking context
None => {
- self.add_primitive_to_draw_list(
- cur_instance,
- LayoutRect::zero(),
- stacking_context.spatial_node_index,
- stacking_context.prim_flags,
- );
-
+ self.root_pic_index = current_pic_index;
None
}
};
@@ -2376,6 +2099,8 @@ impl<'a> SceneBuilder<'a> {
&mut self,
pipeline_id: PipelineId,
viewport_size: &LayoutSize,
+ content_size: &LayoutSize,
+ device_pixel_scale: DevicePixelScale,
) {
if let ChasePrimitive::Id(id) = self.config.chase_primitive {
println!("Chasing {:?} by index", id);
@@ -2388,29 +2113,33 @@ impl<'a> SceneBuilder<'a> {
pipeline_id,
TransformStyle::Flat,
PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
+ ReferenceFrameKind::Transform,
LayoutVector2D::zero(),
);
- let viewport_rect = self.snap_rect(
- &LayoutRect::new(LayoutPoint::zero(), *viewport_size),
+ // We can't use this with the stacking context because it does not exist
+ // yet. Just create a dedicated snapper for the root.
+ let snap_to_device = SpaceSnapper::new_with_target(
spatial_node_index,
+ ROOT_SPATIAL_NODE_INDEX,
+ device_pixel_scale,
+ &self.spatial_tree,
+ );
+
+ let content_size = snap_to_device.snap_size(content_size);
+ let viewport_rect = snap_to_device.snap_rect(
+ &LayoutRect::new(LayoutPoint::zero(), *viewport_size),
);
self.add_scroll_frame(
SpatialId::root_scroll_node(pipeline_id),
spatial_node_index,
- ExternalScrollId(0, pipeline_id),
+ Some(ExternalScrollId(0, pipeline_id)),
pipeline_id,
&viewport_rect,
- &viewport_rect.size,
+ &content_size,
ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::PipelineRoot {
- is_root_pipeline: true,
- },
+ ScrollFrameKind::PipelineRoot,
LayoutVector2D::zero(),
);
}
@@ -2420,31 +2149,18 @@ impl<'a> SceneBuilder<'a> {
new_node_id: ClipId,
space_and_clip: &SpaceAndClipInfo,
image_mask: &ImageMask,
- fill_rule: FillRule,
- points_range: ItemRange<LayoutPoint>,
) {
let spatial_node_index = self.id_to_index_mapper.get_spatial_node_index(space_and_clip.spatial_id);
- let snapped_mask_rect = self.snap_rect(
- &image_mask.rect,
+ let snap_to_device = &mut self.sc_stack.last_mut().unwrap().snap_to_device;
+ snap_to_device.set_target_spatial_node(
spatial_node_index,
+ &self.spatial_tree,
);
- let points: Vec<LayoutPoint> = points_range.iter().collect();
-
- // If any points are provided, then intern a polygon with the points and fill rule.
- let mut polygon_handle: Option<PolygonDataHandle> = None;
- if points.len() > 0 {
- let item = PolygonKey::new(&points, fill_rule);
-
- let handle = self
- .interners
- .polygon
- .intern(&item, || item);
- polygon_handle = Some(handle);
- }
+ let snapped_mask_rect = snap_to_device.snap_rect(&image_mask.rect);
let item = ClipItemKey {
- kind: ClipItemKeyKind::image_mask(image_mask, snapped_mask_rect, polygon_handle),
+ kind: ClipItemKeyKind::image_mask(image_mask, snapped_mask_rect),
};
let handle = self
@@ -2456,10 +2172,7 @@ impl<'a> SceneBuilder<'a> {
}
});
- let instance = SceneClipInstance {
- key: item,
- clip: ClipInstance::new(handle, spatial_node_index),
- };
+ let instance = ClipInstance::new(handle, spatial_node_index);
self.clip_store.register_clip_template(
new_node_id,
@@ -2477,11 +2190,14 @@ impl<'a> SceneBuilder<'a> {
) {
let spatial_node_index = self.id_to_index_mapper.get_spatial_node_index(space_and_clip.spatial_id);
- let snapped_clip_rect = self.snap_rect(
- clip_rect,
+ let snap_to_device = &mut self.sc_stack.last_mut().unwrap().snap_to_device;
+ snap_to_device.set_target_spatial_node(
spatial_node_index,
+ &self.spatial_tree,
);
+ let snapped_clip_rect = snap_to_device.snap_rect(clip_rect);
+
let item = ClipItemKey {
kind: ClipItemKeyKind::rectangle(snapped_clip_rect, ClipMode::Clip),
};
@@ -2494,10 +2210,7 @@ impl<'a> SceneBuilder<'a> {
}
});
- let instance = SceneClipInstance {
- key: item,
- clip: ClipInstance::new(handle, spatial_node_index),
- };
+ let instance = ClipInstance::new(handle, spatial_node_index);
self.clip_store.register_clip_template(
new_node_id,
@@ -2515,10 +2228,13 @@ impl<'a> SceneBuilder<'a> {
) {
let spatial_node_index = self.id_to_index_mapper.get_spatial_node_index(space_and_clip.spatial_id);
- let snapped_region_rect = self.snap_rect(
- &clip.rect.translate(current_offset),
+ let snap_to_device = &mut self.sc_stack.last_mut().unwrap().snap_to_device;
+ snap_to_device.set_target_spatial_node(
spatial_node_index,
+ &self.spatial_tree,
);
+
+ let snapped_region_rect = snap_to_device.snap_rect(&clip.rect.translate(current_offset));
let item = ClipItemKey {
kind: ClipItemKeyKind::rounded_rect(
snapped_region_rect,
@@ -2536,10 +2252,7 @@ impl<'a> SceneBuilder<'a> {
}
});
- let instance = SceneClipInstance {
- key: item,
- clip: ClipInstance::new(handle, spatial_node_index),
- };
+ let instance = ClipInstance::new(handle, spatial_node_index);
self.clip_store.register_clip_template(
new_node_id,
@@ -2560,11 +2273,14 @@ impl<'a> SceneBuilder<'a> {
// Map the ClipId for the positioning node to a spatial node index.
let spatial_node_index = self.id_to_index_mapper.get_spatial_node_index(space_and_clip.spatial_id);
- let snapped_clip_rect = self.snap_rect(
- &clip_region.main,
+ let snap_to_device = &mut self.sc_stack.last_mut().unwrap().snap_to_device;
+ snap_to_device.set_target_spatial_node(
spatial_node_index,
+ &self.spatial_tree,
);
- let mut instances: SmallVec<[SceneClipInstance; 4]> = SmallVec::new();
+
+ let snapped_clip_rect = snap_to_device.snap_rect(&clip_region.main);
+ let mut instances: SmallVec<[ClipInstance; 4]> = SmallVec::new();
// Intern each clip item in this clip node, and add the interned
// handle to a clip chain node, parented to form a chain.
@@ -2583,15 +2299,10 @@ impl<'a> SceneBuilder<'a> {
clip_node_kind: ClipNodeKind::Rectangle,
}
});
- instances.push(
- SceneClipInstance {
- key: item,
- clip: ClipInstance::new(handle, spatial_node_index),
- },
- );
+ instances.push(ClipInstance::new(handle, spatial_node_index));
for region in clip_region.complex_clips {
- let snapped_region_rect = self.snap_rect(&region.rect, spatial_node_index);
+ let snapped_region_rect = snap_to_device.snap_rect(&region.rect);
let item = ClipItemKey {
kind: ClipItemKeyKind::rounded_rect(
snapped_region_rect,
@@ -2609,12 +2320,7 @@ impl<'a> SceneBuilder<'a> {
}
});
- instances.push(
- SceneClipInstance {
- key: item,
- clip: ClipInstance::new(handle, spatial_node_index),
- },
- );
+ instances.push(ClipInstance::new(handle, spatial_node_index));
}
self.clip_store.register_clip_template(
@@ -2628,7 +2334,7 @@ impl<'a> SceneBuilder<'a> {
&mut self,
new_node_id: SpatialId,
parent_node_index: SpatialNodeIndex,
- external_id: ExternalScrollId,
+ external_id: Option<ExternalScrollId>,
pipeline_id: PipelineId,
frame_rect: &LayoutRect,
content_size: &LayoutSize,
@@ -2695,68 +2401,62 @@ impl<'a> SceneBuilder<'a> {
// Gaussian blur with a standard deviation equal to half the blur radius."
let std_deviation = pending_shadow.shadow.blur_radius * 0.5;
+ // If the shadow has no blur, any elements will get directly rendered
+ // into the parent picture surface, instead of allocating and drawing
+ // into an intermediate surface. In this case, we will need to apply
+ // the local clip rect to primitives.
+ let is_passthrough = pending_shadow.shadow.blur_radius == 0.0;
+
+ // Shadows always rasterize in local space.
+ // TODO(gw): expose API for clients to specify a raster scale
+ let raster_space = if is_passthrough {
+ self.sc_stack.last().unwrap().requested_raster_space
+ } else {
+ RasterSpace::Local(1.0)
+ };
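+ // Note (an assumption, not stated in this patch): Local(1.0) pins the blur
+ // to a fixed scale in layout space, so the rasterized shadow does not have
+ // to be regenerated as the device pixel scale or zoom changes.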
+
// Add any primitives that come after this shadow in the item
// list to this shadow.
let mut prim_list = PrimitiveList::empty();
- let blur_filter = Filter::Blur(std_deviation, std_deviation);
- let blur_is_noop = blur_filter.is_noop();
for item in &items {
- let (instance, info, spatial_node_index) = match item {
+ match item {
ShadowItem::Image(ref pending_image) => {
- self.create_shadow_prim(
+ self.add_shadow_prim(
&pending_shadow,
pending_image,
- blur_is_noop,
+ &mut prim_list,
)
}
ShadowItem::LineDecoration(ref pending_line_dec) => {
- self.create_shadow_prim(
+ self.add_shadow_prim(
&pending_shadow,
pending_line_dec,
- blur_is_noop,
+ &mut prim_list,
)
}
ShadowItem::NormalBorder(ref pending_border) => {
- self.create_shadow_prim(
+ self.add_shadow_prim(
&pending_shadow,
pending_border,
- blur_is_noop,
+ &mut prim_list,
)
}
ShadowItem::Primitive(ref pending_primitive) => {
- self.create_shadow_prim(
+ self.add_shadow_prim(
&pending_shadow,
pending_primitive,
- blur_is_noop,
+ &mut prim_list,
)
}
ShadowItem::TextRun(ref pending_text_run) => {
- self.create_shadow_prim(
+ self.add_shadow_prim(
&pending_shadow,
pending_text_run,
- blur_is_noop,
+ &mut prim_list,
)
}
- _ => {
- continue;
- }
- };
-
- if blur_is_noop {
- self.add_primitive_to_draw_list(
- instance,
- info.rect,
- spatial_node_index,
- info.flags,
- );
- } else {
- prim_list.add_prim(
- instance,
- info.rect,
- spatial_node_index,
- info.flags,
- );
+ _ => {}
}
}
@@ -2767,10 +2467,9 @@ impl<'a> SceneBuilder<'a> {
// blur radius is 0, the code in Picture::prepare_for_render will
// detect this and mark the picture to be drawn directly into the
// parent picture, which avoids an intermediate surface and blur.
- let blur_filter = Filter::Blur(std_deviation, std_deviation);
- assert!(!blur_filter.is_noop());
- let composite_mode = Some(PictureCompositeMode::Filter(blur_filter));
- let composite_mode_key = composite_mode.clone().into();
+ let blur_filter = Filter::Blur(std_deviation);
+ let composite_mode = PictureCompositeMode::Filter(blur_filter);
+ let composite_mode_key = Some(composite_mode.clone()).into();
// Pass through configuration information about whether WR should
// do the bounding rect inflation for text shadows.
@@ -2782,12 +2481,15 @@ impl<'a> SceneBuilder<'a> {
let shadow_pic_index = PictureIndex(self.prim_store.pictures
.alloc()
.init(PicturePrimitive::new_image(
- composite_mode,
+ Some(composite_mode),
Picture3DContext::Out,
- false,
+ None,
+ is_passthrough,
PrimitiveFlags::IS_BACKFACE_VISIBLE,
+ raster_space,
prim_list,
pending_shadow.spatial_node_index,
+ None,
options,
))
);
@@ -2852,29 +2554,33 @@ impl<'a> SceneBuilder<'a> {
self.pending_shadow_items = items;
}
- fn create_shadow_prim<P>(
+ fn add_shadow_prim<P>(
&mut self,
pending_shadow: &PendingShadow,
pending_primitive: &PendingPrimitive<P>,
- blur_is_noop: bool,
- ) -> (PrimitiveInstance, LayoutPrimitiveInfo, SpatialNodeIndex)
+ prim_list: &mut PrimitiveList,
+ )
where
P: InternablePrimitive + CreateShadow,
Interners: AsMut<Interner<P>>,
{
+ let snap_to_device = &mut self.sc_stack.last_mut().unwrap().snap_to_device;
+ snap_to_device.set_target_spatial_node(
+ pending_primitive.spatial_node_index,
+ &self.spatial_tree,
+ );
+
// Offset the local rect and clip rect by the shadow offset. The pending
// primitive has already been snapped, but we will need to snap the
// shadow after translation. We don't need to worry about the size
// changing because the shadow has the same raster space as the
// primitive, and thus we know the size is already rounded.
let mut info = pending_primitive.info.clone();
- info.rect = self.snap_rect(
+ info.rect = snap_to_device.snap_rect(
&info.rect.translate(pending_shadow.shadow.offset),
- pending_primitive.spatial_node_index,
);
- info.clip_rect = self.snap_rect(
+ info.clip_rect = snap_to_device.snap_rect(
&info.clip_rect.translate(pending_shadow.shadow.offset),
- pending_primitive.spatial_node_index,
);
// Construct and add a primitive for the given shadow.
@@ -2882,14 +2588,16 @@ impl<'a> SceneBuilder<'a> {
&info,
pending_primitive.spatial_node_index,
pending_primitive.clip_chain_id,
- pending_primitive.prim.create_shadow(
- &pending_shadow.shadow,
- blur_is_noop,
- self.raster_space_stack.last().cloned().unwrap(),
- ),
+ pending_primitive.prim.create_shadow(&pending_shadow.shadow),
);
- (shadow_prim_instance, info, pending_primitive.spatial_node_index)
+ // Add the new primitive to the shadow picture.
+ prim_list.add_prim(
+ shadow_prim_instance,
+ info.rect,
+ pending_primitive.spatial_node_index,
+ info.flags,
+ );
}
fn add_shadow_prim_to_draw_list<P>(
@@ -2931,25 +2639,54 @@ impl<'a> SceneBuilder<'a> {
) {
}
- pub fn add_clear_rectangle(
+ pub fn add_solid_rectangle(
&mut self,
spatial_node_index: SpatialNodeIndex,
clip_chain_id: ClipChainId,
info: &LayoutPrimitiveInfo,
+ color: PropertyBinding<ColorF>,
) {
- // Clear prims must be in their own picture cache slice to
- // be composited correctly.
- self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
+ match color {
+ PropertyBinding::Value(value) => {
+ if value.a == 0.0 {
+ // Don't add transparent rectangles to the draw list,
+ // but do consider them for hit testing. This allows
+ // specifying invisible hit testing areas.
+ self.add_primitive_to_hit_testing_list(
+ info,
+ spatial_node_index,
+ clip_chain_id,
+ );
+ return;
+ }
+ },
+ PropertyBinding::Binding(..) => {},
+ }
self.add_primitive(
spatial_node_index,
clip_chain_id,
info,
Vec::new(),
- PrimitiveKeyKind::Clear,
+ PrimitiveKeyKind::Rectangle {
+ color: color.into(),
+ },
);
+ }
- self.add_tile_cache_barrier_if_needed(SliceFlags::empty());
+ pub fn add_clear_rectangle(
+ &mut self,
+ spatial_node_index: SpatialNodeIndex,
+ clip_chain_id: ClipChainId,
+ info: &LayoutPrimitiveInfo,
+ ) {
+ self.add_primitive(
+ spatial_node_index,
+ clip_chain_id,
+ info,
+ Vec::new(),
+ PrimitiveKeyKind::Clear,
+ );
}
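
The new add_solid_rectangle above special-cases fully transparent rectangles whose color is a static value: they are registered for hit testing but never drawn, which is how invisible hit-test areas are expressed. Animated color bindings are always drawn, since they may become visible later. A condensed sketch with hypothetical stand-in types:

enum PropertyBinding<T> { Value(T), Binding }
struct ColorF { a: f32 }

fn wants_draw(color: &PropertyBinding<ColorF>) -> bool {
    match color {
        // An animated binding may become visible later, so keep it.
        PropertyBinding::Binding => true,
        // A statically transparent rect is hit-test-only.
        PropertyBinding::Value(c) => c.a != 0.0,
    }
}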
pub fn add_line(
@@ -3066,7 +2803,7 @@ impl<'a> SceneBuilder<'a> {
&info,
gradient.start_point,
gradient.end_point,
- read_gradient_stops(gradient_stops),
+ gradient_stops,
gradient.extend_mode,
LayoutSize::new(border.height as f32, border.width as f32),
LayoutSize::zero(),
@@ -3091,7 +2828,7 @@ impl<'a> SceneBuilder<'a> {
gradient.start_offset * gradient.radius.width,
gradient.end_offset * gradient.radius.width,
gradient.radius.width / gradient.radius.height,
- read_gradient_stops(gradient_stops),
+ gradient_stops,
gradient.extend_mode,
LayoutSize::new(border.height as f32, border.width as f32),
LayoutSize::zero(),
@@ -3147,7 +2884,7 @@ impl<'a> SceneBuilder<'a> {
info: &LayoutPrimitiveInfo,
start_point: LayoutPoint,
end_point: LayoutPoint,
- stops: Vec<GradientStopKey>,
+ stops: ItemRange<GradientStop>,
extend_mode: ExtendMode,
stretch_size: LayoutSize,
mut tile_spacing: LayoutSize,
@@ -3156,17 +2893,19 @@ impl<'a> SceneBuilder<'a> {
let mut prim_rect = info.rect;
simplify_repeated_primitive(&stretch_size, &mut tile_spacing, &mut prim_rect);
- let mut is_entirely_transparent = true;
- for stop in &stops {
- if stop.color.a > 0 {
- is_entirely_transparent = false;
- break;
+ let mut max_alpha: f32 = 0.0;
+
+ let stops = stops.iter().map(|stop| {
+ max_alpha = max_alpha.max(stop.color.a);
+ GradientStopKey {
+ offset: stop.offset,
+ color: stop.color.into(),
}
- }
+ }).collect();
// If all the stops have no alpha, then this
// gradient can't contribute to the scene.
- if is_entirely_transparent {
+ if max_alpha <= 0.0 {
return None;
}
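
The conversion above folds the transparency check into the same pass that builds the stop keys: it tracks the maximum alpha while mapping, and culls the gradient when every stop is fully transparent. A standalone sketch, using plain (offset, alpha) pairs in place of GradientStop:

fn convert_stops(raw: &[(f32, f32)]) -> Option<Vec<(f32, f32)>> {
    let mut max_alpha: f32 = 0.0;
    let stops: Vec<_> = raw.iter().map(|&(offset, alpha)| {
        max_alpha = max_alpha.max(alpha);
        (offset, alpha)
    }).collect();
    // All stops transparent: the gradient cannot contribute to the scene.
    if max_alpha <= 0.0 { None } else { Some(stops) }
}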
@@ -3188,13 +2927,6 @@ impl<'a> SceneBuilder<'a> {
(start_point, end_point)
};
- let is_tiled = prim_rect.size.width > stretch_size.width
- || prim_rect.size.height > stretch_size.height;
- // SWGL has a fast-path that can render gradients faster than it can sample from the
- // texture cache so we disable caching in this configuration. Cached gradients are
- // faster on hardware.
- let cached = !self.config.is_software || is_tiled;
-
Some(LinearGradient {
extend_mode,
start_point: sp.into(),
@@ -3204,7 +2936,6 @@ impl<'a> SceneBuilder<'a> {
stops,
reverse_stops,
nine_patch,
- cached,
})
}
@@ -3215,7 +2946,7 @@ impl<'a> SceneBuilder<'a> {
start_radius: f32,
end_radius: f32,
ratio_xy: f32,
- stops: Vec<GradientStopKey>,
+ stops: ItemRange<GradientStop>,
extend_mode: ExtendMode,
stretch_size: LayoutSize,
mut tile_spacing: LayoutSize,
@@ -3230,6 +2961,13 @@ impl<'a> SceneBuilder<'a> {
ratio_xy,
};
+ let stops = stops.iter().map(|stop| {
+ GradientStopKey {
+ offset: stop.offset,
+ color: stop.color.into(),
+ }
+ }).collect();
+
RadialGradient {
extend_mode,
center: center.into(),
@@ -3337,18 +3075,10 @@ impl<'a> SceneBuilder<'a> {
})
.collect();
- // Query the current requested raster space (stack handled by push/pop
- // stacking context).
- let requested_raster_space = self.raster_space_stack
- .last()
- .cloned()
- .unwrap();
-
TextRun {
glyphs: Arc::new(glyphs),
font,
shadow: false,
- requested_raster_space,
}
};
@@ -3463,6 +3193,7 @@ impl<'a> SceneBuilder<'a> {
};
let backdrop_spatial_node_index = self.prim_store.pictures[backdrop_pic_index.0].spatial_node_index;
+ let requested_raster_space = self.sc_stack.last().expect("no active stacking context").requested_raster_space;
let mut instance = self.create_primitive(
info,
@@ -3501,10 +3232,13 @@ impl<'a> SceneBuilder<'a> {
.init(PicturePrimitive::new_image(
composite_mode.clone(),
Picture3DContext::Out,
+ None,
true,
prim_flags,
+ requested_raster_space,
prim_list,
backdrop_spatial_node_index,
+ None,
PictureOptions {
inflate_if_required: false,
},
@@ -3519,17 +3253,15 @@ impl<'a> SceneBuilder<'a> {
);
}
- let mut source = PictureChainBuilder::from_instance(
+ let (mut filtered_pic_index, mut filtered_instance) = self.wrap_prim_with_filters(
instance,
- info.flags,
- backdrop_spatial_node_index,
- );
-
- source = self.wrap_prim_with_filters(
- source,
+ backdrop_pic_index,
filters,
filter_primitives,
filter_datas,
+ info.flags,
+ requested_raster_space,
+ backdrop_spatial_node_index,
false,
);
@@ -3544,20 +3276,23 @@ impl<'a> SceneBuilder<'a> {
let filter_primitives = stacking_context.composite_ops.filter_primitives.clone();
let filter_datas = stacking_context.composite_ops.filter_datas.clone();
- source = self.wrap_prim_with_filters(
- source,
+ let (pic_index, instance) = self.wrap_prim_with_filters(
+ filtered_instance,
+ filtered_pic_index,
filters,
filter_primitives,
filter_datas,
+ info.flags,
+ requested_raster_space,
+ backdrop_spatial_node_index,
false,
);
+
+ filtered_instance = instance;
+ filtered_pic_index = pic_index;
}
- let filtered_instance = source.finalize(
- clip_chain_id,
- &mut self.interners,
- &mut self.prim_store,
- );
+ filtered_instance.clip_chain_id = clip_chain_id;
self.sc_stack
.iter_mut()
@@ -3573,9 +3308,6 @@ impl<'a> SceneBuilder<'a> {
);
}
- /// Create pictures for each stacking context rendered into their parents, down to the nearest
- /// backdrop root until we have a picture that represents the contents of all primitives added
- /// since the backdrop root
pub fn cut_backdrop_picture(&mut self) -> Option<PictureIndex> {
let mut flattened_items = None;
let mut backdrop_root = None;
@@ -3619,15 +3351,18 @@ impl<'a> SceneBuilder<'a> {
Some(pic_index)
}
- #[must_use]
fn wrap_prim_with_filters(
&mut self,
- mut source: PictureChainBuilder,
+ mut cur_instance: PrimitiveInstance,
+ mut current_pic_index: PictureIndex,
mut filter_ops: Vec<Filter>,
mut filter_primitives: Vec<FilterPrimitive>,
filter_datas: Vec<FilterData>,
+ flags: PrimitiveFlags,
+ requested_raster_space: RasterSpace,
+ spatial_node_index: SpatialNodeIndex,
inflate_if_required: bool,
- ) -> PictureChainBuilder {
+ ) -> (PictureIndex, PrimitiveInstance) {
// TODO(cbrewster): Currently CSS and SVG filters live side by side in WebRender, but unexpected results will
// happen if they are used simulataneously. Gecko only provides either filter ops or filter primitives.
// At some point, these two should be combined and CSS filters should be expressed in terms of SVG filters.
@@ -3637,7 +3372,7 @@ impl<'a> SceneBuilder<'a> {
// For each filter, create a new image with that composite mode.
let mut current_filter_data_index = 0;
for filter in &mut filter_ops {
- let composite_mode = match filter {
+ let composite_mode = Some(match *filter {
Filter::ComponentTransfer => {
let filter_data =
&filter_datas[current_filter_data_index];
@@ -3666,22 +3401,50 @@ impl<'a> SceneBuilder<'a> {
PictureCompositeMode::ComponentTransferFilter(handle)
}
}
- _ => {
- if filter.is_noop() {
- continue;
- } else {
- PictureCompositeMode::Filter(filter.clone())
- }
- }
- };
+ _ => PictureCompositeMode::Filter(filter.clone()),
+ });
- source = source.add_picture(
- composite_mode,
- Picture3DContext::Out,
- PictureOptions { inflate_if_required },
+ let mut prim_list = PrimitiveList::empty();
+ prim_list.add_prim(
+ cur_instance.clone(),
+ LayoutRect::zero(),
+ spatial_node_index,
+ flags,
+ );
+
+ let filter_pic_index = PictureIndex(self.prim_store.pictures
+ .alloc()
+ .init(PicturePrimitive::new_image(
+ composite_mode.clone(),
+ Picture3DContext::Out,
+ None,
+ true,
+ flags,
+ requested_raster_space,
+ prim_list,
+ spatial_node_index,
+ None,
+ PictureOptions {
+ inflate_if_required,
+ },
+ ))
+ );
+
+ current_pic_index = filter_pic_index;
+ cur_instance = create_prim_instance(
+ current_pic_index,
+ composite_mode.into(),
+ ClipChainId::NONE,
&mut self.interners,
- &mut self.prim_store,
);
+
+ if cur_instance.is_chased() {
+ println!("\tis a composite picture for a stacking context with {:?}", filter);
+ }
+
+ // Run the optimize pass on this picture, to see if we can
+ // collapse opacity and avoid drawing to an off-screen surface.
+ self.prim_store.optimize_picture_if_possible(current_pic_index);
}
if !filter_primitives.is_empty() {
@@ -3711,27 +3474,55 @@ impl<'a> SceneBuilder<'a> {
filter_datas,
);
- source = source.add_picture(
- composite_mode,
- Picture3DContext::Out,
- PictureOptions { inflate_if_required },
+ let mut prim_list = PrimitiveList::empty();
+ prim_list.add_prim(
+ cur_instance.clone(),
+ LayoutRect::zero(),
+ spatial_node_index,
+ flags,
+ );
+
+ let filter_pic_index = PictureIndex(self.prim_store.pictures
+ .alloc()
+ .init(PicturePrimitive::new_image(
+ Some(composite_mode.clone()),
+ Picture3DContext::Out,
+ None,
+ true,
+ flags,
+ requested_raster_space,
+ prim_list,
+ spatial_node_index,
+ None,
+ PictureOptions {
+ inflate_if_required,
+ },
+ ))
+ );
+
+ current_pic_index = filter_pic_index;
+ cur_instance = create_prim_instance(
+ current_pic_index,
+ Some(composite_mode).into(),
+ ClipChainId::NONE,
&mut self.interners,
- &mut self.prim_store,
);
- }
- source
+ if cur_instance.is_chased() {
+ println!("\tis a composite picture for a stacking context with an SVG filter");
+ }
+
+ // Run the optimize pass on this picture, to see if we can
+ // collapse opacity and avoid drawing to an off-screen surface.
+ self.prim_store.optimize_picture_if_possible(current_pic_index);
+ }
+ (current_pic_index, cur_instance)
}
}
pub trait CreateShadow {
- fn create_shadow(
- &self,
- shadow: &Shadow,
- blur_is_noop: bool,
- current_raster_space: RasterSpace,
- ) -> Self;
+ fn create_shadow(&self, shadow: &Shadow) -> Self;
}
pub trait IsVisible {
@@ -3747,19 +3538,6 @@ struct ExtendedPrimitiveInstance {
flags: PrimitiveFlags,
}
-/// Internal tracking information about the currently pushed stacking context.
-/// Used to track what operations need to happen when a stacking context is popped.
-struct StackingContextInfo {
- /// If true, pop an entry from the hit-testing scene.
- pop_hit_testing_clip: bool,
- /// If true, pop an entry from the containing block stack.
- pop_containing_block: bool,
- /// If true, pop an entry from the flattened stacking context stack.
- pop_stacking_context: bool,
- /// If true, set a tile cache barrier when popping the stacking context.
- set_tile_cache_barrier: bool,
-}
-
/// Properties of a stacking context that are maintained
/// during creation of the scene. These structures are
/// not persisted after the initial scene build.
@@ -3770,11 +3548,20 @@ struct FlattenedStackingContext {
/// Primitive instance flags for compositing this stacking context
prim_flags: PrimitiveFlags,
+ /// Whether or not the caller wants this drawn in
+ /// screen space (quality) or local space (performance)
+ requested_raster_space: RasterSpace,
+
/// The positioning node for this stacking context
spatial_node_index: SpatialNodeIndex,
/// The clip chain for this stacking context
clip_chain_id: ClipChainId,
+ clip_id: Option<ClipId>,
+
+ /// If set, this should be provided to caller
+ /// as an output texture.
+ frame_output_pipeline_id: Option<PipelineId>,
/// The list of filters / mix-blend-mode for this
/// stacking context.
@@ -3784,6 +3571,9 @@ struct FlattenedStackingContext {
/// be an offscreen surface.
blit_reason: BlitReason,
+ /// Pipeline this stacking context belongs to.
+ pipeline_id: PipelineId,
+
/// CSS transform-style property.
transform_style: TransformStyle,
@@ -3796,8 +3586,12 @@ struct FlattenedStackingContext {
/// True if this stacking context is redundant (i.e. doesn't require a surface)
is_redundant: bool,
- /// Flags identifying the type of container (among other things) this stacking context is
- flags: StackingContextFlags,
+ /// A helper struct to snap local rects in device space. During frame
+ /// building we may establish new raster roots, however typically that is in
+ /// cases where we won't be applying snapping (e.g. has perspective), or in
+ /// edge cases (e.g. SVG filter) where we can accept slightly incorrect
+ /// behaviour in favour of getting the common case right.
+ snap_to_device: SpaceSnapper,
}
impl FlattenedStackingContext {
@@ -3806,38 +3600,171 @@ impl FlattenedStackingContext {
self.transform_style == TransformStyle::Preserve3D && self.composite_ops.is_empty()
}
+ /// Set up appropriate cluster flags for picture caching on this stacking context.
+ fn init_picture_caching(
+ &mut self,
+ spatial_tree: &SpatialTree,
+ clip_store: &ClipStore,
+ quality_settings: &QualitySettings,
+ ) -> usize {
+ struct SliceInfo {
+ cluster_index: usize,
+ scroll_root: SpatialNodeIndex,
+ cluster_flags: ClusterFlags,
+ }
+
+ let mut content_slice_count = 0;
+ let mut slices: Vec<SliceInfo> = Vec::new();
+
+ // Step through each cluster, and work out where the slice boundaries should be.
+ for (cluster_index, cluster) in self.prim_list.clusters.iter().enumerate() {
+ let scroll_root = spatial_tree.find_scroll_root(
+ cluster.spatial_node_index,
+ );
+
+ // We want to create a slice in the following conditions:
+ // (1) This cluster is a scrollbar
+ // (2) Certain conditions when the scroll root changes (see below)
+ // (3) No slice exists yet
+ let mut cluster_flags = ClusterFlags::empty();
+
+ if cluster.flags.contains(ClusterFlags::SCROLLBAR_CONTAINER) {
+ // Scrollbar containers need to ensure that a new slice is
+ // created both before and after the scrollbar, so that no
+ // other prims with the same scroll root sneak into this slice.
+ cluster_flags.insert(
+ ClusterFlags::CREATE_PICTURE_CACHE_PRE |
+ ClusterFlags::CREATE_PICTURE_CACHE_POST
+ );
+ }
+
+ let create_new_slice_for_scroll_root =
+ slices.last().map(|slice| {
+ match (slice.scroll_root, scroll_root) {
+ (ROOT_SPATIAL_NODE_INDEX, ROOT_SPATIAL_NODE_INDEX) => {
+ // Both current slice and this cluster are fixed position, no need to cut
+ false
+ }
+ (ROOT_SPATIAL_NODE_INDEX, _) => {
+ // A real scroll root is being established, so create a cache slice
+ true
+ }
+ (_, ROOT_SPATIAL_NODE_INDEX) => {
+ // If quality settings force subpixel AA over performance, skip creating
+ // a slice for the fixed position element(s) here.
+ if quality_settings.force_subpixel_aa_where_possible {
+ return false;
+ }
+
+ // A fixed position slice is encountered within a scroll root. Only create
+ // a slice in this case if all the clips referenced by this cluster are also
+ // fixed position. There's no real point in creating slices for these cases,
+ // since we'll have to rasterize them as the scrolling clip moves anyway. It
+ // also allows us to retain subpixel AA in these cases. For these types of
+ // slices, the intra-slice dirty rect handling typically works quite well
+ // (a common case is parallax scrolling effects).
+ for prim_instance in &cluster.prim_instances {
+ let mut current_clip_chain_id = prim_instance.clip_chain_id;
+
+ while current_clip_chain_id != ClipChainId::NONE {
+ let clip_chain_node = &clip_store
+ .clip_chain_nodes[current_clip_chain_id.0 as usize];
+ let spatial_root = spatial_tree.find_scroll_root(clip_chain_node.spatial_node_index);
+ if spatial_root != ROOT_SPATIAL_NODE_INDEX {
+ return false;
+ }
+ current_clip_chain_id = clip_chain_node.parent_clip_chain_id;
+ }
+ }
+
+ true
+ }
+ (curr_scroll_root, scroll_root) => {
+ // Two scrolling roots - only need a new slice if they differ
+ curr_scroll_root != scroll_root
+ }
+ }
+ }).unwrap_or(true);
+
+ if create_new_slice_for_scroll_root {
+ cluster_flags.insert(ClusterFlags::CREATE_PICTURE_CACHE_PRE);
+ }
+
+ // Create a new slice if required
+ if !cluster_flags.is_empty() {
+ slices.push(SliceInfo {
+ cluster_index,
+ scroll_root,
+ cluster_flags,
+ });
+ }
+ }
+
+ // If the page would create too many slices (an arbitrary definition where
+ // it's assumed the GPU memory + compositing overhead would be too high)
+ // then just create a single picture cache for the entire content. This at
+ // least means that we can cache small content changes efficiently when
+ // scrolling isn't occurring. Scrolling regions will be handled reasonably
+ // efficiently by the dirty rect tracking (since it's likely that if the
+ // page has so many slices there isn't a single major scroll region).
+ const MAX_CONTENT_SLICES: usize = 8;
+
+ if slices.len() > MAX_CONTENT_SLICES {
+ if let Some(cluster) = self.prim_list.clusters.first_mut() {
+ content_slice_count = 1;
+ cluster.flags.insert(ClusterFlags::CREATE_PICTURE_CACHE_PRE);
+ cluster.cache_scroll_root = None;
+ }
+ } else {
+ // Walk the list of slices, setting appropriate flags on the clusters which are
+ // later used during setup_picture_caching.
+ for slice in slices.drain(..) {
+ content_slice_count += 1;
+ let cluster = &mut self.prim_list.clusters[slice.cluster_index];
+ // Mark that this cluster creates a picture cache slice
+ cluster.flags.insert(slice.cluster_flags);
+ cluster.cache_scroll_root = Some(slice.scroll_root);
+ }
+ }
+
+ // Always end the cache at the end of the stacking context, so that we don't
+ // cache anything from primitives outside this pipeline in the same slice.
+ if let Some(cluster) = self.prim_list.clusters.last_mut() {
+ cluster.flags.insert(ClusterFlags::CREATE_PICTURE_CACHE_POST);
+ }
+
+ content_slice_count
+ }
+
/// Return true if the stacking context isn't needed.
pub fn is_redundant(
- sc_flags: StackingContextFlags,
context_3d: &Picture3DContext<ExtendedPrimitiveInstance>,
composite_ops: &CompositeOps,
- blit_reason: BlitReason,
- parent: Option<&FlattenedStackingContext>,
prim_flags: PrimitiveFlags,
+ blit_reason: BlitReason,
+ requested_raster_space: RasterSpace,
+ parent: &FlattenedStackingContext,
) -> bool {
- // If this is a backdrop or blend container, it's needed
- if sc_flags.intersects(StackingContextFlags::IS_BACKDROP_ROOT | StackingContextFlags::IS_BLEND_CONTAINER) {
+ // Any 3d context is required
+ if let Picture3DContext::In { .. } = context_3d {
return false;
}
- // Any 3d context is required
- if let Picture3DContext::In { .. } = context_3d {
+ // If there are filters / mix-blend-mode
+ if !composite_ops.filters.is_empty() {
return false;
}
- // If any filters are present that affect the output
- if composite_ops.has_valid_filters() {
+ // If there are svg filters
+ if !composite_ops.filter_primitives.is_empty() {
return false;
}
// We can skip mix-blend modes if they are the first primitive in a stacking context,
// see pop_stacking_context for a full explanation.
- if composite_ops.mix_blend_mode.is_some() {
- if let Some(parent) = parent {
- if !parent.prim_list.is_empty() {
- return false;
- }
- }
+ if composite_ops.mix_blend_mode.is_some() &&
+ !parent.prim_list.is_empty() {
+ return false;
}
// If backface visibility is explicitly set.
@@ -3845,11 +3772,21 @@ impl FlattenedStackingContext {
return false;
}
+ // If rasterization space is different
+ if requested_raster_space != parent.requested_raster_space {
+ return false;
+ }
+
// If need to isolate in surface due to clipping / mix-blend-mode
if !blit_reason.is_empty() {
return false;
}
+ // If this stacking context is a scrollbar, retain it so it can form a picture cache slice
+ if prim_flags.contains(PrimitiveFlags::IS_SCROLLBAR_CONTAINER) {
+ return false;
+ }
+
// It is redundant!
true
}
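
The slice-boundary rules in init_picture_caching above hinge on whether the previous slice's scroll root and the current cluster's scroll root are the fixed-position root. A condensed sketch of that decision table (it folds the per-primitive clip walk into a single all_clips_fixed flag and omits the force_subpixel_aa_where_possible override):

#[derive(Clone, Copy)]
enum Root { Fixed, Scroll(u32) }

fn needs_new_slice(prev: Option<Root>, cur: Root, all_clips_fixed: bool) -> bool {
    match prev {
        None => true, // no slice exists yet
        Some(prev) => match (prev, cur) {
            (Root::Fixed, Root::Fixed) => false,    // both fixed: keep the slice
            (Root::Fixed, Root::Scroll(_)) => true, // a real scroll root begins
            // Fixed content inside a scroll root only earns a slice
            // when all of its clips are fixed as well.
            (Root::Scroll(_), Root::Fixed) => all_clips_fixed,
            (Root::Scroll(a), Root::Scroll(b)) => a != b,
        },
    }
}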
@@ -3871,10 +3808,13 @@ impl FlattenedStackingContext {
.init(PicturePrimitive::new_image(
composite_mode.clone(),
flat_items_context_3d,
+ None,
true,
self.prim_flags,
+ self.requested_raster_space,
mem::replace(&mut self.prim_list, PrimitiveList::empty()),
self.spatial_node_index,
+ None,
PictureOptions::default(),
))
);
@@ -4043,11 +3983,108 @@ fn process_repeat_size(
)
}
-fn read_gradient_stops(stops: ItemRange<GradientStop>) -> Vec<GradientStopKey> {
- stops.iter().map(|stop| {
- GradientStopKey {
- offset: stop.offset,
- color: stop.color.into(),
+/// Given a PrimitiveList and scroll root, construct a tile cache primitive instance
+/// that wraps the primitive list.
+fn create_tile_cache(
+ slice: usize,
+ slice_flags: SliceFlags,
+ scroll_root: SpatialNodeIndex,
+ prim_list: PrimitiveList,
+ background_color: Option<ColorF>,
+ shared_clips: Vec<ClipInstance>,
+ interners: &mut Interners,
+ prim_store: &mut PrimitiveStore,
+ clip_store: &mut ClipStore,
+ picture_cache_spatial_nodes: &mut FastHashSet<SpatialNodeIndex>,
+ frame_builder_config: &FrameBuilderConfig,
+) -> PrimitiveInstance {
+ // Add this spatial node to the list to check for complex transforms
+ // at the start of a frame build.
+ picture_cache_spatial_nodes.insert(scroll_root);
+
+ // Now, create a picture with tile caching enabled that will hold all
+ // of the primitives selected as belonging to the main scroll root.
+ let pic_key = PictureKey::new(
+ Picture {
+ composite_mode_key: PictureCompositeKey::Identity,
+ },
+ );
+
+ let pic_data_handle = interners
+ .picture
+ .intern(&pic_key, || ());
+
+ // Build a clip-chain for the tile cache, that contains any of the shared clips
+ // we will apply when drawing the tiles. In all cases provided by Gecko, these
+ // are rectangle clips with a scale/offset transform only, and get handled as
+ // a simple local clip rect in the vertex shader. However, this should in theory
+ // also work with any complex clips, such as rounded rects and image masks, by
+ // producing a clip mask that is applied to the picture cache tiles.
+ let mut parent_clip_chain_id = ClipChainId::NONE;
+ for clip_instance in &shared_clips {
+ // Add this spatial node to the list to check for complex transforms
+ // at the start of a frame build.
+ picture_cache_spatial_nodes.insert(clip_instance.spatial_node_index);
+
+ parent_clip_chain_id = clip_store.add_clip_chain_node(
+ clip_instance.handle,
+ clip_instance.spatial_node_index,
+ parent_clip_chain_id,
+ );
+ }
+
+ let tile_cache = Box::new(TileCacheInstance::new(
+ slice,
+ slice_flags,
+ scroll_root,
+ background_color,
+ shared_clips,
+ parent_clip_chain_id,
+ frame_builder_config,
+ ));
+
+ let pic_index = prim_store.pictures.alloc().init(PicturePrimitive::new_image(
+ Some(PictureCompositeMode::TileCache { }),
+ Picture3DContext::Out,
+ None,
+ true,
+ PrimitiveFlags::IS_BACKFACE_VISIBLE,
+ RasterSpace::Screen,
+ prim_list,
+ scroll_root,
+ Some(tile_cache),
+ PictureOptions::default(),
+ ));
+
+ PrimitiveInstance::new(
+ LayoutRect::max_rect(),
+ PrimitiveInstanceKind::Picture {
+ data_handle: pic_data_handle,
+ pic_index: PictureIndex(pic_index),
+ segment_instance_index: SegmentInstanceIndex::INVALID,
+ },
+ parent_clip_chain_id,
+ )
+}
+
+// Helper fn to collect clip handles from a given clip chain.
+fn add_clips(
+ clip_chain_id: ClipChainId,
+ prim_clips: &mut Vec<ClipInstance>,
+ clip_store: &ClipStore,
+ interners: &Interners,
+) {
+ let mut current_clip_chain_id = clip_chain_id;
+
+ while current_clip_chain_id != ClipChainId::NONE {
+ let clip_chain_node = &clip_store
+ .clip_chain_nodes[current_clip_chain_id.0 as usize];
+
+ let clip_node_data = &interners.clip[clip_chain_node.handle];
+ if let ClipNodeKind::Rectangle = clip_node_data.clip_node_kind {
+ prim_clips.push(ClipInstance::new(clip_chain_node.handle, clip_chain_node.spatial_node_index));
}
- }).collect()
+
+ current_clip_chain_id = clip_chain_node.parent_clip_chain_id;
+ }
}
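
add_clips above walks a clip chain, which is a singly linked list of nodes referenced by id and terminated by a NONE sentinel, keeping only the simple rectangle clips. A minimal sketch of the same walk over stand-in types:

const NONE: usize = usize::MAX;

struct Node { parent: usize, is_rectangle: bool }

fn collect_rect_clips(nodes: &[Node], mut id: usize) -> Vec<usize> {
    let mut out = Vec::new();
    while id != NONE {
        if nodes[id].is_rectangle {
            out.push(id); // only simple rectangle clips are collected
        }
        id = nodes[id].parent;
    }
    out
}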
diff --git a/third_party/webrender/webrender/src/screen_capture.rs b/third_party/webrender/webrender/src/screen_capture.rs
index 9da2db2397e..56fdf458674 100644
--- a/third_party/webrender/webrender/src/screen_capture.rs
+++ b/third_party/webrender/webrender/src/screen_capture.rs
@@ -6,7 +6,7 @@
use std::collections::HashMap;
-use api::{ImageFormat, ImageBufferKind};
+use api::{ImageFormat, TextureTarget};
use api::units::*;
use gleam::gl::GlType;
@@ -115,9 +115,6 @@ impl AsyncScreenshotGrabber {
) -> (AsyncScreenshotHandle, DeviceIntSize) {
let screenshot_size = match self.mode {
AsyncScreenshotGrabberMode::ProfilerScreenshots => {
- assert_ne!(window_rect.size.width, 0);
- assert_ne!(window_rect.size.height, 0);
-
let scale = (buffer_size.width as f32 / window_rect.size.width as f32)
.min(buffer_size.height as f32 / window_rect.size.height as f32);
@@ -144,7 +141,7 @@ impl AsyncScreenshotGrabber {
let read_size = match self.mode {
AsyncScreenshotGrabberMode::ProfilerScreenshots => {
let stride = (screenshot_size.width * image_format.bytes_per_pixel()) as usize;
- let rounded = round_up_to_multiple(stride, device.required_pbo_stride().num_bytes(image_format));
+ let rounded = round_up_to_multiple(stride, device.optimal_pbo_stride().num_bytes(image_format));
let optimal_width = rounded as i32 / image_format.bytes_per_pixel();
DeviceIntSize::new(
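
The stride computation above pads each row of the readback up to the device's preferred PBO alignment and then converts back to a pixel width. A worked sketch of the arithmetic (the helper mirrors what a round_up_to_multiple utility is assumed to do):

fn round_up_to_multiple(value: usize, multiple: usize) -> usize {
    (value + multiple - 1) / multiple * multiple
}

fn padded_read_width(width: i32, bytes_per_pixel: i32, alignment: usize) -> i32 {
    let stride = (width * bytes_per_pixel) as usize;
    let rounded = round_up_to_multiple(stride, alignment);
    rounded as i32 / bytes_per_pixel
}

// e.g. a 250px-wide RGBA8 row with 256-byte alignment:
// stride 1000 -> rounded 1024 -> read width 256 pixels.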
@@ -185,7 +182,7 @@ impl AsyncScreenshotGrabber {
0,
);
- ReadTarget::from_texture(&self.scaling_textures[0])
+ ReadTarget::from_texture(&self.scaling_textures[0], 0)
}
AsyncScreenshotGrabberMode::CompositionRecorder => ReadTarget::Default,
@@ -251,12 +248,13 @@ impl AsyncScreenshotGrabber {
// texture is the wrong size, then create a new one.
if level == self.scaling_textures.len() || self.scaling_textures[level].get_dimensions() != texture_size {
let texture = device.create_texture(
- ImageBufferKind::Texture2D,
+ TextureTarget::Default,
image_format,
texture_size.width,
texture_size.height,
TextureFilter::Linear,
Some(RenderTargetInfo { has_depth: false }),
+ 1,
);
if level == self.scaling_textures.len() {
self.scaling_textures.push(texture);
@@ -280,14 +278,14 @@ impl AsyncScreenshotGrabber {
);
(
- ReadTarget::from_texture(&self.scaling_textures[level + 1]),
+ ReadTarget::from_texture(&self.scaling_textures[level + 1], 0),
DeviceIntRect::new(DeviceIntPoint::new(0, 0), dest_size * 2),
)
} else {
(read_target, read_target_rect)
};
- let draw_target = DrawTarget::from_texture(&self.scaling_textures[level], false);
+ let draw_target = DrawTarget::from_texture(&self.scaling_textures[level], 0 as _, false);
let draw_target_rect = draw_target
.to_framebuffer_rect(DeviceIntRect::new(DeviceIntPoint::new(0, 0), dest_size));
diff --git a/third_party/webrender/webrender/src/segment.rs b/third_party/webrender/webrender/src/segment.rs
index 84d19cb4fc1..77bca528215 100644
--- a/third_party/webrender/webrender/src/segment.rs
+++ b/third_party/webrender/webrender/src/segment.rs
@@ -49,35 +49,13 @@
//! [clip.rs]: ../clip/index.html
//!
-use api::{BorderRadius, ClipMode};
+use api::{BorderRadius, ClipMode, EdgeAaSegmentMask};
use api::units::*;
use std::{cmp, usize};
-use crate::util::{extract_inner_rect_safe};
+use crate::util::{extract_inner_rect_safe, RectHelpers};
use smallvec::SmallVec;
bitflags! {
- /// Each bit of the edge AA mask is:
- /// 0, when the edge of the primitive needs to be considered for AA
- /// 1, when the edge of the segment needs to be considered for AA
- ///
- /// *Note*: the bit values have to match the shader logic in
- /// `write_transform_vertex()` function.
- #[cfg_attr(feature = "capture", derive(Serialize))]
- #[cfg_attr(feature = "replay", derive(Deserialize))]
- #[derive(MallocSizeOf)]
- pub struct EdgeAaSegmentMask: u8 {
- ///
- const LEFT = 0x1;
- ///
- const TOP = 0x2;
- ///
- const RIGHT = 0x4;
- ///
- const BOTTOM = 0x8;
- }
-}
-
-bitflags! {
pub struct ItemFlags: u8 {
const X_ACTIVE = 0x1;
const Y_ACTIVE = 0x2;
@@ -302,7 +280,7 @@ impl SegmentBuilder {
) {
self.has_interesting_clips = true;
- if inner_rect.is_empty() {
+ if !inner_rect.is_well_formed_and_nonempty() {
self.items.push(Item::new(
outer_rect,
None,
@@ -353,8 +331,6 @@ impl SegmentBuilder {
),
];
- self.items.reserve(segments.len() + 1);
-
for segment in segments {
self.items.push(Item::new(
*segment,
@@ -402,8 +378,6 @@ impl SegmentBuilder {
let p2 = inner.bottom_right();
let p3 = rect.bottom_right();
- self.items.reserve(9);
-
let corner_segments = &[
LayoutRect::new(
LayoutPoint::new(p0.x, p0.y),
@@ -705,9 +679,9 @@ fn emit_segment_if_needed(
#[cfg(test)]
mod test {
- use api::{BorderRadius, ClipMode};
+ use api::{BorderRadius, ClipMode, EdgeAaSegmentMask};
use api::units::{LayoutPoint, LayoutRect, LayoutSize};
- use super::{Segment, SegmentBuilder, EdgeAaSegmentMask};
+ use super::{Segment, SegmentBuilder};
use std::cmp;
fn rect(x0: f32, y0: f32, x1: f32, y1: f32) -> LayoutRect {
diff --git a/third_party/webrender/webrender/src/renderer/shade.rs b/third_party/webrender/webrender/src/shade.rs
index 1f7d5cb444f..cce58259015 100644
--- a/third_party/webrender/webrender/src/renderer/shade.rs
+++ b/third_party/webrender/webrender/src/shade.rs
@@ -2,15 +2,14 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{ImageBufferKind, units::DeviceSize};
use crate::batch::{BatchKey, BatchKind, BrushBatchKind, BatchFeatures};
-use crate::composite::{CompositeFeatures, CompositeSurfaceFormat};
+use crate::composite::CompositeSurfaceFormat;
use crate::device::{Device, Program, ShaderError};
use euclid::default::Transform3D;
use crate::glyph_rasterizer::GlyphFormat;
use crate::renderer::{
desc,
- BlendMode, DebugFlags, RendererError, RendererOptions,
+ BlendMode, DebugFlags, ImageBufferKind, RendererError, RendererOptions,
TextureSampler, VertexArrayKind, ShaderPrecacheFlags,
};
@@ -22,39 +21,33 @@ use std::rc::Rc;
use webrender_build::shader::{ShaderFeatures, ShaderFeatureFlags, get_shader_features};
-/// Which extension version to use for texture external support.
-#[derive(Clone, Copy, Debug, PartialEq)]
-enum TextureExternalVersion {
- // GL_OES_EGL_image_external_essl3 (Compatible with ESSL 3.0 and
- // later shaders, but not supported on all GLES 3 devices.)
- ESSL3,
- // GL_OES_EGL_image_external (Compatible with ESSL 1.0 shaders)
- ESSL1,
-}
-
-fn get_feature_string(kind: ImageBufferKind, texture_external_version: TextureExternalVersion) -> &'static str {
- match (kind, texture_external_version) {
- (ImageBufferKind::Texture2D, _) => "TEXTURE_2D",
- (ImageBufferKind::TextureRect, _) => "TEXTURE_RECT",
- (ImageBufferKind::TextureExternal, TextureExternalVersion::ESSL3) => "TEXTURE_EXTERNAL",
- (ImageBufferKind::TextureExternal, TextureExternalVersion::ESSL1) => "TEXTURE_EXTERNAL_ESSL1",
+impl ImageBufferKind {
+ pub(crate) fn get_feature_string(&self) -> &'static str {
+ match *self {
+ ImageBufferKind::Texture2D => "TEXTURE_2D",
+ ImageBufferKind::Texture2DArray => "",
+ ImageBufferKind::TextureRect => "TEXTURE_RECT",
+ ImageBufferKind::TextureExternal => "TEXTURE_EXTERNAL",
+ }
}
-}
-fn has_platform_support(kind: ImageBufferKind, gl_type: &GlType) -> bool {
- match (kind, gl_type) {
- (ImageBufferKind::Texture2D, _) => true,
- (ImageBufferKind::TextureRect, &GlType::Gles) => false,
- (ImageBufferKind::TextureRect, &GlType::Gl) => true,
- (ImageBufferKind::TextureExternal, &GlType::Gles) => true,
- (ImageBufferKind::TextureExternal, &GlType::Gl) => false,
+ fn has_platform_support(&self, gl_type: &GlType) -> bool {
+ match (*self, gl_type) {
+ (ImageBufferKind::Texture2D, _) => true,
+ (ImageBufferKind::Texture2DArray, _) => true,
+ (ImageBufferKind::TextureRect, &GlType::Gles) => false,
+ (ImageBufferKind::TextureRect, &GlType::Gl) => true,
+ (ImageBufferKind::TextureExternal, &GlType::Gles) => true,
+ (ImageBufferKind::TextureExternal, &GlType::Gl) => false,
+ }
}
}
-pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 3] = [
+pub const IMAGE_BUFFER_KINDS: [ImageBufferKind; 4] = [
ImageBufferKind::Texture2D,
ImageBufferKind::TextureRect,
ImageBufferKind::TextureExternal,
+ ImageBufferKind::Texture2DArray,
];
const ADVANCED_BLEND_FEATURE: &str = "ADVANCED_BLEND";
@@ -63,18 +56,18 @@ const DEBUG_OVERDRAW_FEATURE: &str = "DEBUG_OVERDRAW";
const DITHERING_FEATURE: &str = "DITHERING";
const DUAL_SOURCE_FEATURE: &str = "DUAL_SOURCE_BLENDING";
const FAST_PATH_FEATURE: &str = "FAST_PATH";
+const PIXEL_LOCAL_STORAGE_FEATURE: &str = "PIXEL_LOCAL_STORAGE";
pub(crate) enum ShaderKind {
Primitive,
Cache(VertexArrayKind),
- ClipCache(VertexArrayKind),
+ ClipCache,
Brush,
Text,
#[allow(dead_code)]
VectorStencil,
#[allow(dead_code)]
VectorCover,
- #[allow(dead_code)]
Resolve,
Composite,
Clear,
@@ -92,13 +85,20 @@ impl LazilyCompiledShader {
pub(crate) fn new(
kind: ShaderKind,
name: &'static str,
- unsorted_features: &[&'static str],
+ features: &[&'static str],
device: &mut Device,
precache_flags: ShaderPrecacheFlags,
shader_list: &ShaderFeatures,
) -> Result<Self, ShaderError> {
- let mut features = unsorted_features.to_vec();
- features.sort();
+ let mut shader = LazilyCompiledShader {
+ program: None,
+ name,
+ kind,
+ //Note: this isn't really the default state, but there is no chance
+ // an actual projection passed here would accidentally match.
+ cached_projection: Transform3D::identity(),
+ features: features.to_vec(),
+ };
// Ensure this shader config is in the available shader list so that we get
// alerted if the list gets out-of-date when shaders or features are added.
@@ -110,16 +110,6 @@ impl LazilyCompiledShader {
config,
);
- let mut shader = LazilyCompiledShader {
- program: None,
- name,
- kind,
- //Note: this isn't really the default state, but there is no chance
- // an actual projection passed here would accidentally match.
- cached_projection: Transform3D::identity(),
- features,
- };
-
if precache_flags.intersects(ShaderPrecacheFlags::ASYNC_COMPILE | ShaderPrecacheFlags::FULL_COMPILE) {
let t0 = precise_time_ns();
shader.get_internal(device, precache_flags)?;
@@ -127,7 +117,7 @@ impl LazilyCompiledShader {
debug!("[C: {:.1} ms ] Precache {} {:?}",
(t1 - t0) as f64 / 1000000.0,
name,
- unsorted_features
+ features
);
}
@@ -138,7 +128,6 @@ impl LazilyCompiledShader {
&mut self,
device: &mut Device,
projection: &Transform3D<f32>,
- texture_size: Option<DeviceSize>,
renderer_errors: &mut Vec<RendererError>,
) {
let update_projection = self.cached_projection != *projection;
@@ -150,9 +139,6 @@ impl LazilyCompiledShader {
}
};
device.bind_program(program);
- if let Some(texture_size) = texture_size {
- device.set_shader_texture_size(program, texture_size);
- }
if update_projection {
device.set_uniforms(program, projection);
// thanks NLL for this (`program` technically borrows `self`)
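
The bind path above caches the last projection matrix so the uniform upload only happens when it actually changes. A minimal sketch of the pattern with a hypothetical Device; the real code also lazily compiles the program and reports errors:

struct Device;
impl Device {
    fn bind_program(&mut self, _program: &u32) {}
    fn set_uniforms(&mut self, _program: &u32, _projection: &[f32; 16]) {}
}

struct CachedShader {
    program: u32,
    cached_projection: [f32; 16],
}

impl CachedShader {
    fn bind(&mut self, device: &mut Device, projection: &[f32; 16]) {
        device.bind_program(&self.program);
        // Skip the redundant upload when the projection is unchanged.
        if &self.cached_projection != projection {
            device.set_uniforms(&self.program, projection);
            self.cached_projection = *projection;
        }
    }
}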
@@ -202,7 +188,7 @@ impl LazilyCompiledShader {
&self.features,
)
}
- ShaderKind::ClipCache(..) => {
+ ShaderKind::ClipCache => {
create_clip_shader(
self.name,
device,
@@ -223,7 +209,7 @@ impl LazilyCompiledShader {
ShaderKind::Cache(format) => format,
ShaderKind::VectorStencil => VertexArrayKind::VectorStencil,
ShaderKind::VectorCover => VertexArrayKind::VectorCover,
- ShaderKind::ClipCache(format) => format,
+ ShaderKind::ClipCache => VertexArrayKind::Clip,
ShaderKind::Resolve => VertexArrayKind::Resolve,
ShaderKind::Composite => VertexArrayKind::Composite,
ShaderKind::Clear => VertexArrayKind::Clear,
@@ -232,14 +218,9 @@ impl LazilyCompiledShader {
let vertex_descriptor = match vertex_format {
VertexArrayKind::Primitive => &desc::PRIM_INSTANCES,
VertexArrayKind::LineDecoration => &desc::LINE,
- VertexArrayKind::FastLinearGradient => &desc::FAST_LINEAR_GRADIENT,
- VertexArrayKind::LinearGradient => &desc::LINEAR_GRADIENT,
- VertexArrayKind::RadialGradient => &desc::RADIAL_GRADIENT,
- VertexArrayKind::ConicGradient => &desc::CONIC_GRADIENT,
+ VertexArrayKind::Gradient => &desc::GRADIENT,
VertexArrayKind::Blur => &desc::BLUR,
- VertexArrayKind::ClipImage => &desc::CLIP_IMAGE,
- VertexArrayKind::ClipRect => &desc::CLIP_RECT,
- VertexArrayKind::ClipBoxShadow => &desc::CLIP_BOX_SHADOW,
+ VertexArrayKind::Clip => &desc::CLIP,
VertexArrayKind::VectorStencil => &desc::VECTOR_STENCIL,
VertexArrayKind::VectorCover => &desc::VECTOR_COVER,
VertexArrayKind::Border => &desc::BORDER,
@@ -253,7 +234,7 @@ impl LazilyCompiledShader {
device.link_program(program, vertex_descriptor)?;
device.bind_program(program);
match self.kind {
- ShaderKind::ClipCache(..) => {
+ ShaderKind::ClipCache => {
device.bind_shader_samplers(
&program,
&[
@@ -274,12 +255,13 @@ impl LazilyCompiledShader {
("sColor1", TextureSampler::Color1),
("sColor2", TextureSampler::Color2),
("sDither", TextureSampler::Dither),
+ ("sPrevPassAlpha", TextureSampler::PrevPassAlpha),
+ ("sPrevPassColor", TextureSampler::PrevPassColor),
("sTransformPalette", TextureSampler::TransformPalette),
("sRenderTasks", TextureSampler::RenderTasks),
("sGpuCache", TextureSampler::GpuCache),
("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
- ("sClipMask", TextureSampler::ClipMask),
],
);
}
@@ -324,19 +306,22 @@ impl BrushShader {
shader_list: &ShaderFeatures,
use_advanced_blend: bool,
use_dual_source: bool,
+ use_pixel_local_storage: bool,
) -> Result<Self, ShaderError> {
- let opaque_features = features.to_vec();
let opaque = LazilyCompiledShader::new(
ShaderKind::Brush,
name,
- &opaque_features,
+ features,
device,
precache_flags,
&shader_list,
)?;
- let mut alpha_features = opaque_features.to_vec();
+ let mut alpha_features = features.to_vec();
alpha_features.push(ALPHA_FEATURE);
+ if use_pixel_local_storage {
+ alpha_features.push(PIXEL_LOCAL_STORAGE_FEATURE);
+ }
let alpha = LazilyCompiledShader::new(
ShaderKind::Brush,
@@ -404,7 +389,7 @@ impl BrushShader {
})
}
- fn get(&mut self, blend_mode: BlendMode, features: BatchFeatures, debug_flags: DebugFlags)
+ fn get(&mut self, blend_mode: BlendMode, debug_flags: DebugFlags)
-> &mut LazilyCompiledShader {
match blend_mode {
_ if debug_flags.contains(DebugFlags::SHOW_OVERDRAW) => &mut self.debug_overdraw,
@@ -413,22 +398,13 @@ impl BrushShader {
BlendMode::PremultipliedAlpha |
BlendMode::PremultipliedDestOut |
BlendMode::SubpixelConstantTextColor(..) |
- BlendMode::SubpixelWithBgColor |
- BlendMode::Screen |
- BlendMode::Exclusion => {
- if features.contains(BatchFeatures::ALPHA_PASS) {
- &mut self.alpha
- } else {
- &mut self.opaque
- }
- }
+ BlendMode::SubpixelWithBgColor => &mut self.alpha,
BlendMode::Advanced(_) => {
self.advanced_blend
.as_mut()
.expect("bug: no advanced blend shader loaded")
}
- BlendMode::SubpixelDualSource |
- BlendMode::MultiplyDualSource => {
+ BlendMode::SubpixelDualSource => {
self.dual_source
.as_mut()
.expect("bug: no dual source shader loaded")
@@ -465,7 +441,6 @@ impl TextShader {
) -> Result<Self, ShaderError> {
let mut simple_features = features.to_vec();
simple_features.push("ALPHA_PASS");
- simple_features.push("TEXTURE_2D");
let simple = LazilyCompiledShader::new(
ShaderKind::Text,
@@ -479,7 +454,6 @@ impl TextShader {
let mut glyph_transform_features = features.to_vec();
glyph_transform_features.push("GLYPH_TRANSFORM");
glyph_transform_features.push("ALPHA_PASS");
- glyph_transform_features.push("TEXTURE_2D");
let glyph_transform = LazilyCompiledShader::new(
ShaderKind::Text,
@@ -492,7 +466,6 @@ impl TextShader {
let mut debug_overdraw_features = features.to_vec();
debug_overdraw_features.push("DEBUG_OVERDRAW");
- debug_overdraw_features.push("TEXTURE_2D");
let debug_overdraw = LazilyCompiledShader::new(
ShaderKind::Text,
@@ -559,12 +532,9 @@ pub struct Shaders {
pub cs_blur_rgba8: LazilyCompiledShader,
pub cs_border_segment: LazilyCompiledShader,
pub cs_border_solid: LazilyCompiledShader,
- pub cs_scale: Vec<Option<LazilyCompiledShader>>,
+ pub cs_scale: LazilyCompiledShader,
pub cs_line_decoration: LazilyCompiledShader,
- pub cs_fast_linear_gradient: LazilyCompiledShader,
- pub cs_linear_gradient: LazilyCompiledShader,
- pub cs_radial_gradient: LazilyCompiledShader,
- pub cs_conic_gradient: LazilyCompiledShader,
+ pub cs_gradient: LazilyCompiledShader,
pub cs_svg_filter: LazilyCompiledShader,
// Brush shaders
@@ -574,9 +544,10 @@ pub struct Shaders {
brush_blend: BrushShader,
brush_mix_blend: BrushShader,
brush_yuv_image: Vec<Option<BrushShader>>,
+ brush_conic_gradient: BrushShader,
+ brush_radial_gradient: BrushShader,
brush_linear_gradient: BrushShader,
brush_opacity: BrushShader,
- brush_opacity_aa: BrushShader,
/// These are "cache clip shaders". These shaders are used to
/// draw clip instances into the cached clip mask. The results
@@ -596,6 +567,12 @@ pub struct Shaders {
pub ps_text_run: TextShader,
pub ps_text_run_dual_source: Option<TextShader>,
+ // Helper shaders for pixel local storage render paths.
+ // pls_init: Initialize pixel local storage, based on current framebuffer value.
+ // pls_resolve: Convert pixel local storage, writing out to fragment value.
+ pub pls_init: Option<LazilyCompiledShader>,
+ pub pls_resolve: Option<LazilyCompiledShader>,
+
ps_split_composite: LazilyCompiledShader,
pub ps_clear: LazilyCompiledShader,
@@ -610,9 +587,6 @@ pub struct Shaders {
// shaders with WR_FEATURE flags on or off based on the type of image
// buffer we're sourcing from (see IMAGE_BUFFER_KINDS).
pub composite_rgba: Vec<Option<LazilyCompiledShader>>,
- // A faster set of rgba composite shaders that do not support UV clamping
- // or color modulation.
- pub composite_rgba_fast_path: Vec<Option<LazilyCompiledShader>>,
// The same set of composite shaders but with WR_FEATURE_YUV added.
pub composite_yuv: Vec<Option<LazilyCompiledShader>>,
}
@@ -623,28 +597,25 @@ impl Shaders {
gl_type: GlType,
options: &RendererOptions,
) -> Result<Self, ShaderError> {
+ let use_pixel_local_storage = device
+ .get_capabilities()
+ .supports_pixel_local_storage;
+ // If using PLS, we disable all subpixel AA implicitly. Subpixel AA is always
+ // disabled on mobile devices anyway, due to uncertainty over the subpixel
+ // layout configuration.
let use_dual_source_blending =
device.get_capabilities().supports_dual_source_blending &&
- options.allow_dual_source_blending;
+ options.allow_dual_source_blending &&
+ !use_pixel_local_storage;
let use_advanced_blend_equation =
device.get_capabilities().supports_advanced_blend_equation &&
options.allow_advanced_blend_equation;
- let texture_external_version = if device.get_capabilities().supports_image_external_essl3 {
- TextureExternalVersion::ESSL3
- } else {
- TextureExternalVersion::ESSL1
- };
let mut shader_flags = match gl_type {
GlType::Gl => ShaderFeatureFlags::GL,
- GlType::Gles => {
- let texture_external_flag = match texture_external_version {
- TextureExternalVersion::ESSL3 => ShaderFeatureFlags::TEXTURE_EXTERNAL,
- TextureExternalVersion::ESSL1 => ShaderFeatureFlags::TEXTURE_EXTERNAL_ESSL1,
- };
- ShaderFeatureFlags::GLES | texture_external_flag
- }
+ GlType::Gles => ShaderFeatureFlags::GLES | ShaderFeatureFlags::TEXTURE_EXTERNAL,
};
+ shader_flags.set(ShaderFeatureFlags::PIXEL_LOCAL_STORAGE, use_pixel_local_storage);
shader_flags.set(ShaderFeatureFlags::ADVANCED_BLEND_EQUATION, use_advanced_blend_equation);
shader_flags.set(ShaderFeatureFlags::DUAL_SOURCE_BLENDING, use_dual_source_blending);
shader_flags.set(ShaderFeatureFlags::DITHERING, options.enable_dithering);
@@ -658,6 +629,7 @@ impl Shaders {
&shader_list,
false /* advanced blend */,
false /* dual source */,
+ use_pixel_local_storage,
)?;
let brush_blend = BrushShader::new(
@@ -668,6 +640,7 @@ impl Shaders {
&shader_list,
false /* advanced blend */,
false /* dual source */,
+ use_pixel_local_storage,
)?;
let brush_mix_blend = BrushShader::new(
@@ -678,10 +651,11 @@ impl Shaders {
&shader_list,
false /* advanced blend */,
false /* dual source */,
+ use_pixel_local_storage,
)?;
- let brush_linear_gradient = BrushShader::new(
- "brush_linear_gradient",
+ let brush_conic_gradient = BrushShader::new(
+ "brush_conic_gradient",
device,
if options.enable_dithering {
&[DITHERING_FEATURE]
@@ -692,16 +666,37 @@ impl Shaders {
&shader_list,
false /* advanced blend */,
false /* dual source */,
+ use_pixel_local_storage,
)?;
- let brush_opacity_aa = BrushShader::new(
- "brush_opacity",
+ let brush_radial_gradient = BrushShader::new(
+ "brush_radial_gradient",
+ device,
+ if options.enable_dithering {
+ &[DITHERING_FEATURE]
+ } else {
+ &[]
+ },
+ options.precache_flags,
+ &shader_list,
+ false /* advanced blend */,
+ false /* dual source */,
+ use_pixel_local_storage,
+ )?;
+
+ let brush_linear_gradient = BrushShader::new(
+ "brush_linear_gradient",
device,
- &["ANTIALIASING"],
+ if options.enable_dithering {
+ &[DITHERING_FEATURE]
+ } else {
+ &[]
+ },
options.precache_flags,
&shader_list,
false /* advanced blend */,
false /* dual source */,
+ use_pixel_local_storage,
)?;
let brush_opacity = BrushShader::new(
@@ -712,6 +707,7 @@ impl Shaders {
&shader_list,
false /* advanced blend */,
false /* dual source */,
+ use_pixel_local_storage,
)?;
let cs_blur_a8 = LazilyCompiledShader::new(
@@ -742,7 +738,7 @@ impl Shaders {
)?;
let cs_clip_rectangle_slow = LazilyCompiledShader::new(
- ShaderKind::ClipCache(VertexArrayKind::ClipRect),
+ ShaderKind::ClipCache,
"cs_clip_rectangle",
&[],
device,
@@ -751,7 +747,7 @@ impl Shaders {
)?;
let cs_clip_rectangle_fast = LazilyCompiledShader::new(
- ShaderKind::ClipCache(VertexArrayKind::ClipRect),
+ ShaderKind::ClipCache,
"cs_clip_rectangle",
&[FAST_PATH_FEATURE],
device,
@@ -760,73 +756,77 @@ impl Shaders {
)?;
let cs_clip_box_shadow = LazilyCompiledShader::new(
- ShaderKind::ClipCache(VertexArrayKind::ClipBoxShadow),
+ ShaderKind::ClipCache,
"cs_clip_box_shadow",
- &["TEXTURE_2D"],
+ &[],
device,
options.precache_flags,
&shader_list,
)?;
let cs_clip_image = LazilyCompiledShader::new(
- ShaderKind::ClipCache(VertexArrayKind::ClipImage),
+ ShaderKind::ClipCache,
"cs_clip_image",
- &["TEXTURE_2D"],
+ &[],
device,
options.precache_flags,
&shader_list,
)?;
- let mut cs_scale = Vec::new();
- let scale_shader_num = IMAGE_BUFFER_KINDS.len();
- // PrimitiveShader is not clonable. Use push() to initialize the vec.
- for _ in 0 .. scale_shader_num {
- cs_scale.push(None);
- }
- for image_buffer_kind in &IMAGE_BUFFER_KINDS {
- if has_platform_support(*image_buffer_kind, &gl_type) {
- let feature_string = get_feature_string(
- *image_buffer_kind,
- texture_external_version,
- );
-
- let mut features = Vec::new();
- if feature_string != "" {
- features.push(feature_string);
- }
+ let pls_init = if use_pixel_local_storage {
+ Some(LazilyCompiledShader::new(
+ ShaderKind::Resolve,
+ "pls_init",
+ &[PIXEL_LOCAL_STORAGE_FEATURE],
+ device,
+ options.precache_flags,
+ &shader_list,
+ )?)
+ } else {
+ None
+ };
- let shader = LazilyCompiledShader::new(
- ShaderKind::Cache(VertexArrayKind::Scale),
- "cs_scale",
- &features,
- device,
- options.precache_flags,
- &shader_list,
- )?;
+ let pls_resolve = if use_pixel_local_storage {
+ Some(LazilyCompiledShader::new(
+ ShaderKind::Resolve,
+ "pls_resolve",
+ &[PIXEL_LOCAL_STORAGE_FEATURE],
+ device,
+ options.precache_flags,
+ &shader_list,
+ )?)
+ } else {
+ None
+ };
- let index = Self::get_compositing_shader_index(
- *image_buffer_kind,
- );
- cs_scale[index] = Some(shader);
- }
- }
+ let cs_scale = LazilyCompiledShader::new(
+ ShaderKind::Cache(VertexArrayKind::Scale),
+ "cs_scale",
+ &[],
+ device,
+ options.precache_flags,
+ &shader_list,
+ )?;
// TODO(gw): The split composite + text shader are special cases - the only
// shaders used during normal scene rendering that aren't a brush
// shader. Perhaps we can unify these in future?
+ let mut extra_features = Vec::new();
+ if use_pixel_local_storage {
+ extra_features.push(PIXEL_LOCAL_STORAGE_FEATURE);
+ }
let ps_text_run = TextShader::new("ps_text_run",
device,
- &[],
+ &extra_features,
options.precache_flags,
&shader_list,
)?;
let ps_text_run_dual_source = if use_dual_source_blending {
- let dual_source_features = vec![DUAL_SOURCE_FEATURE];
Some(TextShader::new("ps_text_run",
device,
- &dual_source_features,
+ &[DUAL_SOURCE_FEATURE],
options.precache_flags,
&shader_list,
)?)
@@ -837,7 +837,7 @@ impl Shaders {
let ps_split_composite = LazilyCompiledShader::new(
ShaderKind::Primitive,
"ps_split_composite",
- &[],
+ &extra_features,
device,
options.precache_flags,
&shader_list,
@@ -846,7 +846,7 @@ impl Shaders {
let ps_clear = LazilyCompiledShader::new(
ShaderKind::Clear,
"ps_clear",
- &[],
+ &extra_features,
device,
options.precache_flags,
&shader_list,
@@ -862,18 +862,11 @@ impl Shaders {
brush_fast_image.push(None);
}
for buffer_kind in 0 .. IMAGE_BUFFER_KINDS.len() {
- if !has_platform_support(IMAGE_BUFFER_KINDS[buffer_kind], &gl_type)
- // Brush shaders are not ESSL1 compatible
- || (IMAGE_BUFFER_KINDS[buffer_kind] == ImageBufferKind::TextureExternal
- && texture_external_version == TextureExternalVersion::ESSL1)
- {
+ if !IMAGE_BUFFER_KINDS[buffer_kind].has_platform_support(&gl_type) {
continue;
}
- let feature_string = get_feature_string(
- IMAGE_BUFFER_KINDS[buffer_kind],
- texture_external_version,
- );
+ let feature_string = IMAGE_BUFFER_KINDS[buffer_kind].get_feature_string();
if feature_string != "" {
image_features.push(feature_string);
}
@@ -886,6 +879,7 @@ impl Shaders {
&shader_list,
use_advanced_blend_equation,
use_dual_source_blending,
+ use_pixel_local_storage,
)?);
image_features.push("REPETITION");
@@ -899,6 +893,7 @@ impl Shaders {
&shader_list,
use_advanced_blend_equation,
use_dual_source_blending,
+ use_pixel_local_storage,
)?);
image_features.clear();
@@ -907,76 +902,50 @@ impl Shaders {
// All yuv_image configuration.
let mut yuv_features = Vec::new();
let mut rgba_features = Vec::new();
- let mut fast_path_features = Vec::new();
let yuv_shader_num = IMAGE_BUFFER_KINDS.len();
let mut brush_yuv_image = Vec::new();
let mut composite_yuv = Vec::new();
let mut composite_rgba = Vec::new();
- let mut composite_rgba_fast_path = Vec::new();
// PrimitiveShader is not clonable. Use push() to initialize the vec.
for _ in 0 .. yuv_shader_num {
brush_yuv_image.push(None);
composite_yuv.push(None);
composite_rgba.push(None);
- composite_rgba_fast_path.push(None);
}
for image_buffer_kind in &IMAGE_BUFFER_KINDS {
- if has_platform_support(*image_buffer_kind, &gl_type) {
+ if image_buffer_kind.has_platform_support(&gl_type) {
yuv_features.push("YUV");
- fast_path_features.push("FAST_PATH");
- let index = Self::get_compositing_shader_index(
- *image_buffer_kind,
- );
-
- let feature_string = get_feature_string(
- *image_buffer_kind,
- texture_external_version,
- );
+ let feature_string = image_buffer_kind.get_feature_string();
if feature_string != "" {
yuv_features.push(feature_string);
rgba_features.push(feature_string);
- fast_path_features.push(feature_string);
}
- // YUV shaders are not compatible with ESSL1
- if *image_buffer_kind != ImageBufferKind::TextureExternal ||
- texture_external_version == TextureExternalVersion::ESSL3 {
- let brush_shader = BrushShader::new(
- "brush_yuv_image",
- device,
- &yuv_features,
- options.precache_flags,
- &shader_list,
- false /* advanced blend */,
- false /* dual source */,
- )?;
- brush_yuv_image[index] = Some(brush_shader);
-
- let composite_yuv_shader = LazilyCompiledShader::new(
- ShaderKind::Composite,
- "composite",
- &yuv_features,
- device,
- options.precache_flags,
- &shader_list,
- )?;
- composite_yuv[index] = Some(composite_yuv_shader);
- }
+ let brush_shader = BrushShader::new(
+ "brush_yuv_image",
+ device,
+ &yuv_features,
+ options.precache_flags,
+ &shader_list,
+ false /* advanced blend */,
+ false /* dual source */,
+ use_pixel_local_storage,
+ )?;
- let composite_rgba_shader = LazilyCompiledShader::new(
+ let composite_yuv_shader = LazilyCompiledShader::new(
ShaderKind::Composite,
"composite",
- &rgba_features,
+ &yuv_features,
device,
options.precache_flags,
&shader_list,
)?;
- let composite_rgba_fast_path_shader = LazilyCompiledShader::new(
+ let composite_rgba_shader = LazilyCompiledShader::new(
ShaderKind::Composite,
"composite",
- &fast_path_features,
+ &rgba_features,
device,
options.precache_flags,
&shader_list,
@@ -985,12 +954,12 @@ impl Shaders {
let index = Self::get_compositing_shader_index(
*image_buffer_kind,
);
+ brush_yuv_image[index] = Some(brush_shader);
+ composite_yuv[index] = Some(composite_yuv_shader);
composite_rgba[index] = Some(composite_rgba_shader);
- composite_rgba_fast_path[index] = Some(composite_rgba_fast_path_shader);
yuv_features.clear();
- rgba_features.clear();
- fast_path_features.clear();
+ rgba_features.clear()
}
}
@@ -1003,36 +972,9 @@ impl Shaders {
&shader_list,
)?;
- let cs_fast_linear_gradient = LazilyCompiledShader::new(
- ShaderKind::Cache(VertexArrayKind::FastLinearGradient),
- "cs_fast_linear_gradient",
- &[],
- device,
- options.precache_flags,
- &shader_list,
- )?;
-
- let cs_linear_gradient = LazilyCompiledShader::new(
- ShaderKind::Cache(VertexArrayKind::LinearGradient),
- "cs_linear_gradient",
- &[],
- device,
- options.precache_flags,
- &shader_list,
- )?;
-
- let cs_radial_gradient = LazilyCompiledShader::new(
- ShaderKind::Cache(VertexArrayKind::RadialGradient),
- "cs_radial_gradient",
- &[],
- device,
- options.precache_flags,
- &shader_list,
- )?;
-
- let cs_conic_gradient = LazilyCompiledShader::new(
- ShaderKind::Cache(VertexArrayKind::ConicGradient),
- "cs_conic_gradient",
+ let cs_gradient = LazilyCompiledShader::new(
+ ShaderKind::Cache(VertexArrayKind::Gradient),
+ "cs_gradient",
&[],
device,
options.precache_flags,
@@ -1062,10 +1004,7 @@ impl Shaders {
cs_blur_rgba8,
cs_border_segment,
cs_line_decoration,
- cs_fast_linear_gradient,
- cs_linear_gradient,
- cs_radial_gradient,
- cs_conic_gradient,
+ cs_gradient,
cs_border_solid,
cs_scale,
cs_svg_filter,
@@ -1075,19 +1014,21 @@ impl Shaders {
brush_blend,
brush_mix_blend,
brush_yuv_image,
+ brush_conic_gradient,
+ brush_radial_gradient,
brush_linear_gradient,
brush_opacity,
- brush_opacity_aa,
cs_clip_rectangle_slow,
cs_clip_rectangle_fast,
cs_clip_box_shadow,
cs_clip_image,
+ pls_init,
+ pls_resolve,
ps_text_run,
ps_text_run_dual_source,
ps_split_composite,
ps_clear,
composite_rgba,
- composite_rgba_fast_path,
composite_yuv,
})
}
@@ -1100,23 +1041,13 @@ impl Shaders {
&mut self,
format: CompositeSurfaceFormat,
buffer_kind: ImageBufferKind,
- features: CompositeFeatures,
) -> &mut LazilyCompiledShader {
match format {
CompositeSurfaceFormat::Rgba => {
- if features.contains(CompositeFeatures::NO_UV_CLAMP)
- && features.contains(CompositeFeatures::NO_COLOR_MODULATION)
- {
- let shader_index = Self::get_compositing_shader_index(buffer_kind);
- self.composite_rgba_fast_path[shader_index]
- .as_mut()
- .expect("bug: unsupported rgba fast path shader requested")
- } else {
- let shader_index = Self::get_compositing_shader_index(buffer_kind);
- self.composite_rgba[shader_index]
- .as_mut()
- .expect("bug: unsupported rgba shader requested")
- }
+ let shader_index = Self::get_compositing_shader_index(buffer_kind);
+ self.composite_rgba[shader_index]
+ .as_mut()
+ .expect("bug: unsupported rgba shader requested")
}
CompositeSurfaceFormat::Yuv => {
let shader_index = Self::get_compositing_shader_index(buffer_kind);
@@ -1127,34 +1058,12 @@ impl Shaders {
}
}
- pub fn get_scale_shader(
- &mut self,
- buffer_kind: ImageBufferKind,
- ) -> &mut LazilyCompiledShader {
- let shader_index = Self::get_compositing_shader_index(buffer_kind);
- self.cs_scale[shader_index]
- .as_mut()
- .expect("bug: unsupported scale shader requested")
- }
-
- pub fn get(
- &mut self,
- key: &BatchKey,
- mut features: BatchFeatures,
- debug_flags: DebugFlags,
- device: &Device,
- ) -> &mut LazilyCompiledShader {
+ pub fn get(&mut self, key: &BatchKey, features: BatchFeatures, debug_flags: DebugFlags) -> &mut LazilyCompiledShader {
match key.kind {
BatchKind::SplitComposite => {
&mut self.ps_split_composite
}
BatchKind::Brush(brush_kind) => {
- // SWGL uses a native anti-aliasing implementation that bypasses the shader.
- // Don't consider it in that case when deciding whether or not to use
- // an alpha-pass shader.
- if device.get_capabilities().uses_native_antialiasing {
- features.remove(BatchFeatures::ANTIALIASING);
- }
let brush_shader = match brush_kind {
BrushBatchKind::Solid => {
&mut self.brush_solid
@@ -1178,26 +1087,14 @@ impl Shaders {
BrushBatchKind::MixBlend { .. } => {
&mut self.brush_mix_blend
}
+ BrushBatchKind::ConicGradient => {
+ &mut self.brush_conic_gradient
+ }
+ BrushBatchKind::RadialGradient => {
+ &mut self.brush_radial_gradient
+ }
BrushBatchKind::LinearGradient => {
- // SWGL uses a native clip mask implementation that bypasses the shader.
- // Don't consider it in that case when deciding whether or not to use
- // an alpha-pass shader.
- if device.get_capabilities().uses_native_clip_mask {
- features.remove(BatchFeatures::CLIP_MASK);
- }
- // Gradient brushes can optimistically use the opaque shader even
- // with a blend mode if they don't require any features.
- if !features.intersects(
- BatchFeatures::ANTIALIASING
- | BatchFeatures::REPETITION
- | BatchFeatures::CLIP_MASK,
- ) {
- features.remove(BatchFeatures::ALPHA_PASS);
- }
- match brush_kind {
- BrushBatchKind::LinearGradient => &mut self.brush_linear_gradient,
- _ => panic!(),
- }
+ &mut self.brush_linear_gradient
}
BrushBatchKind::YuvImage(image_buffer_kind, ..) => {
let shader_index =
@@ -1207,14 +1104,10 @@ impl Shaders {
.expect("Unsupported YUV shader kind")
}
BrushBatchKind::Opacity => {
- if features.contains(BatchFeatures::ANTIALIASING) {
- &mut self.brush_opacity_aa
- } else {
- &mut self.brush_opacity
- }
+ &mut self.brush_opacity
}
};
- brush_shader.get(key.blend_mode, features, debug_flags)
+ brush_shader.get(key.blend_mode, debug_flags)
}
BatchKind::TextRun(glyph_format) => {
let text_shader = match key.blend_mode {
@@ -1227,24 +1120,27 @@ impl Shaders {
}
pub fn deinit(self, device: &mut Device) {
- for shader in self.cs_scale {
- if let Some(shader) = shader {
- shader.deinit(device);
- }
- }
+ self.cs_scale.deinit(device);
self.cs_blur_a8.deinit(device);
self.cs_blur_rgba8.deinit(device);
self.cs_svg_filter.deinit(device);
self.brush_solid.deinit(device);
self.brush_blend.deinit(device);
self.brush_mix_blend.deinit(device);
+ self.brush_conic_gradient.deinit(device);
+ self.brush_radial_gradient.deinit(device);
self.brush_linear_gradient.deinit(device);
self.brush_opacity.deinit(device);
- self.brush_opacity_aa.deinit(device);
self.cs_clip_rectangle_slow.deinit(device);
self.cs_clip_rectangle_fast.deinit(device);
self.cs_clip_box_shadow.deinit(device);
self.cs_clip_image.deinit(device);
+ if let Some(shader) = self.pls_init {
+ shader.deinit(device);
+ }
+ if let Some(shader) = self.pls_resolve {
+ shader.deinit(device);
+ }
self.ps_text_run.deinit(device);
if let Some(shader) = self.ps_text_run_dual_source {
shader.deinit(device);
@@ -1265,10 +1161,7 @@ impl Shaders {
}
}
self.cs_border_solid.deinit(device);
- self.cs_fast_linear_gradient.deinit(device);
- self.cs_linear_gradient.deinit(device);
- self.cs_radial_gradient.deinit(device);
- self.cs_conic_gradient.deinit(device);
+ self.cs_gradient.deinit(device);
self.cs_line_decoration.deinit(device);
self.cs_border_segment.deinit(device);
self.ps_split_composite.deinit(device);
@@ -1279,11 +1172,6 @@ impl Shaders {
shader.deinit(device);
}
}
- for shader in self.composite_rgba_fast_path {
- if let Some(shader) = shader {
- shader.deinit(device);
- }
- }
for shader in self.composite_yuv {
if let Some(shader) = shader {
shader.deinit(device);
@@ -1292,4 +1180,10 @@ impl Shaders {
}
}
-pub type SharedShaders = Rc<RefCell<Shaders>>;
+// A wrapper around a strong reference to a Shaders
+// object. We have this so that external (FFI)
+// consumers can own a reference to a shared Shaders
+// instance without understanding Rust's refcounting.
+pub struct WrShaders {
+ pub shaders: Rc<RefCell<Shaders>>,
+}
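
A minimal sketch of how an embedder might move a WrShaders across the FFI boundary; into_ffi/from_ffi are hypothetical helper names and are not part of this patch:

use std::cell::RefCell;
use std::rc::Rc;

// Hypothetical helpers (not in this patch): the embedder hands C code an
// opaque pointer and later reclaims it, never touching Rc/RefCell itself.
fn into_ffi(shaders: Rc<RefCell<Shaders>>) -> *mut WrShaders {
    Box::into_raw(Box::new(WrShaders { shaders }))
}

unsafe fn from_ffi(ptr: *mut WrShaders) -> Rc<RefCell<Shaders>> {
    // Rebuilding the Box transfers ownership back; returning the inner Rc
    // keeps the shared instance alive on the Rust side.
    Box::from_raw(ptr).shaders
}
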
diff --git a/third_party/webrender/webrender/src/space.rs b/third_party/webrender/webrender/src/space.rs
deleted file mode 100644
index 15646b478d4..00000000000
--- a/third_party/webrender/webrender/src/space.rs
+++ /dev/null
@@ -1,254 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-
-//! Utilities to deal with coordinate spaces.
-
-use std::fmt;
-
-use euclid::{Transform3D, Rect, Point2D, Vector2D};
-
-use api::units::*;
-use crate::spatial_tree::{SpatialTree, CoordinateSpaceMapping, SpatialNodeIndex, VisibleFace};
-use crate::util::project_rect;
-use crate::util::{MatrixHelpers, ScaleOffset, RectHelpers, PointHelpers};
-
-
-#[derive(Debug, Clone)]
-pub struct SpaceMapper<F, T> {
- kind: CoordinateSpaceMapping<F, T>,
- pub ref_spatial_node_index: SpatialNodeIndex,
- pub current_target_spatial_node_index: SpatialNodeIndex,
- pub bounds: Rect<f32, T>,
- visible_face: VisibleFace,
-}
-
-impl<F, T> SpaceMapper<F, T> where F: fmt::Debug {
- pub fn new(
- ref_spatial_node_index: SpatialNodeIndex,
- bounds: Rect<f32, T>,
- ) -> Self {
- SpaceMapper {
- kind: CoordinateSpaceMapping::Local,
- ref_spatial_node_index,
- current_target_spatial_node_index: ref_spatial_node_index,
- bounds,
- visible_face: VisibleFace::Front,
- }
- }
-
- pub fn new_with_target(
- ref_spatial_node_index: SpatialNodeIndex,
- target_node_index: SpatialNodeIndex,
- bounds: Rect<f32, T>,
- spatial_tree: &SpatialTree,
- ) -> Self {
- let mut mapper = Self::new(ref_spatial_node_index, bounds);
- mapper.set_target_spatial_node(target_node_index, spatial_tree);
- mapper
- }
-
- pub fn set_target_spatial_node(
- &mut self,
- target_node_index: SpatialNodeIndex,
- spatial_tree: &SpatialTree,
- ) {
- if target_node_index == self.current_target_spatial_node_index {
- return
- }
-
- let ref_spatial_node = &spatial_tree.spatial_nodes[self.ref_spatial_node_index.0 as usize];
- let target_spatial_node = &spatial_tree.spatial_nodes[target_node_index.0 as usize];
- self.visible_face = VisibleFace::Front;
-
- self.kind = if self.ref_spatial_node_index == target_node_index {
- CoordinateSpaceMapping::Local
- } else if ref_spatial_node.coordinate_system_id == target_spatial_node.coordinate_system_id {
- let scale_offset = ref_spatial_node.content_transform
- .inverse()
- .accumulate(&target_spatial_node.content_transform);
- CoordinateSpaceMapping::ScaleOffset(scale_offset)
- } else {
- let transform = spatial_tree
- .get_relative_transform_with_face(
- target_node_index,
- self.ref_spatial_node_index,
- Some(&mut self.visible_face),
- )
- .into_transform()
- .with_source::<F>()
- .with_destination::<T>();
- CoordinateSpaceMapping::Transform(transform)
- };
-
- self.current_target_spatial_node_index = target_node_index;
- }
-
- pub fn get_transform(&self) -> Transform3D<f32, F, T> {
- match self.kind {
- CoordinateSpaceMapping::Local => {
- Transform3D::identity()
- }
- CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
- scale_offset.to_transform()
- }
- CoordinateSpaceMapping::Transform(transform) => {
- transform
- }
- }
- }
-
- pub fn unmap(&self, rect: &Rect<f32, T>) -> Option<Rect<f32, F>> {
- match self.kind {
- CoordinateSpaceMapping::Local => {
- Some(rect.cast_unit())
- }
- CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
- Some(scale_offset.unmap_rect(rect))
- }
- CoordinateSpaceMapping::Transform(ref transform) => {
- transform.inverse_rect_footprint(rect)
- }
- }
- }
-
- pub fn map(&self, rect: &Rect<f32, F>) -> Option<Rect<f32, T>> {
- match self.kind {
- CoordinateSpaceMapping::Local => {
- Some(rect.cast_unit())
- }
- CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
- Some(scale_offset.map_rect(rect))
- }
- CoordinateSpaceMapping::Transform(ref transform) => {
- match project_rect(transform, rect, &self.bounds) {
- Some(bounds) => {
- Some(bounds)
- }
- None => {
- warn!("parent relative transform can't transform the primitive rect for {:?}", rect);
- None
- }
- }
- }
- }
- }
-
- // Attempt to return a rect that is contained in the mapped rect.
- pub fn map_inner_bounds(&self, rect: &Rect<f32, F>) -> Option<Rect<f32, T>> {
- match self.kind {
- CoordinateSpaceMapping::Local => {
- Some(rect.cast_unit())
- }
- CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
- Some(scale_offset.map_rect(rect))
- }
- CoordinateSpaceMapping::Transform(..) => {
- // We could figure out a rect that is contained in the transformed rect but
- // for now we do the simple thing here and bail out.
- return None;
- }
- }
- }
-
- pub fn map_vector(&self, v: Vector2D<f32, F>) -> Vector2D<f32, T> {
- match self.kind {
- CoordinateSpaceMapping::Local => {
- v.cast_unit()
- }
- CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => {
- scale_offset.map_vector(&v)
- }
- CoordinateSpaceMapping::Transform(ref transform) => {
- transform.transform_vector2d(v)
- }
- }
- }
-}
-
-
-#[derive(Clone, Debug)]
-pub struct SpaceSnapper {
- pub ref_spatial_node_index: SpatialNodeIndex,
- current_target_spatial_node_index: SpatialNodeIndex,
- snapping_transform: Option<ScaleOffset>,
- pub device_pixel_scale: DevicePixelScale,
-}
-
-impl SpaceSnapper {
- pub fn new(
- ref_spatial_node_index: SpatialNodeIndex,
- device_pixel_scale: DevicePixelScale,
- ) -> Self {
- SpaceSnapper {
- ref_spatial_node_index,
- current_target_spatial_node_index: SpatialNodeIndex::INVALID,
- snapping_transform: None,
- device_pixel_scale,
- }
- }
-
- pub fn new_with_target(
- ref_spatial_node_index: SpatialNodeIndex,
- target_node_index: SpatialNodeIndex,
- device_pixel_scale: DevicePixelScale,
- spatial_tree: &SpatialTree,
- ) -> Self {
- let mut snapper = SpaceSnapper {
- ref_spatial_node_index,
- current_target_spatial_node_index: SpatialNodeIndex::INVALID,
- snapping_transform: None,
- device_pixel_scale,
- };
-
- snapper.set_target_spatial_node(target_node_index, spatial_tree);
- snapper
- }
-
- pub fn set_target_spatial_node(
- &mut self,
- target_node_index: SpatialNodeIndex,
- spatial_tree: &SpatialTree,
- ) {
- if target_node_index == self.current_target_spatial_node_index {
- return
- }
-
- let ref_spatial_node = &spatial_tree.spatial_nodes[self.ref_spatial_node_index.0 as usize];
- let target_spatial_node = &spatial_tree.spatial_nodes[target_node_index.0 as usize];
-
- self.current_target_spatial_node_index = target_node_index;
- self.snapping_transform = match (ref_spatial_node.snapping_transform, target_spatial_node.snapping_transform) {
- (Some(ref ref_scale_offset), Some(ref target_scale_offset)) => {
- Some(ref_scale_offset
- .inverse()
- .accumulate(target_scale_offset)
- .scale(self.device_pixel_scale.0))
- }
- _ => None,
- };
- }
-
- pub fn snap_rect<F>(&self, rect: &Rect<f32, F>) -> Rect<f32, F> where F: fmt::Debug {
- debug_assert!(self.current_target_spatial_node_index != SpatialNodeIndex::INVALID);
- match self.snapping_transform {
- Some(ref scale_offset) => {
- let snapped_device_rect : DeviceRect = scale_offset.map_rect(rect).snap();
- scale_offset.unmap_rect(&snapped_device_rect)
- }
- None => *rect,
- }
- }
-
- pub fn snap_point<F>(&self, point: &Point2D<f32, F>) -> Point2D<f32, F> where F: fmt::Debug {
- debug_assert!(self.current_target_spatial_node_index != SpatialNodeIndex::INVALID);
- match self.snapping_transform {
- Some(ref scale_offset) => {
- let snapped_device_vector : DevicePoint = scale_offset.map_point(point).snap();
- scale_offset.unmap_point(&snapped_device_vector)
- }
- None => *point,
- }
- }
-}
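
The snap-in-device-space idea used by snap_rect/snap_point above can be shown with plain numbers; this standalone sketch assumes a simple scale-plus-offset mapping and uses no webrender types:

// Map a local coordinate into device pixels, round to the pixel grid,
// then map back into local coordinates.
fn snap_1d(local: f32, scale: f32, offset: f32) -> f32 {
    let device = local * scale + offset;
    (device.round() - offset) / scale
}

fn main() {
    // At a 2x device scale with a 0.3px offset, local 10.1 lands on
    // device 20.5, which snaps to 21.0, i.e. local 10.35.
    println!("{}", snap_1d(10.1, 2.0, 0.3)); // ~10.35
}
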
diff --git a/third_party/webrender/webrender/src/spatial_node.rs b/third_party/webrender/webrender/src/spatial_node.rs
index b2dd77f148b..2283b0fe1a9 100644
--- a/third_party/webrender/webrender/src/spatial_node.rs
+++ b/third_party/webrender/webrender/src/spatial_node.rs
@@ -6,12 +6,12 @@
use api::{ExternalScrollId, PipelineId, PropertyBinding, PropertyBindingId, ReferenceFrameKind, ScrollClamping, ScrollLocation};
use api::{TransformStyle, ScrollSensitivity, StickyOffsetBounds};
use api::units::*;
-use crate::spatial_tree::{CoordinateSystem, SpatialNodeIndex, TransformUpdateState};
-use crate::spatial_tree::{CoordinateSystemId, StaticCoordinateSystemId};
+use crate::spatial_tree::{CoordinateSystem, CoordinateSystemId, SpatialNodeIndex, TransformUpdateState};
use euclid::{Point2D, Vector2D, SideOffsets2D};
use crate::scene::SceneProperties;
use crate::util::{LayoutFastTransform, MatrixHelpers, ScaleOffset, TransformedRectKind, PointHelpers};
+#[derive(Clone, Debug)]
pub enum SpatialNodeType {
/// A special kind of node that adjusts its position based on the position
/// of its parent node and a given set of sticky positioning offset bounds.
@@ -28,6 +28,7 @@ pub enum SpatialNodeType {
}
/// Contains information common among all types of SpatialTree nodes.
+#[derive(Clone, Debug)]
pub struct SpatialNode {
/// The scale/offset of the viewport for this spatial node, relative to the
/// coordinate system. Includes any accumulated scrolling offsets from nodes
@@ -44,10 +45,6 @@ pub struct SpatialNode {
/// The axis-aligned coordinate system id of this node.
pub coordinate_system_id: CoordinateSystemId,
- /// Coordinate system statically assigned during scene building (doesn't change regardless of
- /// the current property binding value during frame building).
- pub static_coordinate_system_id: StaticCoordinateSystemId,
-
/// The current transform kind of this node.
pub transform_kind: TransformedRectKind,
@@ -92,7 +89,7 @@ fn compute_offset_from(
break;
},
SpatialNodeType::ScrollFrame(ref info) => {
- if info.external_id == external_id {
+ if info.external_id == Some(external_id) {
break;
}
@@ -135,14 +132,12 @@ impl SpatialNode {
pipeline_id: PipelineId,
parent_index: Option<SpatialNodeIndex>,
node_type: SpatialNodeType,
- static_coordinate_system_id: StaticCoordinateSystemId,
) -> Self {
SpatialNode {
viewport_transform: ScaleOffset::identity(),
content_transform: ScaleOffset::identity(),
snapping_transform: None,
coordinate_system_id: CoordinateSystemId(0),
- static_coordinate_system_id,
transform_kind: TransformedRectKind::AxisAligned,
parent: parent_index,
children: Vec::new(),
@@ -157,13 +152,12 @@ impl SpatialNode {
pub fn new_scroll_frame(
pipeline_id: PipelineId,
parent_index: SpatialNodeIndex,
- external_id: ExternalScrollId,
+ external_id: Option<ExternalScrollId>,
frame_rect: &LayoutRect,
content_size: &LayoutSize,
scroll_sensitivity: ScrollSensitivity,
frame_kind: ScrollFrameKind,
external_scroll_offset: LayoutVector2D,
- static_coordinate_system_id: StaticCoordinateSystemId,
) -> Self {
let node_type = SpatialNodeType::ScrollFrame(ScrollFrameInfo::new(
*frame_rect,
@@ -178,12 +172,7 @@ impl SpatialNode {
)
);
- Self::new(
- pipeline_id,
- Some(parent_index),
- node_type,
- static_coordinate_system_id,
- )
+ Self::new(pipeline_id, Some(parent_index), node_type)
}
pub fn new_reference_frame(
@@ -193,7 +182,6 @@ impl SpatialNode {
kind: ReferenceFrameKind,
origin_in_parent_reference_frame: LayoutVector2D,
pipeline_id: PipelineId,
- static_coordinate_system_id: StaticCoordinateSystemId,
) -> Self {
let info = ReferenceFrameInfo {
transform_style,
@@ -202,26 +190,15 @@ impl SpatialNode {
origin_in_parent_reference_frame,
invertible: true,
};
- Self::new(
- pipeline_id,
- parent_index,
- SpatialNodeType::ReferenceFrame(info),
- static_coordinate_system_id,
- )
+ Self::new(pipeline_id, parent_index, SpatialNodeType::ReferenceFrame(info))
}
pub fn new_sticky_frame(
parent_index: SpatialNodeIndex,
sticky_frame_info: StickyFrameInfo,
pipeline_id: PipelineId,
- static_coordinate_system_id: StaticCoordinateSystemId,
) -> Self {
- Self::new(
- pipeline_id,
- Some(parent_index),
- SpatialNodeType::StickyFrame(sticky_frame_info),
- static_coordinate_system_id,
- )
+ Self::new(pipeline_id, Some(parent_index), SpatialNodeType::StickyFrame(sticky_frame_info))
}
pub fn add_child(&mut self, child: SpatialNodeIndex) {
@@ -342,14 +319,9 @@ impl SpatialNode {
if info.invertible {
// Resolve the transform against any property bindings.
- let source_transform = {
- let source_transform = scene_properties.resolve_layout_transform(&info.source_transform);
- if let ReferenceFrameKind::Transform { is_2d_scale_translation: true, .. } = info.kind {
- assert!(source_transform.is_2d_scale_translation(), "Reference frame was marked as only having 2d scale or translation");
- }
-
- LayoutFastTransform::from(source_transform)
- };
+ let source_transform = LayoutFastTransform::from(
+ scene_properties.resolve_layout_transform(&info.source_transform)
+ );
// Do a change-basis operation on the perspective matrix using
// the scroll offset.
@@ -368,7 +340,7 @@ impl SpatialNode {
.then_translate(-scroll_offset)
}
ReferenceFrameKind::Perspective { scrolling_relative_to: None } |
- ReferenceFrameKind::Transform { .. } => source_transform,
+ ReferenceFrameKind::Transform | ReferenceFrameKind::Zoom => source_transform,
};
let resolved_transform =
@@ -398,9 +370,10 @@ impl SpatialNode {
Some(ref scale_offset) => {
// We generally do not want to snap animated transforms as it causes jitter.
// However, we do want to snap the visual viewport offset when scrolling.
- // This may still cause jitter when zooming, unfortunately.
+ // Therefore only snap the transform for Zoom reference frames. This may still
+ // cause jitter when zooming, unfortunately.
let mut maybe_snapped = scale_offset.clone();
- if let ReferenceFrameKind::Transform { should_snap: true, .. } = info.kind {
+ if info.kind == ReferenceFrameKind::Zoom {
maybe_snapped.offset = snap_offset(
scale_offset.offset,
state.coordinate_system_relative_scale_offset.scale,
@@ -435,7 +408,7 @@ impl SpatialNode {
transform,
world_transform,
should_flatten: match (info.transform_style, info.kind) {
- (TransformStyle::Flat, ReferenceFrameKind::Transform { .. }) => true,
+ (TransformStyle::Flat, ReferenceFrameKind::Transform) => true,
(_, _) => false,
},
parent: Some(state.current_coordinate_system_id),
@@ -700,7 +673,7 @@ impl SpatialNode {
pub fn matches_external_id(&self, external_id: ExternalScrollId) -> bool {
match self.node_type {
- SpatialNodeType::ScrollFrame(info) if info.external_id == external_id => true,
+ SpatialNodeType::ScrollFrame(info) if info.external_id == Some(external_id) => true,
_ => false,
}
}
@@ -777,9 +750,7 @@ impl SpatialNode {
/// or an explicitly defined scroll frame from the display list.
#[derive(Copy, Clone, Debug)]
pub enum ScrollFrameKind {
- PipelineRoot {
- is_root_pipeline: bool,
- },
+ PipelineRoot,
Explicit,
}
@@ -797,7 +768,7 @@ pub struct ScrollFrameInfo {
/// An external id to identify this scroll frame to API clients. This
/// allows setting scroll positions via the API without relying on ClipsIds
/// which may change between frames.
- pub external_id: ExternalScrollId,
+ pub external_id: Option<ExternalScrollId>,
/// Stores whether this is a scroll frame added implicitly by WR when adding
/// a pipeline (either the root or an iframe). We need to exclude these
@@ -812,14 +783,7 @@ pub struct ScrollFrameInfo {
/// pre-scrolled in their local coordinates.
pub external_scroll_offset: LayoutVector2D,
- /// The negated scroll offset of this scroll node. including the
- /// pre-scrolled amount. If, for example, a scroll node was pre-scrolled
- /// to y=10 (10 pixels down from the initial unscrolled position), then
- /// `external_scroll_offset` would be (0,10), and this `offset` field would
- /// be (0,-10). If WebRender is then asked to change the scroll position by
- /// an additional 10 pixels (without changing the pre-scroll amount in the
- /// display list), `external_scroll_offset` would remain at (0,10) and
- /// `offset` would change to (0,-20).
+ /// The current offset of this scroll node.
pub offset: LayoutVector2D,
}
@@ -829,7 +793,7 @@ impl ScrollFrameInfo {
viewport_rect: LayoutRect,
scroll_sensitivity: ScrollSensitivity,
scrollable_size: LayoutSize,
- external_id: ExternalScrollId,
+ external_id: Option<ExternalScrollId>,
frame_kind: ScrollFrameKind,
external_scroll_offset: LayoutVector2D,
) -> ScrollFrameInfo {
@@ -855,9 +819,14 @@ impl ScrollFrameInfo {
self,
old_scroll_info: &ScrollFrameInfo
) -> ScrollFrameInfo {
+ let offset =
+ old_scroll_info.offset +
+ self.external_scroll_offset -
+ old_scroll_info.external_scroll_offset;
+
ScrollFrameInfo {
viewport_rect: self.viewport_rect,
- offset: old_scroll_info.offset,
+ offset,
scroll_sensitivity: self.scroll_sensitivity,
scrollable_size: self.scrollable_size,
external_id: self.external_id,
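
Worked numbers for the carry-over above (illustrative values only): the old offset is adjusted by however much the display list's pre-scroll amount changed between the two scene builds.

// Illustrative arithmetic mirroring the `offset` computation above.
let old_offset = LayoutVector2D::new(0.0, -10.0);      // old_scroll_info.offset
let old_external = LayoutVector2D::new(0.0, 10.0);     // old pre-scroll amount
let new_external = LayoutVector2D::new(0.0, 25.0);     // new pre-scroll amount
let offset = old_offset + new_external - old_external; // (0.0, 5.0)
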
@@ -939,17 +908,14 @@ fn test_cst_perspective_relative_scroll() {
None,
TransformStyle::Flat,
PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
+ ReferenceFrameKind::Transform,
LayoutVector2D::zero(),
pipeline_id,
);
let scroll_frame_1 = cst.add_scroll_frame(
root,
- ext_scroll_id,
+ Some(ext_scroll_id),
pipeline_id,
&LayoutRect::new(LayoutPoint::zero(), LayoutSize::new(100.0, 100.0)),
&LayoutSize::new(100.0, 500.0),
@@ -960,7 +926,7 @@ fn test_cst_perspective_relative_scroll() {
let scroll_frame_2 = cst.add_scroll_frame(
scroll_frame_1,
- ExternalScrollId(2, pipeline_id),
+ None,
pipeline_id,
&LayoutRect::new(LayoutPoint::zero(), LayoutSize::new(100.0, 100.0)),
&LayoutSize::new(100.0, 500.0),
diff --git a/third_party/webrender/webrender/src/spatial_tree.rs b/third_party/webrender/webrender/src/spatial_tree.rs
index bfbea525a6e..d1eaca9d8fc 100644
--- a/third_party/webrender/webrender/src/spatial_tree.rs
+++ b/third_party/webrender/webrender/src/spatial_tree.rs
@@ -3,7 +3,7 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
use api::{ExternalScrollId, PropertyBinding, ReferenceFrameKind, TransformStyle};
-use api::{PipelineId, ScrollClamping, ScrollNodeState, ScrollSensitivity};
+use api::{PipelineId, ScrollClamping, ScrollNodeState, ScrollLocation, ScrollSensitivity};
use api::units::*;
use euclid::Transform3D;
use crate::gpu_types::TransformPalette;
@@ -25,13 +25,6 @@ pub type ScrollStates = FastHashMap<ExternalScrollId, ScrollFrameInfo>;
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct CoordinateSystemId(pub u32);
-#[derive(Debug, Copy, Clone, PartialEq)]
-pub struct StaticCoordinateSystemId(pub u32);
-
-impl StaticCoordinateSystemId {
- pub const ROOT: StaticCoordinateSystemId = StaticCoordinateSystemId(0);
-}
-
/// A node in the hierarchy of coordinate system
/// transforms.
#[derive(Debug)]
@@ -66,14 +59,6 @@ impl SpatialNodeIndex {
pub const ROOT_SPATIAL_NODE_INDEX: SpatialNodeIndex = SpatialNodeIndex(0);
const TOPMOST_SCROLL_NODE_INDEX: SpatialNodeIndex = SpatialNodeIndex(1);
-// In some cases, the conversion from CSS pixels to device pixels can result in small
-// rounding errors when calculating the scrollable distance of a scroll frame. Apply
-// a small epsilon so that we don't detect these frames as "real" scroll frames.
-const MIN_SCROLLABLE_AMOUNT: f32 = 0.01;
-
-// The minimum size for a scroll frame for it to be considered for a scroll root.
-const MIN_SCROLL_ROOT_SIZE: f32 = 128.0;
-
impl SpatialNodeIndex {
pub fn new(index: usize) -> Self {
debug_assert!(index < ::std::u32::MAX as usize);
@@ -127,9 +112,6 @@ pub struct SpatialTree {
/// Temporary stack of nodes to update when traversing the tree.
nodes_to_update: Vec<(SpatialNodeIndex, TransformUpdateState)>,
-
- /// Next id to assign when creating a new static coordinate system
- next_static_coord_system_id: u32,
}
#[derive(Clone)]
@@ -184,6 +166,16 @@ impl<Src, Dst> CoordinateSpaceMapping<Src, Dst> {
}
}
+ pub fn visible_face(&self) -> VisibleFace {
+ match *self {
+ CoordinateSpaceMapping::Transform(ref transform) if transform.is_backface_visible() => VisibleFace::Back,
+ CoordinateSpaceMapping::Local |
+ CoordinateSpaceMapping::Transform(_) |
+ CoordinateSpaceMapping::ScaleOffset(_) => VisibleFace::Front,
+ }
+ }
+
pub fn is_perspective(&self) -> bool {
match *self {
CoordinateSpaceMapping::Local |
@@ -203,7 +195,7 @@ impl<Src, Dst> CoordinateSpaceMapping<Src, Dst> {
pub fn scale_factors(&self) -> (f32, f32) {
match *self {
CoordinateSpaceMapping::Local => (1.0, 1.0),
- CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => (scale_offset.scale.x.abs(), scale_offset.scale.y.abs()),
+ CoordinateSpaceMapping::ScaleOffset(ref scale_offset) => (scale_offset.scale.x, scale_offset.scale.y),
CoordinateSpaceMapping::Transform(ref transform) => scale_factors(transform),
}
}
@@ -234,7 +226,6 @@ impl SpatialTree {
pending_scroll_offsets: FastHashMap::default(),
pipelines_to_discard: FastHashSet::default(),
nodes_to_update: Vec::new(),
- next_static_coord_system_id: 0,
}
}
@@ -274,19 +265,6 @@ impl SpatialTree {
child_index: SpatialNodeIndex,
parent_index: SpatialNodeIndex,
) -> CoordinateSpaceMapping<LayoutPixel, LayoutPixel> {
- self.get_relative_transform_with_face(child_index, parent_index, None)
- }
-
- /// Calculate the relative transform from `child_index` to `parent_index`.
- /// This method will panic if the nodes are not connected!
- /// Also, switch the visible face to `Back` if at any stage where the
- /// combined transform is flattened, we see the back face.
- pub fn get_relative_transform_with_face(
- &self,
- child_index: SpatialNodeIndex,
- parent_index: SpatialNodeIndex,
- mut visible_face: Option<&mut VisibleFace>,
- ) -> CoordinateSpaceMapping<LayoutPixel, LayoutPixel> {
if child_index == parent_index {
return CoordinateSpaceMapping::Local;
}
@@ -318,12 +296,6 @@ impl SpatialTree {
.then(&child_transform)
.with_source::<LayoutPixel>()
.with_destination::<LayoutPixel>();
-
- if let Some(face) = visible_face {
- if result.is_backface_visible() {
- *face = VisibleFace::Back;
- }
- }
return CoordinateSpaceMapping::Transform(result);
}
@@ -338,11 +310,6 @@ impl SpatialTree {
let coord_system = &self.coord_systems[coordinate_system_id.0 as usize];
if coord_system.should_flatten {
- if let Some(ref mut face) = visible_face {
- if transform.is_backface_visible() {
- **face = VisibleFace::Back;
- }
- }
transform.flatten_z_output();
}
@@ -355,30 +322,10 @@ impl SpatialTree {
.inverse()
.to_transform(),
);
- if let Some(face) = visible_face {
- if transform.is_backface_visible() {
- *face = VisibleFace::Back;
- }
- }
CoordinateSpaceMapping::Transform(transform)
}
- pub fn is_relative_transform_complex(
- &self,
- child_index: SpatialNodeIndex,
- parent_index: SpatialNodeIndex,
- ) -> bool {
- if child_index == parent_index {
- return false;
- }
-
- let child = &self.spatial_nodes[child_index.0 as usize];
- let parent = &self.spatial_nodes[parent_index.0 as usize];
-
- child.coordinate_system_id != parent.coordinate_system_id
- }
-
fn get_world_transform_impl(
&self,
index: SpatialNodeIndex,
@@ -443,10 +390,12 @@ impl SpatialTree {
let mut result = vec![];
for node in &self.spatial_nodes {
if let SpatialNodeType::ScrollFrame(info) = node.node_type {
- result.push(ScrollNodeState {
- id: info.external_id,
- scroll_offset: info.offset - info.external_scroll_offset,
- })
+ if let Some(id) = info.external_id {
+ result.push(ScrollNodeState {
+ id,
+ scroll_offset: info.offset - info.external_scroll_offset,
+ })
+ }
}
}
result
@@ -460,8 +409,8 @@ impl SpatialTree {
}
match old_node.node_type {
- SpatialNodeType::ScrollFrame(info) => {
- scroll_states.insert(info.external_id, info);
+ SpatialNodeType::ScrollFrame(info) if info.external_id.is_some() => {
+ scroll_states.insert(info.external_id.unwrap(), info);
}
_ => {}
}
@@ -488,6 +437,34 @@ impl SpatialTree {
false
}
+ fn find_nearest_scrolling_ancestor(
+ &self,
+ index: Option<SpatialNodeIndex>
+ ) -> SpatialNodeIndex {
+ let index = match index {
+ Some(index) => index,
+ None => return self.topmost_scroll_node_index(),
+ };
+
+ let node = &self.spatial_nodes[index.0 as usize];
+ match node.node_type {
+ SpatialNodeType::ScrollFrame(state) if state.sensitive_to_input_events() => index,
+ _ => self.find_nearest_scrolling_ancestor(node.parent)
+ }
+ }
+
+ pub fn scroll_nearest_scrolling_ancestor(
+ &mut self,
+ scroll_location: ScrollLocation,
+ node_index: Option<SpatialNodeIndex>,
+ ) -> bool {
+ if self.spatial_nodes.is_empty() {
+ return false;
+ }
+ let node_index = self.find_nearest_scrolling_ancestor(node_index);
+ self.spatial_nodes[node_index.0 as usize].scroll(scroll_location)
+ }
+
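
A hypothetical call site for the API added above (names other than the method itself are assumptions): a wheel event whose hit test found `hit`, or found nothing, scrolls the nearest scrollable ancestor, falling back to the topmost scroll node.

// `hit` is an Option<SpatialNodeIndex> produced by hit testing.
let did_scroll = spatial_tree.scroll_nearest_scrolling_ancestor(
    ScrollLocation::Delta(LayoutVector2D::new(0.0, -120.0)),
    hit,
);
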
pub fn update_tree(
&mut self,
pan: WorldPoint,
@@ -551,7 +528,7 @@ impl SpatialTree {
pub fn finalize_and_apply_pending_scroll_offsets(&mut self, old_states: ScrollStates) {
for node in &mut self.spatial_nodes {
let external_id = match node.node_type {
- SpatialNodeType::ScrollFrame(ScrollFrameInfo { external_id, ..}) => external_id,
+ SpatialNodeType::ScrollFrame(ScrollFrameInfo { external_id: Some(id), ..} ) => id,
_ => continue,
};
@@ -565,15 +542,10 @@ impl SpatialTree {
}
}
- /// Get the static coordinate system for a given spatial node index
- pub fn get_static_coordinate_system_id(&self, node_index: SpatialNodeIndex) -> StaticCoordinateSystemId {
- self.spatial_nodes[node_index.0 as usize].static_coordinate_system_id
- }
-
pub fn add_scroll_frame(
&mut self,
parent_index: SpatialNodeIndex,
- external_id: ExternalScrollId,
+ external_id: Option<ExternalScrollId>,
pipeline_id: PipelineId,
frame_rect: &LayoutRect,
content_size: &LayoutSize,
@@ -581,9 +553,6 @@ impl SpatialTree {
frame_kind: ScrollFrameKind,
external_scroll_offset: LayoutVector2D,
) -> SpatialNodeIndex {
- // Scroll frames are only 2d translations - they can't introduce a new static coord system
- let static_coordinate_system_id = self.get_static_coordinate_system_id(parent_index);
-
let node = SpatialNode::new_scroll_frame(
pipeline_id,
parent_index,
@@ -593,7 +562,6 @@ impl SpatialTree {
scroll_sensitivity,
frame_kind,
external_scroll_offset,
- static_coordinate_system_id,
);
self.add_spatial_node(node)
}
@@ -607,45 +575,6 @@ impl SpatialTree {
origin_in_parent_reference_frame: LayoutVector2D,
pipeline_id: PipelineId,
) -> SpatialNodeIndex {
-
- // Determine if this reference frame creates a new static coordinate system
- let new_static_coord_system = match parent_index {
- Some(..) => {
- match kind {
- ReferenceFrameKind::Transform { is_2d_scale_translation: true, .. } => {
- // Client has guaranteed this transform will only be axis-aligned
- false
- }
- ReferenceFrameKind::Transform { is_2d_scale_translation: false, .. } | ReferenceFrameKind::Perspective { .. } => {
- // Even if client hasn't promised it's an axis-aligned transform, we can still
- // check this so long as the transform isn't animated (and thus could change to
- // anything by APZ during frame building)
- match source_transform {
- PropertyBinding::Value(m) => {
- !m.is_2d_scale_translation()
- }
- PropertyBinding::Binding(..) => {
- // Animated, so assume it may introduce a complex transform
- true
- }
- }
- }
- }
- }
- None => {
- // The root reference frame always creates a new static coord system
- true
- }
- };
-
- let static_coordinate_system_id = if new_static_coord_system {
- let id = StaticCoordinateSystemId(self.next_static_coord_system_id);
- self.next_static_coord_system_id += 1;
- id
- } else {
- self.get_static_coordinate_system_id(parent_index.unwrap())
- };
-
let node = SpatialNode::new_reference_frame(
parent_index,
transform_style,
@@ -653,7 +582,6 @@ impl SpatialTree {
kind,
origin_in_parent_reference_frame,
pipeline_id,
- static_coordinate_system_id,
);
self.add_spatial_node(node)
}
@@ -664,14 +592,10 @@ impl SpatialTree {
sticky_frame_info: StickyFrameInfo,
pipeline_id: PipelineId,
) -> SpatialNodeIndex {
- // Sticky frames are only 2d translations - they can't introduce a new static coord system
- let static_coordinate_system_id = self.get_static_coordinate_system_id(parent_index);
-
let node = SpatialNode::new_sticky_frame(
parent_index,
sticky_frame_info,
pipeline_id,
- static_coordinate_system_id,
);
self.add_spatial_node(node)
}
@@ -696,31 +620,6 @@ impl SpatialTree {
self.pipelines_to_discard.insert(pipeline_id);
}
- /// Check if a given spatial node is an ancestor of another spatial node.
- pub fn is_ancestor(
- &self,
- maybe_parent: SpatialNodeIndex,
- maybe_child: SpatialNodeIndex,
- ) -> bool {
- // Early out if same node
- if maybe_parent == maybe_child {
- return false;
- }
-
- let mut current_node = maybe_child;
-
- while current_node != ROOT_SPATIAL_NODE_INDEX {
- let node = &self.spatial_nodes[current_node.0 as usize];
- current_node = node.parent.expect("bug: no parent");
-
- if current_node == maybe_parent {
- return true;
- }
- }
-
- false
- }
-
/// Find the spatial node that is the scroll root for a given spatial node.
/// A scroll root is the first spatial node when found travelling up the
/// spatial node tree that is an explicit scroll frame.
@@ -728,8 +627,7 @@ impl SpatialTree {
&self,
spatial_node_index: SpatialNodeIndex,
) -> SpatialNodeIndex {
- let mut real_scroll_root = ROOT_SPATIAL_NODE_INDEX;
- let mut outermost_scroll_root = ROOT_SPATIAL_NODE_INDEX;
+ let mut scroll_root = ROOT_SPATIAL_NODE_INDEX;
let mut node_index = spatial_node_index;
while node_index != ROOT_SPATIAL_NODE_INDEX {
@@ -737,38 +635,31 @@ impl SpatialTree {
match node.node_type {
SpatialNodeType::ReferenceFrame(ref info) => {
match info.kind {
- ReferenceFrameKind::Transform { is_2d_scale_translation: true, .. } => {
- // We can handle scroll nodes that pass through a 2d scale/translation node
+ ReferenceFrameKind::Zoom => {
+ // We can handle scroll nodes that pass through a zoom node
}
- ReferenceFrameKind::Transform { is_2d_scale_translation: false, .. } |
+ ReferenceFrameKind::Transform |
ReferenceFrameKind::Perspective { .. } => {
// When a reference frame is encountered, forget any scroll roots
// we have encountered, as they may end up with a non-axis-aligned transform.
- real_scroll_root = ROOT_SPATIAL_NODE_INDEX;
- outermost_scroll_root = ROOT_SPATIAL_NODE_INDEX;
+ scroll_root = ROOT_SPATIAL_NODE_INDEX;
}
}
}
SpatialNodeType::StickyFrame(..) => {}
SpatialNodeType::ScrollFrame(ref info) => {
match info.frame_kind {
- ScrollFrameKind::PipelineRoot { is_root_pipeline } => {
+ ScrollFrameKind::PipelineRoot => {
// Once we encounter a pipeline root, there is no need to look further
- if is_root_pipeline {
- break;
- }
+ break;
}
ScrollFrameKind::Explicit => {
- // Store the closest scroll root we find to the root, for use
- // later on, even if it's not actually scrollable.
- outermost_scroll_root = node_index;
-
// If the scroll root has no scrollable area, we don't want to
// consider it. This helps pages that have a nested scroll root
// within a redundant scroll root to avoid selecting the wrong
// reference spatial node for a picture cache.
- if info.scrollable_size.width > MIN_SCROLLABLE_AMOUNT ||
- info.scrollable_size.height > MIN_SCROLLABLE_AMOUNT {
+ if info.scrollable_size.width > 0.0 ||
+ info.scrollable_size.height > 0.0 {
// Since we are skipping redundant scroll roots, we may end up
// selecting inner scroll roots that are very small. There is
// no performance benefit to creating a slice for these roots,
@@ -776,11 +667,11 @@ impl SpatialTree {
// local-space, but makes for a reasonable estimate. The value
// is arbitrary, but is generally small enough to ignore things
// like scroll roots around text input elements.
- if info.viewport_rect.size.width > MIN_SCROLL_ROOT_SIZE &&
- info.viewport_rect.size.height > MIN_SCROLL_ROOT_SIZE {
+ if info.viewport_rect.size.width > 128.0 &&
+ info.viewport_rect.size.height > 128.0 {
// If we've found a root that is scrollable, and a reasonable
// size, select that as the current root for this node
- real_scroll_root = node_index;
+ scroll_root = node_index;
}
}
}
@@ -790,16 +681,7 @@ impl SpatialTree {
node_index = node.parent.expect("unable to find parent node");
}
- // If we didn't find any real (scrollable) frames, then return the outermost
- // redundant scroll frame. This is important so that we can correctly find
- // the clips defined on the content which should be handled when drawing the
- // picture cache tiles (by definition these clips are ancestors of the
- // scroll root selected for the picture cache).
- if real_scroll_root == ROOT_SPATIAL_NODE_INDEX {
- outermost_scroll_root
- } else {
- real_scroll_root
- }
+ scroll_root
}
fn print_node<T: PrintTreePrinter>(
@@ -835,7 +717,6 @@ impl SpatialTree {
pt.add_item(format!("viewport_transform: {:?}", node.viewport_transform));
pt.add_item(format!("snapping_transform: {:?}", node.snapping_transform));
pt.add_item(format!("coordinate_system_id: {:?}", node.coordinate_system_id));
- pt.add_item(format!("static_coordinate_system_id: {:?}", node.static_coordinate_system_id));
for child_index in &node.children {
self.print_node(*child_index, pt);
@@ -847,11 +728,12 @@ impl SpatialTree {
/// Get the visible face of the transform from the specified node to its parent.
pub fn get_local_visible_face(&self, node_index: SpatialNodeIndex) -> VisibleFace {
let node = &self.spatial_nodes[node_index.0 as usize];
- let mut face = VisibleFace::Front;
- if let Some(parent_index) = node.parent {
- self.get_relative_transform_with_face(node_index, parent_index, Some(&mut face));
- }
- face
+ let parent_index = match node.parent {
+ Some(index) => index,
+ None => return VisibleFace::Front
+ };
+ self.get_relative_transform(node_index, parent_index)
+ .visible_face()
}
#[allow(dead_code)]
@@ -888,10 +770,7 @@ fn add_reference_frame(
parent,
TransformStyle::Preserve3D,
PropertyBinding::Value(transform),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
+ ReferenceFrameKind::Transform,
origin_in_parent_reference_frame,
PipelineId::dummy(),
)
@@ -1084,329 +963,3 @@ fn test_cst_translation_rotate() {
test_pt(100.0, 0.0, &cst, child1, root, 0.0, -100.0);
}
-
-#[test]
-fn test_is_ancestor1() {
- let mut st = SpatialTree::new();
-
- let root = add_reference_frame(
- &mut st,
- None,
- LayoutTransform::identity(),
- LayoutVector2D::zero(),
- );
-
- let child1_0 = add_reference_frame(
- &mut st,
- Some(root),
- LayoutTransform::identity(),
- LayoutVector2D::zero(),
- );
-
- let child1_1 = add_reference_frame(
- &mut st,
- Some(child1_0),
- LayoutTransform::identity(),
- LayoutVector2D::zero(),
- );
-
- let child2 = add_reference_frame(
- &mut st,
- Some(root),
- LayoutTransform::identity(),
- LayoutVector2D::zero(),
- );
-
- st.update_tree(
- WorldPoint::zero(),
- DevicePixelScale::new(1.0),
- &SceneProperties::new(),
- );
-
- assert!(!st.is_ancestor(root, root));
- assert!(!st.is_ancestor(child1_0, child1_0));
- assert!(!st.is_ancestor(child1_1, child1_1));
- assert!(!st.is_ancestor(child2, child2));
-
- assert!(st.is_ancestor(root, child1_0));
- assert!(st.is_ancestor(root, child1_1));
- assert!(st.is_ancestor(child1_0, child1_1));
-
- assert!(!st.is_ancestor(child1_0, root));
- assert!(!st.is_ancestor(child1_1, root));
- assert!(!st.is_ancestor(child1_1, child1_0));
-
- assert!(st.is_ancestor(root, child2));
- assert!(!st.is_ancestor(child2, root));
-
- assert!(!st.is_ancestor(child1_0, child2));
- assert!(!st.is_ancestor(child1_1, child2));
- assert!(!st.is_ancestor(child2, child1_0));
- assert!(!st.is_ancestor(child2, child1_1));
-}
-
-/// Tests that we select the correct scroll root in the simple case.
-#[test]
-fn test_find_scroll_root_simple() {
- let mut st = SpatialTree::new();
-
- let root = st.add_reference_frame(
- None,
- TransformStyle::Flat,
- PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
- LayoutVector2D::new(0.0, 0.0),
- PipelineId::dummy(),
- );
-
- let scroll = st.add_scroll_frame(
- root,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(800.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- assert_eq!(st.find_scroll_root(scroll), scroll);
-}
-
-/// Tests that we select the root scroll frame rather than the subframe if both are scrollable.
-#[test]
-fn test_find_scroll_root_sub_scroll_frame() {
- let mut st = SpatialTree::new();
-
- let root = st.add_reference_frame(
- None,
- TransformStyle::Flat,
- PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
- LayoutVector2D::new(0.0, 0.0),
- PipelineId::dummy(),
- );
-
- let root_scroll = st.add_scroll_frame(
- root,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(800.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- let sub_scroll = st.add_scroll_frame(
- root_scroll,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(800.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- assert_eq!(st.find_scroll_root(sub_scroll), root_scroll);
-}
-
-/// Tests that we select the sub scroll frame when the root scroll frame is not scrollable.
-#[test]
-fn test_find_scroll_root_not_scrollable() {
- let mut st = SpatialTree::new();
-
- let root = st.add_reference_frame(
- None,
- TransformStyle::Flat,
- PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
- LayoutVector2D::new(0.0, 0.0),
- PipelineId::dummy(),
- );
-
- let root_scroll = st.add_scroll_frame(
- root,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(400.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- let sub_scroll = st.add_scroll_frame(
- root_scroll,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(800.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- assert_eq!(st.find_scroll_root(sub_scroll), sub_scroll);
-}
-
-/// Tests that we select the sub scroll frame when the root scroll frame is too small.
-#[test]
-fn test_find_scroll_root_too_small() {
- let mut st = SpatialTree::new();
-
- let root = st.add_reference_frame(
- None,
- TransformStyle::Flat,
- PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
- LayoutVector2D::new(0.0, 0.0),
- PipelineId::dummy(),
- );
-
- let root_scroll = st.add_scroll_frame(
- root,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(MIN_SCROLL_ROOT_SIZE, MIN_SCROLL_ROOT_SIZE)),
- &LayoutSize::new(1000.0, 1000.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- let sub_scroll = st.add_scroll_frame(
- root_scroll,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(800.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- assert_eq!(st.find_scroll_root(sub_scroll), sub_scroll);
-}
-
-/// Tests that we select the root scroll node, even if it is not scrollable,
-/// when encountering a non-axis-aligned transform.
-#[test]
-fn test_find_scroll_root_perspective() {
- let mut st = SpatialTree::new();
-
- let root = st.add_reference_frame(
- None,
- TransformStyle::Flat,
- PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
- LayoutVector2D::new(0.0, 0.0),
- PipelineId::dummy(),
- );
-
- let root_scroll = st.add_scroll_frame(
- root,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(400.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- let perspective = st.add_reference_frame(
- Some(root_scroll),
- TransformStyle::Flat,
- PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Perspective {
- scrolling_relative_to: None,
- },
- LayoutVector2D::new(0.0, 0.0),
- PipelineId::dummy(),
- );
-
- let sub_scroll = st.add_scroll_frame(
- perspective,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(800.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- assert_eq!(st.find_scroll_root(sub_scroll), root_scroll);
-}
-
-/// Tests that encountering a 2D scale or translation transform does not prevent
-/// us from selecting the sub scroll frame if the root scroll frame is unscrollable.
-#[test]
-fn test_find_scroll_root_2d_scale() {
- let mut st = SpatialTree::new();
-
- let root = st.add_reference_frame(
- None,
- TransformStyle::Flat,
- PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: false,
- should_snap: false,
- },
- LayoutVector2D::new(0.0, 0.0),
- PipelineId::dummy(),
- );
-
- let root_scroll = st.add_scroll_frame(
- root,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(400.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- let scale = st.add_reference_frame(
- Some(root_scroll),
- TransformStyle::Flat,
- PropertyBinding::Value(LayoutTransform::identity()),
- ReferenceFrameKind::Transform {
- is_2d_scale_translation: true,
- should_snap: false,
- },
- LayoutVector2D::new(0.0, 0.0),
- PipelineId::dummy(),
- );
-
- let sub_scroll = st.add_scroll_frame(
- scale,
- ExternalScrollId(1, PipelineId::dummy()),
- PipelineId::dummy(),
- &LayoutRect::from_size(LayoutSize::new(400.0, 400.0)),
- &LayoutSize::new(800.0, 400.0),
- ScrollSensitivity::ScriptAndInputEvents,
- ScrollFrameKind::Explicit,
- LayoutVector2D::new(0.0, 0.0),
- );
-
- assert_eq!(st.find_scroll_root(sub_scroll), sub_scroll);
-}
diff --git a/third_party/webrender/webrender/src/prim_store/storage.rs b/third_party/webrender/webrender/src/storage.rs
index a928192cd99..a928192cd99 100644
--- a/third_party/webrender/webrender/src/prim_store/storage.rs
+++ b/third_party/webrender/webrender/src/storage.rs
diff --git a/third_party/webrender/webrender/src/texture_pack/guillotine.rs b/third_party/webrender/webrender/src/texture_allocator.rs
index c71cf974e50..1f7902b2191 100644
--- a/third_party/webrender/webrender/src/texture_pack/guillotine.rs
+++ b/third_party/webrender/webrender/src/texture_allocator.rs
@@ -41,46 +41,37 @@ pub struct FreeRectSlice(pub u32);
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct FreeRect {
+pub struct FreeRect {
slice: FreeRectSlice,
rect: DeviceIntRect,
}
-/// A texture allocator using the guillotine algorithm.
-///
-/// See sections 2.2 and 2.2.5 in "A Thousand Ways to Pack the Bin - A Practical Approach to Two-
+/// A texture allocator using the guillotine algorithm with the rectangle merge improvement. See
+/// sections 2.2 and 2.2.5 in "A Thousand Ways to Pack the Bin - A Practical Approach to Two-
/// Dimensional Rectangle Bin Packing":
///
/// http://clb.demon.fi/files/RectangleBinPack.pdf
///
-/// This approach was chosen because of its simplicity and good performance.
+/// This approach was chosen because of its simplicity, good performance, and easy support for
+/// dynamic texture deallocation.
///
/// Note: the allocations are spread across multiple textures, and also are binned
/// orthogonally in order to speed up the search.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct GuillotineAllocator {
+pub struct ArrayAllocationTracker {
bins: [Vec<FreeRect>; NUM_BINS],
}
-impl GuillotineAllocator {
- pub fn new(initial_size: Option<DeviceIntSize>) -> Self {
- let mut allocator = GuillotineAllocator {
+impl ArrayAllocationTracker {
+ pub fn new() -> Self {
+ ArrayAllocationTracker {
bins: [
Vec::new(),
Vec::new(),
Vec::new(),
],
- };
-
- if let Some(initial_size) = initial_size {
- allocator.push(
- FreeRectSlice(0),
- initial_size.into(),
- );
}
-
- allocator
}
fn push(&mut self, slice: FreeRectSlice, rect: DeviceIntRect) {
@@ -222,7 +213,7 @@ fn random_fill(count: usize, texture_size: i32) -> f32 {
DeviceIntSize::new(texture_size, texture_size),
);
let mut rng = thread_rng();
- let mut allocator = GuillotineAllocator::new(None);
+ let mut allocator = ArrayAllocationTracker::new();
// check for empty allocation
assert_eq!(
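
To make the allocator comment above concrete, here is a minimal sketch of one guillotine step (illustrative only, not the tracker's actual code): each allocation carves the chosen free rectangle into the allocated corner plus two fresh free rectangles.

#[derive(Clone, Copy, Debug, PartialEq)]
struct Free { x: i32, y: i32, w: i32, h: i32 }

// One guillotine cut: take (w, h) from the top-left corner of `free` and
// return the allocated rect plus the two leftover strips (right, bottom).
fn guillotine_cut(free: Free, w: i32, h: i32) -> (Free, [Free; 2]) {
    assert!(w <= free.w && h <= free.h);
    let allocated = Free { x: free.x, y: free.y, w, h };
    let right  = Free { x: free.x + w, y: free.y, w: free.w - w, h };
    let bottom = Free { x: free.x, y: free.y + h, w: free.w, h: free.h - h };
    (allocated, [right, bottom])
}
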
diff --git a/third_party/webrender/webrender/src/texture_cache.rs b/third_party/webrender/webrender/src/texture_cache.rs
index f2ca3213dbb..9b244656805 100644
--- a/third_party/webrender/webrender/src/texture_cache.rs
+++ b/third_party/webrender/webrender/src/texture_cache.rs
@@ -2,54 +2,42 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-use api::{DirtyRect, ExternalImageType, ImageFormat, ImageBufferKind};
+use api::{DirtyRect, ExternalImageType, ImageFormat};
use api::{DebugFlags, ImageDescriptor};
use api::units::*;
#[cfg(test)]
use api::{DocumentId, IdNamespace};
use crate::device::{TextureFilter, TextureFormatPair};
-use crate::freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
+use crate::freelist::{FreeListHandle, WeakFreeListHandle};
use crate::gpu_cache::{GpuCache, GpuCacheHandle};
use crate::gpu_types::{ImageSource, UvRectKind};
use crate::internal_types::{
- CacheTextureId, Swizzle, SwizzleSettings,
+ CacheTextureId, LayerIndex, Swizzle, SwizzleSettings,
TextureUpdateList, TextureUpdateSource, TextureSource,
TextureCacheAllocInfo, TextureCacheUpdate,
};
use crate::lru_cache::LRUCache;
-use crate::profiler::{self, TransactionProfile};
-use crate::render_backend::{FrameStamp, FrameId};
+use crate::profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
+use crate::render_backend::FrameStamp;
use crate::resource_cache::{CacheItem, CachedImageData};
-use crate::texture_pack::{
- AllocatorList,
- AllocId,
- AtlasAllocatorList,
- ShelfAllocator,
- ShelfAllocatorOptions,
- SlabAllocator, SlabAllocatorParameters,
-};
use smallvec::SmallVec;
use std::cell::Cell;
-use std::{cmp, mem};
+use std::cmp;
+use std::mem;
use std::rc::Rc;
-use euclid::size2;
-use malloc_size_of::{MallocSizeOf, MallocSizeOfOps};
-
-/// Information about which shader will use the entry.
-///
-/// For batching purposes, it's beneficial to group some items in their
-/// own textures if we know that they are used by a specific shader.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum TargetShader {
- Default,
- Text,
-}
-/// The size of each region in shared cache texture arrays.
+/// The size of each region/layer in shared cache texture arrays.
pub const TEXTURE_REGION_DIMENSIONS: i32 = 512;
+const PICTURE_TEXTURE_ADD_SLICES: usize = 4;
+
+/// The chosen image format for picture tiles.
+const PICTURE_TILE_FORMAT: ImageFormat = ImageFormat::RGBA8;
+
+/// The number of pixels in a region. Derived from the above.
+const TEXTURE_REGION_PIXELS: usize =
+ (TEXTURE_REGION_DIMENSIONS as usize) * (TEXTURE_REGION_DIMENSIONS as usize);
+
/// Items in the texture cache can either be standalone textures,
/// or a sub-rect inside the shared cache.
#[derive(Debug)]
@@ -61,25 +49,25 @@ enum EntryDetails {
size_in_bytes: usize,
},
Picture {
- /// Size of the tile (used for debug clears only)
- size: DeviceIntSize,
+ // Index in the picture_textures array
+ texture_index: usize,
+ // Slice in the texture array
+ layer_index: usize,
},
Cache {
/// Origin within the texture layer where this item exists.
origin: DeviceIntPoint,
- /// ID of the allocation specific to its allocator.
- alloc_id: AllocId,
- /// The allocated size in bytes for this entry.
- allocated_size_in_bytes: usize,
+ /// The layer index of the texture array.
+ layer_index: usize,
},
}
impl EntryDetails {
- fn describe(&self) -> DeviceIntPoint {
+ fn describe(&self) -> (LayerIndex, DeviceIntPoint) {
match *self {
- EntryDetails::Standalone { .. } => DeviceIntPoint::zero(),
- EntryDetails::Picture { .. } => DeviceIntPoint::zero(),
- EntryDetails::Cache { origin, .. } => origin,
+ EntryDetails::Standalone { .. } => (0, DeviceIntPoint::zero()),
+ EntryDetails::Picture { layer_index, .. } => (layer_index, DeviceIntPoint::zero()),
+ EntryDetails::Cache { origin, layer_index, .. } => (layer_index, origin),
}
}
}
@@ -87,17 +75,7 @@ impl EntryDetails {
#[derive(Debug, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum PictureCacheEntryMarker {}
-
-#[derive(Debug, PartialEq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum AutoCacheEntryMarker {}
-
-#[derive(Debug, PartialEq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum ManualCacheEntryMarker {}
+pub enum CacheEntryMarker {}
// Stores information related to a single entry in the texture
// cache. This is stored for each item whether it's in the shared
@@ -106,14 +84,12 @@ pub enum ManualCacheEntryMarker {}
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct CacheEntry {
- /// Size of the requested item, in device pixels. Does not include any
- /// padding for alignment that the allocator may have added to this entry's
- /// allocation.
+ /// Size of the requested item, in device pixels.
size: DeviceIntSize,
/// Details specific to standalone or shared items.
details: EntryDetails,
/// Arbitrary user data associated with this item.
- user_data: [f32; 4],
+ user_data: [f32; 3],
/// The last frame this item was requested for rendering.
// TODO(gw): This stamp is only used for picture cache tiles, and some checks
// in the glyph cache eviction code. We could probably remove it
@@ -131,15 +107,8 @@ struct CacheEntry {
eviction_notice: Option<EvictionNotice>,
/// The type of UV rect this entry specifies.
uv_rect_kind: UvRectKind,
-
- shader: TargetShader,
}
-malloc_size_of::malloc_size_of_is_0!(
- CacheEntry,
- AutoCacheEntryMarker, ManualCacheEntryMarker, PictureCacheEntryMarker
-);
-
impl CacheEntry {
// Create a new entry for a standalone texture.
fn new_standalone(
@@ -163,7 +132,6 @@ impl CacheEntry {
uv_rect_handle: GpuCacheHandle::new(),
eviction_notice: None,
uv_rect_kind: params.uv_rect_kind,
- shader: TargetShader::Default,
}
}
@@ -173,10 +141,11 @@ impl CacheEntry {
// to fetch from.
fn update_gpu_cache(&mut self, gpu_cache: &mut GpuCache) {
if let Some(mut request) = gpu_cache.request(&mut self.uv_rect_handle) {
- let origin = self.details.describe();
+ let (layer_index, origin) = self.details.describe();
let image_source = ImageSource {
p0: origin.to_f32(),
p1: (origin + self.size).to_f32(),
+ texture_layer: layer_index as f32,
user_data: self.user_data,
uv_rect_kind: self.uv_rect_kind,
};
@@ -206,29 +175,7 @@ impl CacheEntry {
/// previously inserted and then evicted, lookup of the handle will fail, and
/// the cache handle needs to re-upload this item to the texture cache (see
/// request() below).
-
-#[derive(MallocSizeOf,Clone,PartialEq,Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum TextureCacheHandle {
- /// A fresh handle.
- Empty,
-
- /// A handle for a picture cache entry, evicted on every frame if not used.
- Picture(WeakFreeListHandle<PictureCacheEntryMarker>),
-
- /// A handle for an entry with automatic eviction.
- Auto(WeakFreeListHandle<AutoCacheEntryMarker>),
-
- /// A handle for an entry with manual eviction.
- Manual(WeakFreeListHandle<ManualCacheEntryMarker>)
-}
-
-impl TextureCacheHandle {
- pub fn invalid() -> Self {
- TextureCacheHandle::Empty
- }
-}
+pub type TextureCacheHandle = WeakFreeListHandle<CacheEntryMarker>;
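The weak-handle contract, in brief: a TextureCacheHandle outlives the entry it
names, and simply stops resolving after eviction rather than dangling.

    // A sketch of the caller-visible behavior (handle and cache assumed):
    let resident = texture_cache.is_allocated(&handle);
    // After the entry is evicted, the same weak handle stops resolving, so
    // is_allocated() returns false and request() reports "needs upload".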
/// Describes the eviction policy for a given entry in the texture cache.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -271,285 +218,136 @@ impl EvictionNotice {
}
}
-/// The different budget types for the texture cache. Each type has its own
-/// memory budget. Once the budget is exceeded, entries with automatic eviction
-/// are evicted. Entries with manual eviction share the same budget but are not
-/// evicted once the budget is exceeded.
-/// Keeping separate budgets ensures that we don't evict entries from unrelated
-/// textures if one texture gets full.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-#[repr(u8)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-enum BudgetType {
- SharedColor8Linear,
- SharedColor8Nearest,
- SharedColor8Glyphs,
- SharedAlpha8,
- SharedAlpha8Glyphs,
- SharedAlpha16,
- Standalone,
-}
-
-impl BudgetType {
- pub const COUNT: usize = 7;
-
- pub const VALUES: [BudgetType; BudgetType::COUNT] = [
- BudgetType::SharedColor8Linear,
- BudgetType::SharedColor8Nearest,
- BudgetType::SharedColor8Glyphs,
- BudgetType::SharedAlpha8,
- BudgetType::SharedAlpha8Glyphs,
- BudgetType::SharedAlpha16,
- BudgetType::Standalone,
- ];
-
- pub const PRESSURE_COUNTERS: [usize; BudgetType::COUNT] = [
- profiler::TEXTURE_CACHE_COLOR8_LINEAR_PRESSURE,
- profiler::TEXTURE_CACHE_COLOR8_NEAREST_PRESSURE,
- profiler::TEXTURE_CACHE_COLOR8_GLYPHS_PRESSURE,
- profiler::TEXTURE_CACHE_ALPHA8_PRESSURE,
- profiler::TEXTURE_CACHE_ALPHA8_GLYPHS_PRESSURE,
- profiler::TEXTURE_CACHE_ALPHA16_PRESSURE,
- profiler::TEXTURE_CACHE_STANDALONE_PRESSURE,
- ];
-
- pub fn iter() -> impl Iterator<Item = BudgetType> {
- BudgetType::VALUES.iter().cloned()
- }
-}
-
/// A set of lazily allocated, fixed size, texture arrays for each format the
/// texture cache supports.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct SharedTextures {
- color8_nearest: AllocatorList<ShelfAllocator, TextureParameters>,
- alpha8_linear: AllocatorList<ShelfAllocator, TextureParameters>,
- alpha8_glyphs: AllocatorList<ShelfAllocator, TextureParameters>,
- alpha16_linear: AllocatorList<SlabAllocator, TextureParameters>,
- color8_linear: AllocatorList<ShelfAllocator, TextureParameters>,
- color8_glyphs: AllocatorList<ShelfAllocator, TextureParameters>,
- bytes_per_texture_of_type: [i32 ; BudgetType::COUNT],
+ array_color8_nearest: TextureArray,
+ array_alpha8_linear: TextureArray,
+ array_alpha16_linear: TextureArray,
+ array_color8_linear: TextureArray,
}
impl SharedTextures {
/// Mints a new set of shared textures.
- fn new(color_formats: TextureFormatPair<ImageFormat>, config: &TextureCacheConfig) -> Self {
- let mut bytes_per_texture_of_type = [0 ; BudgetType::COUNT];
-
- // Used primarily for cached shadow masks. There can be lots of
- // these on some pages like francine, but most pages don't use it
- // much.
- // Most content tends to fit into two 512x512 textures. We are
- // conservatively using 1024x1024 to fit everything in a single
- // texture and avoid breaking batches, but it's worth checking
- // whether it would actually lead to a lot of batch breaks in
- // practice.
- let alpha8_linear = AllocatorList::new(
- config.alpha8_texture_size,
- ShelfAllocatorOptions {
- num_columns: 1,
- alignment: size2(8, 8),
- .. ShelfAllocatorOptions::default()
- },
- TextureParameters {
- formats: TextureFormatPair::from(ImageFormat::R8),
- filter: TextureFilter::Linear,
- },
- );
- bytes_per_texture_of_type[BudgetType::SharedAlpha8 as usize] =
- config.alpha8_texture_size * config.alpha8_texture_size;
-
- // The cache for alpha glyphs (separate to help with batching).
- let alpha8_glyphs = AllocatorList::new(
- config.alpha8_glyph_texture_size,
- ShelfAllocatorOptions {
- num_columns: if config.alpha8_glyph_texture_size >= 1024 { 2 } else { 1 },
- alignment: size2(4, 8),
- .. ShelfAllocatorOptions::default()
- },
- TextureParameters {
- formats: TextureFormatPair::from(ImageFormat::R8),
- filter: TextureFilter::Linear,
- },
- );
- bytes_per_texture_of_type[BudgetType::SharedAlpha8Glyphs as usize] =
- config.alpha8_glyph_texture_size * config.alpha8_glyph_texture_size;
-
- // Used for experimental hdr yuv texture support, but not used in
- // production Firefox.
- let alpha16_linear = AllocatorList::new(
- config.alpha16_texture_size,
- SlabAllocatorParameters {
- region_size: TEXTURE_REGION_DIMENSIONS,
- },
- TextureParameters {
- formats: TextureFormatPair::from(ImageFormat::R16),
- filter: TextureFilter::Linear,
- },
- );
- bytes_per_texture_of_type[BudgetType::SharedAlpha16 as usize] =
- ImageFormat::R16.bytes_per_pixel() *
- config.alpha16_texture_size * config.alpha16_texture_size;
-
- // The primary cache for images, etc.
- let color8_linear = AllocatorList::new(
- config.color8_linear_texture_size,
- ShelfAllocatorOptions {
- num_columns: if config.color8_linear_texture_size >= 1024 { 2 } else { 1 },
- alignment: size2(16, 16),
- .. ShelfAllocatorOptions::default()
- },
- TextureParameters {
- formats: color_formats.clone(),
- filter: TextureFilter::Linear,
- },
- );
- bytes_per_texture_of_type[BudgetType::SharedColor8Linear as usize] =
- color_formats.internal.bytes_per_pixel() *
- config.color8_linear_texture_size * config.color8_linear_texture_size;
-
- // The cache for subpixel-AA and bitmap glyphs (separate to help with batching).
- let color8_glyphs = AllocatorList::new(
- config.color8_glyph_texture_size,
- ShelfAllocatorOptions {
- num_columns: if config.color8_glyph_texture_size >= 1024 { 2 } else { 1 },
- alignment: size2(4, 8),
- .. ShelfAllocatorOptions::default()
- },
- TextureParameters {
- formats: color_formats.clone(),
- filter: TextureFilter::Linear,
- },
- );
- bytes_per_texture_of_type[BudgetType::SharedColor8Glyphs as usize] =
- color_formats.internal.bytes_per_pixel() *
- config.color8_glyph_texture_size * config.color8_glyph_texture_size;
-
- // Used for image-rendering: crisp. This is mostly favicons, which
- // are small. Some other images use it too, but those tend to be
- // larger than 512x512 and thus don't use the shared cache anyway.
- let color8_nearest = AllocatorList::new(
- config.color8_nearest_texture_size,
- ShelfAllocatorOptions::default(),
- TextureParameters {
- formats: color_formats.clone(),
- filter: TextureFilter::Nearest,
- }
- );
- bytes_per_texture_of_type[BudgetType::SharedColor8Nearest as usize] =
- color_formats.internal.bytes_per_pixel() *
- config.color8_nearest_texture_size * config.color8_nearest_texture_size;
-
+ fn new(color_formats: TextureFormatPair<ImageFormat>) -> Self {
Self {
- alpha8_linear,
- alpha8_glyphs,
- alpha16_linear,
- color8_linear,
- color8_glyphs,
- color8_nearest,
- bytes_per_texture_of_type,
+ // Used primarily for cached shadow masks. There can be lots of
+ // these on some pages like francine, but most pages don't use it
+ // much.
+ array_alpha8_linear: TextureArray::new(
+ TextureFormatPair::from(ImageFormat::R8),
+ TextureFilter::Linear,
+ 4,
+ ),
+ // Used for experimental HDR YUV texture support, but not used in
+ // production Firefox.
+ array_alpha16_linear: TextureArray::new(
+ TextureFormatPair::from(ImageFormat::R16),
+ TextureFilter::Linear,
+ 1,
+ ),
+ // The primary cache for images, glyphs, etc.
+ array_color8_linear: TextureArray::new(
+ color_formats.clone(),
+ TextureFilter::Linear,
+ 16,
+ ),
+ // Used for image-rendering: crisp. This is mostly favicons, which
+ // are small. Some other images use it too, but those tend to be
+ // larger than 512x512 and thus don't use the shared cache anyway.
+ array_color8_nearest: TextureArray::new(
+ color_formats,
+ TextureFilter::Nearest,
+ 1,
+ ),
}
}
/// Clears each texture in the set, with the given set of pending updates.
fn clear(&mut self, updates: &mut TextureUpdateList) {
- let texture_dealloc_cb = &mut |texture_id| {
- updates.push_free(texture_id);
- };
-
- self.alpha8_linear.clear(texture_dealloc_cb);
- self.alpha8_glyphs.clear(texture_dealloc_cb);
- self.alpha16_linear.clear(texture_dealloc_cb);
- self.color8_linear.clear(texture_dealloc_cb);
- self.color8_nearest.clear(texture_dealloc_cb);
- self.color8_glyphs.clear(texture_dealloc_cb);
+ self.array_alpha8_linear.clear(updates);
+ self.array_alpha16_linear.clear(updates);
+ self.array_color8_linear.clear(updates);
+ self.array_color8_nearest.clear(updates);
}
/// Returns a mutable borrow for the shared texture array matching the parameters.
fn select(
- &mut self, external_format: ImageFormat, filter: TextureFilter, shader: TargetShader,
- ) -> (&mut dyn AtlasAllocatorList<TextureParameters>, BudgetType) {
+ &mut self, external_format: ImageFormat, filter: TextureFilter
+ ) -> &mut TextureArray {
match external_format {
ImageFormat::R8 => {
assert_eq!(filter, TextureFilter::Linear);
- match shader {
- TargetShader::Text => {
- (&mut self.alpha8_glyphs, BudgetType::SharedAlpha8Glyphs)
- },
- _ => (&mut self.alpha8_linear, BudgetType::SharedAlpha8),
- }
+ &mut self.array_alpha8_linear
}
ImageFormat::R16 => {
assert_eq!(filter, TextureFilter::Linear);
- (&mut self.alpha16_linear, BudgetType::SharedAlpha16)
+ &mut self.array_alpha16_linear
}
ImageFormat::RGBA8 |
ImageFormat::BGRA8 => {
- match (filter, shader) {
- (TextureFilter::Linear, TargetShader::Text) => {
- (&mut self.color8_glyphs, BudgetType::SharedColor8Glyphs)
- },
- (TextureFilter::Linear, _) => {
- (&mut self.color8_linear, BudgetType::SharedColor8Linear)
- },
- (TextureFilter::Nearest, _) => {
- (&mut self.color8_nearest, BudgetType::SharedColor8Nearest)
- },
- _ => panic!("Unexpected filter {:?}", filter),
+ match filter {
+ TextureFilter::Linear => &mut self.array_color8_linear,
+ TextureFilter::Nearest => &mut self.array_color8_nearest,
+ _ => panic!("Unexpexcted filter {:?}", filter),
}
}
_ => panic!("Unexpected format {:?}", external_format),
}
}
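For reference, the full mapping select() implements over the four arrays
declared above:

    // (external format, filter)   -> shared texture array
    // (R8,           Linear)      -> array_alpha8_linear
    // (R16,          Linear)      -> array_alpha16_linear
    // (RGBA8/BGRA8,  Linear)      -> array_color8_linear
    // (RGBA8/BGRA8,  Nearest)     -> array_color8_nearest
    // anything else               -> panic (unsupported in the shared cache)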
-
- /// How many bytes a single texture of the given type takes up, for the
- /// configured texture sizes.
- fn bytes_per_shared_texture(&self, budget_type: BudgetType) -> usize {
- self.bytes_per_texture_of_type[budget_type as usize] as usize
- }
-}
-
-/// The textures used to hold picture cache tiles.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct PictureTexture {
- texture_id: CacheTextureId,
- size: DeviceIntSize,
- format: ImageFormat,
- is_allocated: bool,
- last_frame_used: FrameId,
-}
-
-impl PictureTexture {
- fn size_in_bytes(&self) -> usize {
- let bpp = self.format.bytes_per_pixel() as usize;
- (self.size.width * self.size.height) as usize * bpp
- }
}
-/// The textures used to hold picture cache tiles.
+/// The texture arrays used to hold picture cache tiles.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct PictureTextures {
- /// Current list of textures in the pool
- textures: Vec<PictureTexture>,
- /// Default tile size for content tiles
- default_tile_size: DeviceIntSize,
- /// Number of currently allocated textures in the pool
- allocated_texture_count: usize,
+ textures: Vec<WholeTextureArray>,
}
impl PictureTextures {
fn new(
- default_tile_size: DeviceIntSize,
+ initial_window_size: DeviceIntSize,
+ picture_tile_sizes: &[DeviceIntSize],
+ next_texture_id: &mut CacheTextureId,
+ pending_updates: &mut TextureUpdateList,
) -> Self {
- PictureTextures {
- textures: Vec::new(),
- default_tile_size,
- allocated_texture_count: 0,
+ let mut textures = Vec::new();
+ for tile_size in picture_tile_sizes {
+ // TODO(gw): The way initial size is used here may allocate a lot of memory once
+ // we are using multiple slice sizes. Do some measurements once we
+ // have multiple slices here and adjust the calculations as required.
+ let num_x = (initial_window_size.width + tile_size.width - 1) / tile_size.width;
+ let num_y = (initial_window_size.height + tile_size.height - 1) / tile_size.height;
+ let mut slice_count = (num_x * num_y).max(1).min(16) as usize;
+ if slice_count < 4 {
+ // On some platforms we get a bogus (1x1) initial window size. The first real frame will then
+ // reallocate many more picture cache slices. Don't bother preallocating in that case.
+ slice_count = 0;
+ }
+
+ if slice_count == 0 {
+ continue;
+ }
+
+ let texture = WholeTextureArray {
+ size: *tile_size,
+ filter: TextureFilter::Nearest,
+ format: PICTURE_TILE_FORMAT,
+ texture_id: *next_texture_id,
+ slices: vec![WholeTextureSlice { uv_rect_handle: None }; slice_count],
+ has_depth: true,
+ };
+
+ next_texture_id.0 += 1;
+
+ pending_updates.push_alloc(texture.texture_id, texture.to_info());
+
+ textures.push(texture);
}
+
+ PictureTextures { textures }
}
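A worked example of the preallocation heuristic above (the window and tile
sizes are illustrative only):

    // 1920x1080 window with 1024x512 tiles:
    //   num_x = ceil(1920 / 1024) = 2
    //   num_y = ceil(1080 /  512) = 3
    //   slice_count = clamp(2 * 3, 1, 16) = 6   // >= 4, so we preallocate
    // A bogus 1x1 bootstrap window gives slice_count = 1 (< 4), so nothing
    // is preallocated and the first real frame sizes the arrays instead.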
fn get_or_allocate_tile(
@@ -559,143 +357,75 @@ impl PictureTextures {
next_texture_id: &mut CacheTextureId,
pending_updates: &mut TextureUpdateList,
) -> CacheEntry {
- let mut texture_id = None;
- self.allocated_texture_count += 1;
+ let texture_index = self.textures
+ .iter()
+ .position(|texture| { texture.size == tile_size })
+ .unwrap_or(self.textures.len());
- for texture in &mut self.textures {
- if texture.size == tile_size && !texture.is_allocated {
- // Found a target that's not currently in use which matches. Update
- // the last_frame_used for GC purposes.
- texture.is_allocated = true;
- texture.last_frame_used = FrameId::INVALID;
- texture_id = Some(texture.texture_id);
- break;
- }
- }
-
- // Need to create a new render target and add it to the pool
-
- let texture_id = texture_id.unwrap_or_else(|| {
- let texture_id = *next_texture_id;
- next_texture_id.0 += 1;
-
- // Push a command to allocate device storage of the right size / format.
- let info = TextureCacheAllocInfo {
- target: ImageBufferKind::Texture2D,
- width: tile_size.width,
- height: tile_size.height,
- format: ImageFormat::RGBA8,
+ if texture_index == self.textures.len() {
+ self.textures.push(WholeTextureArray {
+ size: tile_size,
filter: TextureFilter::Nearest,
- is_shared_cache: false,
+ format: PICTURE_TILE_FORMAT,
+ texture_id: *next_texture_id,
+ slices: Vec::new(),
has_depth: true,
- };
-
- pending_updates.push_alloc(texture_id, info);
-
- self.textures.push(PictureTexture {
- texture_id,
- is_allocated: true,
- format: ImageFormat::RGBA8,
- size: tile_size,
- last_frame_used: FrameId::INVALID,
});
-
- texture_id
- });
-
- CacheEntry {
- size: tile_size,
- user_data: [0.0; 4],
- last_access: now,
- details: EntryDetails::Picture {
- size: tile_size,
- },
- uv_rect_handle: GpuCacheHandle::new(),
- input_format: ImageFormat::RGBA8,
- filter: TextureFilter::Nearest,
- swizzle: Swizzle::default(),
- texture_id,
- eviction_notice: None,
- uv_rect_kind: UvRectKind::Rect,
- shader: TargetShader::Default,
+ next_texture_id.0 += 1;
}
- }
- fn free_tile(
- &mut self,
- id: CacheTextureId,
- current_frame_id: FrameId,
- ) {
- self.allocated_texture_count -= 1;
+ let texture = &mut self.textures[texture_index];
- let texture = self.textures
- .iter_mut()
- .find(|t| t.texture_id == id)
- .expect("bug: invalid texture id");
+ let layer_index = match texture.find_free() {
+ Some(index) => index,
+ None => {
+ let was_empty = texture.slices.is_empty();
+ let index = texture.grow(PICTURE_TEXTURE_ADD_SLICES);
+ let info = texture.to_info();
+ if was_empty {
+ pending_updates.push_alloc(texture.texture_id, info);
+ } else {
+ pending_updates.push_realloc(texture.texture_id, info);
+ }
- assert!(texture.is_allocated);
- texture.is_allocated = false;
+ index
+ },
+ };
+
+ texture.occupy(texture_index, layer_index, now)
+ }
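occupy() is defined later in this file; a sketch of the shape its call site
implies, with field values inferred from how picture entries are used here:

    fn occupy(&mut self, texture_index: usize, layer_index: usize, now: FrameStamp) -> CacheEntry {
        // Marking the slice's uv_rect_handle as Some(..) is what find_free()
        // checks, so this both claims the layer and records its occupancy.
        self.slices[layer_index].uv_rect_handle = Some(GpuCacheHandle::new());
        CacheEntry {
            size: self.size,
            user_data: [0.0; 3],
            last_access: now,
            details: EntryDetails::Picture { texture_index, layer_index },
            uv_rect_handle: GpuCacheHandle::new(),
            input_format: self.format,
            filter: self.filter,
            swizzle: Swizzle::default(),
            texture_id: self.texture_id,
            eviction_notice: None,
            uv_rect_kind: UvRectKind::Rect,
        }
    }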
- assert_eq!(texture.last_frame_used, FrameId::INVALID);
- texture.last_frame_used = current_frame_id;
+ fn get(&mut self, index: usize) -> &mut WholeTextureArray {
+ &mut self.textures[index]
}
fn clear(&mut self, pending_updates: &mut TextureUpdateList) {
- for texture in self.textures.drain(..) {
- pending_updates.push_free(texture.texture_id);
+ for texture in &mut self.textures {
+ if texture.slices.is_empty() {
+ continue;
+ }
+
+ if let Some(texture_id) = texture.reset(PICTURE_TEXTURE_ADD_SLICES) {
+ pending_updates.push_reset(texture_id, texture.to_info());
+ }
}
}
- fn update_profile(&self, profile: &mut TransactionProfile) {
- // For now, this profile counter just accumulates the tiles and bytes
- // from all picture cache textures.
- let mut picture_tiles = 0;
+ fn update_profile(&self, profile: &mut ResourceProfileCounter) {
+ // For now, this profile counter just accumulates the slices and bytes
+ // from all picture cache texture arrays.
+ let mut picture_slices = 0;
let mut picture_bytes = 0;
for texture in &self.textures {
- picture_tiles += 1;
+ picture_slices += texture.slices.len();
picture_bytes += texture.size_in_bytes();
}
- profile.set(profiler::PICTURE_TILES, picture_tiles);
- profile.set(profiler::PICTURE_TILES_MEM, profiler::bytes_to_mb(picture_bytes));
+ profile.set(picture_slices, picture_bytes);
}
- /// Simple garbage collect of picture cache tiles
- fn gc(
- &mut self,
- pending_updates: &mut TextureUpdateList,
- ) {
- // Allow the picture cache pool to keep 25% of the current allocated tile count
- // as free textures to be reused. This ensures the allowed tile count is appropriate
- // based on current window size.
- let free_texture_count = self.textures.len() - self.allocated_texture_count;
- let allowed_retained_count = (self.allocated_texture_count as f32 * 0.25).ceil() as usize;
- let do_gc = free_texture_count > allowed_retained_count;
-
- if do_gc {
- // Sort the current pool by age, so that we remove oldest textures first
- self.textures.sort_unstable_by_key(|t| cmp::Reverse(t.last_frame_used));
-
- // We can't just use retain() because `PictureTexture` requires manual cleanup.
- let mut allocated_targets = SmallVec::<[PictureTexture; 32]>::new();
- let mut retained_targets = SmallVec::<[PictureTexture; 32]>::new();
-
- for target in self.textures.drain(..) {
- if target.is_allocated {
- // Allocated targets can't be collected
- allocated_targets.push(target);
- } else if retained_targets.len() < allowed_retained_count {
- // Retain the most recently used targets up to the allowed count
- retained_targets.push(target);
- } else {
- // The rest of the targets get freed
- assert_ne!(target.last_frame_used, FrameId::INVALID);
- pending_updates.push_free(target.texture_id);
- }
- }
-
- self.textures.extend(retained_targets);
- self.textures.extend(allocated_targets);
- }
+ #[cfg(feature = "replay")]
+ fn tile_sizes(&self) -> Vec<DeviceIntSize> {
+ self.textures.iter().map(|pt| pt.size).collect()
}
}
@@ -703,33 +433,8 @@ impl PictureTextures {
struct CacheAllocParams {
descriptor: ImageDescriptor,
filter: TextureFilter,
- user_data: [f32; 4],
+ user_data: [f32; 3],
uv_rect_kind: UvRectKind,
- shader: TargetShader,
-}
-
-/// Startup parameters for the texture cache.
-///
-/// Texture sizes must be at least 512.
-#[derive(Clone)]
-pub struct TextureCacheConfig {
- pub color8_linear_texture_size: i32,
- pub color8_nearest_texture_size: i32,
- pub color8_glyph_texture_size: i32,
- pub alpha8_texture_size: i32,
- pub alpha8_glyph_texture_size: i32,
- pub alpha16_texture_size: i32,
-}
-
-impl TextureCacheConfig {
- pub const DEFAULT: Self = TextureCacheConfig {
- color8_linear_texture_size: 2048,
- color8_nearest_texture_size: 512,
- color8_glyph_texture_size: 2048,
- alpha8_texture_size: 1024,
- alpha8_glyph_texture_size: 2048,
- alpha16_texture_size: 512,
- };
}
/// General-purpose manager for images in GPU memory. This includes images,
@@ -761,9 +466,8 @@ pub struct TextureCache {
/// Maximum texture size supported by hardware.
max_texture_size: i32,
- /// Maximum texture size before it is considered preferable to break the
- /// texture into tiles.
- tiling_threshold: i32,
+ /// Maximum number of texture layers supported by hardware.
+ max_texture_layers: usize,
/// Settings on using texture unit swizzling.
swizzle: Option<SwizzleSettings>,
@@ -782,43 +486,79 @@ pub struct TextureCache {
/// The current `FrameStamp`. Used for cache eviction policies.
now: FrameStamp,
- /// Cache of texture cache handles with automatic lifetime management, evicted
- /// in a least-recently-used order.
- lru_cache: LRUCache<CacheEntry, AutoCacheEntryMarker>,
+ /// List of picture cache entries. These are maintained separately from regular
+ /// texture cache entries.
+ picture_cache_handles: Vec<FreeListHandle<CacheEntryMarker>>,
- /// Cache of picture cache entries.
- picture_cache_entries: FreeList<CacheEntry, PictureCacheEntryMarker>,
+ /// Cache of texture cache handles with automatic lifetime management, evicted
+ /// in a least-recently-used order (except those entries with manual eviction enabled).
+ lru_cache: LRUCache<CacheEntry, CacheEntryMarker>,
- /// Strong handles for the picture_cache_entries FreeList.
- picture_cache_handles: Vec<FreeListHandle<PictureCacheEntryMarker>>,
+ /// A list of texture cache handles that have been set to explicitly have manual
+ /// eviction policy enabled. The handles reference cache entries in the lru_cache
+ /// above, but have opted in to manual lifetime management.
+ manual_handles: Vec<FreeListHandle<CacheEntryMarker>>,
- /// Cache of texture cache entries with manual lifetime management.
- manual_entries: FreeList<CacheEntry, ManualCacheEntryMarker>,
+ /// Estimated memory usage of allocated entries in all of the shared textures. This
+ /// is used to decide when to evict old items from the cache.
+ shared_bytes_allocated: usize,
- /// Strong handles for the manual_entries FreeList.
- manual_handles: Vec<FreeListHandle<ManualCacheEntryMarker>>,
+ /// Number of bytes allocated in standalone textures. Used as an input to deciding
+ /// when to run texture cache eviction.
+ standalone_bytes_allocated: usize,
- /// Memory usage of allocated entries in all of the shared or standalone
- /// textures. Includes both manually and automatically evicted entries.
- bytes_allocated: [usize ; BudgetType::COUNT],
-}
+ /// If the total bytes allocated in shared / standalone cache is less
+ /// than this, then allow the cache to grow without forcing an eviction.
+ // TODO(gw): In future, it's probably reasonable to make this higher again, perhaps 64-128 MB.
+ eviction_threshold_bytes: usize,
-impl TextureCache {
/// The maximum number of items that will be evicted per frame. This limit helps avoid jank
/// on frames where we want to evict a large number of items. Instead, we'd prefer to drop
/// the items incrementally over a number of frames, even if that means the total allocated
/// size of the cache is above the desired threshold for a small number of frames.
- const MAX_EVICTIONS_PER_FRAME: usize = 32;
+ max_evictions_per_frame: usize,
+}
+impl TextureCache {
pub fn new(
max_texture_size: i32,
- tiling_threshold: i32,
- default_picture_tile_size: DeviceIntSize,
+ mut max_texture_layers: usize,
+ picture_tile_sizes: &[DeviceIntSize],
+ initial_size: DeviceIntSize,
color_formats: TextureFormatPair<ImageFormat>,
swizzle: Option<SwizzleSettings>,
- config: &TextureCacheConfig,
+ eviction_threshold_bytes: usize,
+ max_evictions_per_frame: usize,
) -> Self {
- let pending_updates = TextureUpdateList::new();
+ // On MBP integrated Intel GPUs, texture arrays appear to be
+ // implemented as a single texture of stacked layers, and that
+ // texture appears to be subject to the texture size limit. As such,
+ // allocating more than 32 512x512 regions results in a dimension
+ // longer than 16k (the max texture size), causing incorrect behavior.
+ //
+ // So we clamp the number of layers on mac. This results in maximum
+ // texture array size of 32MB, which isn't ideal but isn't terrible
+ // either. OpenGL on mac is not long for this earth, so this may be
+ // good enough until we have WebRender on gfx-rs (on Metal).
+ //
+ // On all platforms, we also clamp the number of layers per texture to 16
+ // to avoid the cost of resizing large texture arrays (at the expense
+ // of batching efficiency).
+ //
+ // Note that we could also define this more generally in terms of
+ // |max_texture_size / TEXTURE_REGION_DIMENSION|, except:
+ // * max_texture_size is actually clamped beyond the device limit
+ // by Gecko to 8192, so we'd need to thread the raw device value
+ // here, and:
+ // * The bug we're working around is likely specific to a single
+ // driver family, and those drivers are also likely to share
+ // the same max texture size of 16k. If we do encounter a driver
+ // with the same bug but a lower max texture size, we might need
+ // to rethink our strategy anyway, since a limit below 32MB might
+ // start to introduce performance issues.
+ max_texture_layers = max_texture_layers.min(16);
+
+ let mut pending_updates = TextureUpdateList::new();
// Shared texture cache controls swizzling on a per-entry basis, assuming that
// the texture as a whole doesn't need to be swizzled (but only some entries do).
@@ -827,26 +567,30 @@ impl TextureCache {
swizzle.map_or(true, |s| s.bgra8_sampling_swizzle == Swizzle::default())
);
- let next_texture_id = CacheTextureId(1);
+ let mut next_texture_id = CacheTextureId(1);
TextureCache {
- shared_textures: SharedTextures::new(color_formats, config),
+ shared_textures: SharedTextures::new(color_formats),
picture_textures: PictureTextures::new(
- default_picture_tile_size,
+ initial_size,
+ picture_tile_sizes,
+ &mut next_texture_id,
+ &mut pending_updates,
),
max_texture_size,
- tiling_threshold,
+ max_texture_layers,
swizzle,
debug_flags: DebugFlags::empty(),
next_id: next_texture_id,
pending_updates,
now: FrameStamp::INVALID,
- lru_cache: LRUCache::new(BudgetType::COUNT),
- picture_cache_entries: FreeList::new(),
+ lru_cache: LRUCache::new(),
+ shared_bytes_allocated: 0,
+ standalone_bytes_allocated: 0,
picture_cache_handles: Vec::new(),
- manual_entries: FreeList::new(),
manual_handles: Vec::new(),
- bytes_allocated: [0 ; BudgetType::COUNT],
+ eviction_threshold_bytes,
+ max_evictions_per_frame,
}
}
@@ -856,19 +600,22 @@ impl TextureCache {
#[cfg(test)]
pub fn new_for_testing(
max_texture_size: i32,
+ max_texture_layers: usize,
image_format: ImageFormat,
) -> Self {
let mut cache = Self::new(
max_texture_size,
- max_texture_size,
- crate::picture::TILE_SIZE_DEFAULT,
+ max_texture_layers,
+ &[],
+ DeviceIntSize::zero(),
TextureFormatPair::from(image_format),
None,
- &TextureCacheConfig::DEFAULT,
+ 64 * 1024 * 1024,
+ 32,
);
let mut now = FrameStamp::first(DocumentId::new(IdNamespace(1), 1));
now.advance();
- cache.begin_frame(now, &mut TransactionProfile::new());
+ cache.begin_frame(now);
cache
}
@@ -885,8 +632,7 @@ impl TextureCache {
Vec::new(),
);
for handle in manual_handles {
- let entry = self.manual_entries.free(handle);
- self.evict_impl(entry);
+ self.evict_impl(handle);
}
// Evict all picture cache handles
@@ -895,16 +641,13 @@ impl TextureCache {
Vec::new(),
);
for handle in picture_handles {
- let entry = self.picture_cache_entries.free(handle);
- self.evict_impl(entry);
+ self.evict_impl(handle);
}
// Evict all auto (LRU) cache handles
- for budget_type in BudgetType::iter() {
- while let Some(entry) = self.lru_cache.pop_oldest(budget_type as u8) {
- entry.evict();
- self.free(&entry);
- }
+ while let Some(entry) = self.lru_cache.pop_oldest() {
+ entry.evict();
+ self.free(&entry);
}
// Free the picture and shared textures
@@ -914,71 +657,40 @@ impl TextureCache {
}
/// Called at the beginning of each frame.
- pub fn begin_frame(&mut self, stamp: FrameStamp, profile: &mut TransactionProfile) {
+ pub fn begin_frame(&mut self, stamp: FrameStamp) {
debug_assert!(!self.now.is_valid());
profile_scope!("begin_frame");
self.now = stamp;
// Texture cache eviction is done at the start of the frame. This ensures that
// we won't evict items that have been requested on this frame.
- // It also frees up space in the cache for items allocated later in the frame
- // potentially reducing texture allocations and fragmentation.
- self.evict_items_from_cache_if_required(profile);
- self.expire_old_picture_cache_tiles();
+ self.evict_items_from_cache_if_required();
}
- pub fn end_frame(&mut self, profile: &mut TransactionProfile) {
+ pub fn end_frame(&mut self, texture_cache_profile: &mut TextureCacheProfileCounters) {
debug_assert!(self.now.is_valid());
- self.picture_textures.gc(
- &mut self.pending_updates,
- );
-
- let updates = &mut self.pending_updates; // To avoid referring to self in the closure.
- let callback = &mut|texture_id| { updates.push_free(texture_id); };
+ self.expire_old_picture_cache_tiles();
// Release of empty shared textures is done at the end of the frame. That way, if the
// eviction at the start of the frame frees up a texture that is then subsequently
// used during the frame, we avoid doing a free/alloc for it.
- self.shared_textures.alpha8_linear.release_empty_textures(callback);
- self.shared_textures.alpha8_glyphs.release_empty_textures(callback);
- self.shared_textures.alpha16_linear.release_empty_textures(callback);
- self.shared_textures.color8_linear.release_empty_textures(callback);
- self.shared_textures.color8_nearest.release_empty_textures(callback);
- self.shared_textures.color8_glyphs.release_empty_textures(callback);
-
- for budget in BudgetType::iter() {
- let threshold = self.get_eviction_threshold(budget);
- let pressure = self.bytes_allocated[budget as usize] as f32 / threshold as f32;
- profile.set(BudgetType::PRESSURE_COUNTERS[budget as usize], pressure);
- }
-
- profile.set(profiler::TEXTURE_CACHE_A8_PIXELS, self.shared_textures.alpha8_linear.allocated_space());
- profile.set(profiler::TEXTURE_CACHE_A8_TEXTURES, self.shared_textures.alpha8_linear.allocated_textures());
- profile.set(profiler::TEXTURE_CACHE_A8_GLYPHS_PIXELS, self.shared_textures.alpha8_glyphs.allocated_space());
- profile.set(profiler::TEXTURE_CACHE_A8_GLYPHS_TEXTURES, self.shared_textures.alpha8_glyphs.allocated_textures());
- profile.set(profiler::TEXTURE_CACHE_A16_PIXELS, self.shared_textures.alpha16_linear.allocated_space());
- profile.set(profiler::TEXTURE_CACHE_A16_TEXTURES, self.shared_textures.alpha16_linear.allocated_textures());
- profile.set(profiler::TEXTURE_CACHE_RGBA8_LINEAR_PIXELS, self.shared_textures.color8_linear.allocated_space());
- profile.set(profiler::TEXTURE_CACHE_RGBA8_LINEAR_TEXTURES, self.shared_textures.color8_linear.allocated_textures());
- profile.set(profiler::TEXTURE_CACHE_RGBA8_NEAREST_PIXELS, self.shared_textures.color8_nearest.allocated_space());
- profile.set(profiler::TEXTURE_CACHE_RGBA8_NEAREST_TEXTURES, self.shared_textures.color8_nearest.allocated_textures());
- profile.set(profiler::TEXTURE_CACHE_RGBA8_GLYPHS_PIXELS, self.shared_textures.color8_glyphs.allocated_space());
- profile.set(profiler::TEXTURE_CACHE_RGBA8_GLYPHS_TEXTURES, self.shared_textures.color8_glyphs.allocated_textures());
-
- self.picture_textures.update_profile(profile);
-
- let shared_bytes = [
- BudgetType::SharedColor8Linear,
- BudgetType::SharedColor8Nearest,
- BudgetType::SharedColor8Glyphs,
- BudgetType::SharedAlpha8,
- BudgetType::SharedAlpha8Glyphs,
- BudgetType::SharedAlpha16,
- ].iter().map(|b| self.bytes_allocated[*b as usize]).sum();
- let standalone_bytes = self.bytes_allocated[BudgetType::Standalone as usize];
-
- profile.set(profiler::TEXTURE_CACHE_SHARED_MEM, profiler::bytes_to_mb(shared_bytes));
- profile.set(profiler::TEXTURE_CACHE_STANDALONE_MEM, profiler::bytes_to_mb(standalone_bytes));
+ self.shared_textures.array_alpha8_linear.release_empty_textures(&mut self.pending_updates);
+ self.shared_textures.array_alpha16_linear.release_empty_textures(&mut self.pending_updates);
+ self.shared_textures.array_color8_linear.release_empty_textures(&mut self.pending_updates);
+ self.shared_textures.array_color8_nearest.release_empty_textures(&mut self.pending_updates);
+
+ self.shared_textures.array_alpha8_linear
+ .update_profile(&mut texture_cache_profile.pages_alpha8_linear);
+ self.shared_textures.array_alpha16_linear
+ .update_profile(&mut texture_cache_profile.pages_alpha16_linear);
+ self.shared_textures.array_color8_linear
+ .update_profile(&mut texture_cache_profile.pages_color8_linear);
+ self.shared_textures.array_color8_nearest
+ .update_profile(&mut texture_cache_profile.pages_color8_nearest);
+ self.picture_textures
+ .update_profile(&mut texture_cache_profile.pages_picture);
+ texture_cache_profile.shared_bytes.set(self.shared_bytes_allocated);
+ texture_cache_profile.standalone_bytes.set(self.standalone_bytes_allocated);
self.now = FrameStamp::INVALID;
}
@@ -992,45 +704,15 @@ impl TextureCache {
// texture cache (either never uploaded, or has been
// evicted on a previous frame).
pub fn request(&mut self, handle: &TextureCacheHandle, gpu_cache: &mut GpuCache) -> bool {
- let now = self.now;
- let entry = match handle {
- TextureCacheHandle::Empty => None,
- TextureCacheHandle::Picture(handle) => {
- self.picture_cache_entries.get_opt_mut(handle)
- },
- TextureCacheHandle::Auto(handle) => {
- // Call touch rather than get_opt_mut so that the LRU index
- // knows that the entry has been used.
- self.lru_cache.touch(handle)
- },
- TextureCacheHandle::Manual(handle) => {
- self.manual_entries.get_opt_mut(handle)
- },
- };
- entry.map_or(true, |entry| {
+ match self.lru_cache.touch(handle) {
// If an image is requested that is already in the cache,
// refresh the GPU cache data associated with this item.
- entry.last_access = now;
- entry.update_gpu_cache(gpu_cache);
- false
- })
- }
-
- fn get_entry_opt(&self, handle: &TextureCacheHandle) -> Option<&CacheEntry> {
- match handle {
- TextureCacheHandle::Empty => None,
- TextureCacheHandle::Picture(handle) => self.picture_cache_entries.get_opt(handle),
- TextureCacheHandle::Auto(handle) => self.lru_cache.get_opt(handle),
- TextureCacheHandle::Manual(handle) => self.manual_entries.get_opt(handle),
- }
- }
-
- fn get_entry_opt_mut(&mut self, handle: &TextureCacheHandle) -> Option<&mut CacheEntry> {
- match handle {
- TextureCacheHandle::Empty => None,
- TextureCacheHandle::Picture(handle) => self.picture_cache_entries.get_opt_mut(handle),
- TextureCacheHandle::Auto(handle) => self.lru_cache.get_opt_mut(handle),
- TextureCacheHandle::Manual(handle) => self.manual_entries.get_opt_mut(handle),
+ Some(entry) => {
+ entry.last_access = self.now;
+ entry.update_gpu_cache(gpu_cache);
+ false
+ }
+ None => true,
}
}
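The call-site pattern request() is built for, sketched:

    // request() touches the LRU entry and refreshes its GPU cache block,
    // returning true only when the caller must produce the data again.
    if texture_cache.request(&handle, gpu_cache) {
        // Never uploaded, or evicted since last use: rasterize / decode the
        // image again and hand it to update() below.
    }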
@@ -1038,20 +720,26 @@ impl TextureCache {
// texture cache (either never uploaded, or has been
// evicted on a previous frame).
pub fn needs_upload(&self, handle: &TextureCacheHandle) -> bool {
- !self.is_allocated(handle)
+ self.lru_cache.get_opt(handle).is_none()
}
pub fn max_texture_size(&self) -> i32 {
self.max_texture_size
}
- pub fn tiling_threshold(&self) -> i32 {
- self.tiling_threshold
+ #[cfg(feature = "replay")]
+ pub fn max_texture_layers(&self) -> usize {
+ self.max_texture_layers
+ }
+
+ #[cfg(feature = "replay")]
+ pub fn picture_tile_sizes(&self) -> Vec<DeviceIntSize> {
+ self.picture_textures.tile_sizes()
}
#[cfg(feature = "replay")]
pub fn color_formats(&self) -> TextureFormatPair<ImageFormat> {
- self.shared_textures.color8_linear.texture_parameters().formats.clone()
+ self.shared_textures.array_color8_linear.formats.clone()
}
#[cfg(feature = "replay")]
@@ -1059,6 +747,16 @@ impl TextureCache {
self.swizzle
}
+ #[cfg(feature = "replay")]
+ pub fn eviction_threshold_bytes(&self) -> usize {
+ self.eviction_threshold_bytes
+ }
+
+ #[cfg(feature = "replay")]
+ pub fn max_evictions_per_frame(&self) -> usize {
+ self.max_evictions_per_frame
+ }
+
pub fn pending_updates(&mut self) -> TextureUpdateList {
mem::replace(&mut self.pending_updates, TextureUpdateList::new())
}
@@ -1070,22 +768,22 @@ impl TextureCache {
descriptor: ImageDescriptor,
filter: TextureFilter,
data: Option<CachedImageData>,
- user_data: [f32; 4],
+ user_data: [f32; 3],
mut dirty_rect: ImageDirtyRect,
gpu_cache: &mut GpuCache,
eviction_notice: Option<&EvictionNotice>,
uv_rect_kind: UvRectKind,
eviction: Eviction,
- shader: TargetShader,
) {
debug_assert!(self.now.is_valid());
+
// Determine if we need to allocate texture cache memory
// for this item. We need to reallocate if any of the following
// is true:
// - Never been in the cache
// - Has been in the cache but was evicted.
// - Exists in the cache but dimensions / format have changed.
- let realloc = match self.get_entry_opt(handle) {
+ let realloc = match self.lru_cache.get_opt(handle) {
Some(entry) => {
entry.size != descriptor.size || (entry.input_format != descriptor.format &&
entry.alternative_input_format() != descriptor.format)
@@ -1097,15 +795,22 @@ impl TextureCache {
};
if realloc {
- let params = CacheAllocParams { descriptor, filter, user_data, uv_rect_kind, shader };
- self.allocate(&params, handle, eviction);
+ let params = CacheAllocParams { descriptor, filter, user_data, uv_rect_kind };
+ self.allocate(&params, handle);
// If we reallocated, we need to upload the whole item again.
dirty_rect = DirtyRect::All;
}
- let entry = self.get_entry_opt_mut(handle)
- .expect("BUG: There must be an entry at this handle now");
+ // Update eviction policy (this is a no-op if it hasn't changed)
+ if eviction == Eviction::Manual {
+ if let Some(manual_handle) = self.lru_cache.set_manual_eviction(handle) {
+ self.manual_handles.push(manual_handle);
+ }
+ }
+
+ let entry = self.lru_cache.get_opt_mut(handle)
+ .expect("BUG: handle must be valid now");
// Install the new eviction notice for this update, if applicable.
entry.eviction_notice = eviction_notice.cloned();
@@ -1127,32 +832,31 @@ impl TextureCache {
// If the swizzling is supported, we always upload in the internal
// texture format (thus avoiding the conversion by the driver).
// Otherwise, pass the external format to the driver.
- let origin = entry.details.describe();
- let texture_id = entry.texture_id;
- let size = entry.size;
let use_upload_format = self.swizzle.is_none();
+ let (layer_index, origin) = entry.details.describe();
let op = TextureCacheUpdate::new_update(
data,
&descriptor,
origin,
- size,
+ entry.size,
+ layer_index as i32,
use_upload_format,
&dirty_rect,
);
- self.pending_updates.push_update(texture_id, op);
+ self.pending_updates.push_update(entry.texture_id, op);
}
}
// Check if a given texture handle has a valid allocation
// in the texture cache.
pub fn is_allocated(&self, handle: &TextureCacheHandle) -> bool {
- self.get_entry_opt(handle).is_some()
+ self.lru_cache.get_opt(handle).is_some()
}
// Check if a given texture handle was last used as recently
// as the specified number of previous frames.
pub fn is_recently_used(&self, handle: &TextureCacheHandle, margin: usize) -> bool {
- self.get_entry_opt(handle).map_or(false, |entry| {
+ self.lru_cache.get_opt(handle).map_or(false, |entry| {
entry.last_access.frame_id() + margin >= self.now.frame_id()
})
}
@@ -1160,7 +864,7 @@ impl TextureCache {
// Return the allocated size of the texture handle's associated data,
// or otherwise indicate the handle is invalid.
pub fn get_allocated_size(&self, handle: &TextureCacheHandle) -> Option<usize> {
- self.get_entry_opt(handle).map(|entry| {
+ self.lru_cache.get_opt(handle).map(|entry| {
(entry.input_format.bytes_per_pixel() * entry.size.area()) as usize
})
}
@@ -1171,15 +875,12 @@ impl TextureCache {
// This function will assert in debug modes if the caller
// tries to get a handle that was not requested this frame.
pub fn get(&self, handle: &TextureCacheHandle) -> CacheItem {
- let (texture_id, uv_rect, swizzle, uv_rect_handle, user_data) = self.get_cache_location(handle);
+ let (texture_id, layer_index, uv_rect, swizzle, uv_rect_handle) = self.get_cache_location(handle);
CacheItem {
uv_rect_handle,
- texture_id: TextureSource::TextureCache(
- texture_id,
- swizzle,
- ),
+ texture_id: TextureSource::TextureCache(texture_id, swizzle),
uv_rect,
- user_data,
+ texture_layer: layer_index as i32,
}
}
@@ -1191,72 +892,43 @@ impl TextureCache {
pub fn get_cache_location(
&self,
handle: &TextureCacheHandle,
- ) -> (CacheTextureId, DeviceIntRect, Swizzle, GpuCacheHandle, [f32; 4]) {
- let entry = self
- .get_entry_opt(handle)
+ ) -> (CacheTextureId, LayerIndex, DeviceIntRect, Swizzle, GpuCacheHandle) {
+ let entry = self.lru_cache
+ .get_opt(handle)
.expect("BUG: was dropped from cache or not updated!");
debug_assert_eq!(entry.last_access, self.now);
- let origin = entry.details.describe();
- (
- entry.texture_id,
- DeviceIntRect::new(origin, entry.size),
- entry.swizzle,
- entry.uv_rect_handle,
- entry.user_data,
- )
+ let (layer_index, origin) = entry.details.describe();
+ (entry.texture_id,
+ layer_index as usize,
+ DeviceIntRect::new(origin, entry.size),
+ entry.swizzle,
+ entry.uv_rect_handle)
}
/// Internal helper function to evict a strong texture cache handle
fn evict_impl(
&mut self,
- entry: CacheEntry,
+ handle: FreeListHandle<CacheEntryMarker>,
) {
+ let entry = self.lru_cache.remove_manual_handle(handle);
entry.evict();
self.free(&entry);
}
/// Evict a texture cache handle that was previously set to be in manual
/// eviction mode.
- pub fn evict_handle(&mut self, handle: &TextureCacheHandle) {
- match handle {
- TextureCacheHandle::Manual(handle) => {
- // Find the strong handle that matches this weak handle. If this
- // ever shows up in profiles, we can make it a hash (but the number
- // of manual eviction handles is typically small).
- // Alternatively, we could make a more forgiving FreeList variant
- // which does not differentiate between strong and weak handles.
- let index = self.manual_handles.iter().position(|strong_handle| {
- strong_handle.matches(handle)
- });
- if let Some(index) = index {
- let handle = self.manual_handles.swap_remove(index);
- let entry = self.manual_entries.free(handle);
- self.evict_impl(entry);
- }
- }
- TextureCacheHandle::Auto(handle) => {
- if let Some(entry) = self.lru_cache.remove(handle) {
- self.evict_impl(entry);
- }
- }
- _ => {}
- }
- }
-
- pub fn dump_color8_linear_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- self.shared_textures.color8_linear.dump_as_svg(output)
- }
-
- pub fn dump_color8_glyphs_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- self.shared_textures.color8_glyphs.dump_as_svg(output)
- }
-
- pub fn dump_alpha8_glyphs_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- self.shared_textures.alpha8_glyphs.dump_as_svg(output)
- }
+ pub fn evict_manual_handle(&mut self, handle: &TextureCacheHandle) {
+ // Find the strong handle that matches this weak handle. If this
+ // ever shows up in profiles, we can make it a hash (but the number
+ // of manual eviction handles is typically small).
+ let index = self.manual_handles.iter().position(|strong_handle| {
+ strong_handle.matches(handle)
+ });
- pub fn dump_alpha8_linear_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- self.shared_textures.alpha8_linear.dump_as_svg(output)
+ if let Some(index) = index {
+ let handle = self.manual_handles.swap_remove(index);
+ self.evict_impl(handle);
+ }
}
/// Expire picture cache tiles that haven't been referenced in the last frame.
@@ -1265,152 +937,70 @@ impl TextureCache {
fn expire_old_picture_cache_tiles(&mut self) {
for i in (0 .. self.picture_cache_handles.len()).rev() {
let evict = {
- let entry = self.picture_cache_entries.get(
- &self.picture_cache_handles[i]
- );
-
- // This function is called at the beginning of the frame,
- // so we don't yet know which picture cache tiles will be
- // requested this frame. Therefore only evict picture cache
- // tiles which weren't requested in the *previous* frame.
- entry.last_access.frame_id() < self.now.frame_id() - 1
+ let entry = self.lru_cache.get(&self.picture_cache_handles[i]);
+
+ // Texture cache entries can be evicted at the start of
+ // a frame, or at any time during the frame when a cache
+ // allocation is occurring. This means that entries tagged
+ // with eager eviction may get evicted before they have a
+ // chance to be requested on the current frame. Instead,
+ // advance the frame id of the entry by one before
+ // comparison. This ensures that an eager entry will
+ // not be evicted until it is not used for at least
+ // one complete frame.
+ let mut entry_frame_id = entry.last_access.frame_id();
+ entry_frame_id.advance();
+
+ entry_frame_id < self.now.frame_id()
};
if evict {
let handle = self.picture_cache_handles.swap_remove(i);
- let entry = self.picture_cache_entries.free(handle);
- self.evict_impl(entry);
+ self.evict_impl(handle);
}
}
}
- /// Get the eviction threshold, in bytes, for the given budget type.
- fn get_eviction_threshold(&self, budget_type: BudgetType) -> usize {
- if budget_type == BudgetType::Standalone {
- // For standalone textures, the only reason to evict textures is
- // to save GPU memory. Batching / draw call concerns do not apply
- // to standalone textures, because unused textures don't cause
- // extra draw calls.
- return 16 * 1024 * 1024;
- }
-
- // For shared textures, evicting an entry only frees up GPU memory if it
- // causes one of the shared textures to become empty.
- // The bigger concern for shared textures is batching: The entries that
- // are needed in the current frame should be distributed across as few
- // shared textures as possible, to minimize the number of draw calls.
- // Ideally we only want one or two textures per type.
- let expected_texture_count = match budget_type {
- BudgetType::SharedColor8Nearest | BudgetType::SharedAlpha16 => {
- // These types are only rarely used, we don't want more than
- // one of each.
- 1
- },
-
- _ => {
- // For the other types, having two textures is acceptable.
- 2
- },
- };
-
- // The threshold that we pick here will be compared to the number of
- // bytes that are *occupied by entries*. And we're trying to target a
- // certain number of textures.
- // Unfortunately, it's hard to predict the number of needed textures
- // purely based on number of occupied bytes: Due to the different
- // rectangular shape of each entry, and due to decisions made by the
- // allocator, sometimes we may need a new texture even if there are
- // still large gaps in the existing textures.
- // Let's assume that we have an average allocator wastage of 50%.
- let average_used_bytes_per_texture_when_full =
- self.shared_textures.bytes_per_shared_texture(budget_type) / 2;
-
- // Compute the threshold.
- // Because of varying allocator wastage, we may still need to use more
- // than the expected number of textures; that's fine. We'll also go over
- // the expected texture count whenever a large number of entries are
- // needed to draw a complex frame (since we don't evict entries which
- // are needed for the current frame), or if eviction hasn't had a chance
- // to catch up after a large allocation burst.
- expected_texture_count * average_used_bytes_per_texture_when_full
- }
-
/// Evict old items from the shared and standalone caches, if we're over a
/// threshold memory usage value
- fn evict_items_from_cache_if_required(&mut self, profile: &mut TransactionProfile) {
- let previous_frame_id = self.now.frame_id() - 1;
+ fn evict_items_from_cache_if_required(&mut self) {
let mut eviction_count = 0;
- let mut youngest_evicted = FrameId::first();
-
- for budget in BudgetType::iter() {
- let threshold = self.get_eviction_threshold(budget);
- while self.should_continue_evicting(
- self.bytes_allocated[budget as usize],
- threshold,
- eviction_count,
- ) {
- if let Some(entry) = self.lru_cache.peek_oldest(budget as u8) {
- // Only evict this item if it wasn't used in the previous frame. The reason being that if it
- // was used the previous frame then it will likely be used in this frame too, and we don't
- // want to be continually evicting and reuploading the item every frame.
- if entry.last_access.frame_id() >= previous_frame_id {
- // Since the LRU cache is ordered by frame access, we can break out of the loop here because
- // we know that all remaining items were also used in the previous frame (or more recently).
- break;
- }
- if entry.last_access.frame_id() > youngest_evicted {
- youngest_evicted = entry.last_access.frame_id();
- }
- let entry = self.lru_cache.pop_oldest(budget as u8).unwrap();
+
+ // Keep evicting while memory is above the threshold, and we haven't
+ // reached a maximum number of evictions this frame.
+ while self.current_memory_estimate() > self.eviction_threshold_bytes && eviction_count < self.max_evictions_per_frame {
+ match self.lru_cache.pop_oldest() {
+ Some(entry) => {
entry.evict();
self.free(&entry);
eviction_count += 1;
- } else {
- // The LRU cache is empty, all remaining items use manual
- // eviction. In this case, there's nothing we can do until
- // the calling code manually evicts items to reduce the
- // allocated cache size.
+ }
+ None => {
+ // It's possible that we could fail to pop an item from the LRU list to evict, if every
+ // item in the cache is set to manual eviction mode. In this case, just break out of the
+ // loop as there's nothing we can do until the calling code manually evicts items to
+ // reduce the allocated cache size.
break;
}
}
}
-
- if eviction_count > 0 {
- profile.set(profiler::TEXTURE_CACHE_EVICTION_COUNT, eviction_count);
- profile.set(
- profiler::TEXTURE_CACHE_YOUNGEST_EVICTION,
- self.now.frame_id().as_usize() - youngest_evicted.as_usize()
- );
- }
}
- /// Returns true if texture cache eviction loop should continue
- fn should_continue_evicting(
- &self,
- bytes_allocated: usize,
- threshold: usize,
- eviction_count: usize,
- ) -> bool {
- // If current memory usage is below selected threshold, we can stop evicting items
- if bytes_allocated < threshold {
- return false;
- }
-
- // If current memory usage is significantly more than the threshold, keep evicting this frame
- if bytes_allocated > 4 * threshold {
- return true;
- }
-
- // Otherwise, only allow evicting up to a certain number of items per frame. This allows evictions
- // to be spread over a number of frames, to avoid frame spikes.
- eviction_count < Self::MAX_EVICTIONS_PER_FRAME
+ /// Return the total used bytes in standalone and shared textures. This is
+ /// used to determine how many textures need to be evicted to keep texture
+ /// cache memory usage under a reasonable limit. Note that this does not
+ /// include memory allocated to picture cache tiles, which are considered
+ /// separately for the purposes of texture cache eviction.
+ fn current_memory_estimate(&self) -> usize {
+ self.standalone_bytes_allocated + self.shared_bytes_allocated
}
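A worked example of how the two knobs interact (illustrative numbers; the
real values are passed in by the caller of new()):

    // eviction_threshold_bytes = 64 MiB, max_evictions_per_frame = 32,
    // entries averaging 256 KiB: an 80 MiB cache frees at most
    // 32 * 256 KiB = 8 MiB per frame, returning under the threshold in
    // ~2 frames instead of stalling a single frame on mass eviction.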
// Free a cache entry from the standalone list or shared cache.
fn free(&mut self, entry: &CacheEntry) {
match entry.details {
- EntryDetails::Picture { size } => {
- self.picture_textures.free_tile(entry.texture_id, self.now.frame_id());
+ EntryDetails::Picture { texture_index, layer_index } => {
+ let picture_texture = self.picture_textures.get(texture_index);
+ picture_texture.slices[layer_index].uv_rect_handle = None;
if self.debug_flags.contains(
DebugFlags::TEXTURE_CACHE_DBG |
DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED)
@@ -1418,27 +1008,28 @@ impl TextureCache {
self.pending_updates.push_debug_clear(
entry.texture_id,
DeviceIntPoint::zero(),
- size.width,
- size.height,
+ picture_texture.size.width,
+ picture_texture.size.height,
+ layer_index,
);
}
}
EntryDetails::Standalone { size_in_bytes, .. } => {
- self.bytes_allocated[BudgetType::Standalone as usize] -= size_in_bytes;
+ self.standalone_bytes_allocated -= size_in_bytes;
// This is a standalone texture allocation. Free it directly.
self.pending_updates.push_free(entry.texture_id);
}
- EntryDetails::Cache { origin, alloc_id, allocated_size_in_bytes } => {
- let (allocator_list, budget_type) = self.shared_textures.select(
- entry.input_format,
- entry.filter,
- entry.shader,
- );
+ EntryDetails::Cache { origin, layer_index, .. } => {
+ // Free the block in the given region.
+ let texture_array = self.shared_textures.select(entry.input_format, entry.filter);
+ let unit = texture_array.units
+ .iter_mut()
+ .find(|unit| unit.texture_id == entry.texture_id)
+ .expect("Unable to find the associated texture array unit");
+ let region = &mut unit.regions[layer_index];
- allocator_list.deallocate(entry.texture_id, alloc_id);
-
- self.bytes_allocated[budget_type as usize] -= allocated_size_in_bytes;
+ self.shared_bytes_allocated -= region.slab_size.size_in_bytes(texture_array.formats.internal);
if self.debug_flags.contains(
DebugFlags::TEXTURE_CACHE_DBG |
@@ -1447,10 +1038,12 @@ impl TextureCache {
self.pending_updates.push_debug_clear(
entry.texture_id,
origin,
- entry.size.width,
- entry.size.height,
+ region.slab_size.width,
+ region.slab_size.height,
+ layer_index,
);
}
+ region.free(origin, &mut unit.empty_regions);
}
}
}
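Note that the shared-cache accounting above is slab-granular: an entry is
charged for the slab it was rounded up to, not for its own pixel footprint.
Assuming a hypothetical 64x64 slab:

    // A 20x30 R8 entry rounded up to a 64x64 slab holds 64 * 64 * 1 = 4096
    // bytes of budget; free() returns exactly those slab bytes to
    // shared_bytes_allocated, mirroring allocate_from_shared_cache().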
@@ -1459,42 +1052,13 @@ impl TextureCache {
fn allocate_from_shared_cache(
&mut self,
params: &CacheAllocParams,
- ) -> (CacheEntry, BudgetType) {
- let (allocator_list, budget_type) = self.shared_textures.select(
+ ) -> CacheEntry {
+ // Mutably borrow the correct texture.
+ let texture_array = self.shared_textures.select(
params.descriptor.format,
params.filter,
- params.shader,
);
-
- // To avoid referring to self in the closure.
- let next_id = &mut self.next_id;
- let pending_updates = &mut self.pending_updates;
-
- let (texture_id, alloc_id, allocated_rect) = allocator_list.allocate(
- params.descriptor.size,
- &mut |size, parameters| {
- let texture_id = *next_id;
- next_id.0 += 1;
- pending_updates.push_alloc(
- texture_id,
- TextureCacheAllocInfo {
- target: ImageBufferKind::Texture2D,
- width: size.width,
- height: size.height,
- format: parameters.formats.internal,
- filter: parameters.filter,
- is_shared_cache: true,
- has_depth: false,
- },
- );
-
- texture_id
- },
- );
-
- let formats = &allocator_list.texture_parameters().formats;
-
- let swizzle = if formats.external == params.descriptor.format {
+ let swizzle = if texture_array.formats.external == params.descriptor.format {
Swizzle::default()
} else {
match self.swizzle {
@@ -1503,28 +1067,59 @@ impl TextureCache {
}
};
- let bpp = formats.internal.bytes_per_pixel();
- let allocated_size_in_bytes = (allocated_rect.size.area() * bpp) as usize;
- self.bytes_allocated[budget_type as usize] += allocated_size_in_bytes;
+ let max_texture_layers = self.max_texture_layers;
+ let slab_size = SlabSize::new(params.descriptor.size);
- (CacheEntry {
- size: params.descriptor.size,
- user_data: params.user_data,
- last_access: self.now,
- details: EntryDetails::Cache {
- origin: allocated_rect.origin,
- alloc_id,
- allocated_size_in_bytes,
- },
- uv_rect_handle: GpuCacheHandle::new(),
- input_format: params.descriptor.format,
- filter: params.filter,
- swizzle,
- texture_id,
- eviction_notice: None,
- uv_rect_kind: params.uv_rect_kind,
- shader: params.shader
- }, budget_type)
+ let mut info = TextureCacheAllocInfo {
+ width: TEXTURE_REGION_DIMENSIONS,
+ height: TEXTURE_REGION_DIMENSIONS,
+ format: texture_array.formats.internal,
+ filter: texture_array.filter,
+ layer_count: 1,
+ is_shared_cache: true,
+ has_depth: false,
+ };
+
+ let unit_index = if let Some(index) = texture_array.units
+ .iter()
+ .position(|unit| unit.can_alloc(slab_size))
+ {
+ index
+ } else if let Some(index) = texture_array.units
+ .iter()
+ .position(|unit| unit.regions.len() < max_texture_layers)
+ {
+ let unit = &mut texture_array.units[index];
+
+ unit.push_regions(texture_array.layers_per_allocation);
+
+ info.layer_count = unit.regions.len() as i32;
+ self.pending_updates.push_realloc(unit.texture_id, info);
+
+ index
+ } else {
+ let index = texture_array.units.len();
+ texture_array.units.push(TextureArrayUnit {
+ texture_id: self.next_id,
+ regions: Vec::new(),
+ empty_regions: 0,
+ });
+
+ let unit = &mut texture_array.units[index];
+
+ unit.push_regions(texture_array.layers_per_allocation);
+
+ info.layer_count = unit.regions.len() as i32;
+ self.pending_updates.push_alloc(self.next_id, info);
+ self.next_id.0 += 1;
+ index
+ };
+
+ self.shared_bytes_allocated += slab_size.size_in_bytes(texture_array.formats.internal);
+
+ // Do the allocation. This is guaranteed to succeed, since the
+ // selection above picked (or grew) a unit that can hold this slab size.
+ texture_array.alloc(params, unit_index, self.now, swizzle)
}
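
In outline, the unit selection above tries, in order (an illustrative summary, not normative):

    // 1. An existing unit that can already hold this slab size
    //    (a matching region with free slots, or an empty region).
    // 2. An existing unit with room left to grow: push another
    //    layers_per_allocation layers and issue a realloc.
    // 3. A brand-new texture unit, allocated via push_alloc.
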
// Returns true if the given image descriptor *may* be
@@ -1558,60 +1153,27 @@ impl TextureCache {
allowed_in_shared_cache
}
- /// Allocate a render target via the pending updates sent to the renderer
- pub fn alloc_render_target(
- &mut self,
- size: DeviceIntSize,
- format: ImageFormat,
- ) -> CacheTextureId {
- let texture_id = self.next_id;
- self.next_id.0 += 1;
-
- // Push a command to allocate device storage of the right size / format.
- let info = TextureCacheAllocInfo {
- target: ImageBufferKind::Texture2D,
- width: size.width,
- height: size.height,
- format,
- filter: TextureFilter::Linear,
- is_shared_cache: false,
- has_depth: false,
- };
-
- self.pending_updates.push_alloc(texture_id, info);
-
- texture_id
- }
-
- /// Free an existing render target
- pub fn free_render_target(
- &mut self,
- id: CacheTextureId,
- ) {
- self.pending_updates.push_free(id);
- }
-
/// Allocates a new standalone cache entry.
fn allocate_standalone_entry(
&mut self,
params: &CacheAllocParams,
- ) -> (CacheEntry, BudgetType) {
+ ) -> CacheEntry {
let texture_id = self.next_id;
self.next_id.0 += 1;
// Push a command to allocate device storage of the right size / format.
let info = TextureCacheAllocInfo {
- target: ImageBufferKind::Texture2D,
width: params.descriptor.size.width,
height: params.descriptor.size.height,
format: params.descriptor.format,
filter: params.filter,
+ layer_count: 1,
is_shared_cache: false,
has_depth: false,
};
let size_in_bytes = (info.width * info.height * info.format.bytes_per_pixel()) as usize;
- self.bytes_allocated[BudgetType::Standalone as usize] += size_in_bytes;
+ self.standalone_bytes_allocated += size_in_bytes;
self.pending_updates.push_alloc(texture_id, info);
@@ -1622,13 +1184,13 @@ impl TextureCache {
None
};
- (CacheEntry::new_standalone(
+ CacheEntry::new_standalone(
texture_id,
self.now,
params,
swizzle.unwrap_or_default(),
size_in_bytes,
- ), BudgetType::Standalone)
+ )
}
/// Allocates a cache entry appropriate for the given parameters.
@@ -1639,7 +1201,7 @@ impl TextureCache {
fn allocate_cache_entry(
&mut self,
params: &CacheAllocParams,
- ) -> (CacheEntry, BudgetType) {
+ ) -> CacheEntry {
assert!(!params.descriptor.size.is_empty());
// If this image doesn't qualify to go in the shared (batching) cache,
@@ -1653,14 +1215,9 @@ impl TextureCache {
/// Allocates a cache entry for the given parameters, and updates the
/// provided handle to point to the new entry.
- fn allocate(
- &mut self,
- params: &CacheAllocParams,
- handle: &mut TextureCacheHandle,
- eviction: Eviction,
- ) {
+ fn allocate(&mut self, params: &CacheAllocParams, handle: &mut TextureCacheHandle) {
debug_assert!(self.now.is_valid());
- let (new_cache_entry, budget_type) = self.allocate_cache_entry(params);
+ let new_cache_entry = self.allocate_cache_entry(params);
// If the handle points to a valid cache entry, we want to replace the
// cache entry with our newly updated location. We also need to ensure
@@ -1669,36 +1226,7 @@ impl TextureCache {
//
// If the handle is invalid, we need to insert the data, and append the
// result to the corresponding vector.
- let old_entry = match (&mut *handle, eviction) {
- (TextureCacheHandle::Auto(handle), Eviction::Auto) => {
- self.lru_cache.replace_or_insert(handle, budget_type as u8, new_cache_entry)
- },
- (TextureCacheHandle::Manual(handle), Eviction::Manual) => {
- let entry = self.manual_entries.get_opt_mut(handle)
- .expect("Don't call this after evicting");
- Some(mem::replace(entry, new_cache_entry))
- },
- (TextureCacheHandle::Manual(_), Eviction::Auto) |
- (TextureCacheHandle::Auto(_), Eviction::Manual) => {
- panic!("Can't change eviction policy after initial allocation");
- },
- (TextureCacheHandle::Empty, Eviction::Auto) => {
- let new_handle = self.lru_cache.push_new(budget_type as u8, new_cache_entry);
- *handle = TextureCacheHandle::Auto(new_handle);
- None
- },
- (TextureCacheHandle::Empty, Eviction::Manual) => {
- let manual_handle = self.manual_entries.insert(new_cache_entry);
- let new_handle = manual_handle.weak();
- self.manual_handles.push(manual_handle);
- *handle = TextureCacheHandle::Manual(new_handle);
- None
- },
- (TextureCacheHandle::Picture(_), _) => {
- panic!("Picture cache entries are managed separately and shouldn't appear in this function");
- },
- };
- if let Some(old_entry) = old_entry {
+ if let Some(old_entry) = self.lru_cache.replace_or_insert(handle, new_cache_entry) {
old_entry.evict();
self.free(&old_entry);
}
@@ -1714,18 +1242,7 @@ impl TextureCache {
debug_assert!(self.now.is_valid());
debug_assert!(tile_size.width > 0 && tile_size.height > 0);
- let need_alloc = match handle {
- TextureCacheHandle::Empty => true,
- TextureCacheHandle::Picture(handle) => {
- // Check if the entry has been evicted.
- self.picture_cache_entries.get_opt(handle).is_none()
- },
- TextureCacheHandle::Auto(_) | TextureCacheHandle::Manual(_) => {
- panic!("Unexpected handle type in update_picture_cache");
- }
- };
-
- if need_alloc {
+ if self.lru_cache.get_opt(handle).is_none() {
let cache_entry = self.picture_textures.get_or_allocate_tile(
tile_size,
self.now,
@@ -1733,56 +1250,448 @@ impl TextureCache {
&mut self.pending_updates,
);
- // Add the cache entry to the picture_cache_entries FreeList.
- let strong_handle = self.picture_cache_entries.insert(cache_entry);
- let new_handle = strong_handle.weak();
+ // Add the cache entry to the LRU cache, then mark it for manual eviction
+ // so that the lifetime is controlled by the texture cache.
- self.picture_cache_handles.push(strong_handle);
+ *handle = self.lru_cache.push_new(cache_entry);
- *handle = TextureCacheHandle::Picture(new_handle);
+ let strong_handle = self.lru_cache
+ .set_manual_eviction(handle)
+ .expect("bug: handle must be valid here");
+ self.picture_cache_handles.push(strong_handle);
}
- if let TextureCacheHandle::Picture(handle) = handle {
- // Upload the resource rect and texture array layer.
- self.picture_cache_entries
- .get_opt_mut(handle)
- .expect("BUG: handle must be valid now")
- .update_gpu_cache(gpu_cache);
- } else {
- panic!("The handle should be valid picture cache handle now")
- }
+ // Upload the resource rect and texture array layer.
+ self.lru_cache
+ .get_opt_mut(handle)
+ .expect("BUG: handle must be valid now")
+ .update_gpu_cache(gpu_cache);
}
pub fn shared_alpha_expected_format(&self) -> ImageFormat {
- self.shared_textures.alpha8_linear.texture_parameters().formats.external
+ self.shared_textures.array_alpha8_linear.formats.external
}
pub fn shared_color_expected_format(&self) -> ImageFormat {
- self.shared_textures.color8_linear.texture_parameters().formats.external
+ self.shared_textures.array_color8_linear.formats.external
+ }
+}
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Copy, Clone, PartialEq)]
+struct SlabSize {
+ width: i32,
+ height: i32,
+}
+
+impl SlabSize {
+ fn new(size: DeviceIntSize) -> Self {
+ let x_size = quantize_dimension(size.width);
+ let y_size = quantize_dimension(size.height);
+
+ assert!(x_size > 0 && x_size <= TEXTURE_REGION_DIMENSIONS);
+ assert!(y_size > 0 && y_size <= TEXTURE_REGION_DIMENSIONS);
+
+ let (width, height) = match (x_size, y_size) {
+ // Special cased rectangular slab pages.
+ (512, 0..=64) => (512, 64),
+ (512, 128) => (512, 128),
+ (512, 256) => (512, 256),
+ (0..=64, 512) => (64, 512),
+ (128, 512) => (128, 512),
+ (256, 512) => (256, 512),
+
+ // If none of those fit, use a square slab size.
+ (x_size, y_size) => {
+ let square_size = cmp::max(x_size, y_size);
+ (square_size, square_size)
+ }
+ };
+
+ SlabSize {
+ width,
+ height,
+ }
}
+ fn size_in_bytes(&self, format: ImageFormat) -> usize {
+ let bpp = format.bytes_per_pixel();
+ (self.width * self.height * bpp) as usize
+ }
- pub fn default_picture_tile_size(&self) -> DeviceIntSize {
- self.picture_textures.default_tile_size
+ fn invalid() -> SlabSize {
+ SlabSize {
+ width: 0,
+ height: 0,
+ }
}
+}
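
A minimal sketch of the quantization above (illustrative only, not part of the patch; `size2` is the euclid helper used elsewhere in this file):

    // Both dimensions round up to the next bucket, then square off.
    assert!(SlabSize::new(size2(23, 27)) == SlabSize { width: 32, height: 32 });
    // Wide, short requests hit the special-cased rectangular pages.
    assert!(SlabSize::new(size2(500, 60)) == SlabSize { width: 512, height: 64 });
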
- #[cfg(test)]
- pub fn total_allocated_bytes_for_testing(&self) -> usize {
- BudgetType::iter().map(|b| self.bytes_allocated[b as usize]).sum()
+// The x/y location within a texture region of an allocation.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct TextureLocation(u8, u8);
+
+impl TextureLocation {
+ fn new(x: i32, y: i32) -> Self {
+ debug_assert!(x >= 0 && y >= 0 && x < 0x100 && y < 0x100);
+ TextureLocation(x as u8, y as u8)
+ }
+}
+
+/// A region corresponds to a layer in a shared cache texture.
+///
+/// All allocations within a region are of the same size.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct TextureRegion {
+ layer_index: usize,
+ slab_size: SlabSize,
+ free_slots: Vec<TextureLocation>,
+ total_slot_count: usize,
+}
+
+impl TextureRegion {
+ fn new(layer_index: usize) -> Self {
+ TextureRegion {
+ layer_index,
+ slab_size: SlabSize::invalid(),
+ free_slots: Vec::new(),
+ total_slot_count: 0,
+ }
+ }
+
+ // Initialize a region to be an allocator for a specific slab size.
+ fn init(&mut self, slab_size: SlabSize, empty_regions: &mut usize) {
+ debug_assert!(self.slab_size == SlabSize::invalid());
+ debug_assert!(self.free_slots.is_empty());
+
+ self.slab_size = slab_size;
+ let slots_per_x_axis = TEXTURE_REGION_DIMENSIONS / self.slab_size.width;
+ let slots_per_y_axis = TEXTURE_REGION_DIMENSIONS / self.slab_size.height;
+
+ // Add each block to a freelist.
+ for y in 0 .. slots_per_y_axis {
+ for x in 0 .. slots_per_x_axis {
+ self.free_slots.push(TextureLocation::new(x, y));
+ }
+ }
+
+ self.total_slot_count = self.free_slots.len();
+ *empty_regions -= 1;
+ }
+
+ // Deinit a region, allowing it to become a region with
+ // a different allocator size.
+ fn deinit(&mut self, empty_regions: &mut usize) {
+ self.slab_size = SlabSize::invalid();
+ self.free_slots.clear();
+ self.total_slot_count = 0;
+ *empty_regions += 1;
+ }
+
+ fn is_empty(&self) -> bool {
+ self.slab_size == SlabSize::invalid()
+ }
+
+ // Attempt to allocate a fixed size block from this region.
+ fn alloc(&mut self) -> Option<DeviceIntPoint> {
+ debug_assert!(self.slab_size != SlabSize::invalid());
+
+ self.free_slots.pop().map(|location| {
+ DeviceIntPoint::new(
+ self.slab_size.width * location.0 as i32,
+ self.slab_size.height * location.1 as i32,
+ )
+ })
+ }
+
+ // Free a block in this region.
+ fn free(&mut self, point: DeviceIntPoint, empty_regions: &mut usize) {
+ let x = point.x / self.slab_size.width;
+ let y = point.y / self.slab_size.height;
+ self.free_slots.push(TextureLocation::new(x, y));
+
+ // If this region is completely unused, deinit it
+ // so that it can become a different slab size
+ // as required.
+ if self.free_slots.len() == self.total_slot_count {
+ self.deinit(empty_regions);
+ }
+ }
+}
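
For instance (an illustrative sketch, assuming TEXTURE_REGION_DIMENSIONS is 512 as used above):

    // A region initialized for 64x64 slabs exposes
    // (512 / 64) * (512 / 64) = 64 free slots.
    let mut empty_regions = 1;
    let mut region = TextureRegion::new(0);
    region.init(SlabSize { width: 64, height: 64 }, &mut empty_regions);
    assert_eq!(region.free_slots.len(), 64);
    assert_eq!(empty_regions, 0);
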
+
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct TextureArrayUnit {
+ texture_id: CacheTextureId,
+ regions: Vec<TextureRegion>,
+ empty_regions: usize,
+}
+
+impl TextureArrayUnit {
+ /// Adds `count` new empty regions to the array.
+ fn push_regions(&mut self, count: i32) {
+ assert!(self.empty_regions <= self.regions.len());
+ for _ in 0..count {
+ let index = self.regions.len();
+ self.regions.push(TextureRegion::new(index));
+ self.empty_regions += 1;
+ }
+ }
+
+ /// Returns true if we can allocate the given entry.
+ fn can_alloc(&self, slab_size: SlabSize) -> bool {
+ self.empty_regions != 0 || self.regions.iter().any(|region| {
+ region.slab_size == slab_size && !region.free_slots.is_empty()
+ })
+ }
+
+ fn is_empty(&self) -> bool {
+ self.empty_regions == self.regions.len()
+ }
+}
+
+/// A texture array contains a number of textures, each with a number of
+/// layers, where each layer contains a region that can act as a slab allocator.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+struct TextureArray {
+ filter: TextureFilter,
+ formats: TextureFormatPair<ImageFormat>,
+ units: SmallVec<[TextureArrayUnit; 1]>,
+ layers_per_allocation: i32,
+}
+
+impl TextureArray {
+ fn new(
+ formats: TextureFormatPair<ImageFormat>,
+ filter: TextureFilter,
+ layers_per_allocation: i32,
+ ) -> Self {
+ TextureArray {
+ formats,
+ filter,
+ units: SmallVec::new(),
+ layers_per_allocation,
+ }
+ }
+
+ /// Returns the number of GPU bytes consumed by this texture array.
+ fn size_in_bytes(&self) -> usize {
+ let bpp = self.formats.internal.bytes_per_pixel() as usize;
+ let num_regions: usize = self.units.iter().map(|u| u.regions.len()).sum();
+ num_regions * TEXTURE_REGION_PIXELS * bpp
+ }
+
+ fn clear(&mut self, updates: &mut TextureUpdateList) {
+ for unit in self.units.drain(..) {
+ updates.push_free(unit.texture_id);
+ }
+ }
+
+ fn release_empty_textures(&mut self, updates: &mut TextureUpdateList) {
+ self.units.retain(|unit| {
+ if unit.is_empty() {
+ updates.push_free(unit.texture_id);
+
+ false
+ } else {
+ true
+ }
+ });
+ }
+
+ fn update_profile(&self, counter: &mut ResourceProfileCounter) {
+ let num_regions: usize = self.units.iter().map(|u| u.regions.len()).sum();
+ counter.set(num_regions, self.size_in_bytes());
}
- pub fn report_memory(&self, ops: &mut MallocSizeOfOps) -> usize {
- self.lru_cache.size_of(ops)
+ /// Allocate space in this texture array.
+ fn alloc(
+ &mut self,
+ params: &CacheAllocParams,
+ unit_index: usize,
+ now: FrameStamp,
+ swizzle: Swizzle,
+ ) -> CacheEntry {
+ // Quantize the size of the allocation to select a region to
+ // allocate from.
+ let slab_size = SlabSize::new(params.descriptor.size);
+ let unit = &mut self.units[unit_index];
+
+ // TODO(gw): For simplicity, the initial implementation just
+ // has a single vec<> of regions. We could easily
+ // make this more efficient by storing a list of
+ // regions for each slab size specifically...
+
+ // Keep track of the location of an empty region,
+ // in case we need to select a new empty region
+ // after the loop.
+ let mut empty_region_index = None;
+ let mut entry_details = None;
+
+ // Run through the existing regions of this size, and see if
+ // we can find a free block in any of them.
+ for (i, region) in unit.regions.iter_mut().enumerate() {
+ if region.is_empty() {
+ empty_region_index = Some(i);
+ } else if region.slab_size == slab_size {
+ if let Some(location) = region.alloc() {
+ entry_details = Some(EntryDetails::Cache {
+ layer_index: region.layer_index,
+ origin: location,
+ });
+ break;
+ }
+ }
+ }
+
+ // If no region of the right size had a free block, initialize the
+ // empty region recorded above and allocate from it.
+ let details = match entry_details {
+ Some(details) => details,
+ None => {
+ let region = &mut unit.regions[empty_region_index.unwrap()];
+ region.init(slab_size, &mut unit.empty_regions);
+ EntryDetails::Cache {
+ layer_index: region.layer_index,
+ origin: region.alloc().unwrap(),
+ }
+ }
+ };
+
+ CacheEntry {
+ size: params.descriptor.size,
+ user_data: params.user_data,
+ last_access: now,
+ details,
+ uv_rect_handle: GpuCacheHandle::new(),
+ input_format: params.descriptor.format,
+ filter: self.filter,
+ swizzle,
+ texture_id: unit.texture_id,
+ eviction_notice: None,
+ uv_rect_kind: params.uv_rect_kind,
+ }
}
}
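
A worked example for `size_in_bytes` (illustrative; TEXTURE_REGION_PIXELS is the 512 * 512 region area):

    // Each region holds 512 * 512 texels; at 4 bytes per texel (RGBA8)
    // that is 1 MiB per region, so 16 regions report 16 MiB.
    assert_eq!(16 * 512 * 512 * 4, 16 * 1024 * 1024);
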
+
+/// A tracking structure for each slice in `WholeTextureArray`.
+#[cfg_attr(feature = "capture", derive(Serialize))]
+#[cfg_attr(feature = "replay", derive(Deserialize))]
+#[derive(Clone, Copy, Debug)]
+struct WholeTextureSlice {
+ uv_rect_handle: Option<GpuCacheHandle>,
+}
+
+/// A texture array that allocates whole slices and doesn't do any region tracking.
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct TextureParameters {
- pub formats: TextureFormatPair<ImageFormat>,
- pub filter: TextureFilter,
+struct WholeTextureArray {
+ size: DeviceIntSize,
+ filter: TextureFilter,
+ format: ImageFormat,
+ texture_id: CacheTextureId,
+ slices: Vec<WholeTextureSlice>,
+ has_depth: bool,
+}
+
+impl WholeTextureArray {
+ fn to_info(&self) -> TextureCacheAllocInfo {
+ TextureCacheAllocInfo {
+ width: self.size.width,
+ height: self.size.height,
+ format: self.format,
+ filter: self.filter,
+ layer_count: self.slices.len() as i32,
+ is_shared_cache: true, //TODO: reconsider
+ has_depth: self.has_depth,
+ }
+ }
+
+ /// Returns the number of GPU bytes consumed by this texture array.
+ fn size_in_bytes(&self) -> usize {
+ let bpp = self.format.bytes_per_pixel() as usize;
+ self.slices.len() * (self.size.width * self.size.height) as usize * bpp
+ }
+
+ /// Find a free slice.
+ fn find_free(&self) -> Option<LayerIndex> {
+ self.slices.iter().position(|slice| slice.uv_rect_handle.is_none())
+ }
+
+ /// Grow the array by the specified number of slices, returning the index of the first new slice.
+ fn grow(&mut self, count: usize) -> LayerIndex {
+ let index = self.slices.len();
+ for _ in 0 .. count {
+ self.slices.push(WholeTextureSlice {
+ uv_rect_handle: None,
+ });
+ }
+ index
+ }
+
+ fn cache_entry_impl(
+ &self,
+ texture_index: usize,
+ layer_index: usize,
+ now: FrameStamp,
+ uv_rect_handle: GpuCacheHandle,
+ texture_id: CacheTextureId,
+ ) -> CacheEntry {
+ CacheEntry {
+ size: self.size,
+ user_data: [0.0; 3],
+ last_access: now,
+ details: EntryDetails::Picture {
+ texture_index,
+ layer_index,
+ },
+ uv_rect_handle,
+ input_format: self.format,
+ filter: self.filter,
+ swizzle: Swizzle::default(),
+ texture_id,
+ eviction_notice: None,
+ uv_rect_kind: UvRectKind::Rect,
+ }
+ }
+
+ /// Occupy the specified slice with a new cache entry.
+ fn occupy(
+ &mut self,
+ texture_index: usize,
+ layer_index: usize,
+ now: FrameStamp,
+ ) -> CacheEntry {
+ let uv_rect_handle = GpuCacheHandle::new();
+ assert!(self.slices[layer_index].uv_rect_handle.is_none());
+ self.slices[layer_index].uv_rect_handle = Some(uv_rect_handle);
+ self.cache_entry_impl(
+ texture_index,
+ layer_index,
+ now,
+ uv_rect_handle,
+ self.texture_id,
+ )
+ }
+
+ /// Reset the texture array to the specified number of slices, if it's larger.
+ fn reset(
+ &mut self, num_slices: usize
+ ) -> Option<CacheTextureId> {
+ if self.slices.len() <= num_slices {
+ None
+ } else {
+ self.slices.truncate(num_slices);
+ Some(self.texture_id)
+ }
+ }
}
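
A typical call sequence for this type (a sketch; `array`, `texture_index` and `now` are assumed bindings):

    // Reuse a free slice if one exists, otherwise grow by a batch of
    // empty slices and occupy the first new one.
    let layer = match array.find_free() {
        Some(layer) => layer,
        None => array.grow(4),
    };
    let entry = array.occupy(texture_index, layer, now);
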
+
impl TextureCacheUpdate {
// Constructs a TextureCacheUpdate operation to be passed to the
// rendering thread in order to do an upload to the right
@@ -1792,6 +1701,7 @@ impl TextureCacheUpdate {
descriptor: &ImageDescriptor,
origin: DeviceIntPoint,
size: DeviceIntSize,
+ layer_index: i32,
use_upload_format: bool,
dirty_rect: &ImageDirtyRect,
) -> TextureCacheUpdate {
@@ -1841,6 +1751,7 @@ impl TextureCacheUpdate {
stride: Some(stride),
offset,
format_override,
+ layer_index,
}
}
DirtyRect::All => {
@@ -1850,80 +1761,22 @@ impl TextureCacheUpdate {
stride: descriptor.stride,
offset: descriptor.offset,
format_override,
+ layer_index,
}
}
}
}
}
-#[cfg(test)]
-mod test_texture_cache {
- #[test]
- fn check_allocation_size_balance() {
- // Allocate some glyphs, observe the total allocation size, and free
- // the glyphs again. Check that the total allocation size is back at the
- // original value.
-
- use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction, TargetShader};
- use crate::gpu_cache::GpuCache;
- use crate::device::TextureFilter;
- use crate::gpu_types::UvRectKind;
- use api::{ImageDescriptor, ImageDescriptorFlags, ImageFormat, DirtyRect};
- use api::units::*;
- use euclid::size2;
- let mut gpu_cache = GpuCache::new_for_testing();
- let mut texture_cache = TextureCache::new_for_testing(2048, ImageFormat::BGRA8);
-
- let sizes: &[DeviceIntSize] = &[
- size2(23, 27),
- size2(15, 22),
- size2(11, 5),
- size2(20, 25),
- size2(38, 41),
- size2(11, 19),
- size2(13, 21),
- size2(37, 40),
- size2(13, 15),
- size2(14, 16),
- size2(10, 9),
- size2(25, 28),
- ];
-
- let bytes_at_start = texture_cache.total_allocated_bytes_for_testing();
-
- let handles: Vec<TextureCacheHandle> = sizes.iter().map(|size| {
- let mut texture_cache_handle = TextureCacheHandle::invalid();
- texture_cache.request(&texture_cache_handle, &mut gpu_cache);
- texture_cache.update(
- &mut texture_cache_handle,
- ImageDescriptor {
- size: *size,
- stride: None,
- format: ImageFormat::BGRA8,
- flags: ImageDescriptorFlags::empty(),
- offset: 0,
- },
- TextureFilter::Linear,
- None,
- [0.0; 4],
- DirtyRect::All,
- &mut gpu_cache,
- None,
- UvRectKind::Rect,
- Eviction::Manual,
- TargetShader::Text,
- );
- texture_cache_handle
- }).collect();
-
- let bytes_after_allocating = texture_cache.total_allocated_bytes_for_testing();
- assert!(bytes_after_allocating > bytes_at_start);
-
- for handle in handles {
- texture_cache.evict_handle(&handle);
- }
-
- let bytes_at_end = texture_cache.total_allocated_bytes_for_testing();
- assert_eq!(bytes_at_end, bytes_at_start);
+fn quantize_dimension(size: i32) -> i32 {
+ match size {
+ 0 => unreachable!(),
+ 1..=16 => 16,
+ 17..=32 => 32,
+ 33..=64 => 64,
+ 65..=128 => 128,
+ 129..=256 => 256,
+ 257..=512 => 512,
+ _ => panic!("Invalid dimensions for cache!"),
}
}
diff --git a/third_party/webrender/webrender/src/texture_pack/mod.rs b/third_party/webrender/webrender/src/texture_pack/mod.rs
deleted file mode 100644
index 21707a2e96f..00000000000
--- a/third_party/webrender/webrender/src/texture_pack/mod.rs
+++ /dev/null
@@ -1,329 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-mod guillotine;
-mod slab;
-
-pub use guillotine::*;
-pub use slab::*;
-
-use api::units::*;
-use crate::internal_types::CacheTextureId;
-use euclid::{point2, size2, default::Box2D};
-use smallvec::SmallVec;
-
-pub use etagere::AllocatorOptions as ShelfAllocatorOptions;
-pub use etagere::BucketedAtlasAllocator as BucketedShelfAllocator;
-pub use etagere::AtlasAllocator as ShelfAllocator;
-
-/// ID of an allocation within a given allocator.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct AllocId(pub u32);
-
-pub trait AtlasAllocator {
- /// Specific parameters of the allocator.
- type Parameters;
- /// Constructor
- fn new(size: i32, parameters: &Self::Parameters) -> Self;
- /// Allocate a rectangle.
- fn allocate(&mut self, size: DeviceIntSize) -> Option<(AllocId, DeviceIntRect)>;
- /// Deallocate a rectangle and return its size.
- fn deallocate(&mut self, id: AllocId);
- /// Returns true if there are no live allocations.
- fn is_empty(&self) -> bool;
- /// Allocated area in pixels.
- fn allocated_space(&self) -> i32;
- /// Write a debug visualization of the atlas fitting in the provided rectangle.
- ///
- /// This is inserted in a larger dump so it shouldn't contain the xml start/end tags.
- fn dump_into_svg(&self, rect: &Box2D<f32>, output: &mut dyn std::io::Write) -> std::io::Result<()>;
-}
-
-pub trait AtlasAllocatorList<TextureParameters> {
- /// Allocate a rectangle.
- ///
- /// If allocation fails, call the provided callback, add a new allocator to the list and try again.
- fn allocate(
- &mut self,
- size: DeviceIntSize,
- texture_alloc_cb: &mut dyn FnMut(DeviceIntSize, &TextureParameters) -> CacheTextureId,
- ) -> (CacheTextureId, AllocId, DeviceIntRect);
-
- /// Deallocate a rectangle and return its size.
- fn deallocate(&mut self, texture_id: CacheTextureId, alloc_id: AllocId);
-
- fn texture_parameters(&self) -> &TextureParameters;
-}
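
Callers drive this trait with a texture-creation callback that is invoked only when no existing texture can satisfy the request, in the spirit of (hypothetical ids; compare the test at the bottom of this file):

    let mut next_id = CacheTextureId(100);
    let (texture_id, alloc_id, rect) = allocators.allocate(
        size2(128, 64),
        &mut |_size, _params| { let id = next_id; next_id.0 += 1; id },
    );
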
-
-/// A single 2D texture (one layer) with the atlas allocator that manages
-/// allocations within it.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct TextureUnit<Allocator> {
- allocator: Allocator,
- texture_id: CacheTextureId,
-}
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct AllocatorList<Allocator: AtlasAllocator, TextureParameters> {
- units: SmallVec<[TextureUnit<Allocator>; 1]>,
- size: i32,
- atlas_parameters: Allocator::Parameters,
- texture_parameters: TextureParameters,
-}
-
-impl<Allocator: AtlasAllocator, TextureParameters> AllocatorList<Allocator, TextureParameters> {
- pub fn new(
- size: i32,
- atlas_parameters: Allocator::Parameters,
- texture_parameters: TextureParameters,
- ) -> Self {
- AllocatorList {
- units: SmallVec::new(),
- size,
- atlas_parameters,
- texture_parameters,
- }
- }
-
- pub fn allocate(
- &mut self,
- requested_size: DeviceIntSize,
- texture_alloc_cb: &mut dyn FnMut(DeviceIntSize, &TextureParameters) -> CacheTextureId,
- ) -> (CacheTextureId, AllocId, DeviceIntRect) {
- // Try to allocate from one of the existing textures.
- for unit in &mut self.units {
- if let Some((alloc_id, rect)) = unit.allocator.allocate(requested_size) {
- return (unit.texture_id, alloc_id, rect);
- }
- }
-
- // Need to create a new texture to hold the allocation.
- let texture_id = texture_alloc_cb(size2(self.size, self.size), &self.texture_parameters);
- let unit_index = self.units.len();
-
- self.units.push(TextureUnit {
- allocator: Allocator::new(self.size, &self.atlas_parameters),
- texture_id,
- });
-
- let (alloc_id, rect) = self.units[unit_index]
- .allocator
- .allocate(requested_size)
- .unwrap();
-
- (texture_id, alloc_id, rect)
- }
-
- pub fn deallocate(&mut self, texture_id: CacheTextureId, alloc_id: AllocId) {
- let unit = self.units
- .iter_mut()
- .find(|unit| unit.texture_id == texture_id)
- .expect("Unable to find the associated texture array unit");
-
- unit.allocator.deallocate(alloc_id);
- }
-
- pub fn release_empty_textures<'l>(&mut self, texture_dealloc_cb: &'l mut dyn FnMut(CacheTextureId)) {
- self.units.retain(|unit| {
- if unit.allocator.is_empty() {
- texture_dealloc_cb(unit.texture_id);
-
- false
- } else {
- true
- }
- });
- }
-
- pub fn clear(&mut self, texture_dealloc_cb: &mut dyn FnMut(CacheTextureId)) {
- for unit in self.units.drain(..) {
- texture_dealloc_cb(unit.texture_id);
- }
- }
-
- #[allow(dead_code)]
- pub fn dump_as_svg(&self, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- use svg_fmt::*;
-
- let num_arrays = self.units.len() as f32;
-
- let text_spacing = 15.0;
- let unit_spacing = 30.0;
- let texture_size = self.size as f32 / 2.0;
-
- let svg_w = unit_spacing * 2.0 + texture_size;
- let svg_h = unit_spacing + num_arrays * (texture_size + text_spacing + unit_spacing);
-
- writeln!(output, "{}", BeginSvg { w: svg_w, h: svg_h })?;
-
- // Background.
- writeln!(output,
- " {}",
- rectangle(0.0, 0.0, svg_w, svg_h)
- .inflate(1.0, 1.0)
- .fill(rgb(50, 50, 50))
- )?;
-
- let mut y = unit_spacing;
- for unit in &self.units {
- writeln!(output, " {}", text(unit_spacing, y, format!("{:?}", unit.texture_id)).color(rgb(230, 230, 230)))?;
-
- let rect = Box2D {
- min: point2(unit_spacing, y),
- max: point2(unit_spacing + texture_size, y + texture_size),
- };
-
- unit.allocator.dump_into_svg(&rect, output)?;
-
- y += unit_spacing + texture_size + text_spacing;
- }
-
- writeln!(output, "{}", EndSvg)
- }
-
- pub fn allocated_space(&self) -> i32 {
- let mut accum = 0;
- for unit in &self.units {
- accum += unit.allocator.allocated_space();
- }
-
- accum
- }
-
- pub fn allocated_textures(&self) -> usize {
- self.units.len()
- }
-}
-
-impl<Allocator: AtlasAllocator, TextureParameters> AtlasAllocatorList<TextureParameters>
-for AllocatorList<Allocator, TextureParameters> {
- fn allocate(
- &mut self,
- requested_size: DeviceIntSize,
- texture_alloc_cb: &mut dyn FnMut(DeviceIntSize, &TextureParameters) -> CacheTextureId,
- ) -> (CacheTextureId, AllocId, DeviceIntRect) {
- self.allocate(requested_size, texture_alloc_cb)
- }
-
- fn deallocate(&mut self, texture_id: CacheTextureId, alloc_id: AllocId) {
- self.deallocate(texture_id, alloc_id);
- }
-
- fn texture_parameters(&self) -> &TextureParameters {
- &self.texture_parameters
- }
-}
-
-impl AtlasAllocator for BucketedShelfAllocator {
- type Parameters = ShelfAllocatorOptions;
-
- fn new(size: i32, options: &Self::Parameters) -> Self {
- BucketedShelfAllocator::with_options(size2(size, size), options)
- }
-
- fn allocate(&mut self, size: DeviceIntSize) -> Option<(AllocId, DeviceIntRect)> {
- self.allocate(size.to_untyped()).map(|alloc| {
- (AllocId(alloc.id.serialize()), alloc.rectangle.to_rect().cast_unit())
- })
- }
-
- fn deallocate(&mut self, id: AllocId) {
- self.deallocate(etagere::AllocId::deserialize(id.0));
- }
-
- fn is_empty(&self) -> bool {
- self.is_empty()
- }
-
- fn allocated_space(&self) -> i32 {
- self.allocated_space()
- }
-
- fn dump_into_svg(&self, rect: &Box2D<f32>, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- self.dump_into_svg(Some(&rect.to_i32().cast_unit()), output)
- }
-}
-
-impl AtlasAllocator for ShelfAllocator {
- type Parameters = ShelfAllocatorOptions;
-
- fn new(size: i32, options: &Self::Parameters) -> Self {
- ShelfAllocator::with_options(size2(size, size), options)
- }
-
- fn allocate(&mut self, size: DeviceIntSize) -> Option<(AllocId, DeviceIntRect)> {
- self.allocate(size.to_untyped()).map(|alloc| {
- (AllocId(alloc.id.serialize()), alloc.rectangle.to_rect().cast_unit())
- })
- }
-
- fn deallocate(&mut self, id: AllocId) {
- self.deallocate(etagere::AllocId::deserialize(id.0));
- }
-
- fn is_empty(&self) -> bool {
- self.is_empty()
- }
-
- fn allocated_space(&self) -> i32 {
- self.allocated_space()
- }
-
- fn dump_into_svg(&self, rect: &Box2D<f32>, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- self.dump_into_svg(Some(&rect.to_i32().cast_unit()), output)
- }
-}
-
-#[test]
-fn bug_1680769() {
- let mut allocators: AllocatorList<ShelfAllocator, ()> = AllocatorList::new(
- 1024,
- ShelfAllocatorOptions::default(),
- (),
- );
-
- let mut allocations = Vec::new();
- let mut next_id = CacheTextureId(0);
- let alloc_cb = &mut |_: DeviceIntSize, _: &()| {
- let texture_id = next_id;
- next_id.0 += 1;
-
- texture_id
- };
-
- // Make some allocations, forcing the creation of multiple textures.
- for _ in 0..50 {
- allocations.push(allocators.allocate(size2(256, 256), alloc_cb));
- }
-
- // Deallocate everything.
- // It should empty all atlases and we still have textures allocated because
- // we haven't called release_empty_textures yet.
- for alloc in allocations.drain(..) {
- allocators.deallocate(alloc.0, alloc.1);
- }
-
- // Allocate something else.
- // Bug 1680769 was causing this allocation to be duplicated and leaked in
- // all textures.
- allocations.push(allocators.allocate(size2(8, 8), alloc_cb));
-
- // Deallocate all known allocations.
- for alloc in allocations.drain(..) {
- allocators.deallocate(alloc.0, alloc.1);
- }
-
- // If we have leaked items, this won't manage to remove all textures.
- allocators.release_empty_textures(&mut |_| {});
-
- assert_eq!(allocators.allocated_textures(), 0);
-}
diff --git a/third_party/webrender/webrender/src/texture_pack/slab.rs b/third_party/webrender/webrender/src/texture_pack/slab.rs
deleted file mode 100644
index 6e383833978..00000000000
--- a/third_party/webrender/webrender/src/texture_pack/slab.rs
+++ /dev/null
@@ -1,356 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#![deny(unconditional_recursion)]
-
-use super::{AtlasAllocator, AllocId};
-use api::units::{DeviceIntPoint, DeviceIntRect, DeviceIntSize};
-use euclid::{point2, size2, default::Box2D};
-use std::cmp;
-
-fn pack_alloc_id(region_index: usize, location: TextureLocation) -> AllocId {
- AllocId(
- region_index as u32 & 0xFFFF
- | (location.0 as u32) << 16
- | (location.1 as u32) << 24
- )
-}
-
-fn unpack_alloc_id(id: AllocId) -> (usize, TextureLocation) {
- (
- (id.0 & 0xFFFF) as usize,
- TextureLocation(
- ((id.0 >> 16) & 0xFF) as u8,
- ((id.0 >> 24) & 0xFF) as u8,
- ),
- )
-}
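
The id packs (region_index, x, y) into 16/8/8 bits, so the round trip is lossless for region indices below 2^16 (an illustrative check, not part of the deleted code):

    let id = pack_alloc_id(3, TextureLocation(5, 7));
    let (region_index, location) = unpack_alloc_id(id);
    assert!(region_index == 3 && location.0 == 5 && location.1 == 7);
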
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-#[derive(Copy, Clone, PartialEq)]
-struct SlabSize {
- width: i32,
- height: i32,
-}
-
-impl SlabSize {
- fn invalid() -> SlabSize {
- SlabSize {
- width: 0,
- height: 0,
- }
- }
-
- fn get(size: DeviceIntSize) -> SlabSize {
- fn quantize_dimension(size: i32) -> i32 {
- match size {
- 0 => unreachable!(),
- 1..=16 => 16,
- 17..=32 => 32,
- 33..=64 => 64,
- 65..=128 => 128,
- 129..=256 => 256,
- 257..=512 => 512,
- _ => panic!("Invalid dimensions for cache!"),
- }
- }
-
-
- let x_size = quantize_dimension(size.width);
- let y_size = quantize_dimension(size.height);
-
- let (width, height) = match (x_size, y_size) {
- // Special cased rectangular slab pages.
- (512, 0..=64) => (512, 64),
- (512, 128) => (512, 128),
- (512, 256) => (512, 256),
- (0..=64, 512) => (64, 512),
- (128, 512) => (128, 512),
- (256, 512) => (256, 512),
-
- // If none of those fit, use a square slab size.
- (x_size, y_size) => {
- let square_size = cmp::max(x_size, y_size);
- (square_size, square_size)
- }
- };
-
- SlabSize {
- width,
- height,
- }
- }
-}
-
-// The x/y location within a texture region of an allocation.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct TextureLocation(pub u8, pub u8);
-
-impl TextureLocation {
- fn new(x: i32, y: i32) -> Self {
- debug_assert!(x >= 0 && y >= 0 && x < 0x100 && y < 0x100);
- TextureLocation(x as u8, y as u8)
- }
-}
-
-/// A region is a rectangular part of a texture cache texture, split into fixed-size slabs.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-struct TextureRegion {
- index: usize,
- slab_size: SlabSize,
- offset: DeviceIntPoint,
- free_slots: Vec<TextureLocation>,
- total_slot_count: usize,
-}
-
-impl TextureRegion {
- fn new(index: usize, offset: DeviceIntPoint) -> Self {
- TextureRegion {
- index,
- slab_size: SlabSize::invalid(),
- offset,
- free_slots: Vec::new(),
- total_slot_count: 0,
- }
- }
-
- // Initialize a region to be an allocator for a specific slab size.
- fn init(&mut self, slab_size: SlabSize, region_size: i32, empty_regions: &mut usize) {
- debug_assert!(self.slab_size == SlabSize::invalid());
- debug_assert!(self.free_slots.is_empty());
-
- self.slab_size = slab_size;
- let slots_per_x_axis = region_size / self.slab_size.width;
- let slots_per_y_axis = region_size / self.slab_size.height;
-
- // Add each block to a freelist.
- for y in 0 .. slots_per_y_axis {
- for x in 0 .. slots_per_x_axis {
- self.free_slots.push(TextureLocation::new(x, y));
- }
- }
-
- self.total_slot_count = self.free_slots.len();
- *empty_regions -= 1;
- }
-
- // Deinit a region, allowing it to become a region with
- // a different allocator size.
- fn deinit(&mut self, empty_regions: &mut usize) {
- self.slab_size = SlabSize::invalid();
- self.free_slots.clear();
- self.total_slot_count = 0;
- *empty_regions += 1;
- }
-
- fn is_empty(&self) -> bool {
- self.slab_size == SlabSize::invalid()
- }
-
- // Attempt to allocate a fixed size block from this region.
- fn alloc(&mut self) -> Option<(DeviceIntPoint, TextureLocation)> {
- debug_assert!(self.slab_size != SlabSize::invalid());
-
- self.free_slots.pop().map(|location| {(
- point2(
- self.offset.x + self.slab_size.width * location.0 as i32,
- self.offset.y + self.slab_size.height * location.1 as i32,
- ),
- location,
- )})
- }
-
- // Free a block in this region.
- fn free(&mut self, location: TextureLocation, empty_regions: &mut usize) {
- self.free_slots.push(location);
-
- // If this region is completely unused, deinit it
- // so that it can become a different slab size
- // as required.
- if self.free_slots.len() == self.total_slot_count {
- self.deinit(empty_regions);
- }
- }
-}
-
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct SlabAllocatorParameters {
- pub region_size: i32,
-}
-
-/// A 2D texture divided into regions.
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct SlabAllocator {
- regions: Vec<TextureRegion>,
- size: i32,
- region_size: i32,
- empty_regions: usize,
- allocated_space: i32,
-}
-
-impl SlabAllocator {
- pub fn new(size: i32, options: &SlabAllocatorParameters) -> Self {
- let regions_per_row = size / options.region_size;
- let num_regions = (regions_per_row * regions_per_row) as usize;
-
- let mut regions = Vec::with_capacity(num_regions);
-
- for index in 0..num_regions {
- let offset = point2(
- (index as i32 % regions_per_row) * options.region_size,
- (index as i32 / regions_per_row) * options.region_size,
- );
-
- regions.push(TextureRegion::new(index, offset));
- }
-
- SlabAllocator {
- regions,
- size,
- region_size: options.region_size,
- empty_regions: num_regions,
- allocated_space: 0,
- }
- }
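
The offset arithmetic above lays regions out row-major (a worked example, not in the deleted code):

    // size = 2048, region_size = 512 => regions_per_row = 4, and
    // region index 5 sits at:
    //   x = (5 % 4) * 512 = 512
    //   y = (5 / 4) * 512 = 512
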
-
- pub fn is_empty(&self) -> bool {
- self.empty_regions == self.regions.len()
- }
-
- pub fn allocated_space(&self) -> i32 {
- self.allocated_space
- }
-
- // Returns the packed allocation id (region index + location) and the allocated rect.
- pub fn allocate(&mut self, size: DeviceIntSize) -> Option<(AllocId, DeviceIntRect)> {
- let slab_size = SlabSize::get(size);
-
- // Keep track of the location of an empty region,
- // in case we need to select a new empty region
- // after the loop.
- let mut empty_region_index = None;
-
- let allocated_size = size2(slab_size.width, slab_size.height);
-
- // Run through the existing regions of this size, and see if
- // we can find a free block in any of them.
- for (i, region) in self.regions.iter_mut().enumerate() {
- if region.is_empty() {
- empty_region_index = Some(i);
- } else if region.slab_size == slab_size {
- if let Some((origin, location)) = region.alloc() {
- return Some((
- pack_alloc_id(region.index, location),
- DeviceIntRect {
- origin,
- size: allocated_size,
- }
- ));
- }
- }
- }
-
- if let Some(empty_region_index) = empty_region_index {
- let region = &mut self.regions[empty_region_index];
- region.init(slab_size, self.region_size, &mut self.empty_regions);
- let (origin, location) = region.alloc().unwrap();
-
- return Some((
- pack_alloc_id(region.index, location),
- DeviceIntRect {
- origin,
- size: allocated_size,
- },
- ))
- }
-
- None
- }
-
- pub fn deallocate(&mut self, id: AllocId) {
- let (region_index, location) = unpack_alloc_id(id);
-
- let region = &mut self.regions[region_index];
- region.free(location, &mut self.empty_regions);
-
- self.allocated_space -= region.slab_size.width * region.slab_size.height;
- }
-
- pub fn dump_into_svg(&self, rect: &Box2D<f32>, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- use svg_fmt::*;
-
- let region_spacing = 5.0;
- let text_spacing = 15.0;
- let regions_per_row = (self.size / self.region_size) as usize;
- let wh = rect.size().width.min(rect.size().height);
- let region_wh = (wh - region_spacing) / regions_per_row as f32 - region_spacing;
-
- let x0 = rect.min.x;
- let y0 = rect.min.y;
-
- for (idx, region) in self.regions.iter().enumerate() {
- let slab_size = region.slab_size;
- let x = x0 + (idx % regions_per_row) as f32 * (region_wh + region_spacing);
-
- let y = y0 + text_spacing + (idx / regions_per_row) as f32 * (region_wh + region_spacing);
-
- let texture_background = if region.is_empty() { rgb(30, 30, 30) } else { rgb(40, 40, 130) };
- writeln!(output, " {}", rectangle(x, y, region_wh, region_wh).inflate(1.0, 1.0).fill(rgb(10, 10, 10)))?;
- writeln!(output, " {}", rectangle(x, y, region_wh, region_wh).fill(texture_background))?;
-
- let sw = (slab_size.width as f32 / self.region_size as f32) * region_wh;
- let sh = (slab_size.height as f32 / self.region_size as f32) * region_wh;
-
- for slot in &region.free_slots {
- let sx = x + slot.0 as f32 * sw;
- let sy = y + slot.1 as f32 * sh;
-
- // Free allocation slot.
- writeln!(output, " {}", rectangle(sx, sy, sw, sh).inflate(-0.5, -0.5).fill(rgb(30, 30, 30)))?;
- }
-
- if slab_size.width != 0 {
- let region_text = format!("{}x{}", slab_size.width, slab_size.height);
- let tx = x + 1.0;
- let ty = y + region_wh - 1.0;
- writeln!(output, " {}", text(tx, ty, region_text).color(rgb(230, 230, 230)))?;
- }
- }
-
- Ok(())
- }
-}
-
-impl AtlasAllocator for SlabAllocator {
- type Parameters = SlabAllocatorParameters;
-
- fn new(size: i32, options: &Self::Parameters) -> Self {
- SlabAllocator::new(size, options)
- }
-
- fn allocate(&mut self, size: DeviceIntSize) -> Option<(AllocId, DeviceIntRect)> {
- self.allocate(size)
- }
-
- fn deallocate(&mut self, id: AllocId) {
- self.deallocate(id);
- }
-
- fn is_empty(&self) -> bool {
- self.is_empty()
- }
-
- fn allocated_space(&self) -> i32 {
- self.allocated_space()
- }
-
- fn dump_into_svg(&self, rect: &Box2D<f32>, output: &mut dyn std::io::Write) -> std::io::Result<()> {
- self.dump_into_svg(rect, output)
- }
-}
diff --git a/third_party/webrender/webrender/src/tile_cache.rs b/third_party/webrender/webrender/src/tile_cache.rs
deleted file mode 100644
index a7ec0f25cb5..00000000000
--- a/third_party/webrender/webrender/src/tile_cache.rs
+++ /dev/null
@@ -1,743 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-use api::{ColorF, PrimitiveFlags, QualitySettings};
-use api::units::*;
-use crate::clip::{ClipChainId, ClipNodeKind, ClipStore, ClipInstance};
-use crate::frame_builder::FrameBuilderConfig;
-use crate::internal_types::{FastHashMap, FastHashSet};
-use crate::picture::{PrimitiveList, PictureCompositeMode, PictureOptions, PicturePrimitive, SliceId};
-use crate::picture::{Picture3DContext, TileCacheParams, TileOffset};
-use crate::prim_store::{PrimitiveInstance, PrimitiveStore, PictureIndex};
-use crate::scene_building::SliceFlags;
-use crate::scene_builder_thread::Interners;
-use crate::spatial_tree::{ROOT_SPATIAL_NODE_INDEX, SpatialNodeIndex, SpatialTree};
-use crate::util::VecHelper;
-
-/*
- Types and functionality related to picture caching. In future, we'll
- move more and more of the existing functionality out of picture.rs
- and into here.
- */
-
-// If the page would create too many slices (an arbitrary limit, chosen on
-// the assumption that GPU memory + compositing overhead would be too high)
-// then create a single picture cache for the remaining content. This at
-// least means that we can cache small content changes efficiently when
-// scrolling isn't occurring. Scrolling regions will be handled reasonably
-// efficiently by the dirty rect tracking (since it's likely that if the
-// page has so many slices there isn't a single major scroll region).
-const MAX_CACHE_SLICES: usize = 12;
-
-/// Created during scene building, describes how to create a tile cache for a given slice.
-pub struct PendingTileCache {
- /// List of primitives that are part of this slice
- pub prim_list: PrimitiveList,
- /// Parameters that define the tile cache (such as background color, shared clips, reference spatial node)
- pub params: TileCacheParams,
- /// An additional clip chain that gets applied to the shared clips unconditionally for this tile cache
- pub iframe_clip: Option<ClipChainId>,
-}
-
-/// Used during scene building to construct the list of pending tile caches.
-pub struct TileCacheBuilder {
- /// When Some(..), a new tile cache will be created for the next primitive.
- force_new_tile_cache: Option<SliceFlags>,
- /// List of tile caches that have been created so far (last in the list is currently active).
- pending_tile_caches: Vec<PendingTileCache>,
-
- /// Cache the previous scroll root search for a spatial node, since they are often the same.
- prev_scroll_root_cache: (SpatialNodeIndex, SpatialNodeIndex),
- /// A buffer for collecting clips for a clip-chain. Retained here to avoid memory allocations in add_prim.
- prim_clips_buffer: Vec<ClipInstance>,
- /// Cache the last clip-chain that was added to the shared clips as it's often the same between prims.
- last_checked_clip_chain: ClipChainId,
-}
-
-/// The output of a tile cache builder, containing all details needed to construct the
-/// tile cache(s) for the next scene, and to retain tiles from the previous
-/// frame when sent to the frame builder.
-pub struct TileCacheConfig {
- /// Mapping of slice id to the parameters needed to construct this tile cache.
- pub tile_caches: FastHashMap<SliceId, TileCacheParams>,
- /// A set of any spatial nodes that are attached to either a picture cache
- /// root, or a clip node on the picture cache primitive. These are used
- /// to detect cases where picture caching must be disabled. This is mostly
- /// a temporary workaround for some existing wrench tests. I don't think
- /// Gecko ever produces picture cache slices with complex transforms, so
- /// in future we should prevent this in the public API and remove this hack.
- pub picture_cache_spatial_nodes: FastHashSet<SpatialNodeIndex>,
- /// Number of picture cache slices that were created (for profiler)
- pub picture_cache_slice_count: usize,
-}
-
-impl TileCacheConfig {
- pub fn new(picture_cache_slice_count: usize) -> Self {
- TileCacheConfig {
- tile_caches: FastHashMap::default(),
- picture_cache_spatial_nodes: FastHashSet::default(),
- picture_cache_slice_count,
- }
- }
-}
-
-impl TileCacheBuilder {
- /// Construct a new tile cache builder.
- pub fn new() -> Self {
- TileCacheBuilder {
- force_new_tile_cache: None,
- pending_tile_caches: Vec::new(),
- prev_scroll_root_cache: (ROOT_SPATIAL_NODE_INDEX, ROOT_SPATIAL_NODE_INDEX),
- prim_clips_buffer: Vec::new(),
- last_checked_clip_chain: ClipChainId::INVALID,
- }
- }
-
- /// Set a barrier that forces a new tile cache next time a prim is added.
- pub fn add_tile_cache_barrier(
- &mut self,
- slice_flags: SliceFlags,
- ) {
- self.force_new_tile_cache = Some(slice_flags);
- }
-
- /// Returns true if it's OK to add a container tile cache (will return false
- /// if too many slices have been created).
- pub fn can_add_container_tile_cache(&self) -> bool {
- // See the logic and comments around MAX_CACHE_SLICES in add_prim
- // to explain why < MAX_CACHE_SLICES-1 is used.
- self.pending_tile_caches.len() < MAX_CACHE_SLICES-1
- }
-
- /// Create a new tile cache for an existing prim_list
- pub fn add_tile_cache(
- &mut self,
- prim_list: PrimitiveList,
- clip_chain_id: ClipChainId,
- spatial_tree: &SpatialTree,
- clip_store: &ClipStore,
- interners: &Interners,
- config: &FrameBuilderConfig,
- iframe_clip: Option<ClipChainId>,
- slice_flags: SliceFlags,
- ) {
- assert!(self.can_add_container_tile_cache());
-
- if prim_list.is_empty() {
- return;
- }
-
- // Iterate the clusters and determine which is the most commonly occurring
- // scroll root. This is a reasonable heuristic to decide which spatial node
- // should be considered the scroll root of this tile cache, in order to
- // minimize the invalidations that occur due to scrolling. It's often the
- // case that a blend container will have only a single scroll root.
- let mut scroll_root_occurrences = FastHashMap::default();
-
- for cluster in &prim_list.clusters {
- let scroll_root = self.find_scroll_root(
- cluster.spatial_node_index,
- spatial_tree,
- );
-
- *scroll_root_occurrences.entry(scroll_root).or_insert(0) += 1;
- }
-
- // We can't just select the most commonly occurring scroll root in this
- // primitive list. If that is a nested scroll root, there may be
- // primitives in the list that are outside that scroll root, which
- // can cause panics when calculating relative transforms. To ensure
- // this doesn't happen, only retain scroll root candidates that are
- // also ancestors of every other scroll root candidate.
- let scroll_roots: Vec<SpatialNodeIndex> = scroll_root_occurrences
- .keys()
- .cloned()
- .collect();
-
- scroll_root_occurrences.retain(|parent_spatial_node_index, _| {
- scroll_roots.iter().all(|child_spatial_node_index| {
- parent_spatial_node_index == child_spatial_node_index ||
- spatial_tree.is_ancestor(
- *parent_spatial_node_index,
- *child_spatial_node_index,
- )
- })
- });
-
- // Select the scroll root by finding the most commonly occurring one
- let scroll_root = scroll_root_occurrences
- .iter()
- .max_by_key(|entry| entry.1)
- .map(|(spatial_node_index, _)| *spatial_node_index)
- .unwrap_or(ROOT_SPATIAL_NODE_INDEX);
-
- let mut first = true;
- let prim_clips_buffer = &mut self.prim_clips_buffer;
- let mut shared_clips = Vec::new();
-
- // Work out which clips are shared by all prim instances and can thus be applied
- // at the tile cache level. In future, we aim to remove this limitation by knowing
- // during initial scene build which are the relevant compositor clips, but for now
- // this is unlikely to be a significant cost.
- for cluster in &prim_list.clusters {
- for prim_instance in &prim_list.prim_instances[cluster.prim_range()] {
- if first {
- add_clips(
- scroll_root,
- prim_instance.clip_set.clip_chain_id,
- &mut shared_clips,
- clip_store,
- interners,
- spatial_tree,
- );
-
- self.last_checked_clip_chain = prim_instance.clip_set.clip_chain_id;
- first = false;
- } else {
- if self.last_checked_clip_chain != prim_instance.clip_set.clip_chain_id {
- prim_clips_buffer.clear();
-
- add_clips(
- scroll_root,
- prim_instance.clip_set.clip_chain_id,
- prim_clips_buffer,
- clip_store,
- interners,
- spatial_tree,
- );
-
- shared_clips.retain(|h1: &ClipInstance| {
- let uid = h1.handle.uid();
- prim_clips_buffer.iter().any(|h2| {
- uid == h2.handle.uid() &&
- h1.spatial_node_index == h2.spatial_node_index
- })
- });
-
- self.last_checked_clip_chain = prim_instance.clip_set.clip_chain_id;
- }
- }
- }
- }
-
- // If a blend-container has any clips on the stacking context we are removing,
- // we need to ensure those clips are added to the shared clips applied to the
- // tile cache we are creating.
- let mut current_clip_chain_id = clip_chain_id;
- while current_clip_chain_id != ClipChainId::NONE {
- let clip_chain_node = &clip_store
- .clip_chain_nodes[current_clip_chain_id.0 as usize];
-
- let clip_node_data = &interners.clip[clip_chain_node.handle];
- if let ClipNodeKind::Rectangle = clip_node_data.clip_node_kind {
- shared_clips.push(ClipInstance::new(clip_chain_node.handle, clip_chain_node.spatial_node_index));
- }
-
- current_clip_chain_id = clip_chain_node.parent_clip_chain_id;
- }
-
- // Construct the new tile cache and add to the list to be built
- let slice = self.pending_tile_caches.len();
-
- let params = TileCacheParams {
- slice,
- slice_flags,
- spatial_node_index: scroll_root,
- background_color: None,
- shared_clips,
- shared_clip_chain: ClipChainId::NONE,
- virtual_surface_size: config.compositor_kind.get_virtual_surface_size(),
- compositor_surface_count: prim_list.compositor_surface_count,
- };
-
- self.pending_tile_caches.push(PendingTileCache {
- prim_list,
- params,
- iframe_clip,
- });
-
- // Add a tile cache barrier so that the next prim definitely gets added to a
- // new tile cache, even if it's otherwise compatible with the blend container.
- self.force_new_tile_cache = Some(SliceFlags::empty());
- }
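
The ancestor filter above matters when a nested scroll root dominates the counts (a sketch with hypothetical nodes):

    // Occurrences after the cluster scan: { outer: 2, nested: 5 },
    // where `outer` is an ancestor of `nested`. retain() drops `nested`
    // (it is not an ancestor of `outer`), so max_by_key() picks `outer`,
    // even though raw counts alone would have picked `nested` and risked
    // relative-transform panics for prims outside it.
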
-
- /// Add a primitive, either to the current tile cache, or a new one, depending on various conditions.
- pub fn add_prim(
- &mut self,
- prim_instance: PrimitiveInstance,
- prim_rect: LayoutRect,
- spatial_node_index: SpatialNodeIndex,
- prim_flags: PrimitiveFlags,
- spatial_tree: &SpatialTree,
- clip_store: &ClipStore,
- interners: &Interners,
- config: &FrameBuilderConfig,
- quality_settings: &QualitySettings,
- iframe_clip: Option<ClipChainId>,
- ) {
- // Check if we want to create a new slice based on the current / next scroll root
- let scroll_root = self.find_scroll_root(spatial_node_index, spatial_tree);
-
- // Also create a new slice if there was a barrier previously set
- let mut want_new_tile_cache =
- self.force_new_tile_cache.is_some() ||
- self.pending_tile_caches.is_empty();
-
- let current_scroll_root = self.pending_tile_caches
- .last()
- .map(|p| p.params.spatial_node_index);
-
- if let Some(current_scroll_root) = current_scroll_root {
- want_new_tile_cache |= match (current_scroll_root, scroll_root) {
- (ROOT_SPATIAL_NODE_INDEX, ROOT_SPATIAL_NODE_INDEX) => {
- // Both current slice and this cluster are fixed position, no need to cut
- false
- }
- (ROOT_SPATIAL_NODE_INDEX, _) => {
- // A real scroll root is being established, so create a cache slice
- true
- }
- (_, ROOT_SPATIAL_NODE_INDEX) => {
- // If quality settings force subpixel AA over performance, skip creating
- // a slice for the fixed position element(s) here.
- if quality_settings.force_subpixel_aa_where_possible {
- false
- } else {
- // A fixed position slice is encountered within a scroll root. Only create
- // a slice in this case if all the clips referenced by this cluster are also
- // fixed position. There's no real point in creating slices for these cases,
- // since we'll have to rasterize them as the scrolling clip moves anyway. It
- // also allows us to retain subpixel AA in these cases. For these types of
- // slices, the intra-slice dirty rect handling typically works quite well
- // (a common case is parallax scrolling effects).
- let mut create_slice = true;
- let mut current_clip_chain_id = prim_instance.clip_set.clip_chain_id;
-
- while current_clip_chain_id != ClipChainId::NONE {
- let clip_chain_node = &clip_store.clip_chain_nodes[current_clip_chain_id.0 as usize];
- let spatial_root = self.find_scroll_root(clip_chain_node.spatial_node_index, spatial_tree);
- if spatial_root != ROOT_SPATIAL_NODE_INDEX {
- create_slice = false;
- break;
- }
- current_clip_chain_id = clip_chain_node.parent_clip_chain_id;
- }
-
- create_slice
- }
- }
- (curr_scroll_root, scroll_root) => {
- // Two scrolling roots - only need a new slice if they differ
- curr_scroll_root != scroll_root
- }
- };
-
- // Update the list of clips that apply to this primitive instance, to track which are the
- // shared clips for this tile cache that can be applied during compositing.
- if self.last_checked_clip_chain != prim_instance.clip_set.clip_chain_id {
- let prim_clips_buffer = &mut self.prim_clips_buffer;
- prim_clips_buffer.clear();
- add_clips(
- current_scroll_root,
- prim_instance.clip_set.clip_chain_id,
- prim_clips_buffer,
- clip_store,
- interners,
- spatial_tree,
- );
-
- let current_shared_clips = &self.pending_tile_caches
- .last()
- .unwrap()
- .params
- .shared_clips;
-
- // If the shared clips are not compatible, create a new slice.
- // TODO(gw): Does Gecko ever supply duplicate or out-of-order
- // shared clips? It doesn't seem to, but if it does,
- // we will need to be more clever here to check if
- // the shared clips are compatible.
- want_new_tile_cache |= current_shared_clips != prim_clips_buffer;
-
- self.last_checked_clip_chain = prim_instance.clip_set.clip_chain_id;
- }
- }
-
- if want_new_tile_cache {
- let slice = self.pending_tile_caches.len();
-
- // If we have exceeded the maximum number of slices, skip creating a new
- // one and the primitive will be added to the last slice.
- if slice < MAX_CACHE_SLICES {
- // When we reach the last valid slice that can be created, it is created as
- // a fixed slice without shared clips, ensuring that we can safely add any
- // subsequent primitives to it. This doesn't seem to occur on any real
- // world content (only contrived test cases), where this acts as a fail safe
- // to ensure we don't allocate too much GPU memory for surface caches.
- // However, if we _do_ ever see this occur on real world content, we could
- // probably consider increasing the max cache slices a bit more than the
- // current limit.
- let (params, iframe_clip) = if slice == MAX_CACHE_SLICES-1 {
- let params = TileCacheParams {
- slice,
- slice_flags: SliceFlags::empty(),
- spatial_node_index: ROOT_SPATIAL_NODE_INDEX,
- background_color: None,
- shared_clips: Vec::new(),
- shared_clip_chain: ClipChainId::NONE,
- virtual_surface_size: config.compositor_kind.get_virtual_surface_size(),
- compositor_surface_count: 0,
- };
-
- (params, None)
- } else {
- let slice_flags = self.force_new_tile_cache.unwrap_or(SliceFlags::empty());
-
- let background_color = if slice == 0 {
- config.background_color
- } else {
- None
- };
-
- let mut shared_clips = Vec::new();
- add_clips(
- scroll_root,
- prim_instance.clip_set.clip_chain_id,
- &mut shared_clips,
- clip_store,
- interners,
- spatial_tree,
- );
-
- self.last_checked_clip_chain = prim_instance.clip_set.clip_chain_id;
-
- let params = TileCacheParams {
- slice,
- slice_flags,
- spatial_node_index: scroll_root,
- background_color,
- shared_clips,
- shared_clip_chain: ClipChainId::NONE,
- virtual_surface_size: config.compositor_kind.get_virtual_surface_size(),
- compositor_surface_count: 0,
- };
-
- (params, iframe_clip)
- };
-
- self.pending_tile_caches.push(PendingTileCache {
- prim_list: PrimitiveList::empty(),
- params,
- iframe_clip,
- });
-
- self.force_new_tile_cache = None;
- }
- }
-
- self.pending_tile_caches
- .last_mut()
- .unwrap()
- .prim_list
- .add_prim(
- prim_instance,
- prim_rect,
- spatial_node_index,
- prim_flags,
- );
- }
-
- /// Consume this object and build the list of tile cache primitives
- pub fn build(
- self,
- config: &FrameBuilderConfig,
- clip_store: &mut ClipStore,
- prim_store: &mut PrimitiveStore,
- interners: &Interners,
- ) -> (TileCacheConfig, Vec<PictureIndex>) {
- let mut result = TileCacheConfig::new(self.pending_tile_caches.len());
- let mut tile_cache_pictures = Vec::new();
-
- for mut pending_tile_cache in self.pending_tile_caches {
- // Accumulate any clip instances from the iframe_clip into the shared clips
- // that will be applied by this tile cache during compositing.
- if let Some(clip_chain_id) = pending_tile_cache.iframe_clip {
- add_all_rect_clips(
- clip_chain_id,
- &mut pending_tile_cache.params.shared_clips,
- clip_store,
- interners,
- );
- }
-
- let pic_index = create_tile_cache(
- pending_tile_cache.params.slice,
- pending_tile_cache.params.slice_flags,
- pending_tile_cache.params.spatial_node_index,
- pending_tile_cache.prim_list,
- pending_tile_cache.params.background_color,
- pending_tile_cache.params.shared_clips,
- prim_store,
- clip_store,
- &mut result.picture_cache_spatial_nodes,
- config,
- &mut result.tile_caches,
- );
-
- tile_cache_pictures.push(pic_index);
- }
-
- (result, tile_cache_pictures)
- }
-
- /// Find the scroll root for a given spatial node
- fn find_scroll_root(
- &mut self,
- spatial_node_index: SpatialNodeIndex,
- spatial_tree: &SpatialTree,
- ) -> SpatialNodeIndex {
- if self.prev_scroll_root_cache.0 == spatial_node_index {
- return self.prev_scroll_root_cache.1;
- }
-
- let scroll_root = spatial_tree.find_scroll_root(spatial_node_index);
- self.prev_scroll_root_cache = (spatial_node_index, scroll_root);
-
- scroll_root
- }
-}
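The `find_scroll_root` wrapper above is a single-entry memo over `SpatialTree::find_scroll_root`: only the most recent (spatial node, scroll root) pair is cached, which pays off because consecutive primitives in a display list usually share a spatial node. A minimal sketch of the same pattern, with hypothetical `u32` ids standing in for the real index types:

struct ScrollRootMemo {
    prev: (u32, u32), // (last queried spatial node, its scroll root)
}

impl ScrollRootMemo {
    fn find(&mut self, node: u32, lookup: impl Fn(u32) -> u32) -> u32 {
        if self.prev.0 == node {
            return self.prev.1; // hit: same node as the previous query
        }
        let root = lookup(node); // miss: fall back to the full tree walk
        self.prev = (node, root);
        root
    }
}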
-
-// Helper fn to collect rectangle clip handles from a given clip chain whose
-// spatial node is an ancestor of the supplied scroll root.
-fn add_clips(
- scroll_root: SpatialNodeIndex,
- clip_chain_id: ClipChainId,
- prim_clips: &mut Vec<ClipInstance>,
- clip_store: &ClipStore,
- interners: &Interners,
- spatial_tree: &SpatialTree,
-) {
- let mut current_clip_chain_id = clip_chain_id;
-
- while current_clip_chain_id != ClipChainId::NONE {
- let clip_chain_node = &clip_store
- .clip_chain_nodes[current_clip_chain_id.0 as usize];
-
- let clip_node_data = &interners.clip[clip_chain_node.handle];
- if let ClipNodeKind::Rectangle = clip_node_data.clip_node_kind {
- if spatial_tree.is_ancestor(
- clip_chain_node.spatial_node_index,
- scroll_root,
- ) {
- prim_clips.push(ClipInstance::new(clip_chain_node.handle, clip_chain_node.spatial_node_index));
- }
- }
-
- current_clip_chain_id = clip_chain_node.parent_clip_chain_id;
- }
-}
-
-// Walk a clip chain and accumulate all rectangle clip instances into the
-// supplied `prim_clips` array.
-fn add_all_rect_clips(
- clip_chain_id: ClipChainId,
- prim_clips: &mut Vec<ClipInstance>,
- clip_store: &ClipStore,
- interners: &Interners,
-) {
- let mut current_clip_chain_id = clip_chain_id;
-
- while current_clip_chain_id != ClipChainId::NONE {
- let clip_chain_node = &clip_store
- .clip_chain_nodes[current_clip_chain_id.0 as usize];
-
- let clip_node_data = &interners.clip[clip_chain_node.handle];
- if let ClipNodeKind::Rectangle = clip_node_data.clip_node_kind {
- prim_clips.push(ClipInstance::new(clip_chain_node.handle, clip_chain_node.spatial_node_index));
- }
-
- current_clip_chain_id = clip_chain_node.parent_clip_chain_id;
- }
-}
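Both helpers walk the chain the same way: a `ClipChainId` indexes into `clip_chain_nodes`, and each node stores its parent's id, so a clip chain is effectively an intrusive singly linked list terminated by `ClipChainId::NONE`. A stripped-down sketch of that traversal, with a hypothetical `Node` type and `NONE` sentinel in place of the real clip store:

const NONE: usize = usize::MAX;

struct Node {
    parent: usize,
    handle: u32, // stand-in for the interned clip handle
}

fn collect_handles(nodes: &[Node], mut current: usize) -> Vec<u32> {
    let mut out = Vec::new();
    while current != NONE {
        let node = &nodes[current];
        out.push(node.handle);
        current = node.parent; // follow the intrusive parent link
    }
    out
}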
-
-/// Given a PrimitiveList and scroll root, construct a tile cache primitive instance
-/// that wraps the primitive list.
-fn create_tile_cache(
- slice: usize,
- slice_flags: SliceFlags,
- scroll_root: SpatialNodeIndex,
- prim_list: PrimitiveList,
- background_color: Option<ColorF>,
- shared_clips: Vec<ClipInstance>,
- prim_store: &mut PrimitiveStore,
- clip_store: &mut ClipStore,
- picture_cache_spatial_nodes: &mut FastHashSet<SpatialNodeIndex>,
- frame_builder_config: &FrameBuilderConfig,
- tile_caches: &mut FastHashMap<SliceId, TileCacheParams>,
-) -> PictureIndex {
- // Add this spatial node to the list to check for complex transforms
- // at the start of a frame build.
- picture_cache_spatial_nodes.insert(scroll_root);
-
- // Build a clip-chain for the tile cache, that contains any of the shared clips
- // we will apply when drawing the tiles. In all cases provided by Gecko, these
- // are rectangle clips with a scale/offset transform only, and get handled as
- // a simple local clip rect in the vertex shader. However, this should in theory
- // also work with any complex clips, such as rounded rects and image masks, by
- // producing a clip mask that is applied to the picture cache tiles.
-
- let mut parent_clip_chain_id = ClipChainId::NONE;
- for clip_instance in &shared_clips {
- // Add this spatial node to the list to check for complex transforms
- // at the start of a frame build.
- picture_cache_spatial_nodes.insert(clip_instance.spatial_node_index);
-
- parent_clip_chain_id = clip_store.add_clip_chain_node(
- clip_instance.handle,
- clip_instance.spatial_node_index,
- parent_clip_chain_id,
- );
- }
-
- let slice_id = SliceId::new(slice);
-
- // Store some information about the picture cache slice. This is used when we swap the
- // new scene into the frame builder to either reuse existing slices, or create new ones.
- tile_caches.insert(slice_id, TileCacheParams {
- slice,
- slice_flags,
- spatial_node_index: scroll_root,
- background_color,
- shared_clips,
- shared_clip_chain: parent_clip_chain_id,
- virtual_surface_size: frame_builder_config.compositor_kind.get_virtual_surface_size(),
- compositor_surface_count: prim_list.compositor_surface_count,
- });
-
- let pic_index = prim_store.pictures.alloc().init(PicturePrimitive::new_image(
- Some(PictureCompositeMode::TileCache { slice_id }),
- Picture3DContext::Out,
- true,
- PrimitiveFlags::IS_BACKFACE_VISIBLE,
- prim_list,
- scroll_root,
- PictureOptions::default(),
- ));
-
- PictureIndex(pic_index)
-}
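Note the chain direction in the loop above: each `add_clip_chain_node` call links the new node to the previous head, so shared clips pushed in order [a, b, c] produce a chain whose head is c and whose parent links run c -> b -> a -> ClipChainId::NONE. A tiny sketch of that fold, with hypothetical integer ids and an `add_node` closure in place of the real clip store:

fn chain_ids(clips: &[u32], mut add_node: impl FnMut(u32, u32) -> u32) -> u32 {
    const NONE: u32 = u32::MAX;
    let mut parent = NONE;
    for &clip in clips {
        parent = add_node(clip, parent); // new node points at the previous head
    }
    parent // head of the chain: the last clip pushed
}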
-
-/// Debug information about a set of picture cache slices, exposed via RenderResults
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct PictureCacheDebugInfo {
- pub slices: FastHashMap<usize, SliceDebugInfo>,
-}
-
-impl PictureCacheDebugInfo {
- pub fn new() -> Self {
- PictureCacheDebugInfo {
- slices: FastHashMap::default(),
- }
- }
-
- /// Convenience method to retrieve a given slice. Deliberately panics
- /// if the slice isn't present.
- pub fn slice(&self, slice: usize) -> &SliceDebugInfo {
- &self.slices[&slice]
- }
-}
-
-impl Default for PictureCacheDebugInfo {
- fn default() -> PictureCacheDebugInfo {
- PictureCacheDebugInfo::new()
- }
-}
-
-/// Debug information about a set of picture cache tiles, exposed via RenderResults
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct SliceDebugInfo {
- pub tiles: FastHashMap<TileOffset, TileDebugInfo>,
-}
-
-impl SliceDebugInfo {
- pub fn new() -> Self {
- SliceDebugInfo {
- tiles: FastHashMap::default(),
- }
- }
-
- /// Convenience method to retrieve a given tile. Deliberately panics
- /// if the tile isn't present.
- pub fn tile(&self, x: i32, y: i32) -> &TileDebugInfo {
- &self.tiles[&TileOffset::new(x, y)]
- }
-}
-
-/// Debug information about a tile that was dirty and was rasterized
-#[derive(Debug, PartialEq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub struct DirtyTileDebugInfo {
- pub local_valid_rect: PictureRect,
- pub local_dirty_rect: PictureRect,
-}
-
-/// Debug information about the state of a tile
-#[derive(Debug, PartialEq)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-#[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum TileDebugInfo {
- /// Tile was occluded by a tile in front of it
- Occluded,
- /// Tile was culled (not visible in current display port)
- Culled,
- /// Tile was valid (no rasterization was done) and visible
- Valid,
- /// Tile was dirty, and was updated
- Dirty(DirtyTileDebugInfo),
-}
-
-impl TileDebugInfo {
- pub fn is_occluded(&self) -> bool {
- match self {
- TileDebugInfo::Occluded => true,
- TileDebugInfo::Culled |
- TileDebugInfo::Valid |
- TileDebugInfo::Dirty(..) => false,
- }
- }
-
- pub fn is_valid(&self) -> bool {
- match self {
- TileDebugInfo::Valid => true,
- TileDebugInfo::Culled |
- TileDebugInfo::Occluded |
- TileDebugInfo::Dirty(..) => false,
- }
- }
-
- pub fn is_culled(&self) -> bool {
- match self {
- TileDebugInfo::Culled => true,
- TileDebugInfo::Valid |
- TileDebugInfo::Occluded |
- TileDebugInfo::Dirty(..) => false,
- }
- }
-
- pub fn as_dirty(&self) -> &DirtyTileDebugInfo {
- match self {
- TileDebugInfo::Occluded |
- TileDebugInfo::Culled |
- TileDebugInfo::Valid => {
- panic!("not a dirty tile!");
- }
- TileDebugInfo::Dirty(ref info) => {
- info
- }
- }
- }
-}
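Since these structures are exposed via RenderResults, a test harness can assert on per-tile state after a frame. A hypothetical assertion using only the accessors defined above (the `debug` parameter is assumed to come from the renderer's results):

fn assert_tile_was_rasterized(debug: &PictureCacheDebugInfo) {
    let slice = debug.slice(0);   // panics if slice 0 is absent
    let tile = slice.tile(0, 0);  // panics if tile (0, 0) is absent
    assert!(!tile.is_occluded() && !tile.is_culled());
    let info = tile.as_dirty();   // panics unless the tile was rasterized
    assert!(info.local_dirty_rect.area() > 0.0);
}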
diff --git a/third_party/webrender/webrender/src/util.rs b/third_party/webrender/webrender/src/util.rs
index 0d940d38934..c6a10386432 100644
--- a/third_party/webrender/webrender/src/util.rs
+++ b/third_party/webrender/webrender/src/util.rs
@@ -4,7 +4,7 @@
use api::BorderRadius;
use api::units::*;
-use euclid::{Point2D, Rect, Size2D, Vector2D, point2, size2};
+use euclid::{Point2D, Rect, Size2D, Vector2D};
use euclid::{default, Transform2D, Transform3D, Scale};
use malloc_size_of::{MallocShallowSizeOf, MallocSizeOf, MallocSizeOfOps};
use plane_split::{Clipper, Polygon};
@@ -66,9 +66,6 @@ pub trait VecHelper<T> {
/// Equivalent to `mem::replace(&mut vec, Vec::new())`
fn take(&mut self) -> Self;
- /// Call clear and return self (useful for chaining with calls that move the vector).
- fn cleared(self) -> Self;
-
/// Functionally equivalent to `mem::replace(&mut vec, Vec::new())` but tries
/// to keep the allocation in the caller if it is empty or replace it with a
/// pre-allocated vector.
@@ -102,12 +99,6 @@ impl<T> VecHelper<T> for Vec<T> {
replace(self, Vec::new())
}
- fn cleared(mut self) -> Self {
- self.clear();
-
- self
- }
-
fn take_and_preallocate(&mut self) -> Self {
let len = self.len();
if len == 0 {
@@ -150,12 +141,14 @@ impl ScaleOffset {
// To check that we have a pure scale / translation:
// Every field must match an identity matrix, except:
// - Any value present in tx,ty
- // - Any value present in sx,sy
+ // - Any non-negative value present in sx,sy (negative would imply a reflection/rotation)
- if m.m12.abs() > NEARLY_ZERO ||
+ if m.m11 < 0.0 ||
+ m.m12.abs() > NEARLY_ZERO ||
m.m13.abs() > NEARLY_ZERO ||
m.m14.abs() > NEARLY_ZERO ||
m.m21.abs() > NEARLY_ZERO ||
+ m.m22 < 0.0 ||
m.m23.abs() > NEARLY_ZERO ||
m.m24.abs() > NEARLY_ZERO ||
m.m31.abs() > NEARLY_ZERO ||
@@ -228,80 +221,28 @@ impl ScaleOffset {
}
pub fn map_rect<F, T>(&self, rect: &Rect<f32, F>) -> Rect<f32, T> {
- // TODO(gw): The logic below can return an unexpected result if the supplied
- // rect is invalid (has size < 0). Since Gecko currently supplies
- // invalid rects in some cases, adding a max(0) here ensures that
- // mapping an invalid rect retains the property that rect.is_empty()
- // will return true (the mapped rect output will have size 0 instead
- // of a negative size). In future we could catch / assert / fix
- // these invalid rects earlier, and assert here instead.
-
- let w = rect.size.width.max(0.0);
- let h = rect.size.height.max(0.0);
-
- let mut x0 = rect.origin.x * self.scale.x + self.offset.x;
- let mut y0 = rect.origin.y * self.scale.y + self.offset.y;
-
- let mut sx = w * self.scale.x;
- let mut sy = h * self.scale.y;
-
- // Handle negative scale. Previously, branchless float math was used to find the
- // min / max vertices and size. However, that sequence of operations was producing
- // slightly different floating point results on Android emulator builds, causing one test
- // to fail an assert. Instead, we retain the same math as previously, and adjust
- // the origin / size if required.
-
- if self.scale.x < 0.0 {
- x0 += sx;
- sx = -sx;
- }
- if self.scale.y < 0.0 {
- y0 += sy;
- sy = -sy;
- }
-
Rect::new(
- Point2D::new(x0, y0),
- Size2D::new(sx, sy),
+ Point2D::new(
+ rect.origin.x * self.scale.x + self.offset.x,
+ rect.origin.y * self.scale.y + self.offset.y,
+ ),
+ Size2D::new(
+ rect.size.width * self.scale.x,
+ rect.size.height * self.scale.y,
+ )
)
}
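The deleted fix-up existed because multiplying an origin by a negative scale lands on the rect's far edge and yields a negative size; with this patch, `from_transform` rejects negative scales up front (the new `m.m11 < 0.0` / `m.m22 < 0.0` checks above), so the plain multiply is sufficient. A small sketch of the failure mode the old code was guarding against, using plain f32 math rather than the real `ScaleOffset`:

#[test]
fn negative_scale_needs_origin_fixup() {
    let (origin_x, width, scale_x) = (10.0_f32, 4.0_f32, -1.0_f32);
    let mut x0 = origin_x * scale_x; // -10.0: actually the right-hand edge
    let mut sx = width * scale_x;    // -4.0: a negative size
    if scale_x < 0.0 {
        x0 += sx; // shift to the true left edge: -14.0
        sx = -sx; // make the size positive again: 4.0
    }
    assert_eq!((x0, sx), (-14.0, 4.0));
}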
pub fn unmap_rect<F, T>(&self, rect: &Rect<f32, F>) -> Rect<f32, T> {
- // TODO(gw): The logic below can return an unexpected result if the supplied
- // rect is invalid (has size < 0). Since Gecko currently supplies
- // invalid rects in some cases, adding a max(0) here ensures that
- // mapping an invalid rect retains the property that rect.is_empty()
- // will return true (the mapped rect output will have size 0 instead
- // of a negative size). In future we could catch / assert / fix
- // these invalid rects earlier, and assert here instead.
-
- let w = rect.size.width.max(0.0);
- let h = rect.size.height.max(0.0);
-
- let mut x0 = (rect.origin.x - self.offset.x) / self.scale.x;
- let mut y0 = (rect.origin.y - self.offset.y) / self.scale.y;
-
- let mut sx = w / self.scale.x;
- let mut sy = h / self.scale.y;
-
- // Handle negative scale. Previously, branchless float math was used to find the
- // min / max vertices and size. However, that sequence of operations was producing
- // slightly different floating point results on Android emulator builds, causing one test
- // to fail an assert. Instead, we retain the same math as previously, and adjust
- // the origin / size if required.
-
- if self.scale.x < 0.0 {
- x0 += sx;
- sx = -sx;
- }
- if self.scale.y < 0.0 {
- y0 += sy;
- sy = -sy;
- }
-
Rect::new(
- Point2D::new(x0, y0),
- Size2D::new(sx, sy),
+ Point2D::new(
+ (rect.origin.x - self.offset.x) / self.scale.x,
+ (rect.origin.y - self.offset.y) / self.scale.y,
+ ),
+ Size2D::new(
+ rect.size.width / self.scale.x,
+ rect.size.height / self.scale.y,
+ )
)
}
@@ -383,8 +324,6 @@ pub trait MatrixHelpers<Src, Dst> {
/// Turn Z transformation into identity. This is useful when crossing "flat"
/// transform styled stacking contexts upon traversing the coordinate systems.
fn flatten_z_output(&mut self);
-
- fn cast_unit<NewSrc, NewDst>(&self) -> Transform3D<f32, NewSrc, NewDst>;
}
impl<Src, Dst> MatrixHelpers<Src, Dst> for Transform3D<f32, Src, Dst> {
@@ -435,21 +374,14 @@ impl<Src, Dst> MatrixHelpers<Src, Dst> for Transform3D<f32, Src, Dst> {
self.m21 * self.m21 + self.m22 * self.m22 > limit2
}
- /// Find out a point in `Src` that would be projected into the `target`.
fn inverse_project(&self, target: &Point2D<f32, Dst>) -> Option<Point2D<f32, Src>> {
- // form the linear equation for the hyperplane intersection
- let m = Transform2D::<f32, Src, Dst>::new(
+ let m: Transform2D<f32, Src, Dst>;
+ m = Transform2D::new(
self.m11 - target.x * self.m14, self.m12 - target.y * self.m14,
self.m21 - target.x * self.m24, self.m22 - target.y * self.m24,
self.m41 - target.x * self.m44, self.m42 - target.y * self.m44,
);
- let inv = m.inverse()?;
- // we found the point, now check if it maps to the positive hemisphere
- if inv.m31 * self.m14 + inv.m32 * self.m24 + self.m44 > 0.0 {
- Some(Point2D::new(inv.m31, inv.m32))
- } else {
- None
- }
+ m.inverse().map(|inv| Point2D::new(inv.m31, inv.m32))
}
fn inverse_rect_footprint(&self, rect: &Rect<f32, Dst>) -> Option<Rect<f32, Src>> {
@@ -499,7 +431,7 @@ impl<Src, Dst> MatrixHelpers<Src, Dst> for Transform3D<f32, Src, Dst> {
* a b 0 1
*/
fn is_2d_scale_translation(&self) -> bool {
- (self.m33 - 1.0).abs() < NEARLY_ZERO &&
+ (self.m33 - 1.0).abs() < NEARLY_ZERO &&
(self.m44 - 1.0).abs() < NEARLY_ZERO &&
self.m12.abs() < NEARLY_ZERO && self.m13.abs() < NEARLY_ZERO && self.m14.abs() < NEARLY_ZERO &&
self.m21.abs() < NEARLY_ZERO && self.m23.abs() < NEARLY_ZERO && self.m24.abs() < NEARLY_ZERO &&
@@ -527,16 +459,6 @@ impl<Src, Dst> MatrixHelpers<Src, Dst> for Transform3D<f32, Src, Dst> {
self.m23 = 0.0;
self.m33 = 1.0;
self.m43 = 0.0;
- // Note: we used to zero out m3? as well, see "reftests/flatten-all-flat.yaml" test
- }
-
- fn cast_unit<NewSrc, NewDst>(&self) -> Transform3D<f32, NewSrc, NewDst> {
- Transform3D::new(
- self.m11, self.m12, self.m13, self.m14,
- self.m21, self.m22, self.m23, self.m24,
- self.m31, self.m32, self.m33, self.m34,
- self.m41, self.m42, self.m43, self.m44,
- )
}
}
@@ -561,6 +483,7 @@ where
Self: Sized,
{
fn from_floats(x0: f32, y0: f32, x1: f32, y1: f32) -> Self;
+ fn is_well_formed_and_nonempty(&self) -> bool;
fn snap(&self) -> Self;
}
@@ -572,6 +495,10 @@ impl<U> RectHelpers<U> for Rect<f32, U> {
)
}
+ fn is_well_formed_and_nonempty(&self) -> bool {
+ self.size.width > 0.0 && self.size.height > 0.0
+ }
+
fn snap(&self) -> Self {
let origin = Point2D::new(
(self.origin.x + 0.5).floor(),
@@ -664,12 +591,9 @@ use euclid::vec3;
#[cfg(test)]
pub mod test {
use super::*;
- use euclid::default::{Point2D, Rect, Size2D, Transform3D};
- use euclid::{Angle, approxeq::ApproxEq};
+ use euclid::default::{Point2D, Transform3D};
+ use euclid::Angle;
use std::f32::consts::PI;
- use crate::clip::{is_left_of_line, polygon_contains_point};
- use crate::prim_store::PolygonKey;
- use api::FillRule;
#[test]
fn inverse_project() {
@@ -682,50 +606,6 @@ pub mod test {
assert_eq!(m1.inverse_project(&p0), Some(Point2D::new(2.0, 2.0)));
}
- #[test]
- fn inverse_project_footprint() {
- let m = Transform3D::new(
- 0.477499992, 0.135000005, -1.0, 0.000624999986,
- -0.642787635, 0.766044438, 0.0, 0.0,
- 0.766044438, 0.642787635, 0.0, 0.0,
- 1137.10986, 113.71286, 402.0, 0.748749971,
- );
- let r = Rect::new(Point2D::zero(), Size2D::new(804.0, 804.0));
- {
- let points = &[
- r.origin,
- r.top_right(),
- r.bottom_left(),
- r.bottom_right(),
- ];
- let mi = m.inverse().unwrap();
- // In this section, we do the forward and backward transformation
- // to confirm that its bijective.
- // We also do the inverse projection path, and confirm it functions the same way.
- println!("Points:");
- for p in points {
- let pp = m.transform_point2d_homogeneous(*p);
- let p3 = pp.to_point3d().unwrap();
- let pi = mi.transform_point3d_homogeneous(p3);
- let px = pi.to_point2d().unwrap();
- let py = m.inverse_project(&pp.to_point2d().unwrap()).unwrap();
- println!("\t{:?} -> {:?} -> {:?} -> ({:?} -> {:?}, {:?})", p, pp, p3, pi, px, py);
- assert!(px.approx_eq_eps(p, &Point2D::new(0.001, 0.001)));
- assert!(py.approx_eq_eps(p, &Point2D::new(0.001, 0.001)));
- }
- }
- // project
- let rp = project_rect(&m, &r, &Rect::new(Point2D::zero(), Size2D::new(1000.0, 1000.0))).unwrap();
- println!("Projected {:?}", rp);
- // one of the points ends up in the negative hemisphere
- assert_eq!(m.inverse_project(&rp.origin), None);
- // inverse
- if let Some(ri) = m.inverse_rect_footprint(&rp) {
- // inverse footprint should be larger, since it doesn't know the original Z
- assert!(ri.contains_rect(&r), "Inverse {:?}", ri);
- }
- }
-
fn validate_convert(xref: &LayoutTransform) {
let so = ScaleOffset::from_transform(xref).unwrap();
let xf = so.to_transform();
@@ -733,35 +613,6 @@ pub mod test {
}
#[test]
- fn negative_scale_map_unmap() {
- let xref = LayoutTransform::scale(1.0, -1.0, 1.0)
- .pre_translate(LayoutVector3D::new(124.0, 38.0, 0.0));
- let so = ScaleOffset::from_transform(&xref).unwrap();
- let local_rect = LayoutRect::new(
- LayoutPoint::new(50.0, -100.0),
- LayoutSize::new(200.0, 400.0),
- );
-
- let mapped_rect: LayoutRect = so.map_rect(&local_rect);
- let xf_rect = project_rect(
- &xref,
- &local_rect,
- &LayoutRect::max_rect(),
- ).unwrap();
-
- assert!(mapped_rect.origin.x.approx_eq(&xf_rect.origin.x));
- assert!(mapped_rect.origin.y.approx_eq(&xf_rect.origin.y));
- assert!(mapped_rect.size.width.approx_eq(&xf_rect.size.width));
- assert!(mapped_rect.size.height.approx_eq(&xf_rect.size.height));
-
- let unmapped_rect: LayoutRect = so.unmap_rect(&mapped_rect);
- assert!(unmapped_rect.origin.x.approx_eq(&local_rect.origin.x));
- assert!(unmapped_rect.origin.y.approx_eq(&local_rect.origin.y));
- assert!(unmapped_rect.size.width.approx_eq(&local_rect.size.width));
- assert!(unmapped_rect.size.height.approx_eq(&local_rect.size.height));
- }
-
- #[test]
fn scale_offset_convert() {
let xref = LayoutTransform::translation(130.0, 200.0, 0.0);
validate_convert(&xref);
@@ -841,84 +692,6 @@ pub mod test {
assert_eq!(origin, Point2D::new(1.0, 0.5));
assert_eq!(m.transform_point2d(origin), Some(Point2D::zero()));
}
-
- #[test]
- fn polygon_clip_is_left_of_point() {
- // Define points of a line through (1, -3) and (-2, 6) to test against.
- // If the triplet consisting of these two points and the test point
- // form a counter-clockwise triangle, then the test point is on the
- // left. The easiest way to visualize this is with an "ascending"
- // line from low-Y to high-Y.
- let p0_x = 1.0;
- let p0_y = -3.0;
- let p1_x = -2.0;
- let p1_y = 6.0;
-
- // Test some points to the left of the line.
- assert!(is_left_of_line(-9.0, 0.0, p0_x, p0_y, p1_x, p1_y) > 0.0);
- assert!(is_left_of_line(-1.0, 1.0, p0_x, p0_y, p1_x, p1_y) > 0.0);
- assert!(is_left_of_line(1.0, -4.0, p0_x, p0_y, p1_x, p1_y) > 0.0);
-
- // Test some points on the line.
- assert!(is_left_of_line(-3.0, 9.0, p0_x, p0_y, p1_x, p1_y) == 0.0);
- assert!(is_left_of_line(0.0, 0.0, p0_x, p0_y, p1_x, p1_y) == 0.0);
- assert!(is_left_of_line(100.0, -300.0, p0_x, p0_y, p1_x, p1_y) == 0.0);
-
- // Test some points to the right of the line.
- assert!(is_left_of_line(0.0, 1.0, p0_x, p0_y, p1_x, p1_y) < 0.0);
- assert!(is_left_of_line(-4.0, 13.0, p0_x, p0_y, p1_x, p1_y) < 0.0);
- assert!(is_left_of_line(5.0, -12.0, p0_x, p0_y, p1_x, p1_y) < 0.0);
- }
-
- #[test]
- fn polygon_clip_contains_point() {
- // We define the points of a self-overlapping polygon, which we will
- // use to create polygons with different windings and fill rules.
- let p0 = LayoutPoint::new(4.0, 4.0);
- let p1 = LayoutPoint::new(6.0, 4.0);
- let p2 = LayoutPoint::new(4.0, 7.0);
- let p3 = LayoutPoint::new(2.0, 1.0);
- let p4 = LayoutPoint::new(8.0, 1.0);
- let p5 = LayoutPoint::new(6.0, 7.0);
-
- let poly_clockwise_nonzero = PolygonKey::new(
- &[p5, p4, p3, p2, p1, p0].to_vec(), FillRule::Nonzero
- );
- let poly_clockwise_evenodd = PolygonKey::new(
- &[p5, p4, p3, p2, p1, p0].to_vec(), FillRule::Evenodd
- );
- let poly_counter_clockwise_nonzero = PolygonKey::new(
- &[p0, p1, p2, p3, p4, p5].to_vec(), FillRule::Nonzero
- );
- let poly_counter_clockwise_evenodd = PolygonKey::new(
- &[p0, p1, p2, p3, p4, p5].to_vec(), FillRule::Evenodd
- );
-
- // We define a rect that provides a bounding clip area of
- // the polygon.
- let rect = LayoutRect::new(LayoutPoint::new(0.0, 0.0),
- LayoutSize::new(10.0, 10.0));
-
- // And we'll test three points of interest.
- let p_inside_once = LayoutPoint::new(5.0, 3.0);
- let p_inside_twice = LayoutPoint::new(5.0, 5.0);
- let p_outside = LayoutPoint::new(9.0, 9.0);
-
- // We should get the same results for both clockwise and
- // counter-clockwise polygons.
- // For nonzero polygons, the inside twice point is considered inside.
- for poly_nonzero in vec![poly_clockwise_nonzero, poly_counter_clockwise_nonzero].iter() {
- assert_eq!(polygon_contains_point(&p_inside_once, &rect, &poly_nonzero), true);
- assert_eq!(polygon_contains_point(&p_inside_twice, &rect, &poly_nonzero), true);
- assert_eq!(polygon_contains_point(&p_outside, &rect, &poly_nonzero), false);
- }
- // For evenodd polygons, the inside twice point is considered outside.
- for poly_evenodd in vec![poly_clockwise_evenodd, poly_counter_clockwise_evenodd].iter() {
- assert_eq!(polygon_contains_point(&p_inside_once, &rect, &poly_evenodd), true);
- assert_eq!(polygon_contains_point(&p_inside_twice, &rect, &poly_evenodd), false);
- assert_eq!(polygon_contains_point(&p_outside, &rect, &poly_evenodd), false);
- }
- }
}
pub trait MaxRect {
@@ -1310,56 +1083,6 @@ impl Recycler {
}
}
-/// Record the size of a data structure to preallocate a similar size
-/// at the next frame and avoid growing it too many times.
-#[derive(Copy, Clone, Debug)]
-pub struct Preallocator {
- size: usize,
-}
-
-impl Preallocator {
- pub fn new(initial_size: usize) -> Self {
- Preallocator {
- size: initial_size,
- }
- }
-
- /// Record the size of a vector to preallocate it the next frame.
- pub fn record_vec<T>(&mut self, vec: &Vec<T>) {
- let len = vec.len();
- if len > self.size {
- self.size = len;
- } else {
- self.size = (self.size + len) / 2;
- }
- }
-
- /// The size that we'll preallocate the vector with.
- pub fn preallocation_size(&self) -> usize {
- // Round up to a multiple of 16 to avoid tiny
- // variations causing reallocations.
- (self.size + 15) & !15
- }
-
- /// Preallocate vector storage.
- ///
- /// The preallocated amount depends on the length recorded in the last
- /// record_vec call.
- pub fn preallocate_vec<T>(&self, vec: &mut Vec<T>) {
- let len = vec.len();
- let cap = self.preallocation_size();
- if len < cap {
- vec.reserve(cap - len);
- }
- }
-}
-
-impl Default for Preallocator {
- fn default() -> Self {
- Self::new(0)
- }
-}
-
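The removed `Preallocator` smooths vector capacity across frames: `record_vec` tracks a decaying high-water mark, and `preallocate_vec` reserves that much (rounded up to a multiple of 16) before the next frame fills the vector. A minimal sketch of the intended per-frame usage, with a hypothetical `build_items` closure standing in for real frame building:

fn frame_loop(build_items: impl Fn(usize, &mut Vec<u32>)) {
    let mut prealloc = Preallocator::new(0);
    let mut items: Vec<u32> = Vec::new();
    for frame in 0..3 {
        prealloc.preallocate_vec(&mut items); // reserve based on past frames
        build_items(frame, &mut items);       // fill the vector for this frame
        prealloc.record_vec(&items);          // remember the size for next time
        items.clear();                        // keep the allocation, drop the data
    }
}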
/// Arc wrapper to support measurement via MallocSizeOf.
///
/// Memory reporting for Arcs is tricky because of the risk of double-counting.
@@ -1496,201 +1219,3 @@ macro_rules! c_str {
}
}
}
-
-// Find a rectangle that is contained by the union of r1 and r2.
-pub fn conservative_union_rect<U>(r1: &Rect<f32, U>, r2: &Rect<f32, U>) -> Rect<f32, U> {
- // +---+---+ +--+-+--+
- // | | | | | | |
- // | | | | | | |
- // +---+---+ +--+-+--+
- if r1.origin.y == r2.origin.y && r1.size.height == r2.size.height {
- if r2.min_x() <= r1.max_x() && r2.max_x() >= r1.min_x() {
- let origin_x = f32::min(r1.origin.x, r2.origin.x);
- let width = f32::max(r1.max_x(), r2.max_x()) - origin_x;
-
- return Rect {
- origin: point2(origin_x, r1.origin.y),
- size: size2(width, r1.size.height),
- }
- }
- }
-
- // +----+ +----+
- // | | | |
- // | | +----+
- // +----+ | |
- // | | +----+
- // | | | |
- // +----+ +----+
- if r1.origin.x == r2.origin.x && r1.size.width == r2.size.width {
- if r2.min_y() <= r1.max_y() && r2.max_y() >= r1.min_y() {
- let origin_y = f32::min(r1.origin.y, r2.origin.y);
- let height = f32::max(r1.max_y(), r2.max_y()) - origin_y;
-
- return Rect {
- origin: point2(r1.origin.x, origin_y),
- size: size2(r1.size.width, height),
- }
- }
- }
-
- if r1.area() >= r2.area() { *r1 } else {*r2 }
-}
-
-#[test]
-fn test_conservative_union_rect() {
- // Adjacent, x axis
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(4.0, 2.0), size: size2(5.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 2.0), size: size2(8.0, 4.0) });
-
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(4.0, 2.0), size: size2(5.0, 4.0) },
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 2.0), size: size2(8.0, 4.0) });
-
- // Overlapping adjacent, x axis
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(3.0, 2.0), size: size2(5.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 2.0), size: size2(7.0, 4.0) });
-
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(5.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(5.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 2.0), size: size2(7.0, 4.0) });
-
- // Adjacent but not touching, x axis
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(6.0, 2.0), size: size2(5.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(6.0, 2.0), size: size2(5.0, 4.0) });
-
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(-6.0, 2.0), size: size2(1.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) });
-
-
- // Adjacent, y axis
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(1.0, 6.0), size: size2(3.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 8.0) });
-
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 5.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(1.0, 1.0), size: size2(3.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 1.0), size: size2(3.0, 8.0) });
-
- // Overlapping adjacent, y axis
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(1.0, 3.0), size: size2(3.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 5.0) });
-
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 4.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 6.0) });
-
- // Adjacent but not touching, y axis
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(1.0, 10.0), size: size2(3.0, 5.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 10.0), size: size2(3.0, 5.0) });
-
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 5.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(1.0, 0.0), size: size2(3.0, 3.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(1.0, 5.0), size: size2(3.0, 4.0) });
-
-
- // Contained
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- &LayoutRect { origin: point2(0.0, 1.0), size: size2(10.0, 11.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(0.0, 1.0), size: size2(10.0, 11.0) });
-
- let r = conservative_union_rect(
- &LayoutRect { origin: point2(0.0, 1.0), size: size2(10.0, 11.0) },
- &LayoutRect { origin: point2(1.0, 2.0), size: size2(3.0, 4.0) },
- );
- assert_eq!(r, LayoutRect { origin: point2(0.0, 1.0), size: size2(10.0, 11.0) });
-}
-
-/// This is inspired by the `weak-table` crate.
-/// It holds a Vec of weak pointers that are garbage collected as the Vec grows.
-pub struct WeakTable {
- inner: Vec<std::sync::Weak<Vec<u8>>>
-}
-
-impl WeakTable {
- pub fn new() -> WeakTable {
- WeakTable { inner: Vec::new() }
- }
- pub fn insert(&mut self, x: std::sync::Weak<Vec<u8>>) {
- if self.inner.len() == self.inner.capacity() {
- self.remove_expired();
-
- // We want to make sure that we change capacity()
- // even if remove_expired() removes some entries
- // so that we don't repeatedly hit remove_expired()
- if self.inner.len() * 3 < self.inner.capacity() {
- // We use a different multiple for shrinking than
- // for expanding so that we don't accidentally
- // oscillate.
- self.inner.shrink_to_fit();
- } else {
- // Otherwise double our size
- self.inner.reserve(self.inner.len())
- }
- }
- self.inner.push(x);
- }
-
- fn remove_expired(&mut self) {
- self.inner.retain(|x| x.strong_count() > 0)
- }
-
- pub fn iter(&self) -> impl Iterator<Item = Arc<Vec<u8>>> + '_ {
- self.inner.iter().filter_map(|x| x.upgrade())
- }
-}
-
-#[test]
-fn weak_table() {
- let mut tbl = WeakTable::new();
- let mut things = Vec::new();
- let target_count = 50;
- for _ in 0..target_count {
- things.push(Arc::new(vec![4]));
- }
- for i in &things {
- tbl.insert(Arc::downgrade(i))
- }
- assert_eq!(tbl.inner.len(), target_count);
- drop(things);
- assert_eq!(tbl.iter().count(), 0);
-
- // make sure that we shrink the table if it gets too big
- // by adding a bunch of dead items
- for _ in 0..target_count*2 {
- tbl.insert(Arc::downgrade(&Arc::new(vec![5])))
- }
- assert!(tbl.inner.capacity() <= 4);
-}
diff --git a/third_party/webrender/webrender/src/visibility.rs b/third_party/webrender/webrender/src/visibility.rs
deleted file mode 100644
index ffd3ef8b618..00000000000
--- a/third_party/webrender/webrender/src/visibility.rs
+++ /dev/null
@@ -1,713 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-//! # Visibility pass
-//!
-//! Walks the picture tree each frame and updates the visibility state of
-//! every primitive instance: primitives are culled against the world
-//! culling rect, clip chain instances are built for the visible ones, and
-//! picture cache tile dependencies are updated along the way.
-//!
-
-use api::{ColorF, DebugFlags};
-use api::units::*;
-use euclid::Scale;
-use std::{usize, mem};
-use crate::batch::BatchFilter;
-use crate::clip::{ClipStore, ClipChainStack};
-use crate::composite::CompositeState;
-use crate::spatial_tree::{ROOT_SPATIAL_NODE_INDEX, SpatialTree, SpatialNodeIndex};
-use crate::clip::{ClipInstance, ClipChainInstance};
-use crate::debug_colors;
-use crate::frame_builder::FrameBuilderConfig;
-use crate::gpu_cache::GpuCache;
-use crate::internal_types::FastHashMap;
-use crate::picture::{PictureCompositeMode, ClusterFlags, SurfaceInfo, TileCacheInstance};
-use crate::picture::{PrimitiveList, SurfaceIndex, RasterConfig, SliceId};
-use crate::prim_store::{ClipTaskIndex, PictureIndex, PrimitiveInstanceKind};
-use crate::prim_store::{PrimitiveStore, PrimitiveInstance};
-use crate::render_backend::{DataStores, ScratchBuffer};
-use crate::resource_cache::ResourceCache;
-use crate::scene::SceneProperties;
-use crate::space::SpaceMapper;
-use crate::internal_types::Filter;
-use crate::util::{MaxRect};
-
-pub struct FrameVisibilityContext<'a> {
- pub spatial_tree: &'a SpatialTree,
- pub global_screen_world_rect: WorldRect,
- pub global_device_pixel_scale: DevicePixelScale,
- pub surfaces: &'a [SurfaceInfo],
- pub debug_flags: DebugFlags,
- pub scene_properties: &'a SceneProperties,
- pub config: FrameBuilderConfig,
-}
-
-pub struct FrameVisibilityState<'a> {
- pub clip_store: &'a mut ClipStore,
- pub resource_cache: &'a mut ResourceCache,
- pub gpu_cache: &'a mut GpuCache,
- pub scratch: &'a mut ScratchBuffer,
- pub tile_cache: Option<Box<TileCacheInstance>>,
- pub data_stores: &'a mut DataStores,
- pub clip_chain_stack: ClipChainStack,
- pub composite_state: &'a mut CompositeState,
- /// A stack of currently active off-screen surfaces during the
- /// visibility frame traversal.
- pub surface_stack: Vec<SurfaceIndex>,
-}
-
-impl<'a> FrameVisibilityState<'a> {
- pub fn push_surface(
- &mut self,
- surface_index: SurfaceIndex,
- shared_clips: &[ClipInstance],
- spatial_tree: &SpatialTree,
- ) {
- self.surface_stack.push(surface_index);
- self.clip_chain_stack.push_surface(shared_clips, spatial_tree);
- }
-
- pub fn pop_surface(&mut self) {
- self.surface_stack.pop().unwrap();
- self.clip_chain_stack.pop_surface();
- }
-}
-
-bitflags! {
- /// A set of bitflags that can be set in the visibility information
- /// for a primitive instance. This can be used to control how primitives
- /// are treated during batching.
- // TODO(gw): We should also move `is_compositor_surface` to be part of
- // this flags struct.
- #[cfg_attr(feature = "capture", derive(Serialize))]
- pub struct PrimitiveVisibilityFlags: u8 {
- /// Implies that this primitive covers the entire picture cache slice,
- /// and can thus be dropped during batching and drawn with clear color.
- const IS_BACKDROP = 1;
- }
-}
-
-/// Contains the current state of the primitive's visibility.
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-pub enum VisibilityState {
- /// Uninitialized - this should never be encountered after prim reset
- Unset,
- /// Culled for being off-screen, or not possible to render (e.g. missing image resource)
- Culled,
- /// A picture that doesn't have a surface - primitives are composed into the
- /// parent picture with a surface.
- PassThrough,
- /// During picture cache dependency update, was found to be intersecting with one
- /// or more visible tiles. The rect in picture cache space is stored here to allow
- /// the detailed calculations below.
- Coarse {
- /// Information about which tile batchers this prim should be added to
- filter: BatchFilter,
-
- /// A set of flags that define how this primitive should be handled
- /// during batching of visible primitives.
- vis_flags: PrimitiveVisibilityFlags,
- },
- /// Once coarse visibility is resolved, this will be set if the primitive
- /// intersected any dirty rects, otherwise prim will be culled.
- Detailed {
- /// Information about which tile batchers this prim should be added to
- filter: BatchFilter,
-
- /// A set of flags that define how this primitive should be handled
- /// during batching of visible primitives.
- vis_flags: PrimitiveVisibilityFlags,
- },
-}
-
-/// Information stored for a visible primitive about the visible
-/// rect and associated clip information.
-#[derive(Debug)]
-#[cfg_attr(feature = "capture", derive(Serialize))]
-pub struct PrimitiveVisibility {
- /// The clip chain instance that was built for this primitive.
- pub clip_chain: ClipChainInstance,
-
- /// Current visibility state of the primitive.
- // TODO(gw): Move more of the fields from this struct into
- // the state enum.
- pub state: VisibilityState,
-
- /// An index into the clip task instances array in the primitive
- /// store. If this is ClipTaskIndex::INVALID, then the primitive
- /// has no clip mask. Otherwise, it may store the offset of the
- /// global clip mask task for this primitive, or the first of
- /// a list of clip task ids (one per segment).
- pub clip_task_index: ClipTaskIndex,
-
- /// The current combined local clip for this primitive, from
- /// the primitive local clip above and the current clip chain.
- pub combined_local_clip_rect: LayoutRect,
-}
-
-impl PrimitiveVisibility {
- pub fn new() -> Self {
- PrimitiveVisibility {
- state: VisibilityState::Unset,
- clip_chain: ClipChainInstance::empty(),
- clip_task_index: ClipTaskIndex::INVALID,
- combined_local_clip_rect: LayoutRect::zero(),
- }
- }
-
- pub fn reset(&mut self) {
- self.state = VisibilityState::Culled;
- self.clip_task_index = ClipTaskIndex::INVALID;
- }
-}
-
-/// Update visibility pass - update each primitive visibility struct, and
-/// build the clip chain instance if appropriate.
-pub fn update_primitive_visibility(
- store: &mut PrimitiveStore,
- pic_index: PictureIndex,
- parent_surface_index: SurfaceIndex,
- world_culling_rect: &WorldRect,
- frame_context: &FrameVisibilityContext,
- frame_state: &mut FrameVisibilityState,
- tile_caches: &mut FastHashMap<SliceId, Box<TileCacheInstance>>,
- is_root_tile_cache: bool,
-) -> Option<PictureRect> {
- profile_scope!("update_visibility");
- let (mut prim_list, surface_index, apply_local_clip_rect, world_culling_rect, is_composite) = {
- let pic = &mut store.pictures[pic_index.0];
- let mut world_culling_rect = *world_culling_rect;
-
- let prim_list = mem::replace(&mut pic.prim_list, PrimitiveList::empty());
- let (surface_index, is_composite) = match pic.raster_config {
- Some(ref raster_config) => (raster_config.surface_index, true),
- None => (parent_surface_index, false)
- };
-
- match pic.raster_config {
- Some(RasterConfig { composite_mode: PictureCompositeMode::TileCache { slice_id }, .. }) => {
- let mut tile_cache = tile_caches
- .remove(&slice_id)
- .expect("bug: non-existent tile cache");
-
- // If we have a tile cache for this picture, see if any of the
- // relative transforms have changed, which means we need to
- // re-map the dependencies of any child primitives.
- world_culling_rect = tile_cache.pre_update(
- layout_rect_as_picture_rect(&pic.estimated_local_rect),
- surface_index,
- frame_context,
- frame_state,
- );
-
- // Push a new surface, supplying the list of clips that should be
- // ignored, since they are handled by clipping when drawing this surface.
- frame_state.push_surface(
- surface_index,
- &tile_cache.shared_clips,
- frame_context.spatial_tree,
- );
- frame_state.tile_cache = Some(tile_cache);
- }
- _ => {
- if is_composite {
- frame_state.push_surface(
- surface_index,
- &[],
- frame_context.spatial_tree,
- );
- }
- }
- }
-
- (prim_list, surface_index, pic.apply_local_clip_rect, world_culling_rect, is_composite)
- };
-
- let surface = &frame_context.surfaces[surface_index.0 as usize];
-
- let mut map_local_to_surface = surface
- .map_local_to_surface
- .clone();
-
- let map_surface_to_world = SpaceMapper::new_with_target(
- ROOT_SPATIAL_NODE_INDEX,
- surface.surface_spatial_node_index,
- frame_context.global_screen_world_rect,
- frame_context.spatial_tree,
- );
-
- let mut surface_rect = PictureRect::zero();
-
- for cluster in &mut prim_list.clusters {
- profile_scope!("cluster");
- // Get the cluster and see if is visible
- if !cluster.flags.contains(ClusterFlags::IS_VISIBLE) {
- // Each prim instance must have reset called each frame, to clear
- // indices into various scratch buffers. If this doesn't occur,
- // the primitive may incorrectly be considered visible, which can
- // cause unexpected conditions to occur later during the frame.
- // Primitive instances are normally reset in the main loop below,
- // but we must also reset them in the rare case that the cluster
- // visibility has changed (due to an invalid transform and/or
- // backface visibility changing for this cluster).
- // TODO(gw): This is difficult to test for in CI - as a follow up,
- // we should add a debug flag that validates the prim
- // instance is always reset every frame to catch similar
- // issues in future.
- for prim_instance in &mut prim_list.prim_instances[cluster.prim_range()] {
- prim_instance.reset();
- }
- continue;
- }
-
- map_local_to_surface.set_target_spatial_node(
- cluster.spatial_node_index,
- frame_context.spatial_tree,
- );
-
- for prim_instance in &mut prim_list.prim_instances[cluster.prim_range()] {
- prim_instance.reset();
-
- if prim_instance.is_chased() {
- #[cfg(debug_assertions)] // needed for ".id" part
- println!("\tpreparing {:?} in {:?}", prim_instance.id, pic_index);
- println!("\t{:?}", prim_instance.kind);
- }
-
- let (is_passthrough, prim_local_rect, prim_shadowed_rect) = match prim_instance.kind {
- PrimitiveInstanceKind::Picture { pic_index, .. } => {
- let (is_visible, is_passthrough) = {
- let pic = &store.pictures[pic_index.0];
- (pic.is_visible(), pic.raster_config.is_none())
- };
-
- if !is_visible {
- continue;
- }
-
- if is_passthrough {
- frame_state.clip_chain_stack.push_clip(
- prim_instance.clip_set.clip_chain_id,
- frame_state.clip_store,
- );
- }
-
- let pic_surface_rect = update_primitive_visibility(
- store,
- pic_index,
- surface_index,
- &world_culling_rect,
- frame_context,
- frame_state,
- tile_caches,
- false,
- );
-
- if is_passthrough {
- frame_state.clip_chain_stack.pop_clip();
- }
-
- let pic = &store.pictures[pic_index.0];
-
- if prim_instance.is_chased() && pic.estimated_local_rect != pic.precise_local_rect {
- println!("\testimate {:?} adjusted to {:?}", pic.estimated_local_rect, pic.precise_local_rect);
- }
-
- let mut shadow_rect = pic.precise_local_rect;
- match pic.raster_config {
- Some(ref rc) => match rc.composite_mode {
- // If we have a drop shadow filter, we also need to include the shadow in
- // our shadowed local rect for the purpose of calculating the size of the
- // picture.
- PictureCompositeMode::Filter(Filter::DropShadows(ref shadows)) => {
- for shadow in shadows {
- shadow_rect = shadow_rect.union(&pic.precise_local_rect.translate(shadow.offset));
- }
- }
- _ => {}
- }
- None => {
- // If the primitive does not have its own raster config, we need to
- // propagate the surface rect calculation to the parent.
- if let Some(ref rect) = pic_surface_rect {
- surface_rect = surface_rect.union(rect);
- }
- }
- }
-
- (is_passthrough, pic.precise_local_rect, shadow_rect)
- }
- _ => {
- let prim_data = &frame_state.data_stores.as_common_data(&prim_instance);
-
- (false, prim_data.prim_rect, prim_data.prim_rect)
- }
- };
-
- if is_passthrough {
- // Pass through pictures are always considered visible in all dirty tiles.
- prim_instance.vis.state = VisibilityState::PassThrough;
- } else {
- if prim_local_rect.size.width <= 0.0 || prim_local_rect.size.height <= 0.0 {
- if prim_instance.is_chased() {
- println!("\tculled for zero local rectangle");
- }
- continue;
- }
-
- // Inflate the local rect for this primitive by the inflation factor of
- // the picture context and include the shadow offset. This ensures that
- // even if the primitive itself is not visible, any effects from the
- // blur radius or shadow will be correctly taken into account.
- let inflation_factor = surface.inflation_factor;
- let local_rect = prim_shadowed_rect
- .inflate(inflation_factor, inflation_factor)
- .intersection(&prim_instance.clip_set.local_clip_rect);
- let local_rect = match local_rect {
- Some(local_rect) => local_rect,
- None => {
- if prim_instance.is_chased() {
- println!("\tculled for being out of the local clip rectangle: {:?}",
- prim_instance.clip_set.local_clip_rect);
- }
- continue;
- }
- };
-
- // Include the clip chain for this primitive in the current stack.
- frame_state.clip_chain_stack.push_clip(
- prim_instance.clip_set.clip_chain_id,
- frame_state.clip_store,
- );
-
- frame_state.clip_store.set_active_clips(
- prim_instance.clip_set.local_clip_rect,
- cluster.spatial_node_index,
- map_local_to_surface.ref_spatial_node_index,
- frame_state.clip_chain_stack.current_clips_array(),
- &frame_context.spatial_tree,
- &frame_state.data_stores.clip,
- );
-
- let clip_chain = frame_state
- .clip_store
- .build_clip_chain_instance(
- local_rect,
- &map_local_to_surface,
- &map_surface_to_world,
- &frame_context.spatial_tree,
- frame_state.gpu_cache,
- frame_state.resource_cache,
- surface.device_pixel_scale,
- &world_culling_rect,
- &mut frame_state.data_stores.clip,
- true,
- prim_instance.is_chased(),
- );
-
- // Ensure the primitive clip is popped
- frame_state.clip_chain_stack.pop_clip();
-
- prim_instance.vis.clip_chain = match clip_chain {
- Some(clip_chain) => clip_chain,
- None => {
- if prim_instance.is_chased() {
- println!("\tunable to build the clip chain, skipping");
- }
- continue;
- }
- };
-
- if prim_instance.is_chased() {
- println!("\teffective clip chain from {:?} {}",
- prim_instance.vis.clip_chain.clips_range,
- if apply_local_clip_rect { "(applied)" } else { "" },
- );
- println!("\tpicture rect {:?} @{:?}",
- prim_instance.vis.clip_chain.pic_clip_rect,
- prim_instance.vis.clip_chain.pic_spatial_node_index,
- );
- }
-
- prim_instance.vis.combined_local_clip_rect = if apply_local_clip_rect {
- prim_instance.vis.clip_chain.local_clip_rect
- } else {
- prim_instance.clip_set.local_clip_rect
- };
-
- if prim_instance.vis.combined_local_clip_rect.size.is_empty() {
- if prim_instance.is_chased() {
- println!("\tculled for zero local clip rectangle");
- }
- continue;
- }
-
- // Include the visible area for primitive, including any shadows, in
- // the area affected by the surface.
- match prim_instance.vis.combined_local_clip_rect.intersection(&local_rect) {
- Some(visible_rect) => {
- if let Some(rect) = map_local_to_surface.map(&visible_rect) {
- surface_rect = surface_rect.union(&rect);
- }
- }
- None => {
- if prim_instance.is_chased() {
- println!("\tculled for zero visible rectangle");
- }
- continue;
- }
- }
-
- frame_state.tile_cache
- .as_mut()
- .unwrap()
- .update_prim_dependencies(
- prim_instance,
- cluster.spatial_node_index,
- prim_local_rect,
- frame_context,
- frame_state.data_stores,
- frame_state.clip_store,
- &store.pictures,
- frame_state.resource_cache,
- &store.color_bindings,
- &frame_state.surface_stack,
- &mut frame_state.composite_state,
- &mut frame_state.gpu_cache,
- is_root_tile_cache,
- );
-
- // Skip post visibility prim update if this primitive was culled above.
- match prim_instance.vis.state {
- VisibilityState::Unset => panic!("bug: invalid state"),
- VisibilityState::Culled => continue,
- VisibilityState::Coarse { .. } | VisibilityState::Detailed { .. } | VisibilityState::PassThrough => {}
- }
-
- // When the debug display is enabled, paint a colored rectangle around each
- // primitive.
- if frame_context.debug_flags.contains(::api::DebugFlags::PRIMITIVE_DBG) {
- let debug_color = match prim_instance.kind {
- PrimitiveInstanceKind::Picture { .. } => ColorF::TRANSPARENT,
- PrimitiveInstanceKind::TextRun { .. } => debug_colors::RED,
- PrimitiveInstanceKind::LineDecoration { .. } => debug_colors::PURPLE,
- PrimitiveInstanceKind::NormalBorder { .. } |
- PrimitiveInstanceKind::ImageBorder { .. } => debug_colors::ORANGE,
- PrimitiveInstanceKind::Rectangle { .. } => ColorF { r: 0.8, g: 0.8, b: 0.8, a: 0.5 },
- PrimitiveInstanceKind::YuvImage { .. } => debug_colors::BLUE,
- PrimitiveInstanceKind::Image { .. } => debug_colors::BLUE,
- PrimitiveInstanceKind::LinearGradient { .. } => debug_colors::PINK,
- PrimitiveInstanceKind::CachedLinearGradient { .. } => debug_colors::PINK,
- PrimitiveInstanceKind::RadialGradient { .. } => debug_colors::PINK,
- PrimitiveInstanceKind::ConicGradient { .. } => debug_colors::PINK,
- PrimitiveInstanceKind::Clear { .. } => debug_colors::CYAN,
- PrimitiveInstanceKind::Backdrop { .. } => debug_colors::MEDIUMAQUAMARINE,
- };
- if debug_color.a != 0.0 {
- if let Some(rect) = calculate_prim_clipped_world_rect(
- &prim_instance.vis.clip_chain.pic_clip_rect,
- &world_culling_rect,
- &map_surface_to_world,
- ) {
- let debug_rect = rect * frame_context.global_device_pixel_scale;
- frame_state.scratch.primitive.push_debug_rect(debug_rect, debug_color, debug_color.scale_alpha(0.5));
- }
- }
- } else if frame_context.debug_flags.contains(::api::DebugFlags::OBSCURE_IMAGES) {
- let is_image = matches!(
- prim_instance.kind,
- PrimitiveInstanceKind::Image { .. } | PrimitiveInstanceKind::YuvImage { .. }
- );
- if is_image {
- // We allow "small" images, since they're generally UI elements.
- if let Some(rect) = calculate_prim_clipped_world_rect(
- &prim_instance.vis.clip_chain.pic_clip_rect,
- &world_culling_rect,
- &map_surface_to_world,
- ) {
- let rect = rect * frame_context.global_device_pixel_scale;
- if rect.size.width > 70.0 && rect.size.height > 70.0 {
- frame_state.scratch.primitive.push_debug_rect(rect, debug_colors::PURPLE, debug_colors::PURPLE);
- }
- }
- }
- }
-
- if prim_instance.is_chased() {
- println!("\tvisible with {:?}", prim_instance.vis.combined_local_clip_rect);
- }
-
- // TODO(gw): This should probably be an instance method on PrimitiveInstance?
- update_prim_post_visibility(
- store,
- prim_instance,
- world_culling_rect,
- &map_surface_to_world,
- );
- }
- }
- }
-
- // Similar to above, pop either the clip chain or root entry off the current clip stack.
- if is_composite {
- frame_state.pop_surface();
- }
-
- let pic = &mut store.pictures[pic_index.0];
- pic.prim_list = prim_list;
-
- // If the local rect changed (due to transforms in child primitives) then
- // invalidate the GPU cache location to re-upload the new local rect
- // and stretch size. Drop shadow filters also depend on the local rect
- // size for the extra GPU cache data handle.
- // TODO(gw): In future, if we support specifying a flag which gets the
- // stretch size from the segment rect in the shaders, we can
- // remove this invalidation here completely.
- if let Some(ref rc) = pic.raster_config {
- // Inflate the local bounding rect if required by the filter effect.
- if pic.options.inflate_if_required {
- surface_rect = rc.composite_mode.inflate_picture_rect(surface_rect, surface.scale_factors);
- }
-
- // Layout space for the picture is picture space from the
- // perspective of its child primitives.
- pic.precise_local_rect = surface_rect * Scale::new(1.0);
-
- // If the precise rect changed since last frame, we need to invalidate
- // any segments and gpu cache handles for drop-shadows.
- // TODO(gw): Requiring storage of the `prev_precise_local_rect` here
- // is a total hack. It's required because `prev_precise_local_rect`
- // gets written to twice (during initial vis pass and also during
- // prepare pass). The proper longer term fix for this is to make
- // use of the conservative picture rect for segmenting (which should
- // be done during scene building).
- if pic.precise_local_rect != pic.prev_precise_local_rect {
- match rc.composite_mode {
- PictureCompositeMode::Filter(Filter::DropShadows(..)) => {
- for handle in &pic.extra_gpu_data_handles {
- frame_state.gpu_cache.invalidate(handle);
- }
- }
- _ => {}
- }
- // Invalidate any segments built for this picture, since the local
- // rect has changed.
- pic.segments_are_valid = false;
- pic.prev_precise_local_rect = pic.precise_local_rect;
- }
-
- if let PictureCompositeMode::TileCache { .. } = rc.composite_mode {
- let mut tile_cache = frame_state.tile_cache.take().unwrap();
-
- // Build the dirty region(s) for this tile cache.
- tile_cache.post_update(
- frame_context,
- frame_state,
- );
-
- tile_caches.insert(SliceId::new(tile_cache.slice), tile_cache);
- }
-
- None
- } else {
- let parent_surface = &frame_context.surfaces[parent_surface_index.0 as usize];
- let map_surface_to_parent_surface = SpaceMapper::new_with_target(
- parent_surface.surface_spatial_node_index,
- surface.surface_spatial_node_index,
- PictureRect::max_rect(),
- frame_context.spatial_tree,
- );
- map_surface_to_parent_surface.map(&surface_rect)
- }
-}
-
-
-fn update_prim_post_visibility(
- store: &mut PrimitiveStore,
- prim_instance: &mut PrimitiveInstance,
- world_culling_rect: WorldRect,
- map_surface_to_world: &SpaceMapper<PicturePixel, WorldPixel>,
-) {
- profile_scope!("update_prim_post_visibility");
- match prim_instance.kind {
- PrimitiveInstanceKind::Picture { pic_index, .. } => {
- let pic = &mut store.pictures[pic_index.0];
- // If this picture has a surface, determine the clipped bounding rect for it to
- // minimize the size of the render target that is required.
- if let Some(ref mut raster_config) = pic.raster_config {
- raster_config.clipped_bounding_rect = map_surface_to_world
- .map(&prim_instance.vis.clip_chain.pic_clip_rect)
- .and_then(|rect| {
- rect.intersection(&world_culling_rect)
- })
- .unwrap_or(WorldRect::zero());
- }
- }
- PrimitiveInstanceKind::TextRun { .. } => {
- // Text runs can't request resources early here, as we don't
- // know until TileCache::post_update() whether we are drawing
- // on an opaque surface.
- // TODO(gw): We might be able to detect simple cases of this earlier,
- // during the picture traversal. But it's probably not worth it?
- }
- _ => {}
- }
-}
-
-pub fn compute_conservative_visible_rect(
- clip_chain: &ClipChainInstance,
- world_culling_rect: WorldRect,
- prim_spatial_node_index: SpatialNodeIndex,
- spatial_tree: &SpatialTree,
-) -> LayoutRect {
- // Mapping from picture space -> world space
- let map_pic_to_world: SpaceMapper<PicturePixel, WorldPixel> = SpaceMapper::new_with_target(
- ROOT_SPATIAL_NODE_INDEX,
- clip_chain.pic_spatial_node_index,
- world_culling_rect,
- spatial_tree,
- );
-
- // Mapping from local space -> picture space
- let map_local_to_pic: SpaceMapper<LayoutPixel, PicturePixel> = SpaceMapper::new_with_target(
- clip_chain.pic_spatial_node_index,
- prim_spatial_node_index,
- PictureRect::max_rect(),
- spatial_tree,
- );
-
- // Unmap the world culling rect from world -> picture space. If this mapping fails due
- // to matrix weirdness, best we can do is use the clip chain's local clip rect.
- let pic_culling_rect = match map_pic_to_world.unmap(&world_culling_rect) {
- Some(rect) => rect,
- None => return clip_chain.local_clip_rect,
- };
-
- // Intersect the unmapped world culling rect with the primitive's clip chain rect that
- // is in picture space (the clip-chain already takes into account the bounds of the
- // primitive local_rect and local_clip_rect). If there is no intersection here, the
- // primitive is not visible at all.
- let pic_culling_rect = match pic_culling_rect.intersection(&clip_chain.pic_clip_rect) {
- Some(rect) => rect,
- None => return LayoutRect::zero(),
- };
-
- // Unmap the picture culling rect from picture -> local space. If this mapping fails due
- // to matrix weirdness, best we can do is use the clip chain's local clip rect.
- match map_local_to_pic.unmap(&pic_culling_rect) {
- Some(rect) => rect,
- None => clip_chain.local_clip_rect,
- }
-}
-
-fn calculate_prim_clipped_world_rect(
- pic_clip_rect: &PictureRect,
- world_culling_rect: &WorldRect,
- map_surface_to_world: &SpaceMapper<PicturePixel, WorldPixel>,
-) -> Option<WorldRect> {
- map_surface_to_world
- .map(pic_clip_rect)
- .and_then(|world_rect| {
- world_rect.intersection(world_culling_rect)
- })
-}