diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | components/net/resource_task.rs | 88 | ||||
-rw-r--r-- | python/servo/bootstrap_commands.py | 92 | ||||
-rw-r--r-- | python/servo/command_base.py | 2 | ||||
-rw-r--r-- | tests/unit/net/resource_task.rs | 124 |
5 files changed, 287 insertions, 20 deletions
diff --git a/.gitignore b/.gitignore index a21c4e30a17..0354c3e17f3 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ Servo.app .config.mk.last parser.out /glfw +hsts_preload.json diff --git a/components/net/resource_task.rs b/components/net/resource_task.rs index 7c97b5449b1..59b15d53ead 100644 --- a/components/net/resource_task.rs +++ b/components/net/resource_task.rs @@ -17,21 +17,24 @@ use net_traits::{Metadata, ProgressMsg, ResourceTask, AsyncResponseTarget, Respo use net_traits::ProgressMsg::Done; use util::opts; use util::task::spawn_named; +use util::resource_files::read_resource_file; use devtools_traits::{DevtoolsControlMsg}; use hyper::header::{ContentType, Header, SetCookie, UserAgent}; use hyper::mime::{Mime, TopLevel, SubLevel}; +use rustc_serialize::json::{decode}; + use std::borrow::ToOwned; use std::boxed::FnBox; use std::collections::HashMap; use std::env; use std::fs::File; use std::io::{BufReader, Read}; +use std::str::{FromStr, from_utf8}; use std::sync::Arc; use std::sync::mpsc::{channel, Receiver, Sender}; - static mut HOST_TABLE: Option<*mut HashMap<String, String>> = None; pub fn global_init() { @@ -152,17 +155,86 @@ pub fn start_sending_opt(start_chan: LoadConsumer, metadata: Metadata) -> Result } } +fn preload_hsts_domains() -> Option<HSTSList> { + match read_resource_file(&["hsts_preload.json"]) { + Ok(bytes) => { + match from_utf8(&bytes) { + Ok(hsts_preload_content) => { + HSTSList::new_from_preload(hsts_preload_content) + }, + Err(_) => None + } + }, + Err(_) => None + } +} + /// Create a ResourceTask pub fn new_resource_task(user_agent: Option<String>, devtools_chan: Option<Sender<DevtoolsControlMsg>>) -> ResourceTask { + let hsts_preload = preload_hsts_domains(); + let (setup_chan, setup_port) = channel(); let setup_chan_clone = setup_chan.clone(); spawn_named("ResourceManager".to_owned(), move || { - ResourceManager::new(setup_port, user_agent, setup_chan_clone, devtools_chan).start(); + ResourceManager::new(setup_port, user_agent, setup_chan_clone, hsts_preload, devtools_chan).start(); }); setup_chan } +#[derive(RustcDecodable, RustcEncodable)] +pub struct HSTSEntry { + pub host: String, + pub include_subdomains: bool +} + +#[derive(RustcDecodable, RustcEncodable)] +pub struct HSTSList { + pub entries: Vec<HSTSEntry> +} + +impl HSTSList { + pub fn new_from_preload(preload_content: &str) -> Option<HSTSList> { + match decode(preload_content) { + Ok(list) => Some(list), + Err(_) => None + } + } + + pub fn always_secure(&self, host: &str) -> bool { + // TODO - Should this be faster than O(n)? The HSTS list is only a few + // hundred or maybe thousand entries... + self.entries.iter().any(|e| { + if e.include_subdomains { + host.ends_with(&format!(".{}", e.host)) || e.host == host + } else { + e.host == host + } + }) + } + + + pub fn make_hsts_secure(&self, load_data: LoadData) -> LoadData { + if let Some(h) = load_data.url.domain() { + if self.always_secure(h) { + match &*load_data.url.scheme { + "http" => { + let mut secure_load_data = load_data.clone(); + let mut secure_url = load_data.url.clone(); + secure_url.scheme = "https".to_string(); + secure_load_data.url = secure_url; + + return secure_load_data + }, + _ => () + }; + } + } + + load_data + } +} + pub fn parse_hostsfile(hostsfile_content: &str) -> Box<HashMap<String, String>> { let ipv4_regex = regex!( r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"); @@ -204,13 +276,15 @@ struct ResourceManager { cookie_storage: CookieStorage, resource_task: Sender<ControlMsg>, mime_classifier: Arc<MIMEClassifier>, - devtools_chan: Option<Sender<DevtoolsControlMsg>> + devtools_chan: Option<Sender<DevtoolsControlMsg>>, + hsts_list: Option<HSTSList> } impl ResourceManager { fn new(from_client: Receiver<ControlMsg>, user_agent: Option<String>, resource_task: Sender<ControlMsg>, + hsts_list: Option<HSTSList>, devtools_channel: Option<Sender<DevtoolsControlMsg>>) -> ResourceManager { ResourceManager { from_client: from_client, @@ -218,7 +292,8 @@ impl ResourceManager { cookie_storage: CookieStorage::new(), resource_task: resource_task, mime_classifier: Arc::new(MIMEClassifier::new()), - devtools_chan: devtools_channel + devtools_chan: devtools_channel, + hsts_list: hsts_list } } } @@ -262,6 +337,11 @@ impl ResourceManager { load_data.preserved_headers.set(UserAgent(ua.clone())); }); + match self.hsts_list { + Some(ref l) => load_data = l.make_hsts_secure(load_data), + _ => () + } + fn from_factory(factory: fn(LoadData, LoadConsumer, Arc<MIMEClassifier>)) -> Box<FnBox(LoadData, LoadConsumer, Arc<MIMEClassifier>) + Send> { box move |load_data, senders, classifier| { diff --git a/python/servo/bootstrap_commands.py b/python/servo/bootstrap_commands.py index 5cf2f39c927..67345fb1fdd 100644 --- a/python/servo/bootstrap_commands.py +++ b/python/servo/bootstrap_commands.py @@ -9,11 +9,15 @@ from __future__ import print_function, unicode_literals +import base64 +import json import os import os.path as path +import re import shutil import subprocess import sys +import StringIO import tarfile import urllib2 from distutils.version import LooseVersion @@ -27,27 +31,33 @@ from mach.decorators import ( from servo.command_base import CommandBase, cd, host_triple -def download(desc, src, dst): +def download(desc, src, writer): print("Downloading %s..." % desc) dumb = (os.environ.get("TERM") == "dumb") or (not sys.stdout.isatty()) try: resp = urllib2.urlopen(src) - fsize = int(resp.info().getheader('Content-Length').strip()) + + fsize = None + if resp.info().getheader('Content-Length'): + fsize = int(resp.info().getheader('Content-Length').strip()) + recved = 0 chunk_size = 8192 - with open(dst, 'wb') as fd: - while True: - chunk = resp.read(chunk_size) - if not chunk: - break - recved += len(chunk) - if not dumb: + while True: + chunk = resp.read(chunk_size) + if not chunk: break + recved += len(chunk) + if not dumb: + if fsize is not None: pct = recved * 100.0 / fsize print("\rDownloading %s: %5.1f%%" % (desc, pct), end="") - sys.stdout.flush() - fd.write(chunk) + else: + print("\rDownloading %s" % desc, end="") + + sys.stdout.flush() + writer.write(chunk) if not dumb: print() @@ -62,6 +72,14 @@ def download(desc, src, dst): sys.exit(1) +def download_file(desc, src, dst): + with open(dst, 'wb') as fd: + download(desc, src, fd) + +def download_bytes(desc, src): + content_writer = StringIO.StringIO() + download(desc, src, content_writer) + return content_writer.getvalue() def extract(src, dst, movedir=None): tarfile.open(src).extractall(dst) @@ -111,7 +129,7 @@ class MachCommands(CommandBase): % self.rust_snapshot_path()) tgz_file = rust_dir + '.tar.gz' - download("Rust snapshot", snapshot_url, tgz_file) + download_file("Rust snapshot", snapshot_url, tgz_file) print("Extracting Rust snapshot...") snap_dir = path.join(rust_dir, @@ -142,7 +160,7 @@ class MachCommands(CommandBase): % docs_name) tgz_file = path.join(hash_dir, 'doc.tar.gz') - download("Rust docs", snapshot_url, tgz_file) + download_file("Rust docs", snapshot_url, tgz_file) print("Extracting Rust docs...") temp_dir = path.join(hash_dir, "temp_docs") @@ -166,7 +184,7 @@ class MachCommands(CommandBase): self.cargo_build_id()) if not force and path.exists(path.join(cargo_dir, "bin", "cargo")): print("Cargo already downloaded.", end=" ") - print("Use |bootstrap_cargo --force| to download again.") + print("Use |bootstrap-cargo --force| to download again.") return if path.isdir(cargo_dir): @@ -177,7 +195,7 @@ class MachCommands(CommandBase): nightly_url = "https://static-rust-lang-org.s3.amazonaws.com/cargo-dist/%s/%s" % \ (self.cargo_build_id(), tgz_file) - download("Cargo nightly", nightly_url, tgz_file) + download_file("Cargo nightly", nightly_url, tgz_file) print("Extracting Cargo nightly...") nightly_dir = path.join(cargo_dir, @@ -185,6 +203,50 @@ class MachCommands(CommandBase): extract(tgz_file, cargo_dir, movedir=nightly_dir) print("Cargo ready.") + @Command('bootstrap-hsts-preload', + description='Download the HSTS preload list', + category='bootstrap') + @CommandArgument('--force', '-f', + action='store_true', + help='Force download even if HSTS list already exist') + def bootstrap_hsts_preload(self, force=False): + preload_filename = "hsts_preload.json" + preload_path = path.join(self.context.topdir, "resources") + + if not force and path.exists(path.join(preload_path, preload_filename)): + print("HSTS preload list already downloaded.", end=" ") + print("Use |bootstrap-hsts-preload --force| to download again.") + return + + chromium_hsts_url = "https://chromium.googlesource.com/chromium/src/net/+/master/http/transport_security_state_static.json?format=TEXT" + + try: + content_base64 = download_bytes("Chromium HSTS preload list", chromium_hsts_url) + except URLError, e: + print("Unable to download chromium HSTS preload list, are you connected to the internet?") + sys.exit(1) + + content_decoded = base64.b64decode(content_base64) + content_json = re.sub(r'//.*$', '', content_decoded, flags=re.MULTILINE) + + try: + pins_and_static_preloads = json.loads(content_json) + entries = { + "entries": [ + { + "host": e["name"], + "include_subdomains": e.get("include_subdomains", False) + } + for e in pins_and_static_preloads["entries"] + ] + } + + with open(path.join(preload_path, preload_filename), 'w') as fd: + json.dump(entries, fd, indent=4) + except ValueError, e: + print("Unable to parse chromium HSTS preload list, has the format changed?") + sys.exit(1) + @Command('update-submodules', description='Update submodules', category='bootstrap') diff --git a/python/servo/command_base.py b/python/servo/command_base.py index 7c41fa42196..b4ba795691f 100644 --- a/python/servo/command_base.py +++ b/python/servo/command_base.py @@ -324,5 +324,7 @@ class CommandBase(object): not path.exists(path.join( self.config["tools"]["cargo-root"], "cargo", "bin", "cargo")): Registrar.dispatch("bootstrap-cargo", context=self.context) + if not path.exists(path.join("resources", "hsts_preload.json")): + Registrar.dispatch("bootstrap-hsts-preload", context=self.context) self.context.bootstrapped = True diff --git a/tests/unit/net/resource_task.rs b/tests/unit/net/resource_task.rs index 5abfc40ba45..0c73fac39ac 100644 --- a/tests/unit/net/resource_task.rs +++ b/tests/unit/net/resource_task.rs @@ -2,7 +2,9 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -use net::resource_task::{new_resource_task, parse_hostsfile, replace_hosts}; +use net::resource_task::{ + new_resource_task, parse_hostsfile, replace_hosts, HSTSList, HSTSEntry +}; use net_traits::{ControlMsg, LoadData, LoadConsumer}; use net_traits::ProgressMsg; use std::borrow::ToOwned; @@ -18,6 +20,126 @@ fn test_exit() { } #[test] +fn test_parse_hsts_preload_should_return_none_when_json_invalid() { + let mock_preload_content = "derp"; + match HSTSList::new_from_preload(mock_preload_content) { + Some(_) => assert!(false, "preload list should not have parsed"), + None => assert!(true) + } +} + +#[test] +fn test_parse_hsts_preload_should_return_none_when_json_contains_no_entries_key() { + let mock_preload_content = "{\"nothing\": \"to see here\"}"; + match HSTSList::new_from_preload(mock_preload_content) { + Some(_) => assert!(false, "preload list should not have parsed"), + None => assert!(true) + } +} + +#[test] +fn test_parse_hsts_preload_should_decode_host_and_includes_subdomains() { + let mock_preload_content = "{\ + \"entries\": [\ + {\"host\": \"mozilla.org\",\ + \"include_subdomains\": false}\ + ]\ + }"; + let hsts_list = HSTSList::new_from_preload(mock_preload_content); + let entries = hsts_list.unwrap().entries; + + assert!(entries.get(0).unwrap().host == "mozilla.org"); + assert!(entries.get(0).unwrap().include_subdomains == false); +} + +#[test] +fn test_hsts_list_with_no_entries_does_not_always_secure() { + let hsts_list = HSTSList { + entries: Vec::new() + }; + + assert!(hsts_list.always_secure("mozilla.org") == false); +} + +#[test] +fn test_hsts_list_with_exact_domain_entry_is_always_secure() { + let hsts_list = HSTSList { + entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false}] + }; + + assert!(hsts_list.always_secure("mozilla.org") == true); +} + +#[test] +fn test_hsts_list_with_subdomain_when_include_subdomains_is_true_is_always_secure() { + let hsts_list = HSTSList { + entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true}] + }; + + assert!(hsts_list.always_secure("servo.mozilla.org") == true); +} + +#[test] +fn test_hsts_list_with_subdomain_when_include_subdomains_is_false_is_not_always_secure() { + let hsts_list = HSTSList { + entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false}] + }; + + assert!(hsts_list.always_secure("servo.mozilla.org") == false); +} + +#[test] +fn test_hsts_list_with_subdomain_when_host_is_not_a_subdomain_is_not_always_secure() { + let hsts_list = HSTSList { + entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true}] + }; + + assert!(hsts_list.always_secure("servo-mozilla.org") == false); +} + +#[test] +fn test_hsts_list_with_subdomain_when_host_is_exact_match_is_always_secure() { + let hsts_list = HSTSList { + entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true}] + }; + + assert!(hsts_list.always_secure("mozilla.org") == true); +} + +#[test] +fn test_make_hsts_secure_doesnt_affect_non_http_schemas() { + let load_data = LoadData::new(Url::parse("file://mozilla.org").unwrap(), None); + let hsts_list = HSTSList { + entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false}] + }; + let secure_load_data = hsts_list.make_hsts_secure(load_data); + + assert!(&secure_load_data.url.scheme == "file"); +} + +#[test] +fn test_make_hsts_secure_sets_secure_schema_on_subdomains_when_include_subdomains_is_true() { + let load_data = LoadData::new(Url::parse("http://servo.mozilla.org").unwrap(), None); + let hsts_list = HSTSList { + entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: true}] + }; + let secure_load_data = hsts_list.make_hsts_secure(load_data); + + assert!(&secure_load_data.url.scheme == "https"); +} + +#[test] +fn test_make_hsts_secure_forces_an_http_host_in_list_to_https() { + let load_data = LoadData::new(Url::parse("http://mozilla.org").unwrap(), None); + let hsts_list = HSTSList { + entries: vec![HSTSEntry { host: "mozilla.org".to_string(), include_subdomains: false}] + }; + let secure_load_data = hsts_list.make_hsts_secure(load_data); + + assert!(&secure_load_data.url.scheme == "https"); +} + +#[test] fn test_bad_scheme() { let resource_task = new_resource_task(None, None); let (start_chan, start) = channel(); |