diff options
author | Patrick Shaughnessy <pshaughn@comcast.net> | 2019-12-20 20:04:41 -0500 |
---|---|---|
committer | Patrick Shaughnessy <pshaughn@comcast.net> | 2019-12-24 11:08:15 -0500 |
commit | 0c08849d1cb83e84d65bf601c11428960da2dc71 (patch) | |
tree | 2471c54eceac5f336ff94248160397dc94cd5d2d /components/script/dom | |
parent | 3e77a0ae09daf28c0a102add5a198bff08befe31 (diff) | |
download | servo-0c08849d1cb83e84d65bf601c11428960da2dc71.tar.gz servo-0c08849d1cb83e84d65bf601c11428960da2dc71.zip |
Headers no longer have to be ASCII or UTF-8, though there is some disagreement with http::header::HeaderValue
Diffstat (limited to 'components/script/dom')
-rw-r--r-- | components/script/dom/headers.rs | 130 |
1 files changed, 56 insertions, 74 deletions
diff --git a/components/script/dom/headers.rs b/components/script/dom/headers.rs index 61c6fc1f98e..a75b0292d6f 100644 --- a/components/script/dom/headers.rs +++ b/components/script/dom/headers.rs @@ -100,10 +100,20 @@ impl HeadersMethods for Headers { combined_value.push(b','); } combined_value.extend(valid_value.iter().cloned()); - self.header_list.borrow_mut().insert( - HeaderName::from_str(&valid_name).unwrap(), - HeaderValue::from_bytes(&combined_value).unwrap(), - ); + match HeaderValue::from_bytes(&combined_value) { + Ok(value) => { + self.header_list + .borrow_mut() + .insert(HeaderName::from_str(&valid_name).unwrap(), value); + }, + Err(_) => { + // can't add the header, but we don't need to panic the browser over it + warn!( + "Servo thinks \"{:?}\" is a valid HTTP header value but HeaderValue doesn't.", + combined_value + ); + }, + }; Ok(()) } @@ -197,7 +207,7 @@ impl Headers { for (name, value) in h.header_list.borrow().iter() { self.Append( ByteString::new(Vec::from(name.as_str())), - ByteString::new(Vec::from(value.to_str().unwrap().as_bytes())), + ByteString::new(Vec::from(value.as_bytes())), )?; } Ok(()) @@ -267,13 +277,13 @@ impl Headers { .map_or(vec![], |v| v.as_bytes().to_owned()) } - pub fn sort_header_list(&self) -> Vec<(String, String)> { + pub fn sort_header_list(&self) -> Vec<(String, Vec<u8>)> { let borrowed_header_list = self.header_list.borrow(); let headers_iter = borrowed_header_list.iter(); let mut header_vec = vec![]; for (name, value) in headers_iter { let name = name.as_str().to_owned(); - let value = value.to_str().unwrap().to_owned(); + let value = value.as_bytes().to_vec(); let name_value = (name, value); header_vec.push(name_value); } @@ -293,7 +303,7 @@ impl Iterable for Headers { fn get_value_at_index(&self, n: u32) -> ByteString { let sorted_header_vec = self.sort_header_list(); let value = sorted_header_vec[n as usize].1.clone(); - ByteString::new(value.into_bytes().to_vec()) + ByteString::new(value) } fn get_key_at_index(&self, n: u32) -> ByteString { @@ -345,40 +355,19 @@ pub fn is_forbidden_header_name(name: &str) -> bool { } // There is some unresolved confusion over the definition of a name and a value. -// The fetch spec [1] defines a name as "a case-insensitive byte -// sequence that matches the field-name token production. The token -// productions are viewable in [2]." A field-name is defined as a -// token, which is defined in [3]. -// ISSUE 1: -// It defines a value as "a byte sequence that matches the field-content token production." -// To note, there is a difference between field-content and -// field-value (which is made up of field-content and obs-fold). The -// current definition does not allow for obs-fold (which are white -// space and newlines) in values. So perhaps a value should be defined -// as "a byte sequence that matches the field-value token production." -// However, this would then allow values made up entirely of white space and newlines. -// RELATED ISSUE 2: -// According to a previously filed Errata ID: 4189 in [4], "the -// specified field-value rule does not allow single field-vchar -// surrounded by whitespace anywhere". They provided a fix for the -// field-content production, but ISSUE 1 has still not been resolved. -// The production definitions likely need to be re-written. -// [1] https://fetch.spec.whatwg.org/#concept-header-value -// [2] https://tools.ietf.org/html/rfc7230#section-3.2 -// [3] https://tools.ietf.org/html/rfc7230#section-3.2.6 -// [4] https://www.rfc-editor.org/errata_search.php?rfc=7230 // -// As of December 2019 WHATWG, isn't even using grammar productions for value; +// As of December 2019, WHATWG has no formal grammar production for value; // https://fetch.spec.whatg.org/#concept-header-value just says not to have -// newlines, nulls, or leading/trailing whitespace. +// newlines, nulls, or leading/trailing whitespace. It even allows +// octets that aren't a valid UTF-8 encoding, and WPT tests reflect this. +// The HeaderValue class does not fully reflect this, so headers +// containing bytes with values 1..31 or 127 can't be created, failing +// WPT tests but probably not affecting anything important on the real Internet. fn validate_name_and_value(name: ByteString, value: ByteString) -> Fallible<(String, Vec<u8>)> { let valid_name = validate_name(name)?; - - // this is probably out of date - if !is_field_content(&value) { - return Err(Error::Type("Value is not valid".to_string())); + if !is_legal_header_value(&value) { + return Err(Error::Type("Header value is not valid".to_string())); } - Ok((valid_name, value.into())) } @@ -431,47 +420,40 @@ fn is_field_name(name: &ByteString) -> bool { is_token(&*name) } -// https://tools.ietf.org/html/rfc7230#section-3.2 -// http://www.rfc-editor.org/errata_search.php?rfc=7230 -// Errata ID: 4189 -// field-content = field-vchar [ 1*( SP / HTAB / field-vchar ) -// field-vchar ] -fn is_field_content(value: &ByteString) -> bool { +// https://fetch.spec.whatg.org/#concept-header-value +fn is_legal_header_value(value: &ByteString) -> bool { let value_len = value.len(); - if value_len == 0 { - return false; - } - if !is_field_vchar(value[0]) { - return false; - } - - if value_len > 2 { - for &ch in &value[1..value_len - 1] { - if !is_field_vchar(ch) && !is_space(ch) && !is_htab(ch) { - return false; - } + return true; + } + match value[0] { + b' ' | b'\t' => return false, + _ => {}, + }; + match value[value_len - 1] { + b' ' | b'\t' => return false, + _ => {}, + }; + for &ch in &value[..] { + match ch { + b'\0' | b'\n' | b'\r' => return false, + _ => {}, } } - - if !is_field_vchar(value[value_len - 1]) { - return false; - } - - return true; -} - -fn is_space(x: u8) -> bool { - x == b' ' -} - -fn is_htab(x: u8) -> bool { - x == b'\t' -} - -// https://tools.ietf.org/html/rfc7230#section-3.2 -fn is_field_vchar(x: u8) -> bool { - is_vchar(x) || is_obs_text(x) + true + // If accepting non-UTF8 header values causes breakage, + // removing the above "true" and uncommenting the below code + // would ameliorate it while still accepting most reasonable headers: + //match str::from_utf8(value) { + // Ok(_) => true, + // Err(_) => { + // warn!( + // "Rejecting spec-legal but non-UTF8 header value: {:?}", + // value + // ); + // false + // }, + // } } // https://tools.ietf.org/html/rfc5234#appendix-B.1 |