diff options
author | bors-servo <lbergstrom+bors@mozilla.com> | 2020-01-02 21:05:08 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-02 21:05:08 -0500 |
commit | 097a91112e5e83775b589e74c8412f0c37a07f75 (patch) | |
tree | ce14f5083a9c079c8afa4c56928170138fb1da4f /components/script/dom | |
parent | 1e0f2e59b7fe5238e42b53099cc992ca6d2ec4d3 (diff) | |
parent | 0c08849d1cb83e84d65bf601c11428960da2dc71 (diff) | |
download | servo-097a91112e5e83775b589e74c8412f0c37a07f75.tar.gz servo-097a91112e5e83775b589e74c8412f0c37a07f75.zip |
Auto merge of #25358 - pshaughn:looser_header_validation, r=jdm
Header values no longer have to be ASCII or UTF-8
<!-- Please describe your changes on the following line: -->
This passes some failed tests related to header validity when handling ByteStrings outside the printable ASCII range. A few failures remain because the HeaderValue class is stricter than WHATWG/WPT, disallowing various control-code bytes that the spec and tests expect to be allowed.
---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `___` with appropriate data: -->
- [X] `./mach build -d` does not report any errors
- [X] `./mach test-tidy` does not report any errors
- [X] These changes fix some of the test cases described in #24903
<!-- Either: -->
- [X] There are tests for these changes OR
<!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.-->
<!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. -->
Diffstat (limited to 'components/script/dom')
-rw-r--r-- | components/script/dom/headers.rs | 130 |
1 files changed, 56 insertions, 74 deletions
diff --git a/components/script/dom/headers.rs b/components/script/dom/headers.rs index 61c6fc1f98e..a75b0292d6f 100644 --- a/components/script/dom/headers.rs +++ b/components/script/dom/headers.rs @@ -100,10 +100,20 @@ impl HeadersMethods for Headers { combined_value.push(b','); } combined_value.extend(valid_value.iter().cloned()); - self.header_list.borrow_mut().insert( - HeaderName::from_str(&valid_name).unwrap(), - HeaderValue::from_bytes(&combined_value).unwrap(), - ); + match HeaderValue::from_bytes(&combined_value) { + Ok(value) => { + self.header_list + .borrow_mut() + .insert(HeaderName::from_str(&valid_name).unwrap(), value); + }, + Err(_) => { + // can't add the header, but we don't need to panic the browser over it + warn!( + "Servo thinks \"{:?}\" is a valid HTTP header value but HeaderValue doesn't.", + combined_value + ); + }, + }; Ok(()) } @@ -197,7 +207,7 @@ impl Headers { for (name, value) in h.header_list.borrow().iter() { self.Append( ByteString::new(Vec::from(name.as_str())), - ByteString::new(Vec::from(value.to_str().unwrap().as_bytes())), + ByteString::new(Vec::from(value.as_bytes())), )?; } Ok(()) @@ -267,13 +277,13 @@ impl Headers { .map_or(vec![], |v| v.as_bytes().to_owned()) } - pub fn sort_header_list(&self) -> Vec<(String, String)> { + pub fn sort_header_list(&self) -> Vec<(String, Vec<u8>)> { let borrowed_header_list = self.header_list.borrow(); let headers_iter = borrowed_header_list.iter(); let mut header_vec = vec![]; for (name, value) in headers_iter { let name = name.as_str().to_owned(); - let value = value.to_str().unwrap().to_owned(); + let value = value.as_bytes().to_vec(); let name_value = (name, value); header_vec.push(name_value); } @@ -293,7 +303,7 @@ impl Iterable for Headers { fn get_value_at_index(&self, n: u32) -> ByteString { let sorted_header_vec = self.sort_header_list(); let value = sorted_header_vec[n as usize].1.clone(); - ByteString::new(value.into_bytes().to_vec()) + ByteString::new(value) } fn get_key_at_index(&self, n: u32) -> ByteString { @@ -345,40 +355,19 @@ pub fn is_forbidden_header_name(name: &str) -> bool { } // There is some unresolved confusion over the definition of a name and a value. -// The fetch spec [1] defines a name as "a case-insensitive byte -// sequence that matches the field-name token production. The token -// productions are viewable in [2]." A field-name is defined as a -// token, which is defined in [3]. -// ISSUE 1: -// It defines a value as "a byte sequence that matches the field-content token production." -// To note, there is a difference between field-content and -// field-value (which is made up of field-content and obs-fold). The -// current definition does not allow for obs-fold (which are white -// space and newlines) in values. So perhaps a value should be defined -// as "a byte sequence that matches the field-value token production." -// However, this would then allow values made up entirely of white space and newlines. -// RELATED ISSUE 2: -// According to a previously filed Errata ID: 4189 in [4], "the -// specified field-value rule does not allow single field-vchar -// surrounded by whitespace anywhere". They provided a fix for the -// field-content production, but ISSUE 1 has still not been resolved. -// The production definitions likely need to be re-written. -// [1] https://fetch.spec.whatwg.org/#concept-header-value -// [2] https://tools.ietf.org/html/rfc7230#section-3.2 -// [3] https://tools.ietf.org/html/rfc7230#section-3.2.6 -// [4] https://www.rfc-editor.org/errata_search.php?rfc=7230 // -// As of December 2019 WHATWG, isn't even using grammar productions for value; +// As of December 2019, WHATWG has no formal grammar production for value; // https://fetch.spec.whatg.org/#concept-header-value just says not to have -// newlines, nulls, or leading/trailing whitespace. +// newlines, nulls, or leading/trailing whitespace. It even allows +// octets that aren't a valid UTF-8 encoding, and WPT tests reflect this. +// The HeaderValue class does not fully reflect this, so headers +// containing bytes with values 1..31 or 127 can't be created, failing +// WPT tests but probably not affecting anything important on the real Internet. fn validate_name_and_value(name: ByteString, value: ByteString) -> Fallible<(String, Vec<u8>)> { let valid_name = validate_name(name)?; - - // this is probably out of date - if !is_field_content(&value) { - return Err(Error::Type("Value is not valid".to_string())); + if !is_legal_header_value(&value) { + return Err(Error::Type("Header value is not valid".to_string())); } - Ok((valid_name, value.into())) } @@ -431,47 +420,40 @@ fn is_field_name(name: &ByteString) -> bool { is_token(&*name) } -// https://tools.ietf.org/html/rfc7230#section-3.2 -// http://www.rfc-editor.org/errata_search.php?rfc=7230 -// Errata ID: 4189 -// field-content = field-vchar [ 1*( SP / HTAB / field-vchar ) -// field-vchar ] -fn is_field_content(value: &ByteString) -> bool { +// https://fetch.spec.whatg.org/#concept-header-value +fn is_legal_header_value(value: &ByteString) -> bool { let value_len = value.len(); - if value_len == 0 { - return false; - } - if !is_field_vchar(value[0]) { - return false; - } - - if value_len > 2 { - for &ch in &value[1..value_len - 1] { - if !is_field_vchar(ch) && !is_space(ch) && !is_htab(ch) { - return false; - } + return true; + } + match value[0] { + b' ' | b'\t' => return false, + _ => {}, + }; + match value[value_len - 1] { + b' ' | b'\t' => return false, + _ => {}, + }; + for &ch in &value[..] { + match ch { + b'\0' | b'\n' | b'\r' => return false, + _ => {}, } } - - if !is_field_vchar(value[value_len - 1]) { - return false; - } - - return true; -} - -fn is_space(x: u8) -> bool { - x == b' ' -} - -fn is_htab(x: u8) -> bool { - x == b'\t' -} - -// https://tools.ietf.org/html/rfc7230#section-3.2 -fn is_field_vchar(x: u8) -> bool { - is_vchar(x) || is_obs_text(x) + true + // If accepting non-UTF8 header values causes breakage, + // removing the above "true" and uncommenting the below code + // would ameliorate it while still accepting most reasonable headers: + //match str::from_utf8(value) { + // Ok(_) => true, + // Err(_) => { + // warn!( + // "Rejecting spec-legal but non-UTF8 header value: {:?}", + // value + // ); + // false + // }, + // } } // https://tools.ietf.org/html/rfc5234#appendix-B.1 |