diff options
author | Brian Anderson <banderson@mozilla.com> | 2012-10-30 14:54:26 -0700 |
---|---|---|
committer | Brian Anderson <banderson@mozilla.com> | 2012-10-30 14:54:34 -0700 |
commit | bf197096459cee37cc1f160eb7fd3040f07b899c (patch) | |
tree | 5e669159898ada3f46ca6f298da848ba4911aca9 /src/servo/css | |
parent | 37d45c6872b2a26fc7f396d0f04148b1325873fc (diff) | |
download | servo-bf197096459cee37cc1f160eb7fd3040f07b899c.tar.gz servo-bf197096459cee37cc1f160eb7fd3040f07b899c.zip |
Move css parser and lexer to rust-css
Diffstat (limited to 'src/servo/css')
-rw-r--r-- | src/servo/css/lexer.rs | 266 |
-rw-r--r-- | src/servo/css/lexer_util.rs | 157 |
-rw-r--r-- | src/servo/css/parser.rs | 226 |
-rw-r--r-- | src/servo/css/parser_util.rs | 131 |
4 files changed, 0 insertions, 780 deletions
diff --git a/src/servo/css/lexer.rs b/src/servo/css/lexer.rs deleted file mode 100644 index c0d5af6996a..00000000000 --- a/src/servo/css/lexer.rs +++ /dev/null @@ -1,266 +0,0 @@ -//! Code to lex and tokenize css files - -use option::is_none; -use str::from_bytes; -use vec::push; - -use pipes::{Port, Chan}; - -use lexer_util::*; - -use std::net::url::Url; -use std::cell::Cell; - -enum ParserState { - CssElement, - CssRelation, - CssDescription, - CssAttribute -} - -type CssLexer = { - input_state: InputState, - mut parser_state: ParserState -}; - -pub enum Token { - StartDescription, - EndDescription, - Descendant, - Child, - Sibling, - Comma, - Element(~str), - Attr(newcss::values::Attr), - Description(~str, ~str), - Eof -} - -trait CssLexerMethods { - fn parse_css() -> Token; - fn parse_css_relation(c : u8) -> Token; - fn parse_css_element(c : u8) -> Token; - fn parse_css_attribute(c : u8) -> Token; - fn parse_css_description(c: u8) -> Token; -} - -impl CssLexer : CssLexerMethods { - fn parse_css() -> Token { - let mut ch: u8; - match self.input_state.get() { - CoeChar(c) => ch = c, - CoeEof => { return Eof; } - } - - let token = match self.parser_state { - CssDescription => self.parse_css_description(ch), - CssAttribute => self.parse_css_attribute(ch), - CssElement => self.parse_css_element(ch), - CssRelation => self.parse_css_relation(ch) - }; - - #debug["token=%?", token]; - return move token; - } - - fn parse_css_relation(c : u8) -> Token { - self.parser_state = CssElement; - - let token = match c { - '{' as u8 => { self.parser_state = CssDescription; StartDescription } - '>' as u8 => { Child } - '+' as u8 => { Sibling } - ',' as u8 => { Comma } - _ => { self.input_state.unget(c); Descendant } - }; - - self.input_state.eat_whitespace(); - - return move token; - } - - fn parse_css_element(c : u8) -> Token { - assert is_none(&self.input_state.lookahead); - - /* Check for special attributes with an implied element, - or a wildcard which is not a alphabet 
character.*/ - if c == '.' as u8 || c == '#' as u8 { - self.parser_state = CssAttribute; - self.input_state.unget(c); - return Element(~"*"); - } else if c == '*' as u8 { - self.parser_state = CssAttribute; - return Element(~"*"); - } - - self.input_state.unget(c); - let element = self.input_state.parse_ident(); - - self.parser_state = CssAttribute; - - return move Element(move element); - } - - fn parse_css_attribute(c : u8) -> Token { - let mut ch = c; - - /* If we've reached the end of this list of attributes, - look for the relation to the next element.*/ - if c.is_whitespace() { - self.parser_state = CssRelation; - self.input_state.eat_whitespace(); - - match self.input_state.get() { - CoeChar(c) => { ch = c } - CoeEof => { fail ~"File ended before description of style" } - } - - return self.parse_css_relation(ch); - } - - match ch { - '.' as u8 => return Attr(newcss::values::Includes(~"class", self.input_state.parse_ident())), - '#' as u8 => return Attr(newcss::values::Includes(~"id", self.input_state.parse_ident())), - '[' as u8 => { - let attr_name = self.input_state.parse_ident(); - - match self.input_state.get() { - CoeChar(c) => { ch = c; } - CoeEof => { fail ~"File ended before description finished"; } - } - - if ch == ']' as u8 { - return Attr(newcss::values::Exists(move attr_name)); - } else if ch == '=' as u8 { - let attr_val = self.input_state.parse_ident(); - self.input_state.expect(']' as u8); - return Attr(newcss::values::Exact(move attr_name, move attr_val)); - } else if ch == '~' as u8 { - self.input_state.expect('=' as u8); - let attr_val = self.input_state.parse_ident(); - self.input_state.expect(']' as u8); - return Attr(newcss::values::Includes(move attr_name, move attr_val)); - } else if ch == '|' as u8 { - self.input_state.expect('=' as u8); - let attr_val = self.input_state.parse_ident(); - self.input_state.expect(']' as u8); - return Attr(newcss::values::StartsWith(move attr_name, move attr_val)); - } - - fail #fmt("Unexpected symbol %c 
in attribute", ch as char); - } - _ => { fail #fmt("Unexpected symbol %c in attribute", ch as char); } - } - } - - fn parse_css_description(c: u8) -> Token { - let mut ch = c; - - if ch == '}' as u8 { - self.parser_state = CssElement; - self.input_state.eat_whitespace(); - return EndDescription; - } else if ch.is_whitespace() { - self.input_state.eat_whitespace(); - - match self.input_state.get() { - CoeChar(c) => { ch = c } - CoeEof => { fail ~"Reached end of file in CSS description" } - } - } - - let mut desc_name = ~[]; - - // Get the name of the descriptor - loop { - if ch.is_whitespace() { - self.input_state.eat_whitespace(); - } else if ch == ':' as u8 { - if desc_name.len() == 0u { - fail ~"Expected descriptor name"; - } else { - break; - } - } else { - push(&mut desc_name, ch); - } - - match self.input_state.get() { - CoeChar(c) => { ch = c } - CoeEof => { fail ~"Reached end of file in CSS description" } - } - } - - self.input_state.eat_whitespace(); - let mut desc_val = ~[]; - - // Get the value of the descriptor - loop { - match self.input_state.get() { - CoeChar(c) => { ch = c } - CoeEof => { fail ~"Reached end of file in CSS description" } - } - - if ch.is_whitespace() { - self.input_state.eat_whitespace(); - } else if ch == '}' as u8 { - if desc_val.len() == 0u { - fail ~"Expected descriptor value"; - } else { - self.input_state.unget('}' as u8); - break; - } - } else if ch == ';' as u8 { - if desc_val.len() == 0u { - fail ~"Expected descriptor value"; - } else { - break; - } - } else { - push(&mut desc_val, ch); - } - } - - return Description(from_bytes(desc_name), from_bytes(desc_val)); - } -} - -fn parser(input: DataStream, state : ParserState) -> CssLexer { - return { - input_state: { - mut lookahead: None, - mut buffer: ~[], - input: input, - mut eof: false - }, - mut parser_state: state - }; -} - -pub fn lex_css_from_bytes(input_stream: DataStream, result_chan : &Chan<Token>) { - let lexer = parser(input_stream, CssElement); - - loop { - let 
token = lexer.parse_css(); - let should_break = match token { Eof => true, _ => false }; - - result_chan.send(move token); - - if should_break { - break; - } - } -} - -fn spawn_css_lexer_from_string(content : ~str) -> pipes::Port<Token> { - let (result_chan, result_port) = pipes::stream(); - - do task::spawn |move result_chan, move content| { - let content = str::to_bytes(content); - let content = Cell(copy content); - let input = |move content| if !content.is_empty() { Some(content.take()) } else { None }; - lex_css_from_bytes(input, &result_chan); - } - - return move result_port; -} diff --git a/src/servo/css/lexer_util.rs b/src/servo/css/lexer_util.rs deleted file mode 100644 index bd31f8b5e1f..00000000000 --- a/src/servo/css/lexer_util.rs +++ /dev/null @@ -1,157 +0,0 @@ -/*! -A collection of functions that are useful for both css and html parsing -*/ - -use option::is_none; -use str::from_bytes; -use vec::push; -use comm::Port; - -enum CharOrEof { - CoeChar(u8), - CoeEof -} - -pub type DataStream = @fn() -> Option<~[u8]>; - -impl CharOrEof: cmp::Eq { - pure fn eq(other: &CharOrEof) -> bool { - match (self, *other) { - (CoeChar(a), CoeChar(b)) => a == b, - (CoeChar(*), _) | (_, CoeChar(*)) => false, - (CoeEof, CoeEof) => true, - } - } - pure fn ne(other: &CharOrEof) -> bool { - return !self.eq(other); - } -} - -type InputState = { - mut lookahead: Option<CharOrEof>, - mut buffer: ~[u8], - input: DataStream, - mut eof: bool -}; - -trait U8Methods { - fn is_whitespace() -> bool; - fn is_alpha() -> bool; -} - -impl u8 : U8Methods { - fn is_whitespace() -> bool { - return self == ' ' as u8 || self == '\n' as u8 || self == '\t' as u8; - } - - fn is_alpha() -> bool { - return (self >= ('A' as u8) && self <= ('Z' as u8)) || - (self >= ('a' as u8) && self <= ('z' as u8)); - } -} - -trait InputStateUtil { - fn get() -> CharOrEof; - fn unget(ch: u8); - fn parse_err(+err: ~str) -> !; - fn expect(ch: u8); - fn parse_ident() -> ~str; - fn expect_ident(+expected: ~str); - fn 
eat_whitespace(); -} - -impl InputState : InputStateUtil { - fn get() -> CharOrEof { - match copy self.lookahead { - Some(coe) => { - let rv = coe; - self.lookahead = None; - return rv; - } - None => { - /* fall through */ - } - } - - // FIXME: Lots of copies here - - if self.buffer.len() > 0 { - return CoeChar(vec::shift(&mut self.buffer)); - } - - if self.eof { - return CoeEof; - } - - match self.input() { - Some(data) => { - // TODO: change copy to move once we have match move - self.buffer = copy data; - return CoeChar(vec::shift(&mut self.buffer)); - } - None => { - self.eof = true; - return CoeEof; - } - } - } - - fn unget(ch: u8) { - assert is_none(&self.lookahead); - self.lookahead = Some(CoeChar(ch)); - } - - fn parse_err(err: ~str) -> ! { - fail err - } - - fn expect(ch: u8) { - match self.get() { - CoeChar(c) => { if c != ch { self.parse_err(#fmt("expected '%c'", ch as char)); } } - CoeEof => { self.parse_err(#fmt("expected '%c' at eof", ch as char)); } - } - } - - fn parse_ident() -> ~str { - let mut result: ~[u8] = ~[]; - loop { - match self.get() { - CoeChar(c) => { - if (c.is_alpha()) { push(&mut result, c); } - else if result.len() == 0u { self.parse_err(~"expected ident"); } - else { - self.unget(c); - break; - } - } - CoeEof => { - self.parse_err(~"expected ident"); - } - } - } - return str::from_bytes(result); - } - - fn expect_ident(expected: ~str) { - let actual = self.parse_ident(); - if expected != actual { - self.parse_err(#fmt("expected '%s' but found '%s'", expected, actual)); - } - } - - fn eat_whitespace() { - loop { - match self.get() { - CoeChar(c) => { - if !c.is_whitespace() { - self.unget(c); - return; - } - } - CoeEof => { - return; - } - } - } - } -} diff --git a/src/servo/css/parser.rs b/src/servo/css/parser.rs deleted file mode 100644 index ab84d52b1ed..00000000000 --- a/src/servo/css/parser.rs +++ /dev/null @@ -1,226 +0,0 @@ -/** -Constructs a list of css style rules from a token stream -*/ - -// TODO: fail according to the css 
spec instead of failing when things -// are not as expected - -use newcss::values::*; -// Disambiguate parsed Selector, Rule values from tokens -use css = newcss::values; -use tok = lexer; -use lexer::Token; -use comm::recv; -use option::{map, is_none}; -use vec::push; -use parser_util::*; -use newcss::color::parsing::parse_color; -use vec::push; - -type TokenReader = {stream : pipes::Port<Token>, mut lookahead : Option<Token>}; - -trait TokenReaderMethods { - fn get() -> Token; - fn unget(+tok : Token); -} - -impl TokenReader : TokenReaderMethods { - fn get() -> Token { - match copy self.lookahead { - Some(tok) => { self.lookahead = None; copy tok } - None => { self.stream.recv() } - } - } - - fn unget(tok : Token) { - assert is_none(&self.lookahead); - self.lookahead = Some(move tok); - } -} - -trait ParserMethods { - fn parse_element() -> Option<~css::Selector>; - fn parse_selector() -> Option<~[~css::Selector]>; - fn parse_description() -> Option<~[StyleDeclaration]>; - fn parse_rule() -> Option<~css::Rule>; -} - -impl TokenReader : ParserMethods { - fn parse_element() -> Option<~css::Selector> { - // Get the current element type - let elmt_name = match self.get() { - lexer::Element(tag) => { copy tag } - lexer::Eof => { return None; } - _ => { fail ~"Expected an element" } - }; - - let mut attr_list = ~[]; - - // Get the attributes associated with that element - loop { - let token = self.get(); - match token { - lexer::Attr(attr) => { push(&mut attr_list, copy attr); } - tok::StartDescription | tok::Descendant | tok::Child | tok::Sibling | tok::Comma => { - self.unget(move token); - break; - } - tok::Eof => { return None; } - tok::Element(_) => fail ~"Unexpected second element without relation to first element", - tok::EndDescription => fail ~"Unexpected '}'", - tok::Description(_, _) => fail ~"Unexpected description" - } - } - return Some(~css::Element(move elmt_name, move attr_list)); - } - - fn parse_selector() -> Option<~[~css::Selector]> { - let mut 
sel_list = ~[]; - - // Collect all the selectors that this rule applies to - loop { - let mut cur_sel; - - match self.parse_element() { - Some(elmt) => { cur_sel = copy elmt; } - None => { return None; } // we hit an eof in the middle of a rule - } - - loop { - let tok = self.get(); - let built_sel = move cur_sel; - - match tok { - tok::Descendant => { - match self.parse_element() { - Some(elmt) => { - let new_sel = copy elmt; - cur_sel = ~css::Descendant(move built_sel, move new_sel) - } - None => { return None; } - } - } - tok::Child => { - match self.parse_element() { - Some(elmt) => { - let new_sel = copy elmt; - cur_sel = ~css::Child(move built_sel, move new_sel) - } - None => { return None; } - } - } - tok::Sibling => { - match self.parse_element() { - Some(elmt) => { - let new_sel = copy elmt; - cur_sel = ~css::Sibling(move built_sel, move new_sel) - } - None => { return None; } - } - } - tok::StartDescription => { - push(&mut sel_list, move built_sel); - self.unget(tok::StartDescription); - break; - } - tok::Comma => { - push(&mut sel_list, move built_sel); - self.unget(tok::Comma); - break; - } - tok::Attr(_) | tok::EndDescription | tok::Element(_) | tok::Description(_, _) => { - fail #fmt["Unexpected token %? 
in elements", tok]; - } - tok::Eof => { return None; } - } - } - - // check if we should break out of the nesting loop as well - // TODO: fix this when rust gets labelled loops - let tok = self.get(); - match tok { - tok::StartDescription => { break; } - tok::Comma => { } - _ => { self.unget(move tok); } - } - } - - return Some(move sel_list); - } - - fn parse_description() -> Option<~[StyleDeclaration]> { - let mut desc_list : ~[StyleDeclaration]= ~[]; - - // Get the description to be applied to the selector - loop { - let tok = self.get(); - match tok { - tok::EndDescription => { break; } - tok::Description(prop, val) => { - let desc : Option<StyleDeclaration> = match prop { - // TODO: have color parsing return a ParseResult instead of a real value - ~"background-color" => parse_color(val).map(|res| BackgroundColor(Specified(BgColor(*res)))), - ~"color" => parse_color(val).map(|res| Color(Specified(TextColor(*res)))), - ~"display" => parse_display_type(val).extract(|res| Display(res)), - ~"font-size" => parse_font_size(val).extract(|res| FontSize(res)), - ~"height" => parse_box_sizing(val).extract(|res| Height(res)), - ~"width" => parse_box_sizing(val).extract(|res| Width(res)), - ~"border-width" => parse_length(val).map(|res| BorderWidth(Specified(*res))), - ~"border-color" => parse_color(val).map(|res| BorderColor(Specified(BdrColor(*res)))), - ~"position" => parse_position(val).extract(|res| Position(res)), - ~"top" => parse_length(val).map(|res| Top(Specified(*res))), - ~"right" => parse_length(val).map(|res| Right(Specified(*res))), - ~"bottom" => parse_length(val).map(|res| Bottom(Specified(*res))), - ~"left" => parse_length(val).map(|res| Left(Specified(*res))), - _ => { #debug["Recieved unknown style property '%s'", val]; None } - }; - match desc { - Some(d) => push(&mut desc_list, d), - None => { #debug["Couldn't parse value '%s' for property '%s'", val, prop] } - } - } - tok::Eof => { return None; } - tok::StartDescription | tok::Descendant | tok::Child 
| tok::Sibling - | tok::Comma | tok::Element(_) | tok::Attr(_) => { - fail #fmt["Unexpected token %? in description", tok]; - } - } - } - - return Some(move desc_list); - } - - fn parse_rule() -> Option<~css::Rule> { - // TODO: get rid of copies once match move works - let sel_list = match self.parse_selector() { - Some(list) => { copy list } - None => { return None; } - }; - - #debug("sel_list: %?", sel_list); - - // Get the description to be applied to the selector - let desc_list = match self.parse_description() { - Some(list) => { copy list } - None => { return None; } - }; - - #debug("desc_list: %?", desc_list); - - return Some(~(move sel_list, move desc_list)); - } -} - -pub fn build_stylesheet(stream : pipes::Port<Token>) -> ~[~css::Rule] { - let mut rule_list = ~[]; - let reader = {stream : move stream, mut lookahead : None}; - - loop { - match reader.parse_rule() { - Some(rule) => { push(&mut rule_list, copy rule); } - None => { break; } - } - } - - return move rule_list; -} diff --git a/src/servo/css/parser_util.rs b/src/servo/css/parser_util.rs deleted file mode 100644 index f012f401eb6..00000000000 --- a/src/servo/css/parser_util.rs +++ /dev/null @@ -1,131 +0,0 @@ -//! 
Helper functions to parse values of specific attributes - -use newcss::values::*; -use str::{pop_char, from_chars}; -use float::from_str; -use option::map; - -export parse_font_size; -export parse_size; -export parse_box_sizing; -export parse_display_type; - - -fn parse_length(str : &str) -> Option<Length> { - // TODO: use these once we stop lexing below - const PTS_PER_INCH: float = 72.0; - const CM_PER_INCH: float = 2.54; - const PX_PER_PT: float = 1.0 / 0.75; - - match str { - s if s.ends_with("in") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(1.0/0.75 * 72.0 * *f)), - s if s.ends_with("cm") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(*f / 2.54 * 72.0 * 1.0/0.75)), - s if s.ends_with("mm") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(*f * 0.1 / 2.54 * 72.0 * 1.0/0.75)), - s if s.ends_with("pt") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(1.0/0.75 * *f)), - s if s.ends_with("pc") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(1.0/0.75 * 12.0 * *f)), - s if s.ends_with("px") => from_str(str.substr(0, str.len() - 2)).map(|f| Px(*f)), - s if s.ends_with("em") => from_str(str.substr(0, str.len() - 2)).map(|f| Em(*f)), - s if s.ends_with("ex") => from_str(str.substr(0, str.len() - 2)).map(|f| Em(0.5 * *f)), - _ => None, - } -} - -fn parse_absolute_size(str : &str) -> ParseResult<AbsoluteSize> { - // FIXME: Bad copy. 
Can't match &str - match str.to_str() { - ~"xx-small" => Value(XXSmall), - ~"x-small" => Value(XSmall), - ~"small" => Value(Small), - ~"medium" => Value(Medium), - ~"large" => Value(Large), - ~"x-large" => Value(XLarge), - ~"xx-large" => Value(XXLarge), - _ => Fail - } -} - -fn parse_position(str: &str) -> ParseResult<CSSPosition> { - // FIXME: Bad copy - match str.to_str() { - ~"static" => Value(PosStatic), - ~"relative" => Value(PosRelative), - ~"absolute" => Value(PosAbsolute), - ~"fixed" => Value(PosFixed), - _ => Fail - } -} - -fn parse_relative_size(str: &str) -> ParseResult<RelativeSize> { - // FIXME: Bad copy. Can't match &str - match str.to_str() { - ~"smaller" => Value(Smaller), - ~"larger" => Value(Larger), - _ => Fail - } -} - -fn parse_font_size(_str: &str) -> ParseResult<CSSFontSize> { - // TODO: complete me - Value(LengthSize(Px(14.0))) -} - -// For width / height, and anything else with the same attribute values -fn parse_box_sizing(str : &str) -> ParseResult<BoxSizing> { - // FIXME: Bad copy. Can't match &str - match str.to_str() { - ~"auto" => Value(BoxAuto), - ~"inherit" => CSSInherit, - _ => Fail - } -} - -fn parse_display_type(str : &str) -> ParseResult<CSSDisplay> { - // FIXME: Bad copy. 
Can't match &str - match str.to_str() { - ~"inline" => Value(DisplayInline), - ~"block" => Value(DisplayBlock), - ~"none" => Value(DisplayNone), - _ => { #debug["Recieved unknown display value '%s'", str]; Fail } - } -} - -#[cfg(test)] -mod test { - use css::lexer::spawn_css_lexer_from_string; - use css::parser::build_stylesheet; - use newcss::values::{Stylesheet, Element, FontSize, Width, Height}; - - // TODO: use helper methods to create test values - - #[test] - fn should_match_font_sizes() { - let input = ~"* {font-size:12px; font-size:inherit; font-size:200%; font-size:x-small}"; - let token_port = spawn_css_lexer_from_string(move input); - let _actual_rule = build_stylesheet(move token_port); - let _expected_rule : Stylesheet = ~[~(~[~Element(~"*", ~[])], - ~[FontSize(Specified(LengthSize(Px(12.0)))), - FontSize(Specified(PercentSize(100.0))), - FontSize(Specified(PercentSize(200.0))), - FontSize(Specified(LengthSize(Px(12.0))))])]; - - // TODO: fix me once StyleDeclaration is a trait, not an enum - //assert actual_rule == expected_rule; - } - - #[test] - fn should_match_width_height() { - let input = ~"* {width:20%; height:auto; width:20px; width:3in; height:70px; height:30px}"; - let token_port = spawn_css_lexer_from_string(move input); - let _actual_rule = build_stylesheet(move token_port); - let _expected_rule : Stylesheet = ~[~(~[~Element(~"*", ~[])], - ~[Width(Specified(BoxPercent(20.0))), - Height(Specified(BoxAuto)), - Width(Specified(BoxLength(Px(20.0)))), - Width(Specified(BoxLength(Px(216.0)))), - Height(Specified(BoxLength(Px(70.0)))), - Height(Specified(BoxLength(Px(30.0))))])]; - - // TODO: fix me once StyleDeclaration is a trait, not an enum - //assert actual_rule == expected_rule; - } -} |