diff options
Diffstat (limited to 'src/servo/parser/html.rs')
-rw-r--r-- | src/servo/parser/html.rs | 92 |
1 files changed, 84 insertions, 8 deletions
diff --git a/src/servo/parser/html.rs b/src/servo/parser/html.rs index 89e53c1adec..7e472081de2 100644 --- a/src/servo/parser/html.rs +++ b/src/servo/parser/html.rs @@ -1,14 +1,22 @@ import comm::{port, chan}; +enum parse_state { + ps_normal, + ps_tag +} + type parser = { mut lookahead: option<char_or_eof>, + mut state: parse_state, reader: io::reader }; enum token { - to_start_tag(str), + to_start_opening_tag(str), + to_end_opening_tag, to_end_tag(str), to_text(str), + to_attr(str, str), to_doctype, to_eof } @@ -18,6 +26,18 @@ enum char_or_eof { coe_eof } +impl u8_methods for u8 { + fn is_alpha() -> bool { + ret (self >= ('A' as u8) && self <= ('Z' as u8)) || + (self >= ('a' as u8) && self <= ('z' as u8)); + } +} + +impl u8_vec_methods for [u8] { + fn to_str() -> str { ret str::from_bytes(self); } + fn to_str_token() -> token { ret to_text(self.to_str()); } +} + impl methods for parser { fn get() -> char_or_eof { alt self.lookahead { @@ -62,8 +82,7 @@ impl methods for parser { loop { alt self.get() { coe_char(c) { - if (c >= ('A' as u8) && c <= ('Z' as u8)) || - (c >= ('a' as u8) && c <= ('z' as u8)) { + if (c.is_alpha()) { result += [c]; } else if result.len() == 0u { self.parse_err("expected ident"); @@ -112,6 +131,14 @@ impl methods for parser { coe_eof { ret to_eof; } } + ret alt self.state { + ps_normal { self.parse_in_normal_state(ch) } + ps_tag { self.parse_in_tag_state(ch) } + } + } + + fn parse_in_normal_state(c: u8) -> token { + let mut ch = c; if ch == ('<' as u8) { alt self.get() { coe_char(c) { ch = c; } @@ -139,8 +166,9 @@ impl methods for parser { self.eat_whitespace(); let ident = self.parse_ident(); self.eat_whitespace(); - self.expect('>' as u8); - ret to_start_tag(ident); + + self.state = ps_tag; + ret to_start_opening_tag(ident); } // Make a text node. @@ -150,18 +178,66 @@ impl methods for parser { coe_char(c) { if c == ('<' as u8) { self.unget(c); - ret to_text(str::from_bytes(s)); + ret s.to_str_token(); } s += [c]; } - coe_eof { ret to_text(str::from_bytes(s)); } + coe_eof { ret s.to_str_token(); } + } + } + } + + fn parse_in_tag_state(c: u8) -> token { + let mut ch = c; + + if ch == ('>' as u8) { + self.state = ps_normal; + ret to_end_opening_tag; + } + + if !ch.is_alpha() { + fail "expected alphabetical in tag"; + } + + // Parse an attribute. + let mut attribute_name = [ch]; + loop { + alt self.get() { + coe_char(c) { + if c == ('=' as u8) { break; } + attribute_name += [c]; + } + coe_eof { + ret to_attr(attribute_name.to_str(), + attribute_name.to_str()); } + } + } + + // Parse the attribute value. + self.expect('"' as u8); + let mut attribute_value = []; + loop { + alt self.get() { + coe_char(c) { + if c == ('"' as u8) { break; } + attribute_value += [c]; + } + coe_eof { + ret to_attr(attribute_name.to_str(), + attribute_value.to_str()); + } } } + + // Eat whitespace. + self.eat_whitespace(); + + ret to_attr(attribute_name.to_str(), attribute_value.to_str()); } } fn parser(reader: io::reader) -> parser { - ret { mut lookahead: none, reader: reader }; + ret { mut lookahead: none, mut state: ps_normal, reader: reader }; } fn spawn_parser_task(filename: str) -> port<token> { |