aboutsummaryrefslogtreecommitdiffstats
path: root/components/net_traits/pub_domains.rs
blob: 47ebe6f97754b923a008a62dcf6d54613a6b7798 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! Implementation of public domain matching.
//!
//! The list is a file located on the `resources` folder and loaded once on first need.
//!
//! The list can be updated with `./mach update-pub-domains` from this source:
//! https://publicsuffix.org/list/
//!
//! This implementation is not strictly following the specification of the list. Wildcards are not
//! restricted to appear only in the leftmost position, but the current list has no such cases so
//! we don't need to make the code more complex for it. The `mach` update command makes sure that
//! those cases are not present.

use servo_config::resource_files::read_resource_file;
use servo_url::ServoUrl;
use std::collections::HashSet;
use std::iter::FromIterator;
use std::str::from_utf8;

#[derive(Clone,Debug)]
pub struct PubDomainRules {
    rules: HashSet<String>,
    wildcards: HashSet<String>,
    exceptions: HashSet<String>,
}

lazy_static! {
    static ref PUB_DOMAINS: PubDomainRules = load_pub_domains();
}

impl<'a> FromIterator<&'a str> for PubDomainRules {
    fn from_iter<T>(iter: T) -> Self
        where T: IntoIterator<Item = &'a str>,
    {
        let mut result = PubDomainRules::new();
        for item in iter {
            if item.starts_with("!") {
                result.exceptions.insert(String::from(&item[1..]));
            } else if item.starts_with("*.") {
                result.wildcards.insert(String::from(&item[2..]));
            } else {
                result.rules.insert(String::from(item));
            }
        }
        result
    }
}

impl PubDomainRules {
    pub fn new() -> PubDomainRules {
        PubDomainRules {
            rules: HashSet::new(),
            wildcards: HashSet::new(),
            exceptions: HashSet::new(),
        }
    }
    pub fn parse(content: &str) -> PubDomainRules {
        content.lines()
            .map(str::trim)
            .filter(|s| !s.is_empty())
            .filter(|s| !s.starts_with("//"))
            .collect()
    }
    fn suffix_pair<'a>(&self, domain: &'a str) -> (&'a str, &'a str) {
        let domain = domain.trim_left_matches(".");
        let mut suffix = domain;
        let mut prev_suffix = domain;
        for (index, _) in domain.match_indices(".") {
            let next_suffix = &domain[index + 1..];
            if self.exceptions.contains(suffix) {
                return (next_suffix, suffix);
            } else if self.wildcards.contains(next_suffix) {
                return (suffix, prev_suffix);
            } else if self.rules.contains(suffix) {
                return (suffix, prev_suffix);
            } else {
                prev_suffix = suffix;
                suffix = next_suffix;
            }
        }
        return (suffix, prev_suffix);
    }
    pub fn public_suffix<'a>(&self, domain: &'a str) -> &'a str {
        let (public, _) = self.suffix_pair(domain);
        public
    }
    pub fn registrable_suffix<'a>(&self, domain: &'a str) -> &'a str {
        let (_, registrable) = self.suffix_pair(domain);
        registrable
    }
    pub fn is_public_suffix(&self, domain: &str) -> bool {
        // Speeded-up version of
        // domain != "" &&
        // self.public_suffix(domain) == domain.
        let domain = domain.trim_left_matches(".");
        match domain.find(".") {
            None => !domain.is_empty(),
            Some(index) => {
                !self.exceptions.contains(domain) && self.wildcards.contains(&domain[index + 1..]) ||
                self.rules.contains(domain)
            },
        }
    }
    pub fn is_registrable_suffix(&self, domain: &str) -> bool {
        // Speeded-up version of
        // self.public_suffix(domain) != domain &&
        // self.registrable_suffix(domain) == domain.
        let domain = domain.trim_left_matches(".");
        match domain.find(".") {
            None => false,
            Some(index) => {
                self.exceptions.contains(domain) ||
                !self.wildcards.contains(&domain[index + 1..]) && !self.rules.contains(domain) &&
                self.is_public_suffix(&domain[index + 1..])
            },
        }
    }
}

fn load_pub_domains() -> PubDomainRules {
    let content = read_resource_file("public_domains.txt").expect("Could not find public suffix list file");
    let content = from_utf8(&content).expect("Could not read public suffix list file");
    PubDomainRules::parse(content)
}

pub fn pub_suffix(domain: &str) -> &str {
    PUB_DOMAINS.public_suffix(domain)
}

pub fn reg_suffix(domain: &str) -> &str {
    PUB_DOMAINS.registrable_suffix(domain)
}

pub fn is_pub_domain(domain: &str) -> bool {
    PUB_DOMAINS.is_public_suffix(domain)
}

pub fn is_reg_domain(domain: &str) -> bool {
    PUB_DOMAINS.is_registrable_suffix(domain)
}

/// The registered domain name (aka eTLD+1) for a URL.
/// Returns None if the URL has no host name.
/// Returns the registered suffix for the host name if it is a domain.
/// Leaves the host name alone if it is an IP address.
pub fn reg_host<'a>(url: &'a ServoUrl) -> Option<&'a str> {
    url.domain().map(reg_suffix).or(url.host_str())
}