Diffstat (limited to 'python/tidy/tidy.py')
-rw-r--r-- | python/tidy/tidy.py | 1172 |
1 files changed, 1172 insertions, 0 deletions
diff --git a/python/tidy/tidy.py b/python/tidy/tidy.py
new file mode 100644
index 00000000000..1e5de4080c0
--- /dev/null
+++ b/python/tidy/tidy.py
@@ -0,0 +1,1172 @@
+# Copyright 2013 The Servo Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+import fnmatch
+import glob
+import itertools
+import json
+import os
+import re
+import subprocess
+import sys
+
+import colorama
+import toml
+import voluptuous
+import yaml
+
+from .licenseck import OLD_MPL, MPL, APACHE, COPYRIGHT, licenses_toml, licenses_dep_toml
+
+TOPDIR = os.path.abspath(os.path.dirname(sys.argv[0]))
+WPT_PATH = os.path.join(".", "tests", "wpt")
+SUITES = ["web-platform-tests", os.path.join("mozilla", "tests")]
+
+
+def wpt_path(*args):
+    return os.path.join(WPT_PATH, *args)
+
+
+CONFIG_FILE_PATH = os.path.join(".", "servo-tidy.toml")
+WPT_MANIFEST_PATH = wpt_path("include.ini")
+# regex source https://stackoverflow.com/questions/6883049/
+URL_REGEX = re.compile(br'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+')
+
+# Import wptmanifest only when we do have wpt in tree, i.e. we're not
+# inside a Firefox checkout.
+if os.path.isfile(WPT_MANIFEST_PATH):
+    sys.path.append(wpt_path("web-platform-tests", "tools", "wptrunner", "wptrunner"))
+    from wptmanifest import parser, node
+
+# Default configs
+config = {
+    "skip-check-length": False,
+    "skip-check-licenses": False,
+    "check-alphabetical-order": True,
+    "check-ordered-json-keys": [],
+    "lint-scripts": [],
+    "blocked-packages": {},
+    "ignore": {
+        "files": [
+            os.path.join(".", "."),  # ignore hidden files
+        ],
+        "directories": [
+            os.path.join(".", "."),  # ignore hidden directories
+        ],
+        "packages": [],
+    },
+    "check_ext": {}
+}
+
+COMMENTS = [b"// ", b"# ", b" *", b"/* "]
+
+# File patterns to include in the non-WPT tidy check.
+FILE_PATTERNS_TO_CHECK = ["*.rs", "*.rc", "*.cpp", "*.c",
+                          "*.h", "*.py", "*.sh",
+                          "*.toml", "*.webidl", "*.json", "*.html",
+                          "*.yml"]
+
+# File patterns that are ignored for all tidy and lint checks.
+FILE_PATTERNS_TO_IGNORE = ["*.#*", "*.pyc", "fake-ld.sh", "*.ogv", "*.webm"]
+
+SPEC_BASE_PATH = "components/script/dom/"
+
+WEBIDL_STANDARDS = [
+    b"//www.khronos.org/registry/webgl/extensions",
+    b"//www.khronos.org/registry/webgl/specs",
+    b"//developer.mozilla.org/en-US/docs/Web/API",
+    b"//dev.w3.org/2006/webapi",
+    b"//dev.w3.org/csswg",
+    b"//dev.w3.org/fxtf",
+    b"//dvcs.w3.org/hg",
+    b"//dom.spec.whatwg.org",
+    b"//drafts.csswg.org",
+    b"//drafts.css-houdini.org",
+    b"//drafts.fxtf.org",
+    b"//console.spec.whatwg.org",
+    b"//encoding.spec.whatwg.org",
+    b"//fetch.spec.whatwg.org",
+    b"//html.spec.whatwg.org",
+    b"//url.spec.whatwg.org",
+    b"//xhr.spec.whatwg.org",
+    b"//w3c.github.io",
+    b"//heycam.github.io/webidl",
+    b"//webbluetoothcg.github.io/web-bluetooth/",
+    b"//svgwg.org/svg2-draft",
+    b"//wicg.github.io",
+    b"//webaudio.github.io",
+    b"//immersive-web.github.io/",
+    b"//github.com/immersive-web/webxr-test-api/",
+    b"//github.com/immersive-web/webxr-hands-input/",
+    b"//gpuweb.github.io",
+    # Not a URL
+    b"// This interface is entirely internal to Servo, and should not be"
+    + b" accessible to\n// web pages."
+]
+
+
+def is_iter_empty(iterator):
+    try:
+        obj = next(iterator)
+        return True, itertools.chain((obj,), iterator)
+    except StopIteration:
+        return False, iterator
+
+
+def normilize_paths(paths):
+    if isinstance(paths, str):
+        return os.path.join(*paths.split('/'))
+    else:
+        return [os.path.join(*path.split('/')) for path in paths]
+
+
+# A simple wrapper for iterators to show progress
+# (Note that it's inefficient for giant iterators, since it iterates once to get the upper bound)
+def progress_wrapper(iterator):
+    list_of_stuff = list(iterator)
+    total_files, progress = len(list_of_stuff), 0
+    for idx, thing in enumerate(list_of_stuff):
+        progress = int(float(idx + 1) / total_files * 100)
+        sys.stdout.write('\r  Progress: %s%% (%d/%d)' % (progress, idx + 1, total_files))
+        sys.stdout.flush()
+        yield thing
+
+
+class FileList(object):
+    def __init__(self, directory, only_changed_files=False, exclude_dirs=[], progress=True):
+        self.directory = directory
+        self.excluded = exclude_dirs
+        self.generator = self._filter_excluded() if exclude_dirs else self._default_walk()
+        if only_changed_files:
+            self.generator = self._git_changed_files()
+        if progress:
+            self.generator = progress_wrapper(self.generator)
+
+    def _default_walk(self):
+        for root, _, files in os.walk(self.directory):
+            for f in files:
+                yield os.path.join(root, f)
+
+    def _git_changed_files(self):
+        args = ["git", "log", "-n1", "--merges", "--format=%H"]
+        last_merge = subprocess.check_output(args, universal_newlines=True).strip()
+        if not last_merge:
+            return
+
+        args = ["git", "diff", "--name-only", last_merge, self.directory]
+        file_list = normilize_paths(subprocess.check_output(args, universal_newlines=True).splitlines())
+
+        for f in file_list:
+            if not any(os.path.join('.', os.path.dirname(f)).startswith(path) for path in self.excluded):
+                yield os.path.join('.', f)
+
+    def _filter_excluded(self):
+        for root, dirs, files in os.walk(self.directory, topdown=True):
+            # modify 'dirs' in-place so that we don't do unnecessary traversals in excluded directories
+            dirs[:] = [d for d in dirs if not any(os.path.join(root, d).startswith(name) for name in self.excluded)]
+            for rel_path in files:
+                yield os.path.join(root, rel_path)
+
+    def __iter__(self):
+        return self.generator
+
+    def next(self):
+        return next(self.generator)
+
+
+def filter_file(file_name):
+    if any(file_name.startswith(ignored_file) for ignored_file in config["ignore"]["files"]):
+        return False
+    base_name = os.path.basename(file_name)
+    if any(fnmatch.fnmatch(base_name, pattern) for pattern in FILE_PATTERNS_TO_IGNORE):
+        return False
+    return True
+
+
+def filter_files(start_dir, only_changed_files, progress):
+    file_iter = FileList(start_dir, only_changed_files=only_changed_files,
+                         exclude_dirs=config["ignore"]["directories"], progress=progress)
+    # always yield Cargo.lock so that the correctness of transitive dependencies is checked
+    yield "./Cargo.lock"
+
+    for file_name in iter(file_iter):
+        base_name = os.path.basename(file_name)
+        if not any(fnmatch.fnmatch(base_name, pattern) for pattern in FILE_PATTERNS_TO_CHECK):
+            continue
+        if not filter_file(file_name):
+            continue
+        yield file_name
+
+
+def uncomment(line):
+    for c in COMMENTS:
+        if line.startswith(c):
+            if line.endswith(b"*/"):
+                return line[len(c):(len(line) - 3)].strip()
+            return line[len(c):].strip()
+
+
+def is_apache_licensed(header):
+    if APACHE in header:
+        return any(c in header for c in COPYRIGHT)
+
+
+def check_license(file_name, lines):
+    if any(file_name.endswith(ext) for ext in (".yml", ".toml", ".lock", ".json", ".html")) or \
+       config["skip-check-licenses"]:
+        return
+
+    if lines[0].startswith(b"#!") and lines[1].strip():
+        yield (1, "missing blank line after shebang")
+
+    blank_lines = 0
+    max_blank_lines = 2 if lines[0].startswith(b"#!") else 1
+    license_block = []
+
+    for line in lines:
+        line = line.rstrip(b'\n')
+        if not line.strip():
+            blank_lines += 1
+            if blank_lines >= max_blank_lines:
+                break
+            continue
+        line = uncomment(line)
+        if line is not None:
+            license_block.append(line)
+
+    header = (b" ".join(license_block)).decode("utf-8")
+    valid_license = OLD_MPL in header or MPL in header or is_apache_licensed(header)
+    acknowledged_bad_license = "xfail-license" in header
+    if not (valid_license or acknowledged_bad_license):
+        yield (1, "incorrect license")
+
+
+def check_modeline(file_name, lines):
+    for idx, line in enumerate(lines[:5]):
+        if re.search(b'^.*[ \t](vi:|vim:|ex:)[ \t]', line):
+            yield (idx + 1, "vi modeline present")
+        elif re.search(br'-\*-.*-\*-', line, re.IGNORECASE):
+            yield (idx + 1, "emacs file variables present")
+
+
+def check_length(file_name, idx, line):
+    if any(file_name.endswith(ext) for ext in (".yml", ".lock", ".json", ".html", ".toml")) or \
+       config["skip-check-length"]:
+        return
+
+    # Prefer shorter lines when shell scripting.
+    max_length = 80 if file_name.endswith(".sh") else 120
+    if len(line.rstrip(b'\n')) > max_length and not is_unsplittable(file_name, line):
+        yield (idx + 1, "Line is longer than %d characters" % max_length)
+
+
+def contains_url(line):
+    return bool(URL_REGEX.search(line))
+
+
+def is_unsplittable(file_name, line):
+    return (
+        contains_url(line)
+        or file_name.endswith(".rs")
+        and line.startswith(b"use ")
+        and b"{" not in line
+    )
+
+
+def check_whatwg_specific_url(idx, line):
+    match = re.search(br"https://html\.spec\.whatwg\.org/multipage/[\w-]+\.html#([\w\'\:-]+)", line)
+    if match is not None:
+        preferred_link = "https://html.spec.whatwg.org/multipage/#{}".format(match.group(1))
+        yield (idx + 1, "link to WHATWG may break in the future, use this format instead: {}".format(preferred_link))
+
+
+def check_whatwg_single_page_url(idx, line):
+    match = re.search(br"https://html\.spec\.whatwg\.org/#([\w\'\:-]+)", line)
+    if match is not None:
+        preferred_link = "https://html.spec.whatwg.org/multipage/#{}".format(match.group(1))
+        yield (idx + 1, "links to WHATWG single-page url, change to multi page: {}".format(preferred_link))
+
+
+def check_whitespace(idx, line):
+    if line.endswith(b"\n"):
+        line = line[:-1]
+    else:
+        yield (idx + 1, "no newline at EOF")
+
+    if line.endswith(b" "):
+        yield (idx + 1, "trailing whitespace")
+
+    if b"\t" in line:
+        yield (idx + 1, "tab on line")
+
+    if b"\r" in line:
+        yield (idx + 1, "CR on line")
+
+
+def check_by_line(file_name, lines):
+    for idx, line in enumerate(lines):
+        errors = itertools.chain(
+            check_length(file_name, idx, line),
+            check_whitespace(idx, line),
+            check_whatwg_specific_url(idx, line),
+            check_whatwg_single_page_url(idx, line),
+        )
+
+        for error in errors:
+            yield error
+
+
+def check_flake8(file_name, contents):
+    if not file_name.endswith(".py"):
+        return
+
+    ignore = {
+        "W291",  # trailing whitespace; the standard tidy process will enforce no trailing whitespace
+        "W503",  # linebreak before binary operator; replaced by W504 - linebreak after binary operator
+        "E501",  # 80 character line length; the standard tidy process will enforce line length
+    }
+
+    output = ""
+    try:
+        args = ["flake8", "--ignore=" + ",".join(ignore), file_name]
+        subprocess.check_output(args, universal_newlines=True)
+    except subprocess.CalledProcessError as e:
+        output = e.output
+    for error in output.splitlines():
+        _, line_num, _, message = error.split(":", 3)
+        yield line_num, message.strip()
+
+
+def check_lock(file_name, contents):
+    def find_reverse_dependencies(name, content):
+        for package in itertools.chain([content.get("root", {})], content["package"]):
+            for dependency in package.get("dependencies", []):
+                parts = dependency.split()
+                dependency = (parts[0], parts[1] if len(parts) > 1 else None, parts[2] if len(parts) > 2 else None)
+                if dependency[0] == name:
+                    yield package["name"], package["version"], dependency
+
+    if not file_name.endswith(".lock"):
+        return
+
+    # Package names to be neglected (as named by cargo)
+    exceptions = config["ignore"]["packages"]
+
+    content = toml.loads(contents.decode("utf-8"))
+
+    packages_by_name = {}
+    for package in content.get("package", []):
+        if "replace" in package:
+            continue
+        source = package.get("source", "")
+        if source == r"registry+https://github.com/rust-lang/crates.io-index":
+            source = "crates.io"
+        packages_by_name.setdefault(package["name"], []).append((package["version"], source))
+
+    for name in exceptions:
+        if name not in packages_by_name:
+            yield (1, "duplicates are allowed for `{}` but it is not a dependency".format(name))
+
+    for (name, packages) in packages_by_name.items():
+        has_duplicates = len(packages) > 1
+        duplicates_allowed = name in exceptions
+
+        if has_duplicates == duplicates_allowed:
+            continue
+
+        if duplicates_allowed:
+            message = 'duplicates for `{}` are allowed, but only single version found'.format(name)
+        else:
+            message = "duplicate versions for package `{}`".format(name)
+
+        packages.sort()
+        packages_dependencies = list(find_reverse_dependencies(name, content))
+        for version, source in packages:
+            short_source = source.split("#")[0].replace("git+", "")
+            message += "\n\t\033[93mThe following packages depend on version {} from '{}':\033[0m" \
+                       .format(version, short_source)
+            for pname, package_version, dependency in packages_dependencies:
+                if (not dependency[1] or version in dependency[1]) and \
+                   (not dependency[2] or short_source in dependency[2]):
+                    message += "\n\t\t" + pname + " " + package_version
+        yield (1, message)
+
+    # Check to see if we are transitively using any blocked packages
+    blocked_packages = config["blocked-packages"]
+    # Create map to keep track of visited exception packages
+    visited_whitelisted_packages = {package: {} for package in blocked_packages.keys()}
+
+    for package in content.get("package", []):
+        package_name = package.get("name")
+        package_version = package.get("version")
+        for dependency in package.get("dependencies", []):
+            dependency = dependency.split()
+            dependency_name = dependency[0]
+            whitelist = blocked_packages.get(dependency_name)
+            if whitelist is not None:
+                if package_name not in whitelist:
+                    fmt = "Package {} {} depends on blocked package {}."
+                    message = fmt.format(package_name, package_version, dependency_name)
+                    yield (1, message)
+                else:
+                    visited_whitelisted_packages[dependency_name][package_name] = True
+
+    # Check if all the exceptions to blocked packages actually depend on the blocked package
+    for dependency_name, package_names in blocked_packages.items():
+        for package_name in package_names:
+            if not visited_whitelisted_packages[dependency_name].get(package_name):
+                fmt = "Package {} is not required to be an exception of blocked package {}."
+                message = fmt.format(package_name, dependency_name)
+                yield (1, message)
+
+
+def check_toml(file_name, lines):
+    if not file_name.endswith("Cargo.toml"):
+        return
+    ok_licensed = False
+    for idx, line in enumerate(map(lambda line: line.decode("utf-8"), lines)):
+        if idx == 0 and "[workspace]" in line:
+            return
+        line_without_comment, _, _ = line.partition("#")
+        if line_without_comment.find("*") != -1:
+            yield (idx + 1, "found asterisk instead of minimum version number")
+        for license_line in licenses_toml:
+            ok_licensed |= (license_line in line)
+    if not ok_licensed:
+        yield (0, ".toml file should contain a valid license.")
+
+
+def check_shell(file_name, lines):
+    if not file_name.endswith(".sh"):
+        return
+
+    shebang = "#!/usr/bin/env bash"
+    required_options = ["set -o errexit", "set -o nounset", "set -o pipefail"]
+
+    did_shebang_check = False
+
+    if not lines:
+        yield (0, 'script is an empty file')
+        return
+
+    if lines[0].rstrip() != shebang.encode("utf-8"):
+        yield (1, 'script does not have shebang "{}"'.format(shebang))
+
+    for idx, line in enumerate(map(lambda line: line.decode("utf-8"), lines[1:])):
+        stripped = line.rstrip()
+        # Comments or blank lines are ignored. (Trailing whitespace is caught with a separate linter.)
+        if line.startswith("#") or stripped == "":
+            continue
+
+        if not did_shebang_check:
+            if stripped in required_options:
+                required_options.remove(stripped)
+            else:
+                # The first non-comment, non-whitespace, non-option line is the first "real" line of the script.
+                # The shebang, options, etc. must come before this.
+                if required_options:
+                    formatted = ['"{}"'.format(opt) for opt in required_options]
+                    yield (idx + 1, "script is missing options {}".format(", ".join(formatted)))
+                did_shebang_check = True
+
+        if "`" in stripped:
+            yield (idx + 1, "script should not use backticks for command substitution")
+
+        if " [ " in stripped or stripped.startswith("[ "):
+            yield (idx + 1, "script should use `[[` instead of `[` for conditional testing")
+
+        for dollar in re.finditer(r'\$', stripped):
+            next_idx = dollar.end()
+            if next_idx < len(stripped):
+                next_char = stripped[next_idx]
+                if not (next_char == '{' or next_char == '('):
+                    yield(idx + 1, "variable substitutions should use the full \"${VAR}\" form")
+
+
+def rec_parse(current_path, root_node):
+    dirs = []
+    for item in root_node.children:
+        if isinstance(item, node.DataNode):
+            next_depth = os.path.join(current_path, item.data)
+            dirs.append(next_depth)
+            dirs += rec_parse(next_depth, item)
+    return dirs
+
+
+def check_manifest_dirs(config_file, print_text=True):
+    if not os.path.exists(config_file):
+        yield(config_file, 0, "%s manifest file is required but was not found" % config_file)
+        return
+
+    # Load configs from include.ini
+    with open(config_file, "rb") as content:
+        conf_file = content.read()
+        lines = conf_file.splitlines(True)
+
+    if print_text:
+        print('\rChecking the wpt manifest file...')
+
+    p = parser.parse(lines)
+    paths = rec_parse(wpt_path("web-platform-tests"), p)
+    for idx, path in enumerate(paths):
+        if '_mozilla' in path or '_webgl' in path or '_webgpu' in path:
+            continue
+        if not os.path.isdir(path):
+            yield(config_file, idx + 1, "Path in manifest was not found: {}".format(path))
+
+
+def check_rust(file_name, lines):
+    if not file_name.endswith(".rs") or \
+       file_name.endswith(".mako.rs") or \
+       file_name.endswith(os.path.join("style", "build.rs")) or \
+       file_name.endswith(os.path.join("unit", "style", "stylesheets.rs")):
+        return
+
+    comment_depth = 0
+    merged_lines = ''
+    import_block = False
+    whitespace = False
+
+    is_lib_rs_file = file_name.endswith("lib.rs")
+
+    PANIC_NOT_ALLOWED_PATHS = [
+        os.path.join("*", "components", "compositing", "compositor.rs"),
+        os.path.join("*", "components", "constellation", "*"),
+        os.path.join("*", "ports", "winit", "headed_window.rs"),
+        os.path.join("*", "ports", "winit", "headless_window.rs"),
+        os.path.join("*", "ports", "winit", "embedder.rs"),
+        os.path.join("*", "rust_tidy.rs"),  # This is for the tests.
+    ]
+    is_panic_not_allowed_rs_file = any([
+        glob.fnmatch.fnmatch(file_name, path) for path in PANIC_NOT_ALLOWED_PATHS])
+
+    prev_open_brace = False
+    multi_line_string = False
+    prev_crate = {}
+    prev_mod = {}
+    prev_feature_name = ""
+    indent = 0
+
+    check_alphabetical_order = config["check-alphabetical-order"]
+    decl_message = "{} is not in alphabetical order"
+    decl_expected = "\n\t\033[93mexpected: {}\033[0m"
+    decl_found = "\n\t\033[91mfound: {}\033[0m"
+    panic_message = "unwrap() or panic!() found in code which should not panic."
+
+    for idx, original_line in enumerate(map(lambda line: line.decode("utf-8"), lines)):
+        # simplify the analysis
+        line = original_line.strip()
+        indent = len(original_line) - len(line)
+
+        is_attribute = re.search(r"#\[.*\]", line)
+        is_comment = re.search(r"^//|^/\*|^\*", line)
+
+        # Simple heuristic to avoid common case of no comments.
+        if '/' in line:
+            comment_depth += line.count('/*')
+            comment_depth -= line.count('*/')
+
+        if line.endswith('\\'):
+            merged_lines += line[:-1]
+            continue
+        if comment_depth:
+            merged_lines += line
+            continue
+        if merged_lines:
+            line = merged_lines + line
+            merged_lines = ''
+
+        if multi_line_string:
+            line, count = re.subn(
+                r'^(\\.|[^"\\])*?"', '', line, count=1)
+            if count == 1:
+                multi_line_string = False
+            else:
+                continue
+
+        # Ignore attributes, comments, and imports
+        # Keep track of whitespace to enable checking for a merged import block
+        if import_block:
+            if not (is_comment or is_attribute or line.startswith("use ")):
+                whitespace = line == ""
+
+                if not whitespace:
+                    import_block = False
+
+        # get rid of strings and chars because of cases like regex expressions; keep attributes
+        if not is_attribute and not is_comment:
+            line = re.sub(r'"(\\.|[^\\"])*?"', '""', line)
+            line = re.sub(
+                r"'(\\.|[^\\']|(\\x[0-9a-fA-F]{2})|(\\u{[0-9a-fA-F]{1,6}}))'",
+                "''", line)
+            # If, after parsing all single-line strings, we still have
+            # an odd number of double quotes, this line starts a
+            # multiline string
+            if line.count('"') % 2 == 1:
+                line = re.sub(r'"(\\.|[^\\"])*?$', '""', line)
+                multi_line_string = True
+
+        # get rid of comments
+        line = re.sub(r'//.*?$|/\*.*?$|^\*.*?$', '//', line)
+
+        # get rid of attributes that do not contain =
+        line = re.sub(r'^#[A-Za-z0-9\(\)\[\]_]*?$', '#[]', line)
+
+        # flag this line if it matches one of the following regular expressions
+        # tuple format: (pattern, format_message, filter_function(match, line))
+        def no_filter(match, line):
+            return True
+        regex_rules = [
+            # There should not be any extra pointer dereferencing
+            (r": &Vec<", "use &[T] instead of &Vec<T>", no_filter),
+            # No benefit over using &str
+            (r": &String", "use &str instead of &String", no_filter),
+            # There should not be any use of banned types:
+            # Cell<JSVal>, Cell<Dom<T>>, DomRefCell<Dom<T>>, DomRefCell<HEAP<T>>
+            (r"(\s|:)+Cell<JSVal>", "Banned type Cell<JSVal> detected. Use MutDom<JSVal> instead", no_filter),
+            (r"(\s|:)+Cell<Dom<.+>>", "Banned type Cell<Dom<T>> detected. Use MutDom<T> instead", no_filter),
+            (r"DomRefCell<Dom<.+>>", "Banned type DomRefCell<Dom<T>> detected. Use MutDom<T> instead", no_filter),
+            (r"DomRefCell<Heap<.+>>", "Banned type DomRefCell<Heap<T>> detected. Use MutDom<T> instead", no_filter),
+            # No benefit to using &Root<T>
+            (r": &Root<", "use &T instead of &Root<T>", no_filter),
+            (r": &DomRoot<", "use &T instead of &DomRoot<T>", no_filter),
+            (r"^&&", "operators should go at the end of the first line", no_filter),
+            # -> () is unnecessary
+            (r"-> \(\)", "encountered function signature with -> ()", no_filter),
+        ]
+
+        for pattern, message, filter_func in regex_rules:
+            for match in re.finditer(pattern, line):
+                if filter_func(match, line):
+                    yield (idx + 1, message.format(*match.groups(), **match.groupdict()))
+
+        if prev_open_brace and not line:
+            yield (idx + 1, "found an empty line following a {")
+        prev_open_brace = line.endswith("{")
+
+        # check alphabetical order of extern crates
+        if line.startswith("extern crate "):
+            # strip "extern crate " from the beginning and ";" from the end
+            crate_name = line[13:-1]
+            if indent not in prev_crate:
+                prev_crate[indent] = ""
+            if prev_crate[indent] > crate_name and check_alphabetical_order:
+                yield(idx + 1, decl_message.format("extern crate declaration")
+                      + decl_expected.format(prev_crate[indent])
+                      + decl_found.format(crate_name))
+            prev_crate[indent] = crate_name
+
+        if line == "}":
+            for i in [i for i in prev_crate.keys() if i > indent]:
+                del prev_crate[i]
+
+        # check alphabetical order of feature attributes in lib.rs files
+        if is_lib_rs_file:
+            match = re.search(r"#!\[feature\((.*)\)\]", line)
+
+            if match:
+                features = list(map(lambda w: w.strip(), match.group(1).split(',')))
+                sorted_features = sorted(features)
+                if sorted_features != features and check_alphabetical_order:
+                    yield(idx + 1, decl_message.format("feature attribute")
+                          + decl_expected.format(tuple(sorted_features))
+                          + decl_found.format(tuple(features)))
+
+                if prev_feature_name > sorted_features[0] and check_alphabetical_order:
+                    yield(idx + 1, decl_message.format("feature attribute")
+                          + decl_expected.format(prev_feature_name + " after " + sorted_features[0])
+                          + decl_found.format(prev_feature_name + " before " + sorted_features[0]))
+
+                prev_feature_name = sorted_features[0]
+            else:
+                # not a feature attribute line, so empty previous name
+                prev_feature_name = ""
+
+        if is_panic_not_allowed_rs_file:
+            match = re.search(r"unwrap\(|panic!\(", line)
+            if match:
+                yield (idx + 1, panic_message)
+
+        # modules must be in the same line and alphabetically sorted
+        if line.startswith("mod ") or line.startswith("pub mod "):
+            # strip /(pub )?mod/ from the left and ";" from the right
+            mod = line[4:-1] if line.startswith("mod ") else line[8:-1]
+
+            if (idx - 1) < 0 or "#[macro_use]" not in lines[idx - 1].decode("utf-8"):
+                match = line.find(" {")
+                if indent not in prev_mod:
+                    prev_mod[indent] = ""
+                if match == -1 and not line.endswith(";"):
+                    yield (idx + 1, "mod declaration spans multiple lines")
+                if prev_mod[indent] and mod < prev_mod[indent] and check_alphabetical_order:
+                    yield(idx + 1, decl_message.format("mod declaration")
+                          + decl_expected.format(prev_mod[indent])
+                          + decl_found.format(mod))
+                prev_mod[indent] = mod
+        else:
+            # we now erase previous entries
+            prev_mod = {}
+
+        # derivable traits should be alphabetically ordered
+        if is_attribute:
+            # match the derivable traits filtering out macro expansions
+            match = re.search(r"#\[derive\(([a-zA-Z, ]*)", line)
+            if match:
+                derives = list(map(lambda w: w.strip(), match.group(1).split(',')))
+                # sort, compare and report
+                sorted_derives = sorted(derives)
+                if sorted_derives != derives and check_alphabetical_order:
+                    yield(idx + 1, decl_message.format("derivable traits list")
+                          + decl_expected.format(", ".join(sorted_derives))
+                          + decl_found.format(", ".join(derives)))
+
+
+# Avoid flagging <Item=Foo> constructs
+def is_associated_type(match, line):
+    if match.group(1) != '=':
+        return False
+    open_angle = line[0:match.end()].rfind('<')
+    close_angle = line[open_angle:].find('>') if open_angle != -1 else -1
+    generic_open = open_angle != -1 and open_angle < match.start()
+    generic_close = close_angle != -1 and close_angle + open_angle >= match.end()
+    return generic_open and generic_close
+
+
+def check_webidl_spec(file_name, contents):
+    # Sorted by this function (in pseudo-Rust). The idea is to group the same
+    # organization together.
+    # fn sort_standards(a: &Url, b: &Url) -> Ordering {
+    #     let a_domain = a.domain().split(".");
+    #     a_domain.pop();
+    #     a_domain.reverse();
+    #     let b_domain = b.domain().split(".");
+    #     b_domain.pop();
+    #     b_domain.reverse();
+    #     for i in a_domain.into_iter().zip(b_domain.into_iter()) {
+    #         match i.0.cmp(b.0) {
+    #             Less => return Less,
+    #             Greater => return Greater,
+    #             _ => (),
+    #         }
+    #     }
+    #     a_domain.path().cmp(b_domain.path())
+    # }
+
+    if not file_name.endswith(".webidl"):
+        return
+
+    for i in WEBIDL_STANDARDS:
+        if contents.find(i) != -1:
+            return
+    yield (0, "No specification link found.")
+
+
+def duplicate_key_yaml_constructor(loader, node, deep=False):
+    mapping = {}
+    for key_node, value_node in node.value:
+        key = loader.construct_object(key_node, deep=deep)
+        if key in mapping:
+            raise KeyError(key)
+        value = loader.construct_object(value_node, deep=deep)
+        mapping[key] = value
+    return loader.construct_mapping(node, deep)
+
+
+def lint_buildbot_steps_yaml(mapping):
+    from voluptuous import Any, Extra, Required, Schema
+
+    # Note: dictionary keys are optional by default in voluptuous
+    env = Schema({Extra: str})
+    commands = Schema([str])
+    schema = Schema({
+        'env': env,
+        Extra: Any(
+            commands,
+            {
+                'env': env,
+                Required('commands'): commands,
+            },
+        ),
+    })
+
+    # Signals errors via exception throwing
+    schema(mapping)
+
+
+class SafeYamlLoader(yaml.SafeLoader):
+    """Subclass of yaml.SafeLoader to avoid mutating the global SafeLoader."""
+    pass
+
+
+def check_yaml(file_name, contents):
+    if not file_name.endswith("buildbot_steps.yml"):
+        return
+
+    # YAML specification doesn't explicitly disallow
+    # duplicate keys, but they shouldn't be allowed in
+    # buildbot_steps.yml as it could lead to confusion
+    SafeYamlLoader.add_constructor(
+        yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG,
+        duplicate_key_yaml_constructor
+    )
+
+    try:
+        contents = yaml.load(contents, Loader=SafeYamlLoader)
+        lint_buildbot_steps_yaml(contents)
+    except yaml.YAMLError as e:
+        line = e.problem_mark.line + 1 if hasattr(e, 'problem_mark') else None
+        yield (line, e)
+    except KeyError as e:
+        yield (None, "Duplicated Key ({})".format(e.args[0]))
+    except voluptuous.MultipleInvalid as e:
+        yield (None, str(e))
+
+
+def check_for_possible_duplicate_json_keys(key_value_pairs):
+    keys = [x[0] for x in key_value_pairs]
+    seen_keys = set()
+    for key in keys:
+        if key in seen_keys:
+            raise KeyError("Duplicated Key (%s)" % key)
+
+        seen_keys.add(key)
+
+
+def check_for_alphabetical_sorted_json_keys(key_value_pairs):
+    for a, b in zip(key_value_pairs[:-1], key_value_pairs[1:]):
+        if a[0] > b[0]:
+            raise KeyError("Unordered key (found %s before %s)" % (a[0], b[0]))
+
+
+def check_json_requirements(filename):
+    def check_fn(key_value_pairs):
+        check_for_possible_duplicate_json_keys(key_value_pairs)
+        if filename in normilize_paths(config["check-ordered-json-keys"]):
+            check_for_alphabetical_sorted_json_keys(key_value_pairs)
+    return check_fn
+
+
+def check_json(filename, contents):
+    if not filename.endswith(".json"):
+        return
+
+    try:
+        json.loads(contents, object_pairs_hook=check_json_requirements(filename))
+    except ValueError as e:
+        match = re.search(r"line (\d+) ", e.args[0])
+        line_no = match and match.group(1)
+        yield (line_no, e.args[0])
+    except KeyError as e:
+        yield (None, e.args[0])
+
+
+def check_spec(file_name, lines):
+    if SPEC_BASE_PATH not in file_name:
+        return
+    file_name = os.path.relpath(os.path.splitext(file_name)[0], SPEC_BASE_PATH)
+    patt = re.compile(r"^\s*\/\/.+")
+
+    # Pattern representing a line with a macro
+    macro_patt = re.compile(r"^\s*\S+!(.*)$")
+
+    # Pattern representing a line with comment containing a spec link
+    link_patt = re.compile(r"^\s*///? (<https://.+>|https://.+)$")
+
+    # Pattern representing a line with comment or attribute
+    comment_patt = re.compile(r"^\s*(///?.+|#\[.+\])$")
+
+    brace_count = 0
+    in_impl = False
+    pattern = "impl {}Methods for {} {{".format(file_name, file_name)
+
+    for idx, line in enumerate(map(lambda line: line.decode("utf-8"), lines)):
+        if "// check-tidy: no specs after this line" in line:
+            break
+        if not patt.match(line):
+            if pattern.lower() in line.lower():
+                in_impl = True
+            if ("fn " in line or macro_patt.match(line)) and brace_count == 1:
+                for up_idx in range(1, idx + 1):
+                    up_line = lines[idx - up_idx].decode("utf-8")
+                    if link_patt.match(up_line):
+                        # Comment with spec link exists
+                        break
+                    if not comment_patt.match(up_line):
+                        # No more comments exist above, yield warning
+                        yield (idx + 1, "method declared in webidl is missing a comment with a specification link")
+                        break
+            if in_impl:
+                brace_count += line.count('{')
+                brace_count -= line.count('}')
+                if brace_count < 1:
+                    break
+
+
+def check_config_file(config_file, print_text=True, no_wpt=False):
+    # Check if config file exists
+    if not os.path.exists(config_file):
+        print("%s config file is required but was not found" % config_file)
+        sys.exit(1)
+
+    # Load configs from servo-tidy.toml
+    with open(config_file) as content:
+        conf_file = content.read()
+        lines = conf_file.splitlines(True)
+
+    if print_text:
+        print('\rChecking the config file...')
+
+    config_content = toml.loads(conf_file)
+    exclude = config_content.get("ignore", {})
+
+    # Check for invalid listed ignored directories
+    exclude_dirs = [d for p in exclude.get("directories", []) for d in (glob.glob(p) or [p])]
+    skip_dirs = ["./target", "./tests"]
+    invalid_dirs = [d for d in exclude_dirs if not os.path.isdir(d) and not any(s in d for s in skip_dirs)]
+
+    # Check for invalid listed ignored files
+    invalid_files = [f for f in exclude.get("files", []) if not os.path.exists(f)]
+
+    # Do not check for the existence of ignored files under tests/wpt if --no-wpt is used
+    if no_wpt:
+        wpt_dir = './tests/wpt/'
+        invalid_files = [f for f in invalid_files if not os.path.commonprefix([wpt_dir, f]) == wpt_dir]
+
+    current_table = ""
+    for idx, line in enumerate(lines):
+        # Ignore comment lines
+        if line.strip().startswith("#"):
+            continue
+
+        # Check for invalid tables
+        if re.match(r"\[(.*?)\]", line.strip()):
+            table_name = re.findall(r"\[(.*?)\]", line)[0].strip()
+            if table_name not in ("configs", "blocked-packages", "ignore", "check_ext"):
+                yield config_file, idx + 1, "invalid config table [%s]" % table_name
+            current_table = table_name
+            continue
+
+        # Print invalid listed ignored directories
+        if current_table == "ignore" and invalid_dirs:
+            for d in invalid_dirs:
+                if line.strip().strip('\'",') == d:
+                    yield config_file, idx + 1, "ignored directory '%s' doesn't exist" % d
+                    invalid_dirs.remove(d)
+                    break
+
+        # Print invalid listed ignored files
+        if current_table == "ignore" and invalid_files:
+            for f in invalid_files:
+                if line.strip().strip('\'",') == f:
+                    yield config_file, idx + 1, "ignored file '%s' doesn't exist" % f
+                    invalid_files.remove(f)
+                    break
+
+        # Skip if there is no equal sign in line, assuming it's not a key
+        if "=" not in line:
+            continue
+
+        key = line.split("=")[0].strip()
+
+        # Check for invalid keys inside [configs] and [ignore] table
+        if (current_table == "configs" and key not in config
+                or current_table == "ignore" and key not in config["ignore"]
+                # Any key outside of tables
+                or current_table == ""):
+            yield config_file, idx + 1, "invalid config key '%s'" % key
+
+    # Parse config file
+    parse_config(config_content)
+
+
+def parse_config(config_file):
+    exclude = config_file.get("ignore", {})
+    # Add list of ignored directories to config
+    ignored_directories = [d for p in exclude.get("directories", []) for d in (glob.glob(p) or [p])]
+    config["ignore"]["directories"] += normilize_paths(ignored_directories)
+    # Add list of ignored files to config
+    config["ignore"]["files"] += normilize_paths(exclude.get("files", []))
+    # Add list of ignored packages to config
+    config["ignore"]["packages"] = exclude.get("packages", [])
+
+    # Add dict of dir, list of expected ext to config
+    dirs_to_check = config_file.get("check_ext", {})
+    # Fix the paths (OS-dependent)
+    for path, exts in dirs_to_check.items():
+        config['check_ext'][normilize_paths(path)] = exts
+
+    # Add list of blocked packages
+    config["blocked-packages"] = config_file.get("blocked-packages", {})
+
+    # Override default configs
+    user_configs = config_file.get("configs", [])
+    for pref in user_configs:
+        if pref in config:
+            config[pref] = user_configs[pref]
+
+
+def check_directory_files(directories, print_text=True):
+    if print_text:
+        print('\rChecking directories for correct file extensions...')
+    for directory, file_extensions in directories.items():
+        files = sorted(os.listdir(directory))
+        for filename in files:
+            if not any(filename.endswith(ext) for ext in file_extensions):
+                details = {
+                    "name": os.path.basename(filename),
+                    "ext": ", ".join(file_extensions),
+                    "dir_name": directory
+                }
+                message = '''Unexpected extension found for {name}. \
+We only expect files with {ext} extensions in {dir_name}'''.format(**details)
+                yield (filename, 1, message)
+
+
+def collect_errors_for_files(files_to_check, checking_functions, line_checking_functions, print_text=True):
+    (has_element, files_to_check) = is_iter_empty(files_to_check)
+    if not has_element:
+        return
+    if print_text:
+        print('\rChecking files for tidiness...')
+
+    for filename in files_to_check:
+        if not os.path.exists(filename):
+            continue
+        with open(filename, "rb") as f:
+            contents = f.read()
+            if not contents.strip():
+                yield filename, 0, "file is empty"
+                continue
+            for check in checking_functions:
+                for error in check(filename, contents):
+                    # the result will be: `(filename, line, message)`
+                    yield (filename,) + error
+            lines = contents.splitlines(True)
+            for check in line_checking_functions:
+                for error in check(filename, lines):
+                    yield (filename,) + error
+
+
+def get_dep_toml_files(only_changed_files=False):
+    if not only_changed_files:
+        print('\nRunning the dependency licensing lint...')
+        for root, directories, filenames in os.walk(".cargo"):
+            for filename in filenames:
+                if filename == "Cargo.toml":
+                    yield os.path.join(root, filename)
+
+
+def check_dep_license_errors(filenames, progress=True):
+    filenames = progress_wrapper(filenames) if progress else filenames
+    for filename in filenames:
+        with open(filename, "r") as f:
+            ok_licensed = False
+            lines = f.readlines()
+            for idx, line in enumerate(lines):
+                for license_line in licenses_dep_toml:
+                    ok_licensed |= (license_line in line)
+            if not ok_licensed:
+                yield (filename, 0, "dependency should contain a valid license.")
+
+
+class LintRunner(object):
+    def __init__(self, lint_path=None, only_changed_files=True,
+                 exclude_dirs=[], progress=True, stylo=False, no_wpt=False):
+        self.only_changed_files = only_changed_files
+        self.exclude_dirs = exclude_dirs
+        self.progress = progress
+        self.path = lint_path
+        self.stylo = stylo
+        self.no_wpt = no_wpt
+
+    def check(self, lint_cls):
+        lint = lint_cls(self.path, self.only_changed_files,
+                        self.exclude_dirs, self.progress,
+                        stylo=self.stylo, no_wpt=self.no_wpt)
+        for error in lint.run():
+            if type(error) is not tuple or (type(error) is tuple and len(error) != 3):
+                yield (self.path, 1, "errors should be a tuple of (path, line, reason)")
+                return
+            yield error
+
+    def get_files(self, path, **kwargs):
+        args = ['only_changed_files', 'exclude_dirs', 'progress']
+        kwargs = {k: kwargs.get(k, getattr(self, k)) for k in args}
+        return FileList(path, **kwargs)
+
+    def run(self):
+        yield (self.path, 0, "class 'Lint' should implement 'run' method")
+
+
+def run_lint_scripts(only_changed_files=False, progress=True, stylo=False, no_wpt=False):
+    runner = LintRunner(only_changed_files=only_changed_files, progress=progress, stylo=stylo, no_wpt=no_wpt)
+    for error in runner.check(WPTLint):
+        yield error
+
+
+def scan(only_changed_files=False, progress=True, stylo=False, no_wpt=False):
+    # check config file for errors
+    config_errors = check_config_file(CONFIG_FILE_PATH, no_wpt=no_wpt)
+    # check ini directories exist
+    if not no_wpt and os.path.isfile(WPT_MANIFEST_PATH):
+        manifest_errors = check_manifest_dirs(WPT_MANIFEST_PATH)
+    else:
+        manifest_errors = ()
+    # check directories contain expected files
+    directory_errors = check_directory_files(config['check_ext'])
+    # standard checks
+    files_to_check = filter_files('.', only_changed_files and not stylo, progress)
+    checking_functions = (check_flake8, check_lock, check_webidl_spec, check_json, check_yaml)
+    line_checking_functions = (check_license, check_by_line, check_toml, check_shell,
+                               check_rust, check_spec, check_modeline)
+    file_errors = collect_errors_for_files(files_to_check, checking_functions, line_checking_functions)
+    # check dependency licenses
+    dep_license_errors = check_dep_license_errors(get_dep_toml_files(only_changed_files), progress)
+    # other lint checks
+    lint_errors = run_lint_scripts(only_changed_files, progress, stylo=stylo, no_wpt=no_wpt)
+    # chain all the iterators
+    errors = itertools.chain(config_errors, manifest_errors, directory_errors, lint_errors,
+                             file_errors, dep_license_errors)
+
+    error = None
+    colorama.init()
+    for error in errors:
+        print("\r\033[94m{}\033[0m:\033[93m{}\033[0m: \033[91m{}\033[0m".format(*error))
+
+    print()
+    if error is None:
+        print("\033[92mtidy reported no errors.\033[0m")
+
+    return int(error is not None)
+
+
+class WPTLint(LintRunner):
+    def _get_wpt_files(self, suite):
+        working_dir = os.path.join(WPT_PATH, suite, '')
+        file_iter = self.get_files(working_dir, exclude_dirs=[])
+        print('\nRunning the WPT lint on %s...' % working_dir)
+        for f in file_iter:
+            if filter_file(f):
+                yield f[len(working_dir):]
+
+    def run(self):
+        if self.stylo or self.no_wpt:
+            return
+
+        wpt_working_dir = os.path.abspath(os.path.join(WPT_PATH, "web-platform-tests"))
+        for suite in SUITES:
+            files = list(self._get_wpt_files(suite))
+            if not files:
+                continue
+            sys.path.insert(0, wpt_working_dir)
+            from tools.lint import lint
+            file_dir = os.path.abspath(os.path.join(WPT_PATH, suite))
+            returncode = lint.lint(file_dir, files, output_format="json")
+            sys.path.remove(wpt_working_dir)
+            if returncode:
+                yield ("WPT Lint Tool", "", "lint error(s) in Web Platform Tests: exit status %s" % returncode)