diff options
author | WPT Sync Bot <josh+wptsync@joshmatthews.net> | 2018-06-08 21:05:21 -0400 |
---|---|---|
committer | WPT Sync Bot <josh+wptsync@joshmatthews.net> | 2018-06-08 22:44:24 -0400 |
commit | fe00a63040da3ead9ef3703d5094d3f1e36bffd4 (patch) | |
tree | 6c387c7863764aa4f74b58722373f6ac6e14b209 /tests/wpt/web-platform-tests/tools/html5lib/parse.py | |
parent | 527d874bc107039156d40d25b2a76f4a28eb639e (diff) | |
download | servo-fe00a63040da3ead9ef3703d5094d3f1e36bffd4.tar.gz servo-fe00a63040da3ead9ef3703d5094d3f1e36bffd4.zip |
Update web-platform-tests to revision 132d12daea699ce266324e79eecbe59b10e56502
Diffstat (limited to 'tests/wpt/web-platform-tests/tools/html5lib/parse.py')
-rwxr-xr-x | tests/wpt/web-platform-tests/tools/html5lib/parse.py | 233 |
1 files changed, 0 insertions, 233 deletions
diff --git a/tests/wpt/web-platform-tests/tools/html5lib/parse.py b/tests/wpt/web-platform-tests/tools/html5lib/parse.py deleted file mode 100755 index 9cbf3b8d1cc..00000000000 --- a/tests/wpt/web-platform-tests/tools/html5lib/parse.py +++ /dev/null @@ -1,233 +0,0 @@ -#!/usr/bin/env python -"""usage: %prog [options] filename - -Parse a document to a tree, with optional profiling -""" - -import sys -import os -import traceback -from optparse import OptionParser - -from html5lib import html5parser, sanitizer -from html5lib.tokenizer import HTMLTokenizer -from html5lib import treebuilders, serializer, treewalkers -from html5lib import constants - -def parse(): - optParser = getOptParser() - opts,args = optParser.parse_args() - encoding = "utf8" - - try: - f = args[-1] - # Try opening from the internet - if f.startswith('http://'): - try: - import urllib.request, urllib.parse, urllib.error, cgi - f = urllib.request.urlopen(f) - contentType = f.headers.get('content-type') - if contentType: - (mediaType, params) = cgi.parse_header(contentType) - encoding = params.get('charset') - except: - pass - elif f == '-': - f = sys.stdin - if sys.version_info[0] >= 3: - encoding = None - else: - try: - # Try opening from file system - f = open(f, "rb") - except IOError as e: - sys.stderr.write("Unable to open file: %s\n" % e) - sys.exit(1) - except IndexError: - sys.stderr.write("No filename provided. Use -h for help\n") - sys.exit(1) - - treebuilder = treebuilders.getTreeBuilder(opts.treebuilder) - - if opts.sanitize: - tokenizer = sanitizer.HTMLSanitizer - else: - tokenizer = HTMLTokenizer - - p = html5parser.HTMLParser(tree=treebuilder, tokenizer=tokenizer, debug=opts.log) - - if opts.fragment: - parseMethod = p.parseFragment - else: - parseMethod = p.parse - - if opts.profile: - import cProfile - import pstats - cProfile.runctx("run(parseMethod, f, encoding)", None, - {"run": run, - "parseMethod": parseMethod, - "f": f, - "encoding": encoding}, - "stats.prof") - # XXX - We should use a temp file here - stats = pstats.Stats('stats.prof') - stats.strip_dirs() - stats.sort_stats('time') - stats.print_stats() - elif opts.time: - import time - t0 = time.time() - document = run(parseMethod, f, encoding) - t1 = time.time() - if document: - printOutput(p, document, opts) - t2 = time.time() - sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1)) - else: - sys.stderr.write("\n\nRun took: %fs"%(t1-t0)) - else: - document = run(parseMethod, f, encoding) - if document: - printOutput(p, document, opts) - -def run(parseMethod, f, encoding): - try: - document = parseMethod(f, encoding=encoding) - except: - document = None - traceback.print_exc() - return document - -def printOutput(parser, document, opts): - if opts.encoding: - print("Encoding:", parser.tokenizer.stream.charEncoding) - - for item in parser.log: - print(item) - - if document is not None: - if opts.xml: - sys.stdout.write(document.toxml("utf-8")) - elif opts.tree: - if not hasattr(document,'__getitem__'): - document = [document] - for fragment in document: - print(parser.tree.testSerializer(fragment)) - elif opts.hilite: - sys.stdout.write(document.hilite("utf-8")) - elif opts.html: - kwargs = {} - for opt in serializer.HTMLSerializer.options: - try: - kwargs[opt] = getattr(opts,opt) - except: - pass - if not kwargs['quote_char']: - del kwargs['quote_char'] - - tokens = treewalkers.getTreeWalker(opts.treebuilder)(document) - if sys.version_info[0] >= 3: - encoding = None - else: - encoding = "utf-8" - for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding): - sys.stdout.write(text) - if not text.endswith('\n'): sys.stdout.write('\n') - if opts.error: - errList=[] - for pos, errorcode, datavars in parser.errors: - errList.append("Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars) - sys.stdout.write("\nParse errors:\n" + "\n".join(errList)+"\n") - -def getOptParser(): - parser = OptionParser(usage=__doc__) - - parser.add_option("-p", "--profile", action="store_true", default=False, - dest="profile", help="Use the hotshot profiler to " - "produce a detailed log of the run") - - parser.add_option("-t", "--time", - action="store_true", default=False, dest="time", - help="Time the run using time.time (may not be accurate on all platforms, especially for short runs)") - - parser.add_option("-b", "--treebuilder", action="store", type="string", - dest="treebuilder", default="simpleTree") - - parser.add_option("-e", "--error", action="store_true", default=False, - dest="error", help="Print a list of parse errors") - - parser.add_option("-f", "--fragment", action="store_true", default=False, - dest="fragment", help="Parse as a fragment") - - parser.add_option("", "--tree", action="store_true", default=False, - dest="tree", help="Output as debug tree") - - parser.add_option("-x", "--xml", action="store_true", default=False, - dest="xml", help="Output as xml") - - parser.add_option("", "--no-html", action="store_false", default=True, - dest="html", help="Don't output html") - - parser.add_option("", "--hilite", action="store_true", default=False, - dest="hilite", help="Output as formatted highlighted code.") - - parser.add_option("-c", "--encoding", action="store_true", default=False, - dest="encoding", help="Print character encoding used") - - parser.add_option("", "--inject-meta-charset", action="store_true", - default=False, dest="inject_meta_charset", - help="inject <meta charset>") - - parser.add_option("", "--strip-whitespace", action="store_true", - default=False, dest="strip_whitespace", - help="strip whitespace") - - parser.add_option("", "--omit-optional-tags", action="store_true", - default=False, dest="omit_optional_tags", - help="omit optional tags") - - parser.add_option("", "--quote-attr-values", action="store_true", - default=False, dest="quote_attr_values", - help="quote attribute values") - - parser.add_option("", "--use-best-quote-char", action="store_true", - default=False, dest="use_best_quote_char", - help="use best quote character") - - parser.add_option("", "--quote-char", action="store", - default=None, dest="quote_char", - help="quote character") - - parser.add_option("", "--no-minimize-boolean-attributes", - action="store_false", default=True, - dest="minimize_boolean_attributes", - help="minimize boolean attributes") - - parser.add_option("", "--use-trailing-solidus", action="store_true", - default=False, dest="use_trailing_solidus", - help="use trailing solidus") - - parser.add_option("", "--space-before-trailing-solidus", - action="store_true", default=False, - dest="space_before_trailing_solidus", - help="add space before trailing solidus") - - parser.add_option("", "--escape-lt-in-attrs", action="store_true", - default=False, dest="escape_lt_in_attrs", - help="escape less than signs in attribute values") - - parser.add_option("", "--escape-rcdata", action="store_true", - default=False, dest="escape_rcdata", - help="escape rcdata element values") - - parser.add_option("", "--sanitize", action="store_true", default=False, - dest="sanitize", help="sanitize") - - parser.add_option("-l", "--log", action="store_true", default=False, - dest="log", help="log state transitions") - - return parser - -if __name__ == "__main__": - parse() |