aboutsummaryrefslogtreecommitdiffstats
path: root/tests/wpt/web-platform-tests/tools/html5lib/parse.py
diff options
context:
space:
mode:
authorWPT Sync Bot <josh+wptsync@joshmatthews.net>2018-06-08 21:05:21 -0400
committerWPT Sync Bot <josh+wptsync@joshmatthews.net>2018-06-08 22:44:24 -0400
commitfe00a63040da3ead9ef3703d5094d3f1e36bffd4 (patch)
tree6c387c7863764aa4f74b58722373f6ac6e14b209 /tests/wpt/web-platform-tests/tools/html5lib/parse.py
parent527d874bc107039156d40d25b2a76f4a28eb639e (diff)
downloadservo-fe00a63040da3ead9ef3703d5094d3f1e36bffd4.tar.gz
servo-fe00a63040da3ead9ef3703d5094d3f1e36bffd4.zip
Update web-platform-tests to revision 132d12daea699ce266324e79eecbe59b10e56502
Diffstat (limited to 'tests/wpt/web-platform-tests/tools/html5lib/parse.py')
-rwxr-xr-xtests/wpt/web-platform-tests/tools/html5lib/parse.py233
1 files changed, 0 insertions, 233 deletions
diff --git a/tests/wpt/web-platform-tests/tools/html5lib/parse.py b/tests/wpt/web-platform-tests/tools/html5lib/parse.py
deleted file mode 100755
index 9cbf3b8d1cc..00000000000
--- a/tests/wpt/web-platform-tests/tools/html5lib/parse.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/env python
-"""usage: %prog [options] filename
-
-Parse a document to a tree, with optional profiling
-"""
-
-import sys
-import os
-import traceback
-from optparse import OptionParser
-
-from html5lib import html5parser, sanitizer
-from html5lib.tokenizer import HTMLTokenizer
-from html5lib import treebuilders, serializer, treewalkers
-from html5lib import constants
-
-def parse():
- optParser = getOptParser()
- opts,args = optParser.parse_args()
- encoding = "utf8"
-
- try:
- f = args[-1]
- # Try opening from the internet
- if f.startswith('http://'):
- try:
- import urllib.request, urllib.parse, urllib.error, cgi
- f = urllib.request.urlopen(f)
- contentType = f.headers.get('content-type')
- if contentType:
- (mediaType, params) = cgi.parse_header(contentType)
- encoding = params.get('charset')
- except:
- pass
- elif f == '-':
- f = sys.stdin
- if sys.version_info[0] >= 3:
- encoding = None
- else:
- try:
- # Try opening from file system
- f = open(f, "rb")
- except IOError as e:
- sys.stderr.write("Unable to open file: %s\n" % e)
- sys.exit(1)
- except IndexError:
- sys.stderr.write("No filename provided. Use -h for help\n")
- sys.exit(1)
-
- treebuilder = treebuilders.getTreeBuilder(opts.treebuilder)
-
- if opts.sanitize:
- tokenizer = sanitizer.HTMLSanitizer
- else:
- tokenizer = HTMLTokenizer
-
- p = html5parser.HTMLParser(tree=treebuilder, tokenizer=tokenizer, debug=opts.log)
-
- if opts.fragment:
- parseMethod = p.parseFragment
- else:
- parseMethod = p.parse
-
- if opts.profile:
- import cProfile
- import pstats
- cProfile.runctx("run(parseMethod, f, encoding)", None,
- {"run": run,
- "parseMethod": parseMethod,
- "f": f,
- "encoding": encoding},
- "stats.prof")
- # XXX - We should use a temp file here
- stats = pstats.Stats('stats.prof')
- stats.strip_dirs()
- stats.sort_stats('time')
- stats.print_stats()
- elif opts.time:
- import time
- t0 = time.time()
- document = run(parseMethod, f, encoding)
- t1 = time.time()
- if document:
- printOutput(p, document, opts)
- t2 = time.time()
- sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1))
- else:
- sys.stderr.write("\n\nRun took: %fs"%(t1-t0))
- else:
- document = run(parseMethod, f, encoding)
- if document:
- printOutput(p, document, opts)
-
-def run(parseMethod, f, encoding):
- try:
- document = parseMethod(f, encoding=encoding)
- except:
- document = None
- traceback.print_exc()
- return document
-
-def printOutput(parser, document, opts):
- if opts.encoding:
- print("Encoding:", parser.tokenizer.stream.charEncoding)
-
- for item in parser.log:
- print(item)
-
- if document is not None:
- if opts.xml:
- sys.stdout.write(document.toxml("utf-8"))
- elif opts.tree:
- if not hasattr(document,'__getitem__'):
- document = [document]
- for fragment in document:
- print(parser.tree.testSerializer(fragment))
- elif opts.hilite:
- sys.stdout.write(document.hilite("utf-8"))
- elif opts.html:
- kwargs = {}
- for opt in serializer.HTMLSerializer.options:
- try:
- kwargs[opt] = getattr(opts,opt)
- except:
- pass
- if not kwargs['quote_char']:
- del kwargs['quote_char']
-
- tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
- if sys.version_info[0] >= 3:
- encoding = None
- else:
- encoding = "utf-8"
- for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding):
- sys.stdout.write(text)
- if not text.endswith('\n'): sys.stdout.write('\n')
- if opts.error:
- errList=[]
- for pos, errorcode, datavars in parser.errors:
- errList.append("Line %i Col %i"%pos + " " + constants.E.get(errorcode, 'Unknown error "%s"' % errorcode) % datavars)
- sys.stdout.write("\nParse errors:\n" + "\n".join(errList)+"\n")
-
-def getOptParser():
- parser = OptionParser(usage=__doc__)
-
- parser.add_option("-p", "--profile", action="store_true", default=False,
- dest="profile", help="Use the hotshot profiler to "
- "produce a detailed log of the run")
-
- parser.add_option("-t", "--time",
- action="store_true", default=False, dest="time",
- help="Time the run using time.time (may not be accurate on all platforms, especially for short runs)")
-
- parser.add_option("-b", "--treebuilder", action="store", type="string",
- dest="treebuilder", default="simpleTree")
-
- parser.add_option("-e", "--error", action="store_true", default=False,
- dest="error", help="Print a list of parse errors")
-
- parser.add_option("-f", "--fragment", action="store_true", default=False,
- dest="fragment", help="Parse as a fragment")
-
- parser.add_option("", "--tree", action="store_true", default=False,
- dest="tree", help="Output as debug tree")
-
- parser.add_option("-x", "--xml", action="store_true", default=False,
- dest="xml", help="Output as xml")
-
- parser.add_option("", "--no-html", action="store_false", default=True,
- dest="html", help="Don't output html")
-
- parser.add_option("", "--hilite", action="store_true", default=False,
- dest="hilite", help="Output as formatted highlighted code.")
-
- parser.add_option("-c", "--encoding", action="store_true", default=False,
- dest="encoding", help="Print character encoding used")
-
- parser.add_option("", "--inject-meta-charset", action="store_true",
- default=False, dest="inject_meta_charset",
- help="inject <meta charset>")
-
- parser.add_option("", "--strip-whitespace", action="store_true",
- default=False, dest="strip_whitespace",
- help="strip whitespace")
-
- parser.add_option("", "--omit-optional-tags", action="store_true",
- default=False, dest="omit_optional_tags",
- help="omit optional tags")
-
- parser.add_option("", "--quote-attr-values", action="store_true",
- default=False, dest="quote_attr_values",
- help="quote attribute values")
-
- parser.add_option("", "--use-best-quote-char", action="store_true",
- default=False, dest="use_best_quote_char",
- help="use best quote character")
-
- parser.add_option("", "--quote-char", action="store",
- default=None, dest="quote_char",
- help="quote character")
-
- parser.add_option("", "--no-minimize-boolean-attributes",
- action="store_false", default=True,
- dest="minimize_boolean_attributes",
- help="minimize boolean attributes")
-
- parser.add_option("", "--use-trailing-solidus", action="store_true",
- default=False, dest="use_trailing_solidus",
- help="use trailing solidus")
-
- parser.add_option("", "--space-before-trailing-solidus",
- action="store_true", default=False,
- dest="space_before_trailing_solidus",
- help="add space before trailing solidus")
-
- parser.add_option("", "--escape-lt-in-attrs", action="store_true",
- default=False, dest="escape_lt_in_attrs",
- help="escape less than signs in attribute values")
-
- parser.add_option("", "--escape-rcdata", action="store_true",
- default=False, dest="escape_rcdata",
- help="escape rcdata element values")
-
- parser.add_option("", "--sanitize", action="store_true", default=False,
- dest="sanitize", help="sanitize")
-
- parser.add_option("-l", "--log", action="store_true", default=False,
- dest="log", help="log state transitions")
-
- return parser
-
-if __name__ == "__main__":
- parse()