| field | value |
|---|---|
| author | Vincent Ricard <magic@magicninja.org>, 2020-12-28 22:31:49 +0100 |
| committer | Josh Matthews <josh@joshmatthews.net>, 2021-02-18 09:35:46 -0500 |
| commit | a627dde0d01e35a1cbdb62ca19ee0349757c34b0 (patch) |
| tree | 094b86f657d87bfc374d436da809aca26281069d |
| parent | f73370088b77a834d9b9f6835ae90a4a66e6d7ee (diff) |
| download | servo-a627dde0d01e35a1cbdb62ca19ee0349757c34b0.tar.gz, servo-a627dde0d01e35a1cbdb62ca19ee0349757c34b0.zip |
Port some code to Python3
24 files changed, 1438 insertions, 2340 deletions
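Across the 24 files, the port repeats a small set of mechanical Python 2 → 3 migrations: `filter()`, `map()`, and the `dict` view methods become lazy iterators in Python 3, so call sites that need `len()`, indexing, or repeated iteration now wrap them in `list()`; `iteritems()` is respelled `items()`; and classes that define `__eq__` gain a matching `__hash__`, since Python 3 otherwise makes them unhashable. A minimal sketch of the three patterns (the class and data below are hypothetical, not code from the commit):

```python
# 1. filter()/map() are lazy iterators in Python 3: wrap in list() where
#    the result is indexed, measured with len(), or iterated twice.
sigs = list(filter(lambda s: s is not None, [None, "sig_a", "sig_b"]))
assert len(sigs) == 2 and sigs[0] == "sig_a"

# 2. dict.iteritems() no longer exists; dict.items() is the Python 3 form.
operations = {"IndexedGetter": "get", "NamedSetter": None}
present = [name for name, op in operations.items() if op]
assert present == ["IndexedGetter"]

# 3. Defining __eq__ without __hash__ makes a class unhashable in
#    Python 3, breaking its use as a dict key or set member.
class NullableType:
    def __init__(self, inner):
        self.inner = inner

    def __eq__(self, other):
        return isinstance(other, NullableType) and self.inner == other.inner

    def __hash__(self):  # required once __eq__ is defined
        return hash(self.inner)

assert NullableType("long") in {NullableType("long")}
```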
diff --git a/components/script/build.rs b/components/script/build.rs index 6a9b4866759..ea39b698257 100644 --- a/components/script/build.rs +++ b/components/script/build.rs @@ -65,11 +65,11 @@ impl<'a> phf_shared::PhfHash for Bytes<'a> { } fn find_python() -> String { - env::var("PYTHON2").ok().unwrap_or_else(|| { + env::var("PYTHON3").ok().unwrap_or_else(|| { let candidates = if cfg!(windows) { - ["python2.7.exe", "python27.exe", "python.exe"] + ["python3.8.exe", "python38.exe", "python.exe"] } else { - ["python2.7", "python2", "python"] + ["python3.8", "python3", "python"] }; for &name in &candidates { if Command::new(name) @@ -82,7 +82,7 @@ fn find_python() -> String { } } panic!( - "Can't find python (tried {})! Try fixing PATH or setting the PYTHON2 env var", + "Can't find python (tried {})! Try fixing PATH or setting the PYTHON3 env var", candidates.join(", ") ) }) diff --git a/components/script/dom/bindings/codegen/CodegenRust.py b/components/script/dom/bindings/codegen/CodegenRust.py index dbf363d0fbc..8573998ebb3 100644 --- a/components/script/dom/bindings/codegen/CodegenRust.py +++ b/components/script/dom/bindings/codegen/CodegenRust.py @@ -344,7 +344,7 @@ class CGMethodCall(CGThing): distinguishingArg = "HandleValue::from_raw(args.get(%d))" % distinguishingIndex def pickFirstSignature(condition, filterLambda): - sigs = filter(filterLambda, possibleSignatures) + sigs = list(filter(filterLambda, possibleSignatures)) assert len(sigs) < 2 if len(sigs) > 0: call = getPerSignatureCall(sigs[0], distinguishingIndex) @@ -2117,7 +2117,7 @@ class CGImports(CGWrapper): members += [constructor] if d.proxy: - members += [o for o in d.operations.values() if o] + members += [o for o in list(d.operations.values()) if o] for m in members: if m.isMethod(): @@ -2557,7 +2557,7 @@ def UnionTypes(descriptors, dictionaries, callbacks, typedefs, config): ]) # Sort unionStructs by key, retrieve value - unionStructs = (i[1] for i in sorted(unionStructs.items(), key=operator.itemgetter(0))) + unionStructs = (i[1] for i in sorted(list(unionStructs.items()), key=operator.itemgetter(0))) return CGImports(CGList(unionStructs, "\n\n"), descriptors=[], @@ -4455,9 +4455,10 @@ class CGEnum(CGThing): pub enum %s { %s } -""" % (ident, ",\n ".join(map(getEnumValueName, enum.values()))) +""" % (ident, ",\n ".join(map(getEnumValueName, list(enum.values())))) - pairs = ",\n ".join(['("%s", super::%s::%s)' % (val, ident, getEnumValueName(val)) for val in enum.values()]) + pairs = ",\n ".join(['("%s", super::%s::%s)' % (val, ident, getEnumValueName(val)) + for val in list(enum.values())]) inner = string.Template("""\ use crate::dom::bindings::conversions::ConversionResult; @@ -4640,9 +4641,8 @@ class CGUnionStruct(CGThing): return "Rc" return "" - templateVars = map(lambda t: (getUnionTypeTemplateVars(t, self.descriptorProvider), - getTypeWrapper(t)), - self.type.flatMemberTypes) + templateVars = [(getUnionTypeTemplateVars(t, self.descriptorProvider), + getTypeWrapper(t)) for t in self.type.flatMemberTypes] enumValues = [ " %s(%s)," % (v["name"], "%s<%s>" % (wrapper, v["typeName"]) if wrapper else v["typeName"]) for (v, wrapper) in templateVars @@ -4701,7 +4701,7 @@ class CGUnionConversionStruct(CGThing): " Ok(None) => (),\n" "}\n") % (self.type, name, self.type, name) - interfaceMemberTypes = filter(lambda t: t.isNonCallbackInterface(), memberTypes) + interfaceMemberTypes = [t for t in memberTypes if t.isNonCallbackInterface()] if len(interfaceMemberTypes) > 0: typeNames = [get_name(memberType) for memberType in 
interfaceMemberTypes] interfaceObject = CGList(CGGeneric(get_match(typeName)) for typeName in typeNames) @@ -4709,7 +4709,7 @@ class CGUnionConversionStruct(CGThing): else: interfaceObject = None - arrayObjectMemberTypes = filter(lambda t: t.isSequence(), memberTypes) + arrayObjectMemberTypes = [t for t in memberTypes if t.isSequence()] if len(arrayObjectMemberTypes) > 0: assert len(arrayObjectMemberTypes) == 1 typeName = arrayObjectMemberTypes[0].name @@ -4718,7 +4718,7 @@ class CGUnionConversionStruct(CGThing): else: arrayObject = None - callbackMemberTypes = filter(lambda t: t.isCallback() or t.isCallbackInterface(), memberTypes) + callbackMemberTypes = [t for t in memberTypes if t.isCallback() or t.isCallbackInterface()] if len(callbackMemberTypes) > 0: assert len(callbackMemberTypes) == 1 typeName = callbackMemberTypes[0].name @@ -4726,7 +4726,7 @@ class CGUnionConversionStruct(CGThing): else: callbackObject = None - dictionaryMemberTypes = filter(lambda t: t.isDictionary(), memberTypes) + dictionaryMemberTypes = [t for t in memberTypes if t.isDictionary()] if len(dictionaryMemberTypes) > 0: assert len(dictionaryMemberTypes) == 1 typeName = dictionaryMemberTypes[0].name @@ -4735,7 +4735,7 @@ class CGUnionConversionStruct(CGThing): else: dictionaryObject = None - objectMemberTypes = filter(lambda t: t.isObject(), memberTypes) + objectMemberTypes = [t for t in memberTypes if t.isObject()] if len(objectMemberTypes) > 0: assert len(objectMemberTypes) == 1 typeName = objectMemberTypes[0].name @@ -4744,7 +4744,7 @@ class CGUnionConversionStruct(CGThing): else: object = None - mozMapMemberTypes = filter(lambda t: t.isRecord(), memberTypes) + mozMapMemberTypes = [t for t in memberTypes if t.isRecord()] if len(mozMapMemberTypes) > 0: assert len(mozMapMemberTypes) == 1 typeName = mozMapMemberTypes[0].name @@ -4790,9 +4790,9 @@ class CGUnionConversionStruct(CGThing): typename = get_name(memberType) return CGGeneric(get_match(typename)) other = [] - stringConversion = map(getStringOrPrimitiveConversion, stringTypes) - numericConversion = map(getStringOrPrimitiveConversion, numericTypes) - booleanConversion = map(getStringOrPrimitiveConversion, booleanTypes) + stringConversion = list(map(getStringOrPrimitiveConversion, stringTypes)) + numericConversion = list(map(getStringOrPrimitiveConversion, numericTypes)) + booleanConversion = list(map(getStringOrPrimitiveConversion, booleanTypes)) if stringConversion: if booleanConversion: other.append(CGIfWrapper("value.get().is_boolean()", booleanConversion[0])) @@ -5958,7 +5958,7 @@ class CGInterfaceTrait(CGThing): rettype) if descriptor.proxy: - for name, operation in descriptor.operations.iteritems(): + for name, operation in descriptor.operations.items(): if not operation or operation.isStringifier(): continue @@ -6488,7 +6488,7 @@ class CGDescriptor(CGThing): post='\n') if reexports: - reexports = ', '.join(map(lambda name: reexportedName(name), reexports)) + reexports = ', '.join([reexportedName(name) for name in reexports]) cgThings = CGList([CGGeneric('pub use self::%s::{%s};' % (toBindingNamespace(descriptor.name), reexports)), cgThings], '\n') @@ -7824,7 +7824,7 @@ impl Clone for TopTypeId { # TypeId enum. 
return "%s(%sTypeId)" % (name, name) if name in hierarchy else name - for base, derived in hierarchy.iteritems(): + for base, derived in hierarchy.items(): variants = [] if config.getDescriptor(base).concrete: variants.append(CGGeneric(base)) diff --git a/components/script/dom/bindings/codegen/Configuration.py b/components/script/dom/bindings/codegen/Configuration.py index 4f47a737706..b92f68af3b9 100644 --- a/components/script/dom/bindings/codegen/Configuration.py +++ b/components/script/dom/bindings/codegen/Configuration.py @@ -73,7 +73,7 @@ class Configuration: def getDescriptors(self, **filters): """Gets the descriptors that match the given filters.""" curr = self.descriptors - for key, val in filters.iteritems(): + for key, val in filters.items(): if key == 'webIDLFile': def getter(x): return x.interface.filename() @@ -104,14 +104,14 @@ class Configuration: else: def getter(x): return getattr(x, key) - curr = filter(lambda x: getter(x) == val, curr) + curr = [x for x in curr if getter(x) == val] return curr def getEnums(self, webIDLFile): - return filter(lambda e: e.filename() == webIDLFile, self.enums) + return [e for e in self.enums if e.filename() == webIDLFile] def getTypedefs(self, webIDLFile): - return filter(lambda e: e.filename() == webIDLFile, self.typedefs) + return [e for e in self.typedefs if e.filename() == webIDLFile] @staticmethod def _filterForFile(items, webIDLFile=""): @@ -119,7 +119,7 @@ class Configuration: if not webIDLFile: return items - return filter(lambda x: x.filename() == webIDLFile, items) + return [x for x in items if x.filename() == webIDLFile] def getDictionaries(self, webIDLFile=""): return self._filterForFile(self.dictionaries, webIDLFile=webIDLFile) @@ -327,7 +327,7 @@ class Descriptor(DescriptorProvider): if config == '*': iface = self.interface while iface: - add('all', map(lambda m: m.name, iface.members), attribute) + add('all', [m.name for m in iface.members], attribute) iface = iface.parent else: add('all', [config], attribute) diff --git a/components/script/dom/bindings/codegen/parser/WebIDL.py b/components/script/dom/bindings/codegen/parser/WebIDL.py index 223fd7efbb4..d74278c3e0c 100644 --- a/components/script/dom/bindings/codegen/parser/WebIDL.py +++ b/components/script/dom/bindings/codegen/parser/WebIDL.py @@ -4,7 +4,7 @@ """ A WebIDL parser. 
""" -from __future__ import print_function + from ply import lex, yacc import re import os @@ -57,7 +57,7 @@ def enum(*names, **kw): if "base" not in kw: return Foo(names) - return Foo(chain(kw["base"].attrs.keys(), names)) + return Foo(chain(list(kw["base"].attrs.keys()), names)) class WebIDLError(Exception): @@ -124,6 +124,9 @@ class BuiltinLocation(object): return (isinstance(other, BuiltinLocation) and self.msg == other.msg) + def __hash__(self): + return hash(self.msg) + def filename(self): return '<builtin>' @@ -2360,6 +2363,9 @@ class IDLNullableType(IDLParametrizedType): def __eq__(self, other): return isinstance(other, IDLNullableType) and self.inner == other.inner + def __hash__(self): + return hash(self.inner) + def __str__(self): return self.inner.__str__() + "OrNull" @@ -2522,6 +2528,9 @@ class IDLSequenceType(IDLParametrizedType): def __eq__(self, other): return isinstance(other, IDLSequenceType) and self.inner == other.inner + def __hash__(self): + return hash(self.inner) + def __str__(self): return self.inner.__str__() + "Sequence" @@ -2933,6 +2942,9 @@ class IDLWrapperType(IDLType): self._identifier == other._identifier and self.builtin == other.builtin) + def __hash__(self): + return hash((self._identifier, self.builtin)) + def __str__(self): return str(self.name) + " (Wrapper)" @@ -3301,6 +3313,12 @@ class IDLBuiltinType(IDLType): return "MaybeShared" + str(self.name) return str(self.name) + def __eq__(self, other): + return other and self.location == other.location and self.name == other.name and self._typeTag == other._typeTag + + def __hash__(self): + return hash((self.location, self.name, self._typeTag)) + def prettyName(self): return IDLBuiltinType.PrettyNames[self._typeTag] @@ -3628,7 +3646,7 @@ integerTypeSizes = { def matchIntegerValueToType(value): - for type, extremes in integerTypeSizes.items(): + for type, extremes in list(integerTypeSizes.items()): (min, max) = extremes if value <= max and value >= min: return BuiltinTypes[type] @@ -3707,7 +3725,7 @@ class IDLValue(IDLObject): elif self.type.isString() and type.isEnum(): # Just keep our string, but make sure it's a valid value for this enum enum = type.unroll().inner - if self.value not in enum.values(): + if self.value not in list(enum.values()): raise WebIDLError("'%s' is not a valid default value for enum %s" % (self.value, enum.identifier.name), [location, enum.location]) @@ -4789,7 +4807,7 @@ class IDLAttribute(IDLInterfaceMember): "CrossOriginWritable", "SetterThrows", ] - for (key, value) in self._extendedAttrDict.items(): + for (key, value) in list(self._extendedAttrDict.items()): if key in allowedExtAttrs: if value is not True: raise WebIDLError("[%s] with a value is currently " @@ -5479,7 +5497,7 @@ class IDLMethod(IDLInterfaceMember, IDLScope): [attr.location]) if identifier == "CrossOriginCallable" and self.isStatic(): raise WebIDLError("[CrossOriginCallable] is only allowed on non-static " - "attributes" + "attributes", [attr.location, self.location]) elif identifier == "Pure": if not attr.noArguments(): @@ -5721,6 +5739,7 @@ class Tokenizer(object): "FLOATLITERAL", "IDENTIFIER", "STRING", + "COMMENTS", "WHITESPACE", "OTHER" ] @@ -5753,8 +5772,12 @@ class Tokenizer(object): t.value = t.value[1:-1] return t + def t_COMMENTS(self, t): + r'(\/\*(.|\n)*?\*\/)|(\/\/.*)' + pass + def t_WHITESPACE(self, t): - r'[\t\n\r ]+|[\t\n\r ]*((//[^\n]*|/\*.*?\*/)[\t\n\r ]*)+' + r'[\t\n\r ]+' pass def t_ELLIPSIS(self, t): @@ -5840,7 +5863,7 @@ class Tokenizer(object): "async": "ASYNC", } - 
tokens.extend(keywords.values()) + tokens.extend(list(keywords.values())) def t_error(self, t): raise WebIDLError("Unrecognized Input", @@ -5849,23 +5872,21 @@ class Tokenizer(object): lexpos=self.lexer.lexpos, filename=self.filename)]) - def __init__(self, outputdir, lexer=None): + def __init__(self, lexer=None): if lexer: self.lexer = lexer else: - self.lexer = lex.lex(object=self, - outputdir=outputdir, - lextab='webidllex', - reflags=re.DOTALL) + self.lexer = lex.lex(object=self) class SqueakyCleanLogger(object): errorWhitelist = [ - # Web IDL defines the WHITESPACE token, but doesn't actually + # Web IDL defines the WHITESPACE and COMMENTS token, but doesn't actually # use it ... so far. "Token 'WHITESPACE' defined, but not used", - # And that means we have an unused token - "There is 1 unused token", + "Token 'COMMENTS' defined, but not used", + # And that means we have unused tokens + "There are 2 unused tokens", # Web IDL defines a OtherOrComma rule that's only used in # ExtendedAttributeInner, which we don't use yet. "Rule 'OtherOrComma' defined, but not used", @@ -7506,22 +7527,11 @@ class Parser(Tokenizer): raise WebIDLError("invalid syntax", [Location(self.lexer, p.lineno, p.lexpos, self._filename)]) def __init__(self, outputdir='', lexer=None): - Tokenizer.__init__(self, outputdir, lexer) + Tokenizer.__init__(self, lexer) logger = SqueakyCleanLogger() try: - self.parser = yacc.yacc(module=self, - outputdir=outputdir, - tabmodule='webidlyacc', - errorlog=logger, - debug=False - # Pickling the grammar is a speedup in - # some cases (older Python?) but a - # significant slowdown in others. - # We're not pickling for now, until it - # becomes a speedup again. - # , picklefile='WebIDLGrammar.pkl' - ) + self.parser = yacc.yacc(module=self, errorlog=logger, debug=False) finally: logger.reportGrammarErrors() @@ -7553,12 +7563,12 @@ class Parser(Tokenizer): return type def parse(self, t, filename=None): - self.lexer.input(t) + self._filename = filename + self.lexer.input(t.decode(encoding = 'utf-8')) # for tok in iter(self.lexer.token, None): # print tok - self._filename = filename self._productions.extend(self.parser.parse(lexer=self.lexer, tracking=True)) self._filename = None diff --git a/components/script/dom/bindings/codegen/ply/README b/components/script/dom/bindings/codegen/ply/README index 2459c490197..d3de9993360 100644 --- a/components/script/dom/bindings/codegen/ply/README +++ b/components/script/dom/bindings/codegen/ply/README @@ -3,7 +3,7 @@ http://www.dabeaz.com/ply/ Licensed under BSD. -This directory contains just the code and license from PLY version 3.3; +This directory contains just the code and license from PLY version 4.0; the full distribution (see the URL) also contains examples, tests, documentation, and a longer README. 
diff --git a/components/script/dom/bindings/codegen/ply/ply/__init__.py b/components/script/dom/bindings/codegen/ply/ply/__init__.py index 853a985542b..87838622863 100644 --- a/components/script/dom/bindings/codegen/ply/ply/__init__.py +++ b/components/script/dom/bindings/codegen/ply/ply/__init__.py @@ -1,4 +1,6 @@ # PLY package # Author: David Beazley (dave@dabeaz.com) +# https://dabeaz.com/ply/index.html +__version__ = '4.0' __all__ = ['lex','yacc'] diff --git a/components/script/dom/bindings/codegen/ply/ply/lex.py b/components/script/dom/bindings/codegen/ply/ply/lex.py index 267ec100fc2..57b61f1779e 100644 --- a/components/script/dom/bindings/codegen/ply/ply/lex.py +++ b/components/script/dom/bindings/codegen/ply/ply/lex.py @@ -1,22 +1,24 @@ # ----------------------------------------------------------------------------- # ply: lex.py # -# Copyright (C) 2001-2009, +# Copyright (C) 2001-2020 # David M. Beazley (Dabeaz LLC) # All rights reserved. # +# Latest version: https://github.com/dabeaz/ply +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# and/or other materials provided with the distribution. +# * Neither the name of David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -31,72 +33,50 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -__version__ = "3.3" -__tabversion__ = "3.2" # Version of table file used - -import re, sys, types, copy, os - -# This tuple contains known string types -try: - # Python 2.6 - StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - # Python 3.0 - StringTypes = (str, bytes) - -# Extract the code attribute of a function. Different implementations -# are for Python 2/3 compatibility. +import re +import sys +import types +import copy +import os +import inspect -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ +# This tuple contains acceptable string types +StringTypes = (str, bytes) # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') # Exception thrown when invalid token encountered and no default error # handler is defined. - class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s + def __init__(self, message, s): + self.args = (message,) + self.text = s # Token class. This class is used to represent the tokens produced. 
class LexToken(object): - def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) def __repr__(self): - return str(self) + return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})' -# This object is a stand-in for a logging object created by the -# logging module. +# This object is a stand-in for a logging object created by the +# logging module. class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def critical(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') info = critical debug = critical -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self,name): - return self - def __call__(self,*args,**kwargs): - return self - # ----------------------------------------------------------------------------- # === Lexing Engine === # @@ -114,31 +94,32 @@ class NullLogger(object): class Lexer: def __init__(self): self.lexre = None # Master regular expression. This is a list of - # tuples (re,findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules + # tuples (re, findex) where re is a compiled + # regular expression and findex is a list + # mapping regex group numbers to rules self.lexretext = None # Current regular expression strings self.lexstatere = {} # Dictionary mapping lexer states to master regexs self.lexstateretext = {} # Dictionary mapping lexer states to regex strings self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state + self.lexstate = 'INITIAL' # Current lexer state self.lexstatestack = [] # Stack of lexer states self.lexstateinfo = None # State information self.lexstateignore = {} # Dictionary of ignored characters for each state self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state self.lexreflags = 0 # Optional re compile flags self.lexdata = None # Actual input data (as a string) self.lexpos = 0 # Current position in input text self.lexlen = 0 # Length of the input text self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through self.lexmodule = None # Module self.lineno = 1 # Current line number - self.lexoptimize = 0 # Optimized mode - def clone(self,object=None): + def clone(self, object=None): c = copy.copy(self) # If the object parameter has been supplied, it means we are attaching the @@ -146,113 +127,29 @@ class Lexer: # the lexstatere and lexstateerrorf tables. 
if object: - newtab = { } + newtab = {} for key, ritem in self.lexstatere.items(): newre = [] for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) newtab[key] = newre c.lexstatere = newtab - c.lexstateerrorf = { } + c.lexstateerrorf = {} for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) + c.lexstateerrorf[key] = getattr(object, ef.__name__) c.lexmodule = object return c # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self,tabfile,outputdir=""): - if isinstance(tabfile,types.ModuleType): - return - basetabfilename = tabfile.split(".")[-1] - filename = os.path.join(outputdir,basetabfilename)+".py" - tf = open(filename,"w") - tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) - tf.write("_tabversion = %s\n" % repr(__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - # Collect all functions in the initial state - initial = self.lexstatere["INITIAL"] - initialfuncs = [] - for part in initial: - for f in part[1]: - if f and f[0]: - initialfuncs.append(f) - - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - if isinstance(tabfile,types.ModuleType): - lextab = tabfile - else: - if sys.version_info[0] < 3: - exec("import %s as lextab" % tabfile) - else: - env = { } - exec("import %s as lextab" % tabfile, env,env) - lextab = env['lextab'] - - if getattr(lextab,"_tabversion","0.0") != __version__: - raise ImportError("Inconsistent PLY version") - - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] - 
self.begin('INITIAL') - - # ------------------------------------------------------------ # input() - Push a new string into the lexer # ------------------------------------------------------------ - def input(self,s): - # Pull off the first character to see if s looks like a string - c = s[:1] - if not isinstance(c,StringTypes): - raise ValueError("Expected a string") + def input(self, s): self.lexdata = s self.lexpos = 0 self.lexlen = len(s) @@ -260,19 +157,20 @@ class Lexer: # ------------------------------------------------------------ # begin() - Changes the lexing state # ------------------------------------------------------------ - def begin(self,state): - if not state in self.lexstatere: - raise ValueError("Undefined state") + def begin(self, state): + if state not in self.lexstatere: + raise ValueError(f'Undefined state {state!r}') self.lexre = self.lexstatere[state] self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) self.lexstate = state # ------------------------------------------------------------ # push_state() - Changes the lexing state and saves old on stack # ------------------------------------------------------------ - def push_state(self,state): + def push_state(self, state): self.lexstatestack.append(self.lexstate) self.begin(state) @@ -291,11 +189,11 @@ class Lexer: # ------------------------------------------------------------ # skip() - Skip ahead n characters # ------------------------------------------------------------ - def skip(self,n): + def skip(self, n): self.lexpos += n # ------------------------------------------------------------ - # opttoken() - Return the next token from the Lexer + # token() - Return the next token from the Lexer # # Note: This function has been carefully implemented to be as fast # as possible. Don't make changes unless you really know what @@ -315,9 +213,10 @@ class Lexer: continue # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue # Create a token for return tok = LexToken() @@ -326,16 +225,16 @@ class Lexer: tok.lexpos = lexpos i = m.lastindex - func,tok.type = lexindexfunc[i] + func, tok.type = lexindexfunc[i] if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break lexpos = m.end() @@ -344,22 +243,15 @@ class Lexer: tok.lexer = self # Set additional attributes useful in token rules self.lexmatch = m self.lexpos = lexpos - newtok = func(tok) + del tok.lexer + del self.lexmatch # Every function must return a token, if nothing, we just move to next token if not newtok: lexpos = self.lexpos # This is here in case user has updated lexpos. lexignore = self.lexignore # This is here in case there was a state change break - - # Verify type of the token. 
If not in the token map, raise an error - if not self.lexoptimize: - if not newtok.type in self.lextokens: - raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func_code(func).co_filename, func_code(func).co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) - return newtok else: # No match, see if in literals @@ -377,38 +269,50 @@ class Lexer: tok = LexToken() tok.value = self.lexdata[lexpos:] tok.lineno = self.lineno - tok.type = "error" + tok.type = 'error' tok.lexer = self tok.lexpos = lexpos self.lexpos = lexpos newtok = self.lexerrorf(tok) if lexpos == self.lexpos: # Error method didn't change text position at all. This is an error. - raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + raise LexError(f"Scanning error. Illegal character {lexdata[lexpos]!r}", + lexdata[lexpos:]) lexpos = self.lexpos - if not newtok: continue + if not newtok: + continue return newtok self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) + raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", + lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok self.lexpos = lexpos + 1 if self.lexdata is None: - raise RuntimeError("No input string given with input()") + raise RuntimeError('No input string given with input()') return None # Iterator interface def __iter__(self): return self - def next(self): + def __next__(self): t = self.token() if t is None: raise StopIteration return t - __next__ = next - # ----------------------------------------------------------------------------- # ==== Lex Builder === # @@ -417,59 +321,24 @@ class Lexer: # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + +# ----------------------------------------------------------------------------- # get_caller_module_dict() # # This function returns a dictionary containing all of the symbols defined within # a caller further down the call stack. This is used to get the environment # associated with the yacc() call if none was provided. 
# ----------------------------------------------------------------------------- - def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist,namelist): - result = [] - for f,name in zip(funclist,namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) - return result - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result + f = sys._getframe(levels) + return { **f.f_globals, **f.f_locals } # ----------------------------------------------------------------------------- # _form_master_re() @@ -478,36 +347,35 @@ def _names_to_funcs(namelist,fdict): # form the master regular expression. Given limitations in the Python re # module, it may be necessary to break the master regex into separate expressions. # ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict,toknames): - if not relist: return [] - regex = "|".join(relist) +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [], [], [] + regex = '|'.join(relist) try: - lexre = re.compile(regex,re.VERBOSE | reflags) + lexre = re.compile(regex, reflags) # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) lexindexnames = lexindexfunc[:] - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,toknames[f]) + lexindexfunc[i] = (handle, toknames[f]) lexindexnames[i] = f elif handle is not None: lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) else: lexindexfunc[i] = (None, toknames[f]) - - return [(lexre,lexindexfunc)],[regex],[lexindexnames] + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] except Exception: - m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) - rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) - return llist+rlist, lre+rre, lnames+rnames + m = (len(relist) // 2) + 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) # ----------------------------------------------------------------------------- # def _statetoken(s,names) @@ -517,22 +385,22 
@@ def _form_master_re(relist,reflags,ldict,toknames): # is a tuple of state names and tokenname is the name of the token. For example, # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') # ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not parts[i] in names and parts[i] != 'ANY': break if i > 1: - states = tuple(parts[1:i]) + states = tuple(parts[1:i]) else: - states = ('INITIAL',) + states = ('INITIAL',) if 'ANY' in states: - states = tuple(names) + states = tuple(names) - tokenname = "_".join(parts[i:]) - return (states,tokenname) + tokenname = '_'.join(parts[i:]) + return (states, tokenname) # ----------------------------------------------------------------------------- @@ -542,19 +410,15 @@ def _statetoken(s,names): # user's input file. # ----------------------------------------------------------------------------- class LexerReflect(object): - def __init__(self,ldict,log=None,reflags=0): + def __init__(self, ldict, log=None, reflags=0): self.ldict = ldict self.error_func = None self.tokens = [] self.reflags = reflags - self.stateinfo = { 'INITIAL' : 'inclusive'} - self.files = {} - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log # Get all of the basic information def get_all(self): @@ -562,7 +426,7 @@ class LexerReflect(object): self.get_literals() self.get_states() self.get_rules() - + # Validate all of the information def validate_all(self): self.validate_tokens() @@ -572,20 +436,20 @@ class LexerReflect(object): # Get the tokens map def get_tokens(self): - tokens = self.ldict.get("tokens",None) + tokens = self.ldict.get('tokens', None) if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return self.tokens = tokens @@ -595,280 +459,270 @@ class LexerReflect(object): terminals = {} for n in self.tokens: if not _is_identifier.match(n): - self.log.error("Bad token name '%s'",n) - self.error = 1 + self.log.error(f"Bad token name {n!r}") + self.error = True if n in terminals: - self.log.warning("Token '%s' multiply defined", n) + self.log.warning(f"Token {n!r} multiply defined") terminals[n] = 1 # Get the literals specifier def get_literals(self): - self.literals = self.ldict.get("literals","") + self.literals = self.ldict.get('literals', '') + if not self.literals: + self.literals = '' # Validate literals def validate_literals(self): try: for c in self.literals: - if not isinstance(c,StringTypes) or len(c) > 1: - self.log.error("Invalid literal %s. Must be a single character", repr(c)) - self.error = 1 - continue + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error(f'Invalid literal {c!r}. 
Must be a single character') + self.error = True except TypeError: - self.log.error("Invalid literals specification. literals must be a sequence of characters") - self.error = 1 + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True def get_states(self): - self.states = self.ldict.get("states",None) + self.states = self.ldict.get('states', None) # Build statemap if self.states: - if not isinstance(self.states,(tuple,list)): - self.log.error("states must be defined as a tuple or list") - self.error = 1 - else: - for s in self.states: - if not isinstance(s,tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s)) - self.error = 1 - continue - name, statetype = s - if not isinstance(name,StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) - self.error = 1 - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined",name) - self.error = 1 - continue - self.stateinfo[name] = statetype + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %r. Must be a tuple (statename,'exclusive|inclusive')", s) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %r must be a string', name) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State %r already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various # categories (functions, strings, error functions, and ignore characters) def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_' ] + tsymbols = [f for f in self.ldict if f[:2] == 't_'] # Now build up a list of functions and a list of strings - - self.toknames = { } # Mapping of symbols to token names - self.funcsym = { } # Symbols defined as functions - self.strsym = { } # Symbols defined as strings - self.ignore = { } # Ignore strings by state - self.errorf = { } # Error functions by state + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] + self.funcsym[s] = [] + self.strsym[s] = [] if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = 1 + self.log.error('No rules of the form t_rulename are defined') + self.error = True return for f in tsymbols: t = self.ldict[f] - states, tokname = _statetoken(f,self.stateinfo) + states, tokname = _statetoken(f, self.stateinfo) self.toknames[f] = tokname - if hasattr(t,"__call__"): + if hasattr(t, '__call__'): if tokname == 'error': for s in states: self.errorf[s] = t + elif tokname == 
'eof': + for s in states: + self.eoff[s] = t elif tokname == 'ignore': - line = func_code(t).co_firstlineno - file = func_code(t).co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) - self.error = 1 + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) + self.error = True else: - for s in states: - self.funcsym[s].append((f,t)) + for s in states: + self.funcsym[s].append((f, t)) elif isinstance(t, StringTypes): if tokname == 'ignore': for s in states: self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'",f) + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) elif tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", f) - self.error = 1 + self.log.error("Rule %r must be defined as a function", f) + self.error = True else: - for s in states: - self.strsym[s].append((f,t)) + for s in states: + self.strsym[s].append((f, t)) else: - self.log.error("%s not defined as a function or string", f) - self.error = 1 + self.log.error('%s not defined as a function or string', f) + self.error = True # Sort the functions by line number for f in self.funcsym.values(): - if sys.version_info[0] < 3: - f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) - else: - # Python 3.0 - f.sort(key=lambda x: func_code(x[1]).co_firstlineno) + f.sort(key=lambda x: x[1].__code__.co_firstlineno) # Sort the strings by regular expression length for s in self.strsym.values(): - if sys.version_info[0] < 3: - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - else: - # Python 3.0 - s.sort(key=lambda x: len(x[1]),reverse=True) + s.sort(key=lambda x: len(x[1]), reverse=True) - # Validate all of the t_rules collected + # Validate all of the t_rules collected def validate_rules(self): for state in self.stateinfo: # Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) tokname = self.toknames[fname] if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True continue if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True continue - if not f.__doc__: - self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) - self.error = 1 + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) - if c.match(""): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("%s:%d: Invalid regular expression for rule '%s'. 
%s", file,line,f.__name__,e) - if '#' in f.__doc__: - self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) - self.error = 1 + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) + self.error = True # Validate all rules defined by strings - for name,r in self.strsym[state]: + for name, r in self.strsym[state]: tokname = self.toknames[name] if tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", name) - self.error = 1 + self.log.error("Rule %r must be defined as a function", name) + self.error = True continue - if not tokname in self.tokens and tokname.find("ignore_") < 0: - self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) - self.error = 1 + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule %r defined for an unspecified token %s", name, tokname) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) - if (c.match("")): - self.log.error("Regular expression for rule '%s' matches empty string",name) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("Invalid regular expression for rule '%s'. %s",name,e) + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule %r matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule %r. 
%s", name, e) if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) - self.error = 1 + self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) + self.error = True if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'",state) - self.error = 1 + self.log.error("No rules defined for state %r", state) + self.error = True # Validate the error function - efunc = self.errorf.get(state,None) + efunc = self.errorf.get(state, None) if efunc: f = efunc - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - - for f in self.files: - self.validate_file(f) + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True + for module in self.modules: + self.validate_module(module) # ----------------------------------------------------------------------------- - # validate_file() + # validate_module() # # This checks to see if there are duplicated t_rulename() functions or strings # in the parser input file. This is done using a simple regular expression - # match on each line in the given file. + # match on each line in the source code of the given module. # ----------------------------------------------------------------------------- - def validate_file(self,filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return # No idea what the file is. Return OK - + def validate_module(self, module): try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: - return # Couldn't find the file. Don't worry about it + return fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - for l in lines: - m = fre.match(l) + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) if not m: - m = sre.match(l) + m = sre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev) - self.error = 1 + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) + self.error = True linen += 1 - + # ----------------------------------------------------------------------------- # lex(module) # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): +def lex(*, module=None, object=None, debug=False, + reflags=int(re.VERBOSE), debuglog=None, errorlog=None): + global lexer + ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} + stateinfo = {'INITIAL': 'inclusive'} lexobj = Lexer() - lexobj.lexoptimize = optimize - global token,input + global token, input if errorlog is None: errorlog = PlyLogger(sys.stderr) @@ -878,131 +732,124 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now debuglog = PlyLogger(sys.stderr) # Get the module dictionary used for the lexer - if object: module = object + if object: + module = object + # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ else: ldict = get_caller_module_dict(2) # Collect parser information from the dictionary - linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) linfo.get_all() - if not optimize: - if linfo.validate_all(): - raise SyntaxError("Can't build lexer") - - if optimize and lextab: - try: - lexobj.readtab(lextab,ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass + if linfo.validate_all(): + raise SyntaxError("Can't build lexer") # Dump some basic debugging information if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) # Build a dictionary of valid token names - lexobj.lextokens = { } + lexobj.lextokens = set() for n in linfo.tokens: - lexobj.lextokens[n] = 1 + lexobj.lextokens.add(n) # Get literals specification - if isinstance(linfo.literals,(list,tuple)): + if isinstance(linfo.literals, (list, tuple)): lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) else: lexobj.lexliterals = linfo.literals + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + # Get the stateinfo dictionary stateinfo = linfo.stateinfo - regexs = { } + regexs = {} # Build the master regular expressions for state in stateinfo: regex_list = [] # Add rules defined by functions first for fname, f in linfo.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) # Now add all of the simple rules - for name,r in linfo.strsym[state]: - 
regex_list.append("(?P<%s>%s)" % (name,r)) + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) regexs[state] = regex_list # Build the master regular expressions if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) lexobj.lexstatere[state] = lexre lexobj.lexstateretext[state] = re_text lexobj.lexstaterenames[state] = re_names if debug: - for i in range(len(re_text)): - debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) # For inclusive states, we need to add the regular expressions from the INITIAL state - for state,stype in stateinfo.items(): - if state != "INITIAL" and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] + lexobj.lexre = lexobj.lexstatere['INITIAL'] + lexobj.lexretext = lexobj.lexstateretext['INITIAL'] lexobj.lexreflags = reflags # Set up ignore variables lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') # Set up error functions lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules - for s,stype in stateinfo.items(): + for s, stype in stateinfo.items(): if stype == 'exclusive': - if not s in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if not s in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state %r", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state %r", s) elif stype == 'inclusive': - if not s in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL",None) - if not s in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL","") + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') # Create global versions of the token() and 
input() functions token = lexobj.token input = lexobj.input lexer = lexobj - # If in optimize mode, we write the lextab - if lextab and optimize: - lexobj.writetab(lextab,outputdir) - return lexobj # ----------------------------------------------------------------------------- @@ -1011,15 +858,14 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # This runs the lexer as a main program # ----------------------------------------------------------------------------- -def runmain(lexer=None,data=None): +def runmain(lexer=None, data=None): if not data: try: filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() + with open(filename) as f: + data = f.read() except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") + sys.stdout.write('Reading from standard input (type EOF to end):\n') data = sys.stdin.read() if lexer: @@ -1032,10 +878,11 @@ def runmain(lexer=None,data=None): else: _token = token - while 1: + while True: tok = _token() - if not tok: break - sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) + if not tok: + break + sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n') # ----------------------------------------------------------------------------- # @TOKEN(regex) @@ -1045,14 +892,10 @@ def runmain(lexer=None,data=None): # ----------------------------------------------------------------------------- def TOKEN(r): - def set_doc(f): - if hasattr(r,"__call__"): - f.__doc__ = r.__doc__ + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) else: - f.__doc__ = r + f.regex = r return f - return set_doc - -# Alternative spelling of the TOKEN decorator -Token = TOKEN - + return set_regex diff --git a/components/script/dom/bindings/codegen/ply/ply/yacc.py b/components/script/dom/bindings/codegen/ply/ply/yacc.py index e9f5c657551..bce63c18241 100644 --- a/components/script/dom/bindings/codegen/ply/ply/yacc.py +++ b/components/script/dom/bindings/codegen/ply/ply/yacc.py @@ -1,22 +1,24 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # -# Copyright (C) 2001-2009, +# Copyright (C) 2001-2020 # David M. Beazley (Dabeaz LLC) # All rights reserved. # +# Latest version: https://github.com/dabeaz/ply +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# and/or other materials provided with the distribution. +# * Neither the name of David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -32,7 +34,7 @@ # ----------------------------------------------------------------------------- # # This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside +# as Python functions. The grammar is specified by supplying the BNF inside # Python documentation strings. The inspiration for this technique was borrowed # from John Aycock's Spark parsing system. PLY might be viewed as cross between # Spark and the GNU bison utility. @@ -59,8 +61,10 @@ # own risk! # ---------------------------------------------------------------------------- -__version__ = "3.3" -__tabversion__ = "3.2" # Table version +import re +import types +import sys +import inspect #----------------------------------------------------------------------------- # === User configurable parameters === @@ -68,95 +72,69 @@ __tabversion__ = "3.2" # Table version # Change these to modify the default behavior of yacc (if you wish) #----------------------------------------------------------------------------- -yaccdebug = 1 # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory +yaccdebug = False # Debugging mode. If set, yacc generates a +# a 'parser.out' file in the current directory debug_file = 'parser.out' # Default name of the debugging file -tab_module = 'parsetab' # Default name of the table module -default_lr = 'LALR' # Default LR table generation method - error_count = 3 # Number of symbols that must be shifted to leave recovery mode - -yaccdevel = 0 # Set to True if developing yacc. This turns off optimized - # implementations of certain functions. - resultlimit = 40 # Size limit of results when running in debug mode. -pickle_protocol = 0 # Protocol to use when writing pickle files - -import re, types, sys, os.path - -# Compatibility function for python 2.6/3.0 -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - -# Compatibility -try: - MAXINT = sys.maxint -except AttributeError: - MAXINT = sys.maxsize +MAXINT = sys.maxsize -# Python 2.x/3.0 compatibility. -def load_ply_lex(): - if sys.version_info[0] < 3: - import lex - else: - import ply.lex as lex - return lex - -# This object is a stand-in for a logging object created by the +# This object is a stand-in for a logging object created by the # logging module. PLY will use this by default to create things # such as the parser.out file. If a user wants more detailed # information, they can create their own logging object and pass # it into PLY. class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def debug(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - info = debug - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + info = debug - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') critical = debug # Null logger is used when no output is generated. Does nothing. 
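PlyLogger is deliberately duck-typed against the standard logging module: anything exposing debug/info/warning/error/critical can stand in for it. A minimal sketch of both options; the yacc() call is commented out because it assumes a grammar module exists elsewhere:

    import sys
    import logging
    import ply.yacc as yacc

    # The built-in fallback writes plain lines to a stream:
    log = yacc.PlyLogger(sys.stderr)
    log.warning('Token %r defined, but not used', 'NUMBER')
    # -> WARNING: Token 'NUMBER' defined, but not used

    # A stdlib logging.Logger satisfies the same interface, so it can be
    # passed wherever PLY expects a logger object:
    logging.basicConfig(level=logging.DEBUG, filename='parser.log')
    # parser = yacc.yacc(debug=True, debuglog=logging.getLogger('ply'))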
class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self - + # Exception raised for yacc-related errors -class YaccError(Exception): pass +class YaccError(Exception): + pass # Format the result message that the parser produces when running in debug mode. def format_result(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit]+" ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) return result - # Format stack entries when the parser is running in debug mode def format_stack_entry(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) < 16: return repr_str else: - return "<%s @ 0x%x>" % (type(r).__name__,id(r)) + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) #----------------------------------------------------------------------------- # === LR Parsing Engine === @@ -176,8 +154,11 @@ def format_stack_entry(r): # .endlexpos = Ending lex position (optional, set automatically) class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) + def __str__(self): + return self.type + + def __repr__(self): + return str(self) # This class is a wrapper around the objects actually passed to each # grammar rule. Index lookup and assignment actually assign the @@ -189,46 +170,53 @@ class YaccSymbol: # representing the range of positional information for a symbol. 
class YaccProduction: - def __init__(self,s,stack=None): + def __init__(self, s, stack=None): self.slice = s self.stack = stack self.lexer = None - self.parser= None - def __getitem__(self,n): - if n >= 0: return self.slice[n].value - else: return self.stack[n].value + self.parser = None + + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value - def __setitem__(self,n,v): + def __setitem__(self, n, v): self.slice[n].value = v - def __getslice__(self,i,j): + def __getslice__(self, i, j): return [s.value for s in self.slice[i:j]] def __len__(self): return len(self.slice) - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) - def set_lineno(self,n,lineno): + def set_lineno(self, n, lineno): self.slice[n].lineno = lineno - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos - def error(self): - raise SyntaxError + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + def error(self): + raise SyntaxError # ----------------------------------------------------------------------------- # == LRParser == @@ -237,14 +225,16 @@ class YaccProduction: # ----------------------------------------------------------------------------- class LRParser: - def __init__(self,lrtab,errorf): + def __init__(self, lrtab, errorf): self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True def errok(self): - self.errorok = 1 + self.errorok = True def restart(self): del self.statestack[:] @@ -254,47 +244,52 @@ class LRParser: self.symstack.append(sym) self.statestack.append(0) - def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - if debug or yaccdevel: - if isinstance(debug,int): - debug = PlyLogger(sys.stderr) - return self.parsedebug(input,lexer,debug,tracking,tokenfunc) - elif tracking: - return self.parseopt(input,lexer,debug,tracking,tokenfunc) - else: - return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) - - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parsedebug(). + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.).
# - # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. For the non-debugging version, - # copy this code to a method parseopt() and delete all of the sections - # enclosed in: + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + # parse(). # - # #--! DEBUG - # statements - # #--! DEBUG - # - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + # This is the core parsing engine. To operate, it requires a lexer object. + # Two options are provided. The debug flag turns on debugging so that you can + # see the various rule reductions and parsing steps. tracking turns on position + # tracking. In this mode, symbols will record the starting/ending line number and + # character index. + + def parse(self, input=None, lexer=None, debug=False, tracking=False): + # If debugging has been specified as a flag, turn it into a logging object + if isinstance(debug, int) and debug: + debug = PlyLogger(sys.stderr) + + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! DEBUG + if debug: + debug.info('PLY: PARSE DEBUG START') # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . 
import lex lexer = lex.lexer # Set up the lexer and parser objects on pslice @@ -305,72 +300,67 @@ class LRParser: if input is not None: lexer.input(input) - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc + # Set the token function + get_token = self.token = lexer.token # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token + statestack = self.statestack = [] # Stack of parsing states + symstack = self.symstack = [] # Stack of grammar symbols + pslice.stack = symstack # Put in the production + errtoken = None # Err token # The start state is assumed to be (0,$end) statestack.append(0) sym = YaccSymbol() - sym.type = "$end" + sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - # --! DEBUG - debug.debug('') - debug.debug('State : %s', state) - # --! DEBUG + if debug: + debug.debug('State : %s', state) - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" - - # --! DEBUG - debug.debug('Stack : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if debug: + debug.debug('Defaulted state %s: Reduce using %d', state, -t) - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + if debug: + debug.debug('Stack : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) if t is not None: if t > 0: # shift a symbol on the stack statestack.append(t) state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG + + if debug: + debug.debug('Action : Shift and goto state %s', t) symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -384,358 +374,69 @@ class LRParser: sym.type = pname # Production name sym.value = None - # --! DEBUG - if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) - else: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) - - # --! DEBUG + if debug: + if plen: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) + else: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! 
TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - # --! TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ try: # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + del statestack[-plen:] + if debug: + debug.info('Result : %s', format_result(pslice[0])) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + self.errorok = False - if t == 0: - n = symstack[-1] - result = getattr(n,"value",None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG - return result - - if t == None: - - # --! DEBUG - debug.error('Error : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. 
- if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == "$end": - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != "$end": - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == "$end": - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt(). - # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. - # Edit the debug version above, then copy any modifications to the method - # below while removing #--! DEBUG sections. - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) 
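With the three parse variants collapsing into a single parse() (the removed parseopt()/parseopt_notrack() below were hand-maintained copies of parsedebug()), error recovery also stops exporting the module-level errok/token/restart globals; the same hooks now live on the parser object. A minimal sketch of a p_error handler under the new convention, assuming parser holds the object returned by yacc.yacc():

    def p_error(tok):
        if tok is None:
            print('Syntax error at EOF')
            return
        print('Syntax error at line %d, token=%s' % (tok.lineno, tok.type))
        # The old recovery globals are gone; use the parser's methods instead:
        #   parser.errok()   - clear the error state, as if recovery succeeded
        #   parser.restart() - discard the whole parse and start over
        parser.errok()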
- pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # --! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - else: - # --! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. 
@@ -743,283 +444,41 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) + if debug: + debug.info('Result : %s', format_result(pslice[0])) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) - - if t == None: - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue + result = getattr(n, 'value', None) - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. 
Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt_notrack(). - # - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove - # code in the #--! TRACKING sections - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue + if debug: + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. 
Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + return result - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) + if t is None: - if t == None: + if debug: + debug.error('Error : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -1033,20 +492,15 @@ class LRParser: # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer + self.state = state tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -1056,14 +510,16 @@ class LRParser: continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -1094,34 +550,43 @@ class LRParser: if sym.type == 'error': # Hmmm. 
Error is on top of stack, we'll just nuke input - # symbol and continue + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + # If we're here, something really bad happened + raise RuntimeError('yacc: internal parser error!!!\n') # ----------------------------------------------------------------------------- # === Grammar Representation === # # The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. +# manipulate the rules that make up a grammar. # ----------------------------------------------------------------------------- -import re - # regex matching identifiers _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') @@ -1131,7 +596,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') # This class stores the raw information about a single production or grammar rule. # A grammar rule refers to a specification such as this: # -# expr : expr PLUS term +# expr : expr PLUS term # # Here are the basic attributes defined on all productions # @@ -1151,7 +616,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 - def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): self.name = name self.prod = tuple(prod) self.number = number @@ -1162,11 +627,11 @@ class Production(object): self.prec = precedence # Internal settings used during table construction - + self.len = len(self.prod) # Length of the production # Create a list of unique production symbols used in the production - self.usyms = [ ] + self.usyms = [] for s in self.prod: if s not in self.usyms: self.usyms.append(s) @@ -1177,15 +642,15 @@ class Production(object): # Create a string representation if self.prod: - self.str = "%s -> %s" % (self.name," ".join(self.prod)) + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - self.str = "%s -> <empty>" % self.name + self.str = '%s -> <empty>' % self.name def __str__(self): return self.str def __repr__(self): - return "Production("+str(self)+")" + return 'Production(' + str(self) + ')' def __len__(self): return len(self.prod) @@ -1193,62 +658,37 @@ class Production(object): def __nonzero__(self): return 1 - def __getitem__(self,index): + def __getitem__(self, index): return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self,n): - if n > len(self.prod): return None - p = LRItem(self,n) - # Precompute the list of productions immediately following. Hack. 
Remove later + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError,KeyError): + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): p.lr_after = [] try: p.lr_before = p.prod[n-1] except IndexError: p.lr_before = None - return p - - # Bind the production function name to a callable - def bind(self,pdict): - if self.func: - self.callable = pdict[self.func] - -# This class serves as a minimal standin for Production objects when -# reading table data from files. It only contains information -# actually used by the LR parsing engine, plus some additional -# debugging information. -class MiniProduction(object): - def __init__(self,str,name,len,func,file,line): - self.name = name - self.len = len - self.func = func - self.callable = None - self.file = file - self.line = line - self.str = str - def __str__(self): - return self.str - def __repr__(self): - return "MiniProduction(%s)" % self.str # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] - # ----------------------------------------------------------------------------- # class LRItem # # This class represents a specific stage of parsing a production rule. For -# example: +# example: # -# expr : expr . PLUS term +# expr : expr . PLUS term # # In the above, the "." represents the current location of the parse. Here # basic attributes: @@ -1267,26 +707,26 @@ class MiniProduction(object): # ----------------------------------------------------------------------------- class LRItem(object): - def __init__(self,p,n): + def __init__(self, p, n): self.name = p.name self.prod = list(p.prod) self.number = p.number self.lr_index = n - self.lookaheads = { } - self.prod.insert(n,".") + self.lookaheads = {} + self.prod.insert(n, '.') self.prod = tuple(self.prod) self.len = len(self.prod) self.usyms = p.usyms def __str__(self): if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) + s = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - s = "%s -> <empty>" % self.name + s = '%s -> <empty>' % self.name return s def __repr__(self): - return "LRItem("+str(self)+")" + return 'LRItem(' + str(self) + ')' # ----------------------------------------------------------------------------- # rightmost_terminal() @@ -1309,41 +749,42 @@ def rightmost_terminal(symbols, terminals): # This data is used for critical parts of the table generation process later. # ----------------------------------------------------------------------------- -class GrammarError(YaccError): pass +class GrammarError(YaccError): + pass class Grammar(object): - def __init__(self,terminals): + def __init__(self, terminals): self.Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar + # entry is always reserved for the purpose of + # building an augmented grammar - self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. - self.Prodmap = { } # A dictionary that is only used to detect duplicate - # productions. 
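In a user's grammar module, a production like the expr : expr PLUS term example above is written as the docstring of an action function, and the p argument is the YaccProduction wrapper ported earlier in this patch. A minimal sketch with illustrative token names:

    def p_expr_plus(p):
        'expr : expr PLUS term'
        # p[0] is the left-hand side; p[1]..p[3] read .value off the slice
        p[0] = p[1] + p[3]
        # The port adds real slice support to __getitem__, so p[1:] now
        # returns [value_of_expr, value_of_PLUS, value_of_term].
        # With parse(..., tracking=True) the position helpers also work:
        #   p.lineno(1), p.linespan(1), p.lexpos(1), p.lexspan(1)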
+ self.Prodmap = {} # A dictionary that is only used to detect duplicate + # productions. - self.Terminals = { } # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. for term in terminals: self.Terminals[term] = [] self.Terminals['error'] = [] - self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. - self.First = { } # A dictionary of precomputed FIRST(x) symbols + self.First = {} # A dictionary of precomputed FIRST(x) symbols - self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) - self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedence rules. + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. self.Start = None # Starting symbol for the grammar @@ -1351,7 +792,7 @@ class Grammar(object): def __len__(self): return len(self.Productions) - def __getitem__(self,index): + def __getitem__(self, index): return self.Productions[index] # ----------------------------------------------------------------------------- @@ -1362,14 +803,14 @@ class Grammar(object): # # ----------------------------------------------------------------------------- - def set_precedence(self,term,assoc,level): - assert self.Productions == [None],"Must call set_precedence() before add_production()" + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal '%s'" % term) - if assoc not in ['left','right','nonassoc']: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc,level) - + self.Precedence[term] = (assoc, level) + # ----------------------------------------------------------------------------- # add_production() # @@ -1387,72 +828,74 @@ class Grammar(object): # are valid and that %prec is used correctly. # ----------------------------------------------------------------------------- - def add_production(self,prodname,syms,func=None,file='',line=0): + def add_production(self, prodname, syms, func=None, file='', line=0): if prodname in self.Terminals: - raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) if prodname == 'error': - raise GrammarError("%s:%d: Illegal rule name '%s'. 
error is a reserved word" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) - # Look for literal tokens - for n,s in enumerate(syms): + # Look for literal tokens + for n, s in enumerate(syms): if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) - if not c in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass if not _is_identifier.match(s) and s != '%prec': - raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) - + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + # Determine the precedence level if '%prec' in syms: if syms[-1] == '%prec': - raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) if syms[-2] != '%prec': - raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) + raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % + (file, line)) precname = syms[-1] - prodprec = self.Precedence.get(precname,None) + prodprec = self.Precedence.get(precname) if not prodprec: - raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) else: - self.UsedPrecedence[precname] = 1 + self.UsedPrecedence.add(precname) del syms[-2:] # Drop %prec from the rule else: # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms,self.Terminals) - prodprec = self.Precedence.get(precname,('right',0)) - + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) + map = '%s -> %s' % (prodname, syms) if map in self.Prodmap: m = self.Prodmap[map] - raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + - "Previous definition at %s:%d" % (m.file, m.line)) + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) # From this point on, everything is valid. 
Create a new Production instance pnumber = len(self.Productions) - if not prodname in self.Nonterminals: - self.Nonterminals[prodname] = [ ] + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] # Add the production number to Terminals and Nonterminals for t in syms: if t in self.Terminals: self.Terminals[t].append(pnumber) else: - if not t in self.Nonterminals: - self.Nonterminals[t] = [ ] + if t not in self.Nonterminals: + self.Nonterminals[t] = [] self.Nonterminals[t].append(pnumber) # Create a production and add it to the list of productions - p = Production(pnumber,prodname,syms,prodprec,func,file,line) + p = Production(pnumber, prodname, syms, prodprec, func, file, line) self.Productions.append(p) self.Prodmap[map] = p @@ -1460,22 +903,21 @@ class Grammar(object): try: self.Prodnames[prodname].append(p) except KeyError: - self.Prodnames[prodname] = [ p ] - return 0 + self.Prodnames[prodname] = [p] # ----------------------------------------------------------------------------- # set_start() # - # Sets the starting symbol and creates the augmented grammar. Production + # Sets the starting symbol and creates the augmented grammar. Production # rule 0 is S' -> start where start is the start symbol. # ----------------------------------------------------------------------------- - def set_start(self,start=None): + def set_start(self, start=None): if not start: start = self.Productions[1].name if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0,"S'",[start]) + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) self.Nonterminals[start].append(0) self.Start = start @@ -1487,26 +929,20 @@ class Grammar(object): # ----------------------------------------------------------------------------- def find_unreachable(self): - + # Mark all symbols that are reachable from a symbol s def mark_reachable_from(s): - if reachable[s]: - # We've already reached symbol s. + if s in reachable: return - reachable[s] = 1 - for p in self.Prodnames.get(s,[]): + reachable.add(s) + for p in self.Prodnames.get(s, []): for r in p.prod: mark_reachable_from(r) - reachable = { } - for s in list(self.Terminals) + list(self.Nonterminals): - reachable[s] = 0 + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] - mark_reachable_from( self.Productions[0].prod[0] ) - - return [s for s in list(self.Nonterminals) - if not reachable[s]] - # ----------------------------------------------------------------------------- # infinite_cycles() # @@ -1520,20 +956,20 @@ class Grammar(object): # Terminals: for t in self.Terminals: - terminates[t] = 1 + terminates[t] = True - terminates['$end'] = 1 + terminates['$end'] = True # Nonterminals: # Initialize to false: for n in self.Nonterminals: - terminates[n] = 0 + terminates[n] = False # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in self.Prodnames.items(): + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): # Nonterminal n terminates iff any of its productions terminates. for p in pl: # Production p terminates iff all of its rhs symbols terminate. @@ -1541,19 +977,19 @@ class Grammar(object): if not terminates[s]: # The symbol s does not terminate, # so production p does not terminate. 
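These methods are normally driven by yacc() itself, but the Grammar object can be exercised directly, which makes the bookkeeping above easy to see in isolation. A sketch against this module's internal API; the two-rule grammar and the file/line values are made up for illustration:

    g = Grammar(['NUMBER', 'PLUS'])                  # terminal symbols
    g.set_precedence('PLUS', 'left', 1)              # must precede add_production()
    g.add_production('expr', ['expr', 'PLUS', 'expr'], file='calc.py', line=10)
    g.add_production('expr', ['NUMBER'], file='calc.py', line=11)
    g.set_start('expr')                              # installs rule 0: S' -> expr
    print(g.find_unreachable())                      # [] - every nonterminal reachable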
- p_terminates = 0 + p_terminates = False break else: # didn't break from the loop, # so every symbol s terminates # so production p terminates. - p_terminates = 1 + p_terminates = True if p_terminates: # symbol n terminates! if not terminates[n]: - terminates[n] = 1 - some_change = 1 + terminates[n] = True + some_change = True # Don't need to consider any more productions for this n. break @@ -1561,9 +997,9 @@ class Grammar(object): break infinite = [] - for (s,term) in terminates.items(): + for (s, term) in terminates.items(): if not term: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': + if s not in self.Prodnames and s not in self.Terminals and s != 'error': # s is used-but-not-defined, and we've already warned of that, # so it would be overkill to say that it's also non-terminating. pass @@ -1572,22 +1008,22 @@ class Grammar(object): return infinite - # ----------------------------------------------------------------------------- # undefined_symbols() # # Find all symbols that were used the grammar, but not defined as tokens or # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. + # and prod is the production where the symbol was used. # ----------------------------------------------------------------------------- def undefined_symbols(self): result = [] for p in self.Productions: - if not p: continue + if not p: + continue for s in p.prod: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - result.append((s,p)) + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) return result # ----------------------------------------------------------------------------- @@ -1598,7 +1034,7 @@ class Grammar(object): # ----------------------------------------------------------------------------- def unused_terminals(self): unused_tok = [] - for s,v in self.Terminals.items(): + for s, v in self.Terminals.items(): if s != 'error' and not v: unused_tok.append(s) @@ -1613,7 +1049,7 @@ class Grammar(object): def unused_rules(self): unused_prod = [] - for s,v in self.Nonterminals.items(): + for s, v in self.Nonterminals.items(): if not v: p = self.Prodnames[s][0] unused_prod.append(p) @@ -1625,15 +1061,15 @@ class Grammar(object): # Returns a list of tuples (term,precedence) corresponding to precedence # rules that were never used by the grammar. term is the name of the terminal # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. + # 'right' corresponding to the type of precedence. # ----------------------------------------------------------------------------- def unused_precedence(self): unused = [] for termname in self.Precedence: if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname,self.Precedence[termname][0])) - + unused.append((termname, self.Precedence[termname][0])) + return unused # ------------------------------------------------------------------------- @@ -1644,19 +1080,20 @@ class Grammar(object): # During execution of compute_first1, the result may be incomplete. # Afterward (e.g., when called from compute_follow()), it will be complete. 
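Taken together, these checks are what yacc() surfaces as warnings and errors before any tables are built. Continuing the hand-built Grammar sketch above:

    print(g.undefined_symbols())    # [(sym, prod), ...] used but never defined
    print(g.unused_terminals())     # tokens declared but absent from every rule
    print(g.unused_rules())         # productions nothing ever refers to
    print(g.unused_precedence())    # (term, assoc) pairs that no rule needed
    print(g.infinite_cycles())      # nonterminals that can never terminate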
# ------------------------------------------------------------------------- - def _first(self,beta): + def _first(self, beta): # We are computing First(x1,x2,x3,...,xn) - result = [ ] + result = [] for x in beta: - x_produces_empty = 0 + x_produces_empty = False # Add all the non-<empty> symbols of First[x] to the result. for f in self.First[x]: if f == '<empty>': - x_produces_empty = 1 + x_produces_empty = True else: - if f not in result: result.append(f) + if f not in result: + result.append(f) if x_produces_empty: # We have to consider the next x in beta, @@ -1695,17 +1132,17 @@ class Grammar(object): self.First[n] = [] # Then propagate symbols until no change: - while 1: - some_change = 0 + while True: + some_change = False for n in self.Nonterminals: for p in self.Prodnames[n]: for f in self._first(p.prod): if f not in self.First[n]: - self.First[n].append( f ) - some_change = 1 + self.First[n].append(f) + some_change = True if not some_change: break - + return self.First # --------------------------------------------------------------------- @@ -1715,7 +1152,7 @@ class Grammar(object): # follow set is the set of all symbols that might follow a given # non-terminal. See the Dragon book, 2nd Ed. p. 189. # --------------------------------------------------------------------- - def compute_follow(self,start=None): + def compute_follow(self, start=None): # If already computed, return the result if self.Follow: return self.Follow @@ -1726,36 +1163,36 @@ class Grammar(object): # Add '$end' to the follow list of the start symbol for k in self.Nonterminals: - self.Follow[k] = [ ] + self.Follow[k] = [] if not start: start = self.Productions[1].name - self.Follow[start] = [ '$end' ] + self.Follow[start] = ['$end'] - while 1: - didadd = 0 + while True: + didadd = False for p in self.Productions[1:]: # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] + for i, B in enumerate(p.prod): if B in self.Nonterminals: # Okay. We got a non-terminal in a production fst = self._first(p.prod[i+1:]) - hasempty = 0 + hasempty = False for f in fst: if f != '<empty>' and f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 + didadd = True if f == '<empty>': - hasempty = 1 + hasempty = True if hasempty or i == (len(p.prod)-1): # Add elements of follow(a) to follow(b) for f in self.Follow[p.name]: if f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 - if not didadd: break + didadd = True + if not didadd: + break return self.Follow @@ -1779,15 +1216,15 @@ class Grammar(object): lastlri = p i = 0 lr_items = [] - while 1: + while True: if i > len(p): lri = None else: - lri = LRItem(p,i) + lri = LRItem(p, i) # Precompute the list of productions immediately following try: lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError,KeyError): + except (IndexError, KeyError): lri.lr_after = [] try: lri.lr_before = lri.prod[i-1] @@ -1795,86 +1232,17 @@ class Grammar(object): lri.lr_before = None lastlri.lr_next = lri - if not lri: break + if not lri: + break lr_items.append(lri) lastlri = lri i += 1 p.lr_items = lr_items # ----------------------------------------------------------------------------- -# == Class LRTable == -# -# This basic class represents a basic table of LR parsing information. -# Methods for generating the tables are not defined here. They are defined -# in the derived class LRGeneratedTable. 
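# One modernization visible in compute_follow() above: the index loop
# `for i in range(len(p.prod)): B = p.prod[i]` becomes enumerate(), which
# still yields the index needed for the p.prod[i+1:] slice. Illustration
# with a hypothetical production:
prod = ['term', 'PLUS', 'expr']
for i, B in enumerate(prod):
    print(B, 'can be followed by first of', prod[i + 1:])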
-# ----------------------------------------------------------------------------- - -class VersionError(YaccError): pass - -class LRTable(object): - def __init__(self): - self.lr_action = None - self.lr_goto = None - self.lr_productions = None - self.lr_method = None - - def read_table(self,module): - if isinstance(module,types.ModuleType): - parsetab = module - else: - if sys.version_info[0] < 3: - exec("import %s as parsetab" % module) - else: - env = { } - exec("import %s as parsetab" % module, env, env) - parsetab = env['parsetab'] - - if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - - self.lr_action = parsetab._lr_action - self.lr_goto = parsetab._lr_goto - - self.lr_productions = [] - for p in parsetab._lr_productions: - self.lr_productions.append(MiniProduction(*p)) - - self.lr_method = parsetab._lr_method - return parsetab._lr_signature - - def read_pickle(self,filename): - try: - import cPickle as pickle - except ImportError: - import pickle - - in_f = open(filename,"rb") - - tabversion = pickle.load(in_f) - if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - self.lr_method = pickle.load(in_f) - signature = pickle.load(in_f) - self.lr_action = pickle.load(in_f) - self.lr_goto = pickle.load(in_f) - productions = pickle.load(in_f) - - self.lr_productions = [] - for p in productions: - self.lr_productions.append(MiniProduction(*p)) - - in_f.close() - return signature - - # Bind all production function names to callable objects in pdict - def bind_callables(self,pdict): - for p in self.lr_productions: - p.bind(pdict) - -# ----------------------------------------------------------------------------- # === LR Generator === # -# The following classes and functions are used to generate LR parsing tables on +# The following classes and functions are used to generate LR parsing tables on # a grammar. # ----------------------------------------------------------------------------- @@ -1895,17 +1263,18 @@ class LRTable(object): # FP - Set-valued function # ------------------------------------------------------------------------------ -def digraph(X,R,FP): - N = { } +def digraph(X, R, FP): + N = {} for x in X: - N[x] = 0 + N[x] = 0 stack = [] - F = { } + F = {} for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) return F -def traverse(x,N,stack,F,X,R,FP): +def traverse(x, N, stack, F, X, R, FP): stack.append(x) d = len(stack) N[x] = d @@ -1914,35 +1283,34 @@ def traverse(x,N,stack,F,X,R,FP): rel = R(x) # Get y's related to x for y in rel: if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + +class LALRError(YaccError): + pass -class LALRError(YaccError): pass # ----------------------------------------------------------------------------- -# == LRGeneratedTable == +# == LRTable == # # This class implements the LR table generation algorithm. There are no -# public methods except for write() +# public methods. 
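# The table reader deleted above carried a Python 2 fallback,
# `import cPickle as pickle`. Python 3's pickle transparently uses its C
# accelerator, so the equivalent is just the plain import. Toy round-trip:
import pickle

table = {'lr_action': {0: {'NUMBER': 1}}}  # illustrative, not a real PLY table
blob = pickle.dumps(table, pickle.HIGHEST_PROTOCOL)
assert pickle.loads(blob) == table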
# ----------------------------------------------------------------------------- -class LRGeneratedTable(LRTable): - def __init__(self,grammar,method='LALR',log=None): - if method not in ['SLR','LALR']: - raise LALRError("Unsupported method %s" % method) - +class LRTable: + def __init__(self, grammar, log=None): self.grammar = grammar - self.lr_method = method # Set up the logger if not log: @@ -1958,7 +1326,7 @@ class LRGeneratedTable(LRTable): self._add_count = 0 # Internal counter used to detect cycles - # Diagonistic information filled in by the table generator + # Diagnostic information filled in by the table generator self.sr_conflict = 0 self.rr_conflict = 0 self.conflicts = [] # List of conflicts @@ -1972,23 +1340,29 @@ class LRGeneratedTable(LRTable): self.grammar.compute_follow() self.lr_parse_table() + # Bind all production function names to callable objects in pdict + def bind_callables(self, pdict): + for p in self.lr_productions: + p.bind(pdict) + # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - def lr0_closure(self,I): + def lr0_closure(self, I): self._add_count += 1 # Add everything in I to J J = I[:] - didadd = 1 + didadd = True while didadd: - didadd = 0 + didadd = False for j in J: for x in j.lr_after: - if getattr(x,"lr0_added",0) == self._add_count: continue + if getattr(x, 'lr0_added', 0) == self._add_count: + continue # Add B --> .G to J J.append(x.lr_next) x.lr0_added = self._add_count - didadd = 1 + didadd = True return J @@ -1999,43 +1373,43 @@ class LRGeneratedTable(LRTable): # objects). With uniqueness, we can later do fast set comparisons using # id(obj) instead of element-wise comparison. - def lr0_goto(self,I,x): + def lr0_goto(self, I, x): # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I),x),None) - if g: return g + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g # Now we generate the goto set in a way that guarantees uniqueness # of the result - s = self.lr_goto_cache.get(x,None) + s = self.lr_goto_cache.get(x) if not s: - s = { } + s = {} self.lr_goto_cache[x] = s - gs = [ ] + gs = [] for p in I: n = p.lr_next if n and n.lr_before == x: - s1 = s.get(id(n),None) + s1 = s.get(id(n)) if not s1: - s1 = { } + s1 = {} s[id(n)] = s1 gs.append(n) s = s1 - g = s.get('$end',None) + g = s.get('$end') if not g: if gs: g = self.lr0_closure(gs) s['$end'] = g else: s['$end'] = gs - self.lr_goto_cache[(id(I),x)] = g + self.lr_goto_cache[(id(I), x)] = g return g # Compute the LR(0) sets of item function def lr0_items(self): - - C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] i = 0 for I in C: self.lr0_cidhash[id(I)] = i @@ -2048,15 +1422,15 @@ class LRGeneratedTable(LRTable): i += 1 # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } + asyms = {} for ii in I: for s in ii.usyms: asyms[s] = None for x in asyms: - g = self.lr0_goto(I,x) - if not g: continue - if id(g) in self.lr0_cidhash: continue + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue self.lr0_cidhash[id(g)] = len(C) C.append(g) @@ -2091,19 +1465,21 @@ class LRGeneratedTable(LRTable): # ----------------------------------------------------------------------------- def compute_nullable_nonterminals(self): - nullable = {} + nullable = set() num_nullable = 0 - while 1: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 + while True: + for p in self.grammar.Productions[1:]: + if 
p.len == 0: + nullable.add(p.name) continue - for t in p.prod: - if not t in nullable: break - else: - nullable[p.name] = 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) return nullable # ----------------------------------------------------------------------------- @@ -2117,16 +1493,16 @@ class LRGeneratedTable(LRTable): # The input C is the set of LR(0) items. # ----------------------------------------------------------------------------- - def find_nonterminal_transitions(self,C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: trans.append(t) - state = state + 1 - return trans + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans # ----------------------------------------------------------------------------- # dr_relation() @@ -2137,21 +1513,21 @@ class LRGeneratedTable(LRTable): # Returns a list of terminals. # ----------------------------------------------------------------------------- - def dr_relation(self,C,trans,nullable): - dr_set = { } - state,N = trans + def dr_relation(self, C, trans, nullable): + state, N = trans terms = [] - g = self.lr0_goto(C[state],N) + g = self.lr0_goto(C[state], N) for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: terms.append(a) + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) # This extra bit is to handle the start state if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') + terms.append('$end') return terms @@ -2161,18 +1537,18 @@ class LRGeneratedTable(LRTable): # Computes the READS() relation (p,A) READS (t,C). # ----------------------------------------------------------------------------- - def reads_relation(self,C, trans, empty): + def reads_relation(self, C, trans, empty): # Look for empty transitions rel = [] state, N = trans - g = self.lr0_goto(C[state],N) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) for p in g: if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j,a)) + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) return rel @@ -2204,8 +1580,7 @@ class LRGeneratedTable(LRTable): # # ----------------------------------------------------------------------------- - def compute_lookback_includes(self,C,trans,nullable): - + def compute_lookback_includes(self, C, trans, nullable): lookdict = {} # Dictionary of lookback relations includedict = {} # Dictionary of include relations @@ -2215,11 +1590,12 @@ class LRGeneratedTable(LRTable): dtrans[t] = 1 # Loop over all transitions and compute lookbacks and includes - for state,N in trans: + for state, N in trans: lookb = [] includes = [] for p in C[state]: - if p.name != N: continue + if p.name != N: + continue # Okay, we have a name match. We now follow the production all the way # through the state machine until we get the . 
on the right hand side @@ -2227,44 +1603,50 @@ class LRGeneratedTable(LRTable): lr_index = p.lr_index j = state while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j,t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: break # No forget it - if not p.prod[li] in nullable: break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) - - g = self.lr0_goto(C[j],t) # Go to next set - j = self.lr0_cidhash.get(id(g),-1) # Go to next state + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state # When we get here, j is the final state, now we have to locate the production for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." 
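# Sketch of the while/else idiom the reindented block above depends on: the
# else clause runs only when the loop exits without `break`, which is how the
# code decides whether the tail of a production can derive empty. Toy inputs:
def suffix_derives_empty(prod, li, terminals, nullable):
    while li < len(prod):
        if prod[li] in terminals:
            break                 # a terminal never derives empty
        if prod[li] not in nullable:
            break                 # a non-nullable nonterminal blocks it
        li += 1
    else:
        return True               # fell off the end: all remaining nullable
    return False

print(suffix_derives_empty(['a', 'B'], 0, {'B'}, {'a'}))       # False
print(suffix_derives_empty(['a', 'b'], 0, set(), {'a', 'b'}))  # True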
+ while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) for i in includes: - if not i in includedict: includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb - return lookdict,includedict + return lookdict, includedict # ----------------------------------------------------------------------------- # compute_read_sets() @@ -2278,10 +1660,10 @@ class LRGeneratedTable(LRTable): # Returns a set containing the read sets # ----------------------------------------------------------------------------- - def compute_read_sets(self,C, ntrans, nullable): - FP = lambda x: self.dr_relation(C,x,nullable) - R = lambda x: self.reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) return F # ----------------------------------------------------------------------------- @@ -2300,11 +1682,11 @@ class LRGeneratedTable(LRTable): # Returns a set containing the follow sets # ----------------------------------------------------------------------------- - def compute_follow_sets(self,ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F # ----------------------------------------------------------------------------- # add_lookaheads() @@ -2318,15 +1700,16 @@ class LRGeneratedTable(LRTable): # in the lookbacks set # ----------------------------------------------------------------------------- - def add_lookaheads(self,lookbacks,followset): - for trans,lb in lookbacks.items(): + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): # Loop over productions in lookback - for state,p in lb: - if not state in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) # ----------------------------------------------------------------------------- # add_lalr_lookaheads() @@ -2335,7 +1718,7 @@ class LRGeneratedTable(LRTable): # with LALR parsing # ----------------------------------------------------------------------------- - def add_lalr_lookaheads(self,C): + def add_lalr_lookaheads(self, C): # Determine all of the nullable nonterminals nullable = self.compute_nullable_nonterminals() @@ -2343,16 +1726,16 @@ class LRGeneratedTable(LRTable): trans = self.find_nonterminal_transitions(C) # Compute read sets - readsets = self.compute_read_sets(C,trans,nullable) + readsets = self.compute_read_sets(C, trans, nullable) # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C,trans,nullable) + lookd, included = self.compute_lookback_includes(C, trans, nullable) # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans,readsets,included) + followsets = self.compute_follow_sets(trans, readsets, included) # Add all of the lookaheads - 
self.add_lookaheads(lookd,followsets) + self.add_lookaheads(lookd, followsets) # ----------------------------------------------------------------------------- # lr_parse_table() @@ -2366,324 +1749,179 @@ class LRGeneratedTable(LRTable): action = self.lr_action # Action array log = self.log # Logger for output - actionp = { } # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) + actionp = {} # Action production array (temporary) # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items # This determines the number of states C = self.lr0_items() - - if self.lr_method == 'LALR': - self.add_lalr_lookaheads(C) + self.add_lalr_lookaheads(C) # Build the parser table, state by state st = 0 for I in C: # Loop over each production in I - actlist = [ ] # List of actions - st_action = { } - st_actionp = { } - st_goto = { } - log.info("") - log.info("state %d", st) - log.info("") + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') for p in I: - log.info(" (%d) %s", p.number, str(p)) - log.info("") + log.info(' (%d) %s', p.number, p) + log.info('') for p in I: - if p.len == p.lr_index + 1: - if p.name == "S'": - # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p - else: - # We are at the end of a production. Reduce! - if self.lr_method == 'LALR': - laheads = p.lookaheads[st] - else: - laheads = self.grammar.Follow[p.name] - for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = st_action.get(a,None) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - sprec,slevel = Productions[st_actionp[a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - st_action[a] = -p.number - st_actionp[a] = p - if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) - Productions[p.number].reduced += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - st_action[a] = -p.number - st_actionp[a] = p - chosenp,rejectp = pp,oldp - Productions[p.number].reduced += 1 - Productions[oldp.number].reduced -= 1 - else: - chosenp,rejectp = oldp,pp - self.rr_conflicts.append((st,chosenp,rejectp)) - log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + # We are at the end of a production. Reduce! + laheads = p.lookaheads[st] + for a in laheads: + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. 
Need to add + # some precedence rules here. + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp, rejectp = pp, oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 else: - raise LALRError("Unknown conflict in state %d" % st) + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) else: - st_action[a] = -p.number - st_actionp[a] = p - Productions[p.number].reduced += 1 - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if a in self.grammar.Terminals: - g = self.lr0_goto(I,a) - j = self.lr0_cidhash.get(id(g),-1) - if j >= 0: - # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = st_action.get(a,None) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - raise LALRError("Shift/shift conflict in state %d" % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - rprec,rlevel = Productions[st_actionp[a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - # We decide to shift here... highest precedence to shift - Productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) - + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + # We are in a shift state + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError('Shift/shift conflict in state %d' % st) + elif r < 0: + # Do a precedence check. 
+ # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None else: - raise LALRError("Unknown conflict in state %d" % st) + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + else: - st_action[a] = j - st_actionp[a] = p + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = j + st_actionp[a] = p # Print the actions associated with each terminal - _actprint = { } - for a,p,m in actlist: + _actprint = {} + for a, p, m in actlist: if a in st_action: if p is st_actionp[a]: - log.info(" %-15s %s",a,m) - _actprint[(a,m)] = 1 - log.info("") + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') # Print the actions that were not used. (debugging) not_used = 0 - for a,p,m in actlist: + for a, p, m in actlist: if a in st_action: if p is not st_actionp[a]: - if not (a,m) in _actprint: - log.debug(" ! %-15s [ %s ]",a,m) + if not (a, m) in _actprint: + log.debug(' ! %-15s [ %s ]', a, m) not_used = 1 - _actprint[(a,m)] = 1 + _actprint[(a, m)] = 1 if not_used: - log.debug("") + log.debug('') # Construct the goto table for this state - nkeys = { } + nkeys = {} for ii in I: for s in ii.usyms: if s in self.grammar.Nonterminals: nkeys[s] = None for n in nkeys: - g = self.lr0_goto(I,n) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: st_goto[n] = j - log.info(" %-30s shift and go to state %d",n,j) + log.info(' %-30s shift and go to state %d', n, j) action[st] = st_action actionp[st] = st_actionp goto[st] = st_goto st += 1 - - # ----------------------------------------------------------------------------- - # write() - # - # This function writes the LR parsing tables to a file - # ----------------------------------------------------------------------------- - - def write_table(self,modulename,outputdir='',signature=""): - basemodulename = modulename.split(".")[-1] - filename = os.path.join(outputdir,basemodulename) + ".py" - try: - f = open(filename,"w") - - f.write(""" -# %s -# This file is automatically generated. Do not edit. 
-_tabversion = %r - -_lr_method = %r - -_lr_signature = %r - """ % (filename, __tabversion__, self.lr_method, signature)) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = { } - - for s,nd in self.lr_action.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } - _lr_action[_x][_k] = _y -del _lr_action_items -""") - - else: - f.write("\n_lr_action = { "); - for k,v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - if smaller: - # Factor out names to try and make smaller - items = { } - - for s,nd in self.lr_goto.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } - _lr_goto[_x][_k] = _y -del _lr_goto_items -""") - else: - f.write("\n_lr_goto = { "); - for k,v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - # Write production table - f.write("_lr_productions = [\n") - for p in self.lr_productions: - if p.func: - f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) - else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) - f.write("]\n") - f.close() - - except IOError: - e = sys.exc_info()[1] - sys.stderr.write("Unable to create '%s'\n" % filename) - sys.stderr.write(str(e)+"\n") - return - - - # ----------------------------------------------------------------------------- - # pickle_table() - # - # This function pickles the LR parsing tables to a supplied file object - # ----------------------------------------------------------------------------- - - def pickle_table(self,filename,signature=""): - try: - import cPickle as pickle - except ImportError: - import pickle - outf = open(filename,"wb") - pickle.dump(__tabversion__,outf,pickle_protocol) - pickle.dump(self.lr_method,outf,pickle_protocol) - pickle.dump(signature,outf,pickle_protocol) - pickle.dump(self.lr_action,outf,pickle_protocol) - pickle.dump(self.lr_goto,outf,pickle_protocol) - - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) - else: - outp.append((str(p),p.name,p.len,None,None,None)) - pickle.dump(outp,outf,pickle_protocol) - outf.close() - # ----------------------------------------------------------------------------- # === INTROSPECTION === # @@ -2700,26 +1938,18 @@ del _lr_goto_items # ----------------------------------------------------------------------------- def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = 
f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # parse_grammar() # # This takes a raw grammar rule string and parses it into production data # ----------------------------------------------------------------------------- -def parse_grammar(doc,file,line): +def parse_grammar(doc, file, line): grammar = [] # Split the doc string into lines pstrings = doc.splitlines() @@ -2728,12 +1958,13 @@ def parse_grammar(doc,file,line): for ps in pstrings: dline += 1 p = ps.split() - if not p: continue + if not p: + continue try: if p[0] == '|': # This is a continuation of a previous rule if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) prodname = lastp syms = p[1:] else: @@ -2742,13 +1973,13 @@ def parse_grammar(doc,file,line): syms = p[2:] assign = p[1] if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) - grammar.append((file,dline,prodname,syms)) + grammar.append((file, dline, prodname, syms)) except SyntaxError: raise except Exception: - raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) return grammar @@ -2760,14 +1991,14 @@ def parse_grammar(doc,file,line): # etc. # ----------------------------------------------------------------------------- class ParserReflect(object): - def __init__(self,pdict,log=None): + def __init__(self, pdict, log=None): self.pdict = pdict self.start = None self.error_func = None self.tokens = None - self.files = {} + self.modules = set() self.grammar = [] - self.error = 0 + self.error = False if log is None: self.log = PlyLogger(sys.stderr) @@ -2781,7 +2012,7 @@ class ParserReflect(object): self.get_tokens() self.get_precedence() self.get_pfunctions() - + # Validate all of the information def validate_all(self): self.validate_start() @@ -2789,32 +2020,28 @@ class ParserReflect(object): self.validate_tokens() self.validate_precedence() self.validate_pfunctions() - self.validate_files() + self.validate_modules() return self.error # Compute a signature over the grammar def signature(self): + parts = [] try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() if self.start: - sig.update(self.start.encode('latin-1')) + parts.append(self.start) if self.prec: - sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) + parts.append(''.join([''.join(p) for p in self.prec])) if self.tokens: - sig.update(" ".join(self.tokens).encode('latin-1')) + parts.append(' '.join(self.tokens)) for f in self.pfuncs: if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError,ValueError): + parts.append(f[3]) + except (TypeError, ValueError): pass - return sig.digest() + return ''.join(parts) # ----------------------------------------------------------------------------- - # validate_file() + # validate_modules() # # This method checks to see if there are duplicated p_rulename() functions # in the parser module file. Without this function, it is really easy for @@ -2824,32 +2051,29 @@ class ParserReflect(object): # to try and detect duplicates. 
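# The raise-and-catch trick for reaching a caller's frame is replaced by
# sys._getframe(), which is CPython-specific but is exactly what the old
# exception dance emulated. Minimal demonstration:
import sys

def caller_module_dict(levels):
    f = sys._getframe(levels)
    ldict = f.f_globals.copy()
    if f.f_globals != f.f_locals:  # inside a function: merge the locals in
        ldict.update(f.f_locals)
    return ldict

def demo():
    p_marker = 42  # a hypothetical p_ rule stand-in
    return 'p_marker' in caller_module_dict(1)

print(demo())  # True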
# ----------------------------------------------------------------------------- - def validate_files(self): + def validate_modules(self): # Match def p_funcname( fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - for filename in self.files.keys(): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - + for module in self.modules: try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: continue - counthash = { } - for linen,l in enumerate(lines): + counthash = {} + for linen, line in enumerate(lines): linen += 1 - m = fre.match(l) + m = fre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev) + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', + filename, linen, name, prev) # Get the start symbol def get_start(self): @@ -2858,7 +2082,7 @@ class ParserReflect(object): # Validate the start symbol def validate_start(self): if self.start is not None: - if not isinstance(self.start,str): + if not isinstance(self.start, str): self.log.error("'start' must be a string") # Look for error handler @@ -2868,162 +2092,173 @@ class ParserReflect(object): # Validate the error function def validate_error_func(self): if self.error_func: - if isinstance(self.error_func,types.FunctionType): + if isinstance(self.error_func, types.FunctionType): ismethod = 0 elif isinstance(self.error_func, types.MethodType): ismethod = 1 else: self.log.error("'p_error' defined, but is not a function or method") - self.error = 1 + self.error = True return - eline = func_code(self.error_func).co_firstlineno - efile = func_code(self.error_func).co_filename - self.files[efile] = 1 + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) - self.error = 1 + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True # Get the tokens map def get_tokens(self): - tokens = self.pdict.get("tokens",None) + tokens = self.pdict.get('tokens') if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return - self.tokens = tokens + self.tokens = sorted(tokens) # Validate the tokens def validate_tokens(self): # Validate the tokens. if 'error' in self.tokens: self.log.error("Illegal token name 'error'. 
Is a reserved word") - self.error = 1 + self.error = True return - terminals = {} + terminals = set() for n in self.tokens: if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 + self.log.warning('Token %r multiply defined', n) + terminals.add(n) # Get the precedence map (if any) def get_precedence(self): - self.prec = self.pdict.get("precedence",None) + self.prec = self.pdict.get('precedence') # Validate and parse the precedence map def validate_precedence(self): preclist = [] if self.prec: - if not isinstance(self.prec,(list,tuple)): - self.log.error("precedence must be a list or tuple") - self.error = 1 + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True return - for level,p in enumerate(self.prec): - if not isinstance(p,(list,tuple)): - self.log.error("Bad precedence table") - self.error = 1 + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True return if len(p) < 2: - self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) - self.error = 1 + self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p) + self.error = True return assoc = p[0] - if not isinstance(assoc,str): - self.log.error("precedence associativity must be a string") - self.error = 1 + if not isinstance(assoc, str): + self.log.error('precedence associativity must be a string') + self.error = True return for term in p[1:]: - if not isinstance(term,str): - self.log.error("precedence items must be strings") - self.error = 1 + if not isinstance(term, str): + self.log.error('precedence items must be strings') + self.error = True return - preclist.append((term,assoc,level+1)) + preclist.append((term, assoc, level+1)) self.preclist = preclist # Get all p_functions from the grammar def get_pfunctions(self): p_functions = [] for name, item in self.pdict.items(): - if name[:2] != 'p_': continue - if name == 'p_error': continue - if isinstance(item,(types.FunctionType,types.MethodType)): - line = func_code(item).co_firstlineno - file = func_code(item).co_filename - p_functions.append((line,file,name,item.__doc__)) - - # Sort all of the actions by line number - p_functions.sort() + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) self.pfuncs = p_functions - # Validate all of the p_functions def validate_pfunctions(self): grammar = [] # Check for non-empty symbols if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = 1 - return - - for line, file, name, doc in self.pfuncs: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) func = self.pdict[name] if isinstance(func, types.MethodType): reqargs = 2 else: reqargs = 1 - if func_code(func).co_argcount > reqargs: - self.log.error("%s:%d: Rule '%s' has too many 
arguments",file,line,func.__name__) - self.error = 1 - elif func_code(func).co_argcount < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) - self.error = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True elif not func.__doc__: - self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) else: try: - parsed_g = parse_grammar(doc,file,line) + parsed_g = parse_grammar(doc, file, line) for g in parsed_g: grammar.append((name, g)) - except SyntaxError: - e = sys.exc_info()[1] + except SyntaxError as e: self.log.error(str(e)) - self.error = 1 + self.error = True # Looks like a valid grammar rule # Mark the file in which defined. - self.files[file] = 1 + self.modules.add(module) # Secondary validation step that looks for p_ definitions that are not functions # or functions that look like they might be grammar rules. - for n,v in self.pdict.items(): - if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue - if n[0:2] == 't_': continue - if n[0:2] == 'p_' and n != 'p_error': - self.log.warning("'%s' not defined as a function", n) - if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or - (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", - func_code(v).co_filename, func_code(v).co_firstlineno,n) - except Exception: - pass + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass self.grammar = grammar @@ -3033,76 +2268,61 @@ class ParserReflect(object): # Build a parser # ----------------------------------------------------------------------------- -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', - debuglog=None, errorlog = None, picklefile=None): - - global parse # Reference to the parsing method of the last built parser - - # If pickling is enabled, table files are not created +def yacc(*, debug=yaccdebug, module=None, start=None, + check_recursion=True, optimize=False, debugfile=debug_file, + debuglog=None, errorlog=None): - if picklefile: - write_tables = 0 + # Reference to the parsing method of the last built parser + global parse if errorlog is None: errorlog = PlyLogger(sys.stderr) # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) 
for k in dir(module)] pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ else: pdict = get_caller_module_dict(2) + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + # Collect parser information from the dictionary - pinfo = ParserReflect(pdict,log=errorlog) + pinfo = ParserReflect(pdict, log=errorlog) pinfo.get_all() if pinfo.error: - raise YaccError("Unable to build parser") - - # Check signature against table files (if any) - signature = pinfo.signature() - - # Read the tables - try: - lr = LRTable() - if picklefile: - read_signature = lr.read_pickle(picklefile) - else: - read_signature = lr.read_table(tabmodule) - if optimize or (read_signature == signature): - try: - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) - parse = parser.parse - return parser - except Exception: - e = sys.exc_info()[1] - errorlog.warning("There was a problem loading the table file: %s", repr(e)) - except VersionError: - e = sys.exc_info() - errorlog.warning(str(e)) - except Exception: - pass + raise YaccError('Unable to build parser') if debuglog is None: if debug: - debuglog = PlyLogger(open(debugfile,"w")) + try: + debuglog = PlyLogger(open(debugfile, 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) + debuglog = NullLogger() else: debuglog = NullLogger() - debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) + debuglog.info('Created by PLY (http://www.dabeaz.com/ply)') - - errors = 0 + errors = False # Validate the parser information if pinfo.validate_all(): - raise YaccError("Unable to build parser") - + raise YaccError('Unable to build parser') + if not pinfo.error_func: - errorlog.warning("no p_error() function is defined") + errorlog.warning('no p_error() function is defined') # Create a grammar object grammar = Grammar(pinfo.tokens) @@ -3110,20 +2330,18 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Set precedence level for terminals for term, assoc, level in pinfo.preclist: try: - grammar.set_precedence(term,assoc,level) - except GrammarError: - e = sys.exc_info()[1] - errorlog.warning("%s",str(e)) + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) # Add productions to the grammar for funcname, gram in pinfo.grammar: file, line, prodname, syms = gram try: - grammar.add_production(prodname,syms,funcname,file,line) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error("%s",str(e)) - errors = 1 + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True # Set the grammar start symbols try: @@ -3131,146 +2349,134 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star grammar.set_start(pinfo.start) else: grammar.set_start(start) - except GrammarError: - e = sys.exc_info()[1] + except GrammarError as e: errorlog.error(str(e)) - errors = 1 + errors = True if errors: - raise YaccError("Unable to build parser") + raise YaccError('Unable to build parser') # Verify the grammar structure 
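# The rewritten yacc() signature above uses a Python 3 keyword-only marker
# (the bare `*`), so stale positional calls such as yacc('LALR') now fail
# loudly instead of silently binding to the wrong parameter. Sketch:
def yacc_sketch(*, debug=False, module=None, start=None):
    return debug, module, start

print(yacc_sketch(debug=True))  # (True, None, None)
try:
    yacc_sketch('LALR')         # positional argument is rejected
except TypeError as e:
    print('rejected:', e)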
undefined_symbols = grammar.undefined_symbols() for sym, prod in undefined_symbols: - errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) - errors = 1 + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True unused_terminals = grammar.unused_terminals() if unused_terminals: - debuglog.info("") - debuglog.info("Unused terminals:") - debuglog.info("") + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') for term in unused_terminals: - errorlog.warning("Token '%s' defined, but not used", term) - debuglog.info(" %s", term) + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) # Print out all productions to the debug log if debug: - debuglog.info("") - debuglog.info("Grammar") - debuglog.info("") - for n,p in enumerate(grammar.Productions): - debuglog.info("Rule %-5d %s", n, p) + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) # Find unused non-terminals unused_rules = grammar.unused_rules() for prod in unused_rules: - errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) if len(unused_terminals) == 1: - errorlog.warning("There is 1 unused token") + errorlog.warning('There is 1 unused token') if len(unused_terminals) > 1: - errorlog.warning("There are %d unused tokens", len(unused_terminals)) + errorlog.warning('There are %d unused tokens', len(unused_terminals)) if len(unused_rules) == 1: - errorlog.warning("There is 1 unused rule") + errorlog.warning('There is 1 unused rule') if len(unused_rules) > 1: - errorlog.warning("There are %d unused rules", len(unused_rules)) + errorlog.warning('There are %d unused rules', len(unused_rules)) if debug: - debuglog.info("") - debuglog.info("Terminals, with rules where they appear") - debuglog.info("") + debuglog.info('') + debuglog.info('Terminals, with rules where they appear') + debuglog.info('') terms = list(grammar.Terminals) terms.sort() for term in terms: - debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info("") - debuglog.info("Nonterminals, with rules where they appear") - debuglog.info("") + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') nonterms = list(grammar.Nonterminals) nonterms.sort() for nonterm in nonterms: - debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info("") + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') if check_recursion: unreachable = grammar.find_unreachable() for u in unreachable: - errorlog.warning("Symbol '%s' is unreachable",u) + errorlog.warning('Symbol %r is unreachable', u) infinite = grammar.infinite_cycles() for inf in infinite: - errorlog.error("Infinite recursion detected for symbol '%s'", inf) - errors = 1 - + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + unused_prec = grammar.unused_precedence() for term, assoc in unused_prec: - errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) - errors = 1 + errorlog.error('Precedence rule 
%r defined for unknown symbol %r', assoc, term) + errors = True if errors: - raise YaccError("Unable to build parser") - - # Run the LRGeneratedTable on the grammar - if debug: - errorlog.debug("Generating %s tables", method) - - lr = LRGeneratedTable(grammar,method,debuglog) + raise YaccError('Unable to build parser') + + # Run the LRTable on the grammar + lr = LRTable(grammar, debuglog) if debug: num_sr = len(lr.sr_conflicts) # Report shift/reduce and reduce/reduce conflicts if num_sr == 1: - errorlog.warning("1 shift/reduce conflict") + errorlog.warning('1 shift/reduce conflict') elif num_sr > 1: - errorlog.warning("%d shift/reduce conflicts", num_sr) + errorlog.warning('%d shift/reduce conflicts', num_sr) num_rr = len(lr.rr_conflicts) if num_rr == 1: - errorlog.warning("1 reduce/reduce conflict") + errorlog.warning('1 reduce/reduce conflict') elif num_rr > 1: - errorlog.warning("%d reduce/reduce conflicts", num_rr) + errorlog.warning('%d reduce/reduce conflicts', num_rr) # Write out conflicts to the output file if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning("") - debuglog.warning("Conflicts:") - debuglog.warning("") + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') for state, tok, resolution in lr.sr_conflicts: - debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) - - already_reported = {} + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() for state, rule, rejected in lr.rr_conflicts: - if (state,id(rule),id(rejected)) in already_reported: + if (state, id(rule), id(rejected)) in already_reported: continue - debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - debuglog.warning("rejected rule (%s) in state %d", rejected,state) - errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - errorlog.warning("rejected rule (%s) in state %d", rejected, state) - already_reported[state,id(rule),id(rejected)] = 1 - + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) + warned_never = [] for state, rule, rejected in lr.rr_conflicts: if not rejected.reduced and (rejected not in warned_never): - debuglog.warning("Rule (%s) is never reduced", rejected) - errorlog.warning("Rule (%s) is never reduced", rejected) + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) warned_never.append(rejected) - # Write the table file if requested - if write_tables: - lr.write_table(tabmodule,outputdir,signature) - - # Write a pickled version of the tables - if picklefile: - lr.pickle_table(picklefile,signature) - # Build the parser lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) + parser = LRParser(lr, pinfo.error_func) parse = parser.parse return parser diff --git a/components/script/dom/bindings/codegen/run.py b/components/script/dom/bindings/codegen/run.py index 130d35e5268..7f58de15d69 100644 --- a/components/script/dom/bindings/codegen/run.py +++ b/components/script/dom/bindings/codegen/run.py @@ -52,7 +52,7 @@ def main(): module = 
CGBindingRoot(config, prefix, filename).define() if module: with open(os.path.join(out_dir, prefix + ".rs"), "wb") as f: - f.write(module) + f.write(module.encode("utf-8")) def make_dir(path): @@ -66,7 +66,7 @@ def generate(config, name, filename): root = getattr(GlobalGenRoots, name)(config) code = root.define() with open(filename, "wb") as f: - f.write(code) + f.write(code.encode("utf-8")) def add_css_properties_attributes(css_properties_json, parser): diff --git a/components/servo/build.rs b/components/servo/build.rs index f8e207f0b86..936ed712dc6 100644 --- a/components/servo/build.rs +++ b/components/servo/build.rs @@ -40,11 +40,11 @@ fn error(message: &str) { } fn find_python() -> String { - env::var("PYTHON2").ok().unwrap_or_else(|| { + env::var("PYTHON3").ok().unwrap_or_else(|| { let candidates = if cfg!(windows) { - ["python2.7.exe", "python27.exe", "python.exe"] + ["python3.8.exe", "python38.exe", "python.exe"] } else { - ["python2.7", "python2", "python"] + ["python3.8", "python3", "python"] }; for &name in &candidates { if Command::new(name) @@ -57,7 +57,7 @@ fn find_python() -> String { } } panic!( - "Can't find python (tried {})! Try fixing PATH or setting the PYTHON2 env var", + "Can't find python (tried {})! Try fixing PATH or setting the PYTHON3 env var", candidates.join(", ") ) }) diff --git a/etc/memory_reports_over_time.py b/etc/memory_reports_over_time.py index 0d0859aedf7..7940c55aff1 100755 --- a/etc/memory_reports_over_time.py +++ b/etc/memory_reports_over_time.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright 2018 The Servo Project Developers. See the COPYRIGHT # file at the top-level directory of this distribution. diff --git a/etc/taskcluster/decision_task.py b/etc/taskcluster/decision_task.py index 66338c53a6c..97b20f7c059 100644 --- a/etc/taskcluster/decision_task.py +++ b/etc/taskcluster/decision_task.py @@ -57,7 +57,7 @@ def tasks(task_for): "try-mac": [macos_unit], "try-linux": [linux_tidy_unit, linux_docs_check, linux_release], - "try-windows": [windows_unit, windows_arm64, windows_uwp_x64], + "try-windows": [windows_arm64, windows_uwp_x64], "try-arm": [windows_arm64], "try-wpt": [linux_wpt], "try-wpt-2020": [linux_wpt_layout_2020], @@ -130,6 +130,8 @@ windows_build_env = { }, "all": { "PYTHON3": "%HOMEDRIVE%%HOMEPATH%\\python3\\python.exe", + "PYTHONPATH": "%HOMEDRIVE%%HOMEPATH%\\python3", + "PYTHONHOME": "%HOMEDRIVE%%HOMEPATH%\\python3", "LINKER": "lld-link.exe", "MOZTOOLS_PATH_PREPEND": "%HOMEDRIVE%%HOMEPATH%\\git\\cmd", }, @@ -155,11 +157,11 @@ def linux_tidy_unit_untrusted(): .with_env(**build_env, **unix_build_env, **linux_build_env) .with_repo_bundle() .with_script(""" - ./mach test-tidy --no-progress --all - ./mach test-tidy --no-progress --self-test - ./mach bootstrap-gstreamer - ./mach build --dev - ./mach test-unit + python3 ./mach test-tidy --no-progress --all + python3 ./mach test-tidy --no-progress --self-test + python3 ./mach bootstrap-gstreamer + python3 ./mach build --dev + python3 ./mach test-unit ./etc/ci/lockfile_changed.sh ./etc/memory_reports_over_time.py --test @@ -175,8 +177,7 @@ def linux_tidy_unit(): .with_treeherder("Linux x64", "Tidy+Unit") .with_max_run_time_minutes(75) .with_script(""" - ./mach test-tidy --no-progress --all - python3 ./mach test-tidy --no-progress --all --no-wpt + python3 ./mach test-tidy --no-progress --all python3 ./mach build --dev python3 ./mach test-unit python3 ./mach package --dev @@ -201,7 +202,7 @@ def linux_docs_check(): linux_build_task("Docs + check") 
.with_treeherder("Linux x64", "Doc+Check") .with_script(""" - RUSTDOCFLAGS="--disable-minification" ./mach doc + RUSTDOCFLAGS="--disable-minification" python3 ./mach doc ( cd target/doc git init @@ -219,7 +220,7 @@ def linux_docs_check(): # The reverse order would not increase the total amount of work to do, # but would reduce the amount of parallelism available. """ - ./mach check + python3 ./mach check """) .with_artifacts("/repo/target/doc/docs.bundle") .find_or_create("docs." + CONFIG.tree_hash()) @@ -243,7 +244,7 @@ def upload_docs(): open("/root/.git-credentials", "w").write("https://git:%s@github.com/" % token) """) .with_script(""" - python -c "$PY" + python3 -c "$PY" git init --bare git config credential.helper store git fetch --quiet docs.bundle @@ -274,9 +275,9 @@ def macos_unit(): macos_build_task("Dev build + unit tests") .with_treeherder("macOS x64", "Unit") .with_script(""" - ./mach build --dev --verbose - ./mach test-unit - ./mach package --dev + python3 ./mach build --dev --verbose + python3 ./mach test-unit + python3 ./mach package --dev ./etc/ci/macos_package_smoketest.sh target/debug/servo-tech-demo.dmg ./etc/ci/lockfile_changed.sh """) @@ -296,8 +297,8 @@ def with_rust_nightly(): .with_treeherder("Linux x64", "RustNightly") .with_script(""" echo "nightly" > rust-toolchain - ./mach build --dev - ./mach test-unit + python3 ./mach build --dev + python3 ./mach test-unit """) .create() ) @@ -354,10 +355,10 @@ def uwp_nightly(rdp=False): "secrets:get:project/servo/windows-codesign-cert/latest", ) .with_script( - "python mach build --release --target=x86_64-uwp-windows-msvc", - "python mach build --release --target=aarch64-uwp-windows-msvc", - "python mach package --release --target=x86_64-uwp-windows-msvc --uwp=x64 --uwp=arm64", - "python mach upload-nightly uwp --secret-from-taskcluster", + "python3 mach build --release --target=x86_64-uwp-windows-msvc", + "python3 mach build --release --target=aarch64-uwp-windows-msvc", + "python3 mach package --release --target=x86_64-uwp-windows-msvc --uwp=x64 --uwp=arm64", + "python3 mach upload-nightly uwp --secret-from-taskcluster", ) .with_artifacts(appx_artifact) .with_max_run_time_minutes(3 * 60) @@ -418,9 +419,9 @@ def linux_nightly(): .with_scopes("secrets:get:project/servo/s3-upload-credentials") # Not reusing the build made for WPT because it has debug assertions .with_script( - "./mach build --release", - "./mach package --release", - "./mach upload-nightly linux --secret-from-taskcluster", + "python3 ./mach build --release", + "python3 ./mach package --release", + "python3 ./mach upload-nightly linux --secret-from-taskcluster", ) .with_artifacts("/repo/target/release/servo-tech-demo.tar.gz") .find_or_create("build.linux_x64_nightly" + CONFIG.tree_hash()) @@ -432,8 +433,8 @@ def linux_release(): linux_build_task("Release build") .with_treeherder("Linux x64", "Release") .with_script( - "./mach build --release", - "./mach package --release", + "python3 ./mach build --release", + "python3 ./mach package --release", ) .find_or_create("build.linux_x64_release" + CONFIG.tree_hash()) ) @@ -449,10 +450,10 @@ def macos_nightly(): "secrets:get:project/servo/github-homebrew-token", ) .with_script( - "./mach build --release", - "./mach package --release", + "python3 ./mach build --release", + "python3 ./mach package --release", "./etc/ci/macos_package_smoketest.sh target/release/servo-tech-demo.dmg", - "./mach upload-nightly mac --secret-from-taskcluster", + "python3 ./mach upload-nightly mac --secret-from-taskcluster", ) 
.with_artifacts("repo/target/release/servo-tech-demo.dmg") .find_or_create("build.mac_x64_nightly." + CONFIG.tree_hash()) @@ -489,7 +490,7 @@ def macos_release_build_with_debug_assertions(priority=None): .with_treeherder("macOS x64", "Release+A") .with_priority(priority) .with_script("\n".join([ - "./mach build --release --verbose --with-debug-assertions", + "python3 ./mach build --release --verbose --with-debug-assertions", "./etc/ci/lockfile_changed.sh", "tar -czf target.tar.gz" + " target/release/servo" + @@ -516,9 +517,9 @@ def linux_release_build_with_debug_assertions(layout_2020): linux_build_task(name_prefix + "Release build, with debug assertions") .with_treeherder("Linux x64", treeherder_prefix + "Release+A") .with_script(""" - time ./mach rustc -V - time ./mach fetch - ./mach build --release --with-debug-assertions %s -p servo + time python3 ./mach rustc -V + time python3 ./mach fetch + python3 ./mach build --release --with-debug-assertions %s -p servo ./etc/ci/lockfile_changed.sh tar -czf /target.tar.gz \ target/release/servo \ @@ -537,7 +538,7 @@ def macos_wpt(): priority = "high" if CONFIG.git_ref == "refs/heads/auto" else None build_task = macos_release_build_with_debug_assertions(priority=priority) def macos_run_task(name): - task = macos_task(name).with_python2().with_python3() \ + task = macos_task(name).with_python3() \ .with_repo_bundle(alternate_object_dir="/var/cache/servo.git/objects") return with_homebrew(task, ["etc/taskcluster/macos/Brewfile"]) wpt_chunks( @@ -619,11 +620,11 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, if this_chunk == 0: if run_webgpu: webgpu_script = """ - time ./mach test-wpt _webgpu --release --processes $PROCESSES \ + time python3 ./mach test-wpt _webgpu --release --processes $PROCESSES \ --headless --log-raw test-webgpu.log --always-succeed \ --log-errorsummary webgpu-errorsummary.log \ | cat - ./mach filter-intermittents \ + python3 ./mach filter-intermittents \ webgpu-errorsummary.log \ --log-intermittents webgpu-intermittents.log \ --log-filteredsummary filtered-webgpu-errorsummary.log \ @@ -634,7 +635,7 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, webgpu_script = "" task.with_script(""" - time python ./mach test-wpt --release --binary-arg=--multiprocess \ + time python3 ./mach test-wpt --release --binary-arg=--multiprocess \ --processes $PROCESSES \ --log-raw test-wpt-mp.log \ --log-errorsummary wpt-mp-errorsummary.log \ @@ -647,30 +648,30 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, --always-succeed \ url \ | cat - ./mach filter-intermittents \ + python3 ./mach filter-intermittents \ wpt-py3-errorsummary.log \ --log-intermittents wpt-py3-intermittents.log \ --log-filteredsummary filtered-py3-errorsummary.log \ --tracker-api default \ --reporter-api default - time ./mach test-wpt --release --product=servodriver --headless \ + time python3 ./mach test-wpt --release --product=servodriver --headless \ tests/wpt/mozilla/tests/mozilla/DOMParser.html \ tests/wpt/mozilla/tests/css/per_glyph_font_fallback_a.html \ tests/wpt/mozilla/tests/css/img_simple.html \ tests/wpt/mozilla/tests/mozilla/secure.https.html \ | cat - time ./mach test-wpt --release --processes $PROCESSES --product=servodriver \ + time python3 ./mach test-wpt --release --processes $PROCESSES --product=servodriver \ --headless --log-raw test-bluetooth.log \ --log-errorsummary bluetooth-errorsummary.log \ bluetooth \ | cat - time ./mach test-wpt --release --processes 
$PROCESSES --timeout-multiplier=4 \ + time python3 ./mach test-wpt --release --processes $PROCESSES --timeout-multiplier=4 \ --headless --log-raw test-wdspec.log \ --log-servojson wdspec-jsonsummary.log \ --always-succeed \ webdriver \ | cat - ./mach filter-intermittents \ + python3 ./mach filter-intermittents \ wdspec-jsonsummary.log \ --log-intermittents intermittents.log \ --log-filteredsummary filtered-wdspec-errorsummary.log \ @@ -680,7 +681,7 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, ) else: task.with_script(""" - ./mach test-wpt \ + python3 ./mach test-wpt \ --release \ $WPT_ARGS \ --processes $PROCESSES \ @@ -690,7 +691,7 @@ def wpt_chunks(platform, make_chunk_task, build_task, total_chunks, processes, --log-servojson wpt-jsonsummary.log \ --always-succeed \ | cat - ./mach filter-intermittents \ + python3 ./mach filter-intermittents \ wpt-jsonsummary.log \ --log-intermittents intermittents.log \ --log-filteredsummary filtered-wpt-errorsummary.log \ @@ -770,7 +771,7 @@ def linux_build_task(name, *, build_env=build_env): .with_dockerfile(dockerfile_path("build")) .with_env(**build_env, **unix_build_env, **linux_build_env) .with_repo_bundle() - .with_script("./mach bootstrap-gstreamer") + .with_script("python3 ./mach bootstrap-gstreamer") ) return task @@ -797,12 +798,14 @@ def windows_build_task(name, package=True, arch="x86_64", rdp=False): **windows_build_env["all"] ) .with_repo_bundle(sparse_checkout=windows_sparse_checkout) - .with_python2() - .with_directory_mount( - "https://www.python.org/ftp/python/3.7.3/python-3.7.3-embed-amd64.zip", - sha256="6de14c9223226cf0cd8c965ecb08c51d62c770171a256991b4fddc25188cfa8e", - path="python3", - ) + .with_python3() + # mozjs's virtualenv expects a DLLs folder that contains dynamic libraries. + # The embedded python distribution does not come with this. + .with_script(""" + mkdir %HOMEDRIVE%%HOMEPATH%\\python3\\DLLs + copy %HOMEDRIVE%%HOMEPATH%\\python3\\*.pyd %HOMEDRIVE%%HOMEPATH%\\python3\\DLLs + copy %HOMEDRIVE%%HOMEPATH%\\python3\\*.dll %HOMEDRIVE%%HOMEPATH%\\python3\\DLLs + """) .with_rustup() ) if arch in hashes["non-devel"] and arch in hashes["devel"]: @@ -844,7 +847,7 @@ def macos_build_task(name): .with_max_run_time_minutes(60 * 2) .with_env(**build_env, **unix_build_env, **macos_build_env) .with_repo_bundle(alternate_object_dir="/var/cache/servo.git/objects") - .with_python2() + .with_python3() .with_rustup() .with_index_and_artifacts_expire_in(build_artifacts_expire_in) # Debugging for surprising generic-worker behaviour diff --git a/etc/taskcluster/decisionlib.py b/etc/taskcluster/decisionlib.py index 13fc2b75bfc..56a59e53061 100644 --- a/etc/taskcluster/decisionlib.py +++ b/etc/taskcluster/decisionlib.py @@ -640,6 +640,30 @@ class WindowsGenericWorkerTask(GenericWorkerTask): """) \ .with_path_from_homedir("python2", "python2\\Scripts") + def with_python3(self): + """ + For Python 3, use `with_directory_mount` and the "embeddable zip file" distribution + from python.org. + You may need to remove `python37._pth` from the ZIP in order to work around + <https://bugs.python.org/issue34841>. 
+ """ + return self \ + .with_directory_mount( + "https://www.python.org/ftp/python/3.7.3/python-3.7.3-embed-amd64.zip", + sha256="6de14c9223226cf0cd8c965ecb08c51d62c770171a256991b4fddc25188cfa8e", + path="python3", + ) \ + .with_path_from_homedir("python3", "python3\\Scripts") \ + .with_curl_script("https://bootstrap.pypa.io/get-pip.py", "get-pip.py") \ + .with_script(""" + echo import site>>%HOMEDRIVE%%HOMEPATH%%\\python3\\python37._pth + echo import sys>%HOMEDRIVE%%HOMEPATH%%\\python3\\sitecustomize.py + echo sys.path.insert(0, '')>>%HOMEDRIVE%%HOMEPATH%%\\python3\\sitecustomize.py + + python get-pip.py + python -m pip install virtualenv==20.2.1 + """) + class UnixTaskMixin(Task): def with_repo(self, alternate_object_dir=""): @@ -6,7 +6,7 @@ # The beginning of this script is both valid shell and valid python, # such that the script starts with the shell and is reexecuted with # the right python. -''':' && if [ ! -z "$MSYSTEM" ] ; then exec python "$0" "$@" ; else which python2.7 > /dev/null 2> /dev/null && exec python2.7 "$0" "$@" || exec python "$0" "$@" ; fi +''':' && if [ ! -z "$MSYSTEM" ] ; then exec python "$0" "$@" ; else which python3 > /dev/null 2> /dev/null && exec python3 "$0" "$@" || exec python "$0" "$@" ; fi ''' from __future__ import print_function, unicode_literals @@ -18,8 +18,8 @@ import sys # Check for the current python version as some users (especially on archlinux) # may not have python 2 installed and their /bin/python binary symlinked to # python 3. -if sys.version_info >= (3, 0) and sys.version_info < (3, 5): - print("mach does not support python 3 (< 3.5), please install python 2 or python 3 (>= 3.5)") +if sys.version_info < (3, 5): + print("mach does not support python 3 (< 3.5), please install python 3 (>= 3.5)") sys.exit(1) diff --git a/python/requirements.txt b/python/requirements.txt index af3e6074117..44693822306 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -5,9 +5,9 @@ blessings == 1.6 distro == 1.4 mach == 1.0.0 mozdebug == 0.2 -mozinfo == 1.1.0 -mozlog == 5.0 -setuptools == 39.0 +mozinfo == 1.2.1 +mozlog == 7.1.0 +setuptools == 50.3.2 toml == 0.9.2 # For Python linting @@ -30,6 +30,6 @@ boto3 == 1.4.4 certifi # For Python3 compatibility -six == 1.12 +six == 1.15 -e python/tidy diff --git a/python/servo/command_base.py b/python/servo/command_base.py index 7430247a2e3..fb1c79b8b3c 100644 --- a/python/servo/command_base.py +++ b/python/servo/command_base.py @@ -749,7 +749,7 @@ install them, let us know by filing a bug!") # Shorten hash # NOTE: Partially verifies the hash, but it will still pass if it's, e.g., a tree git_sha = subprocess.check_output([ - 'git', 'rev-parse', '--short', git_sha + 'git', 'rev-parse', '--short', git_sha.decode('ascii') ]) else: # This is a regular commit @@ -999,7 +999,7 @@ install them, let us know by filing a bug!") toolchain = self.rust_toolchain() status = subprocess.call( - ["rustup", "run", toolchain.encode("utf-8"), "rustc", "--version"], + ["rustup", "run", toolchain, "rustc", "--version"], stdout=open(os.devnull, "wb"), stderr=subprocess.STDOUT, ) diff --git a/python/servo/package_commands.py b/python/servo/package_commands.py index e62a742589c..f44786251cc 100644 --- a/python/servo/package_commands.py +++ b/python/servo/package_commands.py @@ -775,7 +775,11 @@ def setup_uwp_signing(ms_app_store, publisher): def run_powershell_cmd(cmd): try: - return subprocess.check_output(['powershell.exe', '-NoProfile', '-Command', cmd]) + return ( + subprocess + .check_output(['powershell.exe', 
'-NoProfile', '-Command', cmd]) + .decode('utf-8') + ) except subprocess.CalledProcessError: print("ERROR: PowerShell command failed: ", cmd) exit(1) @@ -841,6 +845,7 @@ def build_uwp(platforms, dev, msbuild_dir, ms_app_store): .replace("%%PACKAGE_PLATFORMS%%", '|'.join(platforms)) .replace("%%CONFIGURATION%%", Configuration) .replace("%%SOLUTION%%", path.join(os.getcwd(), 'support', 'hololens', 'ServoApp.sln')) + .encode('utf-8') ) build_file.close() # Generate an appxbundle. diff --git a/python/servo/post_build_commands.py b/python/servo/post_build_commands.py index 2e69c6ca917..daf2a9815ac 100644 --- a/python/servo/post_build_commands.py +++ b/python/servo/post_build_commands.py @@ -243,7 +243,8 @@ class PostBuildCommands(CommandBase): media_stack=None, **kwargs): self.ensure_bootstrapped(rustup_components=["rust-docs"]) rustc_path = check_output( - ["rustup" + BIN_SUFFIX, "which", "--toolchain", self.rust_toolchain(), "rustc"]) + ["rustup" + BIN_SUFFIX, "which", "--toolchain", self.rust_toolchain(), "rustc"] + ).decode('utf-8') assert path.basename(path.dirname(rustc_path)) == "bin" toolchain_path = path.dirname(path.dirname(rustc_path)) rust_docs = path.join(toolchain_path, "share", "doc", "rust", "html") diff --git a/python/servo/testing_commands.py b/python/servo/testing_commands.py index 5d05eb60300..212371a58a0 100644 --- a/python/servo/testing_commands.py +++ b/python/servo/testing_commands.py @@ -585,7 +585,10 @@ class MachCommands(CommandBase): def format(outputs, description, file=sys.stdout): formatted = "%s %s:\n%s" % (len(outputs), description, "\n".join(outputs)) - file.write(formatted.encode("utf-8")) + if file == sys.stdout: + file.write(formatted) + else: + file.write(formatted.encode("utf-8")) if log_intermittents: with open(log_intermittents, "wb") as file: diff --git a/tests/wpt/metadata/html/browsers/origin/origin-keyed-agent-clusters/getter-special-cases/cross-origin-isolated.sub.https.html.ini b/tests/wpt/metadata/html/browsers/origin/origin-keyed-agent-clusters/getter-special-cases/cross-origin-isolated.sub.https.html.ini index f547e21e87d..0946279f9e1 100644 --- a/tests/wpt/metadata/html/browsers/origin/origin-keyed-agent-clusters/getter-special-cases/cross-origin-isolated.sub.https.html.ini +++ b/tests/wpt/metadata/html/browsers/origin/origin-keyed-agent-clusters/getter-special-cases/cross-origin-isolated.sub.https.html.ini @@ -1,4 +1,5 @@ [cross-origin-isolated.sub.https.html] [self: originAgentCluster must equal true] expected: FAIL - + [child: originAgentCluster must equal true] + expected: FAIL diff --git a/tests/wpt/mozilla/meta/MANIFEST.json b/tests/wpt/mozilla/meta/MANIFEST.json index 67290bb0ab1..60a9a51578d 100644 --- a/tests/wpt/mozilla/meta/MANIFEST.json +++ b/tests/wpt/mozilla/meta/MANIFEST.json @@ -11005,7 +11005,7 @@ [] ], "interfaces.js": [ - "7a105e791dd80bd42d80055a64746bbe5fece41e", + "e37397aa973f5fb913e5b8097945368c2848bed8", [] ], "nested_asap_script.js": [ diff --git a/tests/wpt/mozilla/tests/mozilla/FileAPI/resource/file-submission.py b/tests/wpt/mozilla/tests/mozilla/FileAPI/resource/file-submission.py index 79e72fb99a9..5f65cebd05c 100644 --- a/tests/wpt/mozilla/tests/mozilla/FileAPI/resource/file-submission.py +++ b/tests/wpt/mozilla/tests/mozilla/FileAPI/resource/file-submission.py @@ -10,18 +10,18 @@ def fail(msg): def main(request, response): - content_type = request.headers.get('Content-Type').split("; ") + content_type = request.headers.get(b'Content-Type').split(b"; ") if len(content_type) != 2: return fail("content type length 
is incorrect") - if content_type[0] != 'multipart/form-data': + if content_type[0] != b'multipart/form-data': return fail("content type first field is incorrect") - boundary = content_type[1].strip("boundary=") + boundary = content_type[1].strip(b"boundary=") - body = "--" + boundary + "\r\nContent-Disposition: form-data; name=\"file-input\"; filename=\"upload.txt\"" - body += "\r\n" + "content-type: text/plain\r\n\r\nHello\r\n--" + boundary + "--\r\n" + body = b"--" + boundary + b"\r\nContent-Disposition: form-data; name=\"file-input\"; filename=\"upload.txt\"" + body += b"\r\n" + b"content-type: text/plain\r\n\r\nHello\r\n--" + boundary + b"--\r\n" if body != request.body: return fail("request body doesn't match: " + body + "+++++++" + request.body) diff --git a/tests/wpt/mozilla/tests/mozilla/resources/brotli.py b/tests/wpt/mozilla/tests/mozilla/resources/brotli.py index a24b4771e31..cb0d6c8b044 100644 --- a/tests/wpt/mozilla/tests/mozilla/resources/brotli.py +++ b/tests/wpt/mozilla/tests/mozilla/resources/brotli.py @@ -2,7 +2,7 @@ # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at https://mozilla.org/MPL/2.0/. -decoded = """\ +decoded = b"""\ Lorem ipsum dolor sit amet, consectetur adipiscing elit. Ut viverra neque in massa rutrum, non rutrum nunc pellentesque. Phasellus et nulla metus. Nam aliquet felis nec iaculis eleifend. Donec pretium tellus non aliquam tristique. Mauris feugiat eu velit sed maximus. Praesent fringilla lorem vel orci maximus accumsan. Fusce vel sapien ipsum. Nulla ac lectus non arcu semper laoreet. Aliquam et massa at ex elementum dictum vitae ac purus. Sed a nunc sed dui pulvinar mollis eu sed eros. Mauris vitae ullamcorper dolor. Ut sed nisl sem. Mauris pulvinar vitae orci nec tincidunt. Integer fringilla quam in lobortis vehicula. In aliquam egestas dapibus. Suspendisse est enim, maximus non massa eget, finibus finibus lorem. Phasellus a varius ante. Pellentesque tempor dignissim nunc ut malesuada. @@ -24,15 +24,15 @@ Aenean tincidunt consequat augue, in bibendum nisl placerat in. Nulla non dignis Nulla facilisis dui odio, at scelerisque erat cursus a. Ut arcu nunc, ullamcorper vitae orci eget, sollicitudin maximus sapien. Fusce eu arcu nunc. Integer vitae eros turpis. Fusce ac elit in nisi rutrum suscipit id consectetur lacus. Suspendisse rutrum ligula auctor fringilla cursus. Sed consequat erat in nunc lacinia, et ullamcorper velit vestibulum. Mauris sed est a tellus feugiat sagittis non nec neque. Sed eu bibendum orci. Donec diam diam, viverra sed dolor non, posuere sagittis mi. 
Duis rhoncus, risus sit amet luctus sodales, risus arcu faucibus enim, eu cras amet.\ """ -encoded = '\x1b\x99\x13 ,\x0elw\x08\x95t\xd2\xe6\xf2(\xcd\'5M\x9dg\xd7\xc9)uG\xf4P\x8c;\x92_(\xb1\x14@L\x9d$\xa2\x16\x8f\x06\xc8\x18J\xed\xac\xabd_\xfb\xbe\xcb\xab\x90]VTu\xbc\xe1\xc3\x11\x96\x81T\r\xabj\x07\xbd\xe0\xb2\xd7y\x89\x1c\x81\xfd8\x9f\x15\xcf\xf8\xf6\xe6\x84\x8d\x90Ta<\xef\xcf\xce\xcc\x95\xa4\xff;\xcaO\xe9e\x97z/\xeap\t\x0e\x17\xd9\x14\xb6\xa9V\x151\n\xd0\xe0Fh\xd8\xbd\xd2\xdcB@\xed\xfa\xbe\xea\xb1V\xa1\xe7I\xd5\xfa\x7fTV\xa0\xa4\xda\x86G>R\x1a\x84\x1fs\x1b/\x98\xd6\xfa#\xaa_\xf5\xb7-\xf8d\x99\x80\x87O\xdb\xa1\xbe\xa9\x1a\xc3\xfb\xaf!\xc4f#oa\xc4\xffY?\x8f\xf3\xfc{\xe6\x1dB\x11\xe6\xdd\xe6(\\_S \xfd\xc9\x12\x9d4\t\x1cMO\x9a0u\xfd\t\xd7z\x1d\xe4l\'y\x17\x83\xbcCb\x18\xbfs\x16\xe3\xcc\xf1\x82\x97y\xc9n\x93\xce>=\x05\xb7\x15i\x91\xc4\xe3\xceek_\xfe\xfdzQ\x18\xfa\x19/\xe7\xddfk\x15E\xb2\xf6\xf6\xddo\x05\xbeSOc\xbd\xcb\xad{Ve\x1e/\xa0Y\xac\xaf\x87Z\x0f\xc7\xf0\xd9\xda\x17\xf4\x8e%\xf5Qc\xb9[K\xd2\xe1\x86k\x14\x84k \xf8\x12\xe8,2\x7fE}RT\xd5\xcb\xe0lv\xb8~\x11\xc0Bl\x92`\xf1\xb2\xcd\xfc3\xba\xf1\xe5m\xc2mI\xc0>D\x813e\x1b\\]\xfb\xf4G\x1d\xf9,\xa6\xb8\xff@\x947I\x8d\xd1\xbc\x1c\x0c(\xde\x138\xa3\xd8\x8e`\xd6\x7f\x81 \x82\x0e\x87\xfa"\x01\xdbqzL\x8a\x7f{\xb2\xefw\x8c^\xcdS\x9c&K\x1e\x1f\xc7\xaaj\xad\x1f0\x1f\x199\x10\xaez\n\x18\x81R6v\x99j/^\xf9\xbb\x88WB\xae\x97\xc2*\xedM\x80a]\xcc\xc1\x0e{\xf8\x81\xbd,=\xdf\xe6c\x9a\xbe\x7f\nO\x8a\x99\xd1?\xfc\x88\xc4\\\x1a(\xa4\\\xf6!\x7f}\xfd\xed\xb7+\xe4\xff\xfa\xebhk\xf6\x13R@h9j\xfd\x8ev\x9b\x89l\xbe\xfe\x9d8S\x0b\xec\xb7gNk\xcc\x9a\x9fR\xed\xc5Fv/F\xc0\xef)B1u6z\xfc\xd6\x9d\x9a\x1b\x01;a\xfa$\x96\x1b\xd7\x97\xf5\x8f\x0316\xfb\xddZ\xe8;\xdf=\x80S\xed-\xf3\x13\xb5$1\x7f2CNm\xc3+KQ\x97\xafe\xf4i\x91\x8bNq=-h\x82\x9e\xed>B\xb1\xfc,\xbbU\xe1\x14\x1c\x1f\xc9\x14\xc6\xbd\xb5*\xc8\xc5\x0f\xc4l\xed\x13\\_\xf5j\xff0s\xbev\x11\xf0d\x1dl\xd8\x1f\xc0\xe5g\x98(P\x87\xbb%.\x8c\xf0~8\xdcF\x8e\xb3\xd8>\xc6\x0c\xfb\xc4_\xc3\xce\x85\xeds\x9aR\xf3\xdc\xe6\x8dI\xc7`F\x08B?U\xda\xcf\x0c\xb8r,\xa2\x07\x9b\xd3\x1c$aG\xfc\xe5\xd5\x02\x85\xe9\xca\t\x12\xf1\xf6@[C\x10\xe9:\xed\xb5T\x96\xca\x8a\xb1X\xbeaV\x15\x0cp\xd8k\xbam\xe4\xf2\x12*\x03\xebo\x14 \x17\xe6\x04\xff_\x80\x8f\x10\x85/\xe5T\x13\x15\x84o\xde\xc6\xac\x965\x0f\xa7\xa7]\xec^\xbfXd \xd8\x7fiL\xacg\xb2\xc7\xf1\xa5\xd0\x81;\xd7e\x87\x14.\x80\x01z\xe0\xd1\x9cV\xf4\x1e;\xfe\x83\x1d\x9e\xc1\xf6\xbd\xcb\x97\xe2xa]\x18\x1c\x02\xeeZ\xf4b\x08\xa0<\xde\xab3\xec\xe0K\x1b\xfe\xdaC\xe1 \xf7\xb3&?\xae\xa6u\x18\x9buaq\xcd\xefI\xc6zNO\xf1\xca:\xc5\xdfk\\\x96\xc5:\x01<pnba\'\x89N~\xda\xe25*\xd0i\xe2DZ\x90\xf4-\xc4\x99O91\xc9\xd5r9\x8f^8\xcaP \xce\xa3^\xce\x07L)\x87\x1c\xd6C\xad\x1d\x98V\xadl`\xd7\xa4\xc1\x9d"\x8c\xfa\xca\x84\xce\x16\x81A\xabRn?p\xb4\xdbC\x98\xf4\xa1z\x11\xcb\x86CJ\xb6#i\x08&T\x08\xe4\xb8\xf7\x81\x18h\x19h\xa4\xadT\xebD\x14t\x9d2\xff&O)\x17\xaa\x1a\xbb\x99\x98\x03\xc0\xc7\xda\xe0S\xdc\xfa\xb4p\x93g\x1dQ\xb5L\x1drL\xda`G\xa1\x1dGt\x9ci\xbd\x11X9ev^\x14\xa8\x9b\xd6<\x98\x8d\xcad\xd5\x92\xc8\x02\xc0\xc0\xe6M\xcb3\xac\x03\xa6WZ=\x14~\xde \xab@=#r\xb8]\x1c&\xec\xb0\x9e:_\xf2$\xe5b\xbe\x02\x03\x81yi\x1c\xc6>\x95S\x99\'\xc9\xa6o\x1a\xd7~w\xcb\xbc~\xd1XE\x056\x97\x06\ra\xa0\xd8\x1a\xcb\xd4jB\xa8\x9e\x0b\xbc\xf2\xcb3`Y"\xf4\x05\xbe\x98\xcb\xa4S\x97\x0b\xcd\x0cp\x84p\xad\xa2\x07\x8ej?\n\x96m 
\xdb`\x12\xd4\x11&\\\x07b\xa79\xda\xcb\xc8\x83\xed\x81\xefw\x9f\xf1oF\x0e\xab}`\xee\xb54\xef\xcc\x9f\xc1\xec\xfe\x16\x96B\xa7\x94^\xc4\x10P\xba,eb.\x08-8\t\x8a\xd3Uq\xc3S\x88Z"+J\x93\xd4\xc6\xdde\xde\x8au"l\xc6\x13(\x13\xe4\xc1\xf7c\x1d\xee\xe9)\x11xF\x16\x08"\xafK/W \xdc\xb9\xbd\xa5CY\x03Q\xf0\xe4F\xa5\x0eO\xec\xad\xb2q\x17>N-\x15\xff\xfa8\xbbs\xc4|\xcd;,\xc7\xec\'\xa3\xfa\xb9\x07\xd9Q%\xf6\x84\x10q\xe7*VQ\xa3\xbb\xc8\x89\xb7g\xe7t\xe1\xe7\xb5\xc0\x0e8\x8d\x19\xe5v\xa1\t{\x8c\x9b\x1dx#\xf9\xc5\xcb\xf4y\xb9^\x1d\xba\x06\x81\xc52\xb8p\x91\x8b\x04c,\x9a\xa7\xfa\xaa\x93V\xc5>\xe0\xe5X>H\x99\xa6X\x9b\xfa\xbe\xcd\x14\xfd\xe4\x8an\xa18\x1f\x11gc\x83\x0b\xb6RLIz[\x1e\x80\x18\xa3\x9d\xc5\xec\x87\x12\x1b\x12\xe7\xf1\x8a\xae\xb4\xea\x99\x0e2\xa2w\xe4S\xd7\xe9Pq\xfd\x9c\xd6k\xf6\xa5`\x99}\x08\xc9\x9b5\x12\xe8\x17\xe2\xcf\x9f\x9bm\xc3\xe5<\x9f5m\xa1\xa4\xb5\xf1\x87\x8d\xf5}2yte\x14V\xf6\x10\xae\xd4\xeec\xa0\xdaq@(\xd6B\xa8R\xee"v\xf3\xeef\xb7\xb1\x8a\x8cu|\x11J\xb0 \xbe\xe1\x0e\rg\xc3\x9dd\xe2\xb12\xaf\xa3T\xa9\x18\xe7\xf3\x14V\x90\x07\xfali\x91\xc8\x06\xb3\xad\xe0i@\x19"W\x19\x1b\xc9|\xca\xfb\xe1x\xa8\xe4\xd8\x19\x81u4%\xc4_\xfb\xe9\xf90fI\x0eo\x9b\x1d\x98\x13\xa9\xd5\x89\x8c\xab>\xafH\xa2\x91eVe\xea\x03\x19p\xab\xa5\xed\x06\xb9f\x80\xc60\xc0\x8b\x1c\x18\xec\xd3\xb2\xc6l\xe44TAs3\x15\xc4\xac\xac\x0c\x0baN\xcb\xb7\x17\xd9\x1a\xbeG\x88\x9b\x98R\xb0Tp\x04\xa8\x99\x86\x11\xd5_I\x07\xce\x0e\xb8\x92\'Y\xefV\xc287\xdb+\xfd\xd2D\x13\xf7\x84\xec\xd45\x19R\x16O\xa1\x119<2\xb9\xa0K\xf6G\x8e\xc6S\n\r*h\xb1\xd1p\x10\xdd\\\xa9\xd0y\x1cG\x95\xb3D\xba\xa16\xb0\xd1\x98E\x87\x08\x01l.J\xe8\xeaA\x11\xb4Yr@\x19d!\xbb\x91\x06\xf1\x8a\xc0\xcdK\xf9\xback\x14\xa8F\x99)\x9f\xe5\xaf\xce#}ITF\x131T\xab\xe0\x05*>\xbeA{>\xac\xeak\xea\x95\xf9Bw 4\xec\xac\xdc\xe8\xac\xe4\xb6v\xcd\x91\x95\x05' +encoded = b'\x1b\x99\x13 ,\x0elw\x08\x95t\xd2\xe6\xf2(\xcd\'5M\x9dg\xd7\xc9)uG\xf4P\x8c;\x92_(\xb1\x14@L\x9d$\xa2\x16\x8f\x06\xc8\x18J\xed\xac\xabd_\xfb\xbe\xcb\xab\x90]VTu\xbc\xe1\xc3\x11\x96\x81T\r\xabj\x07\xbd\xe0\xb2\xd7y\x89\x1c\x81\xfd8\x9f\x15\xcf\xf8\xf6\xe6\x84\x8d\x90Ta<\xef\xcf\xce\xcc\x95\xa4\xff;\xcaO\xe9e\x97z/\xeap\t\x0e\x17\xd9\x14\xb6\xa9V\x151\n\xd0\xe0Fh\xd8\xbd\xd2\xdcB@\xed\xfa\xbe\xea\xb1V\xa1\xe7I\xd5\xfa\x7fTV\xa0\xa4\xda\x86G>R\x1a\x84\x1fs\x1b/\x98\xd6\xfa#\xaa_\xf5\xb7-\xf8d\x99\x80\x87O\xdb\xa1\xbe\xa9\x1a\xc3\xfb\xaf!\xc4f#oa\xc4\xffY?\x8f\xf3\xfc{\xe6\x1dB\x11\xe6\xdd\xe6(\\_S \xfd\xc9\x12\x9d4\t\x1cMO\x9a0u\xfd\t\xd7z\x1d\xe4l\'y\x17\x83\xbcCb\x18\xbfs\x16\xe3\xcc\xf1\x82\x97y\xc9n\x93\xce>=\x05\xb7\x15i\x91\xc4\xe3\xceek_\xfe\xfdzQ\x18\xfa\x19/\xe7\xddfk\x15E\xb2\xf6\xf6\xddo\x05\xbeSOc\xbd\xcb\xad{Ve\x1e/\xa0Y\xac\xaf\x87Z\x0f\xc7\xf0\xd9\xda\x17\xf4\x8e%\xf5Qc\xb9[K\xd2\xe1\x86k\x14\x84k \xf8\x12\xe8,2\x7fE}RT\xd5\xcb\xe0lv\xb8~\x11\xc0Bl\x92`\xf1\xb2\xcd\xfc3\xba\xf1\xe5m\xc2mI\xc0>D\x813e\x1b\\]\xfb\xf4G\x1d\xf9,\xa6\xb8\xff@\x947I\x8d\xd1\xbc\x1c\x0c(\xde\x138\xa3\xd8\x8e`\xd6\x7f\x81 
\x82\x0e\x87\xfa"\x01\xdbqzL\x8a\x7f{\xb2\xefw\x8c^\xcdS\x9c&K\x1e\x1f\xc7\xaaj\xad\x1f0\x1f\x199\x10\xaez\n\x18\x81R6v\x99j/^\xf9\xbb\x88WB\xae\x97\xc2*\xedM\x80a]\xcc\xc1\x0e{\xf8\x81\xbd,=\xdf\xe6c\x9a\xbe\x7f\nO\x8a\x99\xd1?\xfc\x88\xc4\\\x1a(\xa4\\\xf6!\x7f}\xfd\xed\xb7+\xe4\xff\xfa\xebhk\xf6\x13R@h9j\xfd\x8ev\x9b\x89l\xbe\xfe\x9d8S\x0b\xec\xb7gNk\xcc\x9a\x9fR\xed\xc5Fv/F\xc0\xef)B1u6z\xfc\xd6\x9d\x9a\x1b\x01;a\xfa$\x96\x1b\xd7\x97\xf5\x8f\x0316\xfb\xddZ\xe8;\xdf=\x80S\xed-\xf3\x13\xb5$1\x7f2CNm\xc3+KQ\x97\xafe\xf4i\x91\x8bNq=-h\x82\x9e\xed>B\xb1\xfc,\xbbU\xe1\x14\x1c\x1f\xc9\x14\xc6\xbd\xb5*\xc8\xc5\x0f\xc4l\xed\x13\\_\xf5j\xff0s\xbev\x11\xf0d\x1dl\xd8\x1f\xc0\xe5g\x98(P\x87\xbb%.\x8c\xf0~8\xdcF\x8e\xb3\xd8>\xc6\x0c\xfb\xc4_\xc3\xce\x85\xeds\x9aR\xf3\xdc\xe6\x8dI\xc7`F\x08B?U\xda\xcf\x0c\xb8r,\xa2\x07\x9b\xd3\x1c$aG\xfc\xe5\xd5\x02\x85\xe9\xca\t\x12\xf1\xf6@[C\x10\xe9:\xed\xb5T\x96\xca\x8a\xb1X\xbeaV\x15\x0cp\xd8k\xbam\xe4\xf2\x12*\x03\xebo\x14 \x17\xe6\x04\xff_\x80\x8f\x10\x85/\xe5T\x13\x15\x84o\xde\xc6\xac\x965\x0f\xa7\xa7]\xec^\xbfXd \xd8\x7fiL\xacg\xb2\xc7\xf1\xa5\xd0\x81;\xd7e\x87\x14.\x80\x01z\xe0\xd1\x9cV\xf4\x1e;\xfe\x83\x1d\x9e\xc1\xf6\xbd\xcb\x97\xe2xa]\x18\x1c\x02\xeeZ\xf4b\x08\xa0<\xde\xab3\xec\xe0K\x1b\xfe\xdaC\xe1 \xf7\xb3&?\xae\xa6u\x18\x9buaq\xcd\xefI\xc6zNO\xf1\xca:\xc5\xdfk\\\x96\xc5:\x01<pnba\'\x89N~\xda\xe25*\xd0i\xe2DZ\x90\xf4-\xc4\x99O91\xc9\xd5r9\x8f^8\xcaP \xce\xa3^\xce\x07L)\x87\x1c\xd6C\xad\x1d\x98V\xadl`\xd7\xa4\xc1\x9d"\x8c\xfa\xca\x84\xce\x16\x81A\xabRn?p\xb4\xdbC\x98\xf4\xa1z\x11\xcb\x86CJ\xb6#i\x08&T\x08\xe4\xb8\xf7\x81\x18h\x19h\xa4\xadT\xebD\x14t\x9d2\xff&O)\x17\xaa\x1a\xbb\x99\x98\x03\xc0\xc7\xda\xe0S\xdc\xfa\xb4p\x93g\x1dQ\xb5L\x1drL\xda`G\xa1\x1dGt\x9ci\xbd\x11X9ev^\x14\xa8\x9b\xd6<\x98\x8d\xcad\xd5\x92\xc8\x02\xc0\xc0\xe6M\xcb3\xac\x03\xa6WZ=\x14~\xde \xab@=#r\xb8]\x1c&\xec\xb0\x9e:_\xf2$\xe5b\xbe\x02\x03\x81yi\x1c\xc6>\x95S\x99\'\xc9\xa6o\x1a\xd7~w\xcb\xbc~\xd1XE\x056\x97\x06\ra\xa0\xd8\x1a\xcb\xd4jB\xa8\x9e\x0b\xbc\xf2\xcb3`Y"\xf4\x05\xbe\x98\xcb\xa4S\x97\x0b\xcd\x0cp\x84p\xad\xa2\x07\x8ej?\n\x96m \xdb`\x12\xd4\x11&\\\x07b\xa79\xda\xcb\xc8\x83\xed\x81\xefw\x9f\xf1oF\x0e\xab}`\xee\xb54\xef\xcc\x9f\xc1\xec\xfe\x16\x96B\xa7\x94^\xc4\x10P\xba,eb.\x08-8\t\x8a\xd3Uq\xc3S\x88Z"+J\x93\xd4\xc6\xdde\xde\x8au"l\xc6\x13(\x13\xe4\xc1\xf7c\x1d\xee\xe9)\x11xF\x16\x08"\xafK/W \xdc\xb9\xbd\xa5CY\x03Q\xf0\xe4F\xa5\x0eO\xec\xad\xb2q\x17>N-\x15\xff\xfa8\xbbs\xc4|\xcd;,\xc7\xec\'\xa3\xfa\xb9\x07\xd9Q%\xf6\x84\x10q\xe7*VQ\xa3\xbb\xc8\x89\xb7g\xe7t\xe1\xe7\xb5\xc0\x0e8\x8d\x19\xe5v\xa1\t{\x8c\x9b\x1dx#\xf9\xc5\xcb\xf4y\xb9^\x1d\xba\x06\x81\xc52\xb8p\x91\x8b\x04c,\x9a\xa7\xfa\xaa\x93V\xc5>\xe0\xe5X>H\x99\xa6X\x9b\xfa\xbe\xcd\x14\xfd\xe4\x8an\xa18\x1f\x11gc\x83\x0b\xb6RLIz[\x1e\x80\x18\xa3\x9d\xc5\xec\x87\x12\x1b\x12\xe7\xf1\x8a\xae\xb4\xea\x99\x0e2\xa2w\xe4S\xd7\xe9Pq\xfd\x9c\xd6k\xf6\xa5`\x99}\x08\xc9\x9b5\x12\xe8\x17\xe2\xcf\x9f\x9bm\xc3\xe5<\x9f5m\xa1\xa4\xb5\xf1\x87\x8d\xf5}2yte\x14V\xf6\x10\xae\xd4\xeec\xa0\xdaq@(\xd6B\xa8R\xee"v\xf3\xeef\xb7\xb1\x8a\x8cu|\x11J\xb0 
\xbe\xe1\x0e\rg\xc3\x9dd\xe2\xb12\xaf\xa3T\xa9\x18\xe7\xf3\x14V\x90\x07\xfali\x91\xc8\x06\xb3\xad\xe0i@\x19"W\x19\x1b\xc9|\xca\xfb\xe1x\xa8\xe4\xd8\x19\x81u4%\xc4_\xfb\xe9\xf90fI\x0eo\x9b\x1d\x98\x13\xa9\xd5\x89\x8c\xab>\xafH\xa2\x91eVe\xea\x03\x19p\xab\xa5\xed\x06\xb9f\x80\xc60\xc0\x8b\x1c\x18\xec\xd3\xb2\xc6l\xe44TAs3\x15\xc4\xac\xac\x0c\x0baN\xcb\xb7\x17\xd9\x1a\xbeG\x88\x9b\x98R\xb0Tp\x04\xa8\x99\x86\x11\xd5_I\x07\xce\x0e\xb8\x92\'Y\xefV\xc287\xdb+\xfd\xd2D\x13\xf7\x84\xec\xd45\x19R\x16O\xa1\x119<2\xb9\xa0K\xf6G\x8e\xc6S\n\r*h\xb1\xd1p\x10\xdd\\\xa9\xd0y\x1cG\x95\xb3D\xba\xa16\xb0\xd1\x98E\x87\x08\x01l.J\xe8\xeaA\x11\xb4Yr@\x19d!\xbb\x91\x06\xf1\x8a\xc0\xcdK\xf9\xback\x14\xa8F\x99)\x9f\xe5\xaf\xce#}ITF\x131T\xab\xe0\x05*>\xbeA{>\xac\xeak\xea\x95\xf9Bw 4\xec\xac\xdc\xe8\xac\xe4\xb6v\xcd\x91\x95\x05' def main(request, response): - if 'raw' in request.GET: - headers = [("Content-type", "text/plain"), - ("Content-Length", len(decoded))] + if b'raw' in request.GET: + headers = [(b"Content-type", b"text/plain"), + (b"Content-Length", len(decoded))] return headers, decoded - headers = [("Content-type", "text/plain"), - ("Content-Encoding", "br"), - ("Content-Length", len(encoded))] + headers = [(b"Content-type", b"text/plain"), + (b"Content-Encoding", b"br"), + (b"Content-Length", len(encoded))] return headers, encoded diff --git a/tests/wpt/mozilla/tests/mozilla/resources/no_mime_type.py b/tests/wpt/mozilla/tests/mozilla/resources/no_mime_type.py index ba42a7f24fe..860005a42c2 100644 --- a/tests/wpt/mozilla/tests/mozilla/resources/no_mime_type.py +++ b/tests/wpt/mozilla/tests/mozilla/resources/no_mime_type.py @@ -5,7 +5,7 @@ def main(request, response): headers = [] - if 'Content-Type' in request.GET: - headers += [('Content-Type', request.GET['Content-Type'])] - with open('./resources/ahem/AHEM____.TTF') as f: + if b'Content-Type' in request.GET: + headers += [(b'Content-Type', request.GET[b'Content-Type'])] + with open('./resources/ahem/AHEM____.TTF', 'rb') as f: return 200, headers, f.read() |