Diffstat (limited to 'components/script')
-rw-r--r-- | components/script/build.rs | 8 |
-rw-r--r-- | components/script/dom/bindings/codegen/CodegenRust.py | 40 |
-rw-r--r-- | components/script/dom/bindings/codegen/Configuration.py | 12 |
-rw-r--r-- | components/script/dom/bindings/codegen/parser/WebIDL.py | 72 |
-rw-r--r-- | components/script/dom/bindings/codegen/ply/README | 2 |
-rw-r--r-- | components/script/dom/bindings/codegen/ply/ply/__init__.py | 2 |
-rw-r--r-- | components/script/dom/bindings/codegen/ply/ply/lex.py | 853 |
-rw-r--r-- | components/script/dom/bindings/codegen/ply/ply/yacc.py | 2576 |
-rw-r--r-- | components/script/dom/bindings/codegen/run.py | 4 |
9 files changed, 1315 insertions, 2254 deletions
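Nearly every hunk that follows in CodegenRust.py and Configuration.py applies the same Python 3 migration pattern: filter(), map(), and dict views are now lazy, one-shot iterators, so any call site that takes len(), indexes the result, or iterates it twice must materialize a list (or use a comprehension), and dict.iteritems() becomes dict.items(). A minimal, self-contained sketch of the pattern; the sample values here are illustrative, not taken from the codegen:

# Python 3 turns filter(), map(), and dict views into lazy, one-shot
# iterators, so code that needs len(), indexing, or a second pass must
# materialize a list first.
memberTypes = ["DOMString", "long", "sequence<long>"]

# Python 2: filter() returned a list, so len() worked directly.
# Python 3: filter() returns an iterator; len() would raise TypeError.
interfaceMemberTypes = [t for t in memberTypes if "<" not in t]
assert len(interfaceMemberTypes) == 2

# dict.iteritems() is gone; items() returns a view. Wrapping it in
# list() snapshots the pairs before they are consumed or filtered.
operations = {"IndexedGetter": None, "NamedGetter": "getter"}
present = [name for (name, op) in list(operations.items()) if op]
assert present == ["NamedGetter"]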
diff --git a/components/script/build.rs b/components/script/build.rs index 6a9b4866759..ea39b698257 100644 --- a/components/script/build.rs +++ b/components/script/build.rs @@ -65,11 +65,11 @@ impl<'a> phf_shared::PhfHash for Bytes<'a> { } fn find_python() -> String { - env::var("PYTHON2").ok().unwrap_or_else(|| { + env::var("PYTHON3").ok().unwrap_or_else(|| { let candidates = if cfg!(windows) { - ["python2.7.exe", "python27.exe", "python.exe"] + ["python3.8.exe", "python38.exe", "python.exe"] } else { - ["python2.7", "python2", "python"] + ["python3.8", "python3", "python"] }; for &name in &candidates { if Command::new(name) @@ -82,7 +82,7 @@ fn find_python() -> String { } } panic!( - "Can't find python (tried {})! Try fixing PATH or setting the PYTHON2 env var", + "Can't find python (tried {})! Try fixing PATH or setting the PYTHON3 env var", candidates.join(", ") ) }) diff --git a/components/script/dom/bindings/codegen/CodegenRust.py b/components/script/dom/bindings/codegen/CodegenRust.py index dbf363d0fbc..8573998ebb3 100644 --- a/components/script/dom/bindings/codegen/CodegenRust.py +++ b/components/script/dom/bindings/codegen/CodegenRust.py @@ -344,7 +344,7 @@ class CGMethodCall(CGThing): distinguishingArg = "HandleValue::from_raw(args.get(%d))" % distinguishingIndex def pickFirstSignature(condition, filterLambda): - sigs = filter(filterLambda, possibleSignatures) + sigs = list(filter(filterLambda, possibleSignatures)) assert len(sigs) < 2 if len(sigs) > 0: call = getPerSignatureCall(sigs[0], distinguishingIndex) @@ -2117,7 +2117,7 @@ class CGImports(CGWrapper): members += [constructor] if d.proxy: - members += [o for o in d.operations.values() if o] + members += [o for o in list(d.operations.values()) if o] for m in members: if m.isMethod(): @@ -2557,7 +2557,7 @@ def UnionTypes(descriptors, dictionaries, callbacks, typedefs, config): ]) # Sort unionStructs by key, retrieve value - unionStructs = (i[1] for i in sorted(unionStructs.items(), key=operator.itemgetter(0))) + unionStructs = (i[1] for i in sorted(list(unionStructs.items()), key=operator.itemgetter(0))) return CGImports(CGList(unionStructs, "\n\n"), descriptors=[], @@ -4455,9 +4455,10 @@ class CGEnum(CGThing): pub enum %s { %s } -""" % (ident, ",\n ".join(map(getEnumValueName, enum.values()))) +""" % (ident, ",\n ".join(map(getEnumValueName, list(enum.values())))) - pairs = ",\n ".join(['("%s", super::%s::%s)' % (val, ident, getEnumValueName(val)) for val in enum.values()]) + pairs = ",\n ".join(['("%s", super::%s::%s)' % (val, ident, getEnumValueName(val)) + for val in list(enum.values())]) inner = string.Template("""\ use crate::dom::bindings::conversions::ConversionResult; @@ -4640,9 +4641,8 @@ class CGUnionStruct(CGThing): return "Rc" return "" - templateVars = map(lambda t: (getUnionTypeTemplateVars(t, self.descriptorProvider), - getTypeWrapper(t)), - self.type.flatMemberTypes) + templateVars = [(getUnionTypeTemplateVars(t, self.descriptorProvider), + getTypeWrapper(t)) for t in self.type.flatMemberTypes] enumValues = [ " %s(%s)," % (v["name"], "%s<%s>" % (wrapper, v["typeName"]) if wrapper else v["typeName"]) for (v, wrapper) in templateVars @@ -4701,7 +4701,7 @@ class CGUnionConversionStruct(CGThing): " Ok(None) => (),\n" "}\n") % (self.type, name, self.type, name) - interfaceMemberTypes = filter(lambda t: t.isNonCallbackInterface(), memberTypes) + interfaceMemberTypes = [t for t in memberTypes if t.isNonCallbackInterface()] if len(interfaceMemberTypes) > 0: typeNames = [get_name(memberType) for memberType in 
interfaceMemberTypes] interfaceObject = CGList(CGGeneric(get_match(typeName)) for typeName in typeNames) @@ -4709,7 +4709,7 @@ class CGUnionConversionStruct(CGThing): else: interfaceObject = None - arrayObjectMemberTypes = filter(lambda t: t.isSequence(), memberTypes) + arrayObjectMemberTypes = [t for t in memberTypes if t.isSequence()] if len(arrayObjectMemberTypes) > 0: assert len(arrayObjectMemberTypes) == 1 typeName = arrayObjectMemberTypes[0].name @@ -4718,7 +4718,7 @@ class CGUnionConversionStruct(CGThing): else: arrayObject = None - callbackMemberTypes = filter(lambda t: t.isCallback() or t.isCallbackInterface(), memberTypes) + callbackMemberTypes = [t for t in memberTypes if t.isCallback() or t.isCallbackInterface()] if len(callbackMemberTypes) > 0: assert len(callbackMemberTypes) == 1 typeName = callbackMemberTypes[0].name @@ -4726,7 +4726,7 @@ class CGUnionConversionStruct(CGThing): else: callbackObject = None - dictionaryMemberTypes = filter(lambda t: t.isDictionary(), memberTypes) + dictionaryMemberTypes = [t for t in memberTypes if t.isDictionary()] if len(dictionaryMemberTypes) > 0: assert len(dictionaryMemberTypes) == 1 typeName = dictionaryMemberTypes[0].name @@ -4735,7 +4735,7 @@ class CGUnionConversionStruct(CGThing): else: dictionaryObject = None - objectMemberTypes = filter(lambda t: t.isObject(), memberTypes) + objectMemberTypes = [t for t in memberTypes if t.isObject()] if len(objectMemberTypes) > 0: assert len(objectMemberTypes) == 1 typeName = objectMemberTypes[0].name @@ -4744,7 +4744,7 @@ class CGUnionConversionStruct(CGThing): else: object = None - mozMapMemberTypes = filter(lambda t: t.isRecord(), memberTypes) + mozMapMemberTypes = [t for t in memberTypes if t.isRecord()] if len(mozMapMemberTypes) > 0: assert len(mozMapMemberTypes) == 1 typeName = mozMapMemberTypes[0].name @@ -4790,9 +4790,9 @@ class CGUnionConversionStruct(CGThing): typename = get_name(memberType) return CGGeneric(get_match(typename)) other = [] - stringConversion = map(getStringOrPrimitiveConversion, stringTypes) - numericConversion = map(getStringOrPrimitiveConversion, numericTypes) - booleanConversion = map(getStringOrPrimitiveConversion, booleanTypes) + stringConversion = list(map(getStringOrPrimitiveConversion, stringTypes)) + numericConversion = list(map(getStringOrPrimitiveConversion, numericTypes)) + booleanConversion = list(map(getStringOrPrimitiveConversion, booleanTypes)) if stringConversion: if booleanConversion: other.append(CGIfWrapper("value.get().is_boolean()", booleanConversion[0])) @@ -5958,7 +5958,7 @@ class CGInterfaceTrait(CGThing): rettype) if descriptor.proxy: - for name, operation in descriptor.operations.iteritems(): + for name, operation in descriptor.operations.items(): if not operation or operation.isStringifier(): continue @@ -6488,7 +6488,7 @@ class CGDescriptor(CGThing): post='\n') if reexports: - reexports = ', '.join(map(lambda name: reexportedName(name), reexports)) + reexports = ', '.join([reexportedName(name) for name in reexports]) cgThings = CGList([CGGeneric('pub use self::%s::{%s};' % (toBindingNamespace(descriptor.name), reexports)), cgThings], '\n') @@ -7824,7 +7824,7 @@ impl Clone for TopTypeId { # TypeId enum. 
return "%s(%sTypeId)" % (name, name) if name in hierarchy else name - for base, derived in hierarchy.iteritems(): + for base, derived in hierarchy.items(): variants = [] if config.getDescriptor(base).concrete: variants.append(CGGeneric(base)) diff --git a/components/script/dom/bindings/codegen/Configuration.py b/components/script/dom/bindings/codegen/Configuration.py index 4f47a737706..b92f68af3b9 100644 --- a/components/script/dom/bindings/codegen/Configuration.py +++ b/components/script/dom/bindings/codegen/Configuration.py @@ -73,7 +73,7 @@ class Configuration: def getDescriptors(self, **filters): """Gets the descriptors that match the given filters.""" curr = self.descriptors - for key, val in filters.iteritems(): + for key, val in filters.items(): if key == 'webIDLFile': def getter(x): return x.interface.filename() @@ -104,14 +104,14 @@ class Configuration: else: def getter(x): return getattr(x, key) - curr = filter(lambda x: getter(x) == val, curr) + curr = [x for x in curr if getter(x) == val] return curr def getEnums(self, webIDLFile): - return filter(lambda e: e.filename() == webIDLFile, self.enums) + return [e for e in self.enums if e.filename() == webIDLFile] def getTypedefs(self, webIDLFile): - return filter(lambda e: e.filename() == webIDLFile, self.typedefs) + return [e for e in self.typedefs if e.filename() == webIDLFile] @staticmethod def _filterForFile(items, webIDLFile=""): @@ -119,7 +119,7 @@ class Configuration: if not webIDLFile: return items - return filter(lambda x: x.filename() == webIDLFile, items) + return [x for x in items if x.filename() == webIDLFile] def getDictionaries(self, webIDLFile=""): return self._filterForFile(self.dictionaries, webIDLFile=webIDLFile) @@ -327,7 +327,7 @@ class Descriptor(DescriptorProvider): if config == '*': iface = self.interface while iface: - add('all', map(lambda m: m.name, iface.members), attribute) + add('all', [m.name for m in iface.members], attribute) iface = iface.parent else: add('all', [config], attribute) diff --git a/components/script/dom/bindings/codegen/parser/WebIDL.py b/components/script/dom/bindings/codegen/parser/WebIDL.py index 223fd7efbb4..d74278c3e0c 100644 --- a/components/script/dom/bindings/codegen/parser/WebIDL.py +++ b/components/script/dom/bindings/codegen/parser/WebIDL.py @@ -4,7 +4,7 @@ """ A WebIDL parser. 
""" -from __future__ import print_function + from ply import lex, yacc import re import os @@ -57,7 +57,7 @@ def enum(*names, **kw): if "base" not in kw: return Foo(names) - return Foo(chain(kw["base"].attrs.keys(), names)) + return Foo(chain(list(kw["base"].attrs.keys()), names)) class WebIDLError(Exception): @@ -124,6 +124,9 @@ class BuiltinLocation(object): return (isinstance(other, BuiltinLocation) and self.msg == other.msg) + def __hash__(self): + return hash(self.msg) + def filename(self): return '<builtin>' @@ -2360,6 +2363,9 @@ class IDLNullableType(IDLParametrizedType): def __eq__(self, other): return isinstance(other, IDLNullableType) and self.inner == other.inner + def __hash__(self): + return hash(self.inner) + def __str__(self): return self.inner.__str__() + "OrNull" @@ -2522,6 +2528,9 @@ class IDLSequenceType(IDLParametrizedType): def __eq__(self, other): return isinstance(other, IDLSequenceType) and self.inner == other.inner + def __hash__(self): + return hash(self.inner) + def __str__(self): return self.inner.__str__() + "Sequence" @@ -2933,6 +2942,9 @@ class IDLWrapperType(IDLType): self._identifier == other._identifier and self.builtin == other.builtin) + def __hash__(self): + return hash((self._identifier, self.builtin)) + def __str__(self): return str(self.name) + " (Wrapper)" @@ -3301,6 +3313,12 @@ class IDLBuiltinType(IDLType): return "MaybeShared" + str(self.name) return str(self.name) + def __eq__(self, other): + return other and self.location == other.location and self.name == other.name and self._typeTag == other._typeTag + + def __hash__(self): + return hash((self.location, self.name, self._typeTag)) + def prettyName(self): return IDLBuiltinType.PrettyNames[self._typeTag] @@ -3628,7 +3646,7 @@ integerTypeSizes = { def matchIntegerValueToType(value): - for type, extremes in integerTypeSizes.items(): + for type, extremes in list(integerTypeSizes.items()): (min, max) = extremes if value <= max and value >= min: return BuiltinTypes[type] @@ -3707,7 +3725,7 @@ class IDLValue(IDLObject): elif self.type.isString() and type.isEnum(): # Just keep our string, but make sure it's a valid value for this enum enum = type.unroll().inner - if self.value not in enum.values(): + if self.value not in list(enum.values()): raise WebIDLError("'%s' is not a valid default value for enum %s" % (self.value, enum.identifier.name), [location, enum.location]) @@ -4789,7 +4807,7 @@ class IDLAttribute(IDLInterfaceMember): "CrossOriginWritable", "SetterThrows", ] - for (key, value) in self._extendedAttrDict.items(): + for (key, value) in list(self._extendedAttrDict.items()): if key in allowedExtAttrs: if value is not True: raise WebIDLError("[%s] with a value is currently " @@ -5479,7 +5497,7 @@ class IDLMethod(IDLInterfaceMember, IDLScope): [attr.location]) if identifier == "CrossOriginCallable" and self.isStatic(): raise WebIDLError("[CrossOriginCallable] is only allowed on non-static " - "attributes" + "attributes", [attr.location, self.location]) elif identifier == "Pure": if not attr.noArguments(): @@ -5721,6 +5739,7 @@ class Tokenizer(object): "FLOATLITERAL", "IDENTIFIER", "STRING", + "COMMENTS", "WHITESPACE", "OTHER" ] @@ -5753,8 +5772,12 @@ class Tokenizer(object): t.value = t.value[1:-1] return t + def t_COMMENTS(self, t): + r'(\/\*(.|\n)*?\*\/)|(\/\/.*)' + pass + def t_WHITESPACE(self, t): - r'[\t\n\r ]+|[\t\n\r ]*((//[^\n]*|/\*.*?\*/)[\t\n\r ]*)+' + r'[\t\n\r ]+' pass def t_ELLIPSIS(self, t): @@ -5840,7 +5863,7 @@ class Tokenizer(object): "async": "ASYNC", } - 
tokens.extend(keywords.values()) + tokens.extend(list(keywords.values())) def t_error(self, t): raise WebIDLError("Unrecognized Input", @@ -5849,23 +5872,21 @@ class Tokenizer(object): lexpos=self.lexer.lexpos, filename=self.filename)]) - def __init__(self, outputdir, lexer=None): + def __init__(self, lexer=None): if lexer: self.lexer = lexer else: - self.lexer = lex.lex(object=self, - outputdir=outputdir, - lextab='webidllex', - reflags=re.DOTALL) + self.lexer = lex.lex(object=self) class SqueakyCleanLogger(object): errorWhitelist = [ - # Web IDL defines the WHITESPACE token, but doesn't actually + # Web IDL defines the WHITESPACE and COMMENTS token, but doesn't actually # use it ... so far. "Token 'WHITESPACE' defined, but not used", - # And that means we have an unused token - "There is 1 unused token", + "Token 'COMMENTS' defined, but not used", + # And that means we have unused tokens + "There are 2 unused tokens", # Web IDL defines a OtherOrComma rule that's only used in # ExtendedAttributeInner, which we don't use yet. "Rule 'OtherOrComma' defined, but not used", @@ -7506,22 +7527,11 @@ class Parser(Tokenizer): raise WebIDLError("invalid syntax", [Location(self.lexer, p.lineno, p.lexpos, self._filename)]) def __init__(self, outputdir='', lexer=None): - Tokenizer.__init__(self, outputdir, lexer) + Tokenizer.__init__(self, lexer) logger = SqueakyCleanLogger() try: - self.parser = yacc.yacc(module=self, - outputdir=outputdir, - tabmodule='webidlyacc', - errorlog=logger, - debug=False - # Pickling the grammar is a speedup in - # some cases (older Python?) but a - # significant slowdown in others. - # We're not pickling for now, until it - # becomes a speedup again. - # , picklefile='WebIDLGrammar.pkl' - ) + self.parser = yacc.yacc(module=self, errorlog=logger, debug=False) finally: logger.reportGrammarErrors() @@ -7553,12 +7563,12 @@ class Parser(Tokenizer): return type def parse(self, t, filename=None): - self.lexer.input(t) + self._filename = filename + self.lexer.input(t.decode(encoding = 'utf-8')) # for tok in iter(self.lexer.token, None): # print tok - self._filename = filename self._productions.extend(self.parser.parse(lexer=self.lexer, tracking=True)) self._filename = None diff --git a/components/script/dom/bindings/codegen/ply/README b/components/script/dom/bindings/codegen/ply/README index 2459c490197..d3de9993360 100644 --- a/components/script/dom/bindings/codegen/ply/README +++ b/components/script/dom/bindings/codegen/ply/README @@ -3,7 +3,7 @@ http://www.dabeaz.com/ply/ Licensed under BSD. -This directory contains just the code and license from PLY version 3.3; +This directory contains just the code and license from PLY version 4.0; the full distribution (see the URL) also contains examples, tests, documentation, and a longer README. 
diff --git a/components/script/dom/bindings/codegen/ply/ply/__init__.py b/components/script/dom/bindings/codegen/ply/ply/__init__.py index 853a985542b..87838622863 100644 --- a/components/script/dom/bindings/codegen/ply/ply/__init__.py +++ b/components/script/dom/bindings/codegen/ply/ply/__init__.py @@ -1,4 +1,6 @@ # PLY package # Author: David Beazley (dave@dabeaz.com) +# https://dabeaz.com/ply/index.html +__version__ = '4.0' __all__ = ['lex','yacc'] diff --git a/components/script/dom/bindings/codegen/ply/ply/lex.py b/components/script/dom/bindings/codegen/ply/ply/lex.py index 267ec100fc2..57b61f1779e 100644 --- a/components/script/dom/bindings/codegen/ply/ply/lex.py +++ b/components/script/dom/bindings/codegen/ply/ply/lex.py @@ -1,22 +1,24 @@ # ----------------------------------------------------------------------------- # ply: lex.py # -# Copyright (C) 2001-2009, +# Copyright (C) 2001-2020 # David M. Beazley (Dabeaz LLC) # All rights reserved. # +# Latest version: https://github.com/dabeaz/ply +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# and/or other materials provided with the distribution. +# * Neither the name of David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -31,72 +33,50 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ----------------------------------------------------------------------------- -__version__ = "3.3" -__tabversion__ = "3.2" # Version of table file used - -import re, sys, types, copy, os - -# This tuple contains known string types -try: - # Python 2.6 - StringTypes = (types.StringType, types.UnicodeType) -except AttributeError: - # Python 3.0 - StringTypes = (str, bytes) - -# Extract the code attribute of a function. Different implementations -# are for Python 2/3 compatibility. +import re +import sys +import types +import copy +import os +import inspect -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ +# This tuple contains acceptable string types +StringTypes = (str, bytes) # This regular expression is used to match valid token names _is_identifier = re.compile(r'^[a-zA-Z0-9_]+$') # Exception thrown when invalid token encountered and no default error # handler is defined. - class LexError(Exception): - def __init__(self,message,s): - self.args = (message,) - self.text = s + def __init__(self, message, s): + self.args = (message,) + self.text = s # Token class. This class is used to represent the tokens produced. 
class LexToken(object): - def __str__(self): - return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos) def __repr__(self): - return str(self) + return f'LexToken({self.type},{self.value!r},{self.lineno},{self.lexpos})' -# This object is a stand-in for a logging object created by the -# logging module. +# This object is a stand-in for a logging object created by the +# logging module. class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def critical(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def critical(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') info = critical debug = critical -# Null logger is used when no output is generated. Does nothing. -class NullLogger(object): - def __getattribute__(self,name): - return self - def __call__(self,*args,**kwargs): - return self - # ----------------------------------------------------------------------------- # === Lexing Engine === # @@ -114,31 +94,32 @@ class NullLogger(object): class Lexer: def __init__(self): self.lexre = None # Master regular expression. This is a list of - # tuples (re,findex) where re is a compiled - # regular expression and findex is a list - # mapping regex group numbers to rules + # tuples (re, findex) where re is a compiled + # regular expression and findex is a list + # mapping regex group numbers to rules self.lexretext = None # Current regular expression strings self.lexstatere = {} # Dictionary mapping lexer states to master regexs self.lexstateretext = {} # Dictionary mapping lexer states to regex strings self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names - self.lexstate = "INITIAL" # Current lexer state + self.lexstate = 'INITIAL' # Current lexer state self.lexstatestack = [] # Stack of lexer states self.lexstateinfo = None # State information self.lexstateignore = {} # Dictionary of ignored characters for each state self.lexstateerrorf = {} # Dictionary of error functions for each state + self.lexstateeoff = {} # Dictionary of eof functions for each state self.lexreflags = 0 # Optional re compile flags self.lexdata = None # Actual input data (as a string) self.lexpos = 0 # Current position in input text self.lexlen = 0 # Length of the input text self.lexerrorf = None # Error rule (if any) + self.lexeoff = None # EOF rule (if any) self.lextokens = None # List of valid tokens - self.lexignore = "" # Ignored characters - self.lexliterals = "" # Literal characters that can be passed through + self.lexignore = '' # Ignored characters + self.lexliterals = '' # Literal characters that can be passed through self.lexmodule = None # Module self.lineno = 1 # Current line number - self.lexoptimize = 0 # Optimized mode - def clone(self,object=None): + def clone(self, object=None): c = copy.copy(self) # If the object parameter has been supplied, it means we are attaching the @@ -146,113 +127,29 @@ class Lexer: # the lexstatere and lexstateerrorf tables. 
if object: - newtab = { } + newtab = {} for key, ritem in self.lexstatere.items(): newre = [] for cre, findex in ritem: - newfindex = [] - for f in findex: - if not f or not f[0]: - newfindex.append(f) - continue - newfindex.append((getattr(object,f[0].__name__),f[1])) - newre.append((cre,newfindex)) + newfindex = [] + for f in findex: + if not f or not f[0]: + newfindex.append(f) + continue + newfindex.append((getattr(object, f[0].__name__), f[1])) + newre.append((cre, newfindex)) newtab[key] = newre c.lexstatere = newtab - c.lexstateerrorf = { } + c.lexstateerrorf = {} for key, ef in self.lexstateerrorf.items(): - c.lexstateerrorf[key] = getattr(object,ef.__name__) + c.lexstateerrorf[key] = getattr(object, ef.__name__) c.lexmodule = object return c # ------------------------------------------------------------ - # writetab() - Write lexer information to a table file - # ------------------------------------------------------------ - def writetab(self,tabfile,outputdir=""): - if isinstance(tabfile,types.ModuleType): - return - basetabfilename = tabfile.split(".")[-1] - filename = os.path.join(outputdir,basetabfilename)+".py" - tf = open(filename,"w") - tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__)) - tf.write("_tabversion = %s\n" % repr(__version__)) - tf.write("_lextokens = %s\n" % repr(self.lextokens)) - tf.write("_lexreflags = %s\n" % repr(self.lexreflags)) - tf.write("_lexliterals = %s\n" % repr(self.lexliterals)) - tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo)) - - tabre = { } - # Collect all functions in the initial state - initial = self.lexstatere["INITIAL"] - initialfuncs = [] - for part in initial: - for f in part[1]: - if f and f[0]: - initialfuncs.append(f) - - for key, lre in self.lexstatere.items(): - titem = [] - for i in range(len(lre)): - titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i]))) - tabre[key] = titem - - tf.write("_lexstatere = %s\n" % repr(tabre)) - tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore)) - - taberr = { } - for key, ef in self.lexstateerrorf.items(): - if ef: - taberr[key] = ef.__name__ - else: - taberr[key] = None - tf.write("_lexstateerrorf = %s\n" % repr(taberr)) - tf.close() - - # ------------------------------------------------------------ - # readtab() - Read lexer information from a tab file - # ------------------------------------------------------------ - def readtab(self,tabfile,fdict): - if isinstance(tabfile,types.ModuleType): - lextab = tabfile - else: - if sys.version_info[0] < 3: - exec("import %s as lextab" % tabfile) - else: - env = { } - exec("import %s as lextab" % tabfile, env,env) - lextab = env['lextab'] - - if getattr(lextab,"_tabversion","0.0") != __version__: - raise ImportError("Inconsistent PLY version") - - self.lextokens = lextab._lextokens - self.lexreflags = lextab._lexreflags - self.lexliterals = lextab._lexliterals - self.lexstateinfo = lextab._lexstateinfo - self.lexstateignore = lextab._lexstateignore - self.lexstatere = { } - self.lexstateretext = { } - for key,lre in lextab._lexstatere.items(): - titem = [] - txtitem = [] - for i in range(len(lre)): - titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict))) - txtitem.append(lre[i][0]) - self.lexstatere[key] = titem - self.lexstateretext[key] = txtitem - self.lexstateerrorf = { } - for key,ef in lextab._lexstateerrorf.items(): - self.lexstateerrorf[key] = fdict[ef] - 
self.begin('INITIAL') - - # ------------------------------------------------------------ # input() - Push a new string into the lexer # ------------------------------------------------------------ - def input(self,s): - # Pull off the first character to see if s looks like a string - c = s[:1] - if not isinstance(c,StringTypes): - raise ValueError("Expected a string") + def input(self, s): self.lexdata = s self.lexpos = 0 self.lexlen = len(s) @@ -260,19 +157,20 @@ class Lexer: # ------------------------------------------------------------ # begin() - Changes the lexing state # ------------------------------------------------------------ - def begin(self,state): - if not state in self.lexstatere: - raise ValueError("Undefined state") + def begin(self, state): + if state not in self.lexstatere: + raise ValueError(f'Undefined state {state!r}') self.lexre = self.lexstatere[state] self.lexretext = self.lexstateretext[state] - self.lexignore = self.lexstateignore.get(state,"") - self.lexerrorf = self.lexstateerrorf.get(state,None) + self.lexignore = self.lexstateignore.get(state, '') + self.lexerrorf = self.lexstateerrorf.get(state, None) + self.lexeoff = self.lexstateeoff.get(state, None) self.lexstate = state # ------------------------------------------------------------ # push_state() - Changes the lexing state and saves old on stack # ------------------------------------------------------------ - def push_state(self,state): + def push_state(self, state): self.lexstatestack.append(self.lexstate) self.begin(state) @@ -291,11 +189,11 @@ class Lexer: # ------------------------------------------------------------ # skip() - Skip ahead n characters # ------------------------------------------------------------ - def skip(self,n): + def skip(self, n): self.lexpos += n # ------------------------------------------------------------ - # opttoken() - Return the next token from the Lexer + # token() - Return the next token from the Lexer # # Note: This function has been carefully implemented to be as fast # as possible. Don't make changes unless you really know what @@ -315,9 +213,10 @@ class Lexer: continue # Look for a regular expression match - for lexre,lexindexfunc in self.lexre: - m = lexre.match(lexdata,lexpos) - if not m: continue + for lexre, lexindexfunc in self.lexre: + m = lexre.match(lexdata, lexpos) + if not m: + continue # Create a token for return tok = LexToken() @@ -326,16 +225,16 @@ class Lexer: tok.lexpos = lexpos i = m.lastindex - func,tok.type = lexindexfunc[i] + func, tok.type = lexindexfunc[i] if not func: - # If no token type was set, it's an ignored token - if tok.type: - self.lexpos = m.end() - return tok - else: - lexpos = m.end() - break + # If no token type was set, it's an ignored token + if tok.type: + self.lexpos = m.end() + return tok + else: + lexpos = m.end() + break lexpos = m.end() @@ -344,22 +243,15 @@ class Lexer: tok.lexer = self # Set additional attributes useful in token rules self.lexmatch = m self.lexpos = lexpos - newtok = func(tok) + del tok.lexer + del self.lexmatch # Every function must return a token, if nothing, we just move to next token if not newtok: lexpos = self.lexpos # This is here in case user has updated lexpos. lexignore = self.lexignore # This is here in case there was a state change break - - # Verify type of the token. 
If not in the token map, raise an error - if not self.lexoptimize: - if not newtok.type in self.lextokens: - raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % ( - func_code(func).co_filename, func_code(func).co_firstlineno, - func.__name__, newtok.type),lexdata[lexpos:]) - return newtok else: # No match, see if in literals @@ -377,38 +269,50 @@ class Lexer: tok = LexToken() tok.value = self.lexdata[lexpos:] tok.lineno = self.lineno - tok.type = "error" + tok.type = 'error' tok.lexer = self tok.lexpos = lexpos self.lexpos = lexpos newtok = self.lexerrorf(tok) if lexpos == self.lexpos: # Error method didn't change text position at all. This is an error. - raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:]) + raise LexError(f"Scanning error. Illegal character {lexdata[lexpos]!r}", + lexdata[lexpos:]) lexpos = self.lexpos - if not newtok: continue + if not newtok: + continue return newtok self.lexpos = lexpos - raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:]) + raise LexError(f"Illegal character {lexdata[lexpos]!r} at index {lexpos}", + lexdata[lexpos:]) + + if self.lexeoff: + tok = LexToken() + tok.type = 'eof' + tok.value = '' + tok.lineno = self.lineno + tok.lexpos = lexpos + tok.lexer = self + self.lexpos = lexpos + newtok = self.lexeoff(tok) + return newtok self.lexpos = lexpos + 1 if self.lexdata is None: - raise RuntimeError("No input string given with input()") + raise RuntimeError('No input string given with input()') return None # Iterator interface def __iter__(self): return self - def next(self): + def __next__(self): t = self.token() if t is None: raise StopIteration return t - __next__ = next - # ----------------------------------------------------------------------------- # ==== Lex Builder === # @@ -417,59 +321,24 @@ class Lexer: # ----------------------------------------------------------------------------- # ----------------------------------------------------------------------------- +# _get_regex(func) +# +# Returns the regular expression assigned to a function either as a doc string +# or as a .regex attribute attached by the @TOKEN decorator. +# ----------------------------------------------------------------------------- +def _get_regex(func): + return getattr(func, 'regex', func.__doc__) + +# ----------------------------------------------------------------------------- # get_caller_module_dict() # # This function returns a dictionary containing all of the symbols defined within # a caller further down the call stack. This is used to get the environment # associated with the yacc() call if none was provided. 
# ----------------------------------------------------------------------------- - def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict - -# ----------------------------------------------------------------------------- -# _funcs_to_names() -# -# Given a list of regular expression functions, this converts it to a list -# suitable for output to a table file -# ----------------------------------------------------------------------------- - -def _funcs_to_names(funclist,namelist): - result = [] - for f,name in zip(funclist,namelist): - if f and f[0]: - result.append((name, f[1])) - else: - result.append(f) - return result - -# ----------------------------------------------------------------------------- -# _names_to_funcs() -# -# Given a list of regular expression function names, this converts it back to -# functions. -# ----------------------------------------------------------------------------- - -def _names_to_funcs(namelist,fdict): - result = [] - for n in namelist: - if n and n[0]: - result.append((fdict[n[0]],n[1])) - else: - result.append(n) - return result + f = sys._getframe(levels) + return { **f.f_globals, **f.f_locals } # ----------------------------------------------------------------------------- # _form_master_re() @@ -478,36 +347,35 @@ def _names_to_funcs(namelist,fdict): # form the master regular expression. Given limitations in the Python re # module, it may be necessary to break the master regex into separate expressions. # ----------------------------------------------------------------------------- - -def _form_master_re(relist,reflags,ldict,toknames): - if not relist: return [] - regex = "|".join(relist) +def _form_master_re(relist, reflags, ldict, toknames): + if not relist: + return [], [], [] + regex = '|'.join(relist) try: - lexre = re.compile(regex,re.VERBOSE | reflags) + lexre = re.compile(regex, reflags) # Build the index to function map for the matching engine - lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1) + lexindexfunc = [None] * (max(lexre.groupindex.values()) + 1) lexindexnames = lexindexfunc[:] - for f,i in lexre.groupindex.items(): - handle = ldict.get(f,None) + for f, i in lexre.groupindex.items(): + handle = ldict.get(f, None) if type(handle) in (types.FunctionType, types.MethodType): - lexindexfunc[i] = (handle,toknames[f]) + lexindexfunc[i] = (handle, toknames[f]) lexindexnames[i] = f elif handle is not None: lexindexnames[i] = f - if f.find("ignore_") > 0: - lexindexfunc[i] = (None,None) + if f.find('ignore_') > 0: + lexindexfunc[i] = (None, None) else: lexindexfunc[i] = (None, toknames[f]) - - return [(lexre,lexindexfunc)],[regex],[lexindexnames] + + return [(lexre, lexindexfunc)], [regex], [lexindexnames] except Exception: - m = int(len(relist)/2) - if m == 0: m = 1 - llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames) - rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames) - return llist+rlist, lre+rre, lnames+rnames + m = (len(relist) // 2) + 1 + llist, lre, lnames = _form_master_re(relist[:m], reflags, ldict, toknames) + rlist, rre, rnames = _form_master_re(relist[m:], reflags, ldict, toknames) + return (llist+rlist), (lre+rre), (lnames+rnames) # ----------------------------------------------------------------------------- # def _statetoken(s,names) @@ -517,22 +385,22 
@@ def _form_master_re(relist,reflags,ldict,toknames): # is a tuple of state names and tokenname is the name of the token. For example, # calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM') # ----------------------------------------------------------------------------- +def _statetoken(s, names): + parts = s.split('_') + for i, part in enumerate(parts[1:], 1): + if part not in names and part != 'ANY': + break -def _statetoken(s,names): - nonstate = 1 - parts = s.split("_") - for i in range(1,len(parts)): - if not parts[i] in names and parts[i] != 'ANY': break if i > 1: - states = tuple(parts[1:i]) + states = tuple(parts[1:i]) else: - states = ('INITIAL',) + states = ('INITIAL',) if 'ANY' in states: - states = tuple(names) + states = tuple(names) - tokenname = "_".join(parts[i:]) - return (states,tokenname) + tokenname = '_'.join(parts[i:]) + return (states, tokenname) # ----------------------------------------------------------------------------- @@ -542,19 +410,15 @@ def _statetoken(s,names): # user's input file. # ----------------------------------------------------------------------------- class LexerReflect(object): - def __init__(self,ldict,log=None,reflags=0): + def __init__(self, ldict, log=None, reflags=0): self.ldict = ldict self.error_func = None self.tokens = [] self.reflags = reflags - self.stateinfo = { 'INITIAL' : 'inclusive'} - self.files = {} - self.error = 0 - - if log is None: - self.log = PlyLogger(sys.stderr) - else: - self.log = log + self.stateinfo = {'INITIAL': 'inclusive'} + self.modules = set() + self.error = False + self.log = PlyLogger(sys.stderr) if log is None else log # Get all of the basic information def get_all(self): @@ -562,7 +426,7 @@ class LexerReflect(object): self.get_literals() self.get_states() self.get_rules() - + # Validate all of the information def validate_all(self): self.validate_tokens() @@ -572,20 +436,20 @@ class LexerReflect(object): # Get the tokens map def get_tokens(self): - tokens = self.ldict.get("tokens",None) + tokens = self.ldict.get('tokens', None) if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return self.tokens = tokens @@ -595,280 +459,270 @@ class LexerReflect(object): terminals = {} for n in self.tokens: if not _is_identifier.match(n): - self.log.error("Bad token name '%s'",n) - self.error = 1 + self.log.error(f"Bad token name {n!r}") + self.error = True if n in terminals: - self.log.warning("Token '%s' multiply defined", n) + self.log.warning(f"Token {n!r} multiply defined") terminals[n] = 1 # Get the literals specifier def get_literals(self): - self.literals = self.ldict.get("literals","") + self.literals = self.ldict.get('literals', '') + if not self.literals: + self.literals = '' # Validate literals def validate_literals(self): try: for c in self.literals: - if not isinstance(c,StringTypes) or len(c) > 1: - self.log.error("Invalid literal %s. Must be a single character", repr(c)) - self.error = 1 - continue + if not isinstance(c, StringTypes) or len(c) > 1: + self.log.error(f'Invalid literal {c!r}. 
Must be a single character') + self.error = True except TypeError: - self.log.error("Invalid literals specification. literals must be a sequence of characters") - self.error = 1 + self.log.error('Invalid literals specification. literals must be a sequence of characters') + self.error = True def get_states(self): - self.states = self.ldict.get("states",None) + self.states = self.ldict.get('states', None) # Build statemap if self.states: - if not isinstance(self.states,(tuple,list)): - self.log.error("states must be defined as a tuple or list") - self.error = 1 - else: - for s in self.states: - if not isinstance(s,tuple) or len(s) != 2: - self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s)) - self.error = 1 - continue - name, statetype = s - if not isinstance(name,StringTypes): - self.log.error("State name %s must be a string", repr(name)) - self.error = 1 - continue - if not (statetype == 'inclusive' or statetype == 'exclusive'): - self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name) - self.error = 1 - continue - if name in self.stateinfo: - self.log.error("State '%s' already defined",name) - self.error = 1 - continue - self.stateinfo[name] = statetype + if not isinstance(self.states, (tuple, list)): + self.log.error('states must be defined as a tuple or list') + self.error = True + else: + for s in self.states: + if not isinstance(s, tuple) or len(s) != 2: + self.log.error("Invalid state specifier %r. Must be a tuple (statename,'exclusive|inclusive')", s) + self.error = True + continue + name, statetype = s + if not isinstance(name, StringTypes): + self.log.error('State name %r must be a string', name) + self.error = True + continue + if not (statetype == 'inclusive' or statetype == 'exclusive'): + self.log.error("State type for state %r must be 'inclusive' or 'exclusive'", name) + self.error = True + continue + if name in self.stateinfo: + self.log.error("State %r already defined", name) + self.error = True + continue + self.stateinfo[name] = statetype # Get all of the symbols with a t_ prefix and sort them into various # categories (functions, strings, error functions, and ignore characters) def get_rules(self): - tsymbols = [f for f in self.ldict if f[:2] == 't_' ] + tsymbols = [f for f in self.ldict if f[:2] == 't_'] # Now build up a list of functions and a list of strings - - self.toknames = { } # Mapping of symbols to token names - self.funcsym = { } # Symbols defined as functions - self.strsym = { } # Symbols defined as strings - self.ignore = { } # Ignore strings by state - self.errorf = { } # Error functions by state + self.toknames = {} # Mapping of symbols to token names + self.funcsym = {} # Symbols defined as functions + self.strsym = {} # Symbols defined as strings + self.ignore = {} # Ignore strings by state + self.errorf = {} # Error functions by state + self.eoff = {} # EOF functions by state for s in self.stateinfo: - self.funcsym[s] = [] - self.strsym[s] = [] + self.funcsym[s] = [] + self.strsym[s] = [] if len(tsymbols) == 0: - self.log.error("No rules of the form t_rulename are defined") - self.error = 1 + self.log.error('No rules of the form t_rulename are defined') + self.error = True return for f in tsymbols: t = self.ldict[f] - states, tokname = _statetoken(f,self.stateinfo) + states, tokname = _statetoken(f, self.stateinfo) self.toknames[f] = tokname - if hasattr(t,"__call__"): + if hasattr(t, '__call__'): if tokname == 'error': for s in states: self.errorf[s] = t + elif tokname == 
'eof': + for s in states: + self.eoff[s] = t elif tokname == 'ignore': - line = func_code(t).co_firstlineno - file = func_code(t).co_filename - self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__) - self.error = 1 + line = t.__code__.co_firstlineno + file = t.__code__.co_filename + self.log.error("%s:%d: Rule %r must be defined as a string", file, line, t.__name__) + self.error = True else: - for s in states: - self.funcsym[s].append((f,t)) + for s in states: + self.funcsym[s].append((f, t)) elif isinstance(t, StringTypes): if tokname == 'ignore': for s in states: self.ignore[s] = t - if "\\" in t: - self.log.warning("%s contains a literal backslash '\\'",f) + if '\\' in t: + self.log.warning("%s contains a literal backslash '\\'", f) elif tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", f) - self.error = 1 + self.log.error("Rule %r must be defined as a function", f) + self.error = True else: - for s in states: - self.strsym[s].append((f,t)) + for s in states: + self.strsym[s].append((f, t)) else: - self.log.error("%s not defined as a function or string", f) - self.error = 1 + self.log.error('%s not defined as a function or string', f) + self.error = True # Sort the functions by line number for f in self.funcsym.values(): - if sys.version_info[0] < 3: - f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno)) - else: - # Python 3.0 - f.sort(key=lambda x: func_code(x[1]).co_firstlineno) + f.sort(key=lambda x: x[1].__code__.co_firstlineno) # Sort the strings by regular expression length for s in self.strsym.values(): - if sys.version_info[0] < 3: - s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1]))) - else: - # Python 3.0 - s.sort(key=lambda x: len(x[1]),reverse=True) + s.sort(key=lambda x: len(x[1]), reverse=True) - # Validate all of the t_rules collected + # Validate all of the t_rules collected def validate_rules(self): for state in self.stateinfo: # Validate all rules defined by functions - - for fname, f in self.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) tokname = self.toknames[fname] if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True continue if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True continue - if not f.__doc__: - self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__) - self.error = 1 + if not _get_regex(f): + self.log.error("%s:%d: No regular expression defined for rule %r", file, line, f.__name__) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags) - if c.match(""): - self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("%s:%d: Invalid regular expression for rule '%s'. 
%s", file,line,f.__name__,e) - if '#' in f.__doc__: - self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__) - self.error = 1 + c = re.compile('(?P<%s>%s)' % (fname, _get_regex(f)), self.reflags) + if c.match(''): + self.log.error("%s:%d: Regular expression for rule %r matches empty string", file, line, f.__name__) + self.error = True + except re.error as e: + self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file, line, f.__name__, e) + if '#' in _get_regex(f): + self.log.error("%s:%d. Make sure '#' in rule %r is escaped with '\\#'", file, line, f.__name__) + self.error = True # Validate all rules defined by strings - for name,r in self.strsym[state]: + for name, r in self.strsym[state]: tokname = self.toknames[name] if tokname == 'error': - self.log.error("Rule '%s' must be defined as a function", name) - self.error = 1 + self.log.error("Rule %r must be defined as a function", name) + self.error = True continue - if not tokname in self.tokens and tokname.find("ignore_") < 0: - self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname) - self.error = 1 + if tokname not in self.tokens and tokname.find('ignore_') < 0: + self.log.error("Rule %r defined for an unspecified token %s", name, tokname) + self.error = True continue try: - c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags) - if (c.match("")): - self.log.error("Regular expression for rule '%s' matches empty string",name) - self.error = 1 - except re.error: - _etype, e, _etrace = sys.exc_info() - self.log.error("Invalid regular expression for rule '%s'. %s",name,e) + c = re.compile('(?P<%s>%s)' % (name, r), self.reflags) + if (c.match('')): + self.log.error("Regular expression for rule %r matches empty string", name) + self.error = True + except re.error as e: + self.log.error("Invalid regular expression for rule %r. 
%s", name, e) if '#' in r: - self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name) - self.error = 1 + self.log.error("Make sure '#' in rule %r is escaped with '\\#'", name) + self.error = True if not self.funcsym[state] and not self.strsym[state]: - self.log.error("No rules defined for state '%s'",state) - self.error = 1 + self.log.error("No rules defined for state %r", state) + self.error = True # Validate the error function - efunc = self.errorf.get(state,None) + efunc = self.errorf.get(state, None) if efunc: f = efunc - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - self.files[file] = 1 + line = f.__code__.co_firstlineno + file = f.__code__.co_filename + module = inspect.getmodule(f) + self.modules.add(module) if isinstance(f, types.MethodType): reqargs = 2 else: reqargs = 1 - nargs = func_code(f).co_argcount + nargs = f.__code__.co_argcount if nargs > reqargs: - self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__) - self.error = 1 + self.log.error("%s:%d: Rule %r has too many arguments", file, line, f.__name__) + self.error = True if nargs < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__) - self.error = 1 - - for f in self.files: - self.validate_file(f) + self.log.error("%s:%d: Rule %r requires an argument", file, line, f.__name__) + self.error = True + for module in self.modules: + self.validate_module(module) # ----------------------------------------------------------------------------- - # validate_file() + # validate_module() # # This checks to see if there are duplicated t_rulename() functions or strings # in the parser input file. This is done using a simple regular expression - # match on each line in the given file. + # match on each line in the source code of the given module. # ----------------------------------------------------------------------------- - def validate_file(self,filename): - import os.path - base,ext = os.path.splitext(filename) - if ext != '.py': return # No idea what the file is. Return OK - + def validate_module(self, module): try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: - return # Couldn't find the file. Don't worry about it + return fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(') sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=') - counthash = { } - linen = 1 - for l in lines: - m = fre.match(l) + counthash = {} + linen += 1 + for line in lines: + m = fre.match(line) if not m: - m = sre.match(l) + m = sre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev) - self.error = 1 + filename = inspect.getsourcefile(module) + self.log.error('%s:%d: Rule %s redefined. 
Previously defined on line %d', filename, linen, name, prev) + self.error = True linen += 1 - + # ----------------------------------------------------------------------------- # lex(module) # # Build all of the regular expression rules from definitions in the supplied module # ----------------------------------------------------------------------------- -def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None): +def lex(*, module=None, object=None, debug=False, + reflags=int(re.VERBOSE), debuglog=None, errorlog=None): + global lexer + ldict = None - stateinfo = { 'INITIAL' : 'inclusive'} + stateinfo = {'INITIAL': 'inclusive'} lexobj = Lexer() - lexobj.lexoptimize = optimize - global token,input + global token, input if errorlog is None: errorlog = PlyLogger(sys.stderr) @@ -878,131 +732,124 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now debuglog = PlyLogger(sys.stderr) # Get the module dictionary used for the lexer - if object: module = object + if object: + module = object + # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) for k in dir(module)] ldict = dict(_items) + # If no __file__ attribute is available, try to obtain it from the __module__ instead + if '__file__' not in ldict: + ldict['__file__'] = sys.modules[ldict['__module__']].__file__ else: ldict = get_caller_module_dict(2) # Collect parser information from the dictionary - linfo = LexerReflect(ldict,log=errorlog,reflags=reflags) + linfo = LexerReflect(ldict, log=errorlog, reflags=reflags) linfo.get_all() - if not optimize: - if linfo.validate_all(): - raise SyntaxError("Can't build lexer") - - if optimize and lextab: - try: - lexobj.readtab(lextab,ldict) - token = lexobj.token - input = lexobj.input - lexer = lexobj - return lexobj - - except ImportError: - pass + if linfo.validate_all(): + raise SyntaxError("Can't build lexer") # Dump some basic debugging information if debug: - debuglog.info("lex: tokens = %r", linfo.tokens) - debuglog.info("lex: literals = %r", linfo.literals) - debuglog.info("lex: states = %r", linfo.stateinfo) + debuglog.info('lex: tokens = %r', linfo.tokens) + debuglog.info('lex: literals = %r', linfo.literals) + debuglog.info('lex: states = %r', linfo.stateinfo) # Build a dictionary of valid token names - lexobj.lextokens = { } + lexobj.lextokens = set() for n in linfo.tokens: - lexobj.lextokens[n] = 1 + lexobj.lextokens.add(n) # Get literals specification - if isinstance(linfo.literals,(list,tuple)): + if isinstance(linfo.literals, (list, tuple)): lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals) else: lexobj.lexliterals = linfo.literals + lexobj.lextokens_all = lexobj.lextokens | set(lexobj.lexliterals) + # Get the stateinfo dictionary stateinfo = linfo.stateinfo - regexs = { } + regexs = {} # Build the master regular expressions for state in stateinfo: regex_list = [] # Add rules defined by functions first for fname, f in linfo.funcsym[state]: - line = func_code(f).co_firstlineno - file = func_code(f).co_filename - regex_list.append("(?P<%s>%s)" % (fname,f.__doc__)) + regex_list.append('(?P<%s>%s)' % (fname, _get_regex(f))) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", fname, _get_regex(f), state) # Now add all of the simple rules - for name,r in linfo.strsym[state]: - 
regex_list.append("(?P<%s>%s)" % (name,r)) + for name, r in linfo.strsym[state]: + regex_list.append('(?P<%s>%s)' % (name, r)) if debug: - debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state) + debuglog.info("lex: Adding rule %s -> '%s' (state '%s')", name, r, state) regexs[state] = regex_list # Build the master regular expressions if debug: - debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====") + debuglog.info('lex: ==== MASTER REGEXS FOLLOW ====') for state in regexs: - lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames) + lexre, re_text, re_names = _form_master_re(regexs[state], reflags, ldict, linfo.toknames) lexobj.lexstatere[state] = lexre lexobj.lexstateretext[state] = re_text lexobj.lexstaterenames[state] = re_names if debug: - for i in range(len(re_text)): - debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i]) + for i, text in enumerate(re_text): + debuglog.info("lex: state '%s' : regex[%d] = '%s'", state, i, text) # For inclusive states, we need to add the regular expressions from the INITIAL state - for state,stype in stateinfo.items(): - if state != "INITIAL" and stype == 'inclusive': - lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) - lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) - lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) + for state, stype in stateinfo.items(): + if state != 'INITIAL' and stype == 'inclusive': + lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL']) + lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL']) + lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL']) lexobj.lexstateinfo = stateinfo - lexobj.lexre = lexobj.lexstatere["INITIAL"] - lexobj.lexretext = lexobj.lexstateretext["INITIAL"] + lexobj.lexre = lexobj.lexstatere['INITIAL'] + lexobj.lexretext = lexobj.lexstateretext['INITIAL'] lexobj.lexreflags = reflags # Set up ignore variables lexobj.lexstateignore = linfo.ignore - lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","") + lexobj.lexignore = lexobj.lexstateignore.get('INITIAL', '') # Set up error functions lexobj.lexstateerrorf = linfo.errorf - lexobj.lexerrorf = linfo.errorf.get("INITIAL",None) + lexobj.lexerrorf = linfo.errorf.get('INITIAL', None) if not lexobj.lexerrorf: - errorlog.warning("No t_error rule is defined") + errorlog.warning('No t_error rule is defined') + + # Set up eof functions + lexobj.lexstateeoff = linfo.eoff + lexobj.lexeoff = linfo.eoff.get('INITIAL', None) # Check state information for ignore and error rules - for s,stype in stateinfo.items(): + for s, stype in stateinfo.items(): if stype == 'exclusive': - if not s in linfo.errorf: - errorlog.warning("No error rule is defined for exclusive state '%s'", s) - if not s in linfo.ignore and lexobj.lexignore: - errorlog.warning("No ignore rule is defined for exclusive state '%s'", s) + if s not in linfo.errorf: + errorlog.warning("No error rule is defined for exclusive state %r", s) + if s not in linfo.ignore and lexobj.lexignore: + errorlog.warning("No ignore rule is defined for exclusive state %r", s) elif stype == 'inclusive': - if not s in linfo.errorf: - linfo.errorf[s] = linfo.errorf.get("INITIAL",None) - if not s in linfo.ignore: - linfo.ignore[s] = linfo.ignore.get("INITIAL","") + if s not in linfo.errorf: + linfo.errorf[s] = linfo.errorf.get('INITIAL', None) + if s not in linfo.ignore: + linfo.ignore[s] = linfo.ignore.get('INITIAL', '') # Create global versions of the token() and 
input() functions token = lexobj.token input = lexobj.input lexer = lexobj - # If in optimize mode, we write the lextab - if lextab and optimize: - lexobj.writetab(lextab,outputdir) - return lexobj # ----------------------------------------------------------------------------- @@ -1011,15 +858,14 @@ def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,now # This runs the lexer as a main program # ----------------------------------------------------------------------------- -def runmain(lexer=None,data=None): +def runmain(lexer=None, data=None): if not data: try: filename = sys.argv[1] - f = open(filename) - data = f.read() - f.close() + with open(filename) as f: + data = f.read() except IndexError: - sys.stdout.write("Reading from standard input (type EOF to end):\n") + sys.stdout.write('Reading from standard input (type EOF to end):\n') data = sys.stdin.read() if lexer: @@ -1032,10 +878,11 @@ def runmain(lexer=None,data=None): else: _token = token - while 1: + while True: tok = _token() - if not tok: break - sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos)) + if not tok: + break + sys.stdout.write(f'({tok.type},{tok.value!r},{tok.lineno},{tok.lexpos})\n') # ----------------------------------------------------------------------------- # @TOKEN(regex) @@ -1045,14 +892,10 @@ def runmain(lexer=None,data=None): # ----------------------------------------------------------------------------- def TOKEN(r): - def set_doc(f): - if hasattr(r,"__call__"): - f.__doc__ = r.__doc__ + def set_regex(f): + if hasattr(r, '__call__'): + f.regex = _get_regex(r) else: - f.__doc__ = r + f.regex = r return f - return set_doc - -# Alternative spelling of the TOKEN decorator -Token = TOKEN - + return set_regex diff --git a/components/script/dom/bindings/codegen/ply/ply/yacc.py b/components/script/dom/bindings/codegen/ply/ply/yacc.py index e9f5c657551..bce63c18241 100644 --- a/components/script/dom/bindings/codegen/ply/ply/yacc.py +++ b/components/script/dom/bindings/codegen/ply/ply/yacc.py @@ -1,22 +1,24 @@ # ----------------------------------------------------------------------------- # ply: yacc.py # -# Copyright (C) 2001-2009, +# Copyright (C) 2001-2020 # David M. Beazley (Dabeaz LLC) # All rights reserved. # +# Latest version: https://github.com/dabeaz/ply +# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: -# +# # * Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# * Neither the name of the David Beazley or Dabeaz LLC may be used to +# and/or other materials provided with the distribution. +# * Neither the name of David Beazley or Dabeaz LLC may be used to # endorse or promote products derived from this software without -# specific prior written permission. +# specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT @@ -32,7 +34,7 @@ # ----------------------------------------------------------------------------- # # This implements an LR parser that is constructed from grammar rules defined -# as Python functions. The grammer is specified by supplying the BNF inside +# as Python functions. The grammar is specified by supplying the BNF inside # Python documentation strings. The inspiration for this technique was borrowed # from John Aycock's Spark parsing system. PLY might be viewed as cross between # Spark and the GNU bison utility. @@ -59,8 +61,10 @@ # own risk! # ---------------------------------------------------------------------------- -__version__ = "3.3" -__tabversion__ = "3.2" # Table version +import re +import types +import sys +import inspect #----------------------------------------------------------------------------- # === User configurable parameters === @@ -68,95 +72,69 @@ __tabversion__ = "3.2" # Table version # Change these to modify the default behavior of yacc (if you wish) #----------------------------------------------------------------------------- -yaccdebug = 1 # Debugging mode. If set, yacc generates a - # a 'parser.out' file in the current directory +yaccdebug = False # Debugging mode. If set, yacc generates a +# a 'parser.out' file in the current directory debug_file = 'parser.out' # Default name of the debugging file -tab_module = 'parsetab' # Default name of the table module -default_lr = 'LALR' # Default LR table generation method - error_count = 3 # Number of symbols that must be shifted to leave recovery mode - -yaccdevel = 0 # Set to True if developing yacc. This turns off optimized - # implementations of certain functions. - resultlimit = 40 # Size limit of results when running in debug mode. -pickle_protocol = 0 # Protocol to use when writing pickle files - -import re, types, sys, os.path - -# Compatibility function for python 2.6/3.0 -if sys.version_info[0] < 3: - def func_code(f): - return f.func_code -else: - def func_code(f): - return f.__code__ - -# Compatibility -try: - MAXINT = sys.maxint -except AttributeError: - MAXINT = sys.maxsize +MAXINT = sys.maxsize -# Python 2.x/3.0 compatibility. -def load_ply_lex(): - if sys.version_info[0] < 3: - import lex - else: - import ply.lex as lex - return lex - -# This object is a stand-in for a logging object created by the +# This object is a stand-in for a logging object created by the # logging module. PLY will use this by default to create things # such as the parser.out file. If a user wants more detailed # information, they can create their own logging object and pass # it into PLY. class PlyLogger(object): - def __init__(self,f): + def __init__(self, f): self.f = f - def debug(self,msg,*args,**kwargs): - self.f.write((msg % args) + "\n") - info = debug - def warning(self,msg,*args,**kwargs): - self.f.write("WARNING: "+ (msg % args) + "\n") + def debug(self, msg, *args, **kwargs): + self.f.write((msg % args) + '\n') + + info = debug - def error(self,msg,*args,**kwargs): - self.f.write("ERROR: " + (msg % args) + "\n") + def warning(self, msg, *args, **kwargs): + self.f.write('WARNING: ' + (msg % args) + '\n') + + def error(self, msg, *args, **kwargs): + self.f.write('ERROR: ' + (msg % args) + '\n') critical = debug # Null logger is used when no output is generated. Does nothing. 
class NullLogger(object): - def __getattribute__(self,name): + def __getattribute__(self, name): return self - def __call__(self,*args,**kwargs): + + def __call__(self, *args, **kwargs): return self - + # Exception raised for yacc-related errors -class YaccError(Exception): pass +class YaccError(Exception): + pass # Format the result message that the parser produces when running in debug mode. def format_result(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) > resultlimit: - repr_str = repr_str[:resultlimit]+" ..." - result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str) + repr_str = repr_str[:resultlimit] + ' ...' + result = '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), repr_str) return result - # Format stack entries when the parser is running in debug mode def format_stack_entry(r): repr_str = repr(r) - if '\n' in repr_str: repr_str = repr(repr_str) + if '\n' in repr_str: + repr_str = repr(repr_str) if len(repr_str) < 16: return repr_str else: - return "<%s @ 0x%x>" % (type(r).__name__,id(r)) + return '<%s @ 0x%x>' % (type(r).__name__, id(r)) #----------------------------------------------------------------------------- # === LR Parsing Engine === @@ -176,8 +154,11 @@ def format_stack_entry(r): # .endlexpos = Ending lex position (optional, set automatically) class YaccSymbol: - def __str__(self): return self.type - def __repr__(self): return str(self) + def __str__(self): + return self.type + + def __repr__(self): + return str(self) # This class is a wrapper around the objects actually passed to each # grammar rule. Index lookup and assignment actually assign the @@ -189,46 +170,53 @@ class YaccSymbol: # representing the range of positional information for a symbol. 
class YaccProduction: - def __init__(self,s,stack=None): + def __init__(self, s, stack=None): self.slice = s self.stack = stack self.lexer = None - self.parser= None - def __getitem__(self,n): - if n >= 0: return self.slice[n].value - else: return self.stack[n].value + self.parser = None + + def __getitem__(self, n): + if isinstance(n, slice): + return [s.value for s in self.slice[n]] + elif n >= 0: + return self.slice[n].value + else: + return self.stack[n].value - def __setitem__(self,n,v): + def __setitem__(self, n, v): self.slice[n].value = v - def __getslice__(self,i,j): + def __getslice__(self, i, j): return [s.value for s in self.slice[i:j]] def __len__(self): return len(self.slice) - def lineno(self,n): - return getattr(self.slice[n],"lineno",0) + def lineno(self, n): + return getattr(self.slice[n], 'lineno', 0) - def set_lineno(self,n,lineno): + def set_lineno(self, n, lineno): self.slice[n].lineno = lineno - def linespan(self,n): - startline = getattr(self.slice[n],"lineno",0) - endline = getattr(self.slice[n],"endlineno",startline) - return startline,endline + def linespan(self, n): + startline = getattr(self.slice[n], 'lineno', 0) + endline = getattr(self.slice[n], 'endlineno', startline) + return startline, endline - def lexpos(self,n): - return getattr(self.slice[n],"lexpos",0) + def lexpos(self, n): + return getattr(self.slice[n], 'lexpos', 0) - def lexspan(self,n): - startpos = getattr(self.slice[n],"lexpos",0) - endpos = getattr(self.slice[n],"endlexpos",startpos) - return startpos,endpos + def set_lexpos(self, n, lexpos): + self.slice[n].lexpos = lexpos - def error(self): - raise SyntaxError + def lexspan(self, n): + startpos = getattr(self.slice[n], 'lexpos', 0) + endpos = getattr(self.slice[n], 'endlexpos', startpos) + return startpos, endpos + def error(self): + raise SyntaxError # ----------------------------------------------------------------------------- # == LRParser == # # The LR Parsing engine. @@ -237,14 +225,16 @@ class YaccProduction: # ----------------------------------------------------------------------------- class LRParser: - def __init__(self,lrtab,errorf): + def __init__(self, lrtab, errorf): self.productions = lrtab.lr_productions - self.action = lrtab.lr_action - self.goto = lrtab.lr_goto - self.errorfunc = errorf + self.action = lrtab.lr_action + self.goto = lrtab.lr_goto + self.errorfunc = errorf + self.set_defaulted_states() + self.errorok = True def errok(self): - self.errorok = 1 + self.errorok = True def restart(self): del self.statestack[:] @@ -254,47 +244,52 @@ class LRParser: self.symstack.append(sym) self.statestack.append(0) - def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - if debug or yaccdevel: - if isinstance(debug,int): - debug = PlyLogger(sys.stderr) - return self.parsedebug(input,lexer,debug,tracking,tokenfunc) - elif tracking: - return self.parseopt(input,lexer,debug,tracking,tokenfunc) - else: - return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc) - - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parsedebug(). + # Defaulted state support. + # This method identifies parser states where there is only one possible reduction action. + # For such states, the parser can choose to make a rule reduction without consuming + # the next look-ahead token. This delayed invocation of the tokenizer can be useful in + # certain kinds of advanced parsing situations where the lexer and parser interact with + # each other or change states (i.e., manipulation of scope, lexer states, etc.).
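    # For example, a state whose entire action table is a single reduce entry,
    # say {'SEMI': -3}, reduces by rule 3 no matter what comes next, so the
    # reduction can run before the lexer is asked for another token (the token
    # name and rule number here are purely illustrative, not from these tables).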
# - # This is the debugging enabled version of parse(). All changes made to the - # parsing engine should be made here. For the non-debugging version, - # copy this code to a method parseopt() and delete all of the sections - # enclosed in: + # See: http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions + def set_defaulted_states(self): + self.defaulted_states = {} + for state, actions in self.action.items(): + rules = list(actions.values()) + if len(rules) == 1 and rules[0] < 0: + self.defaulted_states[state] = rules[0] + + def disable_defaulted_states(self): + self.defaulted_states = {} + + # parse(). # - # #--! DEBUG - # statements - # #--! DEBUG - # - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery + # This is the core parsing engine. To operate, it requires a lexer object. + # Two options are provided. The debug flag turns on debugging so that you can + # see the various rule reductions and parsing steps. tracking turns on position + # tracking. In this mode, symbols will record the starting/ending line number and + # character index. + + def parse(self, input=None, lexer=None, debug=False, tracking=False): + # If debugging has been specified as a flag, turn it into a logging object + if isinstance(debug, int) and debug: + debug = PlyLogger(sys.stderr) + + lookahead = None # Current lookahead symbol + lookaheadstack = [] # Stack of lookahead symbols + actions = self.action # Local reference to action table (to avoid lookup on self.) + goto = self.goto # Local reference to goto table (to avoid lookup on self.) + prod = self.productions # Local reference to production list (to avoid lookup on self.) + defaulted_states = self.defaulted_states # Local reference to defaulted states + pslice = YaccProduction(None) # Production object passed to grammar rules + errorcount = 0 # Used during error recovery - # --! DEBUG - debug.info("PLY: PARSE DEBUG START") - # --! DEBUG + if debug: + debug.info('PLY: PARSE DEBUG START') # If no lexer was given, we will try to use the lex module if not lexer: - lex = load_ply_lex() + from . 
import lex lexer = lex.lexer # Set up the lexer and parser objects on pslice @@ -305,72 +300,67 @@ class LRParser: if input is not None: lexer.input(input) - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc + # Set the token function + get_token = self.token = lexer.token # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token + statestack = self.statestack = [] # Stack of parsing states + symstack = self.symstack = [] # Stack of grammar symbols + pslice.stack = symstack # Put in the production + errtoken = None # Err token # The start state is assumed to be (0,$end) statestack.append(0) sym = YaccSymbol() - sym.type = "$end" + sym.type = '$end' symstack.append(sym) state = 0 - while 1: + while True: # Get the next symbol on the input. If a lookahead symbol # is already set, we just use that. Otherwise, we'll pull # the next token off of the lookaheadstack or from the lexer - # --! DEBUG - debug.debug('') - debug.debug('State : %s', state) - # --! DEBUG + if debug: + debug.debug('State : %s', state) - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() + if state not in defaulted_states: if not lookahead: - lookahead = YaccSymbol() - lookahead.type = "$end" - - # --! DEBUG - debug.debug('Stack : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG + if not lookaheadstack: + lookahead = get_token() # Get the next token + else: + lookahead = lookaheadstack.pop() + if not lookahead: + lookahead = YaccSymbol() + lookahead.type = '$end' + + # Check the action table + ltype = lookahead.type + t = actions[state].get(ltype) + else: + t = defaulted_states[state] + if debug: + debug.debug('Defaulted state %s: Reduce using %d', state, -t) - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) + if debug: + debug.debug('Stack : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) if t is not None: if t > 0: # shift a symbol on the stack statestack.append(t) state = t - - # --! DEBUG - debug.debug("Action : Shift and goto state %s", t) - # --! DEBUG + + if debug: + debug.debug('Action : Shift and goto state %s', t) symstack.append(lookahead) lookahead = None # Decrease error count on successful shift - if errorcount: errorcount -=1 + if errorcount: + errorcount -= 1 continue if t < 0: @@ -384,358 +374,69 @@ class LRParser: sym.type = pname # Production name sym.value = None - # --! DEBUG - if plen: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t) - else: - debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t) - - # --! DEBUG + if debug: + if plen: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, + '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']', + goto[statestack[-1-plen]][pname]) + else: + debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [], + goto[statestack[-1]][pname]) if plen: targ = symstack[-plen-1:] targ[0] = sym - # --! 
TRACKING if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING + t1 = targ[1] + sym.lineno = t1.lineno + sym.lexpos = t1.lexpos + t1 = targ[-1] + sym.endlineno = getattr(t1, 'endlineno', t1.lineno) + sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos) # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # below as a performance optimization. Make sure # changes get made in both locations. pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - # --! TRACKING - if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ try: # Call the grammar rule with our special slice object + del symstack[-plen:] + self.state = state p.callable(pslice) - # --! DEBUG - debug.info("Result : %s", format_result(pslice[0])) - # --! DEBUG + del statestack[-plen:] + if debug: + debug.info('Result : %s', format_result(pslice[0])) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + symstack.extend(targ[1:-1]) # Put the production slice back on the stack + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + self.errorok = False - if t == 0: - n = symstack[-1] - result = getattr(n,"value",None) - # --! DEBUG - debug.info("Done : Returning %s", format_result(result)) - debug.info("PLY: PARSE DEBUG END") - # --! DEBUG - return result - - if t == None: - - # --! DEBUG - debug.error('Error : %s', - ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) - # --! DEBUG - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. 
- if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == "$end": - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != "$end": - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue - - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == "$end": - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt(). - # - # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY. - # Edit the debug version above, then copy any modifications to the method - # below while removing #--! DEBUG sections. - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) 
- pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue - - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # --! TRACKING - if tracking: - t1 = targ[1] - sym.lineno = t1.lineno - sym.lexpos = t1.lexpos - t1 = targ[-1] - sym.endlineno = getattr(t1,"endlineno",t1.lineno) - sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos) - - # --! TRACKING - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. Make sure - # changes get made in both locations. - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - else: - # --! TRACKING if tracking: - sym.lineno = lexer.lineno - sym.lexpos = lexer.lexpos - # --! TRACKING + sym.lineno = lexer.lineno + sym.lexpos = lexer.lexpos - targ = [ sym ] + targ = [sym] # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated + # The code enclosed in this section is duplicated # above as a performance optimization. Make sure # changes get made in both locations. 
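The unified parse() that replaces parsedebug(), parseopt(), and parseopt_notrack() is driven entirely by the action and goto tables: a positive action entry shifts to that state, a negative entry reduces by rule -t, and zero accepts. A minimal, self-contained sketch of that loop, with hand-built tables for the toy grammar S -> S 'a' | 'a' (the tables, rule numbers, and names are this sketch's own assumptions, not PLY output):

# Encoding mirrors the engine: positive entry shifts, negative reduces by rule -t, 0 accepts.
RULES = {1: ('S', 2),                 # rule 1: S -> S 'a'  (rhs length 2)
         2: ('S', 1)}                 # rule 2: S -> 'a'    (rhs length 1)
ACTION = {0: {'a': 2},
          1: {'a': 3, '$end': 0},
          2: {'a': -2, '$end': -2},
          3: {'a': -1, '$end': -1}}
GOTO = {0: {'S': 1}}

def toy_parse(tokens):
    stack = [0]                       # state stack; 0 is the start state
    stream = iter(list(tokens) + ['$end'])
    look = next(stream)
    while True:
        t = ACTION[stack[-1]].get(look)
        if t is None:                 # no table entry: syntax error
            raise SyntaxError('unexpected %r' % look)
        if t > 0:                     # shift: push the new state, consume the token
            stack.append(t)
            look = next(stream)
        elif t < 0:                   # reduce: pop the rhs states, then follow goto
            lhs, rhslen = RULES[-t]
            del stack[len(stack) - rhslen:]
            stack.append(GOTO[stack[-1]][lhs])
        else:                         # accept
            return True

print(toy_parse(['a', 'a', 'a']))     # -> True

PLY's real engine layers the lookahead stack, error recovery, position tracking, and the defaulted-state shortcut on top of this skeleton.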
@@ -743,283 +444,41 @@ class LRParser: try: # Call the grammar rule with our special slice object + self.state = state p.callable(pslice) + if debug: + debug.info('Result : %s', format_result(pslice[0])) symstack.append(sym) state = goto[statestack[-1]][pname] statestack.append(state) except SyntaxError: # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() + lookaheadstack.append(lookahead) # Save the current lookahead token + statestack.pop() # Pop back one state (before the reduce) state = statestack[-1] sym.type = 'error' + sym.value = 'error' lookahead = sym errorcount = error_count - self.errorok = 0 + self.errorok = False + continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! if t == 0: n = symstack[-1] - return getattr(n,"value",None) - - if t == None: - - # We have some kind of parsing error here. To handle - # this, we are going to push the current token onto - # the tokenstack and replace it with an 'error' token. - # If there are any synchronization rules, they may - # catch it. - # - # In addition to pushing the error token, we call call - # the user defined p_error() function if this is the - # first syntax error. This function is only called if - # errorcount == 0. - if errorcount == 0 or self.errorok: - errorcount = error_count - self.errorok = 0 - errtoken = lookahead - if errtoken.type == '$end': - errtoken = None # End of file! - if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): - errtoken.lexer = lexer - tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - - if self.errorok: - # User must have done some kind of panic - # mode recovery on their own. The - # returned token is the next lookahead - lookahead = tok - errtoken = None - continue - else: - if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 - if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) - else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) - else: - sys.stderr.write("yacc: Parse error in input. EOF\n") - return - - else: - errorcount = error_count - - # case 1: the statestack only has 1 entry on it. If we're in this state, the - # entire parse has been rolled back and we're completely hosed. The token is - # discarded and we just keep going. - - if len(statestack) <= 1 and lookahead.type != '$end': - lookahead = None - errtoken = None - state = 0 - # Nuke the pushback stack - del lookaheadstack[:] - continue + result = getattr(n, 'value', None) - # case 2: the statestack has a couple of entries on it, but we're - # at the end of the file. nuke the top entry and generate an error token - - # Start nuking entries on the stack - if lookahead.type == '$end': - # Whoa. We're really hosed here. Bail out - return - - if lookahead.type != 'error': - sym = symstack[-1] - if sym.type == 'error': - # Hmmm. 
Error is on top of stack, we'll just nuke input - # symbol and continue - lookahead = None - continue - t = YaccSymbol() - t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno - t.value = lookahead - lookaheadstack.append(lookahead) - lookahead = t - else: - symstack.pop() - statestack.pop() - state = statestack[-1] # Potential bug fix - - continue - - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # parseopt_notrack(). - # - # Optimized version of parseopt() with line number tracking removed. - # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove - # code in the #--! TRACKING sections - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None): - lookahead = None # Current lookahead symbol - lookaheadstack = [ ] # Stack of lookahead symbols - actions = self.action # Local reference to action table (to avoid lookup on self.) - goto = self.goto # Local reference to goto table (to avoid lookup on self.) - prod = self.productions # Local reference to production list (to avoid lookup on self.) - pslice = YaccProduction(None) # Production object passed to grammar rules - errorcount = 0 # Used during error recovery - - # If no lexer was given, we will try to use the lex module - if not lexer: - lex = load_ply_lex() - lexer = lex.lexer - - # Set up the lexer and parser objects on pslice - pslice.lexer = lexer - pslice.parser = self - - # If input was supplied, pass to lexer - if input is not None: - lexer.input(input) - - if tokenfunc is None: - # Tokenize function - get_token = lexer.token - else: - get_token = tokenfunc - - # Set up the state and symbol stacks - - statestack = [ ] # Stack of parsing states - self.statestack = statestack - symstack = [ ] # Stack of grammar symbols - self.symstack = symstack - - pslice.stack = symstack # Put in the production - errtoken = None # Err token - - # The start state is assumed to be (0,$end) - - statestack.append(0) - sym = YaccSymbol() - sym.type = '$end' - symstack.append(sym) - state = 0 - while 1: - # Get the next symbol on the input. If a lookahead symbol - # is already set, we just use that. Otherwise, we'll pull - # the next token off of the lookaheadstack or from the lexer - - if not lookahead: - if not lookaheadstack: - lookahead = get_token() # Get the next token - else: - lookahead = lookaheadstack.pop() - if not lookahead: - lookahead = YaccSymbol() - lookahead.type = '$end' - - # Check the action table - ltype = lookahead.type - t = actions[state].get(ltype) - - if t is not None: - if t > 0: - # shift a symbol on the stack - statestack.append(t) - state = t - - symstack.append(lookahead) - lookahead = None - - # Decrease error count on successful shift - if errorcount: errorcount -=1 - continue + if debug: + debug.info('Done : Returning %s', format_result(result)) + debug.info('PLY: PARSE DEBUG END') - if t < 0: - # reduce a symbol on the stack, emit a production - p = prod[-t] - pname = p.name - plen = p.len - - # Get production function - sym = YaccSymbol() - sym.type = pname # Production name - sym.value = None - - if plen: - targ = symstack[-plen-1:] - targ[0] = sym - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # below as a performance optimization. 
Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - del symstack[-plen:] - del statestack[-plen:] - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - else: - - targ = [ sym ] - - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - # The code enclosed in this section is duplicated - # above as a performance optimization. Make sure - # changes get made in both locations. - - pslice.slice = targ - - try: - # Call the grammar rule with our special slice object - p.callable(pslice) - symstack.append(sym) - state = goto[statestack[-1]][pname] - statestack.append(state) - except SyntaxError: - # If an error was set. Enter error recovery state - lookaheadstack.append(lookahead) - symstack.pop() - statestack.pop() - state = statestack[-1] - sym.type = 'error' - lookahead = sym - errorcount = error_count - self.errorok = 0 - continue - # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + return result - if t == 0: - n = symstack[-1] - return getattr(n,"value",None) + if t is None: - if t == None: + if debug: + debug.error('Error : %s', + ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()) # We have some kind of parsing error here. To handle # this, we are going to push the current token onto @@ -1033,20 +492,15 @@ class LRParser: # errorcount == 0. if errorcount == 0 or self.errorok: errorcount = error_count - self.errorok = 0 + self.errorok = False errtoken = lookahead if errtoken.type == '$end': errtoken = None # End of file! if self.errorfunc: - global errok,token,restart - errok = self.errok # Set some special functions available in error recovery - token = get_token - restart = self.restart - if errtoken and not hasattr(errtoken,'lexer'): + if errtoken and not hasattr(errtoken, 'lexer'): errtoken.lexer = lexer + self.state = state tok = self.errorfunc(errtoken) - del errok, token, restart # Delete special functions - if self.errorok: # User must have done some kind of panic # mode recovery on their own. The @@ -1056,14 +510,16 @@ class LRParser: continue else: if errtoken: - if hasattr(errtoken,"lineno"): lineno = lookahead.lineno - else: lineno = 0 + if hasattr(errtoken, 'lineno'): + lineno = lookahead.lineno + else: + lineno = 0 if lineno: - sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type)) + sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) else: - sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type) + sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) else: - sys.stderr.write("yacc: Parse error in input. EOF\n") + sys.stderr.write('yacc: Parse error in input. EOF\n') return else: @@ -1094,34 +550,43 @@ class LRParser: if sym.type == 'error': # Hmmm. 
Error is on top of stack, we'll just nuke input # symbol and continue + if tracking: + sym.endlineno = getattr(lookahead, 'lineno', sym.lineno) + sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos) lookahead = None continue + + # Create the error symbol for the first time and make it the new lookahead symbol t = YaccSymbol() t.type = 'error' - if hasattr(lookahead,"lineno"): - t.lineno = lookahead.lineno + + if hasattr(lookahead, 'lineno'): + t.lineno = t.endlineno = lookahead.lineno + if hasattr(lookahead, 'lexpos'): + t.lexpos = t.endlexpos = lookahead.lexpos t.value = lookahead lookaheadstack.append(lookahead) lookahead = t else: - symstack.pop() + sym = symstack.pop() + if tracking: + lookahead.lineno = sym.lineno + lookahead.lexpos = sym.lexpos statestack.pop() - state = statestack[-1] # Potential bug fix + state = statestack[-1] continue - # Call an error function here - raise RuntimeError("yacc: internal parser error!!!\n") + # If we're here, something really bad happened + raise RuntimeError('yacc: internal parser error!!!\n') # ----------------------------------------------------------------------------- # === Grammar Representation === # # The following functions, classes, and variables are used to represent and -# manipulate the rules that make up a grammar. +# manipulate the rules that make up a grammar. # ----------------------------------------------------------------------------- -import re - # regex matching identifiers _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') @@ -1131,7 +596,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') # This class stores the raw information about a single production or grammar rule. # A grammar rule refers to a specification such as this: # -# expr : expr PLUS term +# expr : expr PLUS term # # Here are the basic attributes defined on all productions # @@ -1151,7 +616,7 @@ _is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') class Production(object): reduced = 0 - def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0): + def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0): self.name = name self.prod = tuple(prod) self.number = number @@ -1162,11 +627,11 @@ class Production(object): self.prec = precedence # Internal settings used during table construction - + self.len = len(self.prod) # Length of the production # Create a list of unique production symbols used in the production - self.usyms = [ ] + self.usyms = [] for s in self.prod: if s not in self.usyms: self.usyms.append(s) @@ -1177,15 +642,15 @@ class Production(object): # Create a string representation if self.prod: - self.str = "%s -> %s" % (self.name," ".join(self.prod)) + self.str = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - self.str = "%s -> <empty>" % self.name + self.str = '%s -> <empty>' % self.name def __str__(self): return self.str def __repr__(self): - return "Production("+str(self)+")" + return 'Production(' + str(self) + ')' def __len__(self): return len(self.prod) @@ -1193,62 +658,37 @@ class Production(object): def __nonzero__(self): return 1 - def __getitem__(self,index): + def __getitem__(self, index): return self.prod[index] - - # Return the nth lr_item from the production (or None if at the end) - def lr_item(self,n): - if n > len(self.prod): return None - p = LRItem(self,n) - # Precompute the list of productions immediately following. Hack. 
Remove later + # Return the nth lr_item from the production (or None if at the end) + def lr_item(self, n): + if n > len(self.prod): + return None + p = LRItem(self, n) + # Precompute the list of productions immediately following. try: - p.lr_after = Prodnames[p.prod[n+1]] - except (IndexError,KeyError): + p.lr_after = self.Prodnames[p.prod[n+1]] + except (IndexError, KeyError): p.lr_after = [] try: p.lr_before = p.prod[n-1] except IndexError: p.lr_before = None - return p - - # Bind the production function name to a callable - def bind(self,pdict): - if self.func: - self.callable = pdict[self.func] - -# This class serves as a minimal standin for Production objects when -# reading table data from files. It only contains information -# actually used by the LR parsing engine, plus some additional -# debugging information. -class MiniProduction(object): - def __init__(self,str,name,len,func,file,line): - self.name = name - self.len = len - self.func = func - self.callable = None - self.file = file - self.line = line - self.str = str - def __str__(self): - return self.str - def __repr__(self): - return "MiniProduction(%s)" % self.str # Bind the production function name to a callable - def bind(self,pdict): + def bind(self, pdict): if self.func: self.callable = pdict[self.func] - # ----------------------------------------------------------------------------- # class LRItem # # This class represents a specific stage of parsing a production rule. For -# example: +# example: # -# expr : expr . PLUS term +# expr : expr . PLUS term # # In the above, the "." represents the current location of the parse. Here # basic attributes: @@ -1267,26 +707,26 @@ class MiniProduction(object): # ----------------------------------------------------------------------------- class LRItem(object): - def __init__(self,p,n): + def __init__(self, p, n): self.name = p.name self.prod = list(p.prod) self.number = p.number self.lr_index = n - self.lookaheads = { } - self.prod.insert(n,".") + self.lookaheads = {} + self.prod.insert(n, '.') self.prod = tuple(self.prod) self.len = len(self.prod) self.usyms = p.usyms def __str__(self): if self.prod: - s = "%s -> %s" % (self.name," ".join(self.prod)) + s = '%s -> %s' % (self.name, ' '.join(self.prod)) else: - s = "%s -> <empty>" % self.name + s = '%s -> <empty>' % self.name return s def __repr__(self): - return "LRItem("+str(self)+")" + return 'LRItem(' + str(self) + ')' # ----------------------------------------------------------------------------- # rightmost_terminal() @@ -1309,41 +749,42 @@ def rightmost_terminal(symbols, terminals): # This data is used for critical parts of the table generation process later. # ----------------------------------------------------------------------------- -class GrammarError(YaccError): pass +class GrammarError(YaccError): + pass class Grammar(object): - def __init__(self,terminals): + def __init__(self, terminals): self.Productions = [None] # A list of all of the productions. The first - # entry is always reserved for the purpose of - # building an augmented grammar + # entry is always reserved for the purpose of + # building an augmented grammar - self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all - # productions of that nonterminal. + self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all + # productions of that nonterminal. - self.Prodmap = { } # A dictionary that is only used to detect duplicate - # productions. 
+ self.Prodmap = {} # A dictionary that is only used to detect duplicate + # productions. - self.Terminals = { } # A dictionary mapping the names of terminal symbols to a - # list of the rules where they are used. + self.Terminals = {} # A dictionary mapping the names of terminal symbols to a + # list of the rules where they are used. for term in terminals: self.Terminals[term] = [] self.Terminals['error'] = [] - self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list - # of rule numbers where they are used. + self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list + # of rule numbers where they are used. - self.First = { } # A dictionary of precomputed FIRST(x) symbols + self.First = {} # A dictionary of precomputed FIRST(x) symbols - self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols + self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols - self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the - # form ('right',level) or ('nonassoc', level) or ('left',level) + self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the + # form ('right',level) or ('nonassoc', level) or ('left',level) - self.UsedPrecedence = { } # Precedence rules that were actually used by the grammer. - # This is only used to provide error checking and to generate - # a warning about unused precedence rules. + self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar. + # This is only used to provide error checking and to generate + # a warning about unused precedence rules. self.Start = None # Starting symbol for the grammar @@ -1351,7 +792,7 @@ class Grammar(object): def __len__(self): return len(self.Productions) - def __getitem__(self,index): + def __getitem__(self, index): return self.Productions[index] # ----------------------------------------------------------------------------- # set_precedence() # # Sets the precedence for a given terminal. assoc is the associativity such as # 'left','right', or 'nonassoc'. # # ----------------------------------------------------------------------------- - def set_precedence(self,term,assoc,level): - assert self.Productions == [None],"Must call set_precedence() before add_production()" + def set_precedence(self, term, assoc, level): + assert self.Productions == [None], 'Must call set_precedence() before add_production()' if term in self.Precedence: - raise GrammarError("Precedence already specified for terminal '%s'" % term) - if assoc not in ['left','right','nonassoc']: + raise GrammarError('Precedence already specified for terminal %r' % term) + if assoc not in ['left', 'right', 'nonassoc']: raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") - self.Precedence[term] = (assoc,level) - + self.Precedence[term] = (assoc, level) + # ----------------------------------------------------------------------------- # add_production() # # Given an action function, this function assembles a production rule and # computes its precedence level. # # The production rule is supplied as a list of symbols. For example, # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and # symbols ['expr','PLUS','term']. # # Precedence is determined by the precedence of the right-most non-terminal # or the precedence of a terminal specified by %prec. # # A variety of error checks are performed to make sure production symbols # are valid and that %prec is used correctly. # ----------------------------------------------------------------------------- - def add_production(self,prodname,syms,func=None,file='',line=0): + def add_production(self, prodname, syms, func=None, file='', line=0): if prodname in self.Terminals: - raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) if prodname == 'error': - raise GrammarError("%s:%d: Illegal rule name '%s'. 
error is a reserved word" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname)) if not _is_identifier.match(prodname): - raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname)) + raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) - # Look for literal tokens - for n,s in enumerate(syms): + # Look for literal tokens + for n, s in enumerate(syms): if s[0] in "'\"": - try: - c = eval(s) - if (len(c) > 1): - raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname)) - if not c in self.Terminals: - self.Terminals[c] = [] - syms[n] = c - continue - except SyntaxError: - pass + try: + c = eval(s) + if (len(c) > 1): + raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % + (file, line, s, prodname)) + if c not in self.Terminals: + self.Terminals[c] = [] + syms[n] = c + continue + except SyntaxError: + pass if not _is_identifier.match(s) and s != '%prec': - raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname)) - + raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) + # Determine the precedence level if '%prec' in syms: if syms[-1] == '%prec': - raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line)) + raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) if syms[-2] != '%prec': - raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line)) + raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % + (file, line)) precname = syms[-1] - prodprec = self.Precedence.get(precname,None) + prodprec = self.Precedence.get(precname) if not prodprec: - raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname)) + raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) else: - self.UsedPrecedence[precname] = 1 + self.UsedPrecedence.add(precname) del syms[-2:] # Drop %prec from the rule else: # If no %prec, precedence is determined by the rightmost terminal symbol - precname = rightmost_terminal(syms,self.Terminals) - prodprec = self.Precedence.get(precname,('right',0)) - + precname = rightmost_terminal(syms, self.Terminals) + prodprec = self.Precedence.get(precname, ('right', 0)) + # See if the rule is already in the rulemap - map = "%s -> %s" % (prodname,syms) + map = '%s -> %s' % (prodname, syms) if map in self.Prodmap: m = self.Prodmap[map] - raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) + - "Previous definition at %s:%d" % (m.file, m.line)) + raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + + 'Previous definition at %s:%d' % (m.file, m.line)) # From this point on, everything is valid. 
Create a new Production instance pnumber = len(self.Productions) - if not prodname in self.Nonterminals: - self.Nonterminals[prodname] = [ ] + if prodname not in self.Nonterminals: + self.Nonterminals[prodname] = [] # Add the production number to Terminals and Nonterminals for t in syms: if t in self.Terminals: self.Terminals[t].append(pnumber) else: - if not t in self.Nonterminals: - self.Nonterminals[t] = [ ] + if t not in self.Nonterminals: + self.Nonterminals[t] = [] self.Nonterminals[t].append(pnumber) # Create a production and add it to the list of productions - p = Production(pnumber,prodname,syms,prodprec,func,file,line) + p = Production(pnumber, prodname, syms, prodprec, func, file, line) self.Productions.append(p) self.Prodmap[map] = p @@ -1460,22 +903,21 @@ class Grammar(object): try: self.Prodnames[prodname].append(p) except KeyError: - self.Prodnames[prodname] = [ p ] - return 0 + self.Prodnames[prodname] = [p] # ----------------------------------------------------------------------------- # set_start() # - # Sets the starting symbol and creates the augmented grammar. Production + # Sets the starting symbol and creates the augmented grammar. Production # rule 0 is S' -> start where start is the start symbol. # ----------------------------------------------------------------------------- - def set_start(self,start=None): + def set_start(self, start=None): if not start: start = self.Productions[1].name if start not in self.Nonterminals: - raise GrammarError("start symbol %s undefined" % start) - self.Productions[0] = Production(0,"S'",[start]) + raise GrammarError('start symbol %s undefined' % start) + self.Productions[0] = Production(0, "S'", [start]) self.Nonterminals[start].append(0) self.Start = start @@ -1487,26 +929,20 @@ class Grammar(object): # ----------------------------------------------------------------------------- def find_unreachable(self): - + # Mark all symbols that are reachable from a symbol s def mark_reachable_from(s): - if reachable[s]: - # We've already reached symbol s. + if s in reachable: return - reachable[s] = 1 - for p in self.Prodnames.get(s,[]): + reachable.add(s) + for p in self.Prodnames.get(s, []): for r in p.prod: mark_reachable_from(r) - reachable = { } - for s in list(self.Terminals) + list(self.Nonterminals): - reachable[s] = 0 + reachable = set() + mark_reachable_from(self.Productions[0].prod[0]) + return [s for s in self.Nonterminals if s not in reachable] - mark_reachable_from( self.Productions[0].prod[0] ) - - return [s for s in list(self.Nonterminals) - if not reachable[s]] - # ----------------------------------------------------------------------------- # infinite_cycles() # @@ -1520,20 +956,20 @@ class Grammar(object): # Terminals: for t in self.Terminals: - terminates[t] = 1 + terminates[t] = True - terminates['$end'] = 1 + terminates['$end'] = True # Nonterminals: # Initialize to false: for n in self.Nonterminals: - terminates[n] = 0 + terminates[n] = False # Then propagate termination until no change: - while 1: - some_change = 0 - for (n,pl) in self.Prodnames.items(): + while True: + some_change = False + for (n, pl) in self.Prodnames.items(): # Nonterminal n terminates iff any of its productions terminates. for p in pl: # Production p terminates iff all of its rhs symbols terminate. @@ -1541,19 +977,19 @@ class Grammar(object): if not terminates[s]: # The symbol s does not terminate, # so production p does not terminate. 
- p_terminates = 0 + p_terminates = False break else: # didn't break from the loop, # so every symbol s terminates # so production p terminates. - p_terminates = 1 + p_terminates = True if p_terminates: # symbol n terminates! if not terminates[n]: - terminates[n] = 1 - some_change = 1 + terminates[n] = True + some_change = True # Don't need to consider any more productions for this n. break @@ -1561,9 +997,9 @@ class Grammar(object): break infinite = [] - for (s,term) in terminates.items(): + for (s, term) in terminates.items(): if not term: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': + if s not in self.Prodnames and s not in self.Terminals and s != 'error': # s is used-but-not-defined, and we've already warned of that, # so it would be overkill to say that it's also non-terminating. pass @@ -1572,22 +1008,22 @@ class Grammar(object): return infinite - # ----------------------------------------------------------------------------- # undefined_symbols() # # Find all symbols that were used the grammar, but not defined as tokens or # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol - # and prod is the production where the symbol was used. + # and prod is the production where the symbol was used. # ----------------------------------------------------------------------------- def undefined_symbols(self): result = [] for p in self.Productions: - if not p: continue + if not p: + continue for s in p.prod: - if not s in self.Prodnames and not s in self.Terminals and s != 'error': - result.append((s,p)) + if s not in self.Prodnames and s not in self.Terminals and s != 'error': + result.append((s, p)) return result # ----------------------------------------------------------------------------- @@ -1598,7 +1034,7 @@ class Grammar(object): # ----------------------------------------------------------------------------- def unused_terminals(self): unused_tok = [] - for s,v in self.Terminals.items(): + for s, v in self.Terminals.items(): if s != 'error' and not v: unused_tok.append(s) @@ -1613,7 +1049,7 @@ class Grammar(object): def unused_rules(self): unused_prod = [] - for s,v in self.Nonterminals.items(): + for s, v in self.Nonterminals.items(): if not v: p = self.Prodnames[s][0] unused_prod.append(p) @@ -1625,15 +1061,15 @@ class Grammar(object): # Returns a list of tuples (term,precedence) corresponding to precedence # rules that were never used by the grammar. term is the name of the terminal # on which precedence was applied and precedence is a string such as 'left' or - # 'right' corresponding to the type of precedence. + # 'right' corresponding to the type of precedence. # ----------------------------------------------------------------------------- def unused_precedence(self): unused = [] for termname in self.Precedence: if not (termname in self.Terminals or termname in self.UsedPrecedence): - unused.append((termname,self.Precedence[termname][0])) - + unused.append((termname, self.Precedence[termname][0])) + return unused # ------------------------------------------------------------------------- @@ -1644,19 +1080,20 @@ class Grammar(object): # During execution of compute_first1, the result may be incomplete. # Afterward (e.g., when called from compute_follow()), it will be complete. 
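    # A worked example of the rule implemented below (toy productions, not from
    # the surrounding code): with S -> A B, A -> 'a' | <empty>, B -> 'b',
    # FIRST(A) is {'a', '<empty>'} and _first(['A', 'B']) yields ['a', 'b']:
    # A can derive <empty>, so FIRST(B) is folded in; B cannot, so '<empty>'
    # itself never reaches the result.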
# ------------------------------------------------------------------------- - def _first(self,beta): + def _first(self, beta): # We are computing First(x1,x2,x3,...,xn) - result = [ ] + result = [] for x in beta: - x_produces_empty = 0 + x_produces_empty = False # Add all the non-<empty> symbols of First[x] to the result. for f in self.First[x]: if f == '<empty>': - x_produces_empty = 1 + x_produces_empty = True else: - if f not in result: result.append(f) + if f not in result: + result.append(f) if x_produces_empty: # We have to consider the next x in beta, @@ -1695,17 +1132,17 @@ class Grammar(object): self.First[n] = [] # Then propagate symbols until no change: - while 1: - some_change = 0 + while True: + some_change = False for n in self.Nonterminals: for p in self.Prodnames[n]: for f in self._first(p.prod): if f not in self.First[n]: - self.First[n].append( f ) - some_change = 1 + self.First[n].append(f) + some_change = True if not some_change: break - + return self.First # --------------------------------------------------------------------- @@ -1715,7 +1152,7 @@ class Grammar(object): # follow set is the set of all symbols that might follow a given # non-terminal. See the Dragon book, 2nd Ed. p. 189. # --------------------------------------------------------------------- - def compute_follow(self,start=None): + def compute_follow(self, start=None): # If already computed, return the result if self.Follow: return self.Follow @@ -1726,36 +1163,36 @@ class Grammar(object): # Add '$end' to the follow list of the start symbol for k in self.Nonterminals: - self.Follow[k] = [ ] + self.Follow[k] = [] if not start: start = self.Productions[1].name - self.Follow[start] = [ '$end' ] + self.Follow[start] = ['$end'] - while 1: - didadd = 0 + while True: + didadd = False for p in self.Productions[1:]: # Here is the production set - for i in range(len(p.prod)): - B = p.prod[i] + for i, B in enumerate(p.prod): if B in self.Nonterminals: # Okay. We got a non-terminal in a production fst = self._first(p.prod[i+1:]) - hasempty = 0 + hasempty = False for f in fst: if f != '<empty>' and f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 + didadd = True if f == '<empty>': - hasempty = 1 + hasempty = True if hasempty or i == (len(p.prod)-1): # Add elements of follow(a) to follow(b) for f in self.Follow[p.name]: if f not in self.Follow[B]: self.Follow[B].append(f) - didadd = 1 - if not didadd: break + didadd = True + if not didadd: + break return self.Follow @@ -1779,15 +1216,15 @@ class Grammar(object): lastlri = p i = 0 lr_items = [] - while 1: + while True: if i > len(p): lri = None else: - lri = LRItem(p,i) + lri = LRItem(p, i) # Precompute the list of productions immediately following try: lri.lr_after = self.Prodnames[lri.prod[i+1]] - except (IndexError,KeyError): + except (IndexError, KeyError): lri.lr_after = [] try: lri.lr_before = lri.prod[i-1] @@ -1795,86 +1232,17 @@ class Grammar(object): lri.lr_before = None lastlri.lr_next = lri - if not lri: break + if not lri: + break lr_items.append(lri) lastlri = lri i += 1 p.lr_items = lr_items # ----------------------------------------------------------------------------- -# == Class LRTable == -# -# This basic class represents a basic table of LR parsing information. -# Methods for generating the tables are not defined here. They are defined -# in the derived class LRGeneratedTable. 
-# ----------------------------------------------------------------------------- - -class VersionError(YaccError): pass - -class LRTable(object): - def __init__(self): - self.lr_action = None - self.lr_goto = None - self.lr_productions = None - self.lr_method = None - - def read_table(self,module): - if isinstance(module,types.ModuleType): - parsetab = module - else: - if sys.version_info[0] < 3: - exec("import %s as parsetab" % module) - else: - env = { } - exec("import %s as parsetab" % module, env, env) - parsetab = env['parsetab'] - - if parsetab._tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - - self.lr_action = parsetab._lr_action - self.lr_goto = parsetab._lr_goto - - self.lr_productions = [] - for p in parsetab._lr_productions: - self.lr_productions.append(MiniProduction(*p)) - - self.lr_method = parsetab._lr_method - return parsetab._lr_signature - - def read_pickle(self,filename): - try: - import cPickle as pickle - except ImportError: - import pickle - - in_f = open(filename,"rb") - - tabversion = pickle.load(in_f) - if tabversion != __tabversion__: - raise VersionError("yacc table file version is out of date") - self.lr_method = pickle.load(in_f) - signature = pickle.load(in_f) - self.lr_action = pickle.load(in_f) - self.lr_goto = pickle.load(in_f) - productions = pickle.load(in_f) - - self.lr_productions = [] - for p in productions: - self.lr_productions.append(MiniProduction(*p)) - - in_f.close() - return signature - - # Bind all production function names to callable objects in pdict - def bind_callables(self,pdict): - for p in self.lr_productions: - p.bind(pdict) - -# ----------------------------------------------------------------------------- # === LR Generator === # -# The following classes and functions are used to generate LR parsing tables on +# The following classes and functions are used to generate LR parsing tables on # a grammar. # ----------------------------------------------------------------------------- @@ -1895,17 +1263,18 @@ class LRTable(object): # FP - Set-valued function # ------------------------------------------------------------------------------ -def digraph(X,R,FP): - N = { } +def digraph(X, R, FP): + N = {} for x in X: - N[x] = 0 + N[x] = 0 stack = [] - F = { } + F = {} for x in X: - if N[x] == 0: traverse(x,N,stack,F,X,R,FP) + if N[x] == 0: + traverse(x, N, stack, F, X, R, FP) return F -def traverse(x,N,stack,F,X,R,FP): +def traverse(x, N, stack, F, X, R, FP): stack.append(x) d = len(stack) N[x] = d @@ -1914,35 +1283,34 @@ def traverse(x,N,stack,F,X,R,FP): rel = R(x) # Get y's related to x for y in rel: if N[y] == 0: - traverse(y,N,stack,F,X,R,FP) - N[x] = min(N[x],N[y]) - for a in F.get(y,[]): - if a not in F[x]: F[x].append(a) + traverse(y, N, stack, F, X, R, FP) + N[x] = min(N[x], N[y]) + for a in F.get(y, []): + if a not in F[x]: + F[x].append(a) if N[x] == d: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() - while element != x: - N[stack[-1]] = MAXINT - F[stack[-1]] = F[x] - element = stack.pop() + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + while element != x: + N[stack[-1]] = MAXINT + F[stack[-1]] = F[x] + element = stack.pop() + +class LALRError(YaccError): + pass -class LALRError(YaccError): pass # ----------------------------------------------------------------------------- -# == LRGeneratedTable == +# == LRTable == # # This class implements the LR table generation algorithm. There are no -# public methods except for write() +# public methods. 
# ----------------------------------------------------------------------------- -class LRGeneratedTable(LRTable): - def __init__(self,grammar,method='LALR',log=None): - if method not in ['SLR','LALR']: - raise LALRError("Unsupported method %s" % method) - +class LRTable: + def __init__(self, grammar, log=None): self.grammar = grammar - self.lr_method = method # Set up the logger if not log: @@ -1958,7 +1326,7 @@ class LRGeneratedTable(LRTable): self._add_count = 0 # Internal counter used to detect cycles - # Diagonistic information filled in by the table generator + # Diagnostic information filled in by the table generator self.sr_conflict = 0 self.rr_conflict = 0 self.conflicts = [] # List of conflicts @@ -1972,23 +1340,29 @@ class LRGeneratedTable(LRTable): self.grammar.compute_follow() self.lr_parse_table() + # Bind all production function names to callable objects in pdict + def bind_callables(self, pdict): + for p in self.lr_productions: + p.bind(pdict) + # Compute the LR(0) closure operation on I, where I is a set of LR(0) items. - def lr0_closure(self,I): + def lr0_closure(self, I): self._add_count += 1 # Add everything in I to J J = I[:] - didadd = 1 + didadd = True while didadd: - didadd = 0 + didadd = False for j in J: for x in j.lr_after: - if getattr(x,"lr0_added",0) == self._add_count: continue + if getattr(x, 'lr0_added', 0) == self._add_count: + continue # Add B --> .G to J J.append(x.lr_next) x.lr0_added = self._add_count - didadd = 1 + didadd = True return J @@ -1999,43 +1373,43 @@ class LRGeneratedTable(LRTable): # objects). With uniqueness, we can later do fast set comparisons using # id(obj) instead of element-wise comparison. - def lr0_goto(self,I,x): + def lr0_goto(self, I, x): # First we look for a previously cached entry - g = self.lr_goto_cache.get((id(I),x),None) - if g: return g + g = self.lr_goto_cache.get((id(I), x)) + if g: + return g # Now we generate the goto set in a way that guarantees uniqueness # of the result - s = self.lr_goto_cache.get(x,None) + s = self.lr_goto_cache.get(x) if not s: - s = { } + s = {} self.lr_goto_cache[x] = s - gs = [ ] + gs = [] for p in I: n = p.lr_next if n and n.lr_before == x: - s1 = s.get(id(n),None) + s1 = s.get(id(n)) if not s1: - s1 = { } + s1 = {} s[id(n)] = s1 gs.append(n) s = s1 - g = s.get('$end',None) + g = s.get('$end') if not g: if gs: g = self.lr0_closure(gs) s['$end'] = g else: s['$end'] = gs - self.lr_goto_cache[(id(I),x)] = g + self.lr_goto_cache[(id(I), x)] = g return g # Compute the LR(0) sets of item function def lr0_items(self): - - C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ] + C = [self.lr0_closure([self.grammar.Productions[0].lr_next])] i = 0 for I in C: self.lr0_cidhash[id(I)] = i @@ -2048,15 +1422,15 @@ class LRGeneratedTable(LRTable): i += 1 # Collect all of the symbols that could possibly be in the goto(I,X) sets - asyms = { } + asyms = {} for ii in I: for s in ii.usyms: asyms[s] = None for x in asyms: - g = self.lr0_goto(I,x) - if not g: continue - if id(g) in self.lr0_cidhash: continue + g = self.lr0_goto(I, x) + if not g or id(g) in self.lr0_cidhash: + continue self.lr0_cidhash[id(g)] = len(C) C.append(g) @@ -2091,19 +1465,21 @@ class LRGeneratedTable(LRTable): # ----------------------------------------------------------------------------- def compute_nullable_nonterminals(self): - nullable = {} + nullable = set() num_nullable = 0 - while 1: - for p in self.grammar.Productions[1:]: - if p.len == 0: - nullable[p.name] = 1 + while True: + for p in self.grammar.Productions[1:]: + if 
p.len == 0: + nullable.add(p.name) continue - for t in p.prod: - if not t in nullable: break - else: - nullable[p.name] = 1 - if len(nullable) == num_nullable: break - num_nullable = len(nullable) + for t in p.prod: + if t not in nullable: + break + else: + nullable.add(p.name) + if len(nullable) == num_nullable: + break + num_nullable = len(nullable) return nullable # ----------------------------------------------------------------------------- @@ -2117,16 +1493,16 @@ class LRGeneratedTable(LRTable): # The input C is the set of LR(0) items. # ----------------------------------------------------------------------------- - def find_nonterminal_transitions(self,C): - trans = [] - for state in range(len(C)): - for p in C[state]: - if p.lr_index < p.len - 1: - t = (state,p.prod[p.lr_index+1]) - if t[1] in self.grammar.Nonterminals: - if t not in trans: trans.append(t) - state = state + 1 - return trans + def find_nonterminal_transitions(self, C): + trans = [] + for stateno, state in enumerate(C): + for p in state: + if p.lr_index < p.len - 1: + t = (stateno, p.prod[p.lr_index+1]) + if t[1] in self.grammar.Nonterminals: + if t not in trans: + trans.append(t) + return trans # ----------------------------------------------------------------------------- # dr_relation() @@ -2137,21 +1513,21 @@ class LRGeneratedTable(LRTable): # Returns a list of terminals. # ----------------------------------------------------------------------------- - def dr_relation(self,C,trans,nullable): - dr_set = { } - state,N = trans + def dr_relation(self, C, trans, nullable): + state, N = trans terms = [] - g = self.lr0_goto(C[state],N) + g = self.lr0_goto(C[state], N) for p in g: - if p.lr_index < p.len - 1: - a = p.prod[p.lr_index+1] - if a in self.grammar.Terminals: - if a not in terms: terms.append(a) + if p.lr_index < p.len - 1: + a = p.prod[p.lr_index+1] + if a in self.grammar.Terminals: + if a not in terms: + terms.append(a) # This extra bit is to handle the start state if state == 0 and N == self.grammar.Productions[0].prod[0]: - terms.append('$end') + terms.append('$end') return terms @@ -2161,18 +1537,18 @@ class LRGeneratedTable(LRTable): # Computes the READS() relation (p,A) READS (t,C). # ----------------------------------------------------------------------------- - def reads_relation(self,C, trans, empty): + def reads_relation(self, C, trans, empty): # Look for empty transitions rel = [] state, N = trans - g = self.lr0_goto(C[state],N) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(C[state], N) + j = self.lr0_cidhash.get(id(g), -1) for p in g: if p.lr_index < p.len - 1: - a = p.prod[p.lr_index + 1] - if a in empty: - rel.append((j,a)) + a = p.prod[p.lr_index + 1] + if a in empty: + rel.append((j, a)) return rel @@ -2204,8 +1580,7 @@ class LRGeneratedTable(LRTable): # # ----------------------------------------------------------------------------- - def compute_lookback_includes(self,C,trans,nullable): - + def compute_lookback_includes(self, C, trans, nullable): lookdict = {} # Dictionary of lookback relations includedict = {} # Dictionary of include relations @@ -2215,11 +1590,12 @@ class LRGeneratedTable(LRTable): dtrans[t] = 1 # Loop over all transitions and compute lookbacks and includes - for state,N in trans: + for state, N in trans: lookb = [] includes = [] for p in C[state]: - if p.name != N: continue + if p.name != N: + continue # Okay, we have a name match. We now follow the production all the way # through the state machine until we get the . 
on the right hand side @@ -2227,44 +1603,50 @@ class LRGeneratedTable(LRTable): lr_index = p.lr_index j = state while lr_index < p.len - 1: - lr_index = lr_index + 1 - t = p.prod[lr_index] - - # Check to see if this symbol and state are a non-terminal transition - if (j,t) in dtrans: - # Yes. Okay, there is some chance that this is an includes relation - # the only way to know for certain is whether the rest of the - # production derives empty - - li = lr_index + 1 - while li < p.len: - if p.prod[li] in self.grammar.Terminals: break # No forget it - if not p.prod[li] in nullable: break - li = li + 1 - else: - # Appears to be a relation between (j,t) and (state,N) - includes.append((j,t)) - - g = self.lr0_goto(C[j],t) # Go to next set - j = self.lr0_cidhash.get(id(g),-1) # Go to next state + lr_index = lr_index + 1 + t = p.prod[lr_index] + + # Check to see if this symbol and state are a non-terminal transition + if (j, t) in dtrans: + # Yes. Okay, there is some chance that this is an includes relation + # the only way to know for certain is whether the rest of the + # production derives empty + + li = lr_index + 1 + while li < p.len: + if p.prod[li] in self.grammar.Terminals: + break # No forget it + if p.prod[li] not in nullable: + break + li = li + 1 + else: + # Appears to be a relation between (j,t) and (state,N) + includes.append((j, t)) + + g = self.lr0_goto(C[j], t) # Go to next set + j = self.lr0_cidhash.get(id(g), -1) # Go to next state # When we get here, j is the final state, now we have to locate the production for r in C[j]: - if r.name != p.name: continue - if r.len != p.len: continue - i = 0 - # This look is comparing a production ". A B C" with "A B C ." - while i < r.lr_index: - if r.prod[i] != p.prod[i+1]: break - i = i + 1 - else: - lookb.append((j,r)) + if r.name != p.name: + continue + if r.len != p.len: + continue + i = 0 + # This look is comparing a production ". A B C" with "A B C ." 
+ while i < r.lr_index: + if r.prod[i] != p.prod[i+1]: + break + i = i + 1 + else: + lookb.append((j, r)) for i in includes: - if not i in includedict: includedict[i] = [] - includedict[i].append((state,N)) - lookdict[(state,N)] = lookb + if i not in includedict: + includedict[i] = [] + includedict[i].append((state, N)) + lookdict[(state, N)] = lookb - return lookdict,includedict + return lookdict, includedict # ----------------------------------------------------------------------------- # compute_read_sets() @@ -2278,10 +1660,10 @@ class LRGeneratedTable(LRTable): # Returns a set containing the read sets # ----------------------------------------------------------------------------- - def compute_read_sets(self,C, ntrans, nullable): - FP = lambda x: self.dr_relation(C,x,nullable) - R = lambda x: self.reads_relation(C,x,nullable) - F = digraph(ntrans,R,FP) + def compute_read_sets(self, C, ntrans, nullable): + FP = lambda x: self.dr_relation(C, x, nullable) + R = lambda x: self.reads_relation(C, x, nullable) + F = digraph(ntrans, R, FP) return F # ----------------------------------------------------------------------------- @@ -2300,11 +1682,11 @@ class LRGeneratedTable(LRTable): # Returns a set containing the follow sets # ----------------------------------------------------------------------------- - def compute_follow_sets(self,ntrans,readsets,inclsets): - FP = lambda x: readsets[x] - R = lambda x: inclsets.get(x,[]) - F = digraph(ntrans,R,FP) - return F + def compute_follow_sets(self, ntrans, readsets, inclsets): + FP = lambda x: readsets[x] + R = lambda x: inclsets.get(x, []) + F = digraph(ntrans, R, FP) + return F # ----------------------------------------------------------------------------- # add_lookaheads() @@ -2318,15 +1700,16 @@ class LRGeneratedTable(LRTable): # in the lookbacks set # ----------------------------------------------------------------------------- - def add_lookaheads(self,lookbacks,followset): - for trans,lb in lookbacks.items(): + def add_lookaheads(self, lookbacks, followset): + for trans, lb in lookbacks.items(): # Loop over productions in lookback - for state,p in lb: - if not state in p.lookaheads: - p.lookaheads[state] = [] - f = followset.get(trans,[]) - for a in f: - if a not in p.lookaheads[state]: p.lookaheads[state].append(a) + for state, p in lb: + if state not in p.lookaheads: + p.lookaheads[state] = [] + f = followset.get(trans, []) + for a in f: + if a not in p.lookaheads[state]: + p.lookaheads[state].append(a) # ----------------------------------------------------------------------------- # add_lalr_lookaheads() @@ -2335,7 +1718,7 @@ class LRGeneratedTable(LRTable): # with LALR parsing # ----------------------------------------------------------------------------- - def add_lalr_lookaheads(self,C): + def add_lalr_lookaheads(self, C): # Determine all of the nullable nonterminals nullable = self.compute_nullable_nonterminals() @@ -2343,16 +1726,16 @@ class LRGeneratedTable(LRTable): trans = self.find_nonterminal_transitions(C) # Compute read sets - readsets = self.compute_read_sets(C,trans,nullable) + readsets = self.compute_read_sets(C, trans, nullable) # Compute lookback/includes relations - lookd, included = self.compute_lookback_includes(C,trans,nullable) + lookd, included = self.compute_lookback_includes(C, trans, nullable) # Compute LALR FOLLOW sets - followsets = self.compute_follow_sets(trans,readsets,included) + followsets = self.compute_follow_sets(trans, readsets, included) # Add all of the lookaheads - 
self.add_lookaheads(lookd,followsets) + self.add_lookaheads(lookd, followsets) # ----------------------------------------------------------------------------- # lr_parse_table() @@ -2366,324 +1749,179 @@ class LRGeneratedTable(LRTable): action = self.lr_action # Action array log = self.log # Logger for output - actionp = { } # Action production array (temporary) - - log.info("Parsing method: %s", self.lr_method) + actionp = {} # Action production array (temporary) # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items # This determines the number of states C = self.lr0_items() - - if self.lr_method == 'LALR': - self.add_lalr_lookaheads(C) + self.add_lalr_lookaheads(C) # Build the parser table, state by state st = 0 for I in C: # Loop over each production in I - actlist = [ ] # List of actions - st_action = { } - st_actionp = { } - st_goto = { } - log.info("") - log.info("state %d", st) - log.info("") + actlist = [] # List of actions + st_action = {} + st_actionp = {} + st_goto = {} + log.info('') + log.info('state %d', st) + log.info('') for p in I: - log.info(" (%d) %s", p.number, str(p)) - log.info("") + log.info(' (%d) %s', p.number, p) + log.info('') for p in I: - if p.len == p.lr_index + 1: - if p.name == "S'": - # Start symbol. Accept! - st_action["$end"] = 0 - st_actionp["$end"] = p - else: - # We are at the end of a production. Reduce! - if self.lr_method == 'LALR': - laheads = p.lookaheads[st] - else: - laheads = self.grammar.Follow[p.name] - for a in laheads: - actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p))) - r = st_action.get(a,None) - if r is not None: - # Whoa. Have a shift/reduce or reduce/reduce conflict - if r > 0: - # Need to decide on shift or reduce here - # By default we favor shifting. Need to add - # some precedence rules here. - sprec,slevel = Productions[st_actionp[a].number].prec - rprec,rlevel = Precedence.get(a,('right',0)) - if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): - # We really need to reduce here. - st_action[a] = -p.number - st_actionp[a] = p - if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) - Productions[p.number].reduced += 1 - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the shift - if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) - elif r < 0: - # Reduce/reduce conflict. In this case, we favor the rule - # that was defined first in the grammar file - oldp = Productions[-r] - pp = Productions[p.number] - if oldp.line > pp.line: - st_action[a] = -p.number - st_actionp[a] = p - chosenp,rejectp = pp,oldp - Productions[p.number].reduced += 1 - Productions[oldp.number].reduced -= 1 - else: - chosenp,rejectp = oldp,pp - self.rr_conflicts.append((st,chosenp,rejectp)) - log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a]) + if p.len == p.lr_index + 1: + if p.name == "S'": + # Start symbol. Accept! + st_action['$end'] = 0 + st_actionp['$end'] = p + else: + # We are at the end of a production. Reduce! + laheads = p.lookaheads[st] + for a in laheads: + actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) + r = st_action.get(a) + if r is not None: + # Whoa. Have a shift/reduce or reduce/reduce conflict + if r > 0: + # Need to decide on shift or reduce here + # By default we favor shifting. 
Need to add + # some precedence rules here. + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from rule being reduced (p) + rprec, rlevel = Productions[p.number].prec + + if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): + # We really need to reduce here. + st_action[a] = -p.number + st_actionp[a] = p + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + Productions[p.number].reduced += 1 + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None + else: + # Hmmm. Guess we'll keep the shift + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif r < 0: + # Reduce/reduce conflict. In this case, we favor the rule + # that was defined first in the grammar file + oldp = Productions[-r] + pp = Productions[p.number] + if oldp.line > pp.line: + st_action[a] = -p.number + st_actionp[a] = p + chosenp, rejectp = pp, oldp + Productions[p.number].reduced += 1 + Productions[oldp.number].reduced -= 1 else: - raise LALRError("Unknown conflict in state %d" % st) + chosenp, rejectp = oldp, pp + self.rr_conflicts.append((st, chosenp, rejectp)) + log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', + a, st_actionp[a].number, st_actionp[a]) else: - st_action[a] = -p.number - st_actionp[a] = p - Productions[p.number].reduced += 1 - else: - i = p.lr_index - a = p.prod[i+1] # Get symbol right after the "." - if a in self.grammar.Terminals: - g = self.lr0_goto(I,a) - j = self.lr0_cidhash.get(id(g),-1) - if j >= 0: - # We are in a shift state - actlist.append((a,p,"shift and go to state %d" % j)) - r = st_action.get(a,None) - if r is not None: - # Whoa have a shift/reduce or shift/shift conflict - if r > 0: - if r != j: - raise LALRError("Shift/shift conflict in state %d" % st) - elif r < 0: - # Do a precedence check. - # - if precedence of reduce rule is higher, we reduce. - # - if precedence of reduce is same and left assoc, we reduce. - # - otherwise we shift - rprec,rlevel = Productions[st_actionp[a].number].prec - sprec,slevel = Precedence.get(a,('right',0)) - if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): - # We decide to shift here... highest precedence to shift - Productions[st_actionp[a].number].reduced -= 1 - st_action[a] = j - st_actionp[a] = p - if not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as shift",a) - self.sr_conflicts.append((st,a,'shift')) - elif (slevel == rlevel) and (rprec == 'nonassoc'): - st_action[a] = None - else: - # Hmmm. Guess we'll keep the reduce - if not slevel and not rlevel: - log.info(" ! shift/reduce conflict for %s resolved as reduce",a) - self.sr_conflicts.append((st,a,'reduce')) - + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = -p.number + st_actionp[a] = p + Productions[p.number].reduced += 1 + else: + i = p.lr_index + a = p.prod[i+1] # Get symbol right after the "." + if a in self.grammar.Terminals: + g = self.lr0_goto(I, a) + j = self.lr0_cidhash.get(id(g), -1) + if j >= 0: + # We are in a shift state + actlist.append((a, p, 'shift and go to state %d' % j)) + r = st_action.get(a) + if r is not None: + # Whoa have a shift/reduce or shift/shift conflict + if r > 0: + if r != j: + raise LALRError('Shift/shift conflict in state %d' % st) + elif r < 0: + # Do a precedence check. 
+ # - if precedence of reduce rule is higher, we reduce. + # - if precedence of reduce is same and left assoc, we reduce. + # - otherwise we shift + + # Shift precedence comes from the token + sprec, slevel = Precedence.get(a, ('right', 0)) + + # Reduce precedence comes from the rule that could have been reduced + rprec, rlevel = Productions[st_actionp[a].number].prec + + if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): + # We decide to shift here... highest precedence to shift + Productions[st_actionp[a].number].reduced -= 1 + st_action[a] = j + st_actionp[a] = p + if not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as shift', a) + self.sr_conflicts.append((st, a, 'shift')) + elif (slevel == rlevel) and (rprec == 'nonassoc'): + st_action[a] = None else: - raise LALRError("Unknown conflict in state %d" % st) + # Hmmm. Guess we'll keep the reduce + if not slevel and not rlevel: + log.info(' ! shift/reduce conflict for %s resolved as reduce', a) + self.sr_conflicts.append((st, a, 'reduce')) + else: - st_action[a] = j - st_actionp[a] = p + raise LALRError('Unknown conflict in state %d' % st) + else: + st_action[a] = j + st_actionp[a] = p # Print the actions associated with each terminal - _actprint = { } - for a,p,m in actlist: + _actprint = {} + for a, p, m in actlist: if a in st_action: if p is st_actionp[a]: - log.info(" %-15s %s",a,m) - _actprint[(a,m)] = 1 - log.info("") + log.info(' %-15s %s', a, m) + _actprint[(a, m)] = 1 + log.info('') # Print the actions that were not used. (debugging) not_used = 0 - for a,p,m in actlist: + for a, p, m in actlist: if a in st_action: if p is not st_actionp[a]: - if not (a,m) in _actprint: - log.debug(" ! %-15s [ %s ]",a,m) + if not (a, m) in _actprint: + log.debug(' ! %-15s [ %s ]', a, m) not_used = 1 - _actprint[(a,m)] = 1 + _actprint[(a, m)] = 1 if not_used: - log.debug("") + log.debug('') # Construct the goto table for this state - nkeys = { } + nkeys = {} for ii in I: for s in ii.usyms: if s in self.grammar.Nonterminals: nkeys[s] = None for n in nkeys: - g = self.lr0_goto(I,n) - j = self.lr0_cidhash.get(id(g),-1) + g = self.lr0_goto(I, n) + j = self.lr0_cidhash.get(id(g), -1) if j >= 0: st_goto[n] = j - log.info(" %-30s shift and go to state %d",n,j) + log.info(' %-30s shift and go to state %d', n, j) action[st] = st_action actionp[st] = st_actionp goto[st] = st_goto st += 1 - - # ----------------------------------------------------------------------------- - # write() - # - # This function writes the LR parsing tables to a file - # ----------------------------------------------------------------------------- - - def write_table(self,modulename,outputdir='',signature=""): - basemodulename = modulename.split(".")[-1] - filename = os.path.join(outputdir,basemodulename) + ".py" - try: - f = open(filename,"w") - - f.write(""" -# %s -# This file is automatically generated. Do not edit. 
-_tabversion = %r - -_lr_method = %r - -_lr_signature = %r - """ % (filename, __tabversion__, self.lr_method, signature)) - - # Change smaller to 0 to go back to original tables - smaller = 1 - - # Factor out names to try and make smaller - if smaller: - items = { } - - for s,nd in self.lr_action.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_action_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_action = { } -for _k, _v in _lr_action_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_action: _lr_action[_x] = { } - _lr_action[_x][_k] = _y -del _lr_action_items -""") - - else: - f.write("\n_lr_action = { "); - for k,v in self.lr_action.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - if smaller: - # Factor out names to try and make smaller - items = { } - - for s,nd in self.lr_goto.items(): - for name,v in nd.items(): - i = items.get(name) - if not i: - i = ([],[]) - items[name] = i - i[0].append(s) - i[1].append(v) - - f.write("\n_lr_goto_items = {") - for k,v in items.items(): - f.write("%r:([" % k) - for i in v[0]: - f.write("%r," % i) - f.write("],[") - for i in v[1]: - f.write("%r," % i) - - f.write("]),") - f.write("}\n") - - f.write(""" -_lr_goto = { } -for _k, _v in _lr_goto_items.items(): - for _x,_y in zip(_v[0],_v[1]): - if not _x in _lr_goto: _lr_goto[_x] = { } - _lr_goto[_x][_k] = _y -del _lr_goto_items -""") - else: - f.write("\n_lr_goto = { "); - for k,v in self.lr_goto.items(): - f.write("(%r,%r):%r," % (k[0],k[1],v)) - f.write("}\n"); - - # Write production table - f.write("_lr_productions = [\n") - for p in self.lr_productions: - if p.func: - f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line)) - else: - f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len)) - f.write("]\n") - f.close() - - except IOError: - e = sys.exc_info()[1] - sys.stderr.write("Unable to create '%s'\n" % filename) - sys.stderr.write(str(e)+"\n") - return - - - # ----------------------------------------------------------------------------- - # pickle_table() - # - # This function pickles the LR parsing tables to a supplied file object - # ----------------------------------------------------------------------------- - - def pickle_table(self,filename,signature=""): - try: - import cPickle as pickle - except ImportError: - import pickle - outf = open(filename,"wb") - pickle.dump(__tabversion__,outf,pickle_protocol) - pickle.dump(self.lr_method,outf,pickle_protocol) - pickle.dump(signature,outf,pickle_protocol) - pickle.dump(self.lr_action,outf,pickle_protocol) - pickle.dump(self.lr_goto,outf,pickle_protocol) - - outp = [] - for p in self.lr_productions: - if p.func: - outp.append((p.str,p.name, p.len, p.func,p.file,p.line)) - else: - outp.append((str(p),p.name,p.len,None,None,None)) - pickle.dump(outp,outf,pickle_protocol) - outf.close() - # ----------------------------------------------------------------------------- # === INTROSPECTION === # @@ -2700,26 +1938,18 @@ del _lr_goto_items # ----------------------------------------------------------------------------- def get_caller_module_dict(levels): - try: - raise RuntimeError - except RuntimeError: - e,b,t = sys.exc_info() - f = t.tb_frame - while levels > 0: - f = f.f_back - levels -= 1 - ldict = 
f.f_globals.copy() - if f.f_globals != f.f_locals: - ldict.update(f.f_locals) - - return ldict + f = sys._getframe(levels) + ldict = f.f_globals.copy() + if f.f_globals != f.f_locals: + ldict.update(f.f_locals) + return ldict # ----------------------------------------------------------------------------- # parse_grammar() # # This takes a raw grammar rule string and parses it into production data # ----------------------------------------------------------------------------- -def parse_grammar(doc,file,line): +def parse_grammar(doc, file, line): grammar = [] # Split the doc string into lines pstrings = doc.splitlines() @@ -2728,12 +1958,13 @@ def parse_grammar(doc,file,line): for ps in pstrings: dline += 1 p = ps.split() - if not p: continue + if not p: + continue try: if p[0] == '|': # This is a continuation of a previous rule if not lastp: - raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline)) + raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) prodname = lastp syms = p[1:] else: @@ -2742,13 +1973,13 @@ def parse_grammar(doc,file,line): syms = p[2:] assign = p[1] if assign != ':' and assign != '::=': - raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline)) + raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline)) - grammar.append((file,dline,prodname,syms)) + grammar.append((file, dline, prodname, syms)) except SyntaxError: raise except Exception: - raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip())) + raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) return grammar @@ -2760,14 +1991,14 @@ def parse_grammar(doc,file,line): # etc. # ----------------------------------------------------------------------------- class ParserReflect(object): - def __init__(self,pdict,log=None): + def __init__(self, pdict, log=None): self.pdict = pdict self.start = None self.error_func = None self.tokens = None - self.files = {} + self.modules = set() self.grammar = [] - self.error = 0 + self.error = False if log is None: self.log = PlyLogger(sys.stderr) @@ -2781,7 +2012,7 @@ class ParserReflect(object): self.get_tokens() self.get_precedence() self.get_pfunctions() - + # Validate all of the information def validate_all(self): self.validate_start() @@ -2789,32 +2020,28 @@ class ParserReflect(object): self.validate_tokens() self.validate_precedence() self.validate_pfunctions() - self.validate_files() + self.validate_modules() return self.error # Compute a signature over the grammar def signature(self): + parts = [] try: - from hashlib import md5 - except ImportError: - from md5 import md5 - try: - sig = md5() if self.start: - sig.update(self.start.encode('latin-1')) + parts.append(self.start) if self.prec: - sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1')) + parts.append(''.join([''.join(p) for p in self.prec])) if self.tokens: - sig.update(" ".join(self.tokens).encode('latin-1')) + parts.append(' '.join(self.tokens)) for f in self.pfuncs: if f[3]: - sig.update(f[3].encode('latin-1')) - except (TypeError,ValueError): + parts.append(f[3]) + except (TypeError, ValueError): pass - return sig.digest() + return ''.join(parts) # ----------------------------------------------------------------------------- - # validate_file() + # validate_modules() # # This method checks to see if there are duplicated p_rulename() functions # in the parser module file. Without this function, it is really easy for @@ -2824,32 +2051,29 @@ class ParserReflect(object): # to try and detect duplicates. 
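    # For context (hypothetical user code, not part of this diff): every
    # grammar rule reaches ParserReflect as a p_ function whose docstring
    # carries the production, e.g.
    #
    #     def p_expression_plus(p):
    #         'expression : expression PLUS term'
    #         p[0] = p[1] + p[3]
    #
    # parse_grammar() above splits such a docstring into
    # (file, line, 'expression', ['expression', 'PLUS', 'term']) tuples,
    # and the regex below flags the same p_ name being defined twice.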
# ----------------------------------------------------------------------------- - def validate_files(self): + def validate_modules(self): # Match def p_funcname( fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(') - for filename in self.files.keys(): - base,ext = os.path.splitext(filename) - if ext != '.py': return 1 # No idea. Assume it's okay. - + for module in self.modules: try: - f = open(filename) - lines = f.readlines() - f.close() + lines, linen = inspect.getsourcelines(module) except IOError: continue - counthash = { } - for linen,l in enumerate(lines): + counthash = {} + for linen, line in enumerate(lines): linen += 1 - m = fre.match(l) + m = fre.match(line) if m: name = m.group(1) prev = counthash.get(name) if not prev: counthash[name] = linen else: - self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev) + filename = inspect.getsourcefile(module) + self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d', + filename, linen, name, prev) # Get the start symbol def get_start(self): @@ -2858,7 +2082,7 @@ class ParserReflect(object): # Validate the start symbol def validate_start(self): if self.start is not None: - if not isinstance(self.start,str): + if not isinstance(self.start, str): self.log.error("'start' must be a string") # Look for error handler @@ -2868,162 +2092,173 @@ class ParserReflect(object): # Validate the error function def validate_error_func(self): if self.error_func: - if isinstance(self.error_func,types.FunctionType): + if isinstance(self.error_func, types.FunctionType): ismethod = 0 elif isinstance(self.error_func, types.MethodType): ismethod = 1 else: self.log.error("'p_error' defined, but is not a function or method") - self.error = 1 + self.error = True return - eline = func_code(self.error_func).co_firstlineno - efile = func_code(self.error_func).co_filename - self.files[efile] = 1 + eline = self.error_func.__code__.co_firstlineno + efile = self.error_func.__code__.co_filename + module = inspect.getmodule(self.error_func) + self.modules.add(module) - if (func_code(self.error_func).co_argcount != 1+ismethod): - self.log.error("%s:%d: p_error() requires 1 argument",efile,eline) - self.error = 1 + argcount = self.error_func.__code__.co_argcount - ismethod + if argcount != 1: + self.log.error('%s:%d: p_error() requires 1 argument', efile, eline) + self.error = True # Get the tokens map def get_tokens(self): - tokens = self.pdict.get("tokens",None) + tokens = self.pdict.get('tokens') if not tokens: - self.log.error("No token list is defined") - self.error = 1 + self.log.error('No token list is defined') + self.error = True return - if not isinstance(tokens,(list, tuple)): - self.log.error("tokens must be a list or tuple") - self.error = 1 + if not isinstance(tokens, (list, tuple)): + self.log.error('tokens must be a list or tuple') + self.error = True return - + if not tokens: - self.log.error("tokens is empty") - self.error = 1 + self.log.error('tokens is empty') + self.error = True return - self.tokens = tokens + self.tokens = sorted(tokens) # Validate the tokens def validate_tokens(self): # Validate the tokens. if 'error' in self.tokens: self.log.error("Illegal token name 'error'. 
Is a reserved word") - self.error = 1 + self.error = True return - terminals = {} + terminals = set() for n in self.tokens: if n in terminals: - self.log.warning("Token '%s' multiply defined", n) - terminals[n] = 1 + self.log.warning('Token %r multiply defined', n) + terminals.add(n) # Get the precedence map (if any) def get_precedence(self): - self.prec = self.pdict.get("precedence",None) + self.prec = self.pdict.get('precedence') # Validate and parse the precedence map def validate_precedence(self): preclist = [] if self.prec: - if not isinstance(self.prec,(list,tuple)): - self.log.error("precedence must be a list or tuple") - self.error = 1 + if not isinstance(self.prec, (list, tuple)): + self.log.error('precedence must be a list or tuple') + self.error = True return - for level,p in enumerate(self.prec): - if not isinstance(p,(list,tuple)): - self.log.error("Bad precedence table") - self.error = 1 + for level, p in enumerate(self.prec): + if not isinstance(p, (list, tuple)): + self.log.error('Bad precedence table') + self.error = True return if len(p) < 2: - self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p) - self.error = 1 + self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p) + self.error = True return assoc = p[0] - if not isinstance(assoc,str): - self.log.error("precedence associativity must be a string") - self.error = 1 + if not isinstance(assoc, str): + self.log.error('precedence associativity must be a string') + self.error = True return for term in p[1:]: - if not isinstance(term,str): - self.log.error("precedence items must be strings") - self.error = 1 + if not isinstance(term, str): + self.log.error('precedence items must be strings') + self.error = True return - preclist.append((term,assoc,level+1)) + preclist.append((term, assoc, level+1)) self.preclist = preclist # Get all p_functions from the grammar def get_pfunctions(self): p_functions = [] for name, item in self.pdict.items(): - if name[:2] != 'p_': continue - if name == 'p_error': continue - if isinstance(item,(types.FunctionType,types.MethodType)): - line = func_code(item).co_firstlineno - file = func_code(item).co_filename - p_functions.append((line,file,name,item.__doc__)) - - # Sort all of the actions by line number - p_functions.sort() + if not name.startswith('p_') or name == 'p_error': + continue + if isinstance(item, (types.FunctionType, types.MethodType)): + line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno) + module = inspect.getmodule(item) + p_functions.append((line, module, name, item.__doc__)) + + # Sort all of the actions by line number; make sure to stringify + # modules to make them sortable, since `line` may not uniquely sort all + # p functions + p_functions.sort(key=lambda p_function: ( + p_function[0], + str(p_function[1]), + p_function[2], + p_function[3])) self.pfuncs = p_functions - # Validate all of the p_functions def validate_pfunctions(self): grammar = [] # Check for non-empty symbols if len(self.pfuncs) == 0: - self.log.error("no rules of the form p_rulename are defined") - self.error = 1 - return - - for line, file, name, doc in self.pfuncs: + self.log.error('no rules of the form p_rulename are defined') + self.error = True + return + + for line, module, name, doc in self.pfuncs: + file = inspect.getsourcefile(module) func = self.pdict[name] if isinstance(func, types.MethodType): reqargs = 2 else: reqargs = 1 - if func_code(func).co_argcount > reqargs: - self.log.error("%s:%d: Rule '%s' has too many 
arguments",file,line,func.__name__) - self.error = 1 - elif func_code(func).co_argcount < reqargs: - self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__) - self.error = 1 + if func.__code__.co_argcount > reqargs: + self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__) + self.error = True + elif func.__code__.co_argcount < reqargs: + self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__) + self.error = True elif not func.__doc__: - self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__) + self.log.warning('%s:%d: No documentation string specified in function %r (ignored)', + file, line, func.__name__) else: try: - parsed_g = parse_grammar(doc,file,line) + parsed_g = parse_grammar(doc, file, line) for g in parsed_g: grammar.append((name, g)) - except SyntaxError: - e = sys.exc_info()[1] + except SyntaxError as e: self.log.error(str(e)) - self.error = 1 + self.error = True # Looks like a valid grammar rule # Mark the file in which defined. - self.files[file] = 1 + self.modules.add(module) # Secondary validation step that looks for p_ definitions that are not functions # or functions that look like they might be grammar rules. - for n,v in self.pdict.items(): - if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue - if n[0:2] == 't_': continue - if n[0:2] == 'p_' and n != 'p_error': - self.log.warning("'%s' not defined as a function", n) - if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or - (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)): - try: - doc = v.__doc__.split(" ") - if doc[1] == ':': - self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix", - func_code(v).co_filename, func_code(v).co_firstlineno,n) - except Exception: - pass + for n, v in self.pdict.items(): + if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)): + continue + if n.startswith('t_'): + continue + if n.startswith('p_') and n != 'p_error': + self.log.warning('%r not defined as a function', n) + if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or + (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)): + if v.__doc__: + try: + doc = v.__doc__.split(' ') + if doc[1] == ':': + self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix', + v.__code__.co_filename, v.__code__.co_firstlineno, n) + except IndexError: + pass self.grammar = grammar @@ -3033,76 +2268,61 @@ class ParserReflect(object): # Build a parser # ----------------------------------------------------------------------------- -def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None, - check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='', - debuglog=None, errorlog = None, picklefile=None): - - global parse # Reference to the parsing method of the last built parser - - # If pickling is enabled, table files are not created +def yacc(*, debug=yaccdebug, module=None, start=None, + check_recursion=True, optimize=False, debugfile=debug_file, + debuglog=None, errorlog=None): - if picklefile: - write_tables = 0 + # Reference to the parsing method of the last built parser + global parse if errorlog is None: errorlog = PlyLogger(sys.stderr) # Get the module dictionary used for the parser if module: - _items = [(k,getattr(module,k)) for k in dir(module)] + _items = [(k, getattr(module, k)) 
for k in dir(module)] pdict = dict(_items) + # If no __file__ or __package__ attributes are available, try to obtain them + # from the __module__ instead + if '__file__' not in pdict: + pdict['__file__'] = sys.modules[pdict['__module__']].__file__ + if '__package__' not in pdict and '__module__' in pdict: + if hasattr(sys.modules[pdict['__module__']], '__package__'): + pdict['__package__'] = sys.modules[pdict['__module__']].__package__ else: pdict = get_caller_module_dict(2) + # Set start symbol if it's specified directly using an argument + if start is not None: + pdict['start'] = start + # Collect parser information from the dictionary - pinfo = ParserReflect(pdict,log=errorlog) + pinfo = ParserReflect(pdict, log=errorlog) pinfo.get_all() if pinfo.error: - raise YaccError("Unable to build parser") - - # Check signature against table files (if any) - signature = pinfo.signature() - - # Read the tables - try: - lr = LRTable() - if picklefile: - read_signature = lr.read_pickle(picklefile) - else: - read_signature = lr.read_table(tabmodule) - if optimize or (read_signature == signature): - try: - lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) - parse = parser.parse - return parser - except Exception: - e = sys.exc_info()[1] - errorlog.warning("There was a problem loading the table file: %s", repr(e)) - except VersionError: - e = sys.exc_info() - errorlog.warning(str(e)) - except Exception: - pass + raise YaccError('Unable to build parser') if debuglog is None: if debug: - debuglog = PlyLogger(open(debugfile,"w")) + try: + debuglog = PlyLogger(open(debugfile, 'w')) + except IOError as e: + errorlog.warning("Couldn't open %r. %s" % (debugfile, e)) + debuglog = NullLogger() else: debuglog = NullLogger() - debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__) + debuglog.info('Created by PLY (http://www.dabeaz.com/ply)') - - errors = 0 + errors = False # Validate the parser information if pinfo.validate_all(): - raise YaccError("Unable to build parser") - + raise YaccError('Unable to build parser') + if not pinfo.error_func: - errorlog.warning("no p_error() function is defined") + errorlog.warning('no p_error() function is defined') # Create a grammar object grammar = Grammar(pinfo.tokens) @@ -3110,20 +2330,18 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star # Set precedence level for terminals for term, assoc, level in pinfo.preclist: try: - grammar.set_precedence(term,assoc,level) - except GrammarError: - e = sys.exc_info()[1] - errorlog.warning("%s",str(e)) + grammar.set_precedence(term, assoc, level) + except GrammarError as e: + errorlog.warning('%s', e) # Add productions to the grammar for funcname, gram in pinfo.grammar: file, line, prodname, syms = gram try: - grammar.add_production(prodname,syms,funcname,file,line) - except GrammarError: - e = sys.exc_info()[1] - errorlog.error("%s",str(e)) - errors = 1 + grammar.add_production(prodname, syms, funcname, file, line) + except GrammarError as e: + errorlog.error('%s', e) + errors = True # Set the grammar start symbols try: @@ -3131,146 +2349,134 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star grammar.set_start(pinfo.start) else: grammar.set_start(start) - except GrammarError: - e = sys.exc_info()[1] + except GrammarError as e: errorlog.error(str(e)) - errors = 1 + errors = True if errors: - raise YaccError("Unable to build parser") + raise YaccError('Unable to build parser') # Verify the grammar structure 
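    # (The verification below reuses the Grammar diagnostics defined
    # earlier -- undefined_symbols(), unused_terminals(), unused_rules(),
    # find_unreachable(), infinite_cycles(), unused_precedence() -- logging
    # each finding and raising YaccError on any hard error.)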
undefined_symbols = grammar.undefined_symbols() for sym, prod in undefined_symbols: - errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym) - errors = 1 + errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym) + errors = True unused_terminals = grammar.unused_terminals() if unused_terminals: - debuglog.info("") - debuglog.info("Unused terminals:") - debuglog.info("") + debuglog.info('') + debuglog.info('Unused terminals:') + debuglog.info('') for term in unused_terminals: - errorlog.warning("Token '%s' defined, but not used", term) - debuglog.info(" %s", term) + errorlog.warning('Token %r defined, but not used', term) + debuglog.info(' %s', term) # Print out all productions to the debug log if debug: - debuglog.info("") - debuglog.info("Grammar") - debuglog.info("") - for n,p in enumerate(grammar.Productions): - debuglog.info("Rule %-5d %s", n, p) + debuglog.info('') + debuglog.info('Grammar') + debuglog.info('') + for n, p in enumerate(grammar.Productions): + debuglog.info('Rule %-5d %s', n, p) # Find unused non-terminals unused_rules = grammar.unused_rules() for prod in unused_rules: - errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name) + errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name) if len(unused_terminals) == 1: - errorlog.warning("There is 1 unused token") + errorlog.warning('There is 1 unused token') if len(unused_terminals) > 1: - errorlog.warning("There are %d unused tokens", len(unused_terminals)) + errorlog.warning('There are %d unused tokens', len(unused_terminals)) if len(unused_rules) == 1: - errorlog.warning("There is 1 unused rule") + errorlog.warning('There is 1 unused rule') if len(unused_rules) > 1: - errorlog.warning("There are %d unused rules", len(unused_rules)) + errorlog.warning('There are %d unused rules', len(unused_rules)) if debug: - debuglog.info("") - debuglog.info("Terminals, with rules where they appear") - debuglog.info("") + debuglog.info('') + debuglog.info('Terminals, with rules where they appear') + debuglog.info('') terms = list(grammar.Terminals) terms.sort() for term in terms: - debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]])) - - debuglog.info("") - debuglog.info("Nonterminals, with rules where they appear") - debuglog.info("") + debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]])) + + debuglog.info('') + debuglog.info('Nonterminals, with rules where they appear') + debuglog.info('') nonterms = list(grammar.Nonterminals) nonterms.sort() for nonterm in nonterms: - debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]])) - debuglog.info("") + debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]])) + debuglog.info('') if check_recursion: unreachable = grammar.find_unreachable() for u in unreachable: - errorlog.warning("Symbol '%s' is unreachable",u) + errorlog.warning('Symbol %r is unreachable', u) infinite = grammar.infinite_cycles() for inf in infinite: - errorlog.error("Infinite recursion detected for symbol '%s'", inf) - errors = 1 - + errorlog.error('Infinite recursion detected for symbol %r', inf) + errors = True + unused_prec = grammar.unused_precedence() for term, assoc in unused_prec: - errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term) - errors = 1 + errorlog.error('Precedence rule 
%r defined for unknown symbol %r', assoc, term) + errors = True if errors: - raise YaccError("Unable to build parser") - - # Run the LRGeneratedTable on the grammar - if debug: - errorlog.debug("Generating %s tables", method) - - lr = LRGeneratedTable(grammar,method,debuglog) + raise YaccError('Unable to build parser') + + # Run the LRTable on the grammar + lr = LRTable(grammar, debuglog) if debug: num_sr = len(lr.sr_conflicts) # Report shift/reduce and reduce/reduce conflicts if num_sr == 1: - errorlog.warning("1 shift/reduce conflict") + errorlog.warning('1 shift/reduce conflict') elif num_sr > 1: - errorlog.warning("%d shift/reduce conflicts", num_sr) + errorlog.warning('%d shift/reduce conflicts', num_sr) num_rr = len(lr.rr_conflicts) if num_rr == 1: - errorlog.warning("1 reduce/reduce conflict") + errorlog.warning('1 reduce/reduce conflict') elif num_rr > 1: - errorlog.warning("%d reduce/reduce conflicts", num_rr) + errorlog.warning('%d reduce/reduce conflicts', num_rr) # Write out conflicts to the output file if debug and (lr.sr_conflicts or lr.rr_conflicts): - debuglog.warning("") - debuglog.warning("Conflicts:") - debuglog.warning("") + debuglog.warning('') + debuglog.warning('Conflicts:') + debuglog.warning('') for state, tok, resolution in lr.sr_conflicts: - debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution) - - already_reported = {} + debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s', tok, state, resolution) + + already_reported = set() for state, rule, rejected in lr.rr_conflicts: - if (state,id(rule),id(rejected)) in already_reported: + if (state, id(rule), id(rejected)) in already_reported: continue - debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - debuglog.warning("rejected rule (%s) in state %d", rejected,state) - errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule) - errorlog.warning("rejected rule (%s) in state %d", rejected, state) - already_reported[state,id(rule),id(rejected)] = 1 - + debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + debuglog.warning('rejected rule (%s) in state %d', rejected, state) + errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule) + errorlog.warning('rejected rule (%s) in state %d', rejected, state) + already_reported.add((state, id(rule), id(rejected))) + warned_never = [] for state, rule, rejected in lr.rr_conflicts: if not rejected.reduced and (rejected not in warned_never): - debuglog.warning("Rule (%s) is never reduced", rejected) - errorlog.warning("Rule (%s) is never reduced", rejected) + debuglog.warning('Rule (%s) is never reduced', rejected) + errorlog.warning('Rule (%s) is never reduced', rejected) warned_never.append(rejected) - # Write the table file if requested - if write_tables: - lr.write_table(tabmodule,outputdir,signature) - - # Write a pickled version of the tables - if picklefile: - lr.pickle_table(picklefile,signature) - # Build the parser lr.bind_callables(pinfo.pdict) - parser = LRParser(lr,pinfo.error_func) + parser = LRParser(lr, pinfo.error_func) parse = parser.parse return parser diff --git a/components/script/dom/bindings/codegen/run.py b/components/script/dom/bindings/codegen/run.py index 130d35e5268..7f58de15d69 100644 --- a/components/script/dom/bindings/codegen/run.py +++ b/components/script/dom/bindings/codegen/run.py @@ -52,7 +52,7 @@ def main(): module = 
CGBindingRoot(config, prefix, filename).define() if module: with open(os.path.join(out_dir, prefix + ".rs"), "wb") as f: - f.write(module) + f.write(module.encode("utf-8")) def make_dir(path): @@ -66,7 +66,7 @@ def generate(config, name, filename): root = getattr(GlobalGenRoots, name)(config) code = root.define() with open(filename, "wb") as f: - f.write(code) + f.write(code.encode("utf-8")) def add_css_properties_attributes(css_properties_json, parser):
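With the table-writing, pickling, and SLR paths removed above, yacc() is
keyword-only and always rebuilds the LALR tables in memory. A caller sketch
under those assumptions (module, data, and lexer names hypothetical):

    import ply.yacc as yacc

    # tokens, precedence, and the p_* rule functions live in this module
    parser = yacc.yacc(debug=False)      # no parsetab.py is written
    result = parser.parse(data, lexer=mylexer)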