././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 011452 x ustar 00 0000000 0000000 28 mtime=1615502216.5540247 antlr4-python3-runtime-4.9.2/ 0000755 0000766 0000000 00000000000 00000000000 016265 5 ustar 00parrt wheel 0000000 0000000 ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0 antlr4-python3-runtime-4.9.2/MANIFEST.in 0000644 0000766 0000000 00000000056 00000000000 020024 0 ustar 00parrt wheel 0000000 0000000 include *.txt recursive-include test *.py *.c ././@PaxHeader 0000000 0000000 0000000 00000000033 00000000000 011451 x ustar 00 0000000 0000000 27 mtime=1615502216.553612 antlr4-python3-runtime-4.9.2/PKG-INFO 0000644 0000766 0000000 00000000432 00000000000 017361 0 ustar 00parrt wheel 0000000 0000000 Metadata-Version: 1.0 Name: antlr4-python3-runtime Version: 4.9.2 Summary: ANTLR 4.9.2 runtime for Python 3.7 Home-page: http://www.antlr.org Author: Eric Vergnaud, Terence Parr, Sam Harwell Author-email: eric.vergnaud@wanadoo.fr License: BSD Description: UNKNOWN Platform: UNKNOWN ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1606416672.0 antlr4-python3-runtime-4.9.2/README.txt 0000644 0000766 0000000 00000000272 00000000000 017764 0 ustar 00parrt wheel 0000000 0000000 This is the Python 3.4 runtime for ANTLR. Visit the ANTLR web sites for more information: http://www.antlr.org https://raw.githubusercontent.com/antlr/antlr4/master/doc/python-target.md ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1606416672.0 antlr4-python3-runtime-4.9.2/RELEASE-4.5.txt 0000644 0000766 0000000 00000000603 00000000000 020411 0 ustar 00parrt wheel 0000000 0000000 What's in this release? - fixed bug where non-ascii input streams would fail - added support for visitor pattern - added support for wildcards in grammar Breaking change: In version 4.4, the parser/lexer had a tokenNames member. This has been removed in favor of the following members: - lexicalNames, containing the parsed text - symbolicNames, corresponding to tokenNames ././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 011452 x ustar 00 0000000 0000000 28 mtime=1615502216.5323777 antlr4-python3-runtime-4.9.2/bin/ 0000755 0000766 0000000 00000000000 00000000000 017035 5 ustar 00parrt wheel 0000000 0000000 ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1606416672.0 antlr4-python3-runtime-4.9.2/bin/pygrun 0000755 0000766 0000000 00000013737 00000000000 020322 0 ustar 00parrt wheel 0000000 0000000 #!python __author__ = 'jszheng' import optparse import sys import os import importlib from antlr4 import * # this is a python version of TestRig def beautify_lisp_string(in_string): indent_size = 3 add_indent = ' '*indent_size out_string = in_string[0] # no indent for 1st ( indent = '' for i in range(1, len(in_string)): if in_string[i] == '(' and in_string[i+1] != ' ': indent += add_indent out_string += "\n" + indent + '(' elif in_string[i] == ')': out_string += ')' if len(indent) > 0: indent = indent.replace(add_indent, '', 1) else: out_string += in_string[i] return out_string if __name__ == '__main__': ############################################################# # parse options # not support -gui -encoding -ps ############################################################# usage = "Usage: %prog [options] Grammar_Name Start_Rule" parser = optparse.OptionParser(usage=usage) # parser.add_option('-t', '--tree', # dest="out_file", # default="default.out", # help='set output file name', # ) parser.add_option('-t', '--tree', default=False, action='store_true', help='Print AST tree' ) parser.add_option('-k', '--tokens', dest="token", default=False, action='store_true', help='Show Tokens' ) parser.add_option('-s', '--sll', dest="sll", default=False, action='store_true', help='Show SLL' ) parser.add_option('-d', '--diagnostics', dest="diagnostics", default=False, action='store_true', help='Enable diagnostics error listener' ) parser.add_option('-a', '--trace', dest="trace", default=False, action='store_true', help='Enable Trace' ) options, remainder = parser.parse_args() if len(remainder) < 2: print('ERROR: You have to provide at least 2 arguments!') parser.print_help() exit(1) else: grammar = remainder.pop(0) start_rule = remainder.pop(0) file_list = remainder ############################################################# # check and load antlr generated files ############################################################# # dynamic load the module and class lexerName = grammar + 'Lexer' parserName = grammar + 'Parser' # check if the generate file exist lexer_file = lexerName + '.py' parser_file = parserName + '.py' if not os.path.exists(lexer_file): print("[ERROR] Can't find lexer file {}!".format(lexer_file)) print(os.path.realpath('.')) exit(1) if not os.path.exists(parser_file): print("[ERROR] Can't find parser file {}!".format(lexer_file)) print(os.path.realpath('.')) exit(1) # current directory is where the generated file loaded # the script might be in different place. sys.path.append('.') # print(sys.path) # print("Load Lexer {}".format(lexerName)) module_lexer = __import__(lexerName, globals(), locals(), lexerName) class_lexer = getattr(module_lexer, lexerName) # print(class_lexer) # print("Load Parser {}".format(parserName)) module_parser = __import__(parserName, globals(), locals(), parserName) class_parser = getattr(module_parser, parserName) # print(class_parser) ############################################################# # main process steps. ############################################################# def process(input_stream, class_lexer, class_parser): lexer = class_lexer(input_stream) token_stream = CommonTokenStream(lexer) token_stream.fill() if options.token: # need to show token for tok in token_stream.tokens: print(tok) if start_rule == 'tokens': return parser = class_parser(token_stream) if options.diagnostics: parser.addErrorListener(DiagnosticErrorListener()) parser._interp.predictionMode = PredictionMode.LL_EXACT_AMBIG_DETECTION if options.tree: parser.buildParseTrees = True if options.sll: parser._interp.predictionMode = PredictionMode.SLL #parser.setTokenStream(token_stream) parser.setTrace(options.trace) if hasattr(parser, start_rule): func_start_rule = getattr(parser, start_rule) parser_ret = func_start_rule() if options.tree: lisp_tree_str = parser_ret.toStringTree(recog=parser) print(beautify_lisp_string(lisp_tree_str)) else: print("[ERROR] Can't find start rule '{}' in parser '{}'".format(start_rule, parserName)) ############################################################# # use stdin if not provide file as input stream ############################################################# if len(file_list) == 0: input_stream = InputStream(sys.stdin.read()) process(input_stream, class_lexer, class_parser) exit(0) ############################################################# # iterate all input file ############################################################# for file_name in file_list: if os.path.exists(file_name) and os.path.isfile(file_name): input_stream = FileStream(file_name) process(input_stream, class_lexer, class_parser) else: print("[ERROR] file {} not exist".format(os.path.normpath(file_name))) ././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 011452 x ustar 00 0000000 0000000 28 mtime=1615502216.5541408 antlr4-python3-runtime-4.9.2/setup.cfg 0000644 0000766 0000000 00000000046 00000000000 020106 0 ustar 00parrt wheel 0000000 0000000 [egg_info] tag_build = tag_date = 0 ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1615500126.0 antlr4-python3-runtime-4.9.2/setup.py 0000644 0000766 0000000 00000001017 00000000000 017776 0 ustar 00parrt wheel 0000000 0000000 from setuptools import setup setup( name='antlr4-python3-runtime', version='4.9.2', packages=['antlr4', 'antlr4.atn', 'antlr4.dfa', 'antlr4.tree', 'antlr4.error', 'antlr4.xpath'], package_dir={'': 'src'}, install_requires=[ "typing ; python_version<'3.5'", ], url='http://www.antlr.org', license='BSD', author='Eric Vergnaud, Terence Parr, Sam Harwell', author_email='eric.vergnaud@wanadoo.fr', scripts=["bin/pygrun"], description='ANTLR 4.9.2 runtime for Python 3.7' ) ././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 011452 x ustar 00 0000000 0000000 28 mtime=1615502216.5309317 antlr4-python3-runtime-4.9.2/src/ 0000755 0000766 0000000 00000000000 00000000000 017054 5 ustar 00parrt wheel 0000000 0000000 ././@PaxHeader 0000000 0000000 0000000 00000000033 00000000000 011451 x ustar 00 0000000 0000000 27 mtime=1615502216.538621 antlr4-python3-runtime-4.9.2/src/antlr4/ 0000755 0000766 0000000 00000000000 00000000000 020260 5 ustar 00parrt wheel 0000000 0000000 ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0 antlr4-python3-runtime-4.9.2/src/antlr4/BufferedTokenStream.py 0000644 0000766 0000000 00000024764 00000000000 024546 0 ustar 00parrt wheel 0000000 0000000 # # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. # Use of this file is governed by the BSD 3-clause license that # can be found in the LICENSE.txt file in the project root. # This implementation of {@link TokenStream} loads tokens from a # {@link TokenSource} on-demand, and places the tokens in a buffer to provide # access to any previous token by index. # #
# This token stream ignores the value of {@link Token#getChannel}. If your # parser requires the token stream filter tokens to only those on a particular # channel, such as {@link Token#DEFAULT_CHANNEL} or # {@link Token#HIDDEN_CHANNEL}, use a filtering token stream such a # {@link CommonTokenStream}.
from io import StringIO from antlr4.Token import Token from antlr4.error.Errors import IllegalStateException # need forward declaration Lexer = None # this is just to keep meaningful parameter types to Parser class TokenStream(object): pass class BufferedTokenStream(TokenStream): __slots__ = ('tokenSource', 'tokens', 'index', 'fetchedEOF') def __init__(self, tokenSource:Lexer): # The {@link TokenSource} from which tokens for this stream are fetched. self.tokenSource = tokenSource # A collection of all tokens fetched from the token source. The list is # considered a complete view of the input once {@link #fetchedEOF} is set # to {@code true}. self.tokens = [] # The index into {@link #tokens} of the current token (next token to # {@link #consume}). {@link #tokens}{@code [}{@link #p}{@code ]} should be # {@link #LT LT(1)}. # #This field is set to -1 when the stream is first constructed or when # {@link #setTokenSource} is called, indicating that the first token has # not yet been fetched from the token source. For additional information, # see the documentation of {@link IntStream} for a description of # Initializing Methods.
self.index = -1 # Indicates whether the {@link Token#EOF} token has been fetched from # {@link #tokenSource} and added to {@link #tokens}. This field improves # performance for the following cases: # #For example, {@link CommonTokenStream} overrides this method to ensure that # the seek target is always an on-channel token.
# # @param i The target token index. # @return The adjusted target token index. def adjustSeekIndex(self, i:int): return i def lazyInit(self): if self.index == -1: self.setup() def setup(self): self.sync(0) self.index = self.adjustSeekIndex(0) # Reset this token stream by setting its token source.#/ def setTokenSource(self, tokenSource:Lexer): self.tokenSource = tokenSource self.tokens = [] self.index = -1 self.fetchedEOF = False # Given a starting index, return the index of the next token on channel. # Return i if tokens[i] is on channel. Return -1 if there are no tokens # on channel between i and EOF. #/ def nextTokenOnChannel(self, i:int, channel:int): self.sync(i) if i>=len(self.tokens): return -1 token = self.tokens[i] while token.channel!=channel: if token.type==Token.EOF: return -1 i += 1 self.sync(i) token = self.tokens[i] return i # Given a starting index, return the index of the previous token on channel. # Return i if tokens[i] is on channel. Return -1 if there are no tokens # on channel between i and 0. def previousTokenOnChannel(self, i:int, channel:int): while i>=0 and self.tokens[i].channel!=channel: i -= 1 return i # Collect all tokens on specified channel to the right of # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL or # EOF. If channel is -1, find any non default channel token. def getHiddenTokensToRight(self, tokenIndex:int, channel:int=-1): self.lazyInit() if tokenIndex<0 or tokenIndex>=len(self.tokens): raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1)) from antlr4.Lexer import Lexer nextOnChannel = self.nextTokenOnChannel(tokenIndex + 1, Lexer.DEFAULT_TOKEN_CHANNEL) from_ = tokenIndex+1 # if none onchannel to right, nextOnChannel=-1 so set to = last token to = (len(self.tokens)-1) if nextOnChannel==-1 else nextOnChannel return self.filterForChannel(from_, to, channel) # Collect all tokens on specified channel to the left of # the current token up until we see a token on DEFAULT_TOKEN_CHANNEL. # If channel is -1, find any non default channel token. def getHiddenTokensToLeft(self, tokenIndex:int, channel:int=-1): self.lazyInit() if tokenIndex<0 or tokenIndex>=len(self.tokens): raise Exception(str(tokenIndex) + " not in 0.." + str(len(self.tokens)-1)) from antlr4.Lexer import Lexer prevOnChannel = self.previousTokenOnChannel(tokenIndex - 1, Lexer.DEFAULT_TOKEN_CHANNEL) if prevOnChannel == tokenIndex - 1: return None # if none on channel to left, prevOnChannel=-1 then from=0 from_ = prevOnChannel+1 to = tokenIndex-1 return self.filterForChannel(from_, to, channel) def filterForChannel(self, left:int, right:int, channel:int): hidden = [] for i in range(left, right+1): t = self.tokens[i] if channel==-1: from antlr4.Lexer import Lexer if t.channel!= Lexer.DEFAULT_TOKEN_CHANNEL: hidden.append(t) elif t.channel==channel: hidden.append(t) if len(hidden)==0: return None return hidden def getSourceName(self): return self.tokenSource.getSourceName() # Get the text of all tokens in this buffer.#/ def getText(self, start:int=None, stop:int=None): self.lazyInit() self.fill() if isinstance(start, Token): start = start.tokenIndex elif start is None: start = 0 if isinstance(stop, Token): stop = stop.tokenIndex elif stop is None or stop >= len(self.tokens): stop = len(self.tokens) - 1 if start < 0 or stop < 0 or stop < start: return "" with StringIO() as buf: for i in range(start, stop+1): t = self.tokens[i] if t.type==Token.EOF: break buf.write(t.text) return buf.getvalue() # Get all tokens from lexer until EOF#/ def fill(self): self.lazyInit() while self.fetch(1000)==1000: pass ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0 antlr4-python3-runtime-4.9.2/src/antlr4/CommonTokenFactory.py 0000644 0000766 0000000 00000004076 00000000000 024422 0 ustar 00parrt wheel 0000000 0000000 # # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. # Use of this file is governed by the BSD 3-clause license that # can be found in the LICENSE.txt file in the project root. # # # This default implementation of {@link TokenFactory} creates # {@link CommonToken} objects. # from antlr4.Token import CommonToken class TokenFactory(object): pass class CommonTokenFactory(TokenFactory): __slots__ = 'copyText' # # The default {@link CommonTokenFactory} instance. # ## This token factory does not explicitly copy token text when constructing # tokens.
# DEFAULT = None def __init__(self, copyText:bool=False): # Indicates whether {@link CommonToken#setText} should be called after # constructing tokens to explicitly set the text. This is useful for cases # where the input stream might not be able to provide arbitrary substrings # of text from the input after the lexer creates a token (e.g. the # implementation of {@link CharStream#getText} in # {@link UnbufferedCharStream} throws an # {@link UnsupportedOperationException}). Explicitly setting the token text # allows {@link Token#getText} to be called at any time regardless of the # input stream implementation. # ## The default value is {@code false} to avoid the performance and memory # overhead of copying text for every token unless explicitly requested.
# self.copyText = copyText def create(self, source, type:int, text:str, channel:int, start:int, stop:int, line:int, column:int): t = CommonToken(source, type, channel, start, stop) t.line = line t.column = column if text is not None: t.text = text elif self.copyText and source[1] is not None: t.text = source[1].getText(start,stop) return t def createThin(self, type:int, text:str): t = CommonToken(type=type) t.text = text return t CommonTokenFactory.DEFAULT = CommonTokenFactory() ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0 antlr4-python3-runtime-4.9.2/src/antlr4/CommonTokenStream.py 0000644 0000766 0000000 00000005322 00000000000 024241 0 ustar 00parrt wheel 0000000 0000000 # # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. # Use of this file is governed by the BSD 3-clause license that # can be found in the LICENSE.txt file in the project root. #/ # # This class extends {@link BufferedTokenStream} with functionality to filter # token streams to tokens on a particular channel (tokens where # {@link Token#getChannel} returns a particular value). # ## This token stream provides access to all tokens by index or when calling # methods like {@link #getText}. The channel filtering is only used for code # accessing tokens via the lookahead methods {@link #LA}, {@link #LT}, and # {@link #LB}.
# ## By default, tokens are placed on the default channel # ({@link Token#DEFAULT_CHANNEL}), but may be reassigned by using the # {@code ->channel(HIDDEN)} lexer command, or by using an embedded action to # call {@link Lexer#setChannel}. #
# ## Note: lexer rules which use the {@code ->skip} lexer command or call # {@link Lexer#skip} do not produce tokens at all, so input text matched by # such a rule will not be available as part of the token stream, regardless of # channel.
#/ from antlr4.BufferedTokenStream import BufferedTokenStream from antlr4.Lexer import Lexer from antlr4.Token import Token class CommonTokenStream(BufferedTokenStream): __slots__ = 'channel' def __init__(self, lexer:Lexer, channel:int=Token.DEFAULT_CHANNEL): super().__init__(lexer) self.channel = channel def adjustSeekIndex(self, i:int): return self.nextTokenOnChannel(i, self.channel) def LB(self, k:int): if k==0 or (self.index-k)<0: return None i = self.index n = 1 # find k good tokens looking backwards while n <= k: # skip off-channel tokens i = self.previousTokenOnChannel(i - 1, self.channel) n += 1 if i < 0: return None return self.tokens[i] def LT(self, k:int): self.lazyInit() if k == 0: return None if k < 0: return self.LB(-k) i = self.index n = 1 # we know tokens[pos] is a good one # find k good tokens while n < k: # skip off-channel tokens, but make sure to not look past EOF if self.sync(i + 1): i = self.nextTokenOnChannel(i + 1, self.channel) n += 1 return self.tokens[i] # Count EOF just once.#/ def getNumberOfOnChannelTokens(self): n = 0 self.fill() for i in range(0, len(self.tokens)): t = self.tokens[i] if t.channel==self.channel: n += 1 if t.type==Token.EOF: break return n ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0 antlr4-python3-runtime-4.9.2/src/antlr4/FileStream.py 0000644 0000766 0000000 00000001544 00000000000 022671 0 ustar 00parrt wheel 0000000 0000000 # # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. # Use of this file is governed by the BSD 3-clause license that # can be found in the LICENSE.txt file in the project root. # # # This is an InputStream that is loaded from a file all at once # when you construct the object. # import codecs from antlr4.InputStream import InputStream class FileStream(InputStream): __slots__ = 'fileName' def __init__(self, fileName:str, encoding:str='ascii', errors:str='strict'): super().__init__(self.readDataFrom(fileName, encoding, errors)) self.fileName = fileName def readDataFrom(self, fileName:str, encoding:str, errors:str='strict'): # read binary to avoid line ending conversion with open(fileName, 'rb') as file: bytes = file.read() return codecs.decode(bytes, encoding, errors) ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0 antlr4-python3-runtime-4.9.2/src/antlr4/InputStream.py 0000644 0000766 0000000 00000004436 00000000000 023114 0 ustar 00parrt wheel 0000000 0000000 # # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. # Use of this file is governed by the BSD 3-clause license that # can be found in the LICENSE.txt file in the project root. # # # Vacuum all input from a string and then treat it like a buffer. # from antlr4.Token import Token class InputStream (object): __slots__ = ('name', 'strdata', '_index', 'data', '_size') def __init__(self, data: str): self.name = "If {@code ctx} is {@code null} and the end of the rule containing # {@code s} is reached, {@link Token#EPSILON} is added to the result set. # If {@code ctx} is not {@code null} and the end of the outermost rule is # reached, {@link Token#EOF} is added to the result set.
# # @param s the ATN state # @param stopState the ATN state to stop at. This can be a # {@link BlockEndState} to detect epsilon paths through a closure. # @param ctx the complete parser context, or {@code null} if the context # should be ignored # # @return The set of tokens that can follow {@code s} in the ATN in the # specified {@code ctx}. #/ def LOOK(self, s:ATNState, stopState:ATNState=None, ctx:RuleContext=None): r = IntervalSet() seeThruPreds = True # ignore preds; get all lookahead lookContext = PredictionContextFromRuleContext(s.atn, ctx) if ctx is not None else None self._LOOK(s, stopState, lookContext, r, set(), set(), seeThruPreds, True) return r #* # Compute set of tokens that can follow {@code s} in the ATN in the # specified {@code ctx}. # #If {@code ctx} is {@code null} and {@code stopState} or the end of the # rule containing {@code s} is reached, {@link Token#EPSILON} is added to # the result set. If {@code ctx} is not {@code null} and {@code addEOF} is # {@code true} and {@code stopState} or the end of the outermost rule is # reached, {@link Token#EOF} is added to the result set.
# # @param s the ATN state. # @param stopState the ATN state to stop at. This can be a # {@link BlockEndState} to detect epsilon paths through a closure. # @param ctx The outer context, or {@code null} if the outer context should # not be used. # @param look The result lookahead set. # @param lookBusy A set used for preventing epsilon closures in the ATN # from causing a stack overflow. Outside code should pass # {@code new HashSetIf the final token in the list is an {@link Token#EOF} token, it will be used # as the EOF token for every call to {@link #nextToken} after the end of the # list is reached. Otherwise, an EOF token will be created.
# from antlr4.CommonTokenFactory import CommonTokenFactory from antlr4.Lexer import TokenSource from antlr4.Token import Token class ListTokenSource(TokenSource): __slots__ = ('tokens', 'sourceName', 'pos', 'eofToken', '_factory') # Constructs a new {@link ListTokenSource} instance from the specified # collection of {@link Token} objects and source name. # # @param tokens The collection of {@link Token} objects to provide as a # {@link TokenSource}. # @param sourceName The name of the {@link TokenSource}. If this value is # {@code null}, {@link #getSourceName} will attempt to infer the name from # the next {@link Token} (or the previous token if the end of the input has # been reached). # # @exception NullPointerException if {@code tokens} is {@code null} # def __init__(self, tokens:list, sourceName:str=None): if tokens is None: raise ReferenceError("tokens cannot be null") self.tokens = tokens self.sourceName = sourceName # The index into {@link #tokens} of token to return by the next call to # {@link #nextToken}. The end of the input is indicated by this value # being greater than or equal to the number of items in {@link #tokens}. self.pos = 0 # This field caches the EOF token for the token source. self.eofToken = None # This is the backing field for {@link #getTokenFactory} and self._factory = CommonTokenFactory.DEFAULT # # {@inheritDoc} # @property def column(self): if self.pos < len(self.tokens): return self.tokens[self.pos].column elif self.eofToken is not None: return self.eofToken.column elif len(self.tokens) > 0: # have to calculate the result from the line/column of the previous # token, along with the text of the token. lastToken = self.tokens[len(self.tokens) - 1] tokenText = lastToken.text if tokenText is not None: lastNewLine = tokenText.rfind('\n') if lastNewLine >= 0: return len(tokenText) - lastNewLine - 1 return lastToken.column + lastToken.stop - lastToken.start + 1 # only reach this if tokens is empty, meaning EOF occurs at the first # position in the input return 0 # # {@inheritDoc} # def nextToken(self): if self.pos >= len(self.tokens): if self.eofToken is None: start = -1 if len(self.tokens) > 0: previousStop = self.tokens[len(self.tokens) - 1].stop if previousStop != -1: start = previousStop + 1 stop = max(-1, start - 1) self.eofToken = self._factory.create((self, self.getInputStream()), Token.EOF, "EOF", Token.DEFAULT_CHANNEL, start, stop, self.line, self.column) return self.eofToken t = self.tokens[self.pos] if self.pos == len(self.tokens) - 1 and t.type == Token.EOF: self.eofToken = t self.pos += 1 return t # # {@inheritDoc} # @property def line(self): if self.pos < len(self.tokens): return self.tokens[self.pos].line elif self.eofToken is not None: return self.eofToken.line elif len(self.tokens) > 0: # have to calculate the result from the line/column of the previous # token, along with the text of the token. lastToken = self.tokens[len(self.tokens) - 1] line = lastToken.line tokenText = lastToken.text if tokenText is not None: line += tokenText.count('\n') # if no text is available, assume the token did not contain any newline characters. return line # only reach this if tokens is empty, meaning EOF occurs at the first # position in the input return 1 # # {@inheritDoc} # def getInputStream(self): if self.pos < len(self.tokens): return self.tokens[self.pos].getInputStream() elif self.eofToken is not None: return self.eofToken.getInputStream() elif len(self.tokens) > 0: return self.tokens[len(self.tokens) - 1].getInputStream() else: # no input stream information is available return None # # {@inheritDoc} # def getSourceName(self): if self.sourceName is not None: return self.sourceName inputStream = self.getInputStream() if inputStream is not None: return inputStream.getSourceName() else: return "List" ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1613495410.0 antlr4-python3-runtime-4.9.2/src/antlr4/Parser.py 0000644 0000766 0000000 00000054543 00000000000 022101 0 ustar 00parrt wheel 0000000 0000000 # # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. # Use of this file is governed by the BSD 3-clause license that # can be found in the LICENSE.txt file in the project root. import sys if sys.version_info[1] > 5: from typing import TextIO else: from typing.io import TextIO from antlr4.BufferedTokenStream import TokenStream from antlr4.CommonTokenFactory import TokenFactory from antlr4.error.ErrorStrategy import DefaultErrorStrategy from antlr4.InputStream import InputStream from antlr4.Recognizer import Recognizer from antlr4.RuleContext import RuleContext from antlr4.ParserRuleContext import ParserRuleContext from antlr4.Token import Token from antlr4.Lexer import Lexer from antlr4.atn.ATNDeserializer import ATNDeserializer from antlr4.atn.ATNDeserializationOptions import ATNDeserializationOptions from antlr4.error.Errors import UnsupportedOperationException, RecognitionException from antlr4.tree.ParseTreePatternMatcher import ParseTreePatternMatcher from antlr4.tree.Tree import ParseTreeListener, TerminalNode, ErrorNode class TraceListener(ParseTreeListener): __slots__ = '_parser' def __init__(self, parser): self._parser = parser def enterEveryRule(self, ctx): print("enter " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text, file=self._parser._output) def visitTerminal(self, node): print("consume " + str(node.symbol) + " rule " + self._parser.ruleNames[self._parser._ctx.getRuleIndex()], file=self._parser._output) def visitErrorNode(self, node): pass def exitEveryRule(self, ctx): print("exit " + self._parser.ruleNames[ctx.getRuleIndex()] + ", LT(1)=" + self._parser._input.LT(1).text, file=self._parser._output) # self is all the parsing support code essentially; most of it is error recovery stuff.# class Parser (Recognizer): __slots__ = ( '_input', '_output', '_errHandler', '_precedenceStack', '_ctx', 'buildParseTrees', '_tracer', '_parseListeners', '_syntaxErrors' ) # self field maps from the serialized ATN string to the deserialized {@link ATN} with # bypass alternatives. # # @see ATNDeserializationOptions#isGenerateRuleBypassTransitions() # bypassAltsAtnCache = dict() def __init__(self, input:TokenStream, output:TextIO = sys.stdout): super().__init__() # The input stream. self._input = None self._output = output # The error handling strategy for the parser. The default value is a new # instance of {@link DefaultErrorStrategy}. self._errHandler = DefaultErrorStrategy() self._precedenceStack = list() self._precedenceStack.append(0) # The {@link ParserRuleContext} object for the currently executing rule. # self is always non-null during the parsing process. self._ctx = None # Specifies whether or not the parser should construct a parse tree during # the parsing process. The default value is {@code true}. self.buildParseTrees = True # When {@link #setTrace}{@code (true)} is called, a reference to the # {@link TraceListener} is stored here so it can be easily removed in a # later call to {@link #setTrace}{@code (false)}. The listener itself is # implemented as a parser listener so self field is not directly used by # other parser methods. self._tracer = None # The list of {@link ParseTreeListener} listeners registered to receive # events during the parse. self._parseListeners = None # The number of syntax errors reported during parsing. self value is # incremented each time {@link #notifyErrorListeners} is called. self._syntaxErrors = 0 self.setInputStream(input) # reset the parser's state# def reset(self): if self._input is not None: self._input.seek(0) self._errHandler.reset(self) self._ctx = None self._syntaxErrors = 0 self.setTrace(False) self._precedenceStack = list() self._precedenceStack.append(0) if self._interp is not None: self._interp.reset() # Match current input symbol against {@code ttype}. If the symbol type # matches, {@link ANTLRErrorStrategy#reportMatch} and {@link #consume} are # called to complete the match process. # #If the symbol type does not match, # {@link ANTLRErrorStrategy#recoverInline} is called on the current error # strategy to attempt recovery. If {@link #getBuildParseTree} is # {@code true} and the token index of the symbol returned by # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to # the parse tree by calling {@link ParserRuleContext#addErrorNode}.
# # @param ttype the token type to match # @return the matched symbol # @throws RecognitionException if the current input symbol did not match # {@code ttype} and the error strategy could not recover from the # mismatched symbol def match(self, ttype:int): t = self.getCurrentToken() if t.type==ttype: self._errHandler.reportMatch(self) self.consume() else: t = self._errHandler.recoverInline(self) if self.buildParseTrees and t.tokenIndex==-1: # we must have conjured up a new token during single token insertion # if it's not the current symbol self._ctx.addErrorNode(t) return t # Match current input symbol as a wildcard. If the symbol type matches # (i.e. has a value greater than 0), {@link ANTLRErrorStrategy#reportMatch} # and {@link #consume} are called to complete the match process. # #If the symbol type does not match, # {@link ANTLRErrorStrategy#recoverInline} is called on the current error # strategy to attempt recovery. If {@link #getBuildParseTree} is # {@code true} and the token index of the symbol returned by # {@link ANTLRErrorStrategy#recoverInline} is -1, the symbol is added to # the parse tree by calling {@link ParserRuleContext#addErrorNode}.
# # @return the matched symbol # @throws RecognitionException if the current input symbol did not match # a wildcard and the error strategy could not recover from the mismatched # symbol def matchWildcard(self): t = self.getCurrentToken() if t.type > 0: self._errHandler.reportMatch(self) self.consume() else: t = self._errHandler.recoverInline(self) if self.buildParseTrees and t.tokenIndex == -1: # we must have conjured up a new token during single token insertion # if it's not the current symbol self._ctx.addErrorNode(t) return t def getParseListeners(self): return list() if self._parseListeners is None else self._parseListeners # Registers {@code listener} to receive events during the parsing process. # #To support output-preserving grammar transformations (including but not # limited to left-recursion removal, automated left-factoring, and # optimized code generation), calls to listener methods during the parse # may differ substantially from calls made by # {@link ParseTreeWalker#DEFAULT} used after the parse is complete. In # particular, rule entry and exit events may occur in a different order # during the parse than after the parser. In addition, calls to certain # rule entry methods may be omitted.
# #With the following specific exceptions, calls to listener events are # deterministic, i.e. for identical input the calls to listener # methods will be the same.
# #If {@code listener} is {@code null} or has not been added as a parse # listener, self method does nothing.
# @param listener the listener to remove # def removeParseListener(self, listener:ParseTreeListener): if self._parseListeners is not None: self._parseListeners.remove(listener) if len(self._parseListeners)==0: self._parseListeners = None # Remove all parse listeners. def removeParseListeners(self): self._parseListeners = None # Notify any parse listeners of an enter rule event. def triggerEnterRuleEvent(self): if self._parseListeners is not None: for listener in self._parseListeners: listener.enterEveryRule(self._ctx) self._ctx.enterRule(listener) # # Notify any parse listeners of an exit rule event. # # @see #addParseListener # def triggerExitRuleEvent(self): if self._parseListeners is not None: # reverse order walk of listeners for listener in reversed(self._parseListeners): self._ctx.exitRule(listener) listener.exitEveryRule(self._ctx) # Gets the number of syntax errors reported during parsing. This value is # incremented each time {@link #notifyErrorListeners} is called. # # @see #notifyErrorListeners # def getNumberOfSyntaxErrors(self): return self._syntaxErrors def getTokenFactory(self): return self._input.tokenSource._factory # Tell our token source and error strategy about a new way to create tokens.# def setTokenFactory(self, factory:TokenFactory): self._input.tokenSource._factory = factory # The ATN with bypass alternatives is expensive to create so we create it # lazily. # # @throws UnsupportedOperationException if the current parser does not # implement the {@link #getSerializedATN()} method. # def getATNWithBypassAlts(self): serializedAtn = self.getSerializedATN() if serializedAtn is None: raise UnsupportedOperationException("The current parser does not support an ATN with bypass alternatives.") result = self.bypassAltsAtnCache.get(serializedAtn, None) if result is None: deserializationOptions = ATNDeserializationOptions() deserializationOptions.generateRuleBypassTransitions = True result = ATNDeserializer(deserializationOptions).deserialize(serializedAtn) self.bypassAltsAtnCache[serializedAtn] = result return result # The preferred method of getting a tree pattern. For example, here's a # sample use: # #
# ParseTree t = parser.expr();
# ParseTreePattern p = parser.compileParseTreePattern("<ID>+0", MyParser.RULE_expr);
# ParseTreeMatch m = p.match(t);
# String id = m.get("ID");
#
#
def compileParseTreePattern(self, pattern:str, patternRuleIndex:int, lexer:Lexer = None):
if lexer is None:
if self.getTokenStream() is not None:
tokenSource = self.getTokenStream().tokenSource
if isinstance( tokenSource, Lexer ):
lexer = tokenSource
if lexer is None:
raise UnsupportedOperationException("Parser can't discover a lexer to use")
m = ParseTreePatternMatcher(lexer, self)
return m.compile(pattern, patternRuleIndex)
def getInputStream(self):
return self.getTokenStream()
def setInputStream(self, input:InputStream):
self.setTokenStream(input)
def getTokenStream(self):
return self._input
# Set the token stream and reset the parser.#
def setTokenStream(self, input:TokenStream):
self._input = None
self.reset()
self._input = input
# Match needs to return the current input symbol, which gets put
# into the label for the associated token ref; e.g., x=ID.
#
def getCurrentToken(self):
return self._input.LT(1)
def notifyErrorListeners(self, msg:str, offendingToken:Token = None, e:RecognitionException = None):
if offendingToken is None:
offendingToken = self.getCurrentToken()
self._syntaxErrors += 1
line = offendingToken.line
column = offendingToken.column
listener = self.getErrorListenerDispatch()
listener.syntaxError(self, offendingToken, line, column, msg, e)
#
# Consume and return the {@linkplain #getCurrentToken current symbol}.
#
# E.g., given the following input with {@code A} being the current # lookahead symbol, self function moves the cursor to {@code B} and returns # {@code A}.
# #
# A B
# ^
#
#
# If the parser is not in error recovery mode, the consumed symbol is added
# to the parse tree using {@link ParserRuleContext#addChild(Token)}, and
# {@link ParseTreeListener#visitTerminal} is called on any parse listeners.
# If the parser is in error recovery mode, the consumed symbol is
# added to the parse tree using
# {@link ParserRuleContext#addErrorNode(Token)}, and
# {@link ParseTreeListener#visitErrorNode} is called on any parse
# listeners.
#
def consume(self):
o = self.getCurrentToken()
if o.type != Token.EOF:
self.getInputStream().consume()
hasListener = self._parseListeners is not None and len(self._parseListeners)>0
if self.buildParseTrees or hasListener:
if self._errHandler.inErrorRecoveryMode(self):
node = self._ctx.addErrorNode(o)
else:
node = self._ctx.addTokenNode(o)
if hasListener:
for listener in self._parseListeners:
if isinstance(node, ErrorNode):
listener.visitErrorNode(node)
elif isinstance(node, TerminalNode):
listener.visitTerminal(node)
return o
def addContextToParseTree(self):
# add current context to parent if we have a parent
if self._ctx.parentCtx is not None:
self._ctx.parentCtx.addChild(self._ctx)
# Always called by generated parsers upon entry to a rule. Access field
# {@link #_ctx} get the current context.
#
def enterRule(self, localctx:ParserRuleContext , state:int , ruleIndex:int):
self.state = state
self._ctx = localctx
self._ctx.start = self._input.LT(1)
if self.buildParseTrees:
self.addContextToParseTree()
if self._parseListeners is not None:
self.triggerEnterRuleEvent()
def exitRule(self):
self._ctx.stop = self._input.LT(-1)
# trigger event on _ctx, before it reverts to parent
if self._parseListeners is not None:
self.triggerExitRuleEvent()
self.state = self._ctx.invokingState
self._ctx = self._ctx.parentCtx
def enterOuterAlt(self, localctx:ParserRuleContext, altNum:int):
localctx.setAltNumber(altNum)
# if we have new localctx, make sure we replace existing ctx
# that is previous child of parse tree
if self.buildParseTrees and self._ctx != localctx:
if self._ctx.parentCtx is not None:
self._ctx.parentCtx.removeLastChild()
self._ctx.parentCtx.addChild(localctx)
self._ctx = localctx
# Get the precedence level for the top-most precedence rule.
#
# @return The precedence level for the top-most precedence rule, or -1 if
# the parser context is not nested within a precedence rule.
#
def getPrecedence(self):
if len(self._precedenceStack)==0:
return -1
else:
return self._precedenceStack[-1]
def enterRecursionRule(self, localctx:ParserRuleContext, state:int, ruleIndex:int, precedence:int):
self.state = state
self._precedenceStack.append(precedence)
self._ctx = localctx
self._ctx.start = self._input.LT(1)
if self._parseListeners is not None:
self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules
#
# Like {@link #enterRule} but for recursive rules.
#
def pushNewRecursionContext(self, localctx:ParserRuleContext, state:int, ruleIndex:int):
previous = self._ctx
previous.parentCtx = localctx
previous.invokingState = state
previous.stop = self._input.LT(-1)
self._ctx = localctx
self._ctx.start = previous.start
if self.buildParseTrees:
self._ctx.addChild(previous)
if self._parseListeners is not None:
self.triggerEnterRuleEvent() # simulates rule entry for left-recursive rules
def unrollRecursionContexts(self, parentCtx:ParserRuleContext):
self._precedenceStack.pop()
self._ctx.stop = self._input.LT(-1)
retCtx = self._ctx # save current ctx (return value)
# unroll so _ctx is as it was before call to recursive method
if self._parseListeners is not None:
while self._ctx is not parentCtx:
self.triggerExitRuleEvent()
self._ctx = self._ctx.parentCtx
else:
self._ctx = parentCtx
# hook into tree
retCtx.parentCtx = parentCtx
if self.buildParseTrees and parentCtx is not None:
# add return ctx into invoking rule's tree
parentCtx.addChild(retCtx)
def getInvokingContext(self, ruleIndex:int):
ctx = self._ctx
while ctx is not None:
if ctx.getRuleIndex() == ruleIndex:
return ctx
ctx = ctx.parentCtx
return None
def precpred(self, localctx:RuleContext , precedence:int):
return precedence >= self._precedenceStack[-1]
def inContext(self, context:str):
# TODO: useful in parser?
return False
#
# Checks whether or not {@code symbol} can follow the current state in the
# ATN. The behavior of self method is equivalent to the following, but is
# implemented such that the complete context-sensitive follow set does not
# need to be explicitly constructed.
#
#
# return getExpectedTokens().contains(symbol);
#
#
# @param symbol the symbol type to check
# @return {@code true} if {@code symbol} can follow the current state in
# the ATN, otherwise {@code false}.
#
def isExpectedToken(self, symbol:int):
atn = self._interp.atn
ctx = self._ctx
s = atn.states[self.state]
following = atn.nextTokens(s)
if symbol in following:
return True
if not Token.EPSILON in following:
return False
while ctx is not None and ctx.invokingState>=0 and Token.EPSILON in following:
invokingState = atn.states[ctx.invokingState]
rt = invokingState.transitions[0]
following = atn.nextTokens(rt.followState)
if symbol in following:
return True
ctx = ctx.parentCtx
if Token.EPSILON in following and symbol == Token.EOF:
return True
else:
return False
# Computes the set of input symbols which could follow the current parser
# state and context, as given by {@link #getState} and {@link #getContext},
# respectively.
#
# @see ATN#getExpectedTokens(int, RuleContext)
#
def getExpectedTokens(self):
return self._interp.atn.getExpectedTokens(self.state, self._ctx)
def getExpectedTokensWithinCurrentRule(self):
atn = self._interp.atn
s = atn.states[self.state]
return atn.nextTokens(s)
# Get a rule's index (i.e., {@code RULE_ruleName} field) or -1 if not found.#
def getRuleIndex(self, ruleName:str):
ruleIndex = self.getRuleIndexMap().get(ruleName, None)
if ruleIndex is not None:
return ruleIndex
else:
return -1
# Return List<String> of the rule names in your parser instance
# leading up to a call to the current rule. You could override if
# you want more details such as the file/line info of where
# in the ATN a rule is invoked.
#
# this is very useful for error messages.
#
def getRuleInvocationStack(self, p:RuleContext=None):
if p is None:
p = self._ctx
stack = list()
while p is not None:
# compute what follows who invoked us
ruleIndex = p.getRuleIndex()
if ruleIndex<0:
stack.append("n/a")
else:
stack.append(self.ruleNames[ruleIndex])
p = p.parentCtx
return stack
# For debugging and other purposes.#
def getDFAStrings(self):
return [ str(dfa) for dfa in self._interp.decisionToDFA]
# For debugging and other purposes.#
def dumpDFA(self):
seenOne = False
for i in range(0, len(self._interp.decisionToDFA)):
dfa = self._interp.decisionToDFA[i]
if len(dfa.states)>0:
if seenOne:
print(file=self._output)
print("Decision " + str(dfa.decision) + ":", file=self._output)
print(dfa.toString(self.literalNames, self.symbolicNames), end='', file=self._output)
seenOne = True
def getSourceName(self):
return self._input.sourceName
# During a parse is sometimes useful to listen in on the rule entry and exit
# events as well as token matches. self is for quick and dirty debugging.
#
def setTrace(self, trace:bool):
if not trace:
self.removeParseListener(self._tracer)
self._tracer = None
else:
if self._tracer is not None:
self.removeParseListener(self._tracer)
self._tracer = TraceListener(self)
self.addParseListener(self._tracer)
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0
antlr4-python3-runtime-4.9.2/src/antlr4/ParserInterpreter.py 0000644 0000766 0000000 00000016046 00000000000 024321 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#
# A parser simulator that mimics what ANTLR's generated
# parser code does. A ParserATNSimulator is used to make
# predictions via adaptivePredict but this class moves a pointer through the
# ATN to simulate parsing. ParserATNSimulator just
# makes us efficient rather than having to backtrack, for example.
#
# This properly creates parse trees even for left recursive rules.
#
# We rely on the left recursive rule invocation and special predicate
# transitions to make left recursive rules work.
#
# See TestParserInterpreter for examples.
#
from antlr4.dfa.DFA import DFA
from antlr4.BufferedTokenStream import TokenStream
from antlr4.Lexer import Lexer
from antlr4.Parser import Parser
from antlr4.ParserRuleContext import InterpreterRuleContext, ParserRuleContext
from antlr4.Token import Token
from antlr4.atn.ATN import ATN
from antlr4.atn.ATNState import StarLoopEntryState, ATNState, LoopEndState
from antlr4.atn.ParserATNSimulator import ParserATNSimulator
from antlr4.PredictionContext import PredictionContextCache
from antlr4.atn.Transition import Transition
from antlr4.error.Errors import RecognitionException, UnsupportedOperationException, FailedPredicateException
class ParserInterpreter(Parser):
__slots__ = (
'grammarFileName', 'atn', 'tokenNames', 'ruleNames', 'decisionToDFA',
'sharedContextCache', '_parentContextStack',
'pushRecursionContextStates'
)
def __init__(self, grammarFileName:str, tokenNames:list, ruleNames:list, atn:ATN, input:TokenStream):
super().__init__(input)
self.grammarFileName = grammarFileName
self.atn = atn
self.tokenNames = tokenNames
self.ruleNames = ruleNames
self.decisionToDFA = [ DFA(state) for state in atn.decisionToState ]
self.sharedContextCache = PredictionContextCache()
self._parentContextStack = list()
# identify the ATN states where pushNewRecursionContext must be called
self.pushRecursionContextStates = set()
for state in atn.states:
if not isinstance(state, StarLoopEntryState):
continue
if state.isPrecedenceDecision:
self.pushRecursionContextStates.add(state.stateNumber)
# get atn simulator that knows how to do predictions
self._interp = ParserATNSimulator(self, atn, self.decisionToDFA, self.sharedContextCache)
# Begin parsing at startRuleIndex#
def parse(self, startRuleIndex:int):
startRuleStartState = self.atn.ruleToStartState[startRuleIndex]
rootContext = InterpreterRuleContext(None, ATNState.INVALID_STATE_NUMBER, startRuleIndex)
if startRuleStartState.isPrecedenceRule:
self.enterRecursionRule(rootContext, startRuleStartState.stateNumber, startRuleIndex, 0)
else:
self.enterRule(rootContext, startRuleStartState.stateNumber, startRuleIndex)
while True:
p = self.getATNState()
if p.stateType==ATNState.RULE_STOP :
# pop; return from rule
if len(self._ctx)==0:
if startRuleStartState.isPrecedenceRule:
result = self._ctx
parentContext = self._parentContextStack.pop()
self.unrollRecursionContexts(parentContext.a)
return result
else:
self.exitRule()
return rootContext
self.visitRuleStopState(p)
else:
try:
self.visitState(p)
except RecognitionException as e:
self.state = self.atn.ruleToStopState[p.ruleIndex].stateNumber
self._ctx.exception = e
self._errHandler.reportError(self, e)
self._errHandler.recover(self, e)
def enterRecursionRule(self, localctx:ParserRuleContext, state:int, ruleIndex:int, precedence:int):
self._parentContextStack.append((self._ctx, localctx.invokingState))
super().enterRecursionRule(localctx, state, ruleIndex, precedence)
def getATNState(self):
return self.atn.states[self.state]
def visitState(self, p:ATNState):
edge = 0
if len(p.transitions) > 1:
self._errHandler.sync(self)
edge = self._interp.adaptivePredict(self._input, p.decision, self._ctx)
else:
edge = 1
transition = p.transitions[edge - 1]
tt = transition.serializationType
if tt==Transition.EPSILON:
if self.pushRecursionContextStates[p.stateNumber] and not isinstance(transition.target, LoopEndState):
t = self._parentContextStack[-1]
ctx = InterpreterRuleContext(t[0], t[1], self._ctx.ruleIndex)
self.pushNewRecursionContext(ctx, self.atn.ruleToStartState[p.ruleIndex].stateNumber, self._ctx.ruleIndex)
elif tt==Transition.ATOM:
self.match(transition.label)
elif tt in [ Transition.RANGE, Transition.SET, Transition.NOT_SET]:
if not transition.matches(self._input.LA(1), Token.MIN_USER_TOKEN_TYPE, Lexer.MAX_CHAR_VALUE):
self._errHandler.recoverInline(self)
self.matchWildcard()
elif tt==Transition.WILDCARD:
self.matchWildcard()
elif tt==Transition.RULE:
ruleStartState = transition.target
ruleIndex = ruleStartState.ruleIndex
ctx = InterpreterRuleContext(self._ctx, p.stateNumber, ruleIndex)
if ruleStartState.isPrecedenceRule:
self.enterRecursionRule(ctx, ruleStartState.stateNumber, ruleIndex, transition.precedence)
else:
self.enterRule(ctx, transition.target.stateNumber, ruleIndex)
elif tt==Transition.PREDICATE:
if not self.sempred(self._ctx, transition.ruleIndex, transition.predIndex):
raise FailedPredicateException(self)
elif tt==Transition.ACTION:
self.action(self._ctx, transition.ruleIndex, transition.actionIndex)
elif tt==Transition.PRECEDENCE:
if not self.precpred(self._ctx, transition.precedence):
msg = "precpred(_ctx, " + str(transition.precedence) + ")"
raise FailedPredicateException(self, msg)
else:
raise UnsupportedOperationException("Unrecognized ATN transition type.")
self.state = transition.target.stateNumber
def visitRuleStopState(self, p:ATNState):
ruleStartState = self.atn.ruleToStartState[p.ruleIndex]
if ruleStartState.isPrecedenceRule:
parentContext = self._parentContextStack.pop()
self.unrollRecursionContexts(parentContext.a)
self.state = parentContext[1]
else:
self.exitRule()
ruleTransition = self.atn.states[self.state].transitions[0]
self.state = ruleTransition.followState.stateNumber
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0
antlr4-python3-runtime-4.9.2/src/antlr4/ParserRuleContext.py 0000644 0000766 0000000 00000015152 00000000000 024267 0 ustar 00parrt wheel 0000000 0000000 # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#* A rule invocation record for parsing.
#
# Contains all of the information about the current rule not stored in the
# RuleContext. It handles parse tree children list, Any ATN state
# tracing, and the default values available for rule indications:
# start, stop, rule index, current alt number, current
# ATN state.
#
# Subclasses made for each rule and grammar track the parameters,
# return values, locals, and labels specific to that rule. These
# are the objects that are returned from rules.
#
# Note text is not an actual field of a rule return value; it is computed
# from start and stop using the input stream's toString() method. I
# could add a ctor to this so that we can pass in and store the input
# stream, but I'm not sure we want to do that. It would seem to be undefined
# to get the .text property anyway if the rule matches tokens from multiple
# input streams.
#
# I do not use getters for fields of objects that are used simply to
# group values such as this aggregate. The getters/setters are there to
# satisfy the superclass interface.
from antlr4.RuleContext import RuleContext
from antlr4.Token import Token
from antlr4.tree.Tree import ParseTreeListener, ParseTree, TerminalNodeImpl, ErrorNodeImpl, TerminalNode, \
INVALID_INTERVAL
# need forward declaration
ParserRuleContext = None
class ParserRuleContext(RuleContext):
__slots__ = ('children', 'start', 'stop', 'exception')
def __init__(self, parent:ParserRuleContext = None, invokingStateNumber:int = None ):
super().__init__(parent, invokingStateNumber)
#* If we are debugging or building a parse tree for a visitor,
# we need to track all of the tokens and rule invocations associated
# with this rule's context. This is empty for parsing w/o tree constr.
# operation because we don't the need to track the details about
# how we parse this rule.
#/
self.children = None
self.start = None
self.stop = None
# The exception that forced this rule to return. If the rule successfully
# completed, this is {@code null}.
self.exception = None
#* COPY a ctx (I'm deliberately not using copy constructor)#/
#
# This is used in the generated parser code to flip a generic XContext
# node for rule X to a YContext for alt label Y. In that sense, it is
# not really a generic copy function.
#
# If we do an error sync() at start of a rule, we might add error nodes
# to the generic XContext so this function must copy those nodes to
# the YContext as well else they are lost!
#/
def copyFrom(self, ctx:ParserRuleContext):
# from RuleContext
self.parentCtx = ctx.parentCtx
self.invokingState = ctx.invokingState
self.children = None
self.start = ctx.start
self.stop = ctx.stop
# copy any error nodes to alt label node
if ctx.children is not None:
self.children = []
# reset parent pointer for any error nodes
for child in ctx.children:
if isinstance(child, ErrorNodeImpl):
self.children.append(child)
child.parentCtx = self
# Double dispatch methods for listeners
def enterRule(self, listener:ParseTreeListener):
pass
def exitRule(self, listener:ParseTreeListener):
pass
#* Does not set parent link; other add methods do that#/
def addChild(self, child:ParseTree):
if self.children is None:
self.children = []
self.children.append(child)
return child
#* Used by enterOuterAlt to toss out a RuleContext previously added as
# we entered a rule. If we have # label, we will need to remove
# generic ruleContext object.
#/
def removeLastChild(self):
if self.children is not None:
del self.children[len(self.children)-1]
def addTokenNode(self, token:Token):
node = TerminalNodeImpl(token)
self.addChild(node)
node.parentCtx = self
return node
def addErrorNode(self, badToken:Token):
node = ErrorNodeImpl(badToken)
self.addChild(node)
node.parentCtx = self
return node
def getChild(self, i:int, ttype:type = None):
if ttype is None:
return self.children[i] if len(self.children)>i else None
else:
for child in self.getChildren():
if not isinstance(child, ttype):
continue
if i==0:
return child
i -= 1
return None
def getChildren(self, predicate = None):
if self.children is not None:
for child in self.children:
if predicate is not None and not predicate(child):
continue
yield child
def getToken(self, ttype:int, i:int):
for child in self.getChildren():
if not isinstance(child, TerminalNode):
continue
if child.symbol.type != ttype:
continue
if i==0:
return child
i -= 1
return None
def getTokens(self, ttype:int ):
if self.getChildren() is None:
return []
tokens = []
for child in self.getChildren():
if not isinstance(child, TerminalNode):
continue
if child.symbol.type != ttype:
continue
tokens.append(child)
return tokens
def getTypedRuleContext(self, ctxType:type, i:int):
return self.getChild(i, ctxType)
def getTypedRuleContexts(self, ctxType:type):
children = self.getChildren()
if children is None:
return []
contexts = []
for child in children:
if not isinstance(child, ctxType):
continue
contexts.append(child)
return contexts
def getChildCount(self):
return len(self.children) if self.children else 0
def getSourceInterval(self):
if self.start is None or self.stop is None:
return INVALID_INTERVAL
else:
return (self.start.tokenIndex, self.stop.tokenIndex)
RuleContext.EMPTY = ParserRuleContext()
class InterpreterRuleContext(ParserRuleContext):
def __init__(self, parent:ParserRuleContext, invokingStateNumber:int, ruleIndex:int):
super().__init__(parent, invokingStateNumber)
self.ruleIndex = ruleIndex
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1606416672.0
antlr4-python3-runtime-4.9.2/src/antlr4/PredictionContext.py 0000644 0000766 0000000 00000054701 00000000000 024306 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#/
from io import StringIO
from antlr4.error.Errors import IllegalStateException
from antlr4.RuleContext import RuleContext
from antlr4.atn.ATN import ATN
from antlr4.atn.ATNState import ATNState
class PredictionContext(object):
# Represents {@code $} in local context prediction, which means wildcard.
# {@code#+x =#}.
#/
EMPTY = None
# Represents {@code $} in an array in full context mode, when {@code $}
# doesn't mean wildcard: {@code $ + x = [$,x]}. Here,
# {@code $} = {@link #EMPTY_RETURN_STATE}.
#/
EMPTY_RETURN_STATE = 0x7FFFFFFF
globalNodeCount = 1
id = globalNodeCount
# Stores the computed hash code of this {@link PredictionContext}. The hash
# code is computed in parts to match the following reference algorithm.
#
#
# private int referenceHashCode() {
# int hash = {@link MurmurHash#initialize MurmurHash.initialize}({@link #INITIAL_HASH});
#
# for (int i = 0; i < {@link #size()}; i++) {
# hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getParent getParent}(i));
# }
#
# for (int i = 0; i < {@link #size()}; i++) {
# hash = {@link MurmurHash#update MurmurHash.update}(hash, {@link #getReturnState getReturnState}(i));
# }
#
# hash = {@link MurmurHash#finish MurmurHash.finish}(hash, 2# {@link #size()});
# return hash;
# }
#
#/
def __init__(self, cachedHashCode:int):
self.cachedHashCode = cachedHashCode
def __len__(self):
return 0
# This means only the {@link #EMPTY} context is in set.
def isEmpty(self):
return self is self.EMPTY
def hasEmptyPath(self):
return self.getReturnState(len(self) - 1) == self.EMPTY_RETURN_STATE
def getReturnState(self, index:int):
raise IllegalStateException("illegal!")
def __hash__(self):
return self.cachedHashCode
def calculateHashCode(parent:PredictionContext, returnState:int):
return hash("") if parent is None else hash((hash(parent), returnState))
def calculateListsHashCode(parents:[], returnStates:[] ):
h = 0
for parent, returnState in zip(parents, returnStates):
h = hash((h, calculateHashCode(parent, returnState)))
return h
# Used to cache {@link PredictionContext} objects. Its used for the shared
# context cash associated with contexts in DFA states. This cache
# can be used for both lexers and parsers.
class PredictionContextCache(object):
def __init__(self):
self.cache = dict()
# Add a context to the cache and return it. If the context already exists,
# return that one instead and do not add a new context to the cache.
# Protect shared cache from unsafe thread access.
#
def add(self, ctx:PredictionContext):
if ctx==PredictionContext.EMPTY:
return PredictionContext.EMPTY
existing = self.cache.get(ctx, None)
if existing is not None:
return existing
self.cache[ctx] = ctx
return ctx
def get(self, ctx:PredictionContext):
return self.cache.get(ctx, None)
def __len__(self):
return len(self.cache)
class SingletonPredictionContext(PredictionContext):
@staticmethod
def create(parent:PredictionContext , returnState:int ):
if returnState == PredictionContext.EMPTY_RETURN_STATE and parent is None:
# someone can pass in the bits of an array ctx that mean $
return SingletonPredictionContext.EMPTY
else:
return SingletonPredictionContext(parent, returnState)
def __init__(self, parent:PredictionContext, returnState:int):
hashCode = calculateHashCode(parent, returnState)
super().__init__(hashCode)
self.parentCtx = parent
self.returnState = returnState
def __len__(self):
return 1
def getParent(self, index:int):
return self.parentCtx
def getReturnState(self, index:int):
return self.returnState
def __eq__(self, other):
if self is other:
return True
elif other is None:
return False
elif not isinstance(other, SingletonPredictionContext):
return False
else:
return self.returnState == other.returnState and self.parentCtx == other.parentCtx
def __hash__(self):
return self.cachedHashCode
def __str__(self):
up = "" if self.parentCtx is None else str(self.parentCtx)
if len(up)==0:
if self.returnState == self.EMPTY_RETURN_STATE:
return "$"
else:
return str(self.returnState)
else:
return str(self.returnState) + " " + up
class EmptyPredictionContext(SingletonPredictionContext):
def __init__(self):
super().__init__(None, PredictionContext.EMPTY_RETURN_STATE)
def isEmpty(self):
return True
def __eq__(self, other):
return self is other
def __hash__(self):
return self.cachedHashCode
def __str__(self):
return "$"
PredictionContext.EMPTY = EmptyPredictionContext()
class ArrayPredictionContext(PredictionContext):
# Parent can be null only if full ctx mode and we make an array
# from {@link #EMPTY} and non-empty. We merge {@link #EMPTY} by using null parent and
# returnState == {@link #EMPTY_RETURN_STATE}.
def __init__(self, parents:list, returnStates:list):
super().__init__(calculateListsHashCode(parents, returnStates))
self.parents = parents
self.returnStates = returnStates
def isEmpty(self):
# since EMPTY_RETURN_STATE can only appear in the last position, we
# don't need to verify that size==1
return self.returnStates[0]==PredictionContext.EMPTY_RETURN_STATE
def __len__(self):
return len(self.returnStates)
def getParent(self, index:int):
return self.parents[index]
def getReturnState(self, index:int):
return self.returnStates[index]
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, ArrayPredictionContext):
return False
elif hash(self) != hash(other):
return False # can't be same if hash is different
else:
return self.returnStates==other.returnStates and self.parents==other.parents
def __str__(self):
if self.isEmpty():
return "[]"
with StringIO() as buf:
buf.write("[")
for i in range(0,len(self.returnStates)):
if i>0:
buf.write(", ")
if self.returnStates[i]==PredictionContext.EMPTY_RETURN_STATE:
buf.write("$")
continue
buf.write(str(self.returnStates[i]))
if self.parents[i] is not None:
buf.write(' ')
buf.write(str(self.parents[i]))
else:
buf.write("null")
buf.write("]")
return buf.getvalue()
def __hash__(self):
return self.cachedHashCode
# Convert a {@link RuleContext} tree to a {@link PredictionContext} graph.
# Return {@link #EMPTY} if {@code outerContext} is empty or null.
#/
def PredictionContextFromRuleContext(atn:ATN, outerContext:RuleContext=None):
if outerContext is None:
outerContext = RuleContext.EMPTY
# if we are in RuleContext of start rule, s, then PredictionContext
# is EMPTY. Nobody called us. (if we are empty, return empty)
if outerContext.parentCtx is None or outerContext is RuleContext.EMPTY:
return PredictionContext.EMPTY
# If we have a parent, convert it to a PredictionContext graph
parent = PredictionContextFromRuleContext(atn, outerContext.parentCtx)
state = atn.states[outerContext.invokingState]
transition = state.transitions[0]
return SingletonPredictionContext.create(parent, transition.followState.stateNumber)
def merge(a:PredictionContext, b:PredictionContext, rootIsWildcard:bool, mergeCache:dict):
# share same graph if both same
if a==b:
return a
if isinstance(a, SingletonPredictionContext) and isinstance(b, SingletonPredictionContext):
return mergeSingletons(a, b, rootIsWildcard, mergeCache)
# At least one of a or b is array
# If one is $ and rootIsWildcard, return $ as# wildcard
if rootIsWildcard:
if isinstance( a, EmptyPredictionContext ):
return a
if isinstance( b, EmptyPredictionContext ):
return b
# convert singleton so both are arrays to normalize
if isinstance( a, SingletonPredictionContext ):
a = ArrayPredictionContext([a.parentCtx], [a.returnState])
if isinstance( b, SingletonPredictionContext):
b = ArrayPredictionContext([b.parentCtx], [b.returnState])
return mergeArrays(a, b, rootIsWildcard, mergeCache)
#
# Merge two {@link SingletonPredictionContext} instances.
#
# Stack tops equal, parents merge is same; return left graph.
#
Same stack top, parents differ; merge parents giving array node, then
# remainders of those graphs. A new root node is created to point to the
# merged parents.
#
Different stack tops pointing to same parent. Make array node for the
# root where both element in the root point to the same (original)
# parent.
#
Different stack tops pointing to different parents. Make array node for
# the root where each element points to the corresponding original
# parent.
#
These local-context merge operations are used when {@code rootIsWildcard} # is true.
# #{@link #EMPTY} is superset of any graph; return {@link #EMPTY}.
#
{@link #EMPTY} and anything is {@code #EMPTY}, so merged parent is
# {@code #EMPTY}; return left graph.
#
Special case of last merge if local context.
#
These full-context merge operations are used when {@code rootIsWildcard} # is false.
# # # #Must keep all contexts; {@link #EMPTY} in array is a special value (and
# null parent).
#
Different tops, different parents.
#
Shared top, same parents.
#
Shared top, different parents.
#
Shared top, all shared parents.
#
Equal tops, merge parents and reduce top to
# {@link SingletonPredictionContext}.
#
Used for XPath and tree pattern compilation.
# def getRuleIndexMap(self): ruleNames = self.getRuleNames() if ruleNames is None: from antlr4.error.Errors import UnsupportedOperationException raise UnsupportedOperationException("The current recognizer does not provide a list of rule names.") result = self.ruleIndexMapCache.get(ruleNames, None) if result is None: result = zip( ruleNames, range(0, len(ruleNames))) self.ruleIndexMapCache[ruleNames] = result return result def getTokenType(self, tokenName:str): ttype = self.getTokenTypeMap().get(tokenName, None) if ttype is not None: return ttype else: return Token.INVALID_TYPE # What is the error header, normally line/character position information?# def getErrorHeader(self, e:RecognitionException): line = e.getOffendingToken().line column = e.getOffendingToken().column return "line "+line+":"+column # How should a token be displayed in an error message? The default # is to display just the text, but during development you might # want to have a lot of information spit out. Override in that case # to use t.toString() (which, for CommonToken, dumps everything about # the token). This is better than forcing you to override a method in # your token objects because you don't have to go modify your lexer # so that it creates a new Java type. # # @deprecated This method is not called by the ANTLR 4 Runtime. Specific # implementations of {@link ANTLRErrorStrategy} may provide a similar # feature when necessary. For example, see # {@link DefaultErrorStrategy#getTokenErrorDisplay}. # def getTokenErrorDisplay(self, t:Token): if t is None: return "
# Since tokens on hidden channels (e.g. whitespace or comments) are not
# added to the parse trees, they will not appear in the output of this
# method.
#/
def getText(self):
if self.getChildCount() == 0:
return ""
with StringIO() as builder:
for child in self.getChildren():
builder.write(child.getText())
return builder.getvalue()
def getRuleIndex(self):
return -1
# For rule associated with this parse tree internal node, return
# the outer alternative number used to match the input. Default
# implementation does not compute nor store this alt num. Create
# a subclass of ParserRuleContext with backing field and set
# option contextSuperClass.
# to set it.
def getAltNumber(self):
return 0 # should use ATN.INVALID_ALT_NUMBER but won't compile
# Set the outer alternative number for this context node. Default
# implementation does nothing to avoid backing field overhead for
# trees that don't need it. Create
# a subclass of ParserRuleContext with backing field and set
# option contextSuperClass.
def setAltNumber(self, altNumber:int):
pass
def getChild(self, i:int):
return None
def getChildCount(self):
return 0
def getChildren(self):
for c in []:
yield c
def accept(self, visitor:ParseTreeVisitor):
return visitor.visitChildren(self)
# # Call this method to view a parse tree in a dialog box visually.#/
# public Future
# If {@code oldToken} is also a {@link CommonToken} instance, the newly
# constructed token will share a reference to the {@link #text} field and
# the {@link Pair} stored in {@link #source}. Otherwise, {@link #text} will
# be assigned the result of calling {@link #getText}, and {@link #source}
# will be constructed from the result of {@link Token#getTokenSource} and
# {@link Token#getInputStream}. If {@code context} is {@code null}, it is treated as
# {@link ParserRuleContext#EMPTY}. This method updates {@link #dipsIntoOuterContext} and
# {@link #hasSemanticContext} when necessary. This cache makes a huge difference in memory and a little bit in speed.
# For the Java grammar on java.*, it dropped the memory requirements
# at the end from 25M to 16M. We don't store any of the full context
# graphs in the DFA because they are limited to local context only,
# but apparently there's a lot of repetition there as well. We optimize
# the config contexts before storing the config set in the DFA states
# by literally rebuilding them with cached subgraphs only. I tried a cache for use during closure operations, that was
# whacked after each adaptivePredict(). It cost a little bit
# more time I think and doesn't save on the overall footprint
# so it's not worth the complexity. We track these variables separately for the DFA and ATN simulation
# because the DFA simulation often has to fail over to the ATN
# simulation. If the ATN simulation fails, we need the DFA to fall
# back to its previously accepted state, if any. If the ATN succeeds,
# then the ATN does the accept and the DFA simulator that invoked it
# can simply return the predicted token type. If {@code speculative} is {@code true}, this method was called before
# {@link #consume} for the matched character. This method should call
# {@link #consume} before evaluating the predicate to ensure position
# sensitive values, including {@link Lexer#getText}, {@link Lexer#getLine},
# and {@link Lexer#getcolumn}, properly reflect the current
# lexer state. This method should restore {@code input} and the simulator
# to the original state before returning (i.e. undo the actions made by the
# call to {@link #consume}. The {@code skip} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}. This action is implemented by calling {@link Lexer#pushMode} with the
# value provided by {@link #getMode}. The {@code popMode} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}. This action is implemented by calling {@link Lexer#popMode}. The {@code more} command does not have any parameters, so this action is
# implemented as a singleton instance exposed by {@link #INSTANCE}. This action is implemented by calling {@link Lexer#popMode}. This action is implemented by calling {@link Lexer#mode} with the
# value provided by {@link #getMode}. This class may represent embedded actions created with the Custom actions are implemented by calling {@link Lexer#action} with the
# appropriate rule and action indexes. This action is implemented by calling {@link Lexer#setChannel} with the
# value provided by {@link #getChannel}. This action is not serialized as part of the ATN, and is only required for
# position-dependent lexer actions which appear at a location other than the
# end of a rule. For more information about DFA optimizations employed for
# lexer actions, see {@link LexerActionExecutor#append} and
# {@link LexerActionExecutor#fixOffsetBeforeMatch}. Note: This class is only required for lexer actions for which
# {@link LexerAction#isPositionDependent} returns {@code true}. This method calls {@link #execute} on the result of {@link #getAction}
# using the provided {@code lexer}. The executor tracks position information for position-dependent lexer actions
# efficiently, ensuring that actions appearing only at the end of the rule do
# not cause bloating of the {@link DFA} created for the lexer. Normally, when the executor encounters lexer actions where
# {@link LexerAction#isPositionDependent} returns {@code true}, it calls
# {@link IntStream#seek} on the input {@link CharStream} to set the input
# position to the end of the current token. This behavior provides
# for efficient DFA representation of lexer actions which appear at the end
# of a lexer rule, even when the lexer rule matches a variable number of
# characters. Prior to traversing a match transition in the ATN, the current offset
# from the token start index is assigned to all position-dependent lexer
# actions which have not already been assigned a fixed offset. By storing
# the offsets relative to the token start index, the DFA representation of
# lexer actions which appear in the middle of tokens remains efficient due
# to sharing among tokens of the same length, regardless of their absolute
# position in the input stream. If the current executor already has offsets assigned to all
# position-dependent lexer actions, the method returns {@code this}. This method calls {@link IntStream#seek} to set the position of the
# {@code input} {@link CharStream} prior to calling
# {@link LexerAction#execute} on a position-dependent action. Before the
# method returns, the input position will be restored to the same position
# it was in when the method was invoked.
# The basic complexity of the adaptive strategy makes it harder to understand.
# We begin with ATN simulation to build paths in a DFA. Subsequent prediction
# requests go through the DFA first. If they reach a state without an edge for
# the current symbol, the algorithm fails over to the ATN simulation to
# complete the DFA path for the current input (until it finds a conflict state
# or uniquely predicting state).
# All of that is done without using the outer context because we want to create
# a DFA that is not dependent upon the rule invocation stack when we do a
# prediction. One DFA works in all contexts. We avoid using context not
# necessarily because it's slower, although it can be, but because of the DFA
# caching problem. The closure routine only considers the rule invocation stack
# created during prediction beginning in the decision rule. For example, if
# prediction occurs without invoking another rule's ATN, there are no context
# stacks in the configurations. When lack of context leads to a conflict, we
# don't know if it's an ambiguity or a weakness in the strong LL(*) parsing
# strategy (versus full LL(*)).
# When SLL yields a configuration set with conflict, we rewind the input and
# retry the ATN simulation, this time using full outer context without adding
# to the DFA. Configuration context stacks will be the full invocation stacks
# from the start rule. If we get a conflict using full context, then we can
# definitively say we have a true ambiguity for that input sequence. If we
# don't get a conflict, it implies that the decision is sensitive to the outer
# context. (It is not context-sensitive in the sense of context-sensitive
# grammars.)
# The next time we reach this DFA state with an SLL conflict, through DFA
# simulation, we will again retry the ATN simulation using full context mode.
# This is slow because we can't save the results and have to "interpret" the
# ATN each time we get that input.
# CACHING FULL CONTEXT PREDICTIONS
# We could cache results from full context to predicted alternative easily and
# that saves a lot of time but doesn't work in presence of predicates. The set
# of visible predicates from the ATN start state changes depending on the
# context, because closure can fall off the end of a rule. I tried to cache
# tuples (stack context, semantic context, predicted alt) but it was slower
# than interpreting and much more complicated. Also required a huge amount of
# memory. The goal is not to create the world's fastest parser anyway. I'd like
# to keep this algorithm simple. By launching multiple threads, we can improve
# the speed of parsing across a large number of files.
# There is no strict ordering between the amount of input used by SLL vs LL,
# which makes it really hard to build a cache for full context. Let's say that
# we have input A B C that leads to an SLL conflict with full context X. That
# implies that using X we might only use A B but we could also use A B C D to
# resolve conflict. Input A B C D could predict alternative 1 in one position
# in the input and A B C E could predict alternative 2 in another position in
# input. The conflicting SLL configurations could still be non-unique in the
# full context prediction, which would lead us to requiring more input than the
# original A B C. To make a prediction cache work, we have to track the exact
# input used during the previous prediction. That amounts to a cache that maps
# X to a specific DFA for that context.
# Something should be done for left-recursive expression predictions. They are
# likely LL(1) + pred eval. Easier to do the whole SLL unless error and retry
# with full LL thing Sam does.
# AVOIDING FULL CONTEXT PREDICTION
# We avoid doing full context retry when the outer context is empty, we did not
# dip into the outer context by falling off the end of the decision state rule,
# or when we force SLL mode.
# As an example of the not dip into outer context case, consider as super
# constructor calls versus function calls. One grammar might look like
# this:
# Or, you might see something like
# In both cases I believe that no closure operations will dip into the outer
# context. In the first case ctorBody in the worst case will stop at the '}'.
# In the 2nd case it should stop at the ';'. Both cases should stay within the
# entry rule and not dip into the outer context.
# PREDICATES
# Predicates are always evaluated if present in either SLL or LL both. SLL and
# LL simulation deals with predicates differently. SLL collects predicates as
# it performs closure operations like ANTLR v3 did. It delays predicate
# evaluation until it reaches and accept state. This allows us to cache the SLL
# ATN simulation whereas, if we had evaluated predicates on-the-fly during
# closure, the DFA state configuration sets would be different and we couldn't
# build up a suitable DFA.
# When building a DFA accept state during ATN simulation, we evaluate any
# predicates and return the sole semantically valid alternative. If there is
# more than 1 alternative, we report an ambiguity. If there are 0 alternatives,
# we throw an exception. Alternatives without predicates act like they have
# true predicates. The simple way to think about it is to strip away all
# alternatives with false predicates and choose the minimum alternative that
# remains.
# When we start in the DFA and reach an accept state that's predicated, we test
# those and return the minimum semantically viable alternative. If no
# alternatives are viable, we throw an exception.
# During full LL ATN simulation, closure always evaluates predicates and
# on-the-fly. This is crucial to reducing the configuration set size during
# closure. It hits a landmine when parsing with the Java grammar, for example,
# without this on-the-fly evaluation.
# SHARING DFA
# All instances of the same parser share the same decision DFAs through a
# static field. Each instance gets its own ATN simulator but they share the
# same {@link #decisionToDFA} field. They also share a
# {@link PredictionContextCache} object that makes sure that all
# {@link PredictionContext} objects are shared among the DFA states. This makes
# a big size difference.
# THREAD SAFETY
# The {@link ParserATNSimulator} locks on the {@link #decisionToDFA} field when
# it adds a new DFA object to that array. {@link #addDFAEdge}
# locks on the DFA for the current decision when setting the
# {@link DFAState#edges} field. {@link #addDFAState} locks on
# the DFA for the current decision when looking up a DFA state to see if it
# already exists. We must make sure that all requests to add DFA states that
# are equivalent result in the same shared DFA object. This is because lots of
# threads will be trying to update the DFA at once. The
# {@link #addDFAState} method also locks inside the DFA lock
# but this time on the shared context cache when it rebuilds the
# configurations' {@link PredictionContext} objects using cached
# subgraphs/nodes. No other locking occurs, even during DFA simulation. This is
# safe as long as we can guarantee that all threads referencing
# {@code s.edge[t]} get the same physical target {@link DFAState}, or
# {@code null}. Once into the DFA, the DFA simulation does not reference the
# {@link DFA#states} map. It follows the {@link DFAState#edges} field to new
# targets. The DFA simulator will either find {@link DFAState#edges} to be
# {@code null}, to be non-{@code null} and {@code dfa.edges[t]} null, or
# {@code dfa.edges[t]} to be non-null. The
# {@link #addDFAEdge} method could be racing to set the field
# but in either case the DFA simulator works; if {@code null}, and requests ATN
# simulation. It could also race trying to get {@code dfa.edges[t]}, but either
# way it will work because it's not doing a test and set operation.
# Starting with SLL then failing to combined SLL/LL (Two-Stage
# Parsing)
# Sam pointed out that if SLL does not give a syntax error, then there is no
# point in doing full LL, which is slower. We only have to try LL if we get a
# syntax error. For maximum speed, Sam starts the parser set to pure SLL
# mode with the {@link BailErrorStrategy}:
# If it does not get a syntax error, then we're done. If it does get a syntax
# error, we need to retry with the combined SLL/LL strategy.
# The reason this works is as follows. If there are no SLL conflicts, then the
# grammar is SLL (at least for that input set). If there is an SLL conflict,
# the full LL analysis must yield a set of viable alternatives which is a
# subset of the alternatives reported by SLL. If the LL set is a singleton,
# then the grammar is LL but not SLL. If the LL set is the same size as the SLL
# set, the decision is SLL. If the LL set has size > 1, then that decision
# is truly ambiguous on the current input. If the LL set is smaller, then the
# SLL conflict resolution might choose an alternative that the full LL would
# rule out as a possibility based upon better context information. If that's
# the case, then the SLL parse will definitely get an error because the full LL
# analysis says it's not viable. If SLL conflict resolution chooses an
# alternative within the LL set, them both SLL and LL would choose the same
# alternative because they both choose the minimum of multiple conflicting
# alternatives.
# Let's say we have a set of SLL conflicting alternatives {@code {1, 2, 3}} and
# a smaller LL set called s. If s is {@code {2, 3}}, then SLL
# parsing will get an error because SLL will pursue alternative 1. If
# s is {@code {1, 2}} or {@code {1, 3}} then both SLL and LL will
# choose the same alternative because alternative one is the minimum of either
# set. If s is {@code {2}} or {@code {3}} then SLL will get a syntax
# error. If s is {@code {1}} then SLL will succeed.
# Of course, if the input is invalid, then we will get an error for sure in
# both SLL and LL parsing. Erroneous input will therefore require 2 passes over
# the input. When {@code lookToEndOfRule} is true, this method uses
# {@link ATN#nextTokens} for each configuration in {@code configs} which is
# not already in a rule stop state to see if a rule stop state is reachable
# from the configuration via epsilon-only transitions.
# The prediction context must be considered by this filter to address
# situations like the following.
#
# If the above grammar, the ATN state immediately before the token
# reference {@code 'a'} in {@code letterA} is reachable from the left edge
# of both the primary and closure blocks of the left-recursive rule
# {@code statement}. The prediction context associated with each of these
# configurations distinguishes between them, and prevents the alternative
# which stepped out to {@code prog} (and then back in to {@code statement}
# from being eliminated by the filter.
#
# The default implementation of this method uses the following
# algorithm to identify an ATN configuration which successfully parsed the
# decision entry rule. Choosing such an alternative ensures that the
# {@link ParserRuleContext} returned by the calling rule will be complete
# and valid, and the syntax error will be reported later at a more
# localized location.
# In some scenarios, the algorithm described above could predict an
# alternative which will result in a {@link FailedPredicateException} in
# the parser. Specifically, this could occur if the only configuration
# capable of successfully parsing to the end of the decision rule is
# blocked by a semantic predicate. By choosing this alternative within
# {@link #adaptivePredict} instead of throwing a
# {@link NoViableAltException}, the resulting
# {@link FailedPredicateException} in the parser will identify the specific
# predicate which is preventing the parser from successfully parsing the
# decision rule, which helps developers identify and correct logic errors
# in semantic predicates.
# If {@code to} is {@code null}, this method returns {@code null}.
# Otherwise, this method returns the {@link DFAState} returned by calling
# {@link #addDFAState} for the {@code to} state. If {@code D} is {@link #ERROR}, this method returns {@link #ERROR} and
# does not change the DFA.
# When using this prediction mode, the parser will either return a correct
# parse tree (i.e. the same parse tree that would be returned with the
# {@link #LL} prediction mode), or it will report a syntax error. If a
# syntax error is encountered when using the {@link #SLL} prediction mode,
# it may be due to either an actual syntax error in the input or indicate
# that the particular combination of grammar and input requires the more
# powerful {@link #LL} prediction abilities to complete successfully.
# This prediction mode does not provide any guarantees for prediction
# behavior for syntactically-incorrect inputs.
# When using this prediction mode, the parser will make correct decisions
# for all syntactically-correct grammar and input combinations. However, in
# cases where the grammar is truly ambiguous this prediction mode might not
# report a precise answer for exactly which alternatives are
# ambiguous.
# This prediction mode does not provide any guarantees for prediction
# behavior for syntactically-incorrect inputs.
# This prediction mode may be used for diagnosing ambiguities during
# grammar development. Due to the performance overhead of calculating sets
# of ambiguous alternatives, this prediction mode should be avoided when
# the exact results are not necessary.
# This prediction mode does not provide any guarantees for prediction
# behavior for syntactically-incorrect inputs.
# This method computes the SLL prediction termination condition for both of
# the following cases. COMBINED SLL+LL PARSING When LL-fallback is enabled upon SLL conflict, correct predictions are
# ensured regardless of how the termination condition is computed by this
# method. Due to the substantially higher cost of LL prediction, the
# prediction should only fall back to LL when the additional lookahead
# cannot lead to a unique SLL prediction. Assuming combined SLL+LL parsing, an SLL configuration set with only
# conflicting subsets should fall back to full LL, even if the
# configuration sets don't resolve to the same alternative (e.g.
# {@code {1,2}} and {@code {3,4}}. If there is at least one non-conflicting
# configuration, SLL could continue with the hopes that more lookahead will
# resolve via one of those non-conflicting configurations. Here's the prediction termination rule them: SLL (for SLL+LL parsing)
# stops when it sees only conflicting configuration subsets. In contrast,
# full LL keeps going when there is uncertainty. HEURISTIC As a heuristic, we stop prediction when we see any conflicting subset
# unless we see a state that only has one alternative associated with it.
# The single-alt-state thing lets prediction continue upon rules like
# (otherwise, it would admit defeat too soon): {@code [12|1|[], 6|2|[], 12|2|[]]. s : (ID | ID ID?) ';' ;} When the ATN simulation reaches the state before {@code ';'}, it has a
# DFA state that looks like: {@code [12|1|[], 6|2|[], 12|2|[]]}. Naturally
# {@code 12|1|[]} and {@code 12|2|[]} conflict, but we cannot stop
# processing this node because alternative to has another way to continue,
# via {@code [6|2|[]]}. It also let's us continue for this rule: {@code [1|1|[], 1|2|[], 8|3|[]] a : A | A | A B ;} After matching input A, we reach the stop state for rule A, state 1.
# State 8 is the state right before B. Clearly alternatives 1 and 2
# conflict and no amount of further lookahead will separate the two.
# However, alternative 3 will be able to continue and so we do not stop
# working on this state. In the previous example, we're concerned with
# states associated with the conflicting alternatives. Here alt 3 is not
# associated with the conflicting configs, but since we can continue
# looking for input reasonably, don't declare the state done. PURE SLL PARSING To handle pure SLL parsing, all we have to do is make sure that we
# combine stack contexts for configurations that differ only by semantic
# predicate. From there, we can do the usual SLL termination heuristic. PREDICATES IN SLL+LL PARSING SLL decisions don't evaluate predicates until after they reach DFA stop
# states because they need to create the DFA cache that works in all
# semantic situations. In contrast, full LL evaluates predicates collected
# during start state computation so it can ignore predicates thereafter.
# This means that SLL termination detection can totally ignore semantic
# predicates. Implementation-wise, {@link ATNConfigSet} combines stack contexts but not
# semantic predicate contexts so we might see two configurations like the
# following. {@code (s, 1, x, {}), (s, 1, x', {p})} Before testing these configurations against others, we have to merge
# {@code x} and {@code x'} (without modifying the existing configurations).
# For example, we test {@code (x+x')==x''} when looking for conflicts in
# the following configurations. {@code (s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})} If the configuration set has predicates (as indicated by
# {@link ATNConfigSet#hasSemanticContext}), this algorithm makes a copy of
# the configurations to strip out all of the predicates so that a standard
# {@link ATNConfigSet} will merge everything ignoring predicates. Can we stop looking ahead during ATN simulation or is there some
# uncertainty as to which alternative we will ultimately pick, after
# consuming more input? Even if there are partial conflicts, we might know
# that everything is going to resolve to the same minimum alternative. That
# means we can stop since no more lookahead will change that fact. On the
# other hand, there might be multiple conflicts that resolve to different
# minimums. That means we need more look ahead to decide which of those
# alternatives we should predict. The basic idea is to split the set of configurations {@code C}, into
# conflicting subsets {@code (s, _, ctx, _)} and singleton subsets with
# non-conflicting configurations. Two configurations conflict if they have
# identical {@link ATNConfig#state} and {@link ATNConfig#context} values
# but different {@link ATNConfig#alt} value, e.g. {@code (s, i, ctx, _)}
# and {@code (s, j, ctx, _)} for {@code i!=j}. Reduce these configuration subsets to the set of possible alternatives.
# You can compute the alternative subsets in one pass as follows: {@code A_s,ctx = {i | (s, i, ctx, _)}} for each configuration in
# {@code C} holding {@code s} and {@code ctx} fixed. Or in pseudo-code, for each configuration {@code c} in {@code C}: The values in {@code map} are the set of {@code A_s,ctx} sets. If {@code |A_s,ctx|=1} then there is no conflict associated with
# {@code s} and {@code ctx}. Reduce the subsets to singletons by choosing a minimum of each subset. If
# the union of these alternative subsets is a singleton, then no amount of
# more lookahead will help us. We will always pick that alternative. If,
# however, there is more than one alternative, then we are uncertain which
# alternative to predict and must continue looking for resolution. We may
# or may not discover an ambiguity in the future, even if there are no
# conflicting subsets this round. The biggest sin is to terminate early because it means we've made a
# decision but were uncertain as to the eventual outcome. We haven't used
# enough lookahead. On the other hand, announcing a conflict too late is no
# big deal; you will still have the conflict. It's just inefficient. It
# might even look until the end of file. No special consideration for semantic predicates is required because
# predicates are evaluated on-the-fly for full LL prediction, ensuring that
# no configuration contains a semantic context during the termination
# check. CONFLICTING CONFIGS Two configurations {@code (s, i, x)} and {@code (s, j, x')}, conflict
# when {@code i!=j} but {@code x=x'}. Because we merge all
# {@code (s, i, _)} configurations together, that means that there are at
# most {@code n} configurations associated with state {@code s} for
# {@code n} possible alternatives in the decision. The merged stacks
# complicate the comparison of configuration contexts {@code x} and
# {@code x'}. Sam checks to see if one is a subset of the other by calling
# merge and checking to see if the merged result is either {@code x} or
# {@code x'}. If the {@code x} associated with lowest alternative {@code i}
# is the superset, then {@code i} is the only possible prediction since the
# others resolve to {@code min(i)} as well. However, if {@code x} is
# associated with {@code j>i} then at least one stack configuration for
# {@code j} is not in conflict with alternative {@code i}. The algorithm
# should keep going, looking for more lookahead due to the uncertainty. For simplicity, I'm doing a equality check between {@code x} and
# {@code x'} that lets the algorithm continue to consume lookahead longer
# than necessary. The reason I like the equality is of course the
# simplicity but also because that is the test you need to detect the
# alternatives that are actually in conflict. CONTINUE/STOP RULE Continue if union of resolved alternative sets from non-conflicting and
# conflicting alternative subsets has more than one alternative. We are
# uncertain about which alternative to predict. The complete set of alternatives, {@code [i for (_,i,_)]}, tells us which
# alternatives are still in the running for the amount of input we've
# consumed at this point. The conflicting sets let us to strip away
# configurations that won't lead to more states because we resolve
# conflicts to the configuration with a minimum alternate for the
# conflicting set. CASES EXACT AMBIGUITY DETECTION If all states report the same conflicting set of alternatives, then we
# know we have the exact ambiguity set. In other words, we continue examining lookahead until all {@code A_i}
# have more than one alternative and all {@code A_i} are the same. If
# {@code A={{1,2}, {1,3}}}, then regular LL prediction would terminate
# because the resolved set is {@code {1}}. To determine what the real
# ambiguity is, we have to know whether the ambiguity is between one and
# two or one and three so we keep going. We can only stop prediction when
# we need exact ambiguity detection when the sets look like
# {@code A={{1,2}}} or {@code {{1,2},{1,2}}}, etc... I have scoped the {@link AND}, {@link OR}, and {@link Predicate} subclasses of
# {@link SemanticContext} within the scope of this outer class. For context dependent predicates, we must pass in a local context so that
# references such as $arg evaluate properly as _localctx.arg. We only
# capture context dependent predicates in the context in which we begin
# prediction, so we passed in the outer context here in case of context
# dependent predicate evaluation.
# The evaluation of predicates by this context is short-circuiting, but
# unordered.
# The evaluation of predicates by this context is short-circuiting, but
# unordered. This is a one way link. It emanates from a state (usually via a list of
# transitions) and has a target state. Since we never have to change the ATN transitions once we construct it,
# we can fix these transitions as specific classes. The DFA transitions
# on the other hand need to update the labels as it adds transitions to
# the states. We'll use the term Edge for the DFA to distinguish them from
# ATN transitions. I use a set of ATNConfig objects not simple states. An ATNConfig
# is both a state (ala normal conversion) and a RuleContext describing
# the chain of rules (if any) followed to arrive at that state. A DFA state may have multiple references to a particular state,
# but with different ATN contexts (with same or different alts)
# meaning that state was reached via a different set of rule invocations. We only use these for non-{@link #requiresFullContext} but conflicting states. That
# means we know from the context (it's $ or we don't dip into outer
# context) that it's an ambiguity not a conflict. This list is computed by {@link ParserATNSimulator#predicateDFAState}. Because the number of alternatives and number of ATN configurations are
# finite, there is a finite number of DFA states that can be processed.
# This is necessary to show that the algorithm terminates. Cannot test the DFA state numbers here because in
# {@link ParserATNSimulator#addDFAState} we need to know if any other state
# exists that has this exact set of ATN configurations. The
# {@link #stateNumber} is irrelevant.
# This implementation prints messages to {@link System#err} containing the
# values of {@code line}, {@code charPositionInLine}, and {@code msg} using
# the following format. The default implementation simply calls {@link #endErrorCondition} to
# ensure that the handler is not in error recovery mode. The default implementation simply calls {@link #endErrorCondition}. The default implementation returns immediately if the handler is already
# in error recovery mode. Otherwise, it calls {@link #beginErrorCondition}
# and dispatches the reporting task based on the runtime type of {@code e}
# according to the following table. The default implementation resynchronizes the parser by consuming tokens
# until we find one in the resynchronization set--loosely the set of tokens
# that can follow the current rule. Implements Jim Idle's magic sync mechanism in closures and optional
# subrules. E.g., If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block
# with an empty alternative), then the expected set includes what follows
# the subrule. During loop iteration, it consumes until it sees a token that can start a
# sub rule or what follows loop. Yes, that is pretty aggressive. We opt to
# stay in the loop as long as possible. ORIGINS Previous versions of ANTLR did a poor job of their recovery within loops.
# A single mismatch token or missing token would force the parser to bail
# out of the entire rules surrounding the loop. So, for rule This functionality cost a little bit of effort because the parser has to
# compare token set at the start of the loop and at each iteration. If for
# some reason speed is suffering for you, you can turn off this
# functionality by simply overriding this method as a blank { }. This method is called when {@link #singleTokenDeletion} identifies
# single-token deletion as a viable recovery strategy for a mismatched
# input error. The default implementation simply returns if the handler is already in
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
# enter error recovery mode, followed by calling
# {@link Parser#notifyErrorListeners}. This method is called when {@link #singleTokenInsertion} identifies
# single-token insertion as a viable recovery strategy for a mismatched
# input error. The default implementation simply returns if the handler is already in
# error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to
# enter error recovery mode, followed by calling
# {@link Parser#notifyErrorListeners}. The default implementation attempts to recover from the mismatched input
# by using single token insertion and deletion as described below. If the
# recovery attempt fails, this method throws an
# {@link InputMismatchException}. EXTRA TOKEN (single token deletion) {@code LA(1)} is not what we are looking for. If {@code LA(2)} has the
# right token, however, then assume {@code LA(1)} is some extra spurious
# token and delete it. Then consume and return the next token (which was
# the {@code LA(2)} token) as the successful result of the match operation. This recovery strategy is implemented by {@link #singleTokenDeletion}. MISSING TOKEN (single token insertion) If current token (at {@code LA(1)}) is consistent with what could come
# after the expected {@code LA(1)} token, then assume the token is missing
# and use the parser's {@link TokenFactory} to create it on the fly. The
# "insertion" is performed by returning the created token as the successful
# result of the match operation. This recovery strategy is implemented by {@link #singleTokenInsertion}. EXAMPLE For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When
# the parser returns from the nested call to {@code expr}, it will have
# call chain: This method determines whether or not single-token insertion is viable by
# checking if the {@code LA(1)} input symbol could be successfully matched
# if it were instead the {@code LA(2)} symbol. If this method returns
# {@code true}, the caller is responsible for creating and inserting a
# token with the correct type to produce this behavior. If the single-token deletion is successful, this method calls
# {@link #reportUnwantedToken} to report the error, followed by
# {@link Parser#consume} to actually "delete" the extraneous token. Then,
# before returning {@link #reportMatch} is called to signal a successful
# match.
# This error strategy is useful in the following scenarios.
# {@code myparser.setErrorHandler(new BailErrorStrategy());} If the state number is not known, this method returns -1. If the set of expected tokens is not known and could not be computed,
# this method returns {@code null}. For example, for pattern {@code Pattern tags like {@code If the {@code label} is the name of a parser rule or token in the
# grammar, the resulting list will contain both the parse trees matching
# rule or tags explicitly labeled with the label and the complete set of
# parse trees matching the labeled and unlabeled tags in the pattern for
# the parser rule or token. For example, if {@code label} is {@code "foo"},
# the result will contain all of the following. Patterns are strings of source input text with special tags representing
# token or rule references such as: {@code Given a pattern start rule such as {@code statement}, this object constructs
# a {@link ParseTree} with placeholders for the {@code ID} and {@code expr}
# subtree. Then the {@link #match} routines can compare an actual
# {@link ParseTree} from a parse with this pattern. Tag {@code Pattern {@code x = 0;} is a similar pattern that matches the same pattern
# except that it requires the identifier to be {@code x} and the expression to
# be {@code 0}. The {@link #matches} routines return {@code true} or {@code false} based
# upon a match for the tree rooted at the parameter sent in. The
# {@link #match} routines return a {@link ParseTreeMatch} object that
# contains the parse tree, the parse tree pattern, and a map from tag name to
# matched nodes (more below). A subtree that fails to match, returns with
# {@link ParseTreeMatch#mismatchedNode} set to the first tree node that did not
# match. For efficiency, you can compile a tree pattern in string form to a
# {@link ParseTreePattern} object. See {@code TestParseTreeMatcher} for lots of examples.
# {@link ParseTreePattern} has two static helper methods:
# {@link ParseTreePattern#findAll} and {@link ParseTreePattern#match} that
# are easy to use but not super efficient because they create new
# {@link ParseTreePatternMatcher} objects each time and have to compile the
# pattern in string form before using it. The lexer and parser that you pass into the {@link ParseTreePatternMatcher}
# constructor are used to parse the pattern in string form. The lexer converts
# the {@code Normally a parser does not accept token {@code Delimiters are {@code <} and {@code >}, with {@code \} as the escape string
# by default, but you can set them to whatever you want using
# {@link #setDelimiters}. You must escape both start and stop strings
# {@code \<} and {@code \>}. The implementation for {@link TokenTagToken} returns the token tag
# formatted with {@code <} and {@code >} delimiters. The implementation for {@link TokenTagToken} returns a string of the form
# {@code tokenName:type}.
# Split path into words and separators {@code /} and {@code //} via ANTLR
# itself then walk path elements from left to right. At each separator-word
# pair, find set of nodes. Next stage uses those as work list.
# The basic interface is
# {@link XPath#findAll ParseTree.findAll}{@code (tree, pathString, parser)}.
# But that is just shorthand for:
# See {@code org.antlr.v4.test.TestXPath} for descriptions. In short, this
# allows operators:
# and path elements:
# Whitespace is not allowed.
#
#
#
# Basic Blocks
#
# Rule
#
#
#
# Block of 1 or more alternatives
#
#
#
# Greedy Loops
#
# Greedy Closure: {@code (...)*}
#
#
#
# Greedy Positive Closure: {@code (...)+}
#
#
#
# Greedy Optional: {@code (...)?}
#
#
#
# Non-Greedy Loops
#
# Non-Greedy Closure: {@code (...)*?}
#
#
#
# Non-Greedy Positive Closure: {@code (...)+?}
#
#
#
# Non-Greedy Optional: {@code (...)??}
#
#
#
from antlr4.atn.Transition import Transition
INITIAL_NUM_TRANSITIONS = 4
class ATNState(object):
__slots__ = (
'atn', 'stateNumber', 'stateType', 'ruleIndex', 'epsilonOnlyTransitions',
'transitions', 'nextTokenWithinRule',
)
# constants for serialization
INVALID_TYPE = 0
BASIC = 1
RULE_START = 2
BLOCK_START = 3
PLUS_BLOCK_START = 4
STAR_BLOCK_START = 5
TOKEN_START = 6
RULE_STOP = 7
BLOCK_END = 8
STAR_LOOP_BACK = 9
STAR_LOOP_ENTRY = 10
PLUS_LOOP_BACK = 11
LOOP_END = 12
serializationNames = [
"INVALID",
"BASIC",
"RULE_START",
"BLOCK_START",
"PLUS_BLOCK_START",
"STAR_BLOCK_START",
"TOKEN_START",
"RULE_STOP",
"BLOCK_END",
"STAR_LOOP_BACK",
"STAR_LOOP_ENTRY",
"PLUS_LOOP_BACK",
"LOOP_END" ]
INVALID_STATE_NUMBER = -1
def __init__(self):
# Which ATN are we in?
self.atn = None
self.stateNumber = ATNState.INVALID_STATE_NUMBER
self.stateType = None
self.ruleIndex = 0 # at runtime, we don't have Rule objects
self.epsilonOnlyTransitions = False
# Track the transitions emanating from this ATN state.
self.transitions = []
# Used to cache lookahead during parsing, not used during construction
self.nextTokenWithinRule = None
def __hash__(self):
return self.stateNumber
def __eq__(self, other):
return isinstance(other, ATNState) and self.stateNumber==other.stateNumber
def onlyHasEpsilonTransitions(self):
return self.epsilonOnlyTransitions
def isNonGreedyExitState(self):
return False
def __str__(self):
return str(self.stateNumber)
def addTransition(self, trans:Transition, index:int=-1):
if len(self.transitions)==0:
self.epsilonOnlyTransitions = trans.isEpsilon
elif self.epsilonOnlyTransitions != trans.isEpsilon:
self.epsilonOnlyTransitions = False
# TODO System.err.format(Locale.getDefault(), "ATN state %d has both epsilon and non-epsilon transitions.\n", stateNumber);
if index==-1:
self.transitions.append(trans)
else:
self.transitions.insert(index, trans)
class BasicState(ATNState):
def __init__(self):
super().__init__()
self.stateType = self.BASIC
class DecisionState(ATNState):
__slots__ = ('decision', 'nonGreedy')
def __init__(self):
super().__init__()
self.decision = -1
self.nonGreedy = False
# The start of a regular {@code (...)} block.
class BlockStartState(DecisionState):
__slots__ = 'endState'
def __init__(self):
super().__init__()
self.endState = None
class BasicBlockStartState(BlockStartState):
def __init__(self):
super().__init__()
self.stateType = self.BLOCK_START
# Terminal node of a simple {@code (a|b|c)} block.
class BlockEndState(ATNState):
__slots__ = 'startState'
def __init__(self):
super().__init__()
self.stateType = self.BLOCK_END
self.startState = None
# The last node in the ATN for a rule, unless that rule is the start symbol.
# In that case, there is one transition to EOF. Later, we might encode
# references to all calls to this rule to compute FOLLOW sets for
# error handling.
#
class RuleStopState(ATNState):
def __init__(self):
super().__init__()
self.stateType = self.RULE_STOP
class RuleStartState(ATNState):
__slots__ = ('stopState', 'isPrecedenceRule')
def __init__(self):
super().__init__()
self.stateType = self.RULE_START
self.stopState = None
self.isPrecedenceRule = False
# Decision state for {@code A+} and {@code (A|B)+}. It has two transitions:
# one to the loop back to start of the block and one to exit.
#
class PlusLoopbackState(DecisionState):
def __init__(self):
super().__init__()
self.stateType = self.PLUS_LOOP_BACK
# Start of {@code (A|B|...)+} loop. Technically a decision state, but
# we don't use for code generation; somebody might need it, so I'm defining
# it for completeness. In reality, the {@link PlusLoopbackState} node is the
# real decision-making note for {@code A+}.
#
class PlusBlockStartState(BlockStartState):
__slots__ = 'loopBackState'
def __init__(self):
super().__init__()
self.stateType = self.PLUS_BLOCK_START
self.loopBackState = None
# The block that begins a closure loop.
class StarBlockStartState(BlockStartState):
def __init__(self):
super().__init__()
self.stateType = self.STAR_BLOCK_START
class StarLoopbackState(ATNState):
def __init__(self):
super().__init__()
self.stateType = self.STAR_LOOP_BACK
class StarLoopEntryState(DecisionState):
__slots__ = ('loopBackState', 'isPrecedenceDecision')
def __init__(self):
super().__init__()
self.stateType = self.STAR_LOOP_ENTRY
self.loopBackState = None
# Indicates whether this state can benefit from a precedence DFA during SLL decision making.
self.isPrecedenceDecision = None
# Mark the end of a * or + loop.
class LoopEndState(ATNState):
__slots__ = 'loopBackState'
def __init__(self):
super().__init__()
self.stateType = self.LOOP_END
self.loopBackState = None
# The Tokens rule start state linking to each lexer rule start state */
class TokensStartState(DecisionState):
def __init__(self):
super().__init__()
self.stateType = self.TOKEN_START
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1606416672.0
antlr4-python3-runtime-4.9.2/src/antlr4/atn/ATNType.py 0000644 0000766 0000000 00000000646 00000000000 022706 0 ustar 00parrt wheel 0000000 0000000 # Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#/
from enum import IntEnum
# Represents the type of recognizer an ATN applies to.
class ATNType(IntEnum):
LEXER = 0
PARSER = 1
@classmethod
def fromOrdinal(cls, i:int):
return cls._value2member_map_[i]
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0
antlr4-python3-runtime-4.9.2/src/antlr4/atn/LexerATNSimulator.py 0000644 0000766 0000000 00000061571 00000000000 024750 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#/
# When we hit an accept state in either the DFA or the ATN, we
# have to notify the character stream to start buffering characters
# via {@link IntStream#mark} and record the current state. The current sim state
# includes the current index into the input, the current line,
# and current character position in that line. Note that the Lexer is
# tracking the starting line and characterization of the token. These
# variables track the "state" of the simulator when it hits an accept state.
#
# {...}
# syntax in ANTLR 4, as well as actions created for lexer commands where the
# command argument could not be evaluated when the grammar was compiled.
# ctorBody
# : '{' superCall? stat* '}'
# ;
#
#
#
# stat
# : superCall ';'
# | expression ';'
# | ...
# ;
#
#
#
# parser.{@link Parser#getInterpreter() getInterpreter()}.{@link #setPredictionMode setPredictionMode}{@code (}{@link PredictionMode#SLL}{@code )};
# parser.{@link Parser#setErrorHandler setErrorHandler}(new {@link BailErrorStrategy}());
#
#
#
#
#
#
#
#
#
#
# grammar TA;
# prog: statement* EOF;
# statement: letterA | statement letterA 'b' ;
# letterA: 'a';
#
#
#
#
#
#
#
#
# map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
# alt and not pred
#
#
#
#
#
#
# |A_i|>1 and
# A_i = A_j for all i, j.
# map[c] U= c.{@link ATNConfig#alt alt} # map hash/equals uses s and x, not
# alt and not pred
#
#
@classmethod
def getConflictingAltSubsets(cls, configs:ATNConfigSet):
configToAlts = dict()
for c in configs:
h = hash((c.state.stateNumber, c.context))
alts = configToAlts.get(h, None)
if alts is None:
alts = set()
configToAlts[h] = alts
alts.add(c.alt)
return configToAlts.values()
#
# Get a map from state to alt subset from a configuration set. For each
# configuration {@code c} in {@code configs}:
#
#
# map[c.{@link ATNConfig#state state}] U= c.{@link ATNConfig#alt alt}
#
#
@classmethod
def getStateToAltMap(cls, configs:ATNConfigSet):
m = dict()
for c in configs:
alts = m.get(c.state, None)
if alts is None:
alts = set()
m[c.state] = alts
alts.add(c.alt)
return m
@classmethod
def hasStateAssociatedWithOneAlt(cls, configs:ATNConfigSet):
return any(len(alts) == 1 for alts in cls.getStateToAltMap(configs).values())
@classmethod
def getSingleViableAlt(cls, altsets:list):
viableAlts = set()
for alts in altsets:
minAlt = min(alts)
viableAlts.add(minAlt)
if len(viableAlts)>1 : # more than 1 viable alt
return ATN.INVALID_ALT_NUMBER
return min(viableAlts)
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0
antlr4-python3-runtime-4.9.2/src/antlr4/atn/SemanticContext.py 0000644 0000766 0000000 00000024377 00000000000 024541 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#
# A tree structure used to record the semantic context in which
# an ATN configuration is valid. It's either a single predicate,
# a conjunction {@code p1&&p2}, or a sum of products {@code p1||p2}.
#
#
#
#
def evalPrecedence(self, parser:Recognizer, outerContext:RuleContext):
return self
# need forward declaration
AND = None
def andContext(a:SemanticContext, b:SemanticContext):
if a is None or a is SemanticContext.NONE:
return b
if b is None or b is SemanticContext.NONE:
return a
result = AND(a, b)
if len(result.opnds) == 1:
return result.opnds[0]
else:
return result
# need forward declaration
OR = None
def orContext(a:SemanticContext, b:SemanticContext):
if a is None:
return b
if b is None:
return a
if a is SemanticContext.NONE or b is SemanticContext.NONE:
return SemanticContext.NONE
result = OR(a, b)
if len(result.opnds) == 1:
return result.opnds[0]
else:
return result
def filterPrecedencePredicates(collection:set):
return [context for context in collection if isinstance(context, PrecedencePredicate)]
class Predicate(SemanticContext):
__slots__ = ('ruleIndex', 'predIndex', 'isCtxDependent')
def __init__(self, ruleIndex:int=-1, predIndex:int=-1, isCtxDependent:bool=False):
self.ruleIndex = ruleIndex
self.predIndex = predIndex
self.isCtxDependent = isCtxDependent # e.g., $i ref in pred
def eval(self, parser:Recognizer , outerContext:RuleContext ):
localctx = outerContext if self.isCtxDependent else None
return parser.sempred(localctx, self.ruleIndex, self.predIndex)
def __hash__(self):
return hash((self.ruleIndex, self.predIndex, self.isCtxDependent))
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, Predicate):
return False
return self.ruleIndex == other.ruleIndex and \
self.predIndex == other.predIndex and \
self.isCtxDependent == other.isCtxDependent
def __str__(self):
return "{" + str(self.ruleIndex) + ":" + str(self.predIndex) + "}?"
class PrecedencePredicate(SemanticContext):
def __init__(self, precedence:int=0):
self.precedence = precedence
def eval(self, parser:Recognizer , outerContext:RuleContext ):
return parser.precpred(outerContext, self.precedence)
def evalPrecedence(self, parser:Recognizer, outerContext:RuleContext):
if parser.precpred(outerContext, self.precedence):
return SemanticContext.NONE
else:
return None
def __lt__(self, other):
return self.precedence < other.precedence
def __hash__(self):
return 31
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, PrecedencePredicate):
return False
else:
return self.precedence == other.precedence
# A semantic context which is true whenever none of the contained contexts
# is false.
del AND
class AND(SemanticContext):
__slots__ = 'opnds'
def __init__(self, a:SemanticContext, b:SemanticContext):
operands = set()
if isinstance( a, AND ):
operands.update(a.opnds)
else:
operands.add(a)
if isinstance( b, AND ):
operands.update(b.opnds)
else:
operands.add(b)
precedencePredicates = filterPrecedencePredicates(operands)
if len(precedencePredicates)>0:
# interested in the transition with the lowest precedence
reduced = min(precedencePredicates)
operands.add(reduced)
self.opnds = list(operands)
def __eq__(self, other):
if self is other:
return True
elif not isinstance(other, AND):
return False
else:
return self.opnds == other.opnds
def __hash__(self):
h = 0
for o in self.opnds:
h = hash((h, o))
return hash((h, "AND"))
#
# {@inheritDoc}
#
#
#
#
# @param precedenceDfa {@code true} if this is a precedence DFA; otherwise,
# {@code false}
def setPrecedenceDfa(self, precedenceDfa:bool):
if self.precedenceDfa != precedenceDfa:
self._states = dict()
if precedenceDfa:
precedenceState = DFAState(configs=ATNConfigSet())
precedenceState.edges = []
precedenceState.isAcceptState = False
precedenceState.requiresFullContext = False
self.s0 = precedenceState
else:
self.s0 = None
self.precedenceDfa = precedenceDfa
@property
def states(self):
return self._states
# Return a list of all states in this DFA, ordered by state number.
def sortedStates(self):
return sorted(self._states.keys(), key=lambda state: state.stateNumber)
def __str__(self):
return self.toString(None)
def toString(self, literalNames:list=None, symbolicNames:list=None):
if self.s0 is None:
return ""
from antlr4.dfa.DFASerializer import DFASerializer
serializer = DFASerializer(self,literalNames,symbolicNames)
return str(serializer)
def toLexerString(self):
if self.s0 is None:
return ""
from antlr4.dfa.DFASerializer import LexerDFASerializer
serializer = LexerDFASerializer(self)
return str(serializer)
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0
antlr4-python3-runtime-4.9.2/src/antlr4/dfa/DFASerializer.py 0000644 0000766 0000000 00000004726 00000000000 024021 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#/
# A DFA walker that knows how to dump them to serialized strings.#/
from io import StringIO
from antlr4 import DFA
from antlr4.Utils import str_list
from antlr4.dfa.DFAState import DFAState
class DFASerializer(object):
__slots__ = ('dfa', 'literalNames', 'symbolicNames')
def __init__(self, dfa:DFA, literalNames:list=None, symbolicNames:list=None):
self.dfa = dfa
self.literalNames = literalNames
self.symbolicNames = symbolicNames
def __str__(self):
if self.dfa.s0 is None:
return None
with StringIO() as buf:
for s in self.dfa.sortedStates():
n = 0
if s.edges is not None:
n = len(s.edges)
for i in range(0, n):
t = s.edges[i]
if t is not None and t.stateNumber != 0x7FFFFFFF:
buf.write(self.getStateString(s))
label = self.getEdgeLabel(i)
buf.write("-")
buf.write(label)
buf.write("->")
buf.write(self.getStateString(t))
buf.write('\n')
output = buf.getvalue()
if len(output)==0:
return None
else:
return output
def getEdgeLabel(self, i:int):
if i==0:
return "EOF"
if self.literalNames is not None and i<=len(self.literalNames):
return self.literalNames[i-1]
elif self.symbolicNames is not None and i<=len(self.symbolicNames):
return self.symbolicNames[i-1]
else:
return str(i-1)
def getStateString(self, s:DFAState):
n = s.stateNumber
baseStateStr = ( ":" if s.isAcceptState else "") + "s" + str(n) + ( "^" if s.requiresFullContext else "")
if s.isAcceptState:
if s.predicates is not None:
return baseStateStr + "=>" + str_list(s.predicates)
else:
return baseStateStr + "=>" + str(s.prediction)
else:
return baseStateStr
class LexerDFASerializer(DFASerializer):
def __init__(self, dfa:DFA):
super().__init__(dfa, None)
def getEdgeLabel(self, i:int):
return "'" + chr(i) + "'"
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0
antlr4-python3-runtime-4.9.2/src/antlr4/dfa/DFAState.py 0000644 0000766 0000000 00000012717 00000000000 022767 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#/
# Map a predicate to a predicted alternative.#/
from io import StringIO
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.atn.SemanticContext import SemanticContext
class PredPrediction(object):
__slots__ = ('alt', 'pred')
def __init__(self, pred:SemanticContext, alt:int):
self.alt = alt
self.pred = pred
def __str__(self):
return "(" + str(self.pred) + ", " + str(self.alt) + ")"
# A DFA state represents a set of possible ATN configurations.
# As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
# to keep track of all possible states the ATN can be in after
# reading each input symbol. That is to say, after reading
# input a1a2..an, the DFA is in a state that represents the
# subset T of the states of the ATN that are reachable from the
# ATN's start state along some path labeled a1a2..an."
# In conventional NFA→DFA conversion, therefore, the subset T
# would be a bitset representing the set of states the
# ATN could be in. We need to track the alt predicted by each
# state as well, however. More importantly, we need to maintain
# a stack of states, tracking the closure operations as they
# jump from rule to rule, emulating rule invocations (method calls).
# I have to add a stack to simulate the proper lookahead sequences for
# the underlying LL grammar from which the ATN was derived.
#
#
#
from io import StringIO
from antlr4 import Parser, DFA
from antlr4.atn.ATNConfigSet import ATNConfigSet
from antlr4.error.ErrorListener import ErrorListener
class DiagnosticErrorListener(ErrorListener):
def __init__(self, exactOnly:bool=True):
# whether all ambiguities or only exact ambiguities are reported.
self.exactOnly = exactOnly
def reportAmbiguity(self, recognizer:Parser, dfa:DFA, startIndex:int,
stopIndex:int, exact:bool, ambigAlts:set, configs:ATNConfigSet):
if self.exactOnly and not exact:
return
with StringIO() as buf:
buf.write("reportAmbiguity d=")
buf.write(self.getDecisionDescription(recognizer, dfa))
buf.write(": ambigAlts=")
buf.write(str(self.getConflictingAlts(ambigAlts, configs)))
buf.write(", input='")
buf.write(recognizer.getTokenStream().getText(startIndex, stopIndex))
buf.write("'")
recognizer.notifyErrorListeners(buf.getvalue())
def reportAttemptingFullContext(self, recognizer:Parser, dfa:DFA, startIndex:int,
stopIndex:int, conflictingAlts:set, configs:ATNConfigSet):
with StringIO() as buf:
buf.write("reportAttemptingFullContext d=")
buf.write(self.getDecisionDescription(recognizer, dfa))
buf.write(", input='")
buf.write(recognizer.getTokenStream().getText(startIndex, stopIndex))
buf.write("'")
recognizer.notifyErrorListeners(buf.getvalue())
def reportContextSensitivity(self, recognizer:Parser, dfa:DFA, startIndex:int,
stopIndex:int, prediction:int, configs:ATNConfigSet):
with StringIO() as buf:
buf.write("reportContextSensitivity d=")
buf.write(self.getDecisionDescription(recognizer, dfa))
buf.write(", input='")
buf.write(recognizer.getTokenStream().getText(startIndex, stopIndex))
buf.write("'")
recognizer.notifyErrorListeners(buf.getvalue())
def getDecisionDescription(self, recognizer:Parser, dfa:DFA):
decision = dfa.decision
ruleIndex = dfa.atnStartState.ruleIndex
ruleNames = recognizer.ruleNames
if ruleIndex < 0 or ruleIndex >= len(ruleNames):
return str(decision)
ruleName = ruleNames[ruleIndex]
if ruleName is None or len(ruleName)==0:
return str(decision)
return str(decision) + " (" + ruleName + ")"
#
# Computes the set of conflicting or ambiguous alternatives from a
# configuration set, if that information was not already provided by the
# parser.
#
# @param reportedAlts The set of conflicting or ambiguous alternatives, as
# reported by the parser.
# @param configs The conflicting or ambiguous configuration set.
# @return Returns {@code reportedAlts} if it is not {@code null}, otherwise
# returns the set of alternatives represented in {@code configs}.
#
def getConflictingAlts(self, reportedAlts:set, configs:ATNConfigSet):
if reportedAlts is not None:
return reportedAlts
result = set()
for config in configs:
result.add(config.alt)
return result
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1606416672.0
antlr4-python3-runtime-4.9.2/src/antlr4/error/ErrorListener.py 0000644 0000766 0000000 00000005242 00000000000 024565 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
# Provides an empty default implementation of {@link ANTLRErrorListener}. The
# default implementation of each method does nothing, but can be overridden as
# necessary.
import sys
class ErrorListener(object):
def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
pass
def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs):
pass
def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs):
pass
def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs):
pass
class ConsoleErrorListener(ErrorListener):
#
# Provides a default instance of {@link ConsoleErrorListener}.
#
INSTANCE = None
#
# {@inheritDoc}
#
#
# line line:charPositionInLine msg
#
#
def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
print("line " + str(line) + ":" + str(column) + " " + msg, file=sys.stderr)
ConsoleErrorListener.INSTANCE = ConsoleErrorListener()
class ProxyErrorListener(ErrorListener):
def __init__(self, delegates):
super().__init__()
if delegates is None:
raise ReferenceError("delegates")
self.delegates = delegates
def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
for delegate in self.delegates:
delegate.syntaxError(recognizer, offendingSymbol, line, column, msg, e)
def reportAmbiguity(self, recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs):
for delegate in self.delegates:
delegate.reportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
def reportAttemptingFullContext(self, recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs):
for delegate in self.delegates:
delegate.reportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
def reportContextSensitivity(self, recognizer, dfa, startIndex, stopIndex, prediction, configs):
for delegate in self.delegates:
delegate.reportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1613495410.0
antlr4-python3-runtime-4.9.2/src/antlr4/error/ErrorStrategy.py 0000644 0000766 0000000 00000073267 00000000000 024616 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#
import sys
from antlr4.IntervalSet import IntervalSet
from antlr4.Token import Token
from antlr4.atn.ATNState import ATNState
from antlr4.error.Errors import RecognitionException, NoViableAltException, InputMismatchException, \
FailedPredicateException, ParseCancellationException
# need forward declaration
Parser = None
class ErrorStrategy(object):
def reset(self, recognizer:Parser):
pass
def recoverInline(self, recognizer:Parser):
pass
def recover(self, recognizer:Parser, e:RecognitionException):
pass
def sync(self, recognizer:Parser):
pass
def inErrorRecoveryMode(self, recognizer:Parser):
pass
def reportError(self, recognizer:Parser, e:RecognitionException):
pass
# This is the default implementation of {@link ANTLRErrorStrategy} used for
# error reporting and recovery in ANTLR parsers.
#
class DefaultErrorStrategy(ErrorStrategy):
def __init__(self):
super().__init__()
# Indicates whether the error strategy is currently "recovering from an
# error". This is used to suppress reporting multiple error messages while
# attempting to recover from a detected syntax error.
#
# @see #inErrorRecoveryMode
#
self.errorRecoveryMode = False
# The index into the input stream where the last error occurred.
# This is used to prevent infinite loops where an error is found
# but no token is consumed during recovery...another error is found,
# ad nauseum. This is a failsafe mechanism to guarantee that at least
# one token/tree node is consumed for two errors.
#
self.lastErrorIndex = -1
self.lastErrorStates = None
self.nextTokensContext = None
self.nextTokenState = 0
#
#
#
def reportError(self, recognizer:Parser, e:RecognitionException):
# if we've already reported an error and have not matched a token
# yet successfully, don't report any errors.
if self.inErrorRecoveryMode(recognizer):
return # don't report spurious errors
self.beginErrorCondition(recognizer)
if isinstance( e, NoViableAltException ):
self.reportNoViableAlternative(recognizer, e)
elif isinstance( e, InputMismatchException ):
self.reportInputMismatch(recognizer, e)
elif isinstance( e, FailedPredicateException ):
self.reportFailedPredicate(recognizer, e)
else:
print("unknown recognition error type: " + type(e).__name__)
recognizer.notifyErrorListeners(e.message, e.offendingToken, e)
#
# {@inheritDoc}
#
#
# a : sync ( stuff sync )* ;
# sync : {consume to what can follow sync} ;
#
#
# At the start of a sub rule upon error, {@link #sync} performs single
# token deletion, if possible. If it can't do that, it bails on the current
# rule and uses the default error recovery, which consumes until the
# resynchronization set of the current rule.
#
#
# classDef : 'class' ID '{' member* '}'
#
#
# input with an extra token between members would force the parser to
# consume until it found the next class definition rather than the next
# member definition of the current class.
#
#
# stat → expr → atom
#
#
# and it will be trying to match the {@code ')'} at this point in the
# derivation:
#
#
# => ID '=' '(' INT ')' ('+' atom)* ';'
# ^
#
#
# The attempt to match {@code ')'} will fail when it sees {@code ';'} and
# call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'}
# is in the set of tokens that can follow the {@code ')'} token reference
# in rule {@code atom}. It can assume that you forgot the {@code ')'}.
#
def recoverInline(self, recognizer:Parser):
# SINGLE TOKEN DELETION
matchedSymbol = self.singleTokenDeletion(recognizer)
if matchedSymbol is not None:
# we have deleted the extra token.
# now, move past ttype token as if all were ok
recognizer.consume()
return matchedSymbol
# SINGLE TOKEN INSERTION
if self.singleTokenInsertion(recognizer):
return self.getMissingSymbol(recognizer)
# even that didn't work; must throw the exception
raise InputMismatchException(recognizer)
#
# This method implements the single-token insertion inline error recovery
# strategy. It is called by {@link #recoverInline} if the single-token
# deletion strategy fails to recover from the mismatched input. If this
# method returns {@code true}, {@code recognizer} will be in error recovery
# mode.
#
#
#
#
#
#
#
# @param label The label.
#
# @return A collection of all {@link ParseTree} nodes matching tags with
# the specified {@code label}. If no nodes matched the label, an empty list
# is returned.
#
def getAll(self, label:str):
nodes = self.labels.get(label, None)
if nodes is None:
return list()
else:
return nodes
#
# Gets a value indicating whether the match operation succeeded.
#
# @return {@code true} if the match operation succeeded; otherwise,
# {@code false}.
#
def succeeded(self):
return self.mismatchedNode is None
#
# {@inheritDoc}
#
def __str__(self):
with StringIO() as buf:
buf.write("Match ")
buf.write("succeeded" if self.succeeded() else "failed")
buf.write("; found ")
buf.write(str(len(self.labels)))
buf.write(" labels")
return buf.getvalue()
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 011453 x ustar 00 0000000 0000000 22 mtime=1609695176.0
antlr4-python3-runtime-4.9.2/src/antlr4/tree/ParseTreePattern.py 0000644 0000766 0000000 00000005411 00000000000 025022 0 ustar 00parrt wheel 0000000 0000000 #
# Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
# Use of this file is governed by the BSD 3-clause license that
# can be found in the LICENSE.txt file in the project root.
#
#
# A pattern like {@code
# {@link XPath} p = new {@link XPath#XPath XPath}(parser, pathString);
# return p.{@link #evaluate evaluate}(tree);
#
#
#
#
#
#
#
#
#