1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | Lib/lib2to3/pgen2/driver.py
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. # Licensed to PSF under a Contributor Agreement. # Modifications: # Copyright 2006 Google, Inc. All Rights Reserved. # Licensed to PSF under a Contributor Agreement. """Parser driver. This provides a high-level interface to parse a file into a syntax tree. """ __author__ = "Guido van Rossum <guido@python.org>" __all__ = ["Driver", "load_grammar"] # Python imports import codecs import os import logging import StringIO import sys # Pgen imports from . import grammar, parse, token, tokenize, pgen class Driver(object): def __init__(self, grammar, convert=None, logger=None): self.grammar = grammar if logger is None: logger = logging.getLogger() self.logger = logger self.convert = convert def parse_tokens(self, tokens, debug=False): """Parse a series of tokens and return the syntax tree.""" # XXX Move the prefix computation into a wrapper around tokenize. p = parse.Parser(self.grammar, self.convert) p.setup() lineno = 1 column = 0 type = value = start = end = line_text = None prefix = u"" for quintuple in tokens: type, value, start, end, line_text = quintuple if start != (lineno, column): assert (lineno, column) <= start, ((lineno, column), start) s_lineno, s_column = start if lineno < s_lineno: prefix += "\n" * (s_lineno - lineno) lineno = s_lineno column = 0 if column < s_column: prefix += line_text[column:s_column] column = s_column if type in (tokenize.COMMENT, tokenize.NL): prefix += value lineno, column = end if value.endswith("\n"): lineno += 1 column = 0 continue if type == token.OP: type = grammar.opmap[value] if debug: self.logger.debug("%s %r (prefix=%r)", token.tok_name[type], value, prefix) if p.addtoken(type, value, (prefix, start)): if debug: self.logger.debug("Stop.") break prefix = "" lineno, column = end if value.endswith("\n"): lineno += 1 column = 0 else: # We never broke out -- EOF is too soon (how can this happen???) raise parse.ParseError("incomplete input", type, value, (prefix, start)) return p.rootnode def parse_stream_raw(self, stream, debug=False): """Parse a stream and return the syntax tree.""" tokens = tokenize.generate_tokens(stream.readline) return self.parse_tokens(tokens, debug) def parse_stream(self, stream, debug=False): """Parse a stream and return the syntax tree.""" return self.parse_stream_raw(stream, debug) def parse_file(self, filename, encoding=None, debug=False): """Parse a file and return the syntax tree.""" stream = codecs.open(filename, "r", encoding) try: return self.parse_stream(stream, debug) finally: stream.close() def parse_string(self, text, debug=False): """Parse a string and return the syntax tree.""" tokens = tokenize.generate_tokens(StringIO.StringIO(text).readline) return self.parse_tokens(tokens, debug) def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None): """Load the grammar (maybe from a pickle).""" if logger is None: logger = logging.getLogger() if gp is None: head, tail = os.path.splitext(gt) if tail == ".txt": tail = "" gp = head + tail + ".".join(map(str, sys.version_info)) + ".pickle" if force or not _newer(gp, gt): logger.info("Generating grammar tables from %s", gt) g = pgen.generate_grammar(gt) if save: logger.info("Writing grammar tables to %s", gp) try: g.dump(gp) except IOError, e: logger.info("Writing failed:"+str(e)) else: g = grammar.Grammar() g.load(gp) return g def _newer(a, b): """Inquire whether file a was written since file b.""" if not os.path.exists(a): return False if not os.path.exists(b): return True return os.path.getmtime(a) >= os.path.getmtime(b) def main(*args): """Main program, when run as a script: produce grammar pickle files. Calls load_grammar for each argument, a path to a grammar text file. """ if not args: args = sys.argv[1:] logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='%(message)s') for gt in args: load_grammar(gt, save=True, force=True) return True if __name__ == "__main__": sys.exit(int(not main())) |