@@ -3,8 +3,9 @@ import shutil
 import tokenize
 import sys
 import sysconfig
+import itertools
 
-from typing import Optional, Tuple
+from typing import Optional, Tuple, List, IO, Iterator, Set, Dict
 
 from pegen.c_generator import CParserGenerator
 from pegen.grammar import Grammar
@@ -17,12 +18,12 @@ from pegen.tokenizer import Tokenizer
 MOD_DIR = pathlib.Path(__file__).parent
 
 
-def get_extra_flags(compiler_flags, compiler_py_flags_nodist):
+def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
     flags = sysconfig.get_config_var(compiler_flags)
     py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
     if flags is None or py_flags_nodist is None:
         return []
-    return f'{flags} {py_flags_nodist}'.split()
+    return f"{flags} {py_flags_nodist}".split()
 
 
 def compile_c_extension(
@@ -45,15 +46,15 @@ def compile_c_extension(
     from distutils.core import Distribution, Extension
     from distutils.command.clean import clean  # type: ignore
     from distutils.command.build_ext import build_ext  # type: ignore
-    from distutils.tests.support import fixup_build_ext
+    from distutils.tests.support import fixup_build_ext  # type: ignore
 
     if verbose:
         distutils.log.set_verbosity(distutils.log.DEBUG)
 
     source_file_path = pathlib.Path(generated_source_path)
     extension_name = source_file_path.stem
-    extra_compile_args = get_extra_flags('CFLAGS', 'PY_CFLAGS_NODIST')
-    extra_link_args = get_extra_flags('LDFLAGS', 'PY_LDFLAGS_NODIST')
+    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
+    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
     if keep_asserts:
         extra_compile_args.append("-UNDEBUG")
     extension = [
@@ -111,39 +112,69 @@ def build_parser(
     return grammar, parser, tokenizer
 
 
-def build_generator(
-    tokenizer: Tokenizer,
+def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str]]:
+    exact_tokens = {}
+    non_exact_tokens = set()
+    numbers = itertools.count(0)
+
+    for line in tokens:
+        line = line.strip()
+
+        if not line or line.startswith("#"):
+            continue
+
+        pieces = line.split()
+        index = next(numbers)
+
+        if len(pieces) == 1:
+            (token,) = pieces
+            non_exact_tokens.add(token)
+        elif len(pieces) == 2:
+            _, op = pieces
+            exact_tokens[op.strip("'")] = index
+        else:
+            raise ValueError(f"Unexpected line found in Tokens file: {line}")
+
+    return exact_tokens, non_exact_tokens
+
+
+def build_c_generator(
     grammar: Grammar,
     grammar_file: str,
+    tokens_file: str,
     output_file: str,
     compile_extension: bool = False,
     verbose_c_extension: bool = False,
     keep_asserts_in_extension: bool = True,
     skip_actions: bool = False,
 ) -> ParserGenerator:
-    # TODO: Allow other extensions; pass the output type as an argument.
-    if not output_file.endswith((".c", ".py")):
-        raise RuntimeError("Your output file must either be a .c or .py file")
-
+    with open(tokens_file, "r") as tok_file:
+        exact_tok, non_exact_tok = generate_token_definitions(tok_file)
     with open(output_file, "w") as file:
-        gen: ParserGenerator
-        if output_file.endswith(".c"):
-            gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
-        elif output_file.endswith(".py"):
-            gen = PythonParserGenerator(grammar, file)  # TODO: skip_actions
-        else:
-            assert False  # Should have been checked above
+        gen: ParserGenerator = CParserGenerator(
+            grammar, exact_tok, non_exact_tok, file, skip_actions=skip_actions
+        )
         gen.generate(grammar_file)
 
-    if compile_extension and output_file.endswith(".c"):
+    if compile_extension:
         compile_c_extension(
             output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
         )
     return gen
 
 
+def build_python_generator(
+    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
+) -> ParserGenerator:
+    with open(output_file, "w") as file:
+        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
+        gen.generate(grammar_file)
+    return gen
+
+
-def build_parser_and_generator(
+def build_c_parser_and_generator(
     grammar_file: str,
+    tokens_file: str,
     output_file: str,
     compile_extension: bool = False,
     verbose_tokenizer: bool = False,
@@ -152,10 +183,11 @@ def build_parser_and_generator(
     keep_asserts_in_extension: bool = True,
     skip_actions: bool = False,
 ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
-    """Generate rules, parser, tokenizer, parser generator for a given grammar
+    """Generate rules, C parser, tokenizer, parser generator for a given grammar
 
     Args:
         grammar_file (string): Path for the grammar file
+        tokens_file (string): Path for the tokens file
         output_file (string): Path for the output file
         compile_extension (bool, optional): Whether to compile the C extension.
           Defaults to False.
@@ -170,10 +202,10 @@ def build_parser_and_generator(
         skip_actions (bool, optional): Whether to pretend no rule has any actions.
     """
     grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
-    gen = build_generator(
-        tokenizer,
+    gen = build_c_generator(
         grammar,
         grammar_file,
+        tokens_file,
         output_file,
         compile_extension,
         verbose_c_extension,
@@ -182,3 +214,26 @@
     )
 
     return grammar, parser, tokenizer, gen
+
+
+def build_python_parser_and_generator(
+    grammar_file: str,
+    output_file: str,
+    verbose_tokenizer: bool = False,
+    verbose_parser: bool = False,
+    skip_actions: bool = False,
+) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+    """Generate rules, python parser, tokenizer, parser generator for a given grammar
+
+    Args:
+        grammar_file (string): Path for the grammar file
+        output_file (string): Path for the output file
+        verbose_tokenizer (bool, optional): Whether to display additional output
+          when generating the tokenizer. Defaults to False.
+        verbose_parser (bool, optional): Whether to display additional output
+          when generating the parser. Defaults to False.
+        skip_actions (bool, optional): Whether to pretend no rule has any actions.
+    """
+    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
+    gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,)
+    return grammar, parser, tokenizer, gen
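
A minimal usage sketch of the API after this patch, assuming the file above is importable as pegen.build; the module path and the grammar/output paths below are illustrative assumptions, not part of the patch:

    # Minimal sketch, not part of the patch; assumes pegen is on sys.path.
    import io

    from pegen.build import build_python_parser_and_generator, generate_token_definitions

    # generate_token_definitions() gives each non-comment, non-blank line of a
    # Tokens file a sequential index: one-word lines name non-exact tokens,
    # two-word lines map a quoted operator string to its index.
    tokens = io.StringIO("ENDMARKER\nNAME\nLPAR '('\nRPAR ')'\n")
    exact, non_exact = generate_token_definitions(tokens)
    assert exact == {"(": 2, ")": 3}
    assert non_exact == {"ENDMARKER", "NAME"}

    # Build a Python parser from a grammar; both paths are hypothetical.
    grammar, parser, tokenizer, gen = build_python_parser_and_generator(
        "data/simple.gram", "simple_parser.py"
    )

build_c_parser_and_generator() follows the same shape but additionally takes the tokens_file path and, with compile_extension=True, compiles the generated C file into an extension module.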