|
|
|
@ -1,22 +1,76 @@ |
|
|
|
import ast |
|
|
|
import contextlib |
|
|
|
import re |
|
|
|
from abc import abstractmethod |
|
|
|
from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple |
|
|
|
from typing import ( |
|
|
|
IO, |
|
|
|
AbstractSet, |
|
|
|
Any, |
|
|
|
Dict, |
|
|
|
Iterable, |
|
|
|
Iterator, |
|
|
|
List, |
|
|
|
Optional, |
|
|
|
Set, |
|
|
|
Text, |
|
|
|
Tuple, |
|
|
|
Union, |
|
|
|
) |
|
|
|
|
|
|
|
from pegen import sccutils |
|
|
|
from pegen.grammar import ( |
|
|
|
Alt, |
|
|
|
Cut, |
|
|
|
Forced, |
|
|
|
Gather, |
|
|
|
Grammar, |
|
|
|
GrammarError, |
|
|
|
GrammarVisitor, |
|
|
|
Group, |
|
|
|
Lookahead, |
|
|
|
NamedItem, |
|
|
|
NameLeaf, |
|
|
|
Opt, |
|
|
|
Plain, |
|
|
|
Repeat0, |
|
|
|
Repeat1, |
|
|
|
Rhs, |
|
|
|
Rule, |
|
|
|
StringLeaf, |
|
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
class RuleCollectorVisitor(GrammarVisitor): |
|
|
|
"""Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes""" |
|
|
|
|
|
|
|
def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None: |
|
|
|
self.rulses = rules |
|
|
|
self.callmaker = callmakervisitor |
|
|
|
|
|
|
|
def visit_Rule(self, rule: Rule) -> None: |
|
|
|
self.visit(rule.flatten()) |
|
|
|
|
|
|
|
def visit_NamedItem(self, item: NamedItem) -> None: |
|
|
|
self.callmaker.visit(item) |
|
|
|
|
|
|
|
|
|
|
|
class KeywordCollectorVisitor(GrammarVisitor): |
|
|
|
"""Visitor that collects all the keywods and soft keywords in the Grammar""" |
|
|
|
|
|
|
|
def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]): |
|
|
|
self.generator = gen |
|
|
|
self.keywords = keywords |
|
|
|
self.soft_keywords = soft_keywords |
|
|
|
|
|
|
|
def visit_StringLeaf(self, node: StringLeaf) -> None: |
|
|
|
val = ast.literal_eval(node.value) |
|
|
|
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword |
|
|
|
if node.value.endswith("'") and node.value not in self.keywords: |
|
|
|
self.keywords[val] = self.generator.keyword_type() |
|
|
|
else: |
|
|
|
return self.soft_keywords.add(node.value.replace('"', "")) |
|
|
|
|
|
|
|
|
|
|
|
class RuleCheckingVisitor(GrammarVisitor): |
|
|
|
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]): |
|
|
|
self.rules = rules |
|
|
|
@ -39,6 +93,8 @@ class ParserGenerator: |
|
|
|
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]): |
|
|
|
self.grammar = grammar |
|
|
|
self.tokens = tokens |
|
|
|
self.keywords: Dict[str, int] = {} |
|
|
|
self.soft_keywords: Set[str] = set() |
|
|
|
self.rules = grammar.rules |
|
|
|
self.validate_rule_names() |
|
|
|
if "trailer" not in grammar.metas and "start" not in self.rules: |
|
|
|
@ -48,12 +104,10 @@ class ParserGenerator: |
|
|
|
checker.visit(rule) |
|
|
|
self.file = file |
|
|
|
self.level = 0 |
|
|
|
compute_nullables(self.rules) |
|
|
|
self.first_graph, self.first_sccs = compute_left_recursives(self.rules) |
|
|
|
self.todo = self.rules.copy() # Rules to generate |
|
|
|
self.counter = 0 # For name_rule()/name_loop() |
|
|
|
self.keyword_counter = 499 # For keyword_type() |
|
|
|
self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules |
|
|
|
self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules |
|
|
|
self._local_variable_stack: List[List[str]] = [] |
|
|
|
|
|
|
|
def validate_rule_names(self) -> None: |
|
|
|
@ -94,39 +148,43 @@ class ParserGenerator: |
|
|
|
for line in lines.splitlines(): |
|
|
|
self.print(line) |
|
|
|
|
|
|
|
def collect_todo(self) -> None: |
|
|
|
def collect_rules(self) -> None: |
|
|
|
keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords) |
|
|
|
for rule in self.all_rules.values(): |
|
|
|
keyword_collector.visit(rule) |
|
|
|
|
|
|
|
rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor) |
|
|
|
done: Set[str] = set() |
|
|
|
while True: |
|
|
|
alltodo = list(self.todo) |
|
|
|
self.all_rules.update(self.todo) |
|
|
|
todo = [i for i in alltodo if i not in done] |
|
|
|
computed_rules = list(self.all_rules) |
|
|
|
todo = [i for i in computed_rules if i not in done] |
|
|
|
if not todo: |
|
|
|
break |
|
|
|
done = set(self.all_rules) |
|
|
|
for rulename in todo: |
|
|
|
self.todo[rulename].collect_todo(self) |
|
|
|
done = set(alltodo) |
|
|
|
rule_collector.visit(self.all_rules[rulename]) |
|
|
|
|
|
|
|
def keyword_type(self) -> int: |
|
|
|
self.keyword_counter += 1 |
|
|
|
return self.keyword_counter |
|
|
|
|
|
|
|
def name_node(self, rhs: Rhs) -> str: |
|
|
|
def artifical_rule_from_rhs(self, rhs: Rhs) -> str: |
|
|
|
self.counter += 1 |
|
|
|
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name. |
|
|
|
self.todo[name] = Rule(name, None, rhs) |
|
|
|
self.all_rules[name] = Rule(name, None, rhs) |
|
|
|
return name |
|
|
|
|
|
|
|
def name_loop(self, node: Plain, is_repeat1: bool) -> str: |
|
|
|
def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str: |
|
|
|
self.counter += 1 |
|
|
|
if is_repeat1: |
|
|
|
prefix = "_loop1_" |
|
|
|
else: |
|
|
|
prefix = "_loop0_" |
|
|
|
name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name. |
|
|
|
self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) |
|
|
|
name = f"{prefix}{self.counter}" |
|
|
|
self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])])) |
|
|
|
return name |
|
|
|
|
|
|
|
def name_gather(self, node: Gather) -> str: |
|
|
|
def artifical_rule_from_gather(self, node: Gather) -> str: |
|
|
|
self.counter += 1 |
|
|
|
name = f"_gather_{self.counter}" |
|
|
|
self.counter += 1 |
|
|
|
@ -135,7 +193,7 @@ class ParserGenerator: |
|
|
|
[NamedItem(None, node.separator), NamedItem("elem", node.node)], |
|
|
|
action="elem", |
|
|
|
) |
|
|
|
self.todo[extra_function_name] = Rule( |
|
|
|
self.all_rules[extra_function_name] = Rule( |
|
|
|
extra_function_name, |
|
|
|
None, |
|
|
|
Rhs([extra_function_alt]), |
|
|
|
@ -143,7 +201,7 @@ class ParserGenerator: |
|
|
|
alt = Alt( |
|
|
|
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))], |
|
|
|
) |
|
|
|
self.todo[name] = Rule( |
|
|
|
self.all_rules[name] = Rule( |
|
|
|
name, |
|
|
|
None, |
|
|
|
Rhs([alt]), |
|
|
|
@ -160,13 +218,120 @@ class ParserGenerator: |
|
|
|
return name |
|
|
|
|
|
|
|
|
|
|
|
def compute_nullables(rules: Dict[str, Rule]) -> None: |
|
|
|
class NullableVisitor(GrammarVisitor): |
|
|
|
def __init__(self, rules: Dict[str, Rule]) -> None: |
|
|
|
self.rules = rules |
|
|
|
self.visited: Set[Any] = set() |
|
|
|
self.nullables: Set[Union[Rule, NamedItem]] = set() |
|
|
|
|
|
|
|
def visit_Rule(self, rule: Rule) -> bool: |
|
|
|
if rule in self.visited: |
|
|
|
return False |
|
|
|
self.visited.add(rule) |
|
|
|
if self.visit(rule.rhs): |
|
|
|
self.nullables.add(rule) |
|
|
|
return rule in self.nullables |
|
|
|
|
|
|
|
def visit_Rhs(self, rhs: Rhs) -> bool: |
|
|
|
for alt in rhs.alts: |
|
|
|
if self.visit(alt): |
|
|
|
return True |
|
|
|
return False |
|
|
|
|
|
|
|
def visit_Alt(self, alt: Alt) -> bool: |
|
|
|
for item in alt.items: |
|
|
|
if not self.visit(item): |
|
|
|
return False |
|
|
|
return True |
|
|
|
|
|
|
|
def visit_Forced(self, force: Forced) -> bool: |
|
|
|
return True |
|
|
|
|
|
|
|
def visit_LookAhead(self, lookahead: Lookahead) -> bool: |
|
|
|
return True |
|
|
|
|
|
|
|
def visit_Opt(self, opt: Opt) -> bool: |
|
|
|
return True |
|
|
|
|
|
|
|
def visit_Repeat0(self, repeat: Repeat0) -> bool: |
|
|
|
return True |
|
|
|
|
|
|
|
def visit_Repeat1(self, repeat: Repeat1) -> bool: |
|
|
|
return False |
|
|
|
|
|
|
|
def visit_Gather(self, gather: Gather) -> bool: |
|
|
|
return False |
|
|
|
|
|
|
|
def visit_Cut(self, cut: Cut) -> bool: |
|
|
|
return False |
|
|
|
|
|
|
|
def visit_Group(self, group: Group) -> bool: |
|
|
|
return self.visit(group.rhs) |
|
|
|
|
|
|
|
def visit_NamedItem(self, item: NamedItem) -> bool: |
|
|
|
if self.visit(item.item): |
|
|
|
self.nullables.add(item) |
|
|
|
return item in self.nullables |
|
|
|
|
|
|
|
def visit_NameLeaf(self, node: NameLeaf) -> bool: |
|
|
|
if node.value in self.rules: |
|
|
|
return self.visit(self.rules[node.value]) |
|
|
|
# Token or unknown; never empty. |
|
|
|
return False |
|
|
|
|
|
|
|
def visit_StringLeaf(self, node: StringLeaf) -> bool: |
|
|
|
# The string token '' is considered empty. |
|
|
|
return not node.value |
|
|
|
|
|
|
|
|
|
|
|
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]: |
|
|
|
"""Compute which rules in a grammar are nullable. |
|
|
|
|
|
|
|
Thanks to TatSu (tatsu/leftrec.py) for inspiration. |
|
|
|
""" |
|
|
|
nullable_visitor = NullableVisitor(rules) |
|
|
|
for rule in rules.values(): |
|
|
|
rule.nullable_visit(rules) |
|
|
|
nullable_visitor.visit(rule) |
|
|
|
return nullable_visitor.nullables |
|
|
|
|
|
|
|
|
|
|
|
class InitialNamesVisitor(GrammarVisitor): |
|
|
|
def __init__(self, rules: Dict[str, Rule]) -> None: |
|
|
|
self.rules = rules |
|
|
|
self.nullables = compute_nullables(rules) |
|
|
|
|
|
|
|
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]: |
|
|
|
names: Set[str] = set() |
|
|
|
for value in node: |
|
|
|
if isinstance(value, list): |
|
|
|
for item in value: |
|
|
|
names |= self.visit(item, *args, **kwargs) |
|
|
|
else: |
|
|
|
names |= self.visit(value, *args, **kwargs) |
|
|
|
return names |
|
|
|
|
|
|
|
def visit_Alt(self, alt: Alt) -> Set[Any]: |
|
|
|
names: Set[str] = set() |
|
|
|
for item in alt.items: |
|
|
|
names |= self.visit(item) |
|
|
|
if item not in self.nullables: |
|
|
|
break |
|
|
|
return names |
|
|
|
|
|
|
|
def visit_Forced(self, force: Forced) -> Set[Any]: |
|
|
|
return set() |
|
|
|
|
|
|
|
def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]: |
|
|
|
return set() |
|
|
|
|
|
|
|
def visit_Cut(self, cut: Cut) -> Set[Any]: |
|
|
|
return set() |
|
|
|
|
|
|
|
def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]: |
|
|
|
return {node.value} |
|
|
|
|
|
|
|
def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]: |
|
|
|
return set() |
|
|
|
|
|
|
|
|
|
|
|
def compute_left_recursives( |
|
|
|
@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]: |
|
|
|
|
|
|
|
Note that this requires the nullable flags to have been computed. |
|
|
|
""" |
|
|
|
initial_name_visitor = InitialNamesVisitor(rules) |
|
|
|
graph = {} |
|
|
|
vertices: Set[str] = set() |
|
|
|
for rulename, rhs in rules.items(): |
|
|
|
graph[rulename] = names = rhs.initial_names() |
|
|
|
graph[rulename] = names = initial_name_visitor.visit(rhs) |
|
|
|
vertices |= names |
|
|
|
for vertex in vertices: |
|
|
|
graph.setdefault(vertex, set()) |
|
|
|
|