diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index c99e2ac..0baafe5 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -1,6 +1,7 @@ """ Representation of a box for recursive automaton """ +from typing import Union from pyformlang.finite_automaton.epsilon_nfa import EpsilonNFA from pyformlang.finite_automaton.finite_automaton import to_symbol @@ -16,56 +17,59 @@ class Box: ---------- enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` A epsilon nfa - label : :class:`~pyformlang.finite_automaton.Symbol` - A label for epsilon nfa + nonterminal : :class:`~pyformlang.finite_automaton.Symbol` + A nonterminal for epsilon nfa """ - def __init__(self, enfa: EpsilonNFA = None, label: Symbol = None): - if enfa is not None: - enfa = enfa.minimize() - self._dfa = enfa or EpsilonNFA() - - if label is not None: - label = to_symbol(label) - self._label = label or Symbol("") - - def change_label(self, label: Symbol): - """ Set a new label - - Parameters - ----------- - label : :class:`~pyformlang.finite_automaton.Symbol` - The new label for automaton - - """ - self._label = to_symbol(label) - - def change_dfa(self, enfa: EpsilonNFA): - """ Set an epsilon finite automaton - - Parameters - ----------- - enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` - The new epsilon finite automaton - - """ - enfa = enfa.minimize() + def __init__(self, enfa: EpsilonNFA, nonterminal: Union[Symbol, str]): self._dfa = enfa + nonterminal = to_symbol(nonterminal) + self._nonterminal = nonterminal + + def to_subgraph_dot(self): + """Creates a named subgraph representing a box""" + graph = self._dfa.to_networkx() + strange_nodes = [] + nonterminal = self.nonterminal.value.replace('"', '').replace("'", "").replace(".", "") + dot_string = (f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n' + f'fontname="Helvetica,Arial,sans-serif"\n' + f'node [fontname="Helvetica,Arial,sans-serif"]\n' + f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n' + 
f'node [shape = circle style=filled fillcolor=white]') + for node, data in graph.nodes(data=True): + node = node.replace('"', '').replace("'", "") + if 'is_start' not in data.keys() or 'is_final' not in data.keys(): + strange_nodes.append(node) + continue + if data['is_start']: + dot_string += f'\n"{node}" [fillcolor = green];' + if data['is_final']: + dot_string += f'\n"{node}" [shape = doublecircle];' + for strange_node in strange_nodes: + graph.remove_node(strange_node) + for node_from, node_to, data in graph.edges(data=True): + node_from = node_from.replace('"', '').replace("'", "") + node_to = node_to.replace('"', '').replace("'", "") + label = data['label'].replace('"', '').replace("'", "") + dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];' + dot_string += "\n}" + return dot_string + @property def dfa(self): """ Box's dfa """ return self._dfa @property - def label(self): - """ Box's label """ - return self._label + def nonterminal(self): + """ Box's nonterminal """ + return self._nonterminal @property - def start_state(self): - """ The start state """ + def start_states(self): + """ The start states """ return self._dfa.start_states @property @@ -90,14 +94,10 @@ def is_equivalent_to(self, other): if not isinstance(other, Box): return False - if self._dfa.is_equivalent_to(other.dfa) and \ - self._label == other.label: - return True - - return False + return self._dfa.is_equivalent_to(other.dfa) and self.nonterminal == other.nonterminal def __eq__(self, other): return self.is_equivalent_to(other) def __hash__(self): - return self._label.__hash__() + return self._nonterminal.__hash__() diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index f995a8b..1d89f36 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -2,7 +2,7 @@ Representation of a recursive automaton """ -from typing import AbstractSet +from typing import AbstractSet, Union from 
pyformlang.finite_automaton.finite_automaton import to_symbol from pyformlang.finite_automaton.symbol import Symbol @@ -19,143 +19,121 @@ class RecursiveAutomaton: Parameters ---------- - labels : set of :class:`~pyformlang.finite_automaton.Symbol`, optional - A finite set of labels for boxes - initial_label : :class:`~pyformlang.finite_automaton.Symbol`, optional - A start label for automaton - boxes : set of :class:`~pyformlang.rsa.Box`, optional + start_box : :class:`~pyformlang.rsa.Box` + Start box + boxes : set of :class:`~pyformlang.rsa.Box` A finite set of boxes """ def __init__(self, - labels: AbstractSet[Symbol] = None, - initial_label: Symbol = None, - boxes: AbstractSet[Box] = None): - - if labels is not None: - labels = {to_symbol(x) for x in labels} - self._labels = labels or set() - - if initial_label is not None: - initial_label = to_symbol(initial_label) - if initial_label not in self._labels: - self._labels.add(initial_label) - self._initial_label = initial_label or Symbol("") - - self._boxes = {} - if boxes is not None: - for box in boxes: - self._boxes.update({to_symbol(box.label): box}) - self._labels.add(box.label) - - for label in self._labels: - box = self.get_box(label) - if box is None: - raise ValueError( - "RSA must have the same number of labels and DFAs") - - def get_box(self, label: Symbol): - """ Box by label """ - - label = to_symbol(label) - if label in self._boxes: - return self._boxes[label] - - return None - - def add_box(self, new_box: Box): - """ Set a box + start_box: Box, + boxes: AbstractSet[Box]): + self._nonterminal_to_box = {} + if start_box not in boxes: + self._nonterminal_to_box[to_symbol(start_box.nonterminal)] = start_box + self._start_nonterminal = to_symbol(start_box.nonterminal) + for box in boxes: + self._nonterminal_to_box[to_symbol(box.nonterminal)] = box + + def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]): + """ + Box by nonterminal Parameters - ----------- - new_box : 
:class:`~pyformlang.rsa.Box` - The new box + ---------- + nonterminal: :class:`~pyformlang.finite_automaton.Symbol` | str + the nonterminal which represents a box + Returns + ----------- + box : :class:`~pyformlang.rsa.Box` | None + box represented by given nonterminal """ - self._boxes.update({new_box.label: new_box}) - self._labels.add(to_symbol(new_box.label)) + nonterminal = to_symbol(nonterminal) + if nonterminal in self._nonterminal_to_box: + return self._nonterminal_to_box[nonterminal] - def get_number_of_boxes(self): - """ Size of set of boxes """ + return None - return len(self._boxes) + def get_number_boxes(self): + """ Size of set of boxes """ - def change_initial_label(self, new_initial_label: Symbol): - """ Set an initial label + return len(self._nonterminal_to_box) - Parameters - ----------- - new_initial_label : :class:`~pyformlang.finite_automaton.Symbol` - The new initial label - - """ - - new_initial_label = to_symbol(new_initial_label) - if new_initial_label not in self._labels: - raise ValueError( - "New initial label not in set of labels for boxes") + def to_dot(self): + """ Create dot representation of recursive automaton """ + dot_string = 'digraph "" {' + for box in self._nonterminal_to_box.values(): + dot_string += f'\n{box.to_subgraph_dot()}' + dot_string += "\n}" + return dot_string @property - def labels(self) -> set: - """ The set of labels """ + def nonterminals(self) -> set: + """ The set of nonterminals """ - return self._labels + return set(self._nonterminal_to_box.keys()) @property def boxes(self) -> dict: """ The set of boxes """ - return self._boxes + return self._nonterminal_to_box + + @property + def start_nonterminal(self) -> Symbol: + """ The start nonterminal """ + + return self._start_nonterminal @property - def initial_label(self) -> Symbol: - """ The initial label """ + def start_box(self): + """ The start box """ - return self._initial_label + return self.boxes[self.start_nonterminal] @classmethod - def 
from_regex(cls, regex: Regex, initial_label: Symbol): + def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]): """ Create a recursive automaton from regular expression Parameters ----------- regex : :class:`~pyformlang.regular_expression.Regex` The regular expression - initial_label : :class:`~pyformlang.finite_automaton.Symbol` - The initial label for the recursive automaton + start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str + The start nonterminal for the recursive automaton Returns ----------- rsa : :class:`~pyformlang.rsa.RecursiveAutomaton` The new recursive automaton built from regular expression """ - - initial_label = to_symbol(initial_label) - box = Box(regex.to_epsilon_nfa().minimize(), initial_label) - return RecursiveAutomaton({initial_label}, initial_label, {box}) + start_nonterminal = to_symbol(start_nonterminal) + box = Box(regex.to_epsilon_nfa().minimize(), start_nonterminal) + return RecursiveAutomaton(box, {box}) @classmethod - def from_text(cls, text, start_symbol: Symbol = Symbol("S")): - """ Create a recursive automaton from text + def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): + """ Create a recursive automaton from ebnf (ebnf = Extended Backus-Naur Form) Parameters ----------- text : str The text of transform - start_symbol : str, optional - The start symbol, S by default + start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str, optional + The start nonterminal, S by default Returns ----------- rsa : :class:`~pyformlang.rsa.RecursiveAutomaton` The new recursive automaton built from context-free grammar """ - + start_nonterminal = to_symbol(start_nonterminal) productions = {} boxes = set() - labels = set() + nonterminals = set() for production in text.splitlines(): production = production.strip() if "->" not in production: @@ -164,7 +142,7 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")): head, body = production.split("->") head = 
head.strip() body = body.strip() - labels.add(to_symbol(head)) + nonterminals.add(to_symbol(head)) if body == "": body = Epsilon().to_text() @@ -177,11 +155,13 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")): for head, body in productions.items(): boxes.add(Box(Regex(body).to_epsilon_nfa().minimize(), to_symbol(head))) + start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal) + return RecursiveAutomaton(start_box, boxes) - return RecursiveAutomaton(labels, start_symbol, boxes) - - def is_equivalent_to(self, other): - """ Check whether two recursive automata are equivalent + def is_equals_to(self, other): + """ + Check whether two recursive automata are equal by boxes. + Not equivalence in terms of formal language theory, just a comparison of box mappings Parameters ---------- @@ -191,23 +171,11 @@ def is_equivalent_to(self, other): Returns ---------- are_equivalent : bool - Whether the two recursive automata are equivalent or not + Whether the two recursive automata are equal or not """ - if not isinstance(other, RecursiveAutomaton): return False - - if self._labels != other._labels: - return False - - for label in self._labels: - box_1 = self.get_box(label) - box_2 = other.get_box(label) - - if box_1 != box_2: - return False - - return True + return self.boxes == other.boxes def __eq__(self, other): - return self.is_equivalent_to(other) + return self.is_equals_to(other) diff --git a/pyformlang/rsa/tests/test_rsa.py b/pyformlang/rsa/tests/test_rsa.py index cb5bb28..8ed4721 100644 --- a/pyformlang/rsa/tests/test_rsa.py +++ b/pyformlang/rsa/tests/test_rsa.py @@ -12,81 +12,64 @@ class TestRSA(unittest.TestCase): """ Test class for RSA """ def test_creation(self): - """ Test the creation of a RSA """ + """ Test the creation of an RSA """ # S -> a S b | a b - enfa = Regex("a S b | a b").to_epsilon_nfa() + regex = Regex("a S b | a b") + enfa = regex.to_epsilon_nfa() dfa = enfa.minimize() - box = Box(dfa, 
Symbol("S")) - rsa_1 = RecursiveAutomaton({Symbol("S")}, Symbol("S"), {box}) + box = Box(dfa, "S") + rsa_1 = RecursiveAutomaton(box, set()) - self.assertEqual(rsa_1.get_number_of_boxes(), 1) - self.assertEqual(box, rsa_1.get_box(Symbol("S"))) - self.assertEqual(rsa_1.labels, {Symbol("S")}) - self.assertEqual(rsa_1.initial_label, Symbol("S")) + self.assertEqual(rsa_1.get_number_boxes(), 1) + self.assertEqual(box, rsa_1.get_box_by_nonterminal("S")) + self.assertEqual(rsa_1.nonterminals, {Symbol("S")}) + self.assertEqual(rsa_1.start_nonterminal, Symbol("S")) - rsa_2 = RecursiveAutomaton() - rsa_2.add_box(box) - rsa_2.change_initial_label(Symbol("S")) + rsa_2 = RecursiveAutomaton.from_regex(regex, "S") self.assertEqual(rsa_2, rsa_1) - # Checking to add a start label - rsa_3 = RecursiveAutomaton(set(), Symbol("S"), {box}) - self.assertEqual(rsa_3.labels, {Symbol("S")}) - - with self.assertRaises(ValueError) as _: - RecursiveAutomaton( - {Symbol("S"), Symbol("v")}, Symbol("S"), {box}) - def test_from_regex(self): """ Test creation of an RSA from a regex""" # S -> a* - rsa_2 = RecursiveAutomaton.from_regex(Regex("a*"), Symbol("S")) + rsa_2 = RecursiveAutomaton.from_regex(Regex("a*"), "S") enfa = Regex("a*").to_epsilon_nfa() dfa = enfa.minimize() - box = Box(dfa, Symbol("S")) - rsa_1 = RecursiveAutomaton({Symbol("S")}, Symbol("S"), {box}) + box = Box(dfa, "S") + rsa_1 = RecursiveAutomaton(box, set()) self.assertEqual(rsa_2, rsa_1) - def test_is_equivalent_to(self): - """ Test the equivalence of two RSAs""" + def test_is_equals_to(self): + """ Test the equality of two RSAs""" # S -> a* b* - rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), Symbol("S")) + rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), "S") # S -> a+ b+ - rsa_2 = RecursiveAutomaton.from_regex(Regex("a a* b b*"), Symbol("S")) + rsa_2 = RecursiveAutomaton.from_regex(Regex("a a* b b*"), "S") self.assertNotEqual(rsa_1, rsa_2) - def test_add_box(self): - """ Test adding a box """ - rsa_1 = 
RecursiveAutomaton.from_regex(Regex("a* b*"), Symbol("S")) - new_box = Box(Regex("a*").to_epsilon_nfa().minimize(), Symbol("S")) - rsa_1.add_box(new_box) - self.assertEqual(new_box.dfa, rsa_1.get_box(Symbol("S")).dfa) - self.assertEqual(rsa_1.labels, {Symbol("S")}) - - def test_from_text(self): - """ Test reading RSA from a text""" + def test_from_ebnf(self): + """ Test reading RSA from ebnf""" # g1: S -> a S b | a b - rsa1_g1 = RecursiveAutomaton.from_text("S -> a S b | a b") + rsa1_g1 = RecursiveAutomaton.from_ebnf("S -> a S b | a b") rsa2_g1 = RecursiveAutomaton.from_regex( - Regex("a S b | a b"), Symbol("S")) + Regex("a S b | a b"), "S") self.assertEqual(rsa1_g1, rsa2_g1) # g2: S -> a V b # V -> c S d | c d - rsa1_g2 = RecursiveAutomaton.from_text(""" + rsa1_g2 = RecursiveAutomaton.from_ebnf(""" S -> a V b V -> c S d | c d""") - self.assertEqual(rsa1_g2.get_number_of_boxes(), 2) - self.assertEqual(rsa1_g2.labels, {Symbol("S"), Symbol("V")}) + self.assertEqual(rsa1_g2.get_number_boxes(), 2) + self.assertEqual(rsa1_g2.nonterminals, {Symbol("S"), Symbol("V")}) dfa_s = Regex("a V b").to_epsilon_nfa().minimize() - self.assertEqual(rsa1_g2.get_box(Symbol("S")), Box(dfa_s, Symbol("S"))) + self.assertEqual(rsa1_g2.get_box_by_nonterminal("S"), Box(dfa_s, "S")) dfa_v = Regex("c S d | c d").to_epsilon_nfa().minimize() - self.assertEqual(rsa1_g2.get_box(Symbol("V")), Box(dfa_v, Symbol("V"))) + self.assertEqual(rsa1_g2.get_box_by_nonterminal("V"), Box(dfa_v, "V"))