From e64609aba0b05623d9b3859b754e7ed712ec5f3c Mon Sep 17 00:00:00 2001 From: Efim Kubishkin Date: Tue, 19 Mar 2024 21:05:20 +0300 Subject: [PATCH 1/5] Rework recursive_automaton Rename labels to nonterminals, delete _labels set, because it can be expressed by _box dict. Also some minor rework Box class and add to_dot function. --- pyformlang/rsa/box.py | 78 +++++++++----- pyformlang/rsa/recursive_automaton.py | 150 +++++++++++++------------- 2 files changed, 124 insertions(+), 104 deletions(-) diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index c99e2ac..1dca892 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -5,6 +5,7 @@ from pyformlang.finite_automaton.epsilon_nfa import EpsilonNFA from pyformlang.finite_automaton.finite_automaton import to_symbol from pyformlang.finite_automaton.symbol import Symbol +import networkx as nx class Box: @@ -16,30 +17,27 @@ class Box: ---------- enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` A epsilon nfa - label : :class:`~pyformlang.finite_automaton.Symbol` - A label for epsilon nfa + nonterminal : :class:`~pyformlang.finite_automaton.Symbol` + A nonterminal for epsilon nfa """ - def __init__(self, enfa: EpsilonNFA = None, label: Symbol = None): - if enfa is not None: - enfa = enfa.minimize() - self._dfa = enfa or EpsilonNFA() + def __init__(self, enfa: EpsilonNFA, nonterminal: Symbol | str): + self._dfa = enfa - if label is not None: - label = to_symbol(label) - self._label = label or Symbol("") + nonterminal = to_symbol(nonterminal) + self._nonterminal = nonterminal - def change_label(self, label: Symbol): - """ Set a new label + def change_nonterminal(self, nonterminal: Symbol | str): + """ Set a new nonterminal Parameters ----------- - label : :class:`~pyformlang.finite_automaton.Symbol` - The new label for automaton + nonterminal : :class:`~pyformlang.finite_automaton.Symbol` + The new nonterminal for automaton """ - self._label = to_symbol(label) + self._nonterminal = to_symbol(nonterminal) def change_dfa(self, enfa: EpsilonNFA): """ Set an epsilon finite automaton @@ -50,22 +48,54 @@ def change_dfa(self, enfa: EpsilonNFA): The new epsilon finite automaton """ - enfa = enfa.minimize() self._dfa = enfa + def to_subgraph_dot(self): + graph = self._dfa.to_networkx() + strange_nodes = [] + dot_string = (f'subgraph cluster_{self._nonterminal}\n{{ label="{self._nonterminal}"\n' + f'fontname="Helvetica,Arial,sans-serif"\n' + f'node [fontname="Helvetica,Arial,sans-serif"]\n' + f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n' + f'node [shape = circle style=filled fillcolor=white]') + for node, data in graph.nodes(data=True): + if 'is_start' not in data.keys() or 'is_final' not in data.keys(): + strange_nodes.append(node) + continue + node = node.replace(";", "") + if data['is_start']: + dot_string += f'\n{node} [fillcolor = green];' + if data['is_final']: + dot_string += f'\n{node} [shape = doublecircle];' + for strange_node in strange_nodes: + graph.remove_node(strange_node) + for node_from, node_to, data in graph.edges(data=True): + node_from = node_from.replace(";", "") + node_to = node_to.replace(";", "") + label = data['label'] + dot_string += f'\n{node_from} -> {node_to} [label = "{label}"];' + dot_string += "\n}" + return dot_string + + @classmethod + def empty_box(cls): + enfa = EpsilonNFA() + nonterminal = Symbol("") + return Box(enfa, nonterminal) + @property def dfa(self): """ Box's dfa """ return self._dfa @property - def label(self): - """ Box's label """ - return self._label + def nonterminal(self): + """ Box's nonterminal """ + return self._nonterminal @property - def start_state(self): - """ The start state """ + def start_states(self): + """ The start states """ return self._dfa.start_states @property @@ -90,14 +120,10 @@ def is_equivalent_to(self, other): if not isinstance(other, Box): return False - if self._dfa.is_equivalent_to(other.dfa) and \ - self._label == other.label: - return True - - return False + return self._dfa.is_equivalent_to(other.dfa) and self.nonterminal == other.nonterminal def __eq__(self, other): return self.is_equivalent_to(other) def __hash__(self): - return self._label.__hash__() + return self._nonterminal.__hash__() diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index f995a8b..2e0ba37 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -4,6 +4,8 @@ from typing import AbstractSet +import pydot + from pyformlang.finite_automaton.finite_automaton import to_symbol from pyformlang.finite_automaton.symbol import Symbol from pyformlang.regular_expression import Regex @@ -19,48 +21,29 @@ class RecursiveAutomaton: Parameters ---------- - labels : set of :class:`~pyformlang.finite_automaton.Symbol`, optional - A finite set of labels for boxes - initial_label : :class:`~pyformlang.finite_automaton.Symbol`, optional - A start label for automaton - boxes : set of :class:`~pyformlang.rsa.Box`, optional + start_box : :class:`~pyformlang.rsa.Box` + Start box + boxes : set of :class:`~pyformlang.rsa.Box` A finite set of boxes """ def __init__(self, - labels: AbstractSet[Symbol] = None, - initial_label: Symbol = None, - boxes: AbstractSet[Box] = None): - - if labels is not None: - labels = {to_symbol(x) for x in labels} - self._labels = labels or set() - - if initial_label is not None: - initial_label = to_symbol(initial_label) - if initial_label not in self._labels: - self._labels.add(initial_label) - self._initial_label = initial_label or Symbol("") - + start_box: Box, + boxes: AbstractSet[Box]): self._boxes = {} - if boxes is not None: - for box in boxes: - self._boxes.update({to_symbol(box.label): box}) - self._labels.add(box.label) - - for label in self._labels: - box = self.get_box(label) - if box is None: - raise ValueError( - "RSA must have the same number of labels and DFAs") + if start_box not in boxes: + self._boxes.update({to_symbol(start_box.nonterminal): start_box}) + self._start_nonterminal = to_symbol(start_box.nonterminal) + for box in boxes: + self._boxes.update({to_symbol(box.nonterminal): box}) - def get_box(self, label: Symbol): - """ Box by label """ + def get_box_by_nonterminal(self, nonterminal: Symbol | str): + """ Box by nonterminal """ - label = to_symbol(label) - if label in self._boxes: - return self._boxes[label] + nonterminal = to_symbol(nonterminal) + if nonterminal in self._boxes: + return self._boxes[nonterminal] return None @@ -74,34 +57,43 @@ def add_box(self, new_box: Box): """ - self._boxes.update({new_box.label: new_box}) - self._labels.add(to_symbol(new_box.label)) + self._boxes.update({new_box.nonterminal: new_box}) def get_number_of_boxes(self): """ Size of set of boxes """ return len(self._boxes) - def change_initial_label(self, new_initial_label: Symbol): - """ Set an initial label + def change_start_nonterminal(self, new_start_nonterminal: Symbol | str) -> None: + """ Set a start nonterminal Parameters ----------- - new_initial_label : :class:`~pyformlang.finite_automaton.Symbol` - The new initial label + new_start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str + The new start nonterminal """ - new_initial_label = to_symbol(new_initial_label) - if new_initial_label not in self._labels: + new_start_nonterminal = to_symbol(new_start_nonterminal) + if new_start_nonterminal not in self._boxes.keys(): raise ValueError( - "New initial label not in set of labels for boxes") + "New start nonterminal not in set of nonterminals for boxes") + if self.start_nonterminal == Symbol(""): + del self._boxes[self.start_nonterminal] + self._start_nonterminal = new_start_nonterminal + + def to_dot(self): + dot_string = f'digraph ""{{' + for box in self._boxes.values(): + dot_string += f'\n{box.to_subgraph_dot()}' + dot_string += "\n}" + return dot_string @property - def labels(self) -> set: - """ The set of labels """ + def nonterminals(self) -> set: + """ The set of nonterminals """ - return self._labels + return set(self._boxes.keys()) @property def boxes(self) -> dict: @@ -110,21 +102,27 @@ def boxes(self) -> dict: return self._boxes @property - def initial_label(self) -> Symbol: - """ The initial label """ + def start_nonterminal(self) -> Symbol: + """ The start nonterminal """ - return self._initial_label + return self._start_nonterminal + + @property + def start_box(self): + """ The start box """ + + return self.boxes[self.start_nonterminal] @classmethod - def from_regex(cls, regex: Regex, initial_label: Symbol): + def from_regex(cls, regex: Regex, start_nonterminal: Symbol | str): """ Create a recursive automaton from regular expression Parameters ----------- regex : :class:`~pyformlang.regular_expression.Regex` The regular expression - initial_label : :class:`~pyformlang.finite_automaton.Symbol` - The initial label for the recursive automaton + start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str + The start nonterminal for the recursive automaton Returns ----------- @@ -132,30 +130,30 @@ def from_regex(cls, regex: Regex, initial_label: Symbol): The new recursive automaton built from regular expression """ - initial_label = to_symbol(initial_label) - box = Box(regex.to_epsilon_nfa().minimize(), initial_label) - return RecursiveAutomaton({initial_label}, initial_label, {box}) + start_nonterminal = to_symbol(start_nonterminal) + box = Box(regex.to_epsilon_nfa().minimize(), start_nonterminal) + return RecursiveAutomaton(box, {box}) @classmethod - def from_text(cls, text, start_symbol: Symbol = Symbol("S")): - """ Create a recursive automaton from text + def from_ebnf(cls, text, start_nonterminal: Symbol | str = Symbol("S")): + """ Create a recursive automaton from ebnf Parameters ----------- text : str The text of transform - start_symbol : str, optional - The start symbol, S by default + start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str, optional + The start nonterminal, S by default Returns ----------- rsa : :class:`~pyformlang.rsa.RecursiveAutomaton` The new recursive automaton built from context-free grammar """ - + start_nonterminal = to_symbol(start_nonterminal) productions = {} boxes = set() - labels = set() + nonterminals = set() for production in text.splitlines(): production = production.strip() if "->" not in production: @@ -164,7 +162,7 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")): head, body = production.split("->") head = head.strip() body = body.strip() - labels.add(to_symbol(head)) + nonterminals.add(to_symbol(head)) if body == "": body = Epsilon().to_text() @@ -177,10 +175,18 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")): for head, body in productions.items(): boxes.add(Box(Regex(body).to_epsilon_nfa().minimize(), to_symbol(head))) + start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal) + return RecursiveAutomaton(start_box, boxes) - return RecursiveAutomaton(labels, start_symbol, boxes) + @classmethod + def empty(cls): + """ Generate empty rsa """ + + empty_box = Box.empty_box() + return RecursiveAutomaton(empty_box, {empty_box}) - def is_equivalent_to(self, other): +# equivalency not in terms of formal languages theory. Just mapping boxes. + def is_equals_to(self, other): """ Check whether two recursive automata are equivalent Parameters @@ -193,21 +199,9 @@ def is_equivalent_to(self, other): are_equivalent : bool Whether the two recursive automata are equivalent or not """ - if not isinstance(other, RecursiveAutomaton): return False - - if self._labels != other._labels: - return False - - for label in self._labels: - box_1 = self.get_box(label) - box_2 = other.get_box(label) - - if box_1 != box_2: - return False - - return True + return self.boxes == other.boxes def __eq__(self, other): - return self.is_equivalent_to(other) + return self.is_equals_to(other) From af8f93a8ea4bfb2007b2c4eba03c48c220d83ca5 Mon Sep 17 00:00:00 2001 From: Efim Kubishkin Date: Tue, 19 Mar 2024 21:18:15 +0300 Subject: [PATCH 2/5] Rewrite tests to actual implementation --- pyformlang/rsa/tests/test_rsa.py | 56 ++++++++++++++------------------ 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/pyformlang/rsa/tests/test_rsa.py b/pyformlang/rsa/tests/test_rsa.py index cb5bb28..9916921 100644 --- a/pyformlang/rsa/tests/test_rsa.py +++ b/pyformlang/rsa/tests/test_rsa.py @@ -12,81 +12,73 @@ class TestRSA(unittest.TestCase): """ Test class for RSA """ def test_creation(self): - """ Test the creation of a RSA """ + """ Test the creation of an RSA """ # S -> a S b | a b enfa = Regex("a S b | a b").to_epsilon_nfa() dfa = enfa.minimize() - box = Box(dfa, Symbol("S")) - rsa_1 = RecursiveAutomaton({Symbol("S")}, Symbol("S"), {box}) + box = Box(dfa, "S") + rsa_1 = RecursiveAutomaton(box, {box}) self.assertEqual(rsa_1.get_number_of_boxes(), 1) - self.assertEqual(box, rsa_1.get_box(Symbol("S"))) - self.assertEqual(rsa_1.labels, {Symbol("S")}) - self.assertEqual(rsa_1.initial_label, Symbol("S")) + self.assertEqual(box, rsa_1.get_box_by_nonterminal("S")) + self.assertEqual(rsa_1.nonterminals, {Symbol("S")}) + self.assertEqual(rsa_1.start_nonterminal, Symbol("S")) - rsa_2 = RecursiveAutomaton() + rsa_2 = RecursiveAutomaton.empty() rsa_2.add_box(box) - rsa_2.change_initial_label(Symbol("S")) + rsa_2.change_start_nonterminal("S") self.assertEqual(rsa_2, rsa_1) - # Checking to add a start label - rsa_3 = RecursiveAutomaton(set(), Symbol("S"), {box}) - self.assertEqual(rsa_3.labels, {Symbol("S")}) - - with self.assertRaises(ValueError) as _: - RecursiveAutomaton( - {Symbol("S"), Symbol("v")}, Symbol("S"), {box}) - def test_from_regex(self): """ Test creation of an RSA from a regex""" # S -> a* - rsa_2 = RecursiveAutomaton.from_regex(Regex("a*"), Symbol("S")) + rsa_2 = RecursiveAutomaton.from_regex(Regex("a*"), "S") enfa = Regex("a*").to_epsilon_nfa() dfa = enfa.minimize() - box = Box(dfa, Symbol("S")) - rsa_1 = RecursiveAutomaton({Symbol("S")}, Symbol("S"), {box}) + box = Box(dfa, "S") + rsa_1 = RecursiveAutomaton(box, {box}) self.assertEqual(rsa_2, rsa_1) - def test_is_equivalent_to(self): + def test_is_equals_to(self): """ Test the equivalence of two RSAs""" # S -> a* b* - rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), Symbol("S")) + rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), "S") # S -> a+ b+ - rsa_2 = RecursiveAutomaton.from_regex(Regex("a a* b b*"), Symbol("S")) + rsa_2 = RecursiveAutomaton.from_regex(Regex("a a* b b*"), "S") self.assertNotEqual(rsa_1, rsa_2) def test_add_box(self): """ Test adding a box """ - rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), Symbol("S")) - new_box = Box(Regex("a*").to_epsilon_nfa().minimize(), Symbol("S")) + rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), "S") + new_box = Box(Regex("a*").to_epsilon_nfa().minimize(), "S") rsa_1.add_box(new_box) - self.assertEqual(new_box.dfa, rsa_1.get_box(Symbol("S")).dfa) - self.assertEqual(rsa_1.labels, {Symbol("S")}) + self.assertEqual(new_box.dfa, rsa_1.get_box_by_nonterminal("S").dfa) + self.assertEqual(rsa_1.nonterminals, {Symbol("S")}) def test_from_text(self): """ Test reading RSA from a text""" # g1: S -> a S b | a b - rsa1_g1 = RecursiveAutomaton.from_text("S -> a S b | a b") + rsa1_g1 = RecursiveAutomaton.from_ebnf("S -> a S b | a b") rsa2_g1 = RecursiveAutomaton.from_regex( - Regex("a S b | a b"), Symbol("S")) + Regex("a S b | a b"), "S") self.assertEqual(rsa1_g1, rsa2_g1) # g2: S -> a V b # V -> c S d | c d - rsa1_g2 = RecursiveAutomaton.from_text(""" + rsa1_g2 = RecursiveAutomaton.from_ebnf(""" S -> a V b V -> c S d | c d""") self.assertEqual(rsa1_g2.get_number_of_boxes(), 2) - self.assertEqual(rsa1_g2.labels, {Symbol("S"), Symbol("V")}) + self.assertEqual(rsa1_g2.nonterminals, {Symbol("S"), Symbol("V")}) dfa_s = Regex("a V b").to_epsilon_nfa().minimize() - self.assertEqual(rsa1_g2.get_box(Symbol("S")), Box(dfa_s, Symbol("S"))) + self.assertEqual(rsa1_g2.get_box_by_nonterminal("S"), Box(dfa_s, "S")) dfa_v = Regex("c S d | c d").to_epsilon_nfa().minimize() - self.assertEqual(rsa1_g2.get_box(Symbol("V")), Box(dfa_v, Symbol("V"))) + self.assertEqual(rsa1_g2.get_box_by_nonterminal("V"), Box(dfa_v, "V")) From 8cb66d6513962c1a964dfde645f2adaed5efd69f Mon Sep 17 00:00:00 2001 From: Efim Kubishkin Date: Tue, 26 Mar 2024 14:53:50 +0300 Subject: [PATCH 3/5] Remove all 'changing' functions and empty constructors --- pyformlang/rsa/box.py | 28 ------------------ pyformlang/rsa/recursive_automaton.py | 42 ++------------------------- pyformlang/rsa/tests/test_rsa.py | 25 +++++----------- 3 files changed, 10 insertions(+), 85 deletions(-) diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index 1dca892..1d2a065 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -28,28 +28,6 @@ def __init__(self, enfa: EpsilonNFA, nonterminal: Symbol | str): nonterminal = to_symbol(nonterminal) self._nonterminal = nonterminal - def change_nonterminal(self, nonterminal: Symbol | str): - """ Set a new nonterminal - - Parameters - ----------- - nonterminal : :class:`~pyformlang.finite_automaton.Symbol` - The new nonterminal for automaton - - """ - self._nonterminal = to_symbol(nonterminal) - - def change_dfa(self, enfa: EpsilonNFA): - """ Set an epsilon finite automaton - - Parameters - ----------- - enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA` - The new epsilon finite automaton - - """ - self._dfa = enfa - def to_subgraph_dot(self): graph = self._dfa.to_networkx() strange_nodes = [] @@ -77,12 +55,6 @@ def to_subgraph_dot(self): dot_string += "\n}" return dot_string - @classmethod - def empty_box(cls): - enfa = EpsilonNFA() - nonterminal = Symbol("") - return Box(enfa, nonterminal) - @property def dfa(self): """ Box's dfa """ diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index 2e0ba37..ca00ce8 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -47,41 +47,11 @@ def get_box_by_nonterminal(self, nonterminal: Symbol | str): return None - def add_box(self, new_box: Box): - """ Set a box - - Parameters - ----------- - new_box : :class:`~pyformlang.rsa.Box` - The new box - - """ - - self._boxes.update({new_box.nonterminal: new_box}) - def get_number_of_boxes(self): """ Size of set of boxes """ return len(self._boxes) - def change_start_nonterminal(self, new_start_nonterminal: Symbol | str) -> None: - """ Set a start nonterminal - - Parameters - ----------- - new_start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str - The new start nonterminal - - """ - - new_start_nonterminal = to_symbol(new_start_nonterminal) - if new_start_nonterminal not in self._boxes.keys(): - raise ValueError( - "New start nonterminal not in set of nonterminals for boxes") - if self.start_nonterminal == Symbol(""): - del self._boxes[self.start_nonterminal] - self._start_nonterminal = new_start_nonterminal - def to_dot(self): dot_string = f'digraph ""{{' for box in self._boxes.values(): @@ -129,7 +99,6 @@ def from_regex(cls, regex: Regex, start_nonterminal: Symbol | str): rsa : :class:`~pyformlang.rsa.RecursiveAutomaton` The new recursive automaton built from regular expression """ - start_nonterminal = to_symbol(start_nonterminal) box = Box(regex.to_epsilon_nfa().minimize(), start_nonterminal) return RecursiveAutomaton(box, {box}) @@ -178,16 +147,9 @@ def from_ebnf(cls, text, start_nonterminal: Symbol | str = Symbol("S")): start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal) return RecursiveAutomaton(start_box, boxes) - @classmethod - def empty(cls): - """ Generate empty rsa """ - - empty_box = Box.empty_box() - return RecursiveAutomaton(empty_box, {empty_box}) - # equivalency not in terms of formal languages theory. Just mapping boxes. def is_equals_to(self, other): - """ Check whether two recursive automata are equivalent + """ Check whether two recursive automata are equals by boxes Parameters ---------- @@ -197,7 +159,7 @@ def is_equals_to(self, other): Returns ---------- are_equivalent : bool - Whether the two recursive automata are equivalent or not + Whether the two recursive automata are equals or not """ if not isinstance(other, RecursiveAutomaton): return False diff --git a/pyformlang/rsa/tests/test_rsa.py b/pyformlang/rsa/tests/test_rsa.py index 9916921..ee1249f 100644 --- a/pyformlang/rsa/tests/test_rsa.py +++ b/pyformlang/rsa/tests/test_rsa.py @@ -14,19 +14,18 @@ class TestRSA(unittest.TestCase): def test_creation(self): """ Test the creation of an RSA """ # S -> a S b | a b - enfa = Regex("a S b | a b").to_epsilon_nfa() + regex = Regex("a S b | a b") + enfa = regex.to_epsilon_nfa() dfa = enfa.minimize() box = Box(dfa, "S") - rsa_1 = RecursiveAutomaton(box, {box}) + rsa_1 = RecursiveAutomaton(box, {}) self.assertEqual(rsa_1.get_number_of_boxes(), 1) self.assertEqual(box, rsa_1.get_box_by_nonterminal("S")) self.assertEqual(rsa_1.nonterminals, {Symbol("S")}) self.assertEqual(rsa_1.start_nonterminal, Symbol("S")) - rsa_2 = RecursiveAutomaton.empty() - rsa_2.add_box(box) - rsa_2.change_start_nonterminal("S") + rsa_2 = RecursiveAutomaton.from_regex(regex, "S") self.assertEqual(rsa_2, rsa_1) @@ -38,12 +37,12 @@ def test_from_regex(self): enfa = Regex("a*").to_epsilon_nfa() dfa = enfa.minimize() box = Box(dfa, "S") - rsa_1 = RecursiveAutomaton(box, {box}) + rsa_1 = RecursiveAutomaton(box, {}) self.assertEqual(rsa_2, rsa_1) def test_is_equals_to(self): - """ Test the equivalence of two RSAs""" + """ Test the equals of two RSAs""" # S -> a* b* rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), "S") @@ -52,16 +51,8 @@ def test_is_equals_to(self): self.assertNotEqual(rsa_1, rsa_2) - def test_add_box(self): - """ Test adding a box """ - rsa_1 = RecursiveAutomaton.from_regex(Regex("a* b*"), "S") - new_box = Box(Regex("a*").to_epsilon_nfa().minimize(), "S") - rsa_1.add_box(new_box) - self.assertEqual(new_box.dfa, rsa_1.get_box_by_nonterminal("S").dfa) - self.assertEqual(rsa_1.nonterminals, {Symbol("S")}) - - def test_from_text(self): - """ Test reading RSA from a text""" + def test_from_ebnf(self): + """ Test reading RSA from ebnf""" # g1: S -> a S b | a b rsa1_g1 = RecursiveAutomaton.from_ebnf("S -> a S b | a b") rsa2_g1 = RecursiveAutomaton.from_regex( From c639111500b4cd7b5fb3dfa82cd64d2aa3e1f388 Mon Sep 17 00:00:00 2001 From: Efim Kubishkin Date: Thu, 4 Apr 2024 16:33:10 +0300 Subject: [PATCH 4/5] Add docs and rename confusing function and field --- pyformlang/rsa/box.py | 15 +++++---- pyformlang/rsa/recursive_automaton.py | 45 +++++++++++++++++---------- pyformlang/rsa/tests/test_rsa.py | 8 ++--- 3 files changed, 40 insertions(+), 28 deletions(-) diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index 1d2a065..ea3a852 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -5,7 +5,6 @@ from pyformlang.finite_automaton.epsilon_nfa import EpsilonNFA from pyformlang.finite_automaton.finite_automaton import to_symbol from pyformlang.finite_automaton.symbol import Symbol -import networkx as nx class Box: @@ -29,6 +28,7 @@ def __init__(self, enfa: EpsilonNFA, nonterminal: Symbol | str): self._nonterminal = nonterminal def to_subgraph_dot(self): + """Creates a named subgraph representing a box""" graph = self._dfa.to_networkx() strange_nodes = [] dot_string = (f'subgraph cluster_{self._nonterminal}\n{{ label="{self._nonterminal}"\n' @@ -37,24 +37,23 @@ def to_subgraph_dot(self): f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n' f'node [shape = circle style=filled fillcolor=white]') for node, data in graph.nodes(data=True): + node = node.replace('"', '').replace("'", "") if 'is_start' not in data.keys() or 'is_final' not in data.keys(): strange_nodes.append(node) continue - node = node.replace(";", "") if data['is_start']: - dot_string += f'\n{node} [fillcolor = green];' + dot_string += f'\n"{node}" [fillcolor = green];' if data['is_final']: - dot_string += f'\n{node} [shape = doublecircle];' + dot_string += f'\n"{node}" [shape = doublecircle];' for strange_node in strange_nodes: graph.remove_node(strange_node) for node_from, node_to, data in graph.edges(data=True): - node_from = node_from.replace(";", "") - node_to = node_to.replace(";", "") + node_from = node_from.replace('"', '').replace("'", "") + node_to = node_to.replace('"', '').replace("'", "") label = data['label'] - dot_string += f'\n{node_from} -> {node_to} [label = "{label}"];' + dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];' dot_string += "\n}" return dot_string - @property def dfa(self): """ Box's dfa """ diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index ca00ce8..032957e 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -4,7 +4,6 @@ from typing import AbstractSet -import pydot from pyformlang.finite_automaton.finite_automaton import to_symbol from pyformlang.finite_automaton.symbol import Symbol @@ -31,30 +30,43 @@ class RecursiveAutomaton: def __init__(self, start_box: Box, boxes: AbstractSet[Box]): - self._boxes = {} + self._nonterminal_to_box = {} if start_box not in boxes: - self._boxes.update({to_symbol(start_box.nonterminal): start_box}) + self._nonterminal_to_box[to_symbol(start_box.nonterminal)] = start_box self._start_nonterminal = to_symbol(start_box.nonterminal) for box in boxes: - self._boxes.update({to_symbol(box.nonterminal): box}) + self._nonterminal_to_box[to_symbol(box.nonterminal)] = box def get_box_by_nonterminal(self, nonterminal: Symbol | str): - """ Box by nonterminal """ + """ + Box by nonterminal + + Parameters + ---------- + nonterminal: :class:`~pyformlang.finite_automaton.Symbol` | str + the nonterminal of which represents a box + + Returns + ----------- + box : :class:`~pyformlang.rsa.Box` | None + box represented by given nonterminal + """ nonterminal = to_symbol(nonterminal) - if nonterminal in self._boxes: - return self._boxes[nonterminal] + if nonterminal in self._nonterminal_to_box: + return self._nonterminal_to_box[nonterminal] return None - def get_number_of_boxes(self): + def get_number_boxes(self): """ Size of set of boxes """ - return len(self._boxes) + return len(self._nonterminal_to_box) def to_dot(self): - dot_string = f'digraph ""{{' - for box in self._boxes.values(): + """ Create dot representation of recursive automaton """ + dot_string = 'digraph "" {' + for box in self._nonterminal_to_box.values(): dot_string += f'\n{box.to_subgraph_dot()}' dot_string += "\n}" return dot_string @@ -63,13 +75,13 @@ def to_dot(self): def nonterminals(self) -> set: """ The set of nonterminals """ - return set(self._boxes.keys()) + return set(self._nonterminal_to_box.keys()) @property def boxes(self) -> dict: """ The set of boxes """ - return self._boxes + return self._nonterminal_to_box @property def start_nonterminal(self) -> Symbol: @@ -105,7 +117,7 @@ def from_regex(cls, regex: Regex, start_nonterminal: Symbol | str): @classmethod def from_ebnf(cls, text, start_nonterminal: Symbol | str = Symbol("S")): - """ Create a recursive automaton from ebnf + """ Create a recursive automaton from ebnf (ebnf = Extended Backus-Naur Form) Parameters ----------- @@ -147,9 +159,10 @@ def from_ebnf(cls, text, start_nonterminal: Symbol | str = Symbol("S")): start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal) return RecursiveAutomaton(start_box, boxes) -# equivalency not in terms of formal languages theory. Just mapping boxes. def is_equals_to(self, other): - """ Check whether two recursive automata are equals by boxes + """ + Check whether two recursive automata are equals by boxes. + Not equivalency in terms of formal languages theory, just mapping boxes Parameters ---------- diff --git a/pyformlang/rsa/tests/test_rsa.py b/pyformlang/rsa/tests/test_rsa.py index ee1249f..8ed4721 100644 --- a/pyformlang/rsa/tests/test_rsa.py +++ b/pyformlang/rsa/tests/test_rsa.py @@ -18,9 +18,9 @@ def test_creation(self): enfa = regex.to_epsilon_nfa() dfa = enfa.minimize() box = Box(dfa, "S") - rsa_1 = RecursiveAutomaton(box, {}) + rsa_1 = RecursiveAutomaton(box, set()) - self.assertEqual(rsa_1.get_number_of_boxes(), 1) + self.assertEqual(rsa_1.get_number_boxes(), 1) self.assertEqual(box, rsa_1.get_box_by_nonterminal("S")) self.assertEqual(rsa_1.nonterminals, {Symbol("S")}) self.assertEqual(rsa_1.start_nonterminal, Symbol("S")) @@ -37,7 +37,7 @@ def test_from_regex(self): enfa = Regex("a*").to_epsilon_nfa() dfa = enfa.minimize() box = Box(dfa, "S") - rsa_1 = RecursiveAutomaton(box, {}) + rsa_1 = RecursiveAutomaton(box, set()) self.assertEqual(rsa_2, rsa_1) @@ -65,7 +65,7 @@ def test_from_ebnf(self): rsa1_g2 = RecursiveAutomaton.from_ebnf(""" S -> a V b V -> c S d | c d""") - self.assertEqual(rsa1_g2.get_number_of_boxes(), 2) + self.assertEqual(rsa1_g2.get_number_boxes(), 2) self.assertEqual(rsa1_g2.nonterminals, {Symbol("S"), Symbol("V")}) dfa_s = Regex("a V b").to_epsilon_nfa().minimize() From 07682740c7ace06b845d0885aa5b03ba3f86da79 Mon Sep 17 00:00:00 2001 From: Efim Kubishkin Date: Fri, 5 Apr 2024 12:52:36 +0300 Subject: [PATCH 5/5] Fix signatures and to_dot Change 'Symbol | str' to Union[Symbol, str] and add reducing special symbols in to_dot --- pyformlang/rsa/box.py | 9 ++++++--- pyformlang/rsa/recursive_automaton.py | 9 ++++----- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pyformlang/rsa/box.py b/pyformlang/rsa/box.py index ea3a852..0baafe5 100644 --- a/pyformlang/rsa/box.py +++ b/pyformlang/rsa/box.py @@ -1,6 +1,7 @@ """ Representation of a box for recursive automaton """ +from typing import Union from pyformlang.finite_automaton.epsilon_nfa import EpsilonNFA from pyformlang.finite_automaton.finite_automaton import to_symbol @@ -21,7 +22,7 @@ class Box: """ - def __init__(self, enfa: EpsilonNFA, nonterminal: Symbol | str): + def __init__(self, enfa: EpsilonNFA, nonterminal: Union[Symbol, str]): self._dfa = enfa nonterminal = to_symbol(nonterminal) @@ -31,7 +32,8 @@ def to_subgraph_dot(self): """Creates a named subgraph representing a box""" graph = self._dfa.to_networkx() strange_nodes = [] - dot_string = (f'subgraph cluster_{self._nonterminal}\n{{ label="{self._nonterminal}"\n' + nonterminal = self.nonterminal.value.replace('"', '').replace("'", "").replace(".", "") + dot_string = (f'subgraph cluster_{nonterminal}\n{{ label="{nonterminal}"\n' f'fontname="Helvetica,Arial,sans-serif"\n' f'node [fontname="Helvetica,Arial,sans-serif"]\n' f'edge [fontname="Helvetica,Arial,sans-serif"]\nrankdir=LR;\n' @@ -50,10 +52,11 @@ def to_subgraph_dot(self): for node_from, node_to, data in graph.edges(data=True): node_from = node_from.replace('"', '').replace("'", "") node_to = node_to.replace('"', '').replace("'", "") - label = data['label'] + label = data['label'].replace('"', '').replace("'", "") dot_string += f'\n"{node_from}" -> "{node_to}" [label = "{label}"];' dot_string += "\n}" return dot_string + @property def dfa(self): """ Box's dfa """ diff --git a/pyformlang/rsa/recursive_automaton.py b/pyformlang/rsa/recursive_automaton.py index 032957e..1d89f36 100644 --- a/pyformlang/rsa/recursive_automaton.py +++ b/pyformlang/rsa/recursive_automaton.py @@ -2,8 +2,7 @@ Representation of a recursive automaton """ -from typing import AbstractSet - +from typing import AbstractSet, Union from pyformlang.finite_automaton.finite_automaton import to_symbol from pyformlang.finite_automaton.symbol import Symbol @@ -37,7 +36,7 @@ def __init__(self, for box in boxes: self._nonterminal_to_box[to_symbol(box.nonterminal)] = box - def get_box_by_nonterminal(self, nonterminal: Symbol | str): + def get_box_by_nonterminal(self, nonterminal: Union[Symbol, str]): """ Box by nonterminal @@ -96,7 +95,7 @@ def start_box(self): return self.boxes[self.start_nonterminal] @classmethod - def from_regex(cls, regex: Regex, start_nonterminal: Symbol | str): + def from_regex(cls, regex: Regex, start_nonterminal: Union[Symbol, str]): """ Create a recursive automaton from regular expression Parameters @@ -116,7 +115,7 @@ def from_regex(cls, regex: Regex, start_nonterminal: Symbol | str): return RecursiveAutomaton(box, {box}) @classmethod - def from_ebnf(cls, text, start_nonterminal: Symbol | str = Symbol("S")): + def from_ebnf(cls, text, start_nonterminal: Union[Symbol, str] = Symbol("S")): """ Create a recursive automaton from ebnf (ebnf = Extended Backus-Naur Form) Parameters