diff --git a/deploy/scripts/sem_dom_import.py b/deploy/scripts/sem_dom_import.py index 5c099e13b7..0a34701dd1 100755 --- a/deploy/scripts/sem_dom_import.py +++ b/deploy/scripts/sem_dom_import.py @@ -56,6 +56,13 @@ def parse_args() -> argparse.Namespace: default=str(default_output_dir), help="Default directory for the output files.", ) + parser.add_argument( + "--question-mode", + "-q", + choices=["full", "flat"], + default="flat", + help="Structure to be used for the domain questions.", + ) logging_group = parser.add_mutually_exclusive_group() logging_group.add_argument( "--verbose", "-v", action="store_true", help="Print detailed progress information." @@ -265,7 +272,9 @@ def write_json(output_dir: Path) -> None: file.write(f"{domain_tree[lang][id].to_json()}\n") -def generate_semantic_domains(input_files: List[Path], output_dir: Path) -> None: +def generate_semantic_domains( + input_files: List[Path], output_dir: Path, *, flatten_questions: bool = True +) -> None: for xmlfile in input_files: logging.info(f"Parsing {xmlfile}") tree = ElementTree.parse(xmlfile) @@ -290,6 +299,8 @@ def generate_semantic_domains(input_files: List[Path], output_dir: Path) -> None logging.info(f"Number of {lang} Domains: {len(domain_nodes[lang])}") for lang in domain_tree: logging.info(f"Number of {lang} Tree Nodes: {len(domain_tree[lang])}") + if not flatten_questions: + SemanticDomainFull.flatten_questions = False write_json(output_dir) @@ -303,7 +314,9 @@ def main() -> None: else: log_level = logging.WARNING logging.basicConfig(format="%(levelname)s:%(message)s", level=log_level) - generate_semantic_domains(args.input_files, args.output_dir) + generate_semantic_domains( + args.input_files, args.output_dir, flatten_questions=(args.question_mode == "flat") + ) if __name__ == "__main__": diff --git a/deploy/scripts/semantic_domains.py b/deploy/scripts/semantic_domains.py index 020d691a2e..1963a92748 100644 --- a/deploy/scripts/semantic_domains.py +++ b/deploy/scripts/semantic_domains.py @@ -39,6 +39,8 @@ def to_dict(self) -> Dict[str, str]: class SemanticDomainFull(SemanticDomain): + flatten_questions = True + def __init__(self, _guid: Optional[UUID], _lang: str, _name: str, _id: str = "") -> None: super().__init__(_guid, _lang, _name, _id) self.description = "" @@ -51,22 +53,28 @@ def to_semantic_domain_tree_node(self) -> SemanticDomainTreeNode: return SemanticDomainTreeNode(self.guid, self.lang, self.name, self.id) def to_json(self) -> str: - question_list: List[Dict[str, str]] = [] + full_question_list: List[Dict[str, str]] = [] + flat_question_list: List[str] = [] for item in self.questions: - question_list.append( - { - "question": item.question, - "example_words": item.example_words, - "example_sentences": item.example_sentences, - } - ) + if SemanticDomainFull.flatten_questions: + flat_question_list.append(item.question) + else: + full_question_list.append( + { + "question": item.question, + "example_words": item.example_words, + "example_sentences": item.example_sentences, + } + ) data = { "guid": "" if self.guid is None else str(self.guid), "lang": self.lang, "name": self.name, "id": self.id, "description": self.description, - "questions": question_list, + "questions": ( + flat_question_list if SemanticDomainFull.flatten_questions else full_question_list + ), } return json.dumps(data, indent=4)