From cbb5bf87bc86b05504ceea44f73b134a03e653b9 Mon Sep 17 00:00:00 2001 From: SebA-R <123127567+SebA-R@users.noreply.github.com> Date: Thu, 10 Aug 2023 13:08:47 -0400 Subject: [PATCH] added ipynb --- nlp2xdl.ipynb | 503 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 503 insertions(+) create mode 100644 nlp2xdl.ipynb diff --git a/nlp2xdl.ipynb b/nlp2xdl.ipynb new file mode 100644 index 0000000..61ecbc3 --- /dev/null +++ b/nlp2xdl.ipynb @@ -0,0 +1,503 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "## Run All\n", + "Enter the required values in the Startup cell (OPENAI_API_KEY, OPENAI_ORGANIZATION_ID), then press Ctrl+F9 (Windows) or choose Runtime > Run all from the toolbar." + ], + "metadata": { + "id": "7gst_ArVt5L6" + } + }, + { + "cell_type": "code", + "source": [ + "#@title Startup\n", + "import os\n", + "%pip install Flask Flask_socketio Openai\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"Enter OPENAI API KEY\"\n", + "os.environ[\"OPENAI_ORGANIZATION_ID\"] = \"Enter OPENAI ORG ID\"" + ], + "metadata": { + "id": "Oq7AMwiBLPWv" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title Verifier\n", + "\n", + "import xml.etree.ElementTree as ET\n", + "\n", + "mandatory_properties = {\n", + " 'Add': ['vessel', 'reagent'],\n", + " 'Separate': ['purpose', 'product_phase', 'from_vessel', 'separation_vessel', 'to_vessel'],\n", + " 'Transfer': ['from_vessel', 'to_vessel'],\n", + " 'StartStir': ['vessel'],\n", + " 'Stir': ['vessel', 'time'],\n", + " 'StopStir': ['vessel'],\n", + " 'HeatChill': ['vessel', 'temp', 'time'],\n", + " 'HeatChillToTemp': ['vessel', 'temp'],\n", + " 'StartHeatChill': ['vessel', 'temp'],\n", + " 'StopHeatChill': ['vessel'],\n", + " 'EvacuateAndRefill': ['vessel'],\n", +
" 'Purge': ['vessel'],\n", + " 'StartPurge': ['vessel'],\n", + " 'StopPurge': ['vessel'],\n", + " 'Filter': ['vessel'],\n", + " 'FilterThrough': ['from_vessel', 'to_vessel', 'through'],\n", + " 'WashSolid': ['vessel', 'solvent', 'volume'],\n", + " 'Wait': ['time'],\n", + " 'Repeat': ['repeats'],\n", + " 'CleanVessel': ['vessel'],\n", + " 'Crystallize': ['vessel'],\n", + " 'Dissolve': ['vessel', 'solvent'],\n", + " 'Dry': ['vessel'],\n", + " 'Evaporate': ['vessel'],\n", + " 'Irradiate': ['vessel', 'time'],\n", + " 'Precipitate': ['vessel'],\n", + " 'ResetHandling': [],\n", + " 'RunColumn': ['from_vessel', 'to_vessel']\n", + "}\n", + "\n", + "optional_properties = {\n", + " 'Add': ['vessel', 'reagent', 'volume', 'mass', 'amount', 'dropwise', 'time', 'stir', 'stir_speed', 'viscous', 'purpose'],\n", + " 'Separate': ['purpose', 'product_phase', 'from_vessel', 'separation_vessel', 'to_vessel', 'waste_phase_to_vessel', 'solvent', 'solvent_volume', 'through', 'repeats', 'stir_time', 'stir_speed', 'settling_time'],\n", + " 'Transfer': ['from_vessel', 'to_vessel', 'volume', 'amount', 'time', 'viscous', 'rinsing_solvent', 'rinsing_volume', 'rinsing_repeats', 'solid'],\n", + " 'StartStir': ['vessel', 'stir_speed', 'purpose'],\n", + " 'Stir': ['vessel', 'time', 'stir_speed', 'continue_stirring', 'purpose'],\n", + " 'StopStir': ['vessel'],\n", + " 'HeatChill': ['vessel', 'temp', 'time', 'stir', 'stir_speed', 'purpose'],\n", + " 'HeatChillToTemp': ['vessel', 'temp', 'active', 'continue_heatchill', 'stir', 'stir_speed', 'purpose'],\n", + " 'StartHeatChill': ['vessel', 'temp', 'purpose'],\n", + " 'StopHeatChill': ['vessel'],\n", + " 'EvacuateAndRefill': ['vessel', 'gas', 'repeats'],\n", + " 'Purge': ['vessel', 'gas', 'time', 'pressure', 'flow_rate'],\n", + " 'StartPurge': ['vessel', 'gas', 'pressure', 'flow_rate'],\n", + " 'StopPurge': ['vessel'],\n", + " 'Filter': ['vessel', 'filtrate_vessel', 'stir', 'stir_speed', 'temp', 'continue_heatchill', 'volume'],\n", + " 'FilterThrough': 
['from_vessel', 'to_vessel', 'through', 'eluting_solvent', 'eluting_volume', 'eluting_repeats', 'residence_time'],\n", + " 'WashSolid': ['vessel', 'solvent', 'volume', 'filtrate_vessel', 'temp', 'stir', 'stir_speed', 'time', 'repeats'],\n", + " 'Wait': ['time'],\n", + " 'Repeat': ['repeats', 'children', 'loop_variables', 'iterative'],\n", + " 'CleanVessel': ['vessel', 'solvent', 'volume', 'temp', 'repeats'],\n", + " 'Crystallize': ['vessel', 'ramp_time', 'ramp_temp'],\n", + " 'Dissolve': ['vessel', 'solvent', 'volume', 'amount', 'temp', 'time', 'stir_speed'],\n", + " 'Dry': ['vessel', 'time', 'pressure', 'temp', 'continue_heatchill'],\n", + " 'Evaporate': ['vessel', 'time', 'pressure', 'temp', 'stir_speed'],\n", + " 'Irradiate': ['vessel', 'time', 'wavelegth', 'color', 'temp', 'stir', 'stir_speed', 'cooling_power'],\n", + " 'Precipitate': ['vessel', 'time', 'temp', 'stir_speed', 'reagent', 'volume', 'amount', 'add_time'],\n", + " 'ResetHandling': ['solvent', 'volume', 'repeats'],\n", + " 'RunColumn': ['from_vessel', 'to_vessel', 'column'],\n", + "}\n", + "\n", + "reagent_properties = [\"name\", \"inchi\", \"cas\", \"role\", \"preserve\",\n", + " \"use_for_cleaning\", \"clean_with\", \"stir\", \"temp\", \"atmosphere\", \"purity\"]\n", + "\n", + "\n", + "def parse_hardware(root, error_list, available_hardware):\n", + " hardware_list = []\n", + " tag_lst = list(root.iter('Hardware'))\n", + " tags = []\n", + " strs = []\n", + " error = \"\"\n", + " for item in tag_lst:\n", + " tags += [elem.tag for elem in item.iter()]\n", + " strs += [ET.tostring(item, encoding='unicode', method='xml').strip()]\n", + " for item in tags:\n", + " if item not in [\"Hardware\", \"Component\"]:\n", + " error = \"The Hardware section should only contain Component tags\"\n", + " for hardware in root.iter('Hardware'):\n", + " for component in hardware.iter('Component'):\n", + " # Check if the 'id' attribute exists in the component\n", + " if 'id' not in component.attrib:\n", + " error_str = 
\"One or more Component tags do not have the 'id' attribute.\"\n", + " step_str = ET.tostring(\n", + " component, encoding='unicode', method='xml').strip()\n", + " error_list.append(\n", + " {\"step\": step_str, \"errors\": [error_str]})\n", + " else:\n", + " if available_hardware:\n", + " if component.attrib['id'] not in available_hardware:\n", + " wrong_hardware = component.attrib['id']\n", + " error_str = f\"{wrong_hardware} is not defined in the given Hardware list. The available Hardware is: {', '.join(available_hardware)[:-2]}.\"\n", + " step_str = ET.tostring(\n", + " component, encoding='unicode', method='xml').strip()\n", + " error_list.append(\n", + " {\"step\": \"Hardware definition\", \"errors\": [error_str]})\n", + " hardware_list.append(component.attrib['id'])\n", + " return hardware_list, error_list, (error, strs)\n", + "\n", + "\n", + "def parse_reagents(root, error_list, available_reagents):\n", + " reagent_list = []\n", + " for reagents in root.iter('Reagents'):\n", + " for reagent in reagents.iter('Reagent'):\n", + " if available_reagents:\n", + " if reagent.attrib['name'] not in available_reagents:\n", + " wrong_reagent = reagent.attrib['name']\n", + " error_str = f\"{wrong_reagent} is not defined in the given Reagents list. 
The available reagents are: {', '.join(available_reagents)[:-2]}.\"\n", + " error_list.append(\n", + " {\"step\": \"Reagents definition\", \"errors\": [error_str]})\n", + " errors = []\n", + " if 'name' not in reagent.attrib:\n", + " errors.append(f\"You must have 'name' property in Reagent\")\n", + " else:\n", + " reagent_list.append(reagent.attrib['name'])\n", + " for attr in reagent.attrib:\n", + " if attr not in reagent_properties:\n", + " errors.append(\n", + " f\"The {attr} property in Reagent is not allowed\")\n", + " if errors:\n", + " step_str = ET.tostring(\n", + " reagent, encoding='unicode', method='xml').strip()\n", + " error_list.append({\"step\": step_str, \"errors\": errors})\n", + " return reagent_list\n", + "\n", + "\n", + "def verify_procedure(root, hardware, reagents, error_list):\n", + " for procedure in root.iter('Procedure'):\n", + " for step in procedure:\n", + " errors = []\n", + " # Check whether action is valid\n", + " action = step.tag\n", + " if action not in mandatory_properties:\n", + " errors.append(f\"There is no {action} action in XDL\")\n", + " else:\n", + " for prop in mandatory_properties[action]:\n", + " if prop not in step.attrib:\n", + " errors.append(\n", + " f\"You must have '{prop}' property when doing '{step.tag}'\")\n", + " for attr in step.attrib:\n", + " if attr not in optional_properties[action]:\n", + " allowed_actions = list(\n", + " set(optional_properties[action] + mandatory_properties[action]))\n", + " errors.append(\n", + " f\"The {attr} property in the {action} procedure is not allowed. 
The allowed properties are: {', '.join(allowed_actions)}.\")\n", + " # Check vessels are defined in Hardware\n", + " # print(error_list)\n", + " if len(error_list) == 0 or \"Hardware\" not in error_list[0][\"step\"].lower():\n", + " for attr in ['vessel', 'from_vessel', 'to_vessel']:\n", + " if attr in step.attrib and step.attrib[attr] not in hardware:\n", + " errors.append(\n", + " f\"{step.attrib[attr]} is not defined in Hardware\")\n", + " # Check reagents are defined in Reagents\n", + " if 'reagent' in step.attrib and step.attrib['reagent'] not in reagents:\n", + " reagent_name = step.attrib[\"reagent\"]\n", + " errors.append(f\"{reagent_name} is not defined in Reagents\")\n", + "\n", + " # Check if there is any text content between tags\n", + " for elem in step.iter():\n", + " if elem.text and elem.text.strip() and elem != step:\n", + " errors.append(\n", + " \"There should be no text content between tags.\")\n", + "\n", + " if errors:\n", + " step_str = ET.tostring(\n", + " step, encoding='unicode', method='xml').strip()\n", + " step_str = ' '.join(step_str.split())\n", + " error_list.append({\"step\": step_str, \"errors\": errors})\n", + " return error_list\n", + "\n", + "\n", + "def verify_synthesis(root, available_hardware, available_reagents):\n", + " error_list = []\n", + " for element in root.iter():\n", + " if element.text and element.text.strip():\n", + " errors = [\n", + " f\"Tags should not have text content: '{element.text.strip()}'\"]\n", + " step_str = ET.tostring(\n", + " element, encoding='unicode', method='xml').strip()\n", + " error_list.append({\"step\": step_str, \"errors\": errors})\n", + "\n", + " hardware, hardware_list_error_list, (errors, strs) = parse_hardware(\n", + " root, error_list, available_hardware)\n", + " if errors != \"\":\n", + " error_list.append({\"step\": \"Hardware definition\", \"errors\": [errors]})\n", + "\n", + " # return error_list\n", + " # return [{\"step\": \"Hardware definition\", \"errors\": errors}]\n", + " 
reagents = parse_reagents(root, error_list, available_reagents)\n", + " return verify_procedure(root, hardware, reagents, error_list)\n", + "\n", + "\n", + "def verify_xdl(xdl, available_hardware=None, available_reagents=None):\n", + " \"\"\"\n", + " Verify XDL and return errors\n", + " :param xdl: The XDL string to verify\n", + " :return: Returns an empty list if the input is valid.\n", + " Returns a single-entry list (\"errors\" field only) if the input cannot be parsed as XML.\n", + " Returns a list of dictionaries if it has errors. Each element has two fields.\n", + " \"step\": The string of the line which contains error.\n", + " \"errors\": The error messages for that line.\n", + " \"\"\"\n", + " if \"<XDL>\" and \"</XDL>\" not in xdl :\n", + " error_message = f\"\\n{xdl}\\nThis XDL was not correct. XDL should start with <XDL> and end with </XDL>. Please fix the errors.\"\n", + " return [{\"errors\": [error_message]}]\n", + " xdl = xdl[xdl.index(\"<XDL>\"):xdl.index(\"</XDL>\")+6]\n", + "\n", + " try:\n", + " root = ET.fromstring(xdl)\n", + " except Exception as e:\n", + " return [{\"errors\": [\"Input XDL cannot be parsed as XML, there is {} error\".format(str(e).split(\":\")[0])]}]\n", + "\n", + " return verify_synthesis(root, available_hardware, available_reagents)\n" + ], + "metadata": { + "cellView": "form", + "id": "8c_AGd_BrP7E" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#@title XDL Description\n", + "XDL_description = \"\"\"XDL files will follow XML syntax and consist of three mandatory sections: Hardware, where virtual vessels that the reaction mixture can reside in are declared. 
Reagents, where all reagents that are used in the procedure are declared, and Procedure, where the synthetic actions involved in the procedure are linearly declared.\n", + "\n", + "XDL File Stub:\n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + "\n", + " \n", + " \n", + " \n", + " \n", + "\n", + "\n", + "Hardware:\n", + "Each individual reagent, unless otherwise stated should be contained within their own component.\n", + "\n", + "(format is(Property, Type, Description))\n", + "\n", + "id, str, Name of hardware\n", + "\n", + "Reagents:\n", + "The Reagents section contains Reagent elements with the props below.\n", + "Any reagents which were combined before the experiment should be combined as one reagent before the procedure. (i.e. 'lime juice mixed with sugar' =