Add moa and compound annotation (scverse#444)

* fix test warning * add moa and compound * fix pre commit * add test and lookup * fix merge bug * update doc, fix test * fix typo + precommit * Update pertpy/tools/_metadata/_cell_line.py Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> * Update pertpy/tools/_metadata/_cell_line.py Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> * Update pertpy/tools/_metadata/_cell_line.py Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> * Update pertpy/tools/_metadata/_compound.py Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> * Update pertpy/tools/_metadata/_look_up.py Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> * add helper function * Update pertpy/tools/_metadata/_cell_line.py Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> * Update pertpy/tools/_metadata/_look_up.py Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> * Update pertpy/tools/_metadata/_compound.py Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> * improve docstring, add parent class * parent class. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * rename the class * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix doc * remove cachedir, use sequence instead of list * add type checking * add test scripts, add compare_categories * Refactor Signed-off-by: zethson <lukas.heumos@posteo.net> * Refactor Signed-off-by: zethson <lukas.heumos@posteo.net> * add correlate and plot_correlation function, simplify the code * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * format document * Refactoring and move plot Signed-off-by: zethson <lukas.heumos@posteo.net> --------- Signed-off-by: zethson <lukas.heumos@posteo.net> Co-authored-by: Lukas Heumos <lukas.heumos@posteo.net> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
eroell · Dec 20, 2023 · 9b715b8 · 9b715b8
1 parent 7695ed1
commit 9b715b8
Show file tree

Hide file tree

Showing 16 changed files with 1,198 additions and 332 deletions.
diff --git a/docs/usage/usage.md b/docs/usage/usage.md
@@ -320,30 +320,6 @@ etest = pt.tl.PermutationTest(
 tab = etest(adata, groupby="perturbation", contrast="control")
 ```
 
-### MetaData
-
-MetaData provides tooling to fetch and add more metadata to perturbations by querying a couple of databases.
-We are currently implementing several sources with more to come.
-
-CellLineMetaData aims to retrieve various types of information related to cell lines, including cell line annotation,
-bulk RNA and protein expression data.
-
-Available databases for cell line metadata:
-
--   The Cancer Dependency Map Project at Broad
--   The Cancer Dependency Map Project at Sanger
-
-```{eval-rst}
-.. currentmodule:: pertpy
-```
-
-```{eval-rst}
-.. autosummary::
-    :toctree: tools
-
-    tools.CellLineMetaData
-```
-
 ### Response prediction
 
 #### Augur
@@ -513,3 +489,42 @@ See [perturbation space tutorial](https://pertpy.readthedocs.io/en/latest/tutori
 Every tool has a set of plotting functions that start with `plot_`.
 
 However, we are planning to offer more general plots at a later point.
+
+## MetaData
+
+MetaData provides tooling to fetch and add more metadata to perturbations by querying a couple of databases.
+We are currently implementing several sources with more to come.
+
+CellLine aims to retrieve various types of information related to cell lines, including cell line annotation,
+bulk RNA and protein expression data.
+
+Available databases for cell line metadata:
+
+-   [The Cancer Dependency Map Project at Broad](https://depmap.org/portal/)
+-   [The Cancer Dependency Map Project at Sanger](https://depmap.sanger.ac.uk/)
+-   [Genomics of Drug Sensitivity in Cancer (GDSC)](https://www.cancerrxgene.org/)
+
+Compound aims to retrieve various types of information related to compounds of interest, including the most common synonym, PubChem ID, and canonical SMILES.
+
+Available databases for compound metadata:
+
+-   [PubChem](https://pubchem.ncbi.nlm.nih.gov/)
+
+Moa aims to retrieve metadata of mechanism of action studies related to perturbagens of interest, depending on the molecular targets.
+
+Available databases for mechanism of action metadata:
+
+-   [CLUE](https://clue.io/)
+
+```{eval-rst}
+.. currentmodule:: pertpy
+```
+
+```{eval-rst}
+.. autosummary::
+    :toctree: metadata
+
+    metadata.CellLine
+    metadata.Compound
+    metadata.Moa
+```
diff --git a/nicheformer-data/pyproject.toml b/nicheformer-data/pyproject.toml
@@ -0,0 +1,132 @@
+[build-system]
+build-backend = "hatchling.build"
+requires = ["hatchling"]
+
+[project]
+name = "nicheformer-data"
+version = "0.0.1"
+description = "Data collection for nicheformer"
+readme = "README.md"
+requires-python = ">=3.9"
+license = {file = "LICENSE"}
+authors = [
+    {name = "theislab"},
+]
+maintainers = [
+    {name = "theislab", email = "theislab@helmholtz-munich.de"},
+]
+urls.Documentation = "https://nicheformer-data.readthedocs.io/"
+urls.Source = "https://github.com/theislab/nicheformer-data"
+urls.Home-page = "https://github.com/theislab/nicheformer-data"
+dependencies = [
+    "anndata",
+    "scanpy",
+    "lamindb[zarr,aws,bionty,jupyter]==0.63.5",
+    "cellxgene-schema>=3.1.3",
+    # for debug logging (referenced from the issue template)
+    "session-info"
+]
+
+[project.optional-dependencies]
+dev = [
+    "pre-commit",
+    "twine>=4.0.2"
+]
+doc = [
+    "docutils>=0.8,!=0.18.*,!=0.19.*",
+    "sphinx>=4",
+    "sphinx-book-theme>=1.0.0",
+    "myst-nb",
+    "sphinxcontrib-bibtex>=1.0.0",
+    "sphinx-autodoc-typehints",
+    "sphinxext-opengraph",
+    # For notebooks
+    "ipykernel",
+    "ipython",
+    "sphinx-copybutton",
+]
+test = [
+    "pytest",
+    "pytest-cov",
+]
+
+[tool.coverage.run]
+source = ["nicheformer_data"]
+omit = [
+    "**/test_*.py",
+]
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+xfail_strict = true
+addopts = [
+    "--import-mode=importlib",  # allow using test files with same name
+]
+
+[tool.black]
+line-length = 120
+
+[tool.ruff]
+src = ["src"]
+line-length = 120
+select = [
+    "F",  # Errors detected by Pyflakes
+    "E",  # Errors detected by Pycodestyle
+    "W",  # Warnings detected by Pycodestyle
+    "I",  # isort
+    "D",  # pydocstyle
+    "B",  # flake8-bugbear
+    "TID",  # flake8-tidy-imports
+    "C4",  # flake8-comprehensions
+    "BLE",  # flake8-blind-except
+    "UP",  # pyupgrade
+    "RUF100",  # Report unused noqa directives
+]
+ignore = [
+    # line too long -> we accept long comment lines; black gets rid of long code lines
+    "E501",
+    # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
+    "E731",
+    # allow I, O, l as variable names -> I is the identity matrix
+    "E741",
+    # Missing docstring in public package
+    "D104",
+    # Missing docstring in public module
+    "D100",
+    # Missing docstring in __init__
+    "D107",
+    # Errors from function calls in argument defaults. These are fine when the result is immutable.
+    "B008",
+    # __magic__ methods are often self-explanatory, allow missing docstrings
+    "D105",
+    # first line should end with a period [Bug: doesn't work with single-line docstrings]
+    "D400",
+    # First line should be in imperative mood; try rephrasing
+    "D401",
+    ## Disable one in each pair of mutually incompatible rules
+    # We don’t want a blank line before a class docstring
+    "D203",
+    # We want docstrings to start immediately after the opening triple quote
+    "D213",
+]
+
+[tool.ruff.pydocstyle]
+convention = "numpy"
+
+[tool.ruff.per-file-ignores]
+"docs/*" = ["I"]
+"tests/*" = ["D"]
+"*/__init__.py" = ["F401"]
+"scripts/*.py" = ["D","BLE","I", "E"]
+
+[tool.cruft]
+skip = [
+    "tests",
+    "src/**/__init__.py",
+    "src/**/basic.py",
+    "docs/api.md",
+    "docs/changelog.md",
+    "docs/references.bib",
+    "docs/references.md",
+    "docs/notebooks/example.ipynb"
+]
diff --git a/pertpy/__init__.py b/pertpy/__init__.py
@@ -14,6 +14,7 @@
 warnings.filterwarnings("ignore", category=UserWarning, module="scvi._settings")
 
 from . import data as dt
+from . import metadata as md
 from . import plot as pl
 from . import preprocessing as pp
 from . import tools as tl
diff --git a/pertpy/metadata/__init__.py b/pertpy/metadata/__init__.py
@@ -0,0 +1,3 @@
+from pertpy.metadata._cell_line import CellLine
+from pertpy.metadata._compound import Compound
+from pertpy.metadata._moa import Moa