diff --git a/.pyup.yml b/.pyup.yml index 3f9fa6a..18645ca 100644 --- a/.pyup.yml +++ b/.pyup.yml @@ -1,2 +1,2 @@ -branch: dev +branch: master update: insecure diff --git a/apybiomart/apybiomart.py b/apybiomart/apybiomart.py index c791ae3..0b20f6f 100644 --- a/apybiomart/apybiomart.py +++ b/apybiomart/apybiomart.py @@ -7,65 +7,79 @@ FiltersServer, Query -def find_marts() -> pd.DataFrame: - """Retrieve and list available marts.""" - server = MartServer() +def find_marts(save: bool = False) -> pd.DataFrame: + """Retrieve and list available marts. + + Args: + save: save results to a CSV file [default: False] + """ + server = MartServer(save=save) return server.find_marts() -def find_datasets(mart: str = "ENSEMBL_MART_ENSEMBL") -> pd.DataFrame: +def find_datasets(mart: str = "ENSEMBL_MART_ENSEMBL", + save: bool = False) -> pd.DataFrame: """Retrieve and list available datasets for a given mart. Args: mart: BioMart mart name (default: "ENSEMBL_MART_ENSEMBL") + save: save results to a CSV file [default: False] """ - server = DatasetServer(mart) + server = DatasetServer(mart, save=save) return server.find_datasets() -def find_attributes(dataset: str = "hsapiens_gene_ensembl") -> pd.DataFrame: +def find_attributes(dataset: str = "hsapiens_gene_ensembl", + save: bool = False) -> pd.DataFrame: """Retrieve and list available attributes for a given mart. Args: dataset: BioMart dataset name (default: "hsapiens_gene_ensembl") + save: save results to a CSV file [default: False] """ - server = AttributesServer(dataset) + server = AttributesServer(dataset, save=save) return server.find_attributes() -def find_filters(dataset: str = "hsapiens_gene_ensembl") -> pd.DataFrame: +def find_filters(dataset: str = "hsapiens_gene_ensembl", + save: bool = False) -> pd.DataFrame: """Retrieve and list available filters for a given mart. Args: dataset: BioMart dataset name (default: "hsapiens_gene_ensembl") + save: save results to a CSV file [default: False] """ - server = FiltersServer(dataset) + server = FiltersServer(dataset, save=save) return server.find_filters() def query(attributes: List[str], filters: Dict[str, Union[str, int, list, tuple, bool]], - dataset: str = "hsapiens_gene_ensembl") -> pd.DataFrame: + dataset: str = "hsapiens_gene_ensembl", + save: bool = False) -> pd.DataFrame: """Launch synchronous query using the given attributes, filters and dataset. Args: attributes: list of attributes to include filters: dict of filter name : value to filter results dataset: BioMart dataset name (default: "hsapiens_gene_ensembl") + save: save results to a CSV file [default: False] """ - server = Query(attributes, filters, dataset) + server = Query(attributes, filters, dataset, save=save) return server.query() async def aquery(attributes: List[str], filters: Dict[str, Union[str, int, list, tuple, bool]], - dataset: str = "hsapiens_gene_ensembl") -> pd.DataFrame: + dataset: str = "hsapiens_gene_ensembl", + save: bool = False) -> pd.DataFrame: """Launch asynchronous query using the given attributes, filters and dataset. Args: attributes: list of attributes to include filters: dict of filter name : value to filter results dataset: BioMart dataset name (default: "hsapiens_gene_ensembl") + save: save results to a CSV file [default: False] """ - server = Query(attributes, filters, dataset) + server = Query(attributes, filters, dataset, save=save) return await server.aquery() diff --git a/apybiomart/classes.py b/apybiomart/classes.py index 74e0174..8f0af57 100644 --- a/apybiomart/classes.py +++ b/apybiomart/classes.py @@ -1,13 +1,14 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import io +from typing import Optional, Dict, Any, Tuple, Generator, List, Union +from xml.etree import ElementTree as ET + import asyncio import aiohttp -import io import requests import pandas as pd -from xml.etree import ElementTree as ET -from typing import Optional, Dict, Any, Tuple, Generator, List, Union class _BiomartException(Exception): @@ -20,11 +21,14 @@ class _Server: Attributes: host: URL to connect to + save: save results to a CSV file [default: False] """ def __init__(self, - host: str = "http://www.ensembl.org/biomart/martservice"): + host: str = "http://www.ensembl.org/biomart/martservice", + save: bool = False): self.host = host + self.save = save if not self._check_connection(): raise _BiomartException("No internet connection available!") @@ -76,8 +80,8 @@ async def get_async(self, class MartServer(_Server): """Class used to retrieve and list available marts.""" - def __init__(self): - super().__init__() + def __init__(self, save: bool = False): + super().__init__(save=save) def find_marts(self) -> pd.DataFrame: """Return the list of available marts as a dataframe. @@ -88,6 +92,9 @@ def find_marts(self) -> pd.DataFrame: df = pd.DataFrame.from_records(self._fetch_marts(), columns=["name", "display_name"]) df.columns = ["Mart_ID", "Mart_name"] + df.replace(pd.np.nan, "", inplace=True) + if self.save: + df.to_csv("apybiomart_marts.csv", index=False) return df @@ -131,8 +138,8 @@ class DatasetServer(_Server): mart: BioMart mart name """ - def __init__(self, mart: str): - super().__init__() + def __init__(self, mart: str, save: bool = False): + super().__init__(save=save) self.mart = mart def find_datasets(self) -> pd.DataFrame: @@ -147,6 +154,9 @@ def find_datasets(self) -> pd.DataFrame: usecols=["name", "display_name"]) df["mart"] = self.mart df.columns = ["Dataset_ID", "Dataset_name", "Mart_ID"] + df.replace(pd.np.nan, "", inplace=True) + if self.save: + df.to_csv("apybiomart_datasets.csv", index=False) return df @@ -172,8 +182,8 @@ class AttributesServer(_Server): dataset: BioMart dataset name """ - def __init__(self, dataset: str): - super().__init__() + def __init__(self, dataset: str, save: bool = False): + super().__init__(save=save) self.dataset = dataset def find_attributes(self) -> pd.DataFrame: @@ -186,6 +196,9 @@ def find_attributes(self) -> pd.DataFrame: df["dataset"] = self.dataset df.columns = ["Attribute_ID", "Attribute_name", "Attribute_description", "Dataset_ID"] + df.replace(pd.np.nan, "", inplace=True) + if self.save: + df.to_csv("apybiomart_attributes.csv", index=False) return df @@ -236,8 +249,8 @@ class FiltersServer(_Server): dataset: BioMart dataset name """ - def __init__(self, dataset: str): - super().__init__() + def __init__(self, dataset: str, save: bool = False): + super().__init__(save=save) self.dataset = dataset def find_filters(self) -> pd.DataFrame: @@ -250,6 +263,9 @@ def find_filters(self) -> pd.DataFrame: df["dataset"] = self.dataset df.columns = ["Filter_ID", "Filter_type", "Filter_description", "Dataset_ID"] + df.replace(pd.np.nan, "", inplace=True) + if self.save: + df.to_csv("apybiomart_filters.csv", index=False) return df @@ -304,8 +320,9 @@ class Query(_Server): def __init__(self, attributes: List[str], filters: Dict[str, Union[str, int, list, tuple, bool]], - dataset: str): - super().__init__() + dataset: str, + save: bool = False): + super().__init__(save=save) self.attributes = attributes self.filters = filters self.dataset = dataset @@ -357,6 +374,10 @@ def query(self) -> pd.DataFrame: # Type error is raised of a data type is not understood by pandas except TypeError as err: raise ValueError("Non valid data type is used in dtypes") + result.replace(pd.np.nan, "", inplace=True) + + if self.save: + result.to_csv("apybiomart_query.csv", index=False) return result @@ -407,6 +428,10 @@ async def aquery(self) -> pd.DataFrame: # Type error is raised of a data type is not understood by pandas except TypeError as err: raise ValueError("Non valid data type is used in dtypes") + result.replace(pd.np.nan, "", inplace=True) + + if self.save: + result.to_csv("apybiomart_aquery.csv", index=False) return result diff --git a/apybiomart/commands/attributes.py b/apybiomart/commands/attributes.py index 8f25e8a..383f075 100644 --- a/apybiomart/commands/attributes.py +++ b/apybiomart/commands/attributes.py @@ -10,10 +10,12 @@ @click.command("attributes") @click.option("--dataset", default="hsapiens_gene_ensembl", type=str, help="BioMart dataset name (default: 'hsapiens_gene_ensembl')") -def cli_attributes(dataset): +@click.option("--save", "-s", default=False, is_flag=True, + help="Save results to a CSV file [default: False]") +def cli_attributes(dataset, save): """Retrieve and list available attributes for a given mart.""" pd.set_option("max_rows", 999) - attributes = find_attributes(dataset) + attributes = find_attributes(dataset, save=save) attributes.columns = [col.replace("_", " ") for col in attributes.columns] click.echo(attributes) return 0 diff --git a/apybiomart/commands/datasets.py b/apybiomart/commands/datasets.py index 2f05b71..105aba1 100644 --- a/apybiomart/commands/datasets.py +++ b/apybiomart/commands/datasets.py @@ -10,10 +10,12 @@ @click.command("datasets") @click.option("--mart", default="ENSEMBL_MART_ENSEMBL", type=str, help="BioMart mart name (default: 'ENSEMBL_MART_ENSEMBL')") -def cli_datasets(mart): +@click.option("--save", "-s", default=False, is_flag=True, + help="Save results to a CSV file [default: False]") +def cli_datasets(mart, save): """Retrieve and list available datasets for a given mart.""" pd.set_option("max_rows", 999) - datasets = find_datasets(mart) + datasets = find_datasets(mart, save=save) datasets.columns = [col.replace("_", " ") for col in datasets.columns] click.echo(datasets) return 0 diff --git a/apybiomart/commands/filters.py b/apybiomart/commands/filters.py index bc458d3..ae53583 100644 --- a/apybiomart/commands/filters.py +++ b/apybiomart/commands/filters.py @@ -10,10 +10,12 @@ @click.command("filters") @click.option("--dataset", default="hsapiens_gene_ensembl", type=str, help="BioMart dataset name (default: 'hsapiens_gene_ensembl')") -def cli_filters(dataset): +@click.option("--save", "-s", default=False, is_flag=True, + help="Save results to a CSV file [default: False]") +def cli_filters(dataset, save): """Retrieve and list available filters for a given mart.""" pd.set_option("max_rows", 999) - filters = find_filters(dataset) + filters = find_filters(dataset, save=save) filters.columns = [col.replace("_", " ") for col in filters.columns] click.echo(filters) return 0 diff --git a/apybiomart/commands/marts.py b/apybiomart/commands/marts.py index 75410fb..e523588 100644 --- a/apybiomart/commands/marts.py +++ b/apybiomart/commands/marts.py @@ -8,10 +8,12 @@ @click.command("marts") -def cli_marts(): +@click.option("--save", "-s", default=False, is_flag=True, + help="Save results to a CSV file [default: False]") +def cli_marts(save): """Retrieve and list available marts.""" pd.set_option("max_rows", 999) - marts = find_marts() + marts = find_marts(save=save) marts.columns = [col.replace("_", " ") for col in marts.columns] click.echo(marts) return 0 diff --git a/apybiomart/tests/create_suite.py b/apybiomart/tests/create_suite.py index e156422..f3a3288 100644 --- a/apybiomart/tests/create_suite.py +++ b/apybiomart/tests/create_suite.py @@ -1,9 +1,9 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste -import asyncio import os -import pandas as pd + +import asyncio import apybiomart as apy DATADIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data") diff --git a/apybiomart/tests/data/attributes_chircus_snp.pkl b/apybiomart/tests/data/attributes_chircus_snp.pkl index ba548ab..37d3445 100644 Binary files a/apybiomart/tests/data/attributes_chircus_snp.pkl and b/apybiomart/tests/data/attributes_chircus_snp.pkl differ diff --git a/apybiomart/tests/data/attributes_closure_ECO.pkl b/apybiomart/tests/data/attributes_closure_ECO.pkl index e5012ba..40d1ff1 100644 Binary files a/apybiomart/tests/data/attributes_closure_ECO.pkl and b/apybiomart/tests/data/attributes_closure_ECO.pkl differ diff --git a/apybiomart/tests/data/attributes_hsapiens_encode.pkl b/apybiomart/tests/data/attributes_hsapiens_encode.pkl index 1464873..baa6689 100644 Binary files a/apybiomart/tests/data/attributes_hsapiens_encode.pkl and b/apybiomart/tests/data/attributes_hsapiens_encode.pkl differ diff --git a/apybiomart/tests/data/attributes_hsapiens_gene_ensembl.pkl b/apybiomart/tests/data/attributes_hsapiens_gene_ensembl.pkl index 8ae48d5..6c5fde1 100644 Binary files a/apybiomart/tests/data/attributes_hsapiens_gene_ensembl.pkl and b/apybiomart/tests/data/attributes_hsapiens_gene_ensembl.pkl differ diff --git a/apybiomart/tests/data/attributes_hsapiens_peak.pkl b/apybiomart/tests/data/attributes_hsapiens_peak.pkl index 6f13a3d..85bd1cb 100644 Binary files a/apybiomart/tests/data/attributes_hsapiens_peak.pkl and b/apybiomart/tests/data/attributes_hsapiens_peak.pkl differ diff --git a/apybiomart/tests/data/datasets_ensembl.pkl b/apybiomart/tests/data/datasets_ensembl.pkl index c896271..8116ed9 100644 Binary files a/apybiomart/tests/data/datasets_ensembl.pkl and b/apybiomart/tests/data/datasets_ensembl.pkl differ diff --git a/apybiomart/tests/data/datasets_funcgen.pkl b/apybiomart/tests/data/datasets_funcgen.pkl index 92ef87e..7098d8d 100644 Binary files a/apybiomart/tests/data/datasets_funcgen.pkl and b/apybiomart/tests/data/datasets_funcgen.pkl differ diff --git a/apybiomart/tests/data/datasets_genomic.pkl b/apybiomart/tests/data/datasets_genomic.pkl index 958fb47..29b85f1 100644 Binary files a/apybiomart/tests/data/datasets_genomic.pkl and b/apybiomart/tests/data/datasets_genomic.pkl differ diff --git a/apybiomart/tests/data/datasets_mouse.pkl b/apybiomart/tests/data/datasets_mouse.pkl index a12336d..a10006a 100644 Binary files a/apybiomart/tests/data/datasets_mouse.pkl and b/apybiomart/tests/data/datasets_mouse.pkl differ diff --git a/apybiomart/tests/data/datasets_ontology.pkl b/apybiomart/tests/data/datasets_ontology.pkl index bc196c8..2302037 100644 Binary files a/apybiomart/tests/data/datasets_ontology.pkl and b/apybiomart/tests/data/datasets_ontology.pkl differ diff --git a/apybiomart/tests/data/datasets_sequence.pkl b/apybiomart/tests/data/datasets_sequence.pkl index a1d5b9b..ad52c34 100644 Binary files a/apybiomart/tests/data/datasets_sequence.pkl and b/apybiomart/tests/data/datasets_sequence.pkl differ diff --git a/apybiomart/tests/data/datasets_snp.pkl b/apybiomart/tests/data/datasets_snp.pkl index d6a399a..e590cde 100644 Binary files a/apybiomart/tests/data/datasets_snp.pkl and b/apybiomart/tests/data/datasets_snp.pkl differ diff --git a/apybiomart/tests/data/filters_chircus_snp.pkl b/apybiomart/tests/data/filters_chircus_snp.pkl index 050b4ac..67d78e5 100644 Binary files a/apybiomart/tests/data/filters_chircus_snp.pkl and b/apybiomart/tests/data/filters_chircus_snp.pkl differ diff --git a/apybiomart/tests/data/filters_closure_ECO.pkl b/apybiomart/tests/data/filters_closure_ECO.pkl index efb9b56..2c3b987 100644 Binary files a/apybiomart/tests/data/filters_closure_ECO.pkl and b/apybiomart/tests/data/filters_closure_ECO.pkl differ diff --git a/apybiomart/tests/data/filters_hsapiens_encode.pkl b/apybiomart/tests/data/filters_hsapiens_encode.pkl index f31096a..ae57d1f 100644 Binary files a/apybiomart/tests/data/filters_hsapiens_encode.pkl and b/apybiomart/tests/data/filters_hsapiens_encode.pkl differ diff --git a/apybiomart/tests/data/filters_hsapiens_gene_ensembl.pkl b/apybiomart/tests/data/filters_hsapiens_gene_ensembl.pkl index 69a9cf2..85ce42c 100644 Binary files a/apybiomart/tests/data/filters_hsapiens_gene_ensembl.pkl and b/apybiomart/tests/data/filters_hsapiens_gene_ensembl.pkl differ diff --git a/apybiomart/tests/data/filters_hsapiens_peak.pkl b/apybiomart/tests/data/filters_hsapiens_peak.pkl index c1a64ce..74240e5 100644 Binary files a/apybiomart/tests/data/filters_hsapiens_peak.pkl and b/apybiomart/tests/data/filters_hsapiens_peak.pkl differ diff --git a/apybiomart/tests/test_aquery.py b/apybiomart/tests/test_aquery.py index 67a0aa7..aed6406 100644 --- a/apybiomart/tests/test_aquery.py +++ b/apybiomart/tests/test_aquery.py @@ -1,9 +1,13 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + import asyncio +import pandas as pd from pandas.testing import assert_frame_equal + from apybiomart import aquery @@ -22,6 +26,28 @@ def test_aquery_default(df_query_ensembl_hsapiens_gene_chrom_2): assert_frame_equal(result, expect) +def test_aquery_save(df_query_ensembl_hsapiens_gene_chrom_2): + """Test the saved async query results for the default dataset + (hsapiens_gene_ensembl).""" + expect = (df_query_ensembl_hsapiens_gene_chrom_2 + .reset_index(drop=True)) + + loop = asyncio.get_event_loop() + result = loop.run_until_complete( + aquery(attributes=["ensembl_gene_id", "external_gene_name"], + filters={"chromosome_name": "2"}, + save=True) + ) + saved = (pd.read_csv("apybiomart_aquery.csv") + .replace(pd.np.nan, "") + .reset_index(drop=True)) + + try: + assert_frame_equal(saved, expect) + finally: + os.remove("apybiomart_aquery.csv") + + def test_aquery_default_int(df_query_ensembl_hsapiens_gene_chrom_2): """Test the async query results for the default dataset (hsapiens_gene_ensembl) with int filters parameter.""" diff --git a/apybiomart/tests/test_attributes.py b/apybiomart/tests/test_attributes.py index 735364a..50eb37f 100644 --- a/apybiomart/tests/test_attributes.py +++ b/apybiomart/tests/test_attributes.py @@ -1,8 +1,12 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + +import pandas as pd from pandas.testing import assert_frame_equal + from apybiomart import find_attributes @@ -19,6 +23,25 @@ def test_find_attributes_default(df_attributes_ensembl_hsapiens_gene): assert_frame_equal(result, expect) +def test_find_attributes_save(df_attributes_ensembl_hsapiens_gene): + """Test the available attributes returned by find_attributes(save=True) + for the default dataset (hsapiens_gene_ensembl).""" + expect = (df_attributes_ensembl_hsapiens_gene + .sort_values(by="Attribute_ID", axis=0) + .reset_index(drop=True)) + _ = find_attributes(save=True) + saved = pd.read_csv("apybiomart_attributes.csv") + result = (saved + .replace(pd.np.nan, "") + .sort_values(by="Attribute_ID", axis=0) + .reset_index(drop=True)) + + try: + assert_frame_equal(result, expect) + finally: + os.remove("apybiomart_attributes.csv") + + def test_find_attributes_ensembl(df_attributes_ensembl_hsapiens_gene): """Test the available attributes returned by find_attributes() for the hsapiens_gene_ensembl dataset.""" diff --git a/apybiomart/tests/test_cli_attributes.py b/apybiomart/tests/test_cli_attributes.py index 28c4a12..039cda1 100644 --- a/apybiomart/tests/test_cli_attributes.py +++ b/apybiomart/tests/test_cli_attributes.py @@ -1,7 +1,9 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + from click.testing import CliRunner from apybiomart import cli @@ -18,6 +20,18 @@ def test_cli_attributes_default(): assert "hsapiens_gene_ensembl" in result.output +def test_cli_attributes_save(): + """Test the saved attributes returned by apybiomart attributes for the + default dataset (hsapiens_gene_ensembl).""" + runner = CliRunner() + result = runner.invoke(cli.main, ["attributes", "--save"]) + try: + assert result.exit_code == 0 + assert os.path.isfile("apybiomart_attributes.csv") + finally: + os.remove("apybiomart_attributes.csv") + + def test_cli_attributes_ontology(): """Test the available attributes returned by apybiomart attributes for the closure_ECO dataset.""" diff --git a/apybiomart/tests/test_cli_datasets.py b/apybiomart/tests/test_cli_datasets.py index 0d8a537..6dd6671 100644 --- a/apybiomart/tests/test_cli_datasets.py +++ b/apybiomart/tests/test_cli_datasets.py @@ -1,7 +1,9 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + from click.testing import CliRunner from apybiomart import cli @@ -18,6 +20,18 @@ def test_cli_datasets_default(): assert "ENSEMBL_MART_ENSEMBL" in result.output +def test_cli_datasets_save(): + """Test the saved datasets returned by apybiomart datasets for the + default mart (ENSEMBL_MART_ENSEMBL).""" + runner = CliRunner() + result = runner.invoke(cli.main, ["datasets", "--save"]) + try: + assert result.exit_code == 0 + assert os.path.isfile("apybiomart_datasets.csv") + finally: + os.remove("apybiomart_datasets.csv") + + def test_cli_datasets_mouse(): """Test the available datasets returned by apybiomart datasets for the ENSEMBL_MART_MOUSE mart.""" diff --git a/apybiomart/tests/test_cli_filters.py b/apybiomart/tests/test_cli_filters.py index d71aaa2..0b3e95f 100644 --- a/apybiomart/tests/test_cli_filters.py +++ b/apybiomart/tests/test_cli_filters.py @@ -1,7 +1,9 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + from click.testing import CliRunner from apybiomart import cli @@ -18,6 +20,18 @@ def test_cli_filters_default(): assert "hsapiens_gene_ensembl" in result.output +def test_cli_filters_saved(): + """Test the saved attributes returned by apybiomart filters for the + default dataset (hsapiens_gene_ensembl).""" + runner = CliRunner() + result = runner.invoke(cli.main, ["filters", "--save"]) + try: + assert result.exit_code == 0 + assert os.path.isfile("apybiomart_filters.csv") + finally: + os.remove("apybiomart_filters.csv") + + def test_cli_filters_ontology(): """Test the available attributes returned by apybiomart filters for the closure_ECO dataset.""" diff --git a/apybiomart/tests/test_cli_marts.py b/apybiomart/tests/test_cli_marts.py index 059dcc2..5c0b61b 100644 --- a/apybiomart/tests/test_cli_marts.py +++ b/apybiomart/tests/test_cli_marts.py @@ -1,7 +1,9 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + from click.testing import CliRunner from apybiomart import cli @@ -15,3 +17,14 @@ def test_cli_marts(): assert "Mart ID" in result.output assert "ENSEMBL_MART_ENSEMBL" in result.output assert "Ensembl Genes 99" in result.output + + +def test_cli_marts_save(): + """Test the available marts saved by apybiomart marts.""" + runner = CliRunner() + result = runner.invoke(cli.main, ["marts", "--save"]) + try: + assert result.exit_code == 0 + assert os.path.isfile("apybiomart_marts.csv") + finally: + os.remove("apybiomart_marts.csv") diff --git a/apybiomart/tests/test_datasets.py b/apybiomart/tests/test_datasets.py index 7d6a131..4a44937 100644 --- a/apybiomart/tests/test_datasets.py +++ b/apybiomart/tests/test_datasets.py @@ -1,8 +1,12 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + +import pandas as pd from pandas.testing import assert_frame_equal + from apybiomart import find_datasets @@ -19,6 +23,25 @@ def test_find_datasets_default(df_datasets_ensembl): assert_frame_equal(result, expect) +def test_find_datasets_save(df_datasets_ensembl): + """Test the available datasets returned by find_datasets(save=True) for + the default mart (ENSEMBL_MART_ENSEMBL).""" + expect = (df_datasets_ensembl + .sort_values(by="Dataset_ID", axis=0) + .reset_index(drop=True)) + _ = find_datasets(save=True) + saved = pd.read_csv("apybiomart_datasets.csv") + result = (saved + .replace(pd.np.nan, "") + .sort_values(by="Dataset_ID", axis=0) + .reset_index(drop=True)) + + try: + assert_frame_equal(result, expect) + finally: + os.remove("apybiomart_datasets.csv") + + def test_find_datasets_ensembl(df_datasets_ensembl): """Test the available datasets returned by find_datasets() for the ENSEMBL_MART_ENSEMBL mart.""" diff --git a/apybiomart/tests/test_filters.py b/apybiomart/tests/test_filters.py index 75f8b7d..6a5047c 100644 --- a/apybiomart/tests/test_filters.py +++ b/apybiomart/tests/test_filters.py @@ -1,8 +1,12 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + +import pandas as pd from pandas.testing import assert_frame_equal + from apybiomart import find_filters @@ -19,6 +23,25 @@ def test_find_filters_default(df_filters_ensembl_hsapiens_gene): assert_frame_equal(result, expect) +def test_find_filters_save(df_filters_ensembl_hsapiens_gene): + """Test the available filters returned by find_filters(save=True) for the + default dataset (hsapiens_gene_ensembl).""" + expect = (df_filters_ensembl_hsapiens_gene + .sort_values(by="Filter_ID", axis=0) + .reset_index(drop=True)) + _ = find_filters(save=True) + saved = pd.read_csv("apybiomart_filters.csv") + result = (saved + .replace(pd.np.nan, "") + .sort_values(by="Filter_ID", axis=0) + .reset_index(drop=True)) + + try: + assert_frame_equal(result, expect) + finally: + os.remove("apybiomart_filters.csv") + + def test_find_filters_ensembl(df_filters_ensembl_hsapiens_gene): """Test the available filters returned by find_filters() for the hsapiens_gene_ensembl dataset.""" diff --git a/apybiomart/tests/test_marts.py b/apybiomart/tests/test_marts.py index 7787b73..cbd31cf 100644 --- a/apybiomart/tests/test_marts.py +++ b/apybiomart/tests/test_marts.py @@ -1,8 +1,12 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + +import pandas as pd from pandas.testing import assert_frame_equal + from apybiomart import find_marts @@ -17,3 +21,20 @@ def test_find_marts(df_marts): assert_frame_equal(result, expect) + +def test_find_marts_save(df_marts): + """Test the available marts returned by find_marts(save=True).""" + expect = (df_marts + .sort_values(by="Mart_ID", axis=0) + .reset_index(drop=True)) + _ = find_marts(save=True) + saved = pd.read_csv("apybiomart_marts.csv") + result = (saved + .replace(pd.np.nan, "") + .sort_values(by="Mart_ID", axis=0) + .reset_index(drop=True)) + + try: + assert_frame_equal(result, expect) + finally: + os.remove("apybiomart_marts.csv") diff --git a/apybiomart/tests/test_query.py b/apybiomart/tests/test_query.py index 6d43342..c8d0171 100644 --- a/apybiomart/tests/test_query.py +++ b/apybiomart/tests/test_query.py @@ -1,8 +1,12 @@ #!/usr/bin/env python # -*- coding: UTF-8 -*- # Created by Roberto Preste +import os import pytest + +import pandas as pd from pandas.testing import assert_frame_equal + from apybiomart import query @@ -17,6 +21,25 @@ def test_query_default(df_query_ensembl_hsapiens_gene_chrom_2): assert_frame_equal(result, expect) +def test_query_save(df_query_ensembl_hsapiens_gene_chrom_2): + """Test the saved query results for the default dataset + (hsapiens_gene_ensembl).""" + expect = (df_query_ensembl_hsapiens_gene_chrom_2 + .reset_index(drop=True)) + _ = query(attributes=["ensembl_gene_id", "external_gene_name"], + filters={"chromosome_name": "2"}, + save=True) + saved = pd.read_csv("apybiomart_query.csv") + result = (saved + .replace(pd.np.nan, "") + .reset_index(drop=True)) + + try: + assert_frame_equal(result, expect) + finally: + os.remove("apybiomart_query.csv") + + def test_query_default_int(df_query_ensembl_hsapiens_gene_chrom_2): """Test the query results for the default dataset (hsapiens_gene_ensembl) with int filters parameter."""