├── BioClients
├── cfde
│ ├── __init__.py
│ └── cfchemdb
│ │ └── __init__.py
├── dnorm
│ └── __init__.py
├── omim
│ └── __init__.py
├── humanbase
│ └── __init__.py
├── maayanlab
│ ├── __init__.py
│ ├── archs4
│ │ ├── __init__.py
│ │ ├── Utils.py
│ │ └── Client.py
│ └── harmonizome
│ │ ├── __init__.py
│ │ ├── Client.py
│ │ └── Utils.py
├── medline
│ ├── __init__.py
│ ├── connect
│ │ ├── __init__.py
│ │ └── Utils.py
│ └── genetics
│ │ └── __init__.py
├── panther
│ └── __init__.py
├── pubchem
│ ├── rdf
│ │ └── __init__.py
│ ├── ftp
│ │ ├── __init__.py
│ │ └── Client.py
│ ├── soap
│ │ └── __init__.py
│ └── __init__.py
├── cdc
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── chebi
│ └── __init__.py
├── fda
│ ├── __init__.py
│ └── aer
│ │ └── __init__.py
├── gtex
│ ├── __init__.py
│ └── Client.py
├── hugo
│ └── __init__.py
├── icite
│ ├── __init__.py
│ ├── Client.py
│ └── Utils.py
├── lincs
│ ├── __init__.py
│ └── sigcom
│ │ ├── __init__.py
│ │ ├── Utils.py
│ │ └── Client.py
├── mesh
│ ├── __init__.py
│ └── Client.py
├── ncats
│ ├── __init__.py
│ └── gsrs
│ │ ├── __init__.py
│ │ └── Client.py
├── ncbo
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── pdb
│ ├── __init__.py
│ └── Client.py
├── tcga
│ ├── __init__.py
│ └── Client.py
├── ubkg
│ └── __init__.py
├── umls
│ └── __init__.py
├── amp
│ ├── t2d
│ │ ├── __init__.py
│ │ ├── Utils.py
│ │ └── Client.py
│ └── __init__.py
├── badapple
│ ├── __init__.py
│ └── Client.py
├── bindingdb
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── biogrid
│ └── __init__.py
├── brenda
│ └── __init__.py
├── chembl
│ └── __init__.py
├── disgenet
│ └── __init__.py
├── glygen
│ ├── __init__.py
│ └── Client.py
├── hubmap
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── idg
│ ├── pharos
│ │ └── __init__.py
│ ├── rss
│ │ ├── __init__.py
│ │ ├── Utils.py
│ │ └── Client.py
│ ├── tcrd
│ │ └── __init__.py
│ ├── tiga
│ │ └── __init__.py
│ ├── tinx
│ │ └── __init__.py
│ ├── __init__.py
│ └── Client.py
├── iuphar
│ └── __init__.py
├── jensenlab
│ ├── __init__.py
│ ├── Client.py
│ └── Utils.py
├── monarch
│ └── __init__.py
├── mygene
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── oncotree
│ ├── __init__.py
│ └── Utils.py
├── openphacts
│ └── __init__.py
├── pubtator
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── rxnorm
│ └── __init__.py
├── stringdb
│ └── __init__.py
├── uniprot
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── util
│ ├── db
│ │ ├── __init__.py
│ │ └── Utils.py
│ ├── hdf
│ │ ├── __init__.py
│ │ └── Utils.py
│ ├── neo4j
│ │ ├── __init__.py
│ │ ├── Utils.py
│ │ └── App.py
│ ├── obo
│ │ ├── __init__.py
│ │ ├── App.py
│ │ └── Utils.py
│ ├── owl
│ │ ├── __init__.py
│ │ ├── Utils.py
│ │ └── App.py
│ ├── rdf
│ │ ├── __init__.py
│ │ ├── Utils.py
│ │ └── App.py
│ ├── rest
│ │ └── __init__.py
│ ├── xml
│ │ └── __init__.py
│ ├── yaml
│ │ ├── __init__.py
│ │ └── Utils.py
│ ├── graphql
│ │ ├── __init__.py
│ │ └── Utils.py
│ ├── igraph
│ │ └── __init__.py
│ ├── pandas
│ │ ├── __init__.py
│ │ └── Csv2Markdown.py
│ ├── sparql
│ │ └── __init__.py
│ └── __init__.py
├── wikidata
│ ├── __init__.py
│ ├── Client.py
│ └── Utils.py
├── allen
│ ├── brain
│ │ └── __init__.py
│ └── __init__.py
├── biomarkerkb
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── bioregistry
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── clinicaltrials
│ └── __init__.py
├── drugcentral
│ ├── __init__.py
│ └── Test.py
├── emblebi
│ ├── __init__.py
│ ├── unichem
│ │ ├── __init__.py
│ │ └── Client.py
│ └── identifiers
│ │ └── __init__.py
├── ensembl
│ ├── biomart
│ │ ├── __init__.py
│ │ └── Client.py
│ └── __init__.py
├── geneontology
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
├── gwascatalog
│ └── __init__.py
├── opentargets
│ └── __init__.py
├── wikipathways
│ ├── __init__.py
│ └── Utils.py
├── reactome
│ ├── __init__.py
│ └── SMBL_utils.py
├── cas
│ ├── __init__.py
│ ├── Client.py
│ └── Utils.py
├── chem2bio2rdf
│ ├── slap
│ │ └── __init__.py
│ └── __init__.py
├── __init__.py
├── chemidplus
│ ├── __init__.py
│ └── Client.py
├── entrez
│ ├── __init__.py
│ ├── Utils.py
│ └── Client.py
└── pubmed
│ ├── __init__.py
│ └── Client.py
├── doc
├── panther.md
├── hugo.md
├── images
│ └── BioClients_logo.png
├── geneontology.md
├── entrez.md
├── pdb.md
├── monarch.md
├── brenda.md
├── biogrid.md
├── iuphar.md
├── cdc.md
├── icite.md
├── tcga.md
├── bindingdb.md
├── cas.md
├── lincs.md
├── mygene.md
├── wikipathways.md
├── ncbo.md
├── glygen.md
├── chemidplus.md
├── dnorm.md
├── fda.md
├── allen.md
├── disgenet.md
├── oncotree.md
├── amp_t2d.md
├── uniprot.md
├── maayanlab.md
├── biomarkerkb.md
├── jensenlab.md
├── humanbase.md
├── omim.md
├── chem2bio2rdf.md
├── opentargets.md
├── mesh.md
├── pubtator.md
├── reactome.md
├── gtex.md
├── bioregistry.md
├── ncats.md
├── cfde.md
├── chebi.md
├── ensembl.md
├── clinicaltrials.md
├── wikidata.md
├── badapple.md
├── stringdb.md
├── medline.md
├── pubmed.md
├── gwascatalog.md
├── rxnorm.md
├── ubkg.md
└── chembl.md
├── setup.py
└── .gitignore
/BioClients/cfde/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/BioClients/dnorm/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/BioClients/omim/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/BioClients/humanbase/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/BioClients/maayanlab/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/BioClients/medline/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/BioClients/panther/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/BioClients/pubchem/rdf/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/BioClients/cdc/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/chebi/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/fda/__init__.py:
--------------------------------------------------------------------------------
1 | from .aer import *
2 |
--------------------------------------------------------------------------------
/BioClients/gtex/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/hugo/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/icite/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/lincs/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/mesh/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/ncats/__init__.py:
--------------------------------------------------------------------------------
1 | from .gsrs import *
2 |
--------------------------------------------------------------------------------
/BioClients/ncbo/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/pdb/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/tcga/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/ubkg/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/umls/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/amp/t2d/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/badapple/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/bindingdb/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/biogrid/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/brenda/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/chembl/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/disgenet/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/fda/aer/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/glygen/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/hubmap/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/idg/pharos/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/idg/rss/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/idg/tcrd/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/idg/tiga/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/idg/tinx/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/iuphar/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/jensenlab/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/monarch/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/mygene/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/ncats/gsrs/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/oncotree/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/openphacts/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/pubtator/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/rxnorm/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/stringdb/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/uniprot/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/db/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/hdf/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/neo4j/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/obo/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/owl/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/rdf/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/rest/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/xml/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/yaml/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/wikidata/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/allen/brain/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/biomarkerkb/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/bioregistry/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/cfde/cfchemdb/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/clinicaltrials/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/drugcentral/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/emblebi/__init__.py:
--------------------------------------------------------------------------------
1 | from .identifiers import *
2 |
--------------------------------------------------------------------------------
/BioClients/emblebi/unichem/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/ensembl/biomart/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/geneontology/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/gwascatalog/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/lincs/sigcom/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/medline/connect/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/opentargets/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/pubchem/ftp/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/pubchem/soap/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/graphql/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/igraph/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/pandas/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/util/sparql/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/wikipathways/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/emblebi/identifiers/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/maayanlab/archs4/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/medline/genetics/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/doc/panther.md:
--------------------------------------------------------------------------------
1 | # `BioClients.panther`
2 |
3 | ## Panther
4 |
5 |
--------------------------------------------------------------------------------
/BioClients/maayanlab/harmonizome/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
--------------------------------------------------------------------------------
/BioClients/ensembl/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 |
3 | __all__ = [ "biomart" ]
4 |
--------------------------------------------------------------------------------
/BioClients/reactome/__init__.py:
--------------------------------------------------------------------------------
1 | from .Utils import *
2 | #from .SMBL_utils import *
3 |
--------------------------------------------------------------------------------
/doc/hugo.md:
--------------------------------------------------------------------------------
1 | # `BioClients.hugo`
2 |
3 | ## HUGO
4 |
5 | *
6 |
--------------------------------------------------------------------------------
/BioClients/cas/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for CAS web services."""
2 |
3 | from .Utils import *
4 |
5 |
--------------------------------------------------------------------------------
/BioClients/chem2bio2rdf/slap/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for SLAP REST API."""
2 | from .Utils import *
3 |
--------------------------------------------------------------------------------
/BioClients/__init__.py:
--------------------------------------------------------------------------------
1 | """Python package for access to online biomedical resources, usually via REST APIs."""
2 |
--------------------------------------------------------------------------------
/BioClients/allen/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for Allen Institute web services."""
2 |
3 | __all__ = [ "brain" ]
4 |
--------------------------------------------------------------------------------
/doc/images/BioClients_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jeremyjyang/BioClients/HEAD/doc/images/BioClients_logo.png
--------------------------------------------------------------------------------
/doc/geneontology.md:
--------------------------------------------------------------------------------
1 | # `BioClients.geneontology`
2 |
3 | ## Gene Ontology
4 |
5 | *
6 |
--------------------------------------------------------------------------------
/BioClients/chemidplus/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for NLM ChemIdPlus web services."""
2 |
3 | from .Utils import *
4 |
5 |
--------------------------------------------------------------------------------
/BioClients/idg/__init__.py:
--------------------------------------------------------------------------------
1 | """Access IDG APIs."""
2 |
3 | from .Utils import *
4 |
5 | __all__ = [ "rss", "tcrd", "tinx" ]
6 |
--------------------------------------------------------------------------------
/doc/entrez.md:
--------------------------------------------------------------------------------
1 | # `BioClients.entrez`
2 |
3 | ## NIH NCBI Entrez
4 |
5 | NIH NCBI Entrez E-Utilities client via Entrezpy.
6 |
--------------------------------------------------------------------------------
/BioClients/entrez/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for NIH NCBI Entrez E-Utilities services, via Entrezpy."""
2 |
3 | from .Utils import *
4 |
--------------------------------------------------------------------------------
/doc/pdb.md:
--------------------------------------------------------------------------------
1 | # `BioClients.pdb`
2 |
3 | ## PDB
4 |
5 | Utility for PDB REST API.
6 |
7 | *
8 |
--------------------------------------------------------------------------------
/BioClients/amp/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for AMP project web services."""
2 |
3 | #from .Utils import *
4 |
5 | __all__ = [ "t2d" ]
6 |
--------------------------------------------------------------------------------
/BioClients/pubmed/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for PubMed web services, and NCBI-PubMed XML processing."""
2 |
3 | from .Utils import *
4 |
--------------------------------------------------------------------------------
/doc/monarch.md:
--------------------------------------------------------------------------------
1 | # `BioClients.monarch`
2 |
3 | ## Monarch Initiative
4 |
5 | *
6 |
--------------------------------------------------------------------------------
/BioClients/pubchem/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for PubChem web services."""
2 |
3 | from .Utils import *
4 |
5 | __all__ = [ "ftp", "rdf", "soap" ]
6 |
--------------------------------------------------------------------------------
/BioClients/util/__init__.py:
--------------------------------------------------------------------------------
1 | """Miscellaneous utilities for web service clients."""
2 |
3 | __all__ = [ "pandas", "rest", "sparql", "xml", "yaml" ]
4 |
--------------------------------------------------------------------------------
/doc/brenda.md:
--------------------------------------------------------------------------------
1 | # `BioClients.brenda`
2 |
3 | ## BRENDA
4 |
5 | *
6 | *
7 |
8 |
--------------------------------------------------------------------------------
/doc/biogrid.md:
--------------------------------------------------------------------------------
1 | # `BioClients.biogrid`
2 |
3 | ## BioGrid
4 |
5 | *
6 | *
7 |
8 |
--------------------------------------------------------------------------------
/doc/iuphar.md:
--------------------------------------------------------------------------------
1 | # `BioClients.iuphar`
2 |
3 | ## IUPHAR, a.k.a. Guide to Pharmacology
4 |
5 | *
6 |
7 |
--------------------------------------------------------------------------------
/BioClients/chem2bio2rdf/__init__.py:
--------------------------------------------------------------------------------
1 | """Client tools for Chem2Bio2RDF (PostgreSql db) and SLAP (REST API)."""
2 |
3 | from .Utils import *
4 |
5 | __all__ = [ "slap" ]
6 |
--------------------------------------------------------------------------------
/doc/cdc.md:
--------------------------------------------------------------------------------
1 | # `BioClients.cdc`
2 |
3 | ## CDC
4 |
5 | CDC REST API client
6 |
7 | *
8 | *
9 |
--------------------------------------------------------------------------------
/doc/icite.md:
--------------------------------------------------------------------------------
1 | # `BioClients.icite`
2 |
3 | ## iCite
4 |
5 | PubMed iCite REST API client
6 |
7 | * [PubMed](https://pubmed.ncbi.nlm.nih.gov/)
8 | * [iCite](https://icite.od.nih.gov/)
9 |
--------------------------------------------------------------------------------
/doc/tcga.md:
--------------------------------------------------------------------------------
1 | # `BioClients.tcga`
2 |
3 | ## TCGA (The Cancer Genome Atlas)
4 |
5 | *
6 | *
7 |
8 |
--------------------------------------------------------------------------------
/doc/bindingdb.md:
--------------------------------------------------------------------------------
1 | # `BioClients.bindingdb`
2 |
3 | ## BindingDb
4 |
5 | BindingDb REST API client
6 |
7 | *
8 | *
9 |
--------------------------------------------------------------------------------
/doc/cas.md:
--------------------------------------------------------------------------------
1 | # `BioClients.cas`
2 |
3 | ## CAS
4 |
5 | CAS Common Chemistry REST API client
6 |
7 | *
8 | *
9 |
--------------------------------------------------------------------------------
/doc/lincs.md:
--------------------------------------------------------------------------------
1 | # `BioClients.lincs`
2 |
3 | ## LINCS
4 |
5 | LINCS REST API client
6 |
7 | New (2019) iLINCS:
8 |
9 | *
10 | *
11 |
12 |
--------------------------------------------------------------------------------
/doc/mygene.md:
--------------------------------------------------------------------------------
1 | # `BioClients.mygene`
2 |
3 | ## MyGene
4 |
5 | Access to MyGene REST API.
6 |
7 | *
8 | *
9 |
10 | ```
11 | python3 -m BioClients.mygene.Client -h
12 | ```
13 |
--------------------------------------------------------------------------------
/doc/wikipathways.md:
--------------------------------------------------------------------------------
1 | # `BioClients.wikipathways`
2 |
3 | ## WikiPathways
4 |
5 | Access to WikiPathways REST API.
6 |
7 | *
8 |
9 | ```
10 | python3 -m BioClients.wikipathways.Client list_pathways
11 | ```
12 |
--------------------------------------------------------------------------------
/doc/ncbo.md:
--------------------------------------------------------------------------------
1 | # `BioClients.ncbo`
2 |
3 | The National Center for Biomedical Ontology was founded as one of the National Centers for Biomedical Computing, supported by the NHGRI, the NHLBI, and the NIH Common Fund.
4 |
5 | *
6 |
--------------------------------------------------------------------------------
/doc/glygen.md:
--------------------------------------------------------------------------------
1 | # `BioClients.glygen`
2 |
3 | ## GlyGen
4 |
5 | GlyGen REST API client.
6 |
7 | *
8 | *
9 |
10 |
11 | ## Example commands
12 |
13 | ```
14 | python3 -m BioClients.glygen.Client -h
15 | ```
16 |
--------------------------------------------------------------------------------
/doc/chemidplus.md:
--------------------------------------------------------------------------------
1 | # `BioClients.chemidplus`
2 |
3 | ## ChemIdPlus
4 |
5 | NIH NLM ChemIdPlus REST API client
6 |
7 | *
8 | *
9 | *
10 |
--------------------------------------------------------------------------------
/doc/dnorm.md:
--------------------------------------------------------------------------------
1 | # `BioClients.dnorm`
2 |
3 | ## DNorm
4 |
5 | NCBI CBB REST client (Computational Biology Branch)
6 |
7 | *
8 | *
9 |
--------------------------------------------------------------------------------
/doc/fda.md:
--------------------------------------------------------------------------------
1 | # `BioClients.fda`
2 |
3 | ## FDA
4 |
5 | OpenFDA Adverse Event Reports REST API client.
6 |
7 | *
8 | *
9 |
--------------------------------------------------------------------------------
/doc/allen.md:
--------------------------------------------------------------------------------
1 | # `BioClients.allen`
2 |
3 | ## Allen Brain Atlas
4 |
5 | Allen Brain Atlas REST API client
6 |
7 | *
8 |
9 | ```
10 | python3 -m BioClients.allen.brain.Client -h
11 | ```
12 |
13 | Additional Allen Institute resources may be added in future.
14 |
--------------------------------------------------------------------------------
/doc/disgenet.md:
--------------------------------------------------------------------------------
1 | # `BioClients.disgenet`
2 |
3 | ## DisGeNet
4 |
5 | *
6 | *
7 | *
8 | *
9 | *
10 |
--------------------------------------------------------------------------------
/doc/oncotree.md:
--------------------------------------------------------------------------------
1 | # `BioClients.oncotree`
2 |
3 | ## OncoTree: A Cancer Classification System for Precision Oncology
4 |
5 | *
6 |
7 | Ref: OncoTree: A Cancer Classification System for Precision Oncology, Kundra et al., JCO Clinical Cancer Informatics, 2021, https://doi.org/10.1200/CCI.20.00108.
8 |
--------------------------------------------------------------------------------
/doc/amp_t2d.md:
--------------------------------------------------------------------------------
1 | # `BioClients.amp_t2d`
2 |
3 | AMP T2D: Accelerating Medicines Partnership Type-2 Diabetes project.
4 |
5 | *
6 | *
7 |
8 | ## Usage
9 |
10 | ```
11 | $ python3 -m BioClients.amp_t2d.Client -h
12 | ```
13 |
--------------------------------------------------------------------------------
/doc/uniprot.md:
--------------------------------------------------------------------------------
1 | # `BioClients.uniprot`
2 |
3 | ## UniProt
4 |
5 | Access to Uniprot REST API.
6 |
7 | UniprotKB = Uniprot Knowledge Base
8 |
9 | *
10 | *
11 | *
12 |
13 | ```
14 | python3 -m BioClients.uniprot.Client --uids Q14790 getData
15 | ```
16 |
--------------------------------------------------------------------------------
/doc/maayanlab.md:
--------------------------------------------------------------------------------
1 | # `MaayanLab`
2 |
3 | ## `MaayanLab.harmonizome`
4 |
5 | *
6 |
7 | ## `MaayanLab.archs4`
8 |
9 | Process [HDF5](https://www.hdfgroup.org/) files from download page,
10 | using [h5py](https://docs.h5py.org/en/stable/index.html).
11 |
12 | *
13 | *
14 |
--------------------------------------------------------------------------------
/doc/biomarkerkb.md:
--------------------------------------------------------------------------------
1 | # `BioClients.biomarkerkb`
2 |
3 | ## BiomarkerKB
4 |
5 | BiomarkerKB REST API client.
6 |
7 | *
8 | *
9 |
10 |
11 | ## Example commands
12 |
13 | ```
14 | python -m BioClients.biomarkerkb.Client -h
15 | ```
16 |
17 | ```
18 | python -m BioClients.biomarkerkb.Client get_biomarker_detail --ids "AN6278-1" -v -v
19 | ```
20 |
--------------------------------------------------------------------------------
/doc/jensenlab.md:
--------------------------------------------------------------------------------
1 | # `BioClients.jensenlab`
2 |
3 | ## JensenLab
4 |
5 | *
6 |
7 | Currently focused on [DISEASES](https://diseases.jensenlab.org/).
8 | Three source channels are defined:
9 |
10 | * Experiments
11 | * Knowledge
12 | * Textmining
13 |
14 | ```
15 | python3 -m BioClients.jensenlab.Client get_disease_genes --ids "DOID:10652" --channel "Knowledge"
16 | ```
17 |
--------------------------------------------------------------------------------
/doc/humanbase.md:
--------------------------------------------------------------------------------
1 | # `BioClients.humanbase`
2 |
3 | ## HumanBase
4 |
5 | Client to HumanBase REST API.
6 | Genome-scale Integrated Analysis of gene Networks in Tissues
7 | GIANT has moved to HumanBase (http://hb.flatironinstitute.org/).
8 | GIANT tissue networks integrate 987 genome-scale datasets, encompassing
9 | ~38,000 conditions from ~14,000 publications and include both expression and
10 | interaction measurements.
11 |
12 | *
13 |
--------------------------------------------------------------------------------
/BioClients/util/yaml/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import os,sys,logging,yaml
4 |
5 | #############################################################################
def ReadParamFile(fparam):
    """Read a (possibly multi-document) YAML parameter file into one dict.

    Each YAML document in the file is expected to be a mapping. The mappings
    are merged in file order, so a key appearing in a later document replaces
    the same key from an earlier one. Documents are parsed with
    yaml.BaseLoader.

    fparam: path of the YAML file to read.
    Returns the merged dict (empty if the file holds no documents).
    """
    merged = dict()
    with open(fparam, 'r') as stream:
        documents = yaml.load_all(stream, Loader=yaml.BaseLoader)
        for document in documents:
            # .items() keeps the requirement that every document is a mapping.
            merged.update(document.items())
    return merged
13 |
14 | #############################################################################
15 |
--------------------------------------------------------------------------------
/doc/omim.md:
--------------------------------------------------------------------------------
1 | # `BioClients.omim`
2 |
3 | ## OMIM
4 |
5 | Online Mendelian Inheritance in Man,
6 | "An Online Catalog of Human Genes and Genetic Disorders"
7 |
8 | See:
9 |
10 | The OMIM API URLs are organized in a very simple fashion:
11 | /api/[handler]?[parameters]
12 | /api/[handler]/[component]?[parameters]
13 | /api/[handler]/[action]?[parameters]
14 | The handler refers to the data object, such as an entry or a clinical synopsis.
15 |
16 | Handlers: entry, clinicalSynopsis, geneMap, search, html, dump
17 |
--------------------------------------------------------------------------------
/doc/chem2bio2rdf.md:
--------------------------------------------------------------------------------
1 | # `BioClients.chem2bio2rdf`
2 |
3 | ## Chem2Bio2RDF
4 |
5 | *
6 | *
7 |
8 | Chem2Bio2RDF employs a backend PostgreSql db. This
9 | BioClients API provides
10 | programmatic access to an available db instance, which may be available
11 | locally (e.g. within IU intranet), or may be available via download
12 | and mirror instance.
13 |
14 | ### Database credentials
15 |
16 | Db credentials are normally stored in a configuration file at
17 | `$HOME/.c2b2r.yaml`.
18 |
--------------------------------------------------------------------------------
/doc/opentargets.md:
--------------------------------------------------------------------------------
1 | # `BioClients.opentargets`
2 |
3 | ## Open Targets
4 |
5 | OpenTargets REST API client, using the Python client package
6 | `opentargets`.
7 |
8 | ```
9 | pip3 install opentargets
10 | ```
11 |
12 | *
13 | *
14 | *
15 | *
16 | *
17 | *
18 | *
19 | *
20 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
# Use the README as the long description shown on the package index page.
# The encoding is explicit so the build does not depend on the platform's
# default locale encoding (README.md may contain non-ASCII characters).
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Package metadata; sub-packages are discovered automatically.
setuptools.setup(
    name="BioClients",
    version="0.2.31",
    author="Jeremy Yang",
    author_email="jeremyjyang@gmail.com",
    description="Clients for online biomedical resources, usually via REST APIs.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/jeremyjyang/BioClients",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.10',
)
23 |
--------------------------------------------------------------------------------
/doc/mesh.md:
--------------------------------------------------------------------------------
1 | # `BioClients.mesh`
2 |
3 | ## MeSH
4 |
5 | From the NIH National Library of Medicine (NLM).
6 | Currently XML processing tools only.
7 |
8 | *
9 |
10 | MeSH XML utility functions.
11 |
12 | MeSH XML
13 | Download:
14 | Doc:
15 |
16 | <DescriptorRecord DescriptorClass="1">
17 | 1 = Topical Descriptor.
18 | 2 = Publication Types, for example, 'Review'.
19 | 3 = Check Tag, e.g., 'Male' (no tree number)
20 | 4 = Geographic Descriptor (Z category of tree number).
21 |
22 | Category "C" : Diseases
23 | Category "F" : Psychiatry and Psychology
24 | Category "F03" : Mental Disorders
25 | Thus, include "C\*" and "F03\*" only.
26 | Terms can have multiple TreeNumbers; diseases can be in non-disease categories, in addition to a disease category.
27 |
--------------------------------------------------------------------------------
/BioClients/util/obo/App.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | """
3 | Developed and tested with doid.obo (Disease Ontology).
4 | """
5 | import sys,os,argparse,re,logging
6 |
7 | from .. import obo as util_obo
8 |
9 | #############################################################################
if __name__=='__main__':
    # Command-line entry point: convert an OBO ontology file to TSV.
    parser = argparse.ArgumentParser(description='OBO to TSV converter')
    parser.add_argument("--i", dest="ifile", required=True, help="input OBO file")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    # Open the input first so a bad input path does not create/truncate the
    # output file; both handles are released even if the conversion fails.
    with open(args.ifile) as fin:
        fout = open(args.ofile, "w+") if args.ofile else sys.stdout
        try:
            util_obo.OBO2CSV(fin, fout)
        finally:
            # Never close stdout; only close a file this script opened.
            if fout is not sys.stdout:
                fout.close()
24 |
--------------------------------------------------------------------------------
/BioClients/util/graphql/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ###
3 | # https://gql.readthedocs.io/en/stable/
4 | # https://gql.readthedocs.io/en/stable/usage/basic_usage.html
5 | ###
6 | import sys,os,json,logging
7 |
8 | from gql import Client, gql
9 | from gql.transport.aiohttp import AIOHTTPTransport
10 |
11 | #############################################################################
def RunQuery(graphql, base_url, fout):
    """Run a GraphQL query against an endpoint and write the result as JSON.

    graphql: query text; if None an error is logged and nothing is done.
    base_url: URL of the GraphQL endpoint.
    fout: writable text stream receiving the indented JSON result.

    Any exception raised while parsing, executing or writing is logged as an
    error and not propagated.
    """
    if graphql is None:
        logging.error("No query.")
        return

    client = Client(
        transport=AIOHTTPTransport(url=base_url),
        fetch_schema_from_transport=True,
    )
    logging.debug(f"client.schema: '{client.schema}'")

    try:
        result = client.execute(gql(graphql))
        text = json.dumps(result, indent=2)
        fout.write(text+"\n")
    except Exception as e:
        logging.error(e)
27 |
28 | #############################################################################
29 |
30 |
31 |
--------------------------------------------------------------------------------
/doc/pubtator.md:
--------------------------------------------------------------------------------
1 | # `BioClients.pubtator`
2 |
3 | # PubTator
4 |
5 | PubMed and related NIH literature resources.
6 |
7 | * [PubMed](https://pubmed.ncbi.nlm.nih.gov/)
8 | * [PubTator](https://www.ncbi.nlm.nih.gov/research/pubtator/)
9 |
10 | Pubtator REST API client
11 |
12 | Formats: JSON, PubTator, BioC.
13 |
14 | Nomenclatures:
15 | Gene : NCBI Gene
16 | e.g.
17 | Disease : MEDIC (CTD, CTD\_diseases.csv)
18 | e.g.
19 | Chemical : MESH
20 | e.g.
21 | Species : NCBI Taxonomy
22 | e.g.
23 | Mutation : tmVar
24 |
25 | NOTE that the API does NOT provide keyword search capability like
26 | webapp
27 |
--------------------------------------------------------------------------------
/BioClients/entrez/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | https://pypi.org/project/entrezpy/
4 | https://entrezpy.readthedocs.io/en/master/
5 | https://academic.oup.com/bioinformatics/article/35/21/4511/5488119
6 | https://dataguide.nlm.nih.gov/eutilities/utilities.html
7 | '''
8 | import os,sys,io,re,json,time,requests,urllib.parse,logging,tqdm
9 | import pandas as pd
10 |
11 | import entrezpy.conduit
12 |
13 | from .. import util
14 |
15 | #############################################################################
def Test(email):
    """Run a small entrezpy search-then-fetch pipeline as a smoke test.

    Searches the nucleotide database for H3N2 HA records (publication dates
    2000-2019) and adds a dependent fetch of up to 10 results as FASTA text.

    email: passed to entrezpy.conduit.Conduit.
    """
    search_params = {
        'db' : 'nucleotide',
        'term' : 'H3N2 [organism] AND HA',
        'rettype':'count',
        'sort' : 'Date Released',
        'mindate': 2000,
        'maxdate':2019,
        'datetype' : 'pdat',
    }
    fetch_params = {'retmax' : 10, 'retmode' : 'text', 'rettype': 'fasta'}

    conduit = entrezpy.conduit.Conduit(email)
    pipeline = conduit.new_pipeline()
    search_id = pipeline.add_search(search_params)
    # The fetch step consumes the result of the search step.
    pipeline.add_fetch(fetch_params, dependency=search_id)
    conduit.run(pipeline)
22 |
23 |
24 | #############################################################################
25 | #############################################################################
26 |
--------------------------------------------------------------------------------
/doc/reactome.md:
--------------------------------------------------------------------------------
1 | # `BioClients.reactome`
2 |
3 | ## Reactome
4 |
5 | Client for Reactome REST API.
6 |
7 | *
8 |
9 | From [Reactome Data Model](https://reactome.org/documentation/data-model):
10 |
11 | Life on the cellular level is a network of molecular interactions. Molecules
12 | are synthesized and degraded, undergo a bewildering array of temporary and
13 | permanent modifications, are transported from one location to another, and
14 | form complexes with other molecules. Reactome represents all of this
15 | complexity as reactions in which input physical entities are converted to
16 | output entities.
17 |
18 | PhysicalEntities include individual molecules, multi-molecular complexes, and
19 | sets of molecules or complexes grouped together on the basis of shared
20 | characteristics. Molecules are further classified as genome encoded (DNA,
21 | RNA, and proteins) or not (all others). Attributes of a PhysicalEntity
22 | instance capture the chemical structure of an entity, including any covalent
23 | modifications in the case of a macromolecule, and its subcellular
24 | localization.
25 |
--------------------------------------------------------------------------------
/BioClients/util/owl/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | OWL utility functions.
4 | https://owlready2.readthedocs.io/
5 | """
6 | import sys,os,re,gzip,argparse,logging
7 |
8 | import owlready2
9 |
10 | #############################################################################
def LoadOwlFile(ifile):
    """Load an OWL ontology from a local file with owlready2.

    ifile: path of the OWL file; it is loaded via a "file://" IRI.
    Returns the loaded ontology, or None if loading (or counting its classes
    for the log message) raised any exception, which is logged as an error.
    """
    try:
        ontology = owlready2.get_ontology(f"file://{ifile}").load()
        n_classes = len(list(ontology.classes()))
        logging.info(f"OWL ontology from {ifile} contains {n_classes} classes.")
        return ontology
    except Exception as e:
        logging.error(e)
    return None
19 |
20 | #############################################################################
def DescribeOwl(ifile):
    """Load the OWL file; the only output is what LoadOwlFile logs.

    ifile: path of the OWL file. Nothing is returned.
    """
    LoadOwlFile(ifile)
23 |
24 | #############################################################################
def ValidateOwl(ifile):
    """Report whether an OWL file can be loaded.

    ifile: path of the OWL file.
    Returns True if LoadOwlFile produced an ontology, otherwise False; the
    outcome is also logged at INFO level.
    """
    if LoadOwlFile(ifile) is None:
        logging.info(f"OWL file NOT VALIDATED: {ifile}")
        return False
    logging.info(f"OWL file VALIDATED: {ifile}")
    return True
33 |
34 | #############################################################################
35 |
--------------------------------------------------------------------------------
/doc/gtex.md:
--------------------------------------------------------------------------------
1 | # `BioClients.gtex`
2 |
3 | ## GTEx
4 |
5 | GTEx REST API client.
6 |
7 | *
8 | *
9 |
10 |
11 | ## Example commands
12 |
13 | ```
14 | $ python3 -m BioClients.gtex.Client -h
15 | usage: Client.py [-h] [--ids IDS] [--i IFILE] [--o OFILE] [--dataset DATASET]
16 | [--subject SUBJECT] [--skip SKIP] [--nmax NMAX] [--api_host API_HOST]
17 | [--api_base_path API_BASE_PATH] [-v]
18 | {list_datasets,list_subjects,list_samples,get_gene_expression}
19 |
20 | GTEx REST API client
21 |
22 | positional arguments:
23 | {list_datasets,list_subjects,list_samples,get_gene_expression}
24 | OPERATION (select one)
25 |
26 | options:
27 | -h, --help show this help message and exit
28 | --ids IDS input IDs
29 | --i IFILE input file, IDs
30 | --o OFILE output (TSV)
31 | --dataset DATASET GTEx datasetId
32 | --subject SUBJECT GTEx subjectId
33 | --skip SKIP
34 | --nmax NMAX
35 | --api_host API_HOST
36 | --api_base_path API_BASE_PATH
37 | -v, --verbose
38 | ```
39 |
--------------------------------------------------------------------------------
/BioClients/util/owl/App.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | OWL utility functions.
4 | """
5 | import sys,os,re,gzip,argparse,logging
6 |
7 | from .. import owl as util_owl
8 |
9 | #############################################################################
if __name__=="__main__":
    # Command-line entry point for the OWL utilities.
    parser = argparse.ArgumentParser(description="OWL utility", epilog="")
    ops = [ "describe_owl", "validate_owl", ]
    parser.add_argument("op", choices=ops, help="OPERATION")
    parser.add_argument("--i", dest="ifile", help="input file (OWL)")
    parser.add_argument("--o", dest="ofile", help="output file")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format="%(levelname)s:%(message)s", level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    # Both operations are handed the input *path* (not a stream), so a
    # missing --i would otherwise be passed on as None. Fail early instead.
    if not args.ifile:
        parser.error("--i required: input file (OWL)")

    # NOTE: no file handles are opened here. Neither operation reads from or
    # writes to a stream, and opening --o for writing would only truncate
    # that file without producing any output.

    if args.op == "describe_owl":
        util_owl.DescribeOwl(args.ifile)

    elif args.op == "validate_owl":
        util_owl.ValidateOwl(args.ifile)

    else:
        parser.error(f"Invalid operation: {args.op}")
32 |
--------------------------------------------------------------------------------
/doc/bioregistry.md:
--------------------------------------------------------------------------------
1 | # `BioClients.bioregistry`
2 |
3 | ## Bioregistry
4 |
5 | *
6 | *
7 |
8 | ```
9 | $ python3 -m BioClients.bioregistry.Client -h
10 | usage: Client.py [-h] [--i IFILE] [--ids IDS] [--o OFILE] [--etype ETYPE]
11 | [--prefix PREFIX] [--nchunk NCHUNK] [--nmax NMAX] [--skip SKIP]
12 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v]
13 | {list_collections,list_contexts,list_registry,list_metaregistry,list_contributors,get_reference}
14 |
15 | Bioregistry REST API client
16 |
17 | positional arguments:
18 | {list_collections,list_contexts,list_registry,list_metaregistry,list_contributors,get_reference}
19 | operation
20 |
21 | options:
22 | -h, --help show this help message and exit
23 | --i IFILE input query IDs
24 | --ids IDS input query IDs (comma-separated)
25 | --o OFILE output (TSV)
26 | --etype ETYPE evidence codes (|-separated)
27 | --prefix PREFIX CURIE prefix
28 | --nchunk NCHUNK
29 | --nmax NMAX
30 | --skip SKIP
31 | --api_host API_HOST
32 | --api_base_path API_BASE_PATH
33 | -v, --verbose
34 | ```
35 |
--------------------------------------------------------------------------------
/BioClients/reactome/SMBL_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | BRN = Biochemical Network Analysis (pybrn)
4 | """
5 | import sys,os,re,logging
6 | import numpy
7 |
8 | from .. import reactome
9 |
try:
    import brn
except Exception:
    # Everything below needs pybrn; exit with a non-zero status so that a
    # calling shell or script can detect the failure.
    logging.error("pybrn not installed.")
    sys.exit(1)
#
API_HOST='reactomews.oicr.on.ca:8080'
BASE_PATH='/ReactomeRESTfulAPI/RESTfulWS'
API_BASE_URL='http://'+API_HOST+BASE_PATH
#

# NOTE(review): all output below is logged at INFO level and no logging
# configuration is done in this file -- confirm the caller configures it.

# Load the reaction network from the SBML file (path relative to the cwd).
net = brn.fromSBML("data/reactome_reactions_homo_sapiens.2.sbml")

logging.info('reactions: %d'%len(net.reactions))
logging.info('species: %d'%len(net.species))
logging.info('values: %d'%len(net.values))

# List every reaction with a running 1-based number.
n_reac = 0  # stays 0 if there are no reactions
for n_reac, r in enumerate(net.reactions, start=1):
    logging.info('%3d. %s'%(n_reac,net.showreact(r,printstr=False)))
logging.info('n_reac: %d'%n_reac)


# List every species, looking each one up through the Reactome REST API.
n_spec = 0  # stays 0 if there are no species
for n_spec, s in enumerate(net.species, start=1):
    # Keep only the digits of the species identifier for the lookup.
    id_spec = re.sub(r'[^\d]','',s)
    spec = reactome.Utils.GetId(API_BASE_URL,id_spec,'PhysicalEntity')
    displayName = spec['displayName'] if 'displayName' in spec else ''
    schemaClass = spec['schemaClass'] if 'schemaClass' in spec else ''
    logging.info('%3d. %s: (%s) %s'%(n_spec,id_spec,schemaClass,displayName))
logging.info('n_spec: %d'%n_spec)
43 |
--------------------------------------------------------------------------------
/BioClients/lincs/sigcom/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #############################################################################
3 | ### https://maayanlab.cloud/sigcom-lincs/#/API
4 | #############################################################################
5 | import sys,os,re,json,requests,tqdm,logging
6 | import pandas as pd
7 | import urllib.parse
8 | #
9 | API_HOST="maayanlab.cloud"
10 | API_BASE_PATH="/sigcom-lincs/metadata-api"
11 | BASE_URL='https://'+API_HOST+API_BASE_PATH
12 | #
13 | #############################################################################
def GetResources(ids, base_url=BASE_URL, fout=None):
    """Fetch resource records by ID and return them as one DataFrame.

    ids: iterable of resource IDs (strings); each is URL-quoted and requested
        from "<base_url>/resources/<id>".
    base_url: API base URL.
    fout: optional file path or stream; when given (and at least one record
        was retrieved) the table is written to it as TSV.

    Returns a DataFrame with one row per retrieved record, or None when no
    record was retrieved. Columns are the scalar (non-list, non-dict) fields
    of the first retrieved record; a field missing from a later record is
    left empty instead of raising KeyError.
    """
    tags = None
    frames = []
    url_base = base_url+'/resources'
    for id_this in ids:
        url = f"{url_base}/{urllib.parse.quote(id_this)}"
        response = requests.get(url)
        if not response.ok:
            # Skip failed requests rather than trying to parse an error body.
            logging.error(f"HTTP {response.status_code}: {url}")
            continue
        rval = response.json()
        logging.debug(json.dumps(rval, indent=2))
        if not tags:
            tags = [tag for tag in rval.keys() if type(rval[tag]) not in (list, dict)]
        frames.append(pd.DataFrame({tag: [rval.get(tag)] for tag in tags}))
    # Concatenate once at the end instead of re-copying the table per ID.
    df = pd.concat(frames) if frames else None
    if fout and df is not None:
        df.to_csv(fout, sep="\t", index=False)
    logging.info(f"IDs: {len(ids)}")
    return df
29 |
30 | #############################################################################
31 |
--------------------------------------------------------------------------------
/doc/ncats.md:
--------------------------------------------------------------------------------
1 | # `BioClients.ncats`
2 |
3 | ## NIH NCATS
4 |
5 | Tools for obtaining and processing data from NIH NCATS resources.
6 |
7 | *
8 |
9 | ### Global Substance Registration System (GSRS)
10 |
11 | *
12 | *
13 | *
14 |
15 | Examples:
16 |
17 | ```
18 | $ python -m BioClients.ncats.gsrs.Client -h
19 | usage: Client.py [-h] [--i IFILE] [--o OFILE] [--ids IDS] [--query QUERY]
20 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v]
21 | {list_vocabularies,list_substances,search,get_substance,get_substance_names}
22 |
23 | NCATS Global Substance Registration System (GSRS) client
24 |
25 | positional arguments:
26 | {list_vocabularies,list_substances,search,get_substance,get_substance_names}
27 | OPERATION
28 |
29 | options:
30 | -h, --help show this help message and exit
31 | --i IFILE Input IDs
32 | --o OFILE Output (TSV)
33 | --ids IDS Input IDs (comma-separated)
34 | --query QUERY Search query.
35 | --api_host API_HOST
36 | --api_base_path API_BASE_PATH
37 | -v, --verbose
38 |
39 | Example search queries: IBUPRO ASPIRIN OXYTOCIN OXYTO* ASPIRIN AND ESTER COCN
40 | C=1CC=CC=C1C(=O)O
41 | ```
42 |
43 |
--------------------------------------------------------------------------------
/BioClients/biomarkerkb/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility functions for BiomarkerKB REST API.
4 | https://api.biomarkerkb.org/
5 |
6 | """
7 | ###
8 | import sys,os,re,json,collections,time,urllib.parse,logging,tqdm,tqdm.auto
9 | import pandas as pd
10 | import requests
11 | #
12 | NCHUNK=100
13 | #
14 | API_HOST="api.biomarkerkb.org"
15 | API_BASE_PATH=""
16 | BASE_URL="https://"+API_HOST+API_BASE_PATH
17 | #
18 | ##############################################################################
def GetBiomarkerDetail(ids, skip, base_url=BASE_URL, fout=None):
    """Fetch biomarker detail records by ID.

    ids: sequence of biomarker IDs; each is requested from
        "<base_url>/biomarker/detail/<id>".
    skip: number of leading IDs to skip (a falsy value skips none).
        Previously this argument was accepted but ignored.
    base_url: API base URL.
    fout: optional file path or stream. When given, each record is written
        to it as TSV as soon as it is retrieved (header with the first
        record only) and None is returned.

    Returns a DataFrame of all retrieved records when fout is None (None if
    nothing was retrieved). Columns are the scalar fields of the first
    retrieved record; a field missing from a later record is left empty
    instead of raising KeyError.
    """
    n_out = 0
    tags = None
    frames = []
    if skip:
        ids = ids[skip:]
    for id_this in tqdm.auto.tqdm(ids, desc="IDs"):
        response = requests.get(f"{base_url}/biomarker/detail/{id_this}", headers={"Accept":"application/json"})
        if not response.ok:
            # Skip failed requests rather than trying to parse an error body.
            logging.error(f"HTTP {response.status_code}: {id_this}")
            continue
        result = response.json()
        logging.debug(json.dumps(result, indent=2))
        if not tags:
            tags = [tag for tag in result.keys() if type(result[tag]) not in (list, dict, collections.OrderedDict)]
        df_this = pd.DataFrame({tag: [result.get(tag)] for tag in tags})
        if fout is None:
            frames.append(df_this)
        else:
            df_this.to_csv(fout, sep="\t", index=False, header=(n_out == 0))
        n_out += df_this.shape[0]
    logging.info(f"n_out: {n_out}")
    # Concatenate once at the end instead of re-copying the table per ID.
    return pd.concat(frames) if frames else None
33 |
34 | ##############################################################################
35 |
--------------------------------------------------------------------------------
/BioClients/maayanlab/archs4/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ###
3 |
4 | import sys,os,time,logging
5 | import numpy as np
6 | import pandas as pd
7 | import h5py
8 |
9 | from ...util import hdf as hdf_util
10 |
11 | #############################################################################
def ListSamples(f, fout):
    """Write the sample metadata found under f["meta"]["samples"] as TSV.

    f: open HDF5 file (or any mapping with the same nested layout).
    fout: file path or stream receiving the TSV.

    Every member of the "samples" group that is an h5py.Dataset becomes one
    column named after its key; other members are skipped. Byte-string
    values are decoded as UTF-8. If the group is absent an error is logged
    and nothing is written.
    """
    if "meta" not in f or "samples" not in f["meta"]:
        logging.error("No samples found.")
        return
    samples = f["meta"]["samples"]
    df = pd.DataFrame()
    colnames = []
    for i,k in enumerate(list(samples.keys())):
        logging.debug(f"{i+1}. {samples[k].name}")
        if type(samples[k]) is h5py.Dataset:
            df_this = pd.DataFrame(samples[k])
            df = pd.concat([df, df_this], axis=1)
            # NOTE(review): de-duplicating after every added column compares
            # only the columns read so far -- confirm this is intended.
            df = df.drop_duplicates()
            colnames.append(k)
    # Name only the columns actually added; using all keys would fail with a
    # length mismatch whenever a non-Dataset member was skipped.
    df.columns = colnames

    for col, dtype in df.dtypes.items():
        # np.object was removed in NumPy 1.24; the builtin is the equivalent.
        if dtype == object: # Only process object columns.
            # decode, or return original value if decode return Nan
            df[col] = df[col].str.decode('utf-8').fillna(df[col])

    logging.info(f"Output rows: {df.shape[0]}; columns: {df.shape[1]}")
    df.to_csv(fout, sep="\t", index=False)
37 |
38 | #############################################################################
39 |
--------------------------------------------------------------------------------
/doc/cfde.md:
--------------------------------------------------------------------------------
1 | # `BioClients.cfde`
2 |
3 | ## CFDE
4 |
5 | API access for resources of the Common Fund Data Ecosystem (CFDE).
6 |
7 | *
8 |
9 | ```
10 | $ python3 -m BioClients.cfde.cfchemdb.Client -h
11 | usage: Client.py [-h] [--i IFILE] [--ids IDS] [--xref_type XREF_TYPE] [--o OFILE]
12 | [--dbhost DBHOST] [--dbport DBPORT] [--dbname DBNAME] [--dbusr DBUSR]
13 | [--dbpw DBPW] [--param_file PARAM_FILE] [--dbschema DBSCHEMA] [-v] [-q]
14 | {list_tables,list_columns,list_tables_rowCounts,version,get_structure,list_structures,list_structures2smiles,meta_listdbs}
15 |
16 | CFChemDb PostgreSql client utility
17 |
18 | positional arguments:
19 | {list_tables,list_columns,list_tables_rowCounts,version,get_structure,list_structures,list_structures2smiles,meta_listdbs}
20 | OPERATION (select one)
21 |
22 | optional arguments:
23 | -h, --help show this help message and exit
24 | --i IFILE input ID file
25 | --ids IDS input IDs (comma-separated)
26 | --xref_type XREF_TYPE
27 | xref ID type
28 | --o OFILE output (TSV)
29 | --dbhost DBHOST
30 | --dbport DBPORT
31 | --dbname DBNAME
32 | --dbusr DBUSR
33 | --dbpw DBPW
34 | --param_file PARAM_FILE
35 | --dbschema DBSCHEMA
36 | -v, --verbose
37 | -q, --quiet Suppress progress notification.
38 | ```
39 |
--------------------------------------------------------------------------------
/BioClients/maayanlab/archs4/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ###
3 |
4 | import sys,os,time,argparse,logging
5 | import numpy as np
6 | import pandas as pd
7 | import h5py
8 |
9 | from ...util import hdf
10 | from ... import maayanlab
11 |
12 | #############################################################################
if __name__=='__main__':
    # Command-line entry point for operations on ARCHS4 HDF5 files.
    parser = argparse.ArgumentParser(description='H5 file operations', epilog="")
    OPS = ['summary', 'list_samples']
    parser.add_argument("op", choices=OPS, help='OPERATION')
    parser.add_argument("--i", dest="ifile", required=True, help="input file")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    t0 = time.time()

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    try:
        # The context manager closes the HDF5 file even if an operation fails.
        with h5py.File(args.ifile, 'r') as finh5:

            if args.op == "summary":
                logging.debug(list(finh5.keys()))
                hdf.Utils.Summary(finh5)

            elif args.op == "list_samples":
                maayanlab.archs4.ListSamples(finh5, fout)

            else:
                parser.error(f"Unsupported operation: {args.op}")
    finally:
        # Never close stdout; only close a file this script opened.
        if fout is not sys.stdout:
            fout.close()

    logging.info(f"Elapsed: {time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))}")
41 |
42 |
--------------------------------------------------------------------------------
/BioClients/cdc/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #############################################################################
3 | ### CDC REST API client
4 | ### https://tools.cdc.gov/api/docs/info.aspx
5 | ### https://tools.cdc.gov/api/v2/resources
6 | #############################################################################
7 | import sys,os,re,json,csv,logging
8 | import urllib.parse,requests
9 | #
10 | from ..util import rest
11 | #
12 | #############################################################################
def ListResources(base_url, resource, fout):
    """Page through a CDC resource collection and write it as TSV.

    Requests ``{base_url}/{resource}?offset=..&max=..`` repeatedly. The
    header row is taken from the keys of the first resource returned;
    fields missing from later resources are written as empty strings.

    Args:
        base_url: API base URL (no trailing slash).
        resource: collection name appended to the base URL.
        fout: writable text stream receiving the TSV.
    """
    tags = None
    n_rsc = 0
    offset = 0
    nchunk = 100
    url = base_url+'/%s'%resource
    while True:
        url_this = '%s?offset=%d&max=%d'%(url, offset, nchunk)
        rval = rest.GetURL(url_this, parse_json=True)
        try:
            rscs = rval['results']
            pag = rval['meta']['pagination']
            count = pag['count']
            total = pag['total']
            nextUrl = pag['nextUrl']
        except (KeyError, TypeError) as e:
            # Missing/None response or unexpected schema: stop paging, but do
            # not swallow unrelated errors (e.g. KeyboardInterrupt).
            logging.debug('Stopping at offset %d: %r'%(offset, e))
            break
        for rsc in rscs:
            if not tags:
                tags = list(rsc.keys())
                fout.write('\t'.join(tags)+'\n')
            vals = [str(rsc[tag]) if tag in rsc else '' for tag in tags]
            fout.write('\t'.join(vals)+'\n')
            n_rsc += 1
        # An empty page means no progress is possible; break to avoid an
        # endless loop if the server keeps advertising a nextUrl.
        if not rscs or count >= total or not nextUrl:
            break
        offset += nchunk
    logging.info('n_rsc = %d (total %s)'%(n_rsc, resource))
39 |
40 | #############################################################################
41 |
--------------------------------------------------------------------------------
/BioClients/util/rdf/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | RDF utility functions.
4 | https://rdflib.readthedocs.io/
5 | https://owlready2.readthedocs.io/
6 | """
7 | import sys,os,re,gzip,argparse,logging
8 |
9 | import rdflib
10 | import owlready2
11 |
12 | #############################################################################
def LoadRdfFile(fin, ifmt):
    """Parse an RDF input stream into a new rdflib Graph.

    Args:
        fin: input source handed to ``rdflib.Graph.parse``.
        ifmt: RDF format identifier passed as ``format=``.

    Returns:
        The populated graph, or None if parsing raised (the error is logged).
    """
    g = rdflib.Graph()
    try:
        g.parse(fin, format=ifmt)
    except Exception as e:
        logging.error(e)
        return None
    # Log outside the try block so that an input object without a ``name``
    # attribute cannot turn a successful parse into a reported failure.
    src_name = getattr(fin, "name", "<stream>")
    logging.info(f"RDF graph from {src_name} ({ifmt}) contains {len(g)} triples.")
    return g
22 |
23 | #############################################################################
def ValidateRdf(fin, ifmt):
    """Report whether the input parses as RDF in the given format.

    Returns True when LoadRdfFile yields a graph, False otherwise; the
    outcome is also logged at INFO level.
    """
    is_valid = LoadRdfFile(fin, ifmt) is not None
    if not is_valid:
        logging.info(f"RDF file NOT VALIDATED: {fin.name} ({ifmt})")
        return False
    logging.info(f"RDF file VALIDATED: {fin.name} ({ifmt})")
    return True
32 |
33 | #############################################################################
def DescribeRdf(fin, ifmt):
    """Load an RDF input and log a short summary of the resulting graph.

    Logs the triple count and the number of distinct subjects, predicates
    and objects. Nothing is logged beyond the parse error if loading fails.

    Args:
        fin: input source handed to LoadRdfFile.
        ifmt: RDF format identifier.
    """
    g = LoadRdfFile(fin, ifmt)
    if g is None:
        return
    n_subj = len(set(g.subjects()))
    n_pred = len(set(g.predicates()))
    n_obj = len(set(g.objects()))
    logging.info(f"triples: {len(g)}; subjects: {n_subj}; predicates: {n_pred}; objects: {n_obj}")
36 |
37 | #############################################################################
def ConvertRdf(fin, ifmt, ofmt, fout):
    """Read RDF in one format and write it to ``fout`` in another.

    Args:
        fin: input source handed to LoadRdfFile.
        ifmt: input RDF format identifier.
        ofmt: output RDF format identifier passed to ``Graph.serialize``.
        fout: writable text stream.
    """
    g = LoadRdfFile(fin, ifmt)
    if g is None:
        # LoadRdfFile already logged the parse error.
        logging.error("RDF conversion aborted: input could not be loaded.")
        return
    rdf_txt = g.serialize(format=ofmt)
    # Older rdflib returns bytes, rdflib >= 6 returns str; accept both.
    if isinstance(rdf_txt, bytes):
        rdf_txt = rdf_txt.decode("utf8")
    fout.write(rdf_txt)
    dst_name = getattr(fout, "name", "<stream>")
    logging.info(f"RDF graph to {dst_name} ({ofmt}) containing {len(g)} triples.")
42 |
43 | #############################################################################
44 |
--------------------------------------------------------------------------------
/doc/chebi.md:
--------------------------------------------------------------------------------
1 | # `BioClients.chebi`
2 |
3 | ## ChEBI
4 |
5 | ChEBI REST API client
6 |
7 | Tools for obtaining and processing ChEBI data.
8 | Chemical Entities of Biological Interest (ChEBI) is a freely available dictionary of molecular entities focused on ‘small’ chemical compounds.
9 |
10 | *
11 | *
12 |
13 | ```
14 | python -m BioClients.chebi.Client -h
15 | usage: Client.py [-h] [--ids IDS] [--i IFILE] [--o OFILE] [--query QUERY]
16 | [--skip SKIP] [--nmax NMAX] [--api_host API_HOST]
17 | [--api_base_path API_BASE_PATH] [-v]
18 | {list_sources,get_entity,get_entity_names,get_entity_chemical_data,get_entity_secondary_ids,get_entity_children,get_entity_parents,get_entity_origins,search}
19 |
20 | ChEBI REST API client
21 |
22 | positional arguments:
23 | {list_sources,get_entity,get_entity_names,get_entity_chemical_data,get_entity_secondary_ids,get_entity_children,get_entity_parents,get_entity_origins,search}
24 | OPERATION (select one)
25 |
26 | options:
27 | -h, --help show this help message and exit
28 | --ids IDS input IDs
29 | --i IFILE input file, IDs
30 | --o OFILE output (TSV)
31 | --query QUERY search query (SMILES)
32 | --skip SKIP
33 | --nmax NMAX
34 | --api_host API_HOST
35 | --api_base_path API_BASE_PATH
36 | -v, --verbose
37 |
38 | Example entity IDs: 16737, 30273,33246,24433
39 | ```
40 |
--------------------------------------------------------------------------------
/doc/ensembl.md:
--------------------------------------------------------------------------------
1 | # `BioClients.ensembl`
2 |
3 | ## EnsEMBL
4 |
5 | Access to Ensembl REST API.
6 |
7 | *
8 |
9 | Including Variant Effect Predictor (VEP):
10 |
11 | *
12 | *
13 |
14 | ## `BioClients.ensembl.biomart`
15 |
16 | Also, the BIOMART ID mapping service.
17 |
18 | *
19 |
20 | ```
21 | $ python3 -m BioClients.ensembl.Client -h
22 | usage: Client.py [-h] [--ids IDS]
23 | [--i IFILE]
24 | [--api_host API_HOST]
25 | [--api_base_path API_BASE_PATH]
26 | [--o OFILE]
27 | [--skip SKIP]
28 | [--nmax NMAX] [-v] [-q]
29 | {list_species,get_xrefs,get_info,get_vep,show_version}
30 |
31 | Ensembl REST API client
32 |
33 | positional arguments:
34 | {list_species,get_xrefs,get_info,get_vep,show_version}
35 | operation
36 |
37 | options:
38 | -h, --help show this help message and exit
39 | --ids IDS Ensembl_IDs, comma-separated (ex:ENSG00000000003), or SNP IDs,
40 | comma-separated (ex:rs56116432)
41 | --i IFILE input file, Ensembl IDs or SNP IDs
42 | --api_host API_HOST
43 | --api_base_path API_BASE_PATH
44 | --o OFILE output (TSV)
45 | --skip SKIP
46 | --nmax NMAX
47 | -v, --verbose
48 | -q, --quiet
49 |
50 | Example IDs: ENSG00000157764, ENSG00000160785
51 | ```
52 |
--------------------------------------------------------------------------------
/BioClients/mygene/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | https://mygene.info/
4 | https://mygene.info/v3/api
5 | https://pypi.org/project/mygene/
6 | """
7 | ###
8 | #
9 | import sys,os
10 | import pandas as pd
11 | import mygene as mg
12 | #
13 | FIELDS = 'HGNC,symbol,name,taxid,entrezgene,ensemblgene'
14 | NCHUNK=100;
15 | #
16 | #############################################################################
17 | def GetGenes(ids, fields=FIELDS, fout=None):
18 | """Get genes by Entrez or Ensembl gene ID."""
19 | ichunk=0; n_out=0; df=None;
20 | mgi = mg.MyGeneInfo()
21 | while ichunk*NCHUNK
8 | *
9 | *
10 | *
11 |
12 | ```
13 | $ python3 -m BioClients.clinicaltrials.Client -h
14 | usage: Client.py [-h] [--i IFILE] [--o OFILE] [--ids IDS]
15 | [--query_cond QUERY_COND] [--query_term QUERY_TERM]
16 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v]
17 | {version,list_study_fields,list_search_areas,search_studies,get_studies}
18 |
19 | ClinicalTrials.gov API client
20 |
21 | positional arguments:
22 | {version,list_study_fields,list_search_areas,list_enums,search_studies,get_studies}
23 | OPERATION
24 |
25 | options:
26 | -h, --help show this help message and exit
27 | --i IFILE Input NCT_IDs
28 | --o OFILE Output (TSV)
29 | --ids IDS Input NCT_IDs (comma-separated)
30 | --query_cond QUERY_COND
31 | Search query condition
32 | --query_term QUERY_TERM
33 | Search query term
34 | --api_host API_HOST
35 | --api_base_path API_BASE_PATH
36 | -v, --verbose
37 |
38 | See: https://clinicaltrials.gov/data-api/about-api,
39 | https://clinicaltrials.gov/data-api/api, https://clinicaltrials.gov/find-
40 | studies/constructing-complex-search-queries
41 | ```
42 |
--------------------------------------------------------------------------------
/doc/wikidata.md:
--------------------------------------------------------------------------------
1 | # `BioClients.wikidata`
2 |
3 | ## Wikidata
4 |
5 | Wikidata is a collaboratively edited RDF/Sparql knowledge graph
6 | used ___by___ Wikipedia. The structured infobox data in Wikipedia
7 | is from Wikidata. Not to be confused with
8 | [DBPedia](https://en.wikipedia.org/wiki/DBpedia) which is
9 | built ___from___ Wikipedia.
10 |
11 | *
12 |
13 | This module provides access to the Wikidata SPARQL endpoint, with a focus on
14 | biomedical entities, particularly GeneWiki.
15 |
16 | *
17 | *
18 |
19 | ### GeneWiki
20 |
21 | *
22 | *
23 |
24 | ### Dependencies
25 |
26 | * [WikidataIntegrator](https://github.com/SuLab/WikidataIntegrator)
27 |
28 | ### Usage
29 |
30 | ```
31 | $ python3 -m BioClients.wikidata.Client -h
32 | usage: Client.py [-h] [--o OFILE] [--rqfile RQFILE] [--rq RQ] [-v]
33 | {query,list_drugTargetPairs,list_geneDiseasePairs}
34 |
35 | Wikidata utilities
36 |
37 | positional arguments:
38 | {list_drugTargetPairs,list_geneDiseasePairs,query,test}
39 | OPERATION
40 |
41 | options:
42 | -h, --help show this help message and exit
43 | --o OFILE output (TSV)
44 | --rqfile RQFILE input Sparql file
45 | --rq RQ input Sparql string
46 | -v, --verbose
47 | ```
48 |
49 | ```
50 | $ python3 -m BioClients.wikidata.Client list_geneDiseasePairs
51 | ```
52 |
--------------------------------------------------------------------------------
/BioClients/util/rdf/App.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | RDF utility functions.
4 | """
5 | import sys,os,re,gzip,argparse,logging
6 |
7 | import rdflib
8 |
9 | from .. import rdf as util_rdf
10 |
11 | #############################################################################
if __name__ == "__main__":
    # Command-line entry point for the RDF utilities.
    FORMATS = ["text/turtle", "application/rdf+xml", "text/n3", ]
    ops = [ "describe_rdf", "validate_rdf", "convert_rdf", ]
    parser = argparse.ArgumentParser(description="RDF utility", epilog="")
    parser.add_argument("op", choices=ops, help="OPERATION")
    parser.add_argument("--i", dest="ifile", help="input file (RDF)")
    parser.add_argument("--ifmt", choices=FORMATS, default="text/turtle", help="input RDF format")
    parser.add_argument("--ofmt", choices=FORMATS, default="text/turtle", help="output RDF format")
    parser.add_argument("--o", dest="ofile", help="output file")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    # DEBUG only when -v is given at least twice.
    log_level = logging.DEBUG if args.verbose > 1 else logging.INFO
    logging.basicConfig(format="%(levelname)s:%(message)s", level=log_level)

    # Fall back to the standard streams when no file is named.
    fin = sys.stdin
    if args.ifile:
        fin = open(args.ifile, "r")
    fout = sys.stdout
    if args.ofile:
        fout = open(args.ofile, "w")

    if args.op == "convert_rdf":
        util_rdf.ConvertRdf(fin, args.ifmt, args.ofmt, fout)
    elif args.op == "validate_rdf":
        util_rdf.ValidateRdf(fin, args.ifmt)
    elif args.op == "describe_rdf":
        util_rdf.DescribeRdf(fin, args.ifmt)
    else:
        parser.error(f"Invalid operation: {args.op}")
40 |
--------------------------------------------------------------------------------
/BioClients/hubmap/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility functions for HuBMAP REST API.
4 | https://portal.hubmapconsortium.org/apis
5 | """
6 | ###
7 | import sys,os,re,time,json,logging,requests,tqdm
8 | import pandas as pd
9 | #
10 | API_HOST='entity.api.hubmapconsortium.org'
11 | API_BASE_PATH=''
12 | API_BASE_URL = f"https://{API_HOST}{API_BASE_PATH}"
13 | #
14 | #############################################################################
def ListEntityTypes(base_url=API_BASE_URL, fout=None):
    """Fetch ``{base_url}/entity-types`` and return the decoded JSON.

    On a non-200 status the code is logged and an empty list is returned.
    When ``fout`` is given, each element of the response is written on its
    own line.
    """
    resp = requests.get(f"{base_url}/entity-types")
    if resp.status_code != 200:
        logging.error(f"status_code: {resp.status_code}")
        return []
    entity_types = resp.json()
    if fout is not None:
        for entity_type in entity_types:
            fout.write(f"{entity_type}\n")
    logging.info(f"Entities: {len(entity_types)}")
    return entity_types
26 |
27 | #############################################################################
def GetEntity(ids, base_url=API_BASE_URL, fout=None):
    """Fetch ``{base_url}/entities/{id}`` for each ID.

    Each decoded response is logged at DEBUG level as JSON and, when
    ``fout`` is given, iterated with one item written per line.

    NOTE(review): the returned ``df`` is never assigned and is always None.
    NOTE(review): if the response is a JSON object, iterating it writes only
    its keys -- confirm the intended output format.
    """
    df = None
    n_out = 0
    for entity_id in ids:
        resp = requests.get(f"{base_url}/entities/{entity_id}")
        if resp.status_code != 200:
            logging.error(f"status_code: {resp.status_code}")
            continue
        entity = resp.json()
        logging.debug(json.dumps(entity, indent=2))
        if fout is not None:
            for item in entity:
                fout.write(f"{item}\n")
        n_out += len(entity)
    logging.info(f"n_out: {n_out}")
    return df
43 |
44 | #############################################################################
45 |
--------------------------------------------------------------------------------
/doc/badapple.md:
--------------------------------------------------------------------------------
1 | # `Badapple`
2 |
3 | __Badapple__ : BioAssay Data Associative Promiscuity Prediction Learning Engine
4 |
5 | ## `Badapple2-API`
6 |
7 | Client for the Badapple REST API.
8 |
9 | * [Badapple2-API](https://github.com/unmtransinfo/Badapple2-API)
10 | * [Badapple2 API-Docs](https://chiltepin.health.unm.edu/badapple2/apidocs/)
11 |
12 | ```
13 | python -m BioClients.badapple.Client -h
14 | usage: Client.py [-h] [--smi SMI] [--ids IDS] [--i IFILE]
15 | [--db {badapple2,badapple_classic}] [--o OFILE] [--max_rings MAX_RINGS]
16 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v]
17 | {get_compound2scaffolds,get_scaffold_info,get_scaffold2compounds,get_scaffold2drugs}
18 |
19 | Badapple REST API client utility
20 |
21 | positional arguments:
22 | {get_compound2scaffolds,get_scaffold_info,get_scaffold2compounds,get_scaffold2drugs}
23 | OPERATION
24 |
25 | options:
26 | -h, --help show this help message and exit
27 | --smi SMI input SMILES
28 | --ids IDS input IDs, comma-separated
29 | --i IFILE input SMILES file (with optional appended NAME), or input
30 | IDs file
31 | --db {badapple2,badapple_classic}
32 | default=badapple2
33 | --o OFILE output file (TSV)
34 | --max_rings MAX_RINGS
35 | max rings
36 | --api_host API_HOST
37 | --api_base_path API_BASE_PATH
38 | -v, --verbose
39 |
40 | Example SMILES: OC(=O)C1=C2CCCC(C=C3C=CC(=O)C=C3)=C2NC2=CC=CC=C12 Example scaffold IDs:
41 | 46,50
42 | ```
43 |
--------------------------------------------------------------------------------
/doc/stringdb.md:
--------------------------------------------------------------------------------
1 | # `BioClients.stringdb`
2 |
3 | ## STRINGDB
4 |
5 | ## Usage
6 |
7 | ```
8 | $ python3 -m BioClients.stringdb.Client -h
9 |
10 | usage: Client.py [-h] [--id ID] [--ids IDS] [--idfile IDFILE] [--o OFILE]
11 | [--species SPECIES] [--minscore MINSCORE]
12 | [--netflavor {evidence,confidence,actions}]
13 | [--imgfmt {image,highres_image,svg}] [--api_host API_HOST]
14 | [--api_base_path API_BASE_PATH] [-v]
15 | {getIds,getInteractionPartners,getNetwork,getNetworkImage,getEnrichment,getPPIEnrichment,getInteractors,getActions,getAbstracts}
16 |
17 | STRING-DB REST API client utility
18 |
19 | positional arguments:
20 | {getIds,getInteractionPartners,getNetwork,getNetworkImage,getEnrichment,getPPIEnrichment,getInteractors,getActions,getAbstracts}
21 | operation
22 |
23 | optional arguments:
24 | -h, --help show this help message and exit
25 | --id ID protein ID (ex:DRD1_HUMAN)
26 | --ids IDS protein IDs, comma-separated
27 | --idfile IDFILE input file, protein IDs
28 | --o OFILE output file
29 | --species SPECIES taxon code, ex: 9606 (human)
30 | --minscore MINSCORE signifcance threshold 0-1000
31 | --netflavor {evidence,confidence,actions}
32 | network flavor
33 | --imgfmt {image,highres_image,svg}
34 | image format
35 | --api_host API_HOST
36 | --api_base_path API_BASE_PATH
37 | -v, --verbose
38 |
39 | Example protein IDs: DRD1 DRD1_HUMAN DRD2 DRD2_HUMAN ; Example species: 9606 (human, via taxon identifiers, http://www.uniprot.org/taxonomy) ; Image formats: PNG PNG_highres SVG ; MAY BE DEPRECATED: getInteractors, getActions, getAbstracts
40 | ```
41 |
42 |
--------------------------------------------------------------------------------
/BioClients/bindingdb/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #############################################################################
3 | ### BindingDb Utilities
4 | ### http://www.bindingdb.org/bind/BindingDBRESTfulAPI.jsp
5 | ### http://www.bindingdb.org/axis2/services/BDBService/getLigandsByUniprots?uniprot=P35355,Q8HZR1&cutoff=1000&code=0&response=application/json
6 | #############################################################################
7 | import sys,os,re,time,json,logging
8 | import urllib.parse
9 |
10 | from ..util import rest
11 | #
12 | ##############################################################################
def GetLigandsByUniprot(base_url, ids, ic50_max, fout):
    """Write ligand affinities for each UniProt ID as TSV.

    Queries ``getLigandsByUniprots`` once per ID and writes every entry of
    ``getLigandsByUniprotsResponse.affinities``. The header row comes from
    the keys of the first ligand seen; fields missing later are left empty.

    Args:
        base_url: service base URL.
        ids: iterable of UniProt IDs.
        ic50_max: cutoff value sent as the ``cutoff`` query parameter.
        fout: writable text stream receiving the TSV.
    """
    n_out = 0
    tags = None
    for id_this in ids:
        url = base_url+'/getLigandsByUniprots?uniprot=%s&cutoff=%d&response=application/json'%(
            urllib.parse.quote(str(id_this)), ic50_max)
        rval = rest.GetURL(url, parse_json=True)
        logging.debug(json.dumps(rval, sort_keys=True, indent=2))
        # Tolerate a missing/None response or a response without affinities.
        response = rval.get("getLigandsByUniprotsResponse") if isinstance(rval, dict) else None
        ligands = response.get("affinities", []) if isinstance(response, dict) else []
        for ligand in ligands:
            if not tags:
                tags = list(ligand.keys())
                fout.write("\t".join(tags)+"\n")
            vals = [(str(ligand[tag]) if tag in ligand else '') for tag in tags]
            fout.write("\t".join(vals)+"\n")
            n_out += 1  # Count rows written; previously always reported 0.
    logging.info("n_out: {}".format(n_out))
26 |
27 | ##############################################################################
def GetTargetsByCompound(base_url, smiles, sim_min, fout):
    """Query ``getTargetByCompound`` for a SMILES and write the XML reply.

    Args:
        base_url: service base URL.
        smiles: query structure; URL-quoted before sending.
        sim_min: cutoff sent as ``cutoff`` with two decimals.
        fout: writable text stream.
    """
    url = base_url+'/getTargetByCompound?smiles=%s&cutoff=%.2f'%(urllib.parse.quote(smiles), sim_min)
    rval = rest.GetURL(url, parse_xml=True)
    if rval is None:
        logging.error("No response for SMILES: {}".format(smiles))
        return
    if hasattr(rval, "tostring"):
        # Keep the original call for parsers whose result offers tostring().
        xml_txt = rval.tostring()
    else:
        # NOTE(review): presumably rest.GetURL(parse_xml=True) returns a
        # stdlib ElementTree/Element, which has no tostring() method --
        # confirm against the rest helper.
        from xml.etree import ElementTree
        root = rval.getroot() if hasattr(rval, "getroot") else rval
        xml_txt = ElementTree.tostring(root, encoding="unicode")
    if isinstance(xml_txt, bytes):
        xml_txt = xml_txt.decode("utf8")
    fout.write(xml_txt)
32 |
33 | ##############################################################################
34 |
--------------------------------------------------------------------------------
/BioClients/pubtator/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ###
3 | import sys,os,time,json,argparse,re,logging
4 | #
5 | from ..util import rest
6 | #
7 | #############################################################################
def GetAnnotations(base_url, mode, pmids, fout):
    """Fetch annotations per PMID and write them as TSV.

    Requests ``{base_url}/{mode}/{pmid}/JSON`` for each PMID. Every entry in
    a source's ``denotations`` that has an ``obj`` and a ``span`` with
    ``begin`` and ``end`` yields one output row; ``obj`` is split at the
    first ':' into type and identifier.

    Args:
        base_url: API base URL.
        mode: path component placed before the PMID.
        pmids: sequence of PMIDs (``len()`` is used for the summary counts).
        fout: writable text stream receiving the TSV.
    """
    n_assn = 0
    n_hit = 0
    fout.write('sourcedb\tsourceid\tbegin\tend\tobj_type\tobj\n')
    for pmid in pmids:
        url = base_url+'/%s/%s/JSON'%(mode, pmid)
        rval = rest.GetURL(url, parse_json=True)
        if not rval:
            logging.info('not found: %s'%(pmid))
            continue

        n_assn_this = 0
        sources = rval if isinstance(rval, list) else [rval]
        for source in sources:
            if not isinstance(source, dict):
                # The original test "(type(source) is dict and 'denotations') in source"
                # had a misplaced parenthesis; non-dict sources carry no annotations.
                continue
            sourceDb = source.get('sourcedb', '')
            sourceId = source.get('sourceid', '')
            for ann in source.get('denotations') or []:
                obj = ann.get('obj')
                span = ann.get('span') or {}
                begin = span.get('begin')
                end = span.get('end')
                # Compare against None so that a valid offset of 0 is kept.
                if not obj or begin is None or end is None:
                    continue
                # partition() tolerates an obj with no ':' (id becomes '').
                obj_type, _, obj_id = obj.partition(':')
                fout.write('%s\t%s\t%d\t%d\t%s\t%s\n'%(sourceDb, sourceId, begin, end, obj_type, obj_id))
                n_assn_this += 1
        if n_assn_this:
            n_hit += 1
        n_assn += n_assn_this

    logging.info('n_in = %d (PMIDs)'%(len(pmids)))
    logging.info('n_hit = %d (PMIDs with associations)'%(n_hit))
    logging.info('n_miss = %d (PMIDs with NO associations)'%(len(pmids)-n_hit))
    logging.info('n_assn = %d (total associations)'%(n_assn))
40 |
41 | #############################################################################
42 |
--------------------------------------------------------------------------------
/BioClients/uniprot/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ##############################################################################
3 | ### uniprot_utils.py - utility functions for access to Uniprot REST API.
4 | ### UniprotKB = Uniprot Knowledge Base
5 | ##############################################################################
6 | import sys,os,re,logging
7 | #
8 | from ..util import rest
9 | #
10 | #############################################################################
def GetData(base_uri, uids, ofmt, fout):
    """Fetch ``{base_uri}/{uid}.{ofmt}`` for each ID and write the text.

    For ``ofmt == 'tab'`` the first line of every response after the first
    successful one is dropped, so the header appears only once. Other
    formats are written verbatim (xml and rdf are not merged).
    """
    n_err = 0
    n_prot = 0
    for uid in uids:
        text = rest.GetURL(base_uri+'/%s.%s'%(uid, ofmt))
        if not text:
            n_err += 1
            continue
        if ofmt != 'tab':
            fout.write(text+'\n')
        else:
            rows = text.splitlines()
            # Skip duplicate headers once a protein has been written.
            first_row = 1 if n_prot > 0 else 0
            for row in rows[first_row:]:
                fout.write(row+'\n')
        n_prot += 1
    logging.info('n_in: %d; n_prot: %d; n_err: %d'%(len(uids), n_prot, n_err))
31 |
32 | #############################################################################
def UIDs2JSON(base_uri, uids, fout):
    """Write UniProt metadata for the given IDs as pretty-printed JSON.

    Uses the uniprot library from Bosco Ho (https://github.com/boscoh/uniprot).
    The 'accs', 'sequence', 'go' and 'description' fields are removed from
    every record to keep the output simple.

    Args:
        base_uri: unused; kept for interface compatibility.
        uids: UniProt IDs passed to ``uniprot.batch_uniprot_metadata``.
        fout: writable text stream.
    """
    import json  # Not imported at module level in this file.
    import uniprot  ## Bosco Ho (https://github.com/boscoh/uniprot)
    uniprot_data = uniprot.batch_uniprot_metadata(uids, None)
    dropped_keys = ('accs', 'sequence', 'go', 'description')  # keep simple
    for record in uniprot_data.values():
        # pop() on a fixed key list avoids deleting from a dict while
        # iterating over its own keys, which raises RuntimeError.
        for key in dropped_keys:
            record.pop(key, None)
    json_txt = json.dumps(uniprot_data, sort_keys=True, indent=2)
    fout.write(json_txt+'\n')
    return
44 |
--------------------------------------------------------------------------------
/BioClients/geneontology/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | GeneOntology client.
4 | """
5 | import os,sys,re,json,time,logging
6 | import urllib.parse
7 | import pandas as pd
8 |
9 | from ..util import rest
10 |
11 | API_HOST='api.geneontology.org'
12 | API_BASE_PATH='/api'
13 | BASE_URL = 'https://'+API_HOST+API_BASE_PATH
14 | #
15 | ##############################################################################
def GetEntities(ids, base_url=BASE_URL, fout=None):
    """Fetch ``/bioentity/{id}`` for each ID and tabulate the results.

    For only one type of entity per call (gene, term): the columns are the
    keys of the first entity returned, and keys absent from later entities
    become empty cells.

    Args:
        ids: iterable of entity IDs (URL-quoted before use).
        base_url: API base URL.
        fout: optional stream; when given, the table is written as TSV.

    Returns:
        pandas DataFrame with one row per entity retrieved.
    """
    tags = []
    rows = []
    for id_this in ids:
        ent = rest.GetURL(base_url+'/bioentity/'+urllib.parse.quote(id_this), parse_json=True)
        logging.debug(json.dumps(ent, sort_keys=True, indent=2))
        if not ent:
            logging.warning('not found: {}'.format(id_this))
            continue
        # list() is required: dict views cannot be indexed or reused safely.
        if not tags: tags = list(ent.keys())
        rows.append({tag: ent.get(tag) for tag in tags})
    # Build the frame once instead of concatenating per row.
    df = pd.DataFrame(rows, columns=tags)
    logging.info('n_ent: {}'.format(df.shape[0]))
    if fout: df.to_csv(fout, sep="\t", index=False)
    return df
27 |
28 | ##############################################################################
def GetGeneTerms(ids, base_url=BASE_URL, fout=None):
    """Fetch ``/bioentity/gene/{id}/function`` and tabulate associations.

    The columns are the keys of the first association returned; keys absent
    from later associations become empty cells.

    Args:
        ids: sequence of gene IDs (URL-quoted before use).
        base_url: API base URL.
        fout: optional stream; when given, the table is written as TSV.

    Returns:
        pandas DataFrame with one row per association.
    """
    tags = []
    rows = []
    for id_this in ids:
        rval = rest.GetURL(base_url+'/bioentity/gene/{}/function'.format(urllib.parse.quote(id_this)), parse_json=True)
        # A failed request may yield None; treat it as "no associations".
        assns = rval.get('associations', []) if isinstance(rval, dict) else []
        for assn in assns:
            logging.debug(json.dumps(assn, sort_keys=True, indent=2))
            # list() is required: dict views cannot be indexed.
            if not tags: tags = list(assn.keys())
            rows.append({tag: assn.get(tag) for tag in tags})
    # Build the frame once instead of concatenating per row.
    df = pd.DataFrame(rows, columns=tags)
    logging.info('n_gene: {}; n_assn: {}'.format(len(ids), df.shape[0]))
    if fout: df.to_csv(fout, sep="\t", index=False)
    return df
41 |
42 | ##############################################################################
43 |
--------------------------------------------------------------------------------
/BioClients/util/db/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | '''
3 | Commonly used functions for database client applications.
4 | '''
5 | import sys,os,logging,urllib.parse
6 | import sqlalchemy
7 |
8 | ##############################################################################
def PostgreSqlConnect(dbhost, dbport, dbname, dbusr, dbpw):
    """Create a SQLAlchemy engine for PostgreSQL (psycopg2) and connect.

    Returns None, after logging, if importing psycopg2 or creating the
    engine raises; otherwise returns ``engine.connect()``. The password is
    URL-quoted before being placed in the connection URL.
    """
    try:
        import psycopg2
        password = urllib.parse.quote_plus(dbpw)
        url = f"postgresql+psycopg2://{dbusr}:{password}@{dbhost}:{dbport}/{dbname}"
        engine = sqlalchemy.create_engine(url)
    except Exception as err:
        logging.info(f"{err}")
        logging.error("Failed to connect.")
        return None
    return engine.connect()
18 |
19 | ##############################################################################
def MySqlConnect(dbhost, dbport, dbname, dbusr, dbpw):
    """Create a SQLAlchemy engine for MySQL and connect.

    Drivers are tried in order; the first whose import and engine creation
    succeed is used. Each failure is logged at INFO level. If all fail, an
    error is logged and None is returned.
    """
    # (importable module, SQLAlchemy driver name), in order of preference:
    #   https://pypi.org/project/mysql-connector-python/  (pip install mysql-connector-python)
    #   https://pypi.org/project/PyMySQL/                 (pip install PyMySQL)
    #   https://mysqlclient.readthedocs.io/               (pip install mysqlclient)
    candidates = (
        ("mysql.connector", "mysqlconnector"),
        ("pymysql", "pymysql"),
        ("MySQLdb", "mysqldb"),
    )
    engine = None
    for module_name, driver in candidates:
        try:
            __import__(module_name)
            engine = sqlalchemy.create_engine(f"mysql+{driver}://{dbusr}:{urllib.parse.quote_plus(dbpw)}@{dbhost}:{dbport}/{dbname}")
        except Exception as err:
            logging.info(f"{err}")
            continue
        break
    if engine is None:
        logging.error("Failed to connect.")
        return None
    return engine.connect()
45 |
--------------------------------------------------------------------------------
/BioClients/idg/rss/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | IDG Resource Submission System
4 | https://rss.ccs.miami.edu/
5 | """
6 | ###
7 | import sys,os,re,json,time,logging,tqdm
8 | import pandas as pd
9 | import requests
10 | import urllib,urllib.parse
11 | #
12 | #
13 | API_HOST="rss.ccs.miami.edu"
14 | API_BASE_PATH="/rss-api"
15 | BASE_URL = 'https://'+API_HOST+API_BASE_PATH
16 | #
17 | ##############################################################################
def ListTargets(base_url=BASE_URL, fout=None):
    """Fetch ``{base_url}/target`` and return the targets as a DataFrame.

    Columns are the keys of the first target returned. A non-200 response
    yields an empty frame. When ``fout`` is given the table is written as TSV.

    NOTE(review): the request is made with verify=False, i.e. without TLS
    certificate verification -- confirm this is still required.
    """
    columns = []
    df = pd.DataFrame()
    resp = requests.get(base_url+'/target', verify=False)
    if resp.status_code == 200:
        targets = resp.json()
    else:
        targets = []
    for target in targets:
        logging.debug(json.dumps(target, sort_keys=True, indent=2))
        if not columns:
            columns = list(target.keys())
        row = pd.DataFrame({col: [target[col]] for col in columns})
        df = pd.concat([df, row])
    if fout:
        df.to_csv(fout, sep="\t", index=False)
    logging.info("n_out: {}".format(df.shape[0]))
    return df
30 |
31 | ##############################################################################
def GetTargetResources(ids, base_url=BASE_URL, fout=None):
    """Fetch ``{base_url}/target/id?id={id}`` per ID; return a DataFrame.

    Columns are the keys of the first resource returned. Non-200 responses
    contribute no rows. When ``fout`` is given the table is written as TSV.

    NOTE(review): requests are made with verify=False, i.e. without TLS
    certificate verification -- confirm this is still required.
    """
    columns = []
    df = pd.DataFrame()
    for target_id in ids:
        resp = requests.get(base_url+f'/target/id?id={target_id}', verify=False)
        if resp.status_code == 200:
            resources = resp.json()
        else:
            resources = []
        for resource in resources:
            logging.debug(json.dumps(resource, sort_keys=True, indent=2))
            if not columns:
                columns = list(resource.keys())
            row = pd.DataFrame({col: [resource[col]] for col in columns})
            df = pd.concat([df, row])
    if fout:
        df.to_csv(fout, sep="\t", index=False)
    logging.info("n_out: {}".format(df.shape[0]))
    return df
45 |
46 | ##############################################################################
47 |
--------------------------------------------------------------------------------
/BioClients/cas/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility app for the CAS REST API.
4 | """
5 | ###
6 | import sys,os,re,argparse,time,logging
7 | #
8 | from .. import cas
9 | #
10 | ##############################################################################
if __name__ == '__main__':
    # Command-line entry point for the CAS REST client.
    ops = ["get_rn2details", "get_rn2image"]
    parser = argparse.ArgumentParser(description="CAS REST client")
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="input IDs file (CAS Registry Number)")
    parser.add_argument("--ids", help="input IDs (CAS Registry Number) (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (usually TSV)")
    parser.add_argument("--api_host", default=cas.API_HOST)
    parser.add_argument("--api_base_path", default=cas.API_BASE_PATH)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=0)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = 'https://'+args.api_host+args.api_base_path

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    ids = []
    if args.ifile:
        # Context manager closes the file even on error; blank lines are
        # skipped so they do not become empty IDs.
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin if line.strip()]
    elif args.ids:
        ids = [id_this for id_this in re.split(r'[,\s]+', args.ids) if id_this]
    # Honor --skip/--nmax, which were parsed but previously ignored;
    # the defaults (0) leave the ID list unchanged.
    if args.skip > 0:
        ids = ids[args.skip:]
    if args.nmax > 0:
        ids = ids[:args.nmax]
    logging.info(f"Input IDs: {len(ids)}")

    t0 = time.time()

    if args.op == 'get_rn2details':
        cas.GetRN2Details(ids, base_url, fout)

    else:
        # NOTE(review): "get_rn2image" is an accepted choice but has no
        # handler here, so it is reported as invalid.
        parser.error(f"Invalid operation: {args.op}")

    if fout is not sys.stdout:
        fout.close()

    logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0)))))
52 |
53 |
--------------------------------------------------------------------------------
/BioClients/util/neo4j/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | https://py2neo.org/
4 | https://py2neo.org/v4/database.html
5 | """
6 | import sys,os,json,logging
7 | import pandas as pd
8 | import py2neo
9 |
10 | DBHOST="localhost"
11 | DBPORT=7687
12 | DBSCHEME="bolt"
13 | DBUSR="neo4j"
14 | DBPW="neo4j"
15 |
16 | #############################################################################
def DbConnect(dbhost=DBHOST, dbport=DBPORT, dbscheme=DBSCHEME, dbusr=DBUSR, dbpw=DBPW, secure=False):
    """Create a py2neo GraphService for the given connection settings.

    Returns the service, or None if construction raised (the error is logged).
    """
    try:
        return py2neo.GraphService(
            host=dbhost,
            port=dbport,
            scheme=dbscheme,
            secure=secure,
            user=dbusr,
            password=dbpw,
        )
    except Exception as err:
        logging.error(f"{err}")
        return None
24 |
25 | #############################################################################
def DbInfo(db, fout):
    """Write selected attributes of ``db`` as a two-column TSV.

    One row per attribute (uri, kernel_version, default_graph, product);
    ``db.config`` is logged at DEBUG level.
    """
    logging.debug(f"db.config: {db.config}")
    fields = ("uri", "kernel_version", "default_graph", "product")
    info = {field: [getattr(db, field)] for field in fields}
    # Transpose so each attribute becomes a row rather than a column.
    pd.DataFrame(info).T.to_csv(fout, sep="\t")
33 |
34 | #############################################################################
def DbSummary(db, fout):
    """Write node count, relationship count and schema of the default graph.

    Output is a two-column TSV with one row per quantity.
    """
    graph = db.default_graph
    summary = {}
    summary["nodes"] = [len(graph.nodes)]
    summary["relationships"] = [len(graph.relationships)]
    summary["schema"] = [graph.schema]
    # Transpose so each quantity becomes a row rather than a column.
    pd.DataFrame(summary).T.to_csv(fout, sep="\t")
42 |
43 | #############################################################################
def DbQuery(db, cql, fmt, fout):
    """Run a Cypher query on the default graph and write the result.

    Args:
        db: object exposing ``default_graph`` with a ``run(cql)`` method.
        cql: query text.
        fmt: 'JSON' (case-insensitive) for a JSON array of rows; anything
            else produces TSV.
        fout: writable text stream.
    """
    n_out = 0
    g = db.default_graph
    result = g.run(cql)
    if fmt.upper() == 'JSON':
        rows = result.data()
        n_out = len(rows)
        fout.write(json.dumps(rows, indent=2)+'\n')
    else: #TSV
        df = result.to_data_frame()
        n_out = df.shape[0]
        # sep must be passed by keyword: positional use is deprecated in
        # pandas 2.x and no longer accepted by later versions.
        df.to_csv(fout, sep='\t', index=False)
    logging.info(f"rows: {n_out}")
57 |
58 | #############################################################################
59 |
--------------------------------------------------------------------------------
/BioClients/cdc/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #############################################################################
3 | ### CDC REST API client
4 | ### https://tools.cdc.gov/api/docs/info.aspx
5 | ### https://tools.cdc.gov/api/v2/resources
6 | #############################################################################
7 | import sys,os,argparse,re,time,logging
8 | #
9 | from .. import cdc
10 | #
11 | #############################################################################
if __name__=='__main__':
    API_HOST="tools.cdc.gov"
    API_BASE_PATH="/api/v2/resources"
    # Each operation lists one resource type via cdc.ListResources().
    op2resource = {
        "list_sources": "sources",
        "list_topics": "topics",
        "list_organizations": "organizations",
        "list_audiences": "audiences",
    }
    ops = list(op2resource)
    parser = argparse.ArgumentParser( description='CDC REST API client utility')
    parser.add_argument("op", choices=ops, help='OPERATION (select one)')
    parser.add_argument("--i", dest="ifile", help="input file")
    parser.add_argument("--nmax", help="list: max to return")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose>1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    api_base_url = f"https://{args.api_host}{args.api_base_path}"

    fout = sys.stdout if not args.ofile else open(args.ofile, "w+")

    resource = op2resource.get(args.op)
    if resource is None:
        # Not reachable through argparse `choices`; kept as a safeguard.
        parser.error('Operation invalid: {}'.format(args.op))
    cdc.ListResources(api_base_url, resource, fout)
46 |
--------------------------------------------------------------------------------
/BioClients/util/hdf/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ###
3 |
4 | import sys,os,time,argparse,logging
5 | import numpy as np
6 | import pandas as pd
7 | import h5py
8 |
9 | ###
def h5_type(ob):
    """Return "Group" or "Dataset" when `ob` is exactly that h5py type, else `type(ob)` itself."""
    ob_type = type(ob)
    # Exact type identity, not isinstance: subclasses fall through to the last return.
    if ob_type is h5py.Group:
        return "Group"
    if ob_type is h5py.Dataset:
        return "Dataset"
    return ob_type
12 |
13 | ###
def list_all(name, ob):
    """Log the name and kind of `ob`, plus dtype/shape details for datasets.

    Used as the callback for `visititems()` in Summary(); `name` is unused.
    Always returns None.
    """
    kind = h5_type(ob)
    logging.info(f"{ob.name} ({kind})")
    is_dataset = type(ob) is h5py.Dataset
    if is_dataset:
        logging.info(f"Dataset dtype: {ob.dtype}; shape: {ob.shape}; size: {ob.size}; ndim: {ob.ndim}; nbytes: {ob.nbytes}")
    return None
19 |
20 | #############################################################################
def Summary(f):
    """Log a summary of the HDF5 file `f`.

    Top-level groups are traversed with `visititems(list_all)`; any other
    top-level member is logged with its name and kind.
    """
    top_keys = list(f.keys())
    logging.debug(top_keys)
    for key in top_keys:
        member = f[key]
        if type(member) is not h5py.Group:
            logging.info(f"{member.name} ({h5_type(member)})")
            continue
        logging.debug(f"{f.name}:{member.name}")
        member.visititems(list_all)
29 |
30 | #############################################################################
if __name__=='__main__':
    OPS = ['summary']
    parser = argparse.ArgumentParser(description='H5 file operations', epilog="")
    parser.add_argument("op", choices=OPS, help='OPERATION')
    parser.add_argument("--i", dest="ifile", required=True, help="input file")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0)
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose>1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    started = time.time()

    # The output file is opened (and created) here although the summary itself goes to the log.
    fout = sys.stdout if not args.ofile else open(args.ofile, "w")

    finh5 = h5py.File(args.ifile, 'r')

    if args.op != "summary":
        parser.error(f"Unsupported operation: {args.op}")
    Summary(finh5)

    elapsed = time.gmtime(time.time()-started)
    logging.info(f"Elapsed: {time.strftime('%Hh:%Mm:%Ss', elapsed)}")
55 |
56 |
--------------------------------------------------------------------------------
/BioClients/geneontology/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | GeneOntology client.
4 | """
5 | ###
6 | import os,sys,argparse,re,json,time,logging
7 | import urllib.parse
8 |
9 | from .. import geneontology
10 | #
11 | ##############################################################################
if __name__=='__main__':
    # Command-line entry point: parse arguments, collect input IDs, dispatch the operation.
    epilog = ""
    parser = argparse.ArgumentParser(description='GeneOntolgy API client', epilog=epilog)
    ops=['list_terms', 'list_genes', 'get_entities', 'get_geneTerms' ]
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="input file of IDs")
    parser.add_argument("--ids", help="ID list (comma-separated)(e.g. NCBIGene:84570, GO:0006954)")
    parser.add_argument("--o", dest="ofile", help="output file (TSV)")
    parser.add_argument("--api_host", default=geneontology.API_HOST)
    parser.add_argument("--api_base_path", default=geneontology.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    BASE_URL = 'https://'+args.api_host+args.api_base_path

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    # Initialise first: the --i branch appends to this list, and the get_*
    # operations below need a defined value even when no IDs were supplied.
    ids = []
    if args.ids:
        ids = re.split(r'[,\s]+', args.ids.strip())
    elif args.ifile:
        with open(args.ifile) as fin:
            for line in fin:
                ids.append(line.rstrip())

    if args.op in ('get_entities', 'get_geneTerms') and not ids:
        parser.error(f"--i or --ids required for: {args.op}")

    if args.op == 'list_terms':
        geneontology.ListTerms(BASE_URL, fout)

    elif args.op == 'list_genes':
        geneontology.ListGenes(BASE_URL, fout)

    elif args.op == 'get_entities':
        geneontology.GetEntities(ids, BASE_URL, fout)

    elif args.op == 'get_geneTerms':
        geneontology.GetGeneTerms(ids, BASE_URL, fout)

    else:
        parser.error(f'Invalid operation: {args.op}')
55 |
56 |
--------------------------------------------------------------------------------
/BioClients/maayanlab/harmonizome/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | See: http://amp.pharm.mssm.edu/Harmonizome/documentation
4 | """
5 | ###
6 | import sys,os,re,argparse,time,json,logging
7 | #
8 | from ... import maayanlab
9 | #
10 | #
11 | ##############################################################################
if __name__=='__main__':
    # Command-line entry point for the Harmonizome client.
    ops = [ 'get_gene', 'get_gene_associations' ]
    parser = argparse.ArgumentParser(description='MaayanLab Harmonizome REST API client')
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--i", dest="ifile", help="input IDs")
    parser.add_argument("--ids", help="input IDs (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--api_host", default=maayanlab.harmonizome.API_HOST)
    parser.add_argument("--api_base_path", default=maayanlab.harmonizome.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose>1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    base_url = f"https://{args.api_host}{args.api_base_path}"

    fout = sys.stdout if not args.ofile else open(args.ofile,"w")

    started = time.time()

    ids = []
    if args.ifile:
        # One ID per line; blank lines are skipped.
        with open(args.ifile) as fin:
            for raw in fin:
                if raw.rstrip():
                    ids.append(raw.rstrip())
    elif args.ids:
        ids = re.split(r'[,\s]+', args.ids)
    logging.info(f"Input queries: {len(ids)}")

    harmonizome_utils = maayanlab.harmonizome.Utils
    if args.op == "get_gene":
        harmonizome_utils.GetGene(ids, base_url, fout)
    elif args.op == "get_gene_associations":
        harmonizome_utils.GetGeneAssociations(ids, base_url, fout)
    else:
        parser.error(f"Invalid operation: {args.op}")

    elapsed = time.gmtime(time.time()-started)
    logging.info(f"elapsed time: {time.strftime('%Hh:%Mm:%Ss', elapsed)}")
54 |
--------------------------------------------------------------------------------
/BioClients/wikidata/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | https://query.wikidata.org/sparql
4 |
5 | https://www.wikidata.org/wiki/User:ProteinBoxBot/SPARQL_Examples
6 |
7 | PREFIX wd:
8 | PREFIX wdt:
9 | PREFIX bd:
10 | PREFIX up:
11 | PREFIX uniprotkb:
12 |
13 | GeneWiki:
14 | *
15 | """
16 | ###
17 | import sys,os,argparse,logging
18 | import pandas as pd
19 |
20 | import wikidataintegrator
21 |
22 | from .. import wikidata
23 |
24 | #############################################################################
if __name__=="__main__":
    # Command-line entry point: run a SPARQL query or one of the predefined listings.
    parser = argparse.ArgumentParser(description="Wikidata utilities", epilog="")
    ops = ["query", "list_drugTargetPairs", "list_geneDiseasePairs", ]
    parser.add_argument("op", choices=ops, help="OPERATION")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--rqfile", help="input Sparql file")
    parser.add_argument("--rq", help="input Sparql string")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format="%(levelname)s:%(message)s", level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    logging.debug(f"Python: {sys.version.split()[0]}; wikidataintegrator: {wikidataintegrator.__version__}")

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    # --rqfile takes precedence over --rq; the file is closed once it has been read.
    if args.rqfile:
        with open(args.rqfile) as frq:
            rq = frq.read()
    elif args.rq:
        rq = args.rq
    else:
        rq = None

    if args.op == "query":
        if not rq: parser.error(f"--rq or --rqfile required for: {args.op}")
        wikidata.Query(rq, fout)

    elif args.op == "list_drugTargetPairs":
        wikidata.ListDrugTargetPairs(fout)

    elif args.op == "list_geneDiseasePairs":
        wikidata.ListGeneDiseasePairs(fout)

    else:
        parser.error(f"Unknown operation: {args.op}")
55 |
--------------------------------------------------------------------------------
/BioClients/idg/rss/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | IDG Resource Submission System
4 | https://rss.ccs.miami.edu/
5 | """
6 | ###
7 | import sys,os,re,argparse,time,logging
8 | #
9 | from ...idg import rss
10 | #
11 | ##############################################################################
if __name__=='__main__':
    # Command-line entry point for the IDG RSS client.
    epilog = ""
    parser = argparse.ArgumentParser(description="IDG RSS (Resource Submission System) REST API client)", epilog=epilog)
    ops = [ "list_targets", "get_target_resources" ]
    parser.add_argument("op", choices=ops, help="operation")
    parser.add_argument("--i", dest="ifile", help="input IDs")
    parser.add_argument("--ids", help="IDs (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--api_host", default=rss.Utils.API_HOST)
    parser.add_argument("--api_base_path", default=rss.Utils.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    ids=[]
    if args.ifile:
        # Context manager closes the input file even if reading fails.
        with open(args.ifile) as fin:
            for line in fin:
                ids.append(line.rstrip())
    elif args.ids:
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        ids = re.split(r'[,\s]+', args.ids.strip())
    if ids: logging.info('Input IDs: %d'%(len(ids)))

    t0=time.time()

    base_url = 'https://'+args.api_host+args.api_base_path

    if args.op=='list_targets':
        rss.Utils.ListTargets(base_url, fout)

    elif args.op=='get_target_resources':
        if not ids: parser.error(f"--i or --ids required for {args.op}")
        rss.Utils.GetTargetResources(ids, base_url, fout)

    else:
        parser.error(f'Unknown operation: {args.op}')

    logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0)))))
57 |
--------------------------------------------------------------------------------
/BioClients/wikipathways/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ##############################################################################
3 | ### Utilities for Wikipathways REST API.
4 | ### See: http://www.wikipathways.org/index.php/Help:WikiPathways_Webservice/API
5 | ##############################################################################
6 | import sys,os,re,time,logging
7 | import urllib.parse
8 | #
9 | from ..util import rest
10 | #
11 | ##############################################################################
def ListOrganisms(base_url, fout):
    """Fetch the organism list from `{base_url}/listOrganisms` and write one name per line.

    The output starts with the header line "Organism".
    """
    response = rest.GetURL(base_url+'/listOrganisms?format=json', parse_json=True)
    organisms = response['organisms']
    fout.write('Organism\n')
    n_out = 0
    for n_out, organism in enumerate(organisms, start=1):
        fout.write(f"{organism}\n")
    logging.info('n_out: %d'%(n_out))
22 |
23 | ##############################################################################
def ListPathways(base_url, params, fout):
    """Fetch pathways from `{base_url}/listPathways` and write them to `fout` as TSV.

    Args:
        base_url: API base URL.
        params: mapping; a truthy `params['human']` restricts the listing to Homo sapiens.
        fout: writable text file object.

    Column names are the sorted keys of the first pathway record; later records
    are written in the same column order, with '' for any missing key.
    """
    n_all=0; n_out=0; n_err=0;
    url = base_url+'/listPathways?format=json'
    if params['human']:
        url+=('&organism=%s'%urllib.parse.quote('Homo sapiens'))
    rval=rest.GetURL(url, parse_json=True)
    pathways = rval['pathways']
    tags=[];
    for pathway in pathways:
        n_all+=1
        if not tags:
            tags = sorted(pathway.keys())
            fout.write('\t'.join(tags)+'\n')
        # Rows use the same tab delimiter as the header; values are stringified
        # so that non-string JSON values (numbers, null) do not break join().
        vals = ['' if pathway.get(tag) is None else str(pathway[tag]) for tag in tags]
        fout.write('\t'.join(vals)+'\n')
        n_out+=1
    logging.info('n_all: %d; n_out: %d; n_err: %d'%(n_all,n_out,n_err))
41 |
42 | ##############################################################################
def GetPathway(base_url, id_query, ofmt, fout):
    """Download pathway `id_query` in format `ofmt` and write the raw response to `fout`.

    "gpml" (case-insensitive) uses the getPathway endpoint; any other format is
    requested through getPathwayAs with `fileType` set to the lower-cased format.
    """
    file_type = ofmt.lower()
    if file_type == 'gpml':
        path = '/getPathway?pwId=%s&revision=0'%id_query
    else:
        path = '/index.php?method=getPathwayAs&fileType=%s&pwId=%s&revision=0'%(file_type, id_query)
    content = rest.GetURL(base_url+path, parse_json=False)
    fout.write(content)
51 |
52 |
--------------------------------------------------------------------------------
/BioClients/lincs/sigcom/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #############################################################################
3 | ### https://maayanlab.cloud/sigcom-lincs/#/API
4 | #############################################################################
5 | import sys,os,re,argparse,json,logging
6 | #
7 | from ...lincs import sigcom as sigcom_lincs
8 | #
9 | #############################################################################
if __name__=="__main__":
    # Command-line entry point for the SigCom LINCS client.
    parser = argparse.ArgumentParser(description='SigCom LINCS REST API client utility')
    ops = ['getResources', ]
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--ids", help="IDs, comma-separated")
    parser.add_argument("--i", dest="ifile", help="input file, IDs")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--nmax", type=int, default=1000, help="max results")
    parser.add_argument("--skip", type=int, default=0, help="skip results")
    parser.add_argument("--api_host", default=sigcom_lincs.Utils.API_HOST)
    parser.add_argument("--api_base_path", default=sigcom_lincs.Utils.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = 'https://'+args.api_host+args.api_base_path

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    # Defined up front so that the "required" check below reports a usage
    # error instead of failing with NameError when no IDs are given.
    ids = []
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin]
        logging.info(f"input queries: {len(ids)}")
    elif args.ids:
        ids = re.split('[, ]+', args.ids.strip())

    if args.op=='getResources':
        if not ids: parser.error('--ids or --i required.')
        sigcom_lincs.Utils.GetResources(ids, base_url, fout)

    else:
        parser.error(f"Unsupported operation: {args.op}")
48 |
--------------------------------------------------------------------------------
/BioClients/icite/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | PubMed iCite REST API client
4 | https://icite.od.nih.gov/api
5 | """
6 | ###
7 | import sys,os,re,argparse,logging
8 | #
9 | from .. import icite
10 |
11 | #############################################################################
if __name__=='__main__':
    # Command-line entry point for the iCite client.
    ops = ['get_stats']
    parser = argparse.ArgumentParser(description='PubMed iCite REST API client utility', epilog='Publication metadata.')
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--ids", help="PubMed IDs, comma-separated (ex:25533513)")
    parser.add_argument("--i", dest="ifile", help="input file, PubMed IDs")
    parser.add_argument("--nmax", help="list: max to return")
    parser.add_argument("--year", help="list: year of publication")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--api_host", default=icite.API_HOST)
    parser.add_argument("--api_base_path", default=icite.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    parser.add_argument("-q", "--quiet", action="store_true", help="Suppress progress notification.")
    args = parser.parse_args()

    # 15 is the custom PROGRESS level (between DEBUG=10 and INFO=20).
    if args.verbose>0:
        log_level = logging.DEBUG
    elif args.quiet:
        log_level = logging.ERROR
    else:
        log_level = 15
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    base_url = f"https://{args.api_host}{args.api_base_path}"

    fout = sys.stdout if not args.ofile else open(args.ofile, "w", encoding="utf-8")

    ids = []
    if args.ifile:
        with open(args.ifile) as fin:
            ids.extend(raw.rstrip() for raw in fin)
            logging.info('Input IDs: %d'%(len(ids)))
    elif args.ids:
        ids = re.split(r'[\s,]+', args.ids.strip())

    if args.op != 'get_stats':
        parser.error(f"Invalid operation: {args.op}")
    if not ids:
        parser.error(f'Operation requires PMID[s]: {args.op}')
    icite.GetStats(ids, base_url, fout)
52 |
53 |
--------------------------------------------------------------------------------
/BioClients/entrez/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility app for the NIH NCBI Entrez E-Utils via Entrezpy.
4 | """
5 | ###
6 | import sys,os,re,argparse,time,logging
7 | #
8 | from .. import entrez
9 | #
10 | ##############################################################################
if __name__=='__main__':
    # Command-line entry point for the Entrez client.
    # NOTE(review): "get_esummary" and "get_record" are accepted by argparse
    # but have no dispatch branch, so they end in the "Invalid operation" error.
    OPS = [ "test", "get_esummary", "get_record", ]
    parser = argparse.ArgumentParser(description="NIH NCBI Entrez client")
    parser.add_argument("op", choices=OPS, help="OPERATION")
    parser.add_argument("--i", dest="ifile", help="input IDs file (PMIDs)")
    parser.add_argument("--ids", help="input IDs (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (usually TSV)")
    parser.add_argument("--email", dest="email", help="user email address")
    #parser.add_argument("--api_host", default=entrez.API_HOST)
    #parser.add_argument("--api_base_path", default=entrez.API_BASE_PATH)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("-q", "--quiet", action="store_true", help="Suppress progress notification.")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    # 15 sits between DEBUG (10) and INFO (20).
    if args.verbose>0:
        log_level = logging.DEBUG
    elif args.quiet:
        log_level = logging.ERROR
    else:
        log_level = 15
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    #base_url = f"https://{args.api_host}{args.api_base_path}"

    fout = sys.stdout if not args.ofile else open(args.ofile, "w")

    ids = []
    if args.ifile:
        with open(args.ifile) as fin:
            for raw in fin:
                ids.append(raw.rstrip())
    elif args.ids:
        ids = re.split(r'[,\s]+', args.ids)
    logging.info(f"Input IDs: {len(ids)}")

    started = time.time()

    if args.op != 'test':
        parser.error(f"Invalid operation: {args.op}")
    entrez.Test(args.email)

    elapsed = time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-started))
    logging.info(('elapsed time: %s'%(elapsed)))
54 |
55 |
--------------------------------------------------------------------------------
/BioClients/cas/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility functions for the CAS REST API.
4 | """
5 | ###
6 | import sys,os,io,re,csv,json,time,logging,tempfile,tqdm
7 | import requests
8 | import urllib.request,urllib.parse
9 | import pandas as pd
10 | #
11 | #
API_HOST='commonchemistry.cas.org'
API_BASE_PATH='/api'
BASE_URL="https://"+API_HOST+API_BASE_PATH
#
# https://commonchemistry.cas.org/api/detail?uri=substance%2Fpt%2F50000
#############################################################################
def GetRN2Details(ids, base_url=BASE_URL, fout=None):
    """Fetch substance details for each ID from the `/detail` endpoint.

    Args:
        ids: sized collection of identifiers, each queried as `substance/pt/{id}`.
        base_url: API base URL.
        fout: if given, each record is written to it as a TSV row (header on
            the first row only); otherwise records are accumulated.

    Returns:
        The accumulated DataFrame when `fout` is None, else None. Also None
        when no record was retrieved.

    The output columns are the scalar-valued fields of the first record
    (excluding "image"), plus "synonyms" and "replacedRns" joined with commas.
    """
    n_out=0; tags=None; df=None;
    if not ids:
        # Nothing to query: skip creating (and closing) the progress bar.
        logging.info(f"Input IDs: 0; Output records: {n_out}")
        return df
    tq = tqdm.tqdm(total=len(ids), unit="mols")
    try:
        for id_this in ids:
            uri = urllib.parse.quote(f"substance/pt/{id_this}")
            url = (base_url+f"/detail?uri={uri}")
            tq.update(n=1)
            response = requests.get(url, headers={"Accept": "application/json"})
            logging.debug(response.text)
            if response.status_code==requests.codes.not_found:
                continue
            if response.status_code!=requests.codes.ok:
                # Do not try to parse an error response as a substance record.
                logging.error(f"HTTP status_code: {response.status_code}")
                continue
            mol = response.json()
            if not tags:
                tags = []
                for tag in mol.keys():
                    if type(mol[tag]) in (dict, list, tuple) or tag=="image":
                        logging.debug(f'Ignoring field: "{tag}"')
                    else:
                        tags.append(tag)
            synonyms = mol["synonyms"] if "synonyms" in mol else []
            replacedRns = mol["replacedRns"] if "replacedRns" in mol else []
            # .get(): a later record may lack a field that the first record had.
            data_this = {tag:[mol.get(tag)] for tag in tags}
            data_this["synonyms"] = [",".join(synonyms)]
            data_this["replacedRns"] = [",".join(replacedRns)]
            df_this = pd.DataFrame(data_this)
            if fout is not None:
                df_this.to_csv(fout, sep='\t', index=False, header=bool(n_out==0))
            else:
                df = pd.concat([df, df_this])
            n_out+=1
    finally:
        tq.close()
    logging.info(f"Input IDs: {len(ids)}; Output records: {n_out}")
    return df
55 |
--------------------------------------------------------------------------------
/BioClients/mygene/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ###
3 | # https://mygene.info/
4 | # https://pypi.org/project/mygene/
5 | ###
6 | import sys,os,re,argparse,time,logging
7 | import pandas as pd
8 | import mygene as mg
9 | #
10 | from .. import mygene as bc_mygene
11 | #
12 | #############################################################################
if __name__=='__main__':
    # Command-line entry point for the MyGene client.
    ops = ["get", "search"]
    epilog = "See https://mygene.info/, https://pypi.org/project/mygene/. Example queries: 'cdk2', 'symbol:cdk2', 'symbol:cdk*'"
    parser = argparse.ArgumentParser(description='MyGene API client', epilog=epilog)
    parser.add_argument("op", choices=ops, help="OPERATION")
    parser.add_argument("--i", dest="ifile", help="input gene IDs or queries")
    parser.add_argument("--ids", help="input gene IDs or queries, comma-separated")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--species", default="human", help="species name or taxonomy ID")
    parser.add_argument("--fields", default=bc_mygene.FIELDS, help="requested fields")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose>1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    fout = sys.stdout if not args.ofile else open(args.ofile, 'w')

    started = time.time()
    logging.info('Python: {}; mygene: {}'.format(sys.version.split()[0], mg.__version__))

    # IDs come from a one-column file (--i) or a delimited string (--ids).
    if args.ifile:
        id_table = pd.read_table(args.ifile, header=None, names=["ID"])
        ids = list(id_table["ID"])
    elif args.ids:
        ids = re.split(r'[,\s]+', args.ids)
    else:
        parser.error("Input IDs required via --i or --ids.")

    fields = re.split(r'[,\s]+', args.fields)

    if args.op=="get":
        bc_mygene.GetGenes(ids, fields, fout)
    elif args.op=="search":
        bc_mygene.SearchGenes(ids, args.species, fout)
    else:
        parser.error(f"Invalid operation: {args.op}")

    elapsed = time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-started))
    logging.info(('elapsed time: %s'%(elapsed)))
53 |
54 |
--------------------------------------------------------------------------------
/BioClients/util/obo/Utils.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | """
3 | Developed and tested with doid.obo (Disease Ontology)
4 | """
5 | import sys,os,argparse,re,logging
6 |
7 | #############################################################################
def OBO2CSV(fin, fout):
    """Convert the [Term] stanzas of an OBO file to TSV.

    Args:
        fin: readable text file object (only `readline()` is used).
        fout: writable text file object.

    A header row of tag names is written first. A stanza starts at a "[Term]"
    line and ends at the next blank line or at end of file. Stanzas whose
    is_obsolete value is "true" are not written.
    """
    n_in=0; n_rec=0; n_out=0;
    tags = ['id', 'name', 'namespace', 'alt_id', 'def', 'subset', 'synonym', 'xref', 'is_a', 'is_obsolete']
    fout.write('\t'.join(tags)+'\n')
    reclines=[];
    while True:
        line=fin.readline()
        if not line: break
        n_in+=1
        line=line.strip()
        if not reclines:
            # Outside a stanza: only a [Term] header starts a new record.
            if line == '[Term]':
                reclines.append(line)
            continue
        if line != '':
            reclines.append(line)
            continue
        n_rec+=1
        n_out+=_OBO2CSV_WriteRecord(reclines, tags, fout)
        reclines=[];
    if reclines:
        # File ended without a trailing blank line: flush the last stanza.
        n_rec+=1
        n_out+=_OBO2CSV_WriteRecord(reclines, tags, fout)

    logging.info("input lines: %d; input records: %d ; output lines: %d"%(n_in, n_rec, n_out))

#############################################################################
def _OBO2CSV_WriteRecord(reclines, tags, fout):
    """Write one stanza as a TSV row unless it is obsolete; return rows written (0 or 1)."""
    row = OBO2CSV_Record(reclines)
    if row is None:
        return 0
    vals=[]
    is_obsolete=False
    for tag in tags:
        val = row.get(tag, '')
        if tag in ('def', 'synonym'):
            # Keep only the leading quoted text, dropping trailing qualifiers.
            val=re.sub(r'^"([^"]*)".*$', r'\1', val)
        else:
            val=re.sub(r'^"(.*)"$', r'\1', val)
        if tag=='is_obsolete': is_obsolete = bool(val.lower() == "true")
        vals.append(val)
    if is_obsolete:
        return 0
    fout.write('\t'.join(vals)+'\n')
    return 1

#############################################################################
def OBO2CSV_Record(reclines):
    """Parse the lines of one [Term] stanza into a dict of tag -> value.

    Trailing "! comment" text is removed from each line. Repeated tags are
    joined with ';'. xref values not of the form PREFIX:ID are dropped.
    Lines without a "tag: value" separator are skipped. Returns None (after
    logging an error) if the first line is not "[Term]".
    """
    vals={};
    if reclines[0] != '[Term]':
        logging.error('reclines[0] = "%s"'%reclines[0])
        return None
    for line in reclines[1:]:
        line = re.sub(r'\s*!.*$','',line)
        if ':' not in line:
            # Not a tag-value pair (or only a comment); nothing to record.
            logging.debug('Skipping line without tag: "%s"'%line)
            continue
        k,v = re.split(r':\s*', line, maxsplit=1)
        if k=='xref' and not re.match(r'\S+:\S+$',v): continue
        if k not in vals: vals[k]=''
        vals[k] = '%s%s%s'%(vals[k],(';' if vals[k] else ''),v)
    return vals
61 |
62 | #############################################################################
63 |
--------------------------------------------------------------------------------
/BioClients/pubchem/ftp/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #############################################################################
3 | ### For accessing files via FTP site; ftp://ftp.ncbi.nlm.nih.gov/pubchem/
4 | #############################################################################
5 | import sys,os,re,time,argparse,logging
6 |
7 | from ... import pubchem
8 |
9 | FTP_URL='ftp://ftp.ncbi.nlm.nih.gov/pubchem'
10 | POLL_WAIT=10
11 | MAX_WAIT=600
12 |
13 | #############################################################################
if __name__=='__main__':
    # Command-line entry point: fetch a file or a directory listing from the PubChem FTP site.
    epilog = "\nFTP_URL: {0}".format(FTP_URL)
    parser = argparse.ArgumentParser(description="access PubChem FTP site", epilog=epilog)
    parser.add_argument("--ftp_get", help="path of file")
    parser.add_argument("--ftp_ls", help="path of dir")
    parser.add_argument("--ftp_url", default=FTP_URL)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=0)
    parser.add_argument("--ftp_ntries", type=int, default=20, help="max tries per ftp-get")
    parser.add_argument("--sdf2smi", action="store_true", help="convert SDF to SMILES")
    parser.add_argument("--o", dest="ofile", help="output file")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose>1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    if not (args.ftp_get or args.ftp_ls):
        parser.error("--ftp_get or --ftp_ls required.")

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    ftp_utils = pubchem.ftp.Utils
    if args.ftp_get:
        url = f"{args.ftp_url}{args.ftp_get}"
        # --sdf2smi selects the converting downloader; both return a byte count.
        fetch = ftp_utils.GetUrlSDF2SMI if args.sdf2smi else ftp_utils.GetUrl
        nbytes = fetch(url, fout, ntries=args.ftp_ntries, poll_wait=10)
        logging.info("bytes: %.2fMB"%(nbytes/1e6))
    elif args.ftp_ls:
        url = f"{args.ftp_url}{args.ftp_ls}"
        ftp_utils.GetUrl(url, fout, ntries=args.ftp_ntries, poll_wait=10)
49 |
--------------------------------------------------------------------------------
/BioClients/uniprot/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Access to Uniprot REST API.
4 | https://www.uniprot.org/help/api
5 | UniprotKB = Uniprot Knowledge Base
6 |
7 | python3 -m BioClients.uniprot.Client --uids Q14790 getData
8 | """
9 | import sys,os,re,argparse,time,logging
10 | #
11 | from .. import uniprot
12 | #
13 | API_HOST='www.uniprot.org'
14 | API_BASE_PATH='/uniprot'
15 | #
16 | ##############################################################################
if __name__=='__main__':
    # Command-line entry point for the UniProt client.
    # NOTE(review): "listData" is accepted by argparse but has no dispatch
    # branch, so it ends in the "Unknown operation" error.
    parser = argparse.ArgumentParser(description='Uniprot query client; get data for specified IDs')
    ops = ['getData', 'listData']
    ofmts = ['txt', 'tab', 'xml', 'rdf', 'fasta', 'gff']
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--uids", dest="uids", help="UniProt IDs, comma-separated (ex: Q14790)")
    parser.add_argument("--i", dest="ifile", help="input file, UniProt IDs")
    parser.add_argument("--o", dest="ofile", help="output (CSV)")
    parser.add_argument("--ofmt", default='txt')
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    BASE_URI='https://'+args.api_host+args.api_base_path

    if args.ofile:
        fout = open(args.ofile, 'w')
    else:
        fout = sys.stdout

    t0=time.time()

    uids=[]
    if args.ifile:
        # Context manager: the input file was previously left open.
        with open(args.ifile) as fin:
            for line in fin:
                uids.append(line.strip())
    elif args.uids:
        uids = re.split(r'[\s,]+', args.uids.strip())
    else:
        parser.error('--i or --uids required.')

    if args.op == 'getData':
        uniprot.GetData(BASE_URI, uids, args.ofmt, fout)

    else:
        parser.error('Unknown operation: %s'%args.op)

    logging.info('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))
61 |
62 |
--------------------------------------------------------------------------------
/BioClients/hubmap/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility for HuBMAP REST API.
4 | """
5 | ###
6 | import sys,os,re,json,argparse,time,logging
7 | #
8 | from .. import hubmap as hubmap_utils
9 | #
10 | ##############################################################################
if __name__=='__main__':
    # Command-line driver for the HuBMAP REST API client.
    epilog="""
Example IDs:
HBM437.HTCQ.742 (donor)
HBM525.JNPV.685 (donor)
HBM292.WDQS.245 (sample)
HBM638.DVCH.366 (sample)
HBM543.RSRV.265 (dataset)
HBM287.WDKX.539 (dataset)
HBM925.SGXL.596 (collection)
HBM876.XNRH.336 (collection)
"""
    parser = argparse.ArgumentParser(description='HuBMAP REST API client', epilog=epilog)
    ops = ['list_entity_types', 'get_entity',
        ]
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--ids", dest="ids", help="IDs, comma-separated")
    parser.add_argument("--i", dest="ifile", help="input file, HuBMAP entity IDs or UUIDs")
    parser.add_argument("--o", dest="ofile", help="output file (TSV)")
    parser.add_argument("--api_host", default=hubmap_utils.API_HOST)
    parser.add_argument("--api_base_path", default=hubmap_utils.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = f"https://{args.api_host}{args.api_base_path}"

    ids=[]
    if args.ifile:
        # Context manager closes the input file; blank lines are not IDs.
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
    elif args.ids:
        ids = [id_this for id_this in re.split('[, ]+', args.ids.strip()) if id_this]

    # Validate before opening the output file, so that a usage error does not
    # truncate an existing output file.
    if args.op == "get_entity" and not ids:
        parser.error(f"--i or --ids required for operation {args.op}.")

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    t0=time.time()

    if args.op == "list_entity_types":
        hubmap_utils.ListEntityTypes(base_url, fout)

    elif args.op == "get_entity":
        hubmap_utils.GetEntity(ids, base_url, fout)

    else:
        parser.error(f"Invalid operation: {args.op}")

    logging.info(f"Elapsed time: {time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))}")
63 |
--------------------------------------------------------------------------------
/BioClients/biomarkerkb/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility for BiomarkerKB REST API.
4 |
5 | * https://api.biomarkerkb.org/
6 | """
7 | ###
8 | import sys,os,re,json,argparse,time,logging
9 | import pandas as pd
10 | #
11 | from .. import biomarkerkb
12 | #
13 | ##############################################################################
if __name__=='__main__':
    # Command-line driver for the BiomarkerKB REST API client.
    epilog="Example BiomarkerKB ID: AN6278-1"
    parser = argparse.ArgumentParser(description='BiomarkerKB REST API client', epilog=epilog)
    ops = [
        "get_biomarker_detail",
        "list_biomarker",
        "search_biomarker"
        ]
    parser.add_argument("op", choices=ops, help='OPERATION (select one)')
    parser.add_argument("--ids", help="input IDs")
    parser.add_argument("--i", dest="ifile", help="input file, IDs")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--query", help="search query (SMILES)")
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("--api_host", default=biomarkerkb.API_HOST)
    parser.add_argument("--api_base_path", default=biomarkerkb.API_BASE_PATH)
    parser.add_argument("-v","--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url='https://'+args.api_host+args.api_base_path

    ids=[]
    if args.ifile:
        # Context manager closes the input file even on error; blank lines
        # are not IDs.
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
    elif args.ids:
        ids = [id_this for id_this in re.split('[, ]+', args.ids.strip()) if id_this]
    if len(ids)>0: logging.info('Input IDs: %d'%(len(ids)))

    # Check the parsed list (not just the options) so that an empty input
    # file is also reported; done before the output file is opened.
    if args.op[:3]=="get" and not ids:
        parser.error(f"--i or --ids required for operation {args.op}.")

    fout = open(args.ofile, 'w') if args.ofile else sys.stdout

    if args.op == "get_biomarker_detail":
        biomarkerkb.GetBiomarkerDetail(ids, args.skip, base_url, fout)

    elif args.op in ("list_biomarker", "search_biomarker"):
        # Accepted choices with no handler in this script.
        parser.error(f'Not yet implemented: {args.op}')

    else:
        parser.error(f'Invalid operation: {args.op}')
60 |
--------------------------------------------------------------------------------
/BioClients/bioregistry/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Bioregistry.io
4 | """
5 | import sys,os,re,time,json,requests,logging
6 | import pandas as pd
7 | #
API_HOST='bioregistry.io'
API_BASE_PATH='/api'
API_BASE_URL='https://'+API_HOST+API_BASE_PATH
#
##############################################################################
def ListEntities(etype, base_url=API_BASE_URL, fout=None):
    """Entities: contributors|contexts|collections|registry|metaregistry

    Fetch {base_url}/{etype} and emit one row per item of the returned JSON
    object, with the item key in column "id". Columns are taken from the
    first item; fields holding a list or dict there are ignored.

    If fout is given, rows are written to it as TSV and None is returned;
    otherwise the rows are returned as a DataFrame (None if no rows).
    """
    tags=None; rows=[]; n_out=0;
    response = requests.get(f"{base_url}/{etype}")
    results = response.json()  # Parse once; reused for the debug dump.
    logging.debug(json.dumps(results, indent=2))
    for id_this,thing in results.items():
        if not tags:
            tags = []
            for tag,val in thing.items():
                if isinstance(val, (list, dict)):
                    logging.info(f"Ignoring field: {tag}")
                else:
                    tags.append(tag)
        data = {"id":[id_this]}
        data.update({tag:[thing[tag] if tag in thing else None] for tag in tags})
        df_this = pd.DataFrame(data)
        if fout is None: rows.append(df_this)
        else: df_this.to_csv(fout, sep="\t", index=False, header=bool(n_out==0))
        n_out += df_this.shape[0]
    logging.info(f"n_out ({etype}): {n_out}")
    # Single concat at the end instead of re-copying the frame on every row.
    return pd.concat(rows) if rows else None
34 |
35 | #############################################################################
def GetReference(ids, prefix, base_url=API_BASE_URL, fout=None):
    """Fetch {base_url}/reference/{prefix}:{id} for each ID, one row per provider.

    Output columns: prefix, id, provider_name, provider_url.

    If fout is given, rows are written to it as TSV and None is returned;
    otherwise the rows are returned as a DataFrame (None if no rows).
    """
    rows=[]; n_out=0;
    for id_this in ids:
        response = requests.get(f"{base_url}/reference/{prefix}:{id_this}")
        result = response.json()  # Parse once; reused for the debug dump.
        logging.debug(json.dumps(result, indent=2))
        # Fallback must be a dict, since .items() is called on it below.
        providers = result.get("providers") or {}
        for provider,url_this in providers.items():
            df_this = pd.DataFrame({"prefix":[prefix], "id":[id_this], "provider_name":[provider], "provider_url":[url_this]})
            if fout is None: rows.append(df_this)
            else: df_this.to_csv(fout, sep="\t", index=False, header=bool(n_out==0))
            n_out += df_this.shape[0]
    logging.info(f"n_out: {n_out}")
    # Single concat at the end instead of re-copying the frame on every row.
    return pd.concat(rows) if rows else None
50 |
51 | ##############################################################################
52 |
--------------------------------------------------------------------------------
/BioClients/pubmed/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility app for the PubMed REST-ful-ish webservices API.
4 | """
5 | ###
6 | import sys,os,re,argparse,time,logging
7 | #
8 | from .. import pubmed
9 | #
10 | ##############################################################################
if __name__=='__main__':
    # Command-line driver for the PubMed webservices client.
    OPS = [
        "get_esummary",
        "get_record",
        ]
    parser = argparse.ArgumentParser(description="PubMed webservices client")
    parser.add_argument("op", choices=OPS, help="OPERATION")
    parser.add_argument("--i", dest="ifile", help="input IDs file (PMIDs)")
    parser.add_argument("--ids", help="input IDs (PMIDs) (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (usually TSV)")
    parser.add_argument("--api_host", default=pubmed.API_HOST)
    parser.add_argument("--api_base_path", default=pubmed.API_BASE_PATH)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("-q", "--quiet", action="store_true", help="Suppress progress notification.")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    # Level 15 lies between DEBUG (10) and INFO (20).
    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>0 else logging.ERROR if args.quiet else 15))

    base_url = f"https://{args.api_host}{args.api_base_path}"

    ids=[]
    if args.ifile:
        # Context manager closes the input file even on error; blank lines
        # are not IDs.
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
    elif args.ids:
        # Drop the empty strings produced by leading/trailing separators.
        ids = [id_this for id_this in re.split(r'[,\s]+', args.ids) if id_this]
    logging.info(f"Input IDs: {len(ids)}")

    # Both operations take IDs; checked before the output file is opened so
    # that a usage error does not truncate an existing output file.
    if not ids:
        parser.error(f"--i or --ids required for operation {args.op}.")

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    t0=time.time()

    if args.op == 'get_esummary':
        pubmed.GetESummary(ids, args.skip, args.nmax, base_url, fout)

    elif args.op == 'get_record':
        pubmed.GetRecord(ids, args.skip, args.nmax, base_url, fout)

    else:
        parser.error(f"Invalid operation: {args.op}")

    logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0)))))
59 |
60 |
--------------------------------------------------------------------------------
/doc/medline.md:
--------------------------------------------------------------------------------
1 | # `BioClients.medline`
2 |
3 | ## Medline Plus
4 |
5 | Note that the Medline Plus Genetics resource has superseded the
6 | Genetics Home Reference (GHR).
7 |
8 | * [Medline Plus](https://medlineplus.gov/) | [Medline Plus Web Services](https://medlineplus.gov/about/developers/webservices/)
9 | * [Medline Plus Genetics](https://medlineplus.gov/genetics) | [Medline Plus Genetics API](https://medlineplus.gov/about/developers/geneticsdatafilesapi/)
10 | * [Medline Plus Connect](https://medlineplus.gov/connect/overview.html) | [Medline Plus Connect Web Service](https://medlineplus.gov/connect/service.html)
11 |
12 | ## Dependencies
13 |
14 | * Python package `xmltodict`
15 |
16 | ## Example commands
17 |
18 | ```
19 | $ python3 -m BioClients.medline.genetics.Client -h
20 | usage: Client.py [-h] [--i IFILE] [--o OFILE] [--ids IDS] [--api_host API_HOST]
21 | [--api_base_path API_BASE_PATH]
22 | [--download_host DOWNLOAD_HOST]
23 | [--download_base_path DOWNLOAD_BASE_PATH]
24 | [--summary_url SUMMARY_URL] [-v]
25 | {search,list_conditions,list_genes,get_condition_genes}
26 |
27 | MedlinePlus Genetics REST API client
28 |
29 | positional arguments:
30 | {search,list_conditions,list_genes,get_condition_genes}
31 | OPERATION (select one)
32 |
33 | optional arguments:
34 | -h, --help show this help message and exit
35 | --i IFILE input term file (one per line)
36 | --o OFILE output (TSV)
37 | --ids IDS term list (comma-separated)
38 | --api_host API_HOST
39 | --api_base_path API_BASE_PATH
40 | --download_host DOWNLOAD_HOST
41 | --download_base_path DOWNLOAD_BASE_PATH
42 | --summary_url SUMMARY_URL
43 | -v, --verbose
44 |
45 | Example conditions: allergic-asthma, alzheimer-disease, parkinson-disease,
46 | rapid-onset-dystonia-parkinsonism, type-1-diabetes, type-2-diabetes
47 | ```
48 |
49 | ```
50 | python3 -m BioClients.medline.genetics.Client list_conditions
51 | python3 -m BioClients.medline.genetics.Client search --ids "Asthma"
52 | python3 -m BioClients.medline.genetics.Client search --ids "Alzheimer"
53 | python3 -m BioClients.medline.genetics.Client get_condition_genes --ids "parkinson-disease"
54 | ```
55 |
--------------------------------------------------------------------------------
/BioClients/jensenlab/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | See: https://api.jensenlab.org/About
4 | """
5 | ###
6 | import sys,os,re,argparse,time,json,logging
7 | #
8 | from .. import jensenlab
9 | #
10 | ##############################################################################
if __name__=='__main__':
    # Command-line driver for the JensenLab REST API client.
    CHANNELS= ['Knowledge', 'Experiments', 'Textmining', 'All']
    parser = argparse.ArgumentParser(description='JensenLab REST API client')
    ops = ['get_disease_genes', 'get_comention_genes' ]
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="input IDs (for diseases should be DOIDs, e.g. \"DOID:10652\")")
    parser.add_argument("--channel", choices=CHANNELS, default="Textmining", help="source channel")
    parser.add_argument("--ids", help="input IDs (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--nmax", type=int, default=100, help="max hits")
    parser.add_argument("--api_host", default=jensenlab.API_HOST)
    parser.add_argument("--api_base_path", default=jensenlab.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = 'https://'+args.api_host+args.api_base_path

    t0 = time.time()

    ids=[];
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin if line.rstrip()]
    elif args.ids:
        # Drop the empty strings produced by leading/trailing separators.
        ids = [id_this for id_this in re.split(r'[,\s]+', args.ids) if id_this]
    logging.info('Input queries: %d'%(len(ids)))

    # Both operations take IDs; checked before the output file is opened so
    # that a usage error does not truncate an existing output file.
    if not ids:
        parser.error(f"--i or --ids required for operation {args.op}.")

    fout = open(args.ofile,"w") if args.ofile else sys.stdout

    if args.op == "get_disease_genes":
        jensenlab.GetDiseaseGenes(args.channel, ids, args.nmax, base_url, fout)

    elif args.op == "get_comention_genes":
        jensenlab.GetPubmedComentionGenes(ids, base_url, fout)

    else:
        parser.error(f"Invalid operation: {args.op}")

    logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0)))))
55 |
--------------------------------------------------------------------------------
/doc/pubmed.md:
--------------------------------------------------------------------------------
1 | # `BioClients.pubmed`
2 |
3 | ## PubMed
4 |
5 | * [PubMed](https://pubmed.ncbi.nlm.nih.gov/)
6 | *
7 | *
8 |
9 | ### PubMed Web Services Client
10 |
11 | `get_record` returns selected fields: title, abstract, firstAuthorLastName, journal, and year.
12 |
13 | ```
14 | $ python3 -m BioClients.pubmed.Client -h
15 | usage: Client.py [-h] [--i IFILE] [--ids IDS] [--o OFILE] [--api_host API_HOST]
16 | [--api_base_path API_BASE_PATH] [--skip SKIP] [--nmax NMAX] [-q] [-v]
17 | {get_esummary,get_record}
18 |
19 | PubMed webservices client
20 |
21 | positional arguments:
22 | {get_esummary,get_record}
23 | OPERATION
24 |
25 | options:
26 | -h, --help show this help message and exit
27 | --i IFILE input IDs file (PMIDs)
28 | --ids IDS input IDs (PMIDs) (comma-separated)
29 | --o OFILE output (usually TSV)
30 | --api_host API_HOST
31 | --api_base_path API_BASE_PATH
32 | --skip SKIP
33 | --nmax NMAX
34 | -q, --quiet Suppress progress notification.
35 | -v, --verbose
36 | ```
37 |
38 | ### PubMed XML Processing App
39 |
40 | Parse, process Entrez PubMed XML (summaries or full), normally obtained via
41 | Entrez eUtils, eDirect CLI or Perl API.
42 |
43 | Note that other Entrez XML (e.g. PubChem) is very similar.
44 |
45 | ```
46 | $ python3 -m BioClients.pubmed.App_XML -h
47 | usage: App_XML.py [-h] --i IFILE [--ids IDS] [--idfile IDFILE] [--nmax NMAX] [--o OFILE]
48 | [--odir ODIR] [-v]
49 | {summary2tsv,summary2abstract,full2tsv,full2abstract,full2authorlist}
50 |
51 | process PubMed XML (summaries or full), typically obtained via Entrez eUtils.
52 |
53 | positional arguments:
54 | {summary2tsv,summary2abstract,full2tsv,full2abstract,full2authorlist}
55 | operation
56 |
57 | options:
58 | -h, --help show this help message and exit
59 | --i IFILE input file, XML
60 | --ids IDS PubMed IDs, comma-separated (ex:25533513)
61 | --idfile IDFILE input file, PubMed IDs
62 | --nmax NMAX max to return
63 | --o OFILE output (TSV)
64 | --odir ODIR output directory
65 | -v, --verbose
66 | ```
67 |
--------------------------------------------------------------------------------
/BioClients/maayanlab/harmonizome/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility functions for MaayanLab REST APIs.
4 | Alternately could use harmonizomeapi:
5 | http://amp.pharm.mssm.edu/Harmonizome/static/harmonizomeapi.py
6 |
7 | https://amp.pharm.mssm.edu/Harmonizome/api/1.0/gene/NANOG
8 | https://amp.pharm.mssm.edu/Harmonizome/api/1.0/gene/NANOG?showAssociations=true
9 | """
10 | import sys,os,re,json,logging
11 |
12 | from ...util import rest
13 | #
API_HOST='amp.pharm.mssm.edu'
API_BASE_PATH='/Harmonizome/api/1.0'
BASE_URL='https://'+API_HOST+API_BASE_PATH
#
##############################################################################
def GetGene(ids, base_url=BASE_URL, fout=None):
    """Gene symbols expected, e.g. NANOG.

    Fetch {base_url}/gene/{id} for each ID and write one TSV row per gene to
    fout (sys.stdout if fout is None). The header row is the key list of the
    first gene record returned; later records are written in that column
    order, with missing keys left empty.
    """
    if fout is None: fout = sys.stdout  # Default None would crash on write().
    n_out=0; tags=None;
    for id_this in ids:
        gene = rest.Utils.GetURL(base_url+'/gene/{0}'.format(id_this), parse_json=True)
        logging.debug(json.dumps(gene, indent=2))
        if not gene:
            # Nothing usable returned for this ID (e.g. None or empty); skip
            # it rather than crash on gene.keys().
            logging.warning("No data for gene: %s"%(id_this))
            continue
        if not tags:
            tags = list(gene.keys())
            fout.write("\t".join(tags)+"\n")
        vals = [(str(gene[tag]) if tag in gene else "") for tag in tags]
        fout.write("\t".join(vals)+"\n")
        n_out+=1
    logging.info("n_out: %d"%(n_out))
32 |
33 | ##############################################################################
def GetGeneAssociations(ids, base_url=BASE_URL, fout=None):
    """Gene symbols expected, e.g. NANOG.

    Fetch {base_url}/gene/{id}?showAssociations=true for each ID and write
    one TSV row per association to fout (sys.stdout if fout is None). Each
    row holds the gene's scalar fields followed by the association's fields.
    Column order is fixed by the first gene that has associations and by its
    first association. Genes without associations are skipped.
    """
    if fout is None: fout = sys.stdout  # Default None would crash on write().
    n_out=0; gene_tags=[]; assn_tags=[];
    for id_this in ids:
        gene = rest.Utils.GetURL(base_url+'/gene/{0}?showAssociations=true'.format(id_this), parse_json=True)
        if not gene:
            # Nothing usable returned for this ID (e.g. None or empty); the
            # membership test below would fail on None.
            logging.warning("No data for gene: %s"%(id_this))
            continue
        assns = gene["associations"] if "associations" in gene else []
        if not assns: continue
        if not gene_tags:
            # Keep scalar gene fields only; nested lists/dicts are excluded.
            gene_tags = [tag for tag in gene.keys() if not isinstance(gene[tag], (list, dict))]
        for assn in assns:
            logging.debug(json.dumps(assn, indent=2))
            if not assn_tags:
                assn_tags = list(assn.keys())
                fout.write("\t".join(gene_tags+assn_tags)+"\n")
            vals = [(str(gene[tag]) if tag in gene else "") for tag in gene_tags]+[(str(assn[tag]) if tag in assn else "") for tag in assn_tags]
            fout.write("\t".join(vals)+"\n")
            n_out+=1
    logging.info("n_out: %d"%(n_out))
53 |
54 | ##############################################################################
55 |
--------------------------------------------------------------------------------
/BioClients/tcga/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | See: https://docs.gdc.cancer.gov/API/Users_Guide/
4 | """
5 | ###
6 | import sys,os,re,argparse,time,json,logging
7 | #
8 | from .. import tcga
9 | #
API_HOST='api.gdc.cancer.gov'
API_BASE_PATH=''
#
##############################################################################
if __name__=='__main__':
    # Command-line driver for the TCGA (GDC) REST API client.
    # Maps each operation to the name of the tcga function that handles it;
    # the function is looked up only for the operation actually requested.
    op2func = {
        'list_projects': 'ListProjects',
        'list_cases': 'ListCases',
        'list_files': 'ListFiles',
        'list_annotations': 'ListAnnotations',
        }
    parser = argparse.ArgumentParser(description='TCGA REST API client')
    parser.add_argument("op", choices=list(op2func), help='operation')
    parser.add_argument("--i", dest="ifile", help="input IDs")
    parser.add_argument("--ids", help="input IDs (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    if args.verbose>1:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=loglevel)

    base_url = f"https://{args.api_host}{args.api_base_path}"

    if args.ofile:
        fout = open(args.ofile,"w")
    else:
        fout = sys.stdout

    t0=time.time()

    # IDs are read and counted here; none of the list_* calls below use them.
    ids=[]
    if args.ifile:
        fin = open(args.ifile)
        for line in fin:
            id_this = line.rstrip()
            if id_this: ids.append(id_this)
        fin.close()
    elif args.ids:
        ids = re.split(r'[,\s]+', args.ids)
    logging.info(f"Input queries: {len(ids)}")

    funcname = op2func.get(args.op)
    if funcname is None:
        parser.error("Invalid operation: %s"%args.op)
    getattr(tcga, funcname)(base_url, args.skip, args.nmax, fout)

    elapsed = time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))
    logging.info(f"elapsed time: {elapsed}")
64 |
--------------------------------------------------------------------------------
/BioClients/drugcentral/Test.py:
--------------------------------------------------------------------------------
1 | import os,sys,unittest
2 |
3 | from .. import drugcentral
4 |
class TestAPI(unittest.TestCase):
    """Tests of the drugcentral functions against a live database.

    Connection parameters are read from $HOME/.drugcentral.yaml. The
    connection is opened once for the whole class in setUpClass rather than
    in __init__, because unittest creates one TestCase instance per test
    method.
    """

    @classmethod
    def setUpClass(cls):
        cls.params = drugcentral.ReadParamFile(os.path.join(os.environ['HOME'], ".drugcentral.yaml"))
        cls.dbcon = drugcentral.Utils.Connect(cls.params['DBHOST'], cls.params['DBPORT'], cls.params['DBNAME'], cls.params['DBUSR'], cls.params['DBPW'])

    @classmethod
    def tearDownClass(cls):
        # NOTE(review): assumes the connection object may offer close();
        # guarded so nothing happens if it does not -- confirm.
        close = getattr(cls.dbcon, "close", None)
        if callable(close):
            close()

    def test_Version(self):
        # Previously asserted "type(...) is not None", which is always true.
        self.assertIsNotNone(drugcentral.Version(self.dbcon))

    def test_Version_02(self):
        self.assertEqual(drugcentral.Version(self.dbcon).shape[0], 1)

    def test_Describe(self):
        self.assertGreater(drugcentral.Describe(self.dbcon).shape[0], 10)

    def test_Counts(self):
        self.assertGreater(drugcentral.Counts(self.dbcon).shape[0], 10)

    def test_ListStructures(self):
        df = drugcentral.ListStructures(self.dbcon)
        self.assertGreater(df.shape[0], 4000)

    def test_ListStructures2Smiles(self):
        df = drugcentral.ListStructures2Smiles(self.dbcon)
        self.assertGreater(df.shape[0], 4000)

    def test_ListProducts(self):
        df = drugcentral.ListProducts(self.dbcon)
        self.assertGreater(df.shape[0], 4000)

    def test_ListActiveIngredients(self):
        df = drugcentral.ListActiveIngredients(self.dbcon)
        self.assertGreater(df.shape[0], 4000)

    def test_ListIndications(self):
        self.assertGreater(drugcentral.ListIndications(self.dbcon).shape[0], 100)

    def test_SearchIndications(self):
        self.assertGreater(drugcentral.SearchIndications(self.dbcon, "Alzheimer").shape[0], 0)

    def test_GetStructure(self):
        self.assertEqual(drugcentral.GetStructure(self.dbcon, ["1725"]).shape[0], 1)

    def test_GetStructureBySynonym(self):
        self.assertEqual(drugcentral.GetStructureBySynonym(self.dbcon, ["zantac"]).shape[0], 1)

    def test_GetStructureIds(self):
        self.assertGreater(drugcentral.GetStructureIds(self.dbcon, ["1725"]).shape[0], 5)

    def test_GetStructureProducts(self):
        self.assertGreater(drugcentral.GetStructureProducts(self.dbcon, ["1725"]).shape[0], 10)

#############################################################################
if __name__ == '__main__':
    unittest.main(verbosity=2)
61 |
--------------------------------------------------------------------------------
/BioClients/glygen/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility for GlyGen REST API.
4 |
5 | * https://api.glygen.org/
6 | """
7 | ###
8 | import sys,os,re,json,argparse,time,logging
9 | import pandas as pd
10 | #
11 | from .. import glygen
12 | #
13 | ##############################################################################
if __name__=='__main__':
    # Command-line driver for the GlyGen REST API client.
    epilog="Example GlyTouCan Accession IDs: G00053MO"
    parser = argparse.ArgumentParser(description='GlyGen REST API client', epilog=epilog)
    ops = [
        "get_glycans",
        "list_glycans",
        "search_glycans"
        ]
    parser.add_argument("op", choices=ops, help='OPERATION (select one)')
    parser.add_argument("--ids", help="input IDs")
    parser.add_argument("--i", dest="ifile", help="input file, IDs")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--query", help="search query (SMILES)")
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("--api_host", default=glygen.API_HOST)
    parser.add_argument("--api_base_path", default=glygen.API_BASE_PATH)
    parser.add_argument("-v","--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url='https://'+args.api_host+args.api_base_path

    ids=[]
    if args.ifile:
        # Context manager closes the input file even on error; blank lines
        # are not IDs.
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
    elif args.ids:
        ids = [id_this for id_this in re.split('[, ]+', args.ids.strip()) if id_this]
    if len(ids)>0: logging.info('Input IDs: %d'%(len(ids)))

    # Check the parsed list (not just the options) so that an empty input
    # file is also reported; done before the output file is opened.
    if args.op[:3]=="get" and not ids:
        parser.error(f"--i or --ids required for operation {args.op}.")

    fout = open(args.ofile, 'w') if args.ofile else sys.stdout

    if args.op == "get_glycans":
        glygen.GetGlycans(ids, args.skip, base_url, fout)

    elif args.op == "list_glycans":
        glygen.ListGlycans(args.skip, base_url, fout)

    elif args.op == "search_glycans":
        parser.error(f'Not yet implemented: {args.op}')
        #glygen.SearchGlycans(args.query, base_url, fout)

    else:
        parser.error(f'Invalid operation: {args.op}')
67 |
--------------------------------------------------------------------------------
/BioClients/medline/connect/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ##############################################################################
3 | ### Medline utilities - access SNOMED and ICD codes
4 | ### https://medlineplus.gov/connect/technical.html
5 | ### https://medlineplus.gov/connect/service.html
6 | ##############################################################################
7 | ### https://apps.nlm.nih.gov/medlineplus/services/mpconnect_service.cfm
8 | ### Two required parameters:
9 | ### 1. Code System (one of):
10 | ### ICD-10-CM: mainSearchCriteria.v.cs=2.16.840.1.113883.6.90
11 | ### ICD-9-CM: mainSearchCriteria.v.cs=2.16.840.1.113883.6.103
12 | ### SNOMED_CT: mainSearchCriteria.v.cs=2.16.840.1.113883.6.96
13 | ### NDC: mainSearchCriteria.v.cs=2.16.840.1.113883.6.69
14 | ### RXNORM: mainSearchCriteria.v.cs=2.16.840.1.113883.6.88
15 | ### LOINC: mainSearchCriteria.v.cs=2.16.840.1.113883.6.1
16 | ### 2. Code:
17 | ### mainSearchCriteria.v.c=250.33
18 | ###
19 | ### Content format:
20 | ### XML (default): knowledgeResponseType=text/xml
21 | ### JSON: knowledgeResponseType=application/json
22 | ### JSONP: knowledgeResponseType=application/javascript&callback=CallbackFunction
23 | ### where CallbackFunction is a name you give the call back function.
24 | ##############################################################################
25 | import sys,os,re,time,logging
26 | import urllib.parse,json
27 | #
28 | from ...util import rest
29 | #
# Code system name -> identifier sent as mainSearchCriteria.v.cs.
CODESYSTEMS = {
    'SNOWMEDCT': '2.16.840.1.113883.6.96',  # Misspelled legacy key, kept for existing callers.
    'SNOMEDCT': '2.16.840.1.113883.6.96',
    'ICD9CM': '2.16.840.1.113883.6.103',
    'ICD10CM': '2.16.840.1.113883.6.90',
    'NDC' : '2.16.840.1.113883.6.69',
    'RXNORM': '2.16.840.1.113883.6.88',
    'LOINC' : '2.16.840.1.113883.6.1'
    }
#
API_HOST='apps.nlm.nih.gov'
API_BASE_PATH='/medlineplus/services/mpconnect_service.cfm'
API_BASE_URL='https://'+API_HOST+API_BASE_PATH
42 | #
43 | ##############################################################################
def GetCode(codes, codesys, base_url=API_BASE_URL, fout=None):
    """Query the service once per code, requesting a JSON response.

    codes: iterable of codes in the given code system (e.g. "250.33").
    codesys: key of CODESYSTEMS; an unknown key raises KeyError.
    fout: if given, each parsed response is written to it as one line of
        JSON. Each response is also logged at debug level.
    """
    url=base_url
    url+=('?knowledgeResponseType=application/json')
    url+=('&mainSearchCriteria.v.cs='+CODESYSTEMS[codesys])
    for code in codes:
        # Percent-encode the code so reserved characters cannot corrupt the
        # query string.
        url_this = url+('&mainSearchCriteria.v.c='+urllib.parse.quote(str(code), safe=''))
        rval = rest.Utils.GetURL(url_this, parse_json=True)
        logging.debug(json.dumps(rval, sort_keys=True, indent=2))
        # fout was previously accepted but never written to.
        if fout is not None and rval is not None:
            fout.write(json.dumps(rval, sort_keys=True)+"\n")
52 |
53 | ##############################################################################
54 |
--------------------------------------------------------------------------------
/BioClients/pdb/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility for PDB REST API.
4 | https://www.rcsb.org/docs/programmatic-access/web-services-overview
5 | https://data.rcsb.org/redoc/
6 | """
7 | ###
8 | import sys,os,re,json,argparse,time,logging
9 | #
10 | from .. import pdb as pdb_utils
11 | #
12 | ##############################################################################
if __name__=='__main__':
    # Command-line driver for the PDB REST API client.
    epilog="""
Example entry IDs: 3ERT, 3TTC
Example chemical IDs: CFF
"""
    parser = argparse.ArgumentParser(description='PDB REST API client', epilog=epilog)
    ops = ['list_entrys',
        'get_entrys', 'get_chemicals',
        ]
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--ids", dest="ids", help="PDB entry IDs, comma-separated")
    parser.add_argument("--i", dest="ifile", help="input file, PDB entry IDs")
    parser.add_argument("--druglike", action="store_true", help="druglike chemicals only (organic; !polymer; !monoatomic)")
    parser.add_argument("--o", dest="ofile", help="output file (TSV)")
    parser.add_argument("--api_host", default=pdb_utils.API_HOST)
    parser.add_argument("--api_base_path", default=pdb_utils.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = f"https://{args.api_host}{args.api_base_path}"

    ids=[]
    if args.ifile:
        # Context manager closes the input file; blank lines are not IDs.
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
    elif args.ids:
        ids = [id_this for id_this in re.split('[, ]+', args.ids.strip()) if id_this]

    # Validate before opening the output file, so that a usage error does not
    # truncate an existing output file.
    if args.op in ("get_entrys", "get_chemicals") and not ids:
        parser.error('ID[s] required.')

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    t0=time.time()

    if args.op == "get_entrys":
        pdb_utils.GetEntrys(ids, base_url, fout)

    elif args.op == "get_chemicals":
        pdb_utils.GetChemicals(ids, args.druglike, base_url, fout)

    elif args.op == "list_entrys":
        pdb_utils.ListEntrys(base_url, fout)

    else:
        parser.error(f"Invalid operation: {args.op}")

    logging.info(f"Elapsed time: {time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))}")
65 |
--------------------------------------------------------------------------------
/BioClients/jensenlab/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility functions for JensenLab REST APIs.
4 | https://api.jensenlab.org/Textmining?type1=-26&id1=DOID:10652&type2=9606&limit=10&format=json
5 | https://api.jensenlab.org/Textmining?query=jetlag[tiab]%20OR%20jet-lag[tiab]&type2=9606&limit=10&format=json
6 | https://api.jensenlab.org/Knowledge?type1=-26&id1=DOID:10652&type2=9606&limit=10&format=json
7 | https://api.jensenlab.org/Experiments?type1=-26&id1=DOID:10652&type2=9606&limit=10&format=json
8 | """
9 | import sys,os,re,json,time,logging
10 | import pandas as pd
11 |
12 | from ..util import rest
13 | #
14 | API_HOST='api.jensenlab.org'
15 | API_BASE_PATH=''
16 | BASE_URL='https://'+API_HOST+API_BASE_PATH
17 | #
18 | ##############################################################################
def GetDiseaseGenes(channel, ids, nmax, base_url=BASE_URL, fout=None):
    """Query a JensenLab channel once per ID and tabulate the returned genes.

    Args:
        channel: API path segment placed after ``base_url`` (the module
            docstring shows Textmining, Knowledge and Experiments).
        ids: iterable of IDs sent as ``id1`` (the module examples use DOIDs).
        nmax: value sent as the ``limit`` query parameter.
        base_url: API root URL.
        fout: optional path or file object; when truthy the table is written
            to it as TSV.

    Returns:
        pandas.DataFrame with one row per returned gene; columns are the keys
        of the first gene record seen.
    """
    tags = []
    frames = []
    for id_this in ids:
        rval = rest.GetURL(base_url+f'/{channel}?type1=-26&id1={id_this}&type2=9606&limit={nmax}&format=json', parse_json=True)
        if not rval:
            # presumably the helper returns None/empty on a failed request -
            # TODO confirm; skip instead of crashing on rval[0].
            logging.warning(f"No result for ID: {id_this}")
            continue
        genes = rval[0]  # dict keyed by gene ID; the unused rval[1] is no longer read
        for gene in genes.values():
            logging.debug(json.dumps(gene, indent=2))
            if not tags: tags = list(gene.keys())
            # Missing keys become None rather than raising KeyError.
            frames.append(pd.DataFrame({tag: [gene.get(tag)] for tag in tags}))
    # Concatenate once at the end; concatenating inside the loop is quadratic.
    df = pd.concat(frames) if frames else pd.DataFrame()
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info("n_out: {}".format(df.shape[0]))
    return df
34 |
35 | ##############################################################################
def GetPubmedComentionGenes(ids, base_url=BASE_URL, fout=None, nmax=10):
    """Search by co-mentioned terms.

    Each entry of ``ids`` is sent as a ``[tiab]`` query to the Textmining
    endpoint and the returned genes are collected into one table.

    Args:
        ids: iterable of query terms; inserted into the URL as given (no
            URL-encoding is applied here).
        base_url: API root URL.
        fout: optional path or file object; when truthy the table is written
            to it as TSV.
        nmax: value sent as the ``limit`` query parameter (previously fixed
            at 10, which remains the default).

    Returns:
        pandas.DataFrame with one row per returned gene; columns are the keys
        of the first gene record seen.
    """
    tags = []
    frames = []
    for id_this in ids:
        rval = rest.GetURL(base_url+f'/Textmining?query={id_this}[tiab]&type2=9606&limit={nmax}&format=json', parse_json=True)
        if not rval:
            # presumably the helper returns None/empty on a failed request -
            # TODO confirm; skip instead of crashing on rval[0].
            logging.warning(f"No result for query: {id_this}")
            continue
        genes = rval[0]  # dict keyed by gene ID; the unused rval[1] is no longer read
        for gene in genes.values():
            logging.debug(json.dumps(gene, indent=2))
            if not tags: tags = list(gene.keys())
            # Missing keys become None rather than raising KeyError.
            frames.append(pd.DataFrame({tag: [gene.get(tag)] for tag in tags}))
    # Concatenate once at the end; concatenating inside the loop is quadratic.
    df = pd.concat(frames) if frames else pd.DataFrame()
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info("n_out: {}".format(df.shape[0]))
    return df
52 |
53 | ##############################################################################
54 |
--------------------------------------------------------------------------------
/BioClients/ncbo/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | http://data.bioontology.org/documentation
4 | The National Center for Biomedical Ontology was founded as one of the National Centers for Biomedical Computing, supported by the NHGRI, the NHLBI, and the NIH Common Fund.
5 |
6 | An API Key is required to access any API call. It can be provided in three ways:
7 |
8 | 1. Using the apikey query string parameter.
9 | 2. Providing an Authorization header: Authorization: apikey token=your_apikey (replace `your_apikey` with your actual key)
10 | 3. When using a web browser to explore the API, if you provide your API Key once using method 1, it will be stored in a cookie for subsequent requests. You can override this by providing a different API Key in a new call.
11 | """
12 | ###
13 | import sys,os,re,json,requests,urllib.parse,logging,time
14 | import pandas as pd
15 | #
16 | from ..util import rest
17 | #
18 | API_HOST="data.bioontology.org"
19 | API_BASE_PATH=""
20 | #
21 | #############################################################################
def RecommendOntologies(base_url, api_key, texts, fout):
    """This API call designed for text, not necessarily single terms.

    One GET request is sent to ``{base_url}/recommender`` per input text,
    authorized with the API key header. Each result object becomes one row;
    columns are the keys of the first result seen, values are stringified,
    and keys absent from a later result yield ''.

    Args:
        base_url: API root URL.
        api_key: NCBO API key, sent in the Authorization header.
        texts: sequence of input strings (its length is logged).
        fout: path or file object; when truthy the table is written as TSV.

    Returns:
        pandas.DataFrame of all results; failed requests are logged, counted
        and skipped.
    """
    #input_type={1|2} // default = 1. 1 means that the input type is text. 2 means that the input type is a list of comma separated keywords.
    # NOTE(review): input_type=2 (keywords) is sent although the docstring
    # speaks of text input - confirm which is intended.
    tags = []
    frames = []
    n_err = 0
    headers = {"Authorization":f"apikey token={api_key}"}
    for text in texts:
        url_this = base_url+f"/recommender?input={urllib.parse.quote(text)}"
        url_this += "&input_type=2"
        url_this += "&display_context=false&display_links=false"
        logging.debug(url_this)
        rval = requests.get(url_this, headers=headers)
        if not rval.ok:
            logging.error(f'{rval.status_code} : "{text}"')
            n_err+=1
            continue
        for result in rval.json():
            logging.debug(json.dumps(result, indent=2))
            if not tags:
                tags = list(result.keys())
            frames.append(pd.DataFrame({tag: [str(result[tag]) if tag in result else ''] for tag in tags}))
    # Concatenate once at the end; concatenating inside the loop is quadratic.
    df = pd.concat(frames) if frames else pd.DataFrame()
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info(f"n_texts: {len(texts)}; n_out: {df.shape[0]}; n_err: {n_err}")
    return df
48 |
49 | #############################################################################
50 |
--------------------------------------------------------------------------------
/BioClients/amp/t2d/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utilities for the AMP T2D REST API.
4 | http://www.type2diabetesgenetics.org/
5 | http://www.kp4cd.org/apis/t2d
6 | http://52.54.103.84/kpn-kb-openapi/
7 |
8 | DEPICT software (Pers, TH, et al., 2015)
9 | """
10 | ###
11 | import sys,os,re,json,time,logging
12 | #
13 | from ...util import rest
14 | #
15 | #############################################################################
def ListTissues(base_url, fout):
    """Fetch the tissue list and write it to ``fout`` as TSV.

    The header row is the key set of the first record; every record is then
    written in that column order, with '' for keys it lacks.

    Args:
        base_url: API root URL.
        fout: writable text file object.
    """
    rval = rest.Utils.GetURL(base_url+'/graph/tissue/list/object', parse_json=True)
    # Guard against a falsy response (presumably None on request failure -
    # TODO confirm), which made the membership test raise TypeError.
    tissues = rval["data"] if rval and "data" in rval else []
    tags = None; n_out=0;
    for tissue in tissues:
        logging.debug(json.dumps(tissue, indent=2))
        if not tags:
            tags = list(tissue.keys())
            fout.write('\t'.join(tags)+'\n')
        vals = [str(tissue[tag]) if tag in tissue else '' for tag in tags]
        fout.write('\t'.join(vals)+'\n')
        n_out += 1
    logging.info("n_out: %d"%(n_out))
29 |
30 | #############################################################################
def ListPhenotypes(base_url, fout):
    """Fetch the phenotype list and write it to ``fout`` as TSV.

    The header row is the key set of the first record; every record is then
    written in that column order, with '' for keys it lacks.

    Args:
        base_url: API root URL.
        fout: writable text file object.
    """
    rval = rest.Utils.GetURL(base_url+'/graph/phenotype/list/object', parse_json=True)
    # Guard against a falsy response (presumably None on request failure -
    # TODO confirm), which made the membership test raise TypeError.
    phenotypes = rval["data"] if rval and "data" in rval else []
    tags = None; n_out=0;
    for phenotype in phenotypes:
        logging.debug(json.dumps(phenotype, indent=2))
        if not tags:
            tags = list(phenotype.keys())
            fout.write('\t'.join(tags)+'\n')
        vals = [str(phenotype[tag]) if tag in phenotype else '' for tag in tags]
        fout.write('\t'.join(vals)+'\n')
        n_out += 1
    logging.info("n_out: %d"%(n_out))
44 |
45 | ##############################################################################
def DepictGenePathway(base_url, gene, phenotype, max_pval, fout):
    """Query the DEPICT gene-pathway endpoint and write the result as TSV.

    The header row is the key set of the first record; every record is then
    written in that column order, with '' for keys it lacks.

    Args:
        base_url: API root URL.
        gene: value of the ``gene`` query parameter.
        phenotype: value of the ``phenotype`` query parameter.
        max_pval: numeric threshold sent as ``lt_value``.
        fout: writable text file object.
    """
    # The separator before lt_value must be a literal '&'; the former
    # '<_value' (an HTML-entity decoding artifact of '&lt_value') fused the
    # threshold into the phenotype value.
    # NOTE(review): %f keeps only 6 decimals, so thresholds below 1e-6 are
    # sent as 0.000000 - confirm whether the service accepts another format.
    url = base_url+('/testcalls/depict/genepathway/object?gene=%s&phenotype=%s&lt_value=%f'%(gene, phenotype, max_pval))
    rval = rest.Utils.GetURL(url, parse_json=True)
    # Guard against a falsy response (presumably None on request failure -
    # TODO confirm), which made the membership test raise TypeError.
    pathways = rval["data"] if rval and "data" in rval else []
    tags = None; n_out=0;
    for pathway in pathways:
        logging.debug(json.dumps(pathway, indent=2))
        if not tags:
            tags = list(pathway.keys())
            fout.write('\t'.join(tags)+'\n')
        vals = [str(pathway[tag]) if tag in pathway else '' for tag in tags]
        fout.write('\t'.join(vals)+'\n')
        n_out += 1
    logging.info("n_out: %d"%(n_out))
60 |
61 | ##############################################################################
62 |
--------------------------------------------------------------------------------
/doc/gwascatalog.md:
--------------------------------------------------------------------------------
1 | # `BioClients.gwascatalog`
2 |
3 | ## GWAS Catalog
4 |
5 | GWAS Catalog REST API client.
6 |
7 | __Version 1:__
8 | *
9 | *
10 | *
11 | *
12 |
13 | __Version 2:__
14 | -
15 | - _"GWAS RESTful API V2 has been released with various enhancements & improvements over GWAS RESTful API V1. V1 is deprecated and will be retired no later than May 2026."_
16 | -
17 |
18 |
19 | ## Example commands
20 |
21 | ```
22 | python3 -m BioClients.gwascatalog.Client list_studies_v2 --o gwascatalog_studies.tsv
23 | ```
24 |
25 | ```
26 | python3 -m BioClients.gwascatalog.Client get_studyAssociations_v2 --ids "GCST004364,GCST000227"
27 | ```
28 |
29 | ```
30 | python3 -m BioClients.gwascatalog.Client get_snps_v2 --ids "rs6085920,rs2273833,rs6684514,rs144991356"
31 | ```
32 |
33 | ```
34 | python -m BioClients.gwascatalog.Client -h
35 | usage: Client.py [-h] [--ids IDS]
36 | [--searchtype {pubmedmid,gcst,efotrait,efouri,accessionid,rs}]
37 | [--i IFILE] [--o OFILE] [--skip SKIP] [--nmax NMAX] [--api_host API_HOST]
38 | [--api_base_path API_BASE_PATH] [--api_base_path_v2 API_BASE_PATH_V2]
39 | [-v] [-q]
40 | {get_metadata_v2,list_studies,list_studies_v2,get_studyAssociations,get_studyAssociations_v2,get_snps,get_snps_v2,search_studies,search_studies_v2}
41 |
42 | GWAS Catalog REST API (V1|V2) client
43 |
44 | positional arguments:
45 | {get_metadata_v2,list_studies,list_studies_v2,get_studyAssociations,get_studyAssociations_v2,get_snps,get_snps_v2,search_studies,search_studies_v2}
46 | operation
47 |
48 | options:
49 | -h, --help show this help message and exit
50 | --ids IDS IDs, comma-separated
51 | --searchtype {pubmedmid,gcst,efotrait,efouri,accessionid,rs}
52 | ID type
53 | --i IFILE input file, IDs
54 | --o OFILE output (TSV)
55 | --skip SKIP
56 | --nmax NMAX
57 | --api_host API_HOST
58 | --api_base_path API_BASE_PATH
59 | --api_base_path_v2 API_BASE_PATH_V2
60 | -v, --verbose
61 | -q, --quiet
62 |
63 | Example PMIDs: 28530673; Example GCSTs: GCST004364, GCST000227; Example EFOIDs:
64 | EFO_0004232; Example SNPIDs: rs6085920, rs2273833, rs6684514, rs144991356
65 | ```
66 |
--------------------------------------------------------------------------------
/BioClients/bindingdb/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #############################################################################
3 | ### BindingDb Utilities
4 | ### http://www.bindingdb.org/bind/BindingDBRESTfulAPI.jsp
5 | ### XML output only.
6 | #############################################################################
7 | import sys,os,re,argparse,time,logging
8 | #
9 | from .. import bindingdb
10 | #
11 | ##############################################################################
if __name__=='__main__':
    # Command-line entry point for the BindingDb REST API client: parse
    # arguments, collect input IDs, validate what the selected operation
    # needs, dispatch, and log the elapsed time.
    epilog = "Example Uniprot IDs: P35355, Q8HZR1"
    API_HOST='www.bindingdb.org'
    API_BASE_PATH='/axis2/services/BDBService'
    ops = ["get_ligands_by_uniprot", "get_targets_by_compound"]
    parser = argparse.ArgumentParser( description='BindingDb REST API client', epilog=epilog)
    parser.add_argument("op", choices=ops, help='OPERATION (select one)')
    parser.add_argument("--i", dest="ifile", help="input file, Uniprot IDs")
    parser.add_argument("--ids", help="Uniprot IDs")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--smiles", help="compound query")
    parser.add_argument("--ic50_max", type=int, default=100)
    parser.add_argument("--sim_min", type=float, default=0.85)
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    api_base_url='http://'+args.api_host+args.api_base_path

    ids=[]
    if args.ifile:
        # Blank lines are not IDs and are skipped.
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin if line.strip()]
    elif args.ids:
        ids = re.split('[, ]+', args.ids.strip())
    if len(ids)>0: logging.info('Input IDs: %d'%(len(ids)))

    # Validate required inputs before the output file is created, so a usage
    # error neither truncates an existing file nor sends an empty query.
    if args.op=="get_ligands_by_uniprot" and not ids:
        parser.error('--i or --ids required for operation {}.'.format(args.op))
    if args.op=="get_targets_by_compound" and not args.smiles:
        parser.error('--smiles required for operation {}.'.format(args.op))

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    t0=time.time()

    if args.op=="get_ligands_by_uniprot":
        bindingdb.GetLigandsByUniprot(api_base_url, ids, args.ic50_max, fout)

    elif args.op=="get_targets_by_compound":
        bindingdb.GetTargetsByCompound(api_base_url, args.smiles, args.sim_min, fout)

    else:
        parser.error('Operation invalid: {}'.format(args.op))

    logging.info('Elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))
60 |
61 |
--------------------------------------------------------------------------------
/BioClients/gtex/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility for GTEx REST API.
4 |
5 | * https://www.gtexportal.org/home/api-docs/
6 | """
7 | ###
8 | import sys,os,re,json,argparse,time,logging
9 | import pandas as pd
10 | #
11 | from .. import gtex
12 | #
13 | ##############################################################################
if __name__=='__main__':
    # Command-line driver for the GTEx REST API client: build the parser,
    # gather input IDs, then run the handler registered for the operation.
    operations = [
        "list_datasets",
        "list_subjects",
        "list_samples",
        "get_gene_expression"
        ]
    parser = argparse.ArgumentParser(description='GTEx REST API client', epilog="")
    parser.add_argument("op", choices=operations, help='OPERATION (select one)')
    parser.add_argument("--ids", help="input IDs")
    parser.add_argument("--i", dest="ifile", help="input file, IDs")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--dataset", default="gtex_v8", help="GTEx datasetId")
    parser.add_argument("--subject", help="GTEx subjectId")
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("--api_host", default=gtex.API_HOST)
    parser.add_argument("--api_base_path", default=gtex.API_BASE_PATH)
    parser.add_argument("-v","--verbose", action="count", default=0)
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose>1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    base_url='https://'+args.api_host+args.api_base_path

    if args.ofile:
        fout = open(args.ofile, 'w')
    else:
        fout = sys.stdout

    # IDs come from the file (one per line, trailing whitespace removed) or,
    # failing that, from the --ids list split on commas/spaces.
    ids=[]
    if args.ifile:
        fin = open(args.ifile)
        for line in fin:
            ids.append(line.rstrip())
        fin.close()
    elif args.ids:
        ids = re.split('[, ]+', args.ids.strip())
    if ids:
        logging.info('Input IDs: %d'%(len(ids)))

    needs_ids = args.op.startswith("get")
    if needs_ids and not (args.ifile or args.ids):
        parser.error(f"--i or --ids required for operation {args.op}.")

    # Dispatch table: one zero-argument callable per supported operation.
    handlers = {
        "list_datasets": lambda: gtex.ListDatasets(base_url, fout),
        "list_subjects": lambda: gtex.ListSubjects(args.dataset, base_url, fout),
        "list_samples": lambda: gtex.ListSamples(args.dataset, args.subject, base_url, fout),
        "get_gene_expression": lambda: gtex.GetGeneExpression(ids, args.dataset, args.skip, base_url, fout),
        }
    handler = handlers.get(args.op)
    if handler is None:
        parser.error(f'Invalid operation: {args.op}')
    handler()
71 |
--------------------------------------------------------------------------------
/BioClients/amp/t2d/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | utility app for the AMP T2D REST API.
4 | http://www.type2diabetesgenetics.org/
5 | http://www.kp4cd.org/apis/t2d
6 | http://52.54.103.84/kpn-kb-openapi/
7 |
8 | DEPICT software (Pers, TH, et al., 2015)
9 | """
10 | ###
11 | import sys,os,re,json,argparse,time,logging
12 | #
13 | from ... import amp
14 | #
15 | API_HOST='public.type2diabeteskb.org'
16 | API_BASE_PATH='/dccservices'
17 | #
18 | #############################################################################
if __name__=='__main__':
    # Command-line entry point for the AMP T2D REST client: parse arguments,
    # collect optional input IDs, dispatch the selected operation and log the
    # elapsed time.
    ops = ["list_tissues", "list_phenotypes", "depict_genepathway"]
    parser = argparse.ArgumentParser(description="AMP T2D REST client")
    parser.add_argument("op",choices=ops,help='operation')
    parser.add_argument("--i", dest="ifile", help="input IDs file")
    parser.add_argument("--ids", help="input IDs, comma-separated")
    parser.add_argument("--gene", help="query gene (e.g. SLC30A8)")
    parser.add_argument("--phenotype", default="T2D")
    parser.add_argument("--max_pval", type=float, default=.0005)
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=0)
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    BASE_URL = 'http://'+args.api_host+args.api_base_path

    # Without --gene the query would be sent with the literal text "None";
    # reject it before the output file is created.
    if args.op == 'depict_genepathway' and not args.gene:
        parser.error('--gene required for operation %s'%args.op)

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    # Always bind ids, so it exists even when neither --i nor --ids is given.
    ids=[]
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin]
        logging.info('input IDs: %d'%(len(ids)))
    elif args.ids:
        ids = re.split('[, ]+', args.ids.strip())

    t0=time.time()

    if args.op == 'list_tissues':
        amp.t2d.ListTissues(BASE_URL, fout)

    elif args.op == 'list_phenotypes':
        amp.t2d.ListPhenotypes(BASE_URL, fout)

    elif args.op == 'depict_genepathway':
        amp.t2d.DepictGenePathway(BASE_URL, args.gene, args.phenotype, args.max_pval, fout)

    else:
        parser.error('Invalid operation: %s'%args.op)

    logging.info('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))
69 |
70 |
--------------------------------------------------------------------------------
/BioClients/ncbo/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | http://data.bioontology.org/documentation
4 | The National Center for Biomedical Ontology was founded as one of the
5 | National Centers for Biomedical Computing, supported by the NHGRI, the
6 | NHLBI, and the NIH Common Fund.
7 | """
8 | ###
9 | import sys,os,argparse,re,yaml,logging,time
10 | #
11 | from .. import ncbo
12 | from ..util import yaml as util_yaml
13 | #
14 | #############################################################################
if __name__=='__main__':
    # Command-line entry point for the NCBO REST API client: parse arguments,
    # resolve the API key (command line overrides the parameter file), read
    # the input texts, run the operation and log the elapsed time.
    EPILOG="""The National Center for Biomedical Ontology was founded as one of the National Centers for Biomedical Computing, supported by the NHGRI, the NHLBI, and the NIH Common Fund."""
    parser = argparse.ArgumentParser(description='NCBO REST API client utility', epilog=EPILOG)
    OPS = ['recommendOntologies']
    parser.add_argument("op", choices=OPS, help="OPERATION")
    parser.add_argument("--i", dest="ifile", help="input texts")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--text", help="input text")
    parser.add_argument("--api_host", default=ncbo.API_HOST)
    parser.add_argument("--api_base_path", default=ncbo.API_BASE_PATH)
    # expanduser() does not raise when HOME is unset, unlike
    # os.environ["HOME"], which failed even if --param_file was supplied.
    parser.add_argument("--param_file", default=os.path.expanduser("~")+"/.ncbo.yaml")
    parser.add_argument("--api_key", help="API key")
    parser.add_argument("-v", "--verbose", default=0, action="count")

    args = parser.parse_args()

    logging.basicConfig(format="%(levelname)s:%(message)s", level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = "https://"+args.api_host+args.api_base_path

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    # presumably ReadParamFile returns a dict of parameters - TODO confirm;
    # an empty/None result is treated as "no parameters".
    params = util_yaml.ReadParamFile(args.param_file) or {}
    if args.api_key: params["API_KEY"] = args.api_key
    # .get() makes a missing key reach the intended error message instead of
    # raising KeyError.
    if not params.get("API_KEY"):
        parser.error("Please specify valid API_KEY via --api_key or --param_file")

    texts=[];
    if args.ifile:
        with open(args.ifile) as fin:
            texts = [line.rstrip() for line in fin]
        logging.info(f"input texts: {len(texts)}")
    elif args.text:
        texts = [args.text]

    t0 = time.time()

    if args.op == "recommendOntologies":
        ncbo.RecommendOntologies(base_url, params["API_KEY"], texts, fout)

    else:
        parser.error(f"Invalid operation: {args.op}")

    logging.info(("Elapsed time: %s"%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0)))))
62 |
--------------------------------------------------------------------------------
/doc/rxnorm.md:
--------------------------------------------------------------------------------
1 | # `BioClients.rxnorm`
2 |
3 | ## RxNorm
4 |
5 | From the NIH National Library of Medicine (NLM).
6 |
7 | *
8 | *
9 | *
10 | *
11 | *
12 | *
13 |
14 | TERM TYPES
15 | TTY Name
16 | IN Ingredient
17 | PIN Precise Ingredient
18 | MIN Multiple Ingredients
19 | SCDC Semantic Clinical Drug Component
20 | SCDF Semantic Clinical Drug Form
21 | SCDG Semantic Clinical Dose Form Group
22 | SCD Semantic Clinical Drug
23 | GPCK Generic Pack
24 | BN Brand Name
25 | SBDC Semantic Branded Drug Component
26 | SBDF Semantic Branded Drug Form
27 | SBDG Semantic Branded Dose Form Group
28 | SBD Semantic Branded Drug
29 | BPCK Brand Name Pack
30 | PSN Prescribable Name
31 | SY Synonym
32 | TMSY Tall Man Lettering Synonym
33 | DF Dose Form
34 | ET Dose Form Entry Term
35 | DFG Dose Form Group
36 |
37 | ### Usage examples
38 |
39 | ```
40 | python3 -m BioClients.rxnorm.Client -h
41 | python3 -m BioClients.rxnorm.Client list_sourcetypes
42 | python3 -m BioClients.rxnorm.Client list_relationtypes
43 | python3 -m BioClients.rxnorm.Client list_termtypes
44 | python3 -m BioClients.rxnorm.Client list_propnames
45 | python3 -m BioClients.rxnorm.Client list_propcategories
46 | python3 -m BioClients.rxnorm.Client list_idtypes
47 | python3 -m BioClients.rxnorm.Client list_class_types
48 | python3 -m BioClients.rxnorm.Client list_classes
49 | python3 -m BioClients.rxnorm.Client list_classes --class_types 'MESHPA,ATC1-4'
50 | ```
51 |
52 | Requiring names:
53 |
54 | ```
55 | python3 -m BioClients.rxnorm.Client get_name --ids "metformin"
56 | python3 -m BioClients.rxnorm.Client get_name --ids "prozac,tamiflu"
57 | python3 -m BioClients.rxnorm.Client get_name2rxcui --ids "prozac,tamiflu"
58 | ```
59 |
60 | Requiring external IDs:
61 | ```
62 | python3 -m BioClients.rxnorm.Client get_id2rxcui --ids "C2709711" --idtype UMLSCUI
63 | ```
64 |
65 | Requiring RxCUI IDs:
66 | ```
67 | python3 -m BioClients.rxnorm.Client get_rxcui_status --ids "131725,213269"
68 | python3 -m BioClients.rxnorm.Client get_rxcui_properties --ids "131725,213269"
69 | python3 -m BioClients.rxnorm.Client get_rxcui_allproperties --ids "6809,131725,213269"
70 | python3 -m BioClients.rxnorm.Client get_rxcui_ndcs --ids "131725,213269"
71 | python3 -m BioClients.rxnorm.Client get_rxcui_allrelated --ids "131725,213269"
72 | ```
73 |
74 |
--------------------------------------------------------------------------------
/BioClients/wikidata/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | https://www.wikidata.org/wiki/User:ProteinBoxBot/SPARQL_Examples
4 |
5 | PREFIX wdt:
6 | PREFIX wd:
7 | PREFIX bd:
8 | PREFIX up:
9 | PREFIX uniprotkb:
10 | """
11 | ###
12 | import sys,os,logging
13 | import pandas as pd
14 | from wikidataintegrator import wdi_core, wdi_login
15 |
16 | #############################################################################
def Rq2Df(rq):
    """Execute the SPARQL query ``rq`` and return its bindings as a DataFrame.

    Each binding becomes one row; each cell holds the binding's 'value'.
    """
    logging.debug(f"{rq}")
    response = wdi_core.WDItemEngine.execute_sparql_query(rq)
    bindings = response['results']['bindings']
    rows = []
    for binding in bindings:
        rows.append({name: cell['value'] for name, cell in binding.items()})
    table = pd.DataFrame(rows)
    n_rows, n_cols = table.shape[0], table.shape[1]
    logging.debug(f"rows: {n_rows}; cols: {n_cols}")
    return table
23 |
24 | #############################################################################
def Query(rq, fout=None):
    """Run the SPARQL query ``rq`` and return the result as a DataFrame.

    Args:
        rq: SPARQL query text.
        fout: optional path or file object; when not None the result is
            written to it as TSV.

    Returns:
        pandas.DataFrame produced by Rq2Df.
    """
    df = Rq2Df(rq)
    # sep is passed by keyword: positional arguments after the path are
    # deprecated in pandas 2.x and rejected once they become keyword-only.
    if fout is not None: df.to_csv(fout, sep='\t', index=False)
    logging.info(f"n_out: {df.shape[0]}")
    return df
30 |
31 | #############################################################################
def ListDrugTargetPairs(fout=None):
    """List drugs with known targets.

    Runs a fixed SPARQL query selecting ?drug/?gene_product pairs linked by
    wdt:P129, with English labels.

    Args:
        fout: optional path or file object; when not None the result is
            written to it as TSV.

    Returns:
        pandas.DataFrame produced by Rq2Df.
    """
    rq = """SELECT DISTINCT ?drug ?drugLabel ?gene_product ?gene_productLabel
WHERE {
?drug wdt:P129 ?gene_product .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}"""
    df = Rq2Df(rq)
    # sep is passed by keyword: positional arguments after the path are
    # deprecated in pandas 2.x and rejected once they become keyword-only.
    if fout is not None: df.to_csv(fout, sep='\t', index=False)
    logging.info(f"n_out: {df.shape[0]}")
    return df
43 |
44 | #############################################################################
def ListGeneDiseasePairs(fout=None):
    """List genes with associated diseases.

    Runs a fixed SPARQL query selecting ?gene/?disease pairs linked by
    wdt:P2293, with English labels.

    Args:
        fout: optional path or file object; when not None the result is
            written to it as TSV.

    Returns:
        pandas.DataFrame produced by Rq2Df.
    """
    rq = """SELECT DISTINCT ?gene ?geneLabel ?disease ?diseaseLabel
WHERE {
?gene wdt:P2293 ?disease .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}"""
    df = Rq2Df(rq)
    # sep is passed by keyword: positional arguments after the path are
    # deprecated in pandas 2.x and rejected once they become keyword-only.
    if fout is not None: df.to_csv(fout, sep='\t', index=False)
    logging.info(f"n_out: {df.shape[0]}")
    return df
56 |
57 | #############################################################################
def Test(fout=None):
    """Run a small fixed SPARQL query (items with wdt:P279 wd:Q1049021).

    Args:
        fout: optional path or file object; when not None the result is
            written to it as TSV.

    Returns:
        pandas.DataFrame produced by Rq2Df.
    """
    rq = """SELECT ?item ?itemLabel
WHERE {
?item wdt:P279 wd:Q1049021 .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}"""
    df = Rq2Df(rq)
    # sep is passed by keyword: positional arguments after the path are
    # deprecated in pandas 2.x and rejected once they become keyword-only.
    if fout is not None: df.to_csv(fout, sep='\t', index=False)
    logging.info(f"n_out: {df.shape[0]}")
    return df
68 |
69 | #############################################################################
70 |
--------------------------------------------------------------------------------
/BioClients/icite/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | ###
3 | import sys,os,json,re,logging,tqdm,requests,urllib.parse
4 | import pandas as pd
5 | #
6 | API_HOST="icite.od.nih.gov"
7 | API_BASE_PATH="/api/pubs"
8 | BASE_URL='https://'+API_HOST+API_BASE_PATH
9 | #
10 | NCHUNK=100;
11 | #
12 | #############################################################################
def GetStats(pmids, base_url=BASE_URL, fout=None):
    """Request multiply by chunk. Lists of PMIDs (e.g. references,
    cited_by) reported as counts.

    PMIDs are requested NCHUNK at a time. Columns are the keys of the first
    publication record seen; keys absent from a later record yield None.
    Processing stops at the first non-200 response. A progress bar is shown
    only when the root logger's effective level is 15 or lower.

    Args:
        pmids: sliceable sequence of PMIDs (str or int).
        base_url: API endpoint URL.
        fout: optional path or file object; when truthy each row is written
            to it as TSV as it arrives, with a header before the first row.

    Returns:
        pandas.DataFrame with one row per returned publication.
    """
    n_out = 0
    tags = None
    frames = []
    quiet = bool(logging.getLogger().getEffectiveLevel()>15)
    tq = None if quiet else tqdm.tqdm(total=len(pmids), unit="pmids")
    try:
        for i_start in range(0, len(pmids), NCHUNK):
            pmids_this = pmids[i_start:i_start+NCHUNK]
            # str() lets integer PMIDs be joined as well.
            url_this = (f"""{base_url}?pmids={(','.join(str(pmid) for pmid in pmids_this))}""")
            response = requests.get(url_this)
            if response.status_code != 200:
                logging.error(f"status_code: {response.status_code}")
                break
            result = response.json()
            logging.debug(json.dumps(result, indent=2))
            #url_self = result['links']['self']
            pubs = result['data'] if 'data' in result else []
            for pub in pubs:
                if not tags: tags = list(pub.keys())
                df_this = pd.DataFrame({tag:[pub[tag] if tag in pub else None] for tag in tags})
                if fout: df_this.to_csv(fout, sep="\t", index=False, header=bool(n_out==0))
                frames.append(df_this)
                n_out+=1
            if tq is not None: tq.update(len(pmids_this))
    finally:
        # Close the progress bar on every exit path, including the early
        # break on an HTTP error.
        if tq is not None: tq.close()
    # Concatenate once at the end; concatenating inside the loop is quadratic.
    df = pd.concat(frames) if frames else pd.DataFrame()
    logging.info(f"n_in: {len(pmids)}; n_out: {n_out}")
    return df
42 |
43 | #############################################################################
def GetStats_single(pmids, base_url=BASE_URL, fout=None):
    """Request singly.

    One GET request is made per PMID; non-200 responses are logged and
    skipped. Columns are the keys of the first record received; keys absent
    from a later record yield None.

    Args:
        pmids: sized iterable of PMIDs (str or int).
        base_url: API endpoint URL.
        fout: optional path or file object; when truthy the complete table is
            written to it as TSV after all requests.

    Returns:
        pandas.DataFrame with one row per successfully fetched publication.
    """
    tags = None
    frames = []
    tq = None
    try:
        for pmid in pmids:
            if tq is None: tq = tqdm.tqdm(total=len(pmids), unit="pmids")
            tq.update()
            # str() lets integer PMIDs be concatenated as well.
            url = base_url+'/'+str(pmid)
            response = requests.get(url)
            if response.status_code != 200:
                logging.error(f"status_code: {response.status_code}")
                continue
            pub = response.json()
            if not tags: tags = list(pub.keys())
            # Missing keys become None (as in GetStats) rather than KeyError.
            frames.append(pd.DataFrame({tag:[pub[tag] if tag in pub else None] for tag in tags}))
    finally:
        # Close the progress bar on every exit path.
        if tq is not None: tq.close()
    # Concatenate once at the end; concatenating inside the loop is quadratic.
    df = pd.concat(frames) if frames else pd.DataFrame()
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info(f"n_in: {len(pmids)}; n_out: {df.shape[0]}")
    return df
61 |
62 | #############################################################################
63 |
--------------------------------------------------------------------------------
/BioClients/ncats/gsrs/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | https://ncats.nih.gov/expertise/preclinical/gsrs
4 | https://gsrs.ncats.nih.gov/
5 | https://gsrs.ncats.nih.gov/#/api
6 | """
7 | ###
8 | import sys,os,re,json,argparse,time,logging
9 |
10 | from ... import ncats
11 | #
12 | #############################################################################
if __name__=='__main__':
    # Command-line entry point for the NCATS GSRS client: parse arguments,
    # collect input IDs, validate what the selected operation needs,
    # dispatch, and log the elapsed time.
    epilog='''\
Example search queries:
IBUPRO
ASPIRIN
OXYTOCIN
OXYTO*
ASPIRIN AND ESTER
COCN
C=1CC=CC=C1C(=O)O
'''
    parser = argparse.ArgumentParser(description='NCATS Global Substance Registration System (GSRS) client', epilog=epilog)
    ops = [
        'list_vocabularies',
        'list_substances',
        'search',
        'get_substance',
        'get_substance_names',
        ]
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="Input IDs")
    parser.add_argument("--o", dest="ofile", help="Output (TSV)")
    parser.add_argument("--ids", help="Input IDs (comma-separated)")
    parser.add_argument("--query", help="Search query.")
    parser.add_argument("--api_host", default=ncats.gsrs.API_HOST)
    parser.add_argument("--api_base_path", default=ncats.gsrs.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    api_base_url = 'https://'+args.api_host+args.api_base_path

    ids=[]
    if args.ifile:
        # Blank lines are not IDs and are skipped.
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin if line.strip()]
        logging.info(f"Input IDs: {len(ids)}")
    elif args.ids:
        # strip() first, so leading/trailing separators do not yield empty IDs.
        ids = re.split(r'[,\s]+', args.ids.strip())

    # Validate required inputs before the output file is created, so a usage
    # error neither truncates an existing file nor sends an empty request.
    if args.op == "search" and not args.query:
        parser.error(f"--query required for operation {args.op}.")
    if args.op in ("get_substance", "get_substance_names") and not ids:
        parser.error(f"--i or --ids required for operation {args.op}.")

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    t0=time.time()

    if args.op == "list_vocabularies":
        ncats.gsrs.Utils.ListVocabularies(api_base_url, fout)

    elif args.op == "list_substances":
        ncats.gsrs.Utils.ListSubstances(api_base_url, fout)

    elif args.op == "search":
        ncats.gsrs.Utils.Search(args.query, api_base_url, fout)

    elif args.op == "get_substance":
        ncats.gsrs.Utils.GetSubstance(ids, api_base_url, fout)

    elif args.op == "get_substance_names":
        ncats.gsrs.Utils.GetSubstanceNames(ids, api_base_url, fout)

    else:
        parser.error(f"Invalid operation: {args.op}")

    logging.info(f"Elapsed time: {time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0))}")
81 |
--------------------------------------------------------------------------------
/BioClients/util/neo4j/App.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Neo4j client (via py2neo API)
4 | https://neo4j.com/docs/cypher-manual
5 | https://py2neo.org
6 | """
7 | #############################################################################
8 | import sys,os,argparse,logging
9 | import py2neo
10 |
11 | from .. import neo4j as util_neo4j
12 |
13 | #############################################################################
if __name__ == '__main__':
    # Command-line entry point: connect to Neo4j and run one of the
    # operations dbinfo, dbsummary or query, writing results to --o or stdout.
    parser = argparse.ArgumentParser(description="Neo4j client (via py2neo API)", epilog="See https://neo4j.com/docs/cypher-manual, https://py2neo.org.")
    ops = ['dbinfo', 'query', 'dbsummary']
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="input query file (CQL aka Cypher)")
    parser.add_argument("--cql", help="input query (CQL aka Cypher)")
    parser.add_argument("--o", dest="ofile", help="output (TSV|JSON)")
    parser.add_argument("--ofmt", choices=('TSV', 'JSON'), default='TSV')
    parser.add_argument("--dbhost", default=util_neo4j.DBHOST)
    parser.add_argument("--dbport", type=int, default=util_neo4j.DBPORT)
    parser.add_argument("--dbscheme", default=util_neo4j.DBSCHEME)
    parser.add_argument("--dbusr", default=util_neo4j.DBUSR)
    parser.add_argument("--dbpw", default=util_neo4j.DBPW)
    parser.add_argument("--secure", action="store_true", help="secure connection (TLS)")
    parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0)
    args = parser.parse_args()

    # Any -v flag enables DEBUG; otherwise only errors are shown.
    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>0 else logging.ERROR))

    # Resolve the Cypher text before touching the database or the output file,
    # so a usage error neither needs a reachable server nor truncates --o.
    cql = None
    if args.op == 'query':
        if args.ifile:
            # Context manager closes the query file (it was previously leaked).
            with open(args.ifile) as fin:
                cql = fin.read()
        elif args.cql:
            cql = args.cql
        else:
            parser.error('--cql or --i required for query.')

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout
    try:
        # All operations share the same connection parameters.
        db = util_neo4j.DbConnect(dbhost=args.dbhost, dbport=args.dbport, dbscheme=args.dbscheme, dbusr=args.dbusr, dbpw=args.dbpw, secure=args.secure)

        if args.op == 'dbinfo':
            util_neo4j.DbInfo(db, fout)

        elif args.op == 'dbsummary':
            util_neo4j.DbSummary(db, fout)

        elif args.op == 'query':
            util_neo4j.DbQuery(db, cql, args.ofmt, fout)

        else:
            parser.error(f"Unsupported operation: {args.op}")
    finally:
        # Close the output file we opened; leave stdout alone.
        if fout is not sys.stdout:
            fout.close()
56 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
98 | __pypackages__/
99 |
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 |
104 | # SageMath parsed files
105 | *.sage.py
106 |
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 |
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 |
120 | # Rope project settings
121 | .ropeproject
122 |
123 | # mkdocs documentation
124 | /site
125 |
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 |
131 | # Pyre type checker
132 | .pyre/
133 |
134 | # pytype static type analyzer
135 | .pytype/
136 |
137 | # PyCharm
138 | .idea/
139 |
--------------------------------------------------------------------------------
/BioClients/chemidplus/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility functions for the NLM ChemIDplus REST API.
4 | https://chem.nlm.nih.gov/chemidsearch/api
5 | https://chem.nlm.nih.gov/api/swagger-ui.html
6 | https://chem.nlm.nih.gov/api/v2/api-docs
7 | """
8 | ###
9 | import sys,os,re,argparse,time,logging
10 | #
11 | from .. import chemidplus
12 | #
13 | ##############################################################################
if __name__ == '__main__':
    # Command-line front end for the ChemIDplus REST client.
    ops = ["list_sources", "list_types", "get_id2summary", "get_id2names",
           "get_id2numbers",
           "get_id2toxlist"]
    parser = argparse.ArgumentParser(description="ChemIDPlus REST client")
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="input IDs file")
    parser.add_argument("--ids", help="input IDs (comma-separated)")
    parser.add_argument("--id_type", default="auto", help="input ID type")
    parser.add_argument("--o", dest="ofile", help="output (usually TSV)")
    parser.add_argument("--api_host", default=chemidplus.API_HOST)
    parser.add_argument("--api_base_path", default=chemidplus.API_BASE_PATH)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=0)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    # Two or more -v flags switch from INFO to DEBUG.
    log_level = logging.DEBUG if args.verbose > 1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    base_url = 'https://' + args.api_host + args.api_base_path

    fout = sys.stdout if not args.ofile else open(args.ofile, "w")

    # IDs come from the input file (one per line) or from the --ids string.
    ids = []
    if args.ifile:
        fin = open(args.ifile)
        ids = [line.rstrip() for line in fin]
        fin.close()
    elif args.ids:
        ids = re.split(r'[,\s]+', args.ids)
    logging.info(f"Input IDs: {len(ids)}")

    t0 = time.time()

    # Operation name -> chemidplus function name; resolved lazily via getattr
    # so only the selected function is looked up.
    list_ops = {
        'list_sources': 'ListSources',
        'list_types': 'ListTypes',
    }
    id_ops = {
        'get_id2summary': 'GetId2Summary',
        'get_id2names': 'GetId2Names',
        'get_id2numbers': 'GetId2Numbers',
        'get_id2toxlist': 'GetId2ToxicityList',
    }
    if args.op in list_ops:
        getattr(chemidplus, list_ops[args.op])(base_url, fout)
    elif args.op in id_ops:
        getattr(chemidplus, id_ops[args.op])(ids, args.id_type, base_url, fout)
    else:
        parser.error(f"Invalid operation: {args.op}")

    elapsed = time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time() - t0))
    logging.info('elapsed time: %s', elapsed)
73 |
74 |
--------------------------------------------------------------------------------
/BioClients/util/pandas/Csv2Markdown.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Pandas/Tabulate Csv2Markdown.
4 | Tabulate formatting https://pypi.org/project/tabulate/
5 | """
6 | ###
7 | import sys,os,argparse,re,logging
8 | import tabulate
9 | import pandas as pd
10 |
11 | # default=lambda(x: f"{x:.3f}"),
12 | #############################################################################
if __name__ == '__main__':
    # Command-line entry point: read a CSV/TSV file with pandas and write it
    # as a titled table rendered by tabulate (Markdown by default).
    FORMATS = ["plain", "simple", "github", "grid", "fancy_grid", "pipe", "orgtbl", "jira", "presto", "pretty", "psql", "rst", "mediawiki", "moinmoin", "youtrack", "html", "unsafehtml", "latex", "latex_raw", "latex_booktabs", "latex_longtable", "textile", "tsv"]
    parser = argparse.ArgumentParser(description='Pandas/Tabulate Csv2Markdown.')
    parser.add_argument("--i", dest="ifile", required=True, help="input (CSV|TSV)")
    parser.add_argument("--o", dest="ofile", help="output (Markdown)")
    parser.add_argument("--csv", action="store_true", help="delimiter is comma")
    parser.add_argument("--tsv", action="store_true", help="delimiter is tab")
    parser.add_argument("--title", help="Markdown heading")
    parser.add_argument("--columns", help="Subset of columns to write (comma delimited)")
    parser.add_argument("--nrows", type=int)
    parser.add_argument("--skiprows", type=int)
    parser.add_argument("--numalign", choices=["center","right","left","decimal"], default="center")
    parser.add_argument("--stralign", choices=["center","right","left"], default="left")
    parser.add_argument("--format", choices=FORMATS, default="github", help="tabulate format (tablefmt)")
    parser.add_argument("--na_rep", default="", help="String representation of NaN")
    # NOTE(review): --float_format is accepted but not applied anywhere below.
    parser.add_argument("--float_format", help="Function(float) -> string.")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    # Explicit flags win; otherwise guess from the file name, defaulting to tab.
    if args.csv:
        delim = ','
    elif args.tsv:
        delim = '\t'
    elif re.search(r'\.csv', args.ifile, re.I):  # raw string: '\.' is an invalid str escape
        delim = ','
    else:
        delim = '\t'

    title = args.title if args.title else f"Csv2Markdown: {os.path.basename(args.ifile)}"
    columns = args.columns.split(',') if args.columns else None

    df = pd.read_csv(args.ifile, sep=delim, nrows=args.nrows, skiprows=args.skiprows)

    if columns is not None:
        df = df[columns]

    if args.na_rep is not None:
        df = df.fillna(args.na_rep)

    # Forward the alignment options to tabulate; they were previously parsed
    # but silently ignored.
    table_md = df.to_markdown(tablefmt=args.format, numalign=args.numalign, stralign=args.stralign)

    md = f"""
# {title}

{table_md}
"""

    fout.write(md)
    # Only close a file we opened; leave stdout to the interpreter.
    if fout is not sys.stdout:
        fout.close()
62 |
63 |
--------------------------------------------------------------------------------
/BioClients/bioregistry/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Client for Bioregistry REST API.
4 |
5 | See: https://bioregistry.io/apidocs/
6 | """
7 | ###
8 | import sys,os,re,json,argparse,time,logging
9 | #
10 | from .. import bioregistry
11 | #
12 | ##############################################################################
if __name__ == '__main__':
    # Command-line entry point for the Bioregistry REST API client.
    parser = argparse.ArgumentParser(description='Bioregistry REST API client', epilog='')
    ops = [
        'list_collections',
        'list_contexts',
        'list_registry',
        'list_metaregistry',
        'list_contributors',
        'get_reference',
    ]
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--i", dest="ifile", help="input query IDs")
    parser.add_argument("--ids", help="input query IDs (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--etype", default="", help="evidence codes (|-separated)")
    parser.add_argument("--prefix", help="CURIE prefix")
    parser.add_argument("--nchunk", type=int)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--api_host", default=bioregistry.API_HOST)
    parser.add_argument("--api_base_path", default=bioregistry.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = f"https://{args.api_host}{args.api_base_path}"

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    # IDs come from the input file (one per line) or from the --ids string.
    ids = []
    if args.ifile:
        # Context manager closes the input file (it was previously leaked).
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin]
    elif args.ids:
        # Raw string: '\s' in a plain literal is an invalid escape sequence.
        ids = re.split(r'[,\s]+', args.ids.strip())

    t0 = time.time()

    # Each list_* operation maps to the entity collection it lists.
    list_entities = {
        "list_contributors": "contributors",
        "list_collections": "collections",
        "list_contexts": "contexts",
        "list_metaregistry": "metaregistry",
        "list_registry": "registry",
    }
    if args.op in list_entities:
        bioregistry.ListEntities(list_entities[args.op], base_url, fout)
    elif args.op == "get_reference":
        bioregistry.GetReference(ids, args.prefix, base_url, fout)
    else:
        parser.error("Invalid operation: {0}".format(args.op))

    logging.info(('Elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0)))))
72 |
--------------------------------------------------------------------------------
/BioClients/idg/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Pharos REST API client
4 | https://pharos.nih.gov/idg/api/v1/targets(589)
5 | """
6 | ###
7 | import sys,os,argparse,json,re,time,logging
8 | #
9 | from .. import idg
10 | #
11 | #############################################################################
if __name__ == '__main__':
    # Command-line front end for the Pharos REST API client.
    API_HOST = "pharos.nih.gov"
    API_BASE_PATH = "/idg/api/v1"
    IDTYPES = ['IDG_TARGET_ID', 'UNIPROT', 'ENSP', 'GSYMB']
    parser = argparse.ArgumentParser(description='Pharos REST API client')
    ops = ['list_targets', 'list_ligands', 'list_diseases',
           'get_targets', 'get_targetProperties', 'search_targets']
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--i", dest="ifile", help="input file, target IDs")
    parser.add_argument("--ids", dest="ids", help="IDs, target, comma-separated")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--idtype", choices=IDTYPES, default='IDG_TARGET_ID', help="target ID type")
    parser.add_argument("--nmax", type=int, help="max to return")
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    # Two or more -v flags switch from INFO to DEBUG.
    log_level = logging.DEBUG if args.verbose > 1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    BASE_URL = 'https://' + args.api_host + args.api_base_path

    fout = sys.stdout if not args.ofile else open(args.ofile, "w+")

    # IDs come from the input file (one per line) or from the --ids string.
    ids = []
    if args.ifile:
        fin = open(args.ifile)
        ids = [line.rstrip() for line in fin]
        logging.info('input IDs: %d', len(ids))
        fin.close()
    elif args.ids:
        ids = re.split(r'\s*,\s*', args.ids.strip())

    t0 = time.time()

    # Every get_* operation needs at least one ID.
    if args.op.startswith('get_') and not ids:
        parser.error('{0} requires IDs.'.format(args.op))

    # list_* operation -> item kind passed to idg.ListItems.
    list_kinds = {
        'list_targets': 'targets',
        'list_diseases': 'diseases',
        'list_ligands': 'ligands',
    }

    if args.op == 'get_targets':
        idg.GetTargets(BASE_URL, ids, args.idtype, fout)
    elif args.op == 'get_targetProperties':
        idg.GetTargetProperties(BASE_URL, ids, args.idtype, fout)
    elif args.op in list_kinds:
        idg.ListItems(list_kinds[args.op], BASE_URL, fout)
    elif args.op == 'search_targets':
        logging.error('Not implemented yet.')
    else:
        logging.error('Invalid operation: {0}'.format(args.op))

    elapsed = time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time() - t0))
    logging.info('Elapsed time: %s', elapsed)
75 |
--------------------------------------------------------------------------------
/doc/ubkg.md:
--------------------------------------------------------------------------------
1 | # `BioClients.ubkg`
2 |
3 | ## UBKG - Unified Biomedical Knowledge Graph
4 |
5 | Client to the UBKG REST API. Note that a UMLS API key is required to access
6 | UBKG.
7 |
8 | * [Smart-API:UBKG-API](https://smart-api.info/ui/96e5b5c0b0efeef5b93ea98ac2794837)
9 | * [NIH-NLM UMLS Terminology Services](https://uts.nlm.nih.gov/uts/)
10 |
11 | The Unified Biomedical Knowledge Graph (UBKG) was developed by the University of
12 | Pittsburgh, Children's Hospital of Philadelphia, and others, built upon the NIH-NLM
13 | Unified Medical Language System (UMLS) metathesaurus, composed of numerous leading,
14 | community standard controlled vocabularies.
15 |
16 | The Data Distillery Knowledge Graph (DDKG) is a context and extension of UBKG, developed
17 | by the Common Fund Data Ecosystem (CFDE) Data Distillery Partnership Project team,
18 | including the IDG DCC team at UNM.
19 |
20 | ```
21 | python -m BioClients.ubkg.Client -h
22 | usage: Client.py [-h] [--o OFILE] [--i IFILE] [--ids IDS] [--term TERM] [--sab SAB]
23 | [--relationship RELATIONSHIP]
24 | [--context {base_context,data_distillery_context,hubmap_sennet_context}]
25 | [--mindepth MINDEPTH] [--maxdepth MAXDEPTH] [--nmax NMAX] [--skip SKIP]
26 | [--api_host API_HOST] [--api_base_path API_BASE_PATH]
27 | [--api_key API_KEY] [--param_file PARAM_FILE] [-v]
28 | {search,info,list_relationship_types,list_node_types,list_node_type_counts,list_property_types,list_sabs,list_sources,list_semantic_types,get_concept2codes,get_concept2concepts,get_concept2definitions,get_concept2paths,get_concept2trees,get_term2concepts}
29 |
30 | UBKG REST API client
31 |
32 | positional arguments:
33 | {search,info,list_relationship_types,list_node_types,list_node_type_counts,list_property_types,list_sabs,list_sources,list_semantic_types,get_concept2codes,get_concept2concepts,get_concept2definitions,get_concept2paths,get_concept2trees,get_term2concepts}
34 | OPERATION
35 |
36 | options:
37 | -h, --help show this help message and exit
38 | --o OFILE output (TSV)
39 | --i IFILE UMLS CUI ID file
40 | --ids IDS UMLS CUI IDs, comma-separated
41 | --term TERM UMLS term, e.g. 'Asthma'
42 | --sab SAB Standard abbreviation type
43 | --relationship RELATIONSHIP
44 | Relationship type
45 | --context {base_context,data_distillery_context,hubmap_sennet_context}
46 | --mindepth MINDEPTH min path depth
47 | --maxdepth MAXDEPTH max path depth
48 | --nmax NMAX max records
49 | --skip SKIP skip 1st SKIP queries
50 | --api_host API_HOST
51 | --api_base_path API_BASE_PATH
52 | --api_key API_KEY UMLS API Key
53 | --param_file PARAM_FILE
54 | -v, --verbose
55 |
56 | ```
57 |
--------------------------------------------------------------------------------
/doc/chembl.md:
--------------------------------------------------------------------------------
1 | # `BioClients.chembl`
2 |
3 | ## ChEMBL
4 |
5 | Tools for obtaining and processing ChEMBL data.
6 |
7 | *
8 | *
9 | *
10 |
11 | ```
12 | $ python3 -m BioClients.chembl.Client get_drug_indications -h
13 | usage: Client.py [-h] [--ids IDS] [--i IFILE] [--o OFILE] [--skip SKIP] [--nmax NMAX]
14 | [--dev_phase {0,1,2,3,4}] [--assay_source ASSAY_SOURCE]
15 | [--assay_type ASSAY_TYPE] [--pmin PMIN] [--include_phenotypic]
16 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v]
17 | {status,list_sources,list_targets,list_assays,list_docs,list_mols,list_drugs,list_drug_indications,list_tissues,list_cells,list_mechanisms,list_organisms,list_protein_classes,search_assays,search_mols_by_name,get_mol,get_mol_by_inchikey,get_target,get_target_components,get_target_by_uniprot,get_assay,get_activity_by_mol,get_activity_by_assay,get_activity_by_target,get_activity_properties,get_drug_indications,get_document}
18 |
19 | ChEMBL REST API client
20 |
21 | positional arguments:
22 | {status,list_sources,list_targets,list_assays,list_docs,list_mols,list_drugs,list_drug_indications,list_tissues,list_cells,list_mechanisms,list_organisms,list_protein_classes,search_assays,search_mols_by_name,get_mol,get_mol_by_inchikey,get_target,get_target_components,get_target_by_uniprot,get_assay,get_activity_by_mol,get_activity_by_assay,get_activity_by_target,get_activity_properties,get_drug_indications,get_document}
23 | OPERATION (select one)
24 |
25 | options:
26 | -h, --help show this help message and exit
27 | --ids IDS input IDs (e.g. mol, assay, target, document)
28 | --i IFILE input file, IDs
29 | --o OFILE output (TSV)
30 | --skip SKIP
31 | --nmax NMAX
32 | --dev_phase {0,1,2,3,4}
33 | molecule development phase
34 | --assay_source ASSAY_SOURCE
35 | source_id
36 | --assay_type ASSAY_TYPE
37 | {'B': 'Binding', 'F': 'Functional', 'A': 'ADMET', 'T':
38 | 'Toxicity', 'P': 'Physicochemical', 'U': 'Unclassified'}
39 | --pmin PMIN min pChEMBL activity value (9 ~ 1nM *C50)
40 | --include_phenotypic else pChembl required
41 | --api_host API_HOST
42 | --api_base_path API_BASE_PATH
43 | -v, --verbose
44 |
45 | Assay types: {'B': 'Binding', 'F': 'Functional', 'A': 'ADMET', 'T': 'Toxicity', 'P':
46 | 'Physicochemical', 'U': 'Unclassified'}. Example IDs: CHEMBL2 (compound); CHEMBL1642
47 | (compound & drug); CHEMBL240 (target); CHEMBL1824 (target); CHEMBL1217643 (assay);
48 | CHEMBL3215220 (assay, PubChem assay 519, NMMLSC FPR); Q12809 (Uniprot)
49 | ```
50 |
--------------------------------------------------------------------------------
/BioClients/emblebi/unichem/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | EMBL-EBI Unichem
4 | UCI is UniChem Compound Id
5 | """
6 | ###
7 | import sys,os,re,json,argparse,time,logging
8 |
9 | from ... import emblebi
10 | #
11 | #############################################################################
if __name__ == '__main__':
    # Command-line front end for the EMBL-EBI UniChem client.
    epilog = '''\
Example InChIkey: LCNDUGHNYMJGIW-UHFFFAOYSA-N
'''
    INCHI_REPRESENTATIONS = ["uci", "inchi", "inchikey", "sourceID"]
    parser = argparse.ArgumentParser(description='EMBL-EBI Unichem client', epilog=epilog)
    ops = ['getFromSourceId', 'listSources', 'getFromInchi']
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="Input IDs")
    parser.add_argument("--o", dest="ofile", help="Output (TSV)")
    parser.add_argument("--ids", help="Input IDs (comma-separated)")
    representation_help = f"[{'|'.join(INCHI_REPRESENTATIONS)}]"
    parser.add_argument("--inchi_representation", choices=INCHI_REPRESENTATIONS, default="inchi", help=representation_help)
    parser.add_argument("--src_id_in", type=int, help="")
    parser.add_argument("--src_id_out", type=int, help="")
    parser.add_argument("--search_components", action="store_true", help="InChI search option")
    parser.add_argument("--api_host", default=emblebi.unichem.API_HOST)
    parser.add_argument("--api_base_path", default=emblebi.unichem.API_BASE_PATH)
    parser.add_argument("--skip", type=int, help="")
    parser.add_argument("--nmax", type=int, help="")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    # Two or more -v flags switch from INFO to DEBUG.
    log_level = logging.DEBUG if args.verbose > 1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    api_base_url = 'https://' + args.api_host + args.api_base_path

    fout = sys.stdout if not args.ofile else open(args.ofile, "w+")

    t0 = time.time()

    # IDs come from the input file (one per line) or from the --ids string.
    ids = []
    if args.ifile:
        fin = open(args.ifile)
        ids = [line.rstrip() for line in fin]
        fin.close()
        logging.info(f"Input IDs: {len(ids)}")
    elif args.ids:
        ids = re.split(r'[,\s]+', args.ids)

    # Lambdas keep attribute lookups lazy: only the selected operation is resolved.
    handlers = {
        "getFromSourceId": lambda: emblebi.unichem.Utils.GetFromSourceId(
            ids, args.src_id_in, args.src_id_out, args.skip, args.nmax, api_base_url, fout),
        "getFromInchi": lambda: emblebi.unichem.Utils.GetFromInchi(
            ids, args.inchi_representation, args.search_components,
            args.src_id_in, args.src_id_out, args.skip, args.nmax, api_base_url, fout),
        "listSources": lambda: emblebi.unichem.Utils.ListSources(api_base_url, fout),
    }
    handler = handlers.get(args.op)
    if handler is None:
        parser.error(f"Invalid operation: {args.op}")
    handler()

    elapsed = time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time() - t0))
    logging.info(f"Elapsed time: {elapsed}")
67 |
--------------------------------------------------------------------------------
/BioClients/mesh/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | MeSH XML utility functions.
4 |
5 | MeSH XML
6 | Download: https://www.nlm.nih.gov/mesh/download_mesh.html
7 | Doc: https://www.nlm.nih.gov/mesh/xml_data_elements.html
8 |
9 |
10 | 1 = Topical Descriptor.
11 | 2 = Publication Types, for example, 'Review'.
12 | 3 = Check Tag, e.g., 'Male' (no tree number)
13 | 4 = Geographic Descriptor (Z category of tree number).
14 |
15 | Category "C" : Diseases
16 | Category "F" : Psychiatry and Psychology
17 | Category "F03" : Mental Disorders
18 | Thus, include "C*" and "F03*" only.
19 | Terms can have multiple TreeNumbers; diseases can be in non-disease categories, in addition to a disease category.
20 | """
21 | ###
22 | import sys,os,re,argparse,logging
23 |
24 | from .. import mesh
25 |
# Top-level MeSH tree branches: single-letter code -> branch name.
BRANCHES = {
    'A': 'Anatomy',
    'B': 'Organisms',
    'C': 'Diseases',
    'D': 'Chemicals and Drugs',
    'E': 'Analytical, Diagnostic and Therapeutic Techniques, and Equipment',
    'F': 'Psychiatry and Psychology',
    'G': 'Phenomena and Processes',
    'H': 'Disciplines and Occupations',
    'I': 'Anthropology, Education, Sociology, and Social Phenomena',
    'J': 'Technology, Industry, and Agriculture',
    'K': 'Humanities',
    'L': 'Information Science',
    'M': 'Named Groups',
    'N': 'Health Care',
    'V': 'Publication Characteristics',
    'Z': 'Geographicals',
}

#############################################################################
if __name__ == '__main__':
    # Command-line front end: convert MeSH descriptor or supplementary-record
    # XML (file or stdin) to delimited text (file or stdout).
    BRANCH = 'C'
    branch_summary = "; ".join(f"{code}: {BRANCHES[code]}" for code in sorted(BRANCHES))
    EPILOG = f"""
operations:
desc2csv: descriptors XML input;
supp2csv: supplementary records XML input;
Branches:
{branch_summary}
"""
    parser = argparse.ArgumentParser(description='MeSH XML utility', epilog=EPILOG)
    ops = ['desc2csv', 'supp2csv']
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--i", dest="ifile", help="input MeSH XML file [stdin]")
    parser.add_argument("--o", dest="ofile", help="output file (TSV)")
    parser.add_argument("--branch", choices=BRANCHES, default=BRANCH, help="top-level branch of MeSH tree")
    parser.add_argument("--force", action="store_true", help="ignore UTF-8 encoding errors")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    # Two or more -v flags switch from INFO to DEBUG.
    log_level = logging.DEBUG if args.verbose > 1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    fin = sys.stdin if not args.ifile else open(args.ifile, "r")

    fout = sys.stdout if not args.ofile else open(args.ofile, "w")

    # Operation name -> mesh function name; resolved lazily via getattr.
    converters = {"desc2csv": "Desc2Csv", "supp2csv": "Supp2Csv"}
    if args.op in converters:
        getattr(mesh, converters[args.op])(args.branch, fin, fout)
    else:
        parser.error(f"Invalid operation: {args.op}")
78 |
--------------------------------------------------------------------------------
/BioClients/pubtator/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Pubtator REST API client
4 | https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/RESTfulAPIs.html
5 | Formats: JSON, PubTator, BioC.
6 |
7 | Nomenclatures:
8 | Gene : NCBI Gene
9 | e.g. https://www.ncbi.nlm.nih.gov/sites/entrez?db=gene&term=145226
10 | Disease : MEDIC (CTD, CTD_diseases.csv)
11 | e.g. http://ctdbase.org/basicQuery.go?bqCat=disease&bq=C537775
12 | Chemical : MESH
13 | e.g. http://www.nlm.nih.gov/cgi/mesh/2014/MB_cgi?field=uid&term=D000596
14 | Species : NCBI Taxonomy
15 | e.g. https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?name=10090
16 | Mutation : tmVar
17 | https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator/tutorial/tmVar.html
18 |
19 | NOTE that the API does NOT provide keyword search capability like
20 | webapp https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/PubTator/index.cgi
21 | """
22 | import sys,os,time,json,argparse,re,logging
23 | #
24 | from .. import pubtator
25 | #
# Default PubTator REST endpoint (overridable with --api_host / --api_base_path).
API_HOST="www.ncbi.nlm.nih.gov"
API_BASE_PATH="/CBBresearch/Lu/Demo/RESTful/tmTool.cgi"
#
#############################################################################
if __name__ == '__main__':
    # Command-line entry point: report PubTator NER annotations for PMIDs.
    parser = argparse.ArgumentParser(description='PubTator REST API client', epilog='Reports PubMed NER annotations for specified PMID[s].')
    ops = ['get_annotations']
    modes = ['Gene', 'Chemical', 'BioConcept']
    parser.add_argument("op", choices=ops, help="operation")
    parser.add_argument("--mode", choices=modes, help='mode', default='BioConcept')
    parser.add_argument("--ids", help="PubMed IDs, comma-separated (ex:25533513)")
    parser.add_argument("--i", dest="ifile", help="input file, PubMed IDs")
    parser.add_argument("--nmax", help="list: max to return")
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    BASE_URL = 'https://'+args.api_host+args.api_base_path

    # PMIDs come from the input file (one per line) or from the --ids string.
    ids = []
    if args.ifile:
        # Context manager guarantees the input file is closed, even on error.
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin]
        logging.info('Input IDs: %d'%(len(ids)))
    elif args.ids:
        ids = re.split(r'[\s,]+', args.ids.strip())

    # Fail as a usage error instead of logging and then querying with no IDs.
    # Checked before the output file is opened so --o is not truncated.
    if args.op == 'get_annotations' and not ids:
        parser.error('Input PMIDs required.')

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    if args.op == 'get_annotations':
        pubtator.GetAnnotations(BASE_URL, args.mode, ids, fout)

    else:
        logging.error('Invalid operation: {0}'.format(args.op))
69 |
--------------------------------------------------------------------------------
/BioClients/oncotree/Utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Utility functions for Oncotree REST API.
4 | See: https://oncotree.mskcc.org/
5 | See: https://oncotree.mskcc.org/#/home
6 | """
7 | import sys,os,re,requests,urllib,json,time,logging,tqdm
8 | import pandas as pd
9 |
10 | #
11 | API_HOST="oncotree.mskcc.org"
12 | API_BASE_PATH="/api"
13 | BASE_URL="https://"+API_HOST+API_BASE_PATH
14 | #
15 | HEADERS={"Accept":"application/json"}
16 | #
17 | ##############################################################################
def Info(base_url=BASE_URL, fout=None):
    """Fetch the JSON response of the Oncotree `/info` endpoint.

    Args:
        base_url: Root URL of the REST API; "/info" is appended to it.
        fout: Optional writable text stream. When given, the response is
            written to it as pretty-printed JSON.

    Returns:
        The decoded JSON response.
    """
    response = requests.get(base_url+"/info", headers=HEADERS)
    result = response.json()
    text = json.dumps(result, sort_keys=True, indent=2)
    logging.debug(text)
    # Previously fout was accepted but ignored and nothing was returned.
    if fout: fout.write(text+"\n")
    return result
22 |
23 | ##############################################################################
def ListVersions(base_url=BASE_URL, fout=None):
    """Retrieve the `/versions` listing and return it as a DataFrame.

    Args:
        base_url: Root URL of the REST API.
        fout: Optional output stream or path; when truthy, the table is
            written to it as tab-separated values without the index.

    Returns:
        pandas.DataFrame built from the returned records.
    """
    endpoint = base_url+'/versions'
    records = requests.get(endpoint, headers=HEADERS).json()
    logging.debug(json.dumps(records, sort_keys=True, indent=2))
    frame = pd.DataFrame.from_records(records)
    if fout:
        frame.to_csv(fout, sep="\t", index=False)
    logging.info(f"Versions: {len(records)}")
    return frame
33 |
34 | ##############################################################################
def ListMainTypes(base_url=BASE_URL, fout=None):
    """Retrieve the `/mainTypes` listing, sorted, as a one-column DataFrame.

    Args:
        base_url: Root URL of the REST API.
        fout: Optional output stream or path; when truthy, the table is
            written to it as tab-separated values without the index.

    Returns:
        pandas.DataFrame with a single column named "main_types".
    """
    endpoint = base_url+'/mainTypes'
    names = requests.get(endpoint, headers=HEADERS).json()
    # The debug log shows the response in server order, before sorting.
    logging.debug(json.dumps(names, sort_keys=True, indent=2))
    names.sort()
    frame = pd.DataFrame({"main_types": names})
    if fout:
        frame.to_csv(fout, sep="\t", index=False)
    logging.info(f"Main types: {len(names)}")
    return frame
45 |
46 | ##############################################################################
def ListTumorTypes(base_url=BASE_URL, fout=None):
    """Retrieve the `/tumorTypes` listing and return it as a DataFrame.

    Args:
        base_url: Root URL of the REST API.
        fout: Optional output stream or path; when truthy, the table is
            written to it as tab-separated values without the index.

    Returns:
        pandas.DataFrame built from the returned records.
    """
    endpoint = base_url+'/tumorTypes'
    records = requests.get(endpoint, headers=HEADERS).json()
    logging.debug(json.dumps(records, sort_keys=True, indent=2))
    frame = pd.DataFrame.from_records(records)
    if fout:
        frame.to_csv(fout, sep="\t", index=False)
    logging.info(f"Tumor types: {len(records)}")
    return frame
56 |
57 | ##############################################################################
def SearchTumorTypes(qry, qtype, exact, levels, base_url=BASE_URL, fout=None):
    """Search tumor types via `/tumorTypes/search/{qtype}/{qry}`.

    Args:
        qry: Search text; placed in the URL path, percent-encoded.
        qtype: Search type; placed in the URL path, percent-encoded.
            (Valid values are defined by the API, not by this function.)
        exact: Passed as the `exactMatch` query parameter via str().
        levels: String passed percent-encoded as the `levels` query
            parameter. Presumably a comma-separated list; confirm against
            the API documentation.
        base_url: Root URL of the REST API.
        fout: Optional output stream or path; when truthy, the table is
            written to it as tab-separated values without the index.

    Returns:
        pandas.DataFrame built from the returned records.
    """
    # Import the submodule explicitly; a bare `import urllib` does not
    # guarantee that `urllib.parse` is loaded.
    import urllib.parse
    quote = urllib.parse.quote
    # safe="" so that "/" (and "?", "#", "%") inside the search text cannot
    # alter the structure of the request URL.
    path = f"/tumorTypes/search/{quote(str(qtype), safe='')}/{quote(str(qry), safe='')}"
    url = base_url+path+f"?exactMatch={str(exact)}&levels={quote(levels)}"
    response = requests.get(url, headers=HEADERS)
    result = response.json()
    logging.debug(json.dumps(result, sort_keys=True, indent=2))
    tumortypes = result
    df = pd.DataFrame.from_records(tumortypes)
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info(f"Tumor types: {len(tumortypes)}")
    return df
67 |
68 | ##############################################################################
69 |
--------------------------------------------------------------------------------
/BioClients/badapple/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | https://chiltepin.health.unm.edu/badapple2/apidocs/
4 | """
5 | ###
6 | import sys,os,re,argparse,time,logging
7 | #
8 | from .. import badapple
9 | #
10 | ##############################################################################
if __name__=='__main__':
    # Command-line entry point: parse arguments, collect input SMILES or
    # scaffold IDs, and dispatch to the requested badapple operation.
    parser = argparse.ArgumentParser(
        description='Badapple REST API client utility',
        epilog="""\
Example SMILES: OC(=O)C1=C2CCCC(C=C3C=CC(=O)C=C3)=C2NC2=CC=CC=C12
Example scaffold IDs: 46,50
""")
    ops = ['get_compound2scaffolds', 'get_scaffold_info', 'get_scaffold2compounds', 'get_scaffold2drugs', ]
    parser.add_argument("op",choices=ops,help='OPERATION')
    parser.add_argument("--smi", dest="smi", help="input SMILES")
    parser.add_argument("--ids", dest="ids", help="input IDs, comma-separated")
    parser.add_argument("--i", dest="ifile", help="input SMILES file (with optional appended NAME), or input IDs file")
    parser.add_argument("--db", choices=badapple.DATABASES, default="badapple2", help="default=badapple2")
    parser.add_argument("--o", dest="ofile", help="output file (TSV)")
    parser.add_argument("--max_rings", type=int, default=10, help="max rings")
    parser.add_argument("--api_host", default=badapple.API_HOST)
    parser.add_argument("--api_base_path", default=badapple.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", action="count", default=0)

    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url='https://'+args.api_host+args.api_base_path

    fout = open(args.ofile,"w") if args.ofile else sys.stdout

    t0=time.time()

    # Always define ids so that a missing input yields a usage error rather
    # than a NameError further down.
    ids = []
    if args.ifile:
        # One record per line; blank lines are skipped.
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin if line.rstrip()]
        # The input file may hold SMILES or IDs depending on the operation.
        logging.info(f"Input SMILES or IDs: {len(ids)}")
    elif args.ids:
        ids = [tok for tok in re.split(r'[, ]+', args.ids.strip()) if tok]
    elif args.smi:
        ids = [args.smi.strip()]

    if not ids:
        parser.error("Input required: --i, --ids, or --smi.")

    if args.op == "get_compound2scaffolds":
        badapple.GetCompound2Scaffolds(ids, args.db, args.max_rings, base_url, fout)

    elif args.op == "get_scaffold_info":
        badapple.GetScaffoldInfo(ids, args.db, base_url, fout)

    elif args.op == "get_scaffold2compounds":
        badapple.GetScaffold2Compounds(ids, args.db, base_url, fout)

    elif args.op == "get_scaffold2drugs":
        badapple.GetScaffold2Drugs(ids, args.db, base_url, fout)

    elif args.op == "get_version":
        # Not reachable at present: "get_version" is not listed in ops, so
        # argparse rejects it before this point.
        #badapple.GetVersion(args.db, base_url, fout)
        parser.error("Not implemented.")

    else:
        parser.error("No operation specified.")

    logging.info(f"elapsed time: {time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0))}")
74 |
--------------------------------------------------------------------------------
/BioClients/ensembl/biomart/Client.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | """
3 | Access to Ensembl BIOMART REST API.
4 | https://m.ensembl.org/info/data/biomart/biomart_restful.html
5 | """
6 | import sys,os,re,argparse,time,logging
7 |
8 | from ... import ensembl
9 | #
10 | ##############################################################################
def DemoXMLQuery(base_url, fout):
    """Submit a built-in XML query via ensembl.biomart.Utils.XMLQuery.

    Args:
        base_url: Passed through unchanged to XMLQuery.
        fout: Passed through unchanged to XMLQuery.
    """
    # NOTE(review): the query text below contains only blank lines as seen
    # here; presumably XML markup was lost from this copy of the file.
    # Confirm against the upstream source before relying on this demo.
    xmltext = """\












"""
    ensembl.biomart.Utils.XMLQuery(xmltext, base_url, fout)
27 |
28 | ##############################################################################
if __name__=='__main__':
    # Command-line entry point: parse arguments and dispatch to the
    # requested Ensembl BioMart operation.
    parser = argparse.ArgumentParser(prog=sys.argv[0], description="Ensembl BIOMART REST API client", epilog="For XML query file format, see https://m.ensembl.org/info/data/biomart/biomart_restful.html#biomartxml")
    ops = ["xmlQuery", "ensg2ncbi", "ensg2hgnc", "ensg2ncbihgnc", "demo", "show_version"]
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--ixml", dest="ixmlfile", help="input file, XML query")
    parser.add_argument("--api_host", default=ensembl.biomart.API_HOST)
    parser.add_argument("--api_base_path", default=ensembl.biomart.API_BASE_PATH)
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url='http://'+args.api_host+args.api_base_path

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    t0=time.time()

    if args.op=='xmlQuery':
        if not args.ixmlfile:
            # parser.error() prints usage and exits; no explicit exit needed.
            parser.error(f"Input XML file required for {args.op}")
        with open(args.ixmlfile) as fin:
            xmltext = fin.read()
        ensembl.biomart.Utils.XMLQuery(xmltext, base_url, fout)

    elif args.op=='ensg2ncbi':
        ensembl.biomart.Utils.ENSG2NCBI(base_url, fout)

    elif args.op=='ensg2hgnc':
        ensembl.biomart.Utils.ENSG2HGNC(base_url, fout)

    elif args.op=='ensg2ncbihgnc':
        ensembl.biomart.Utils.ENSG2NCBIHGNC(base_url, fout)

    elif args.op=='demo':
        DemoXMLQuery(base_url, fout)

    elif args.op=='show_version':
        # Listed in ops but has no handler here; report that accurately
        # instead of calling an accepted choice "invalid".
        parser.error(f'Operation not implemented: {args.op}')

    else:
        parser.error(f'Invalid operation: {args.op}')

    logging.info(('%s: elapsed time: %s'%(os.path.basename(sys.argv[0]), time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0)))))
71 |
--------------------------------------------------------------------------------