├── BioClients ├── cfde │ ├── __init__.py │ └── cfchemdb │ │ └── __init__.py ├── dnorm │ └── __init__.py ├── omim │ └── __init__.py ├── humanbase │ └── __init__.py ├── maayanlab │ ├── __init__.py │ ├── archs4 │ │ ├── __init__.py │ │ ├── Utils.py │ │ └── Client.py │ └── harmonizome │ │ ├── __init__.py │ │ ├── Client.py │ │ └── Utils.py ├── medline │ ├── __init__.py │ ├── connect │ │ ├── __init__.py │ │ └── Utils.py │ └── genetics │ │ └── __init__.py ├── panther │ └── __init__.py ├── pubchem │ ├── rdf │ │ └── __init__.py │ ├── ftp │ │ ├── __init__.py │ │ └── Client.py │ ├── soap │ │ └── __init__.py │ └── __init__.py ├── cdc │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── chebi │ └── __init__.py ├── fda │ ├── __init__.py │ └── aer │ │ └── __init__.py ├── gtex │ ├── __init__.py │ └── Client.py ├── hugo │ └── __init__.py ├── icite │ ├── __init__.py │ ├── Client.py │ └── Utils.py ├── lincs │ ├── __init__.py │ └── sigcom │ │ ├── __init__.py │ │ ├── Utils.py │ │ └── Client.py ├── mesh │ ├── __init__.py │ └── Client.py ├── ncats │ ├── __init__.py │ └── gsrs │ │ ├── __init__.py │ │ └── Client.py ├── ncbo │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── pdb │ ├── __init__.py │ └── Client.py ├── tcga │ ├── __init__.py │ └── Client.py ├── ubkg │ └── __init__.py ├── umls │ └── __init__.py ├── amp │ ├── t2d │ │ ├── __init__.py │ │ ├── Utils.py │ │ └── Client.py │ └── __init__.py ├── badapple │ ├── __init__.py │ └── Client.py ├── bindingdb │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── biogrid │ └── __init__.py ├── brenda │ └── __init__.py ├── chembl │ └── __init__.py ├── disgenet │ └── __init__.py ├── glygen │ ├── __init__.py │ └── Client.py ├── hubmap │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── idg │ ├── pharos │ │ └── __init__.py │ ├── rss │ │ ├── __init__.py │ │ ├── Utils.py │ │ └── Client.py │ ├── tcrd │ │ └── __init__.py │ ├── tiga │ │ └── __init__.py │ ├── tinx │ │ └── __init__.py │ ├── __init__.py │ └── Client.py ├── iuphar │ └── __init__.py ├── 
jensenlab │ ├── __init__.py │ ├── Client.py │ └── Utils.py ├── monarch │ └── __init__.py ├── mygene │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── oncotree │ ├── __init__.py │ └── Utils.py ├── openphacts │ └── __init__.py ├── pubtator │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── rxnorm │ └── __init__.py ├── stringdb │ └── __init__.py ├── uniprot │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── util │ ├── db │ │ ├── __init__.py │ │ └── Utils.py │ ├── hdf │ │ ├── __init__.py │ │ └── Utils.py │ ├── neo4j │ │ ├── __init__.py │ │ ├── Utils.py │ │ └── App.py │ ├── obo │ │ ├── __init__.py │ │ ├── App.py │ │ └── Utils.py │ ├── owl │ │ ├── __init__.py │ │ ├── Utils.py │ │ └── App.py │ ├── rdf │ │ ├── __init__.py │ │ ├── Utils.py │ │ └── App.py │ ├── rest │ │ └── __init__.py │ ├── xml │ │ └── __init__.py │ ├── yaml │ │ ├── __init__.py │ │ └── Utils.py │ ├── graphql │ │ ├── __init__.py │ │ └── Utils.py │ ├── igraph │ │ └── __init__.py │ ├── pandas │ │ ├── __init__.py │ │ └── Csv2Markdown.py │ ├── sparql │ │ └── __init__.py │ └── __init__.py ├── wikidata │ ├── __init__.py │ ├── Client.py │ └── Utils.py ├── allen │ ├── brain │ │ └── __init__.py │ └── __init__.py ├── biomarkerkb │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── bioregistry │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── clinicaltrials │ └── __init__.py ├── drugcentral │ ├── __init__.py │ └── Test.py ├── emblebi │ ├── __init__.py │ ├── unichem │ │ ├── __init__.py │ │ └── Client.py │ └── identifiers │ │ └── __init__.py ├── ensembl │ ├── biomart │ │ ├── __init__.py │ │ └── Client.py │ └── __init__.py ├── geneontology │ ├── __init__.py │ ├── Utils.py │ └── Client.py ├── gwascatalog │ └── __init__.py ├── opentargets │ └── __init__.py ├── wikipathways │ ├── __init__.py │ └── Utils.py ├── reactome │ ├── __init__.py │ └── SMBL_utils.py ├── cas │ ├── __init__.py │ ├── Client.py │ └── Utils.py ├── chem2bio2rdf │ ├── slap │ │ └── __init__.py │ └── __init__.py ├── __init__.py ├── chemidplus │ ├── 
__init__.py │ └── Client.py ├── entrez │ ├── __init__.py │ ├── Utils.py │ └── Client.py └── pubmed │ ├── __init__.py │ └── Client.py ├── doc ├── panther.md ├── hugo.md ├── images │ └── BioClients_logo.png ├── geneontology.md ├── entrez.md ├── pdb.md ├── monarch.md ├── brenda.md ├── biogrid.md ├── iuphar.md ├── cdc.md ├── icite.md ├── tcga.md ├── bindingdb.md ├── cas.md ├── lincs.md ├── mygene.md ├── wikipathways.md ├── ncbo.md ├── glygen.md ├── chemidplus.md ├── dnorm.md ├── fda.md ├── allen.md ├── disgenet.md ├── oncotree.md ├── amp_t2d.md ├── uniprot.md ├── maayanlab.md ├── biomarkerkb.md ├── jensenlab.md ├── humanbase.md ├── omim.md ├── chem2bio2rdf.md ├── opentargets.md ├── mesh.md ├── pubtator.md ├── reactome.md ├── gtex.md ├── bioregistry.md ├── ncats.md ├── cfde.md ├── chebi.md ├── ensembl.md ├── clinicaltrials.md ├── wikidata.md ├── badapple.md ├── stringdb.md ├── medline.md ├── pubmed.md ├── gwascatalog.md ├── rxnorm.md ├── ubkg.md └── chembl.md ├── setup.py └── .gitignore /BioClients/cfde/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BioClients/dnorm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BioClients/omim/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BioClients/humanbase/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BioClients/maayanlab/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/BioClients/medline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BioClients/panther/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BioClients/pubchem/rdf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /BioClients/cdc/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/chebi/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/fda/__init__.py: -------------------------------------------------------------------------------- 1 | from .aer import * 2 | -------------------------------------------------------------------------------- /BioClients/gtex/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/hugo/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/icite/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/lincs/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/mesh/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/ncats/__init__.py: -------------------------------------------------------------------------------- 1 | from .gsrs import * 2 | -------------------------------------------------------------------------------- /BioClients/ncbo/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/pdb/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/tcga/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/ubkg/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/umls/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/amp/t2d/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/badapple/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/bindingdb/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/biogrid/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/brenda/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/chembl/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/disgenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/fda/aer/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/glygen/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/hubmap/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- 
/BioClients/idg/pharos/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/idg/rss/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/idg/tcrd/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/idg/tiga/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/idg/tinx/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/iuphar/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/jensenlab/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/monarch/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/mygene/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | 
-------------------------------------------------------------------------------- /BioClients/ncats/gsrs/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/oncotree/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/openphacts/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/pubtator/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/rxnorm/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/stringdb/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/uniprot/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/db/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/hdf/__init__.py: -------------------------------------------------------------------------------- 1 | 
from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/neo4j/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/obo/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/owl/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/rdf/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/rest/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/xml/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/yaml/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/wikidata/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/allen/brain/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/biomarkerkb/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/bioregistry/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/cfde/cfchemdb/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/clinicaltrials/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/drugcentral/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/emblebi/__init__.py: -------------------------------------------------------------------------------- 1 | from .identifiers import * 2 | -------------------------------------------------------------------------------- /BioClients/emblebi/unichem/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/ensembl/biomart/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | 
-------------------------------------------------------------------------------- /BioClients/geneontology/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/gwascatalog/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/lincs/sigcom/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/medline/connect/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/opentargets/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/pubchem/ftp/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/pubchem/soap/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/graphql/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/igraph/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/pandas/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/util/sparql/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/wikipathways/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/emblebi/identifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/maayanlab/archs4/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /BioClients/medline/genetics/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | -------------------------------------------------------------------------------- /doc/panther.md: -------------------------------------------------------------------------------- 1 | # `BioClients.panther` 2 | 3 | ## Panther 4 | 5 | -------------------------------------------------------------------------------- /BioClients/maayanlab/harmonizome/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | 
-------------------------------------------------------------------------------- /BioClients/ensembl/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | 3 | __all__ = [ "biomart" ] 4 | -------------------------------------------------------------------------------- /BioClients/reactome/__init__.py: -------------------------------------------------------------------------------- 1 | from .Utils import * 2 | #from .SMBL_utils import * 3 | -------------------------------------------------------------------------------- /doc/hugo.md: -------------------------------------------------------------------------------- 1 | # `BioClients.hugo` 2 | 3 | ## HUGO 4 | 5 | * 6 | -------------------------------------------------------------------------------- /BioClients/cas/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for CAS web services.""" 2 | 3 | from .Utils import * 4 | 5 | -------------------------------------------------------------------------------- /BioClients/chem2bio2rdf/slap/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for SLAP REST API.""" 2 | from .Utils import * 3 | -------------------------------------------------------------------------------- /BioClients/__init__.py: -------------------------------------------------------------------------------- 1 | """Python package for access to online biomedical resources, usually via REST APIs.""" 2 | -------------------------------------------------------------------------------- /BioClients/allen/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for Allen Institute web services.""" 2 | 3 | __all__ = [ "brain" ] 4 | -------------------------------------------------------------------------------- /doc/images/BioClients_logo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeremyjyang/BioClients/HEAD/doc/images/BioClients_logo.png -------------------------------------------------------------------------------- /doc/geneontology.md: -------------------------------------------------------------------------------- 1 | # `BioClients.geneontology` 2 | 3 | ## Gene Ontology 4 | 5 | * 6 | -------------------------------------------------------------------------------- /BioClients/chemidplus/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for NLM ChemIdPlus web services.""" 2 | 3 | from .Utils import * 4 | 5 | -------------------------------------------------------------------------------- /BioClients/idg/__init__.py: -------------------------------------------------------------------------------- 1 | """Access IDG APIs.""" 2 | 3 | from .Utils import * 4 | 5 | __all__ = [ "rss", "tcrd", "tinx" ] 6 | -------------------------------------------------------------------------------- /doc/entrez.md: -------------------------------------------------------------------------------- 1 | # `BioClients.entrez` 2 | 3 | ## NIH NCBI Entrez 4 | 5 | NIH NCBI Entrez E-Utilities client via Entrezpy. 6 | -------------------------------------------------------------------------------- /BioClients/entrez/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for NIH NCBI Entrez E-Utilities services, via Entrezpy.""" 2 | 3 | from .Utils import * 4 | -------------------------------------------------------------------------------- /doc/pdb.md: -------------------------------------------------------------------------------- 1 | # `BioClients.pdb` 2 | 3 | ## PDB 4 | 5 | Utility for PDB REST API. 
6 | 7 | * 8 | -------------------------------------------------------------------------------- /BioClients/amp/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for AMP project web services.""" 2 | 3 | #from .Utils import * 4 | 5 | __all__ = [ "t2d" ] 6 | -------------------------------------------------------------------------------- /BioClients/pubmed/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for PubMed web services, and NCBI-PubMed XML processing.""" 2 | 3 | from .Utils import * 4 | -------------------------------------------------------------------------------- /doc/monarch.md: -------------------------------------------------------------------------------- 1 | # `BioClients.monarch` 2 | 3 | ## Monarch Initiative 4 | 5 | * 6 | -------------------------------------------------------------------------------- /BioClients/pubchem/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for PubChem web services.""" 2 | 3 | from .Utils import * 4 | 5 | __all__ = [ "ftp", "rdf", "soap" ] 6 | -------------------------------------------------------------------------------- /BioClients/util/__init__.py: -------------------------------------------------------------------------------- 1 | """Miscellaneous utilities for web service clients.""" 2 | 3 | __all__ = [ "pandas", "rest", "sparql", "xml", "yaml" ] 4 | -------------------------------------------------------------------------------- /doc/brenda.md: -------------------------------------------------------------------------------- 1 | # `BioClients.brenda` 2 | 3 | ## BRENDA 4 | 5 | * 6 | * 7 | 8 | -------------------------------------------------------------------------------- /doc/biogrid.md: -------------------------------------------------------------------------------- 1 | # `BioClients.biogrid` 2 | 3 | ## BioGrid 4 | 5 | * 6 
| * 7 | 8 | -------------------------------------------------------------------------------- /doc/iuphar.md: -------------------------------------------------------------------------------- 1 | # `BioClients.iuphar` 2 | 3 | ## IUPHAR, a.k.a. Guide to Pharmacology 4 | 5 | * 6 | 7 | -------------------------------------------------------------------------------- /BioClients/chem2bio2rdf/__init__.py: -------------------------------------------------------------------------------- 1 | """Client tools for Chem2Bio2RDF (PostgreSql db) and SLAP (REST API).""" 2 | 3 | from .Utils import * 4 | 5 | __all__ = [ "slap" ] 6 | -------------------------------------------------------------------------------- /doc/cdc.md: -------------------------------------------------------------------------------- 1 | # `BioClients.cdc` 2 | 3 | ## CDC 4 | 5 | CDC REST API client 6 | 7 | * 8 | * 9 | -------------------------------------------------------------------------------- /doc/icite.md: -------------------------------------------------------------------------------- 1 | # `BioClients.icite` 2 | 3 | ## iCite 4 | 5 | PubMed iCite REST API client 6 | 7 | * [PubMed](https://pubmed.ncbi.nlm.nih.gov/) 8 | * [iCite](https://icite.od.nih.gov/) 9 | -------------------------------------------------------------------------------- /doc/tcga.md: -------------------------------------------------------------------------------- 1 | # `BioClients.tcga` 2 | 3 | ## TCGA (The Cancer Genome Atlas) 4 | 5 | * 6 | * 7 | 8 | -------------------------------------------------------------------------------- /doc/bindingdb.md: -------------------------------------------------------------------------------- 1 | # `BioClients.bindingdb` 2 | 3 | ## BindingDb 4 | 5 | BindingDb REST API client 6 | 7 | * 8 | * 9 | -------------------------------------------------------------------------------- /doc/cas.md: -------------------------------------------------------------------------------- 1 | # `BioClients.cas` 2 | 3 | ## CAS 4 
| 5 | CAS Common Chemistry REST API client 6 | 7 | * 8 | * 9 | -------------------------------------------------------------------------------- /doc/lincs.md: -------------------------------------------------------------------------------- 1 | # `BioClients.lincs` 2 | 3 | ## LINCS 4 | 5 | LINCS REST API client 6 | 7 | New (2019) iLINCS: 8 | 9 | * 10 | * 11 | 12 | -------------------------------------------------------------------------------- /doc/mygene.md: -------------------------------------------------------------------------------- 1 | # `BioClients.mygene` 2 | 3 | ## MyGene 4 | 5 | Access to MyGene REST API. 6 | 7 | * 8 | * 9 | 10 | ``` 11 | python3 -m BioClients.mygene.Client -h 12 | ``` 13 | -------------------------------------------------------------------------------- /doc/wikipathways.md: -------------------------------------------------------------------------------- 1 | # `BioClients.wikipathways` 2 | 3 | ## WikiPathways 4 | 5 | Access to WikiPathways REST API. 6 | 7 | * 8 | 9 | ``` 10 | python3 -m BioClients.wikipathways.Client list_pathways 11 | ``` 12 | -------------------------------------------------------------------------------- /doc/ncbo.md: -------------------------------------------------------------------------------- 1 | # `BioClients.ncbo` 2 | 3 | The National Center for Biomedical Ontology was founded as one of the National Centers for Biomedical Computing, supported by the NHGRI, the NHLBI, and the NIH Common Fund. 4 | 5 | * 6 | -------------------------------------------------------------------------------- /doc/glygen.md: -------------------------------------------------------------------------------- 1 | # `BioClients.glygen` 2 | 3 | ## GlyGen 4 | 5 | GlyGen REST API client. 
6 | 7 | * 8 | * 9 | 10 | 11 | ## Example commands 12 | 13 | ``` 14 | python3 -m BioClients.glygen.Client -h 15 | ``` 16 | -------------------------------------------------------------------------------- /doc/chemidplus.md: -------------------------------------------------------------------------------- 1 | # `BioClients.chemidplus` 2 | 3 | ## ChemIdPlus 4 | 5 | NIH NLM ChemIdPlus REST API client 6 | 7 | * 8 | * 9 | * 10 | -------------------------------------------------------------------------------- /doc/dnorm.md: -------------------------------------------------------------------------------- 1 | # `BioClients.dnorm` 2 | 3 | ## DNorm 4 | 5 | NCBI CBB REST client (Computational Biology Branch) 6 | 7 | * 8 | * 9 | -------------------------------------------------------------------------------- /doc/fda.md: -------------------------------------------------------------------------------- 1 | # `BioClients.fda` 2 | 3 | ## FDA 4 | 5 | OpenFDA Adverse Event Reports REST API client. 6 | 7 | * 8 | * 9 | -------------------------------------------------------------------------------- /doc/allen.md: -------------------------------------------------------------------------------- 1 | # `BioClients.allen` 2 | 3 | ## Allen Brain Atlas 4 | 5 | Allen Brain Atlas REST API client 6 | 7 | * 8 | 9 | ``` 10 | python3 -m BioClients.allen.brain.Client -h 11 | ``` 12 | 13 | Additional Allen Institute resources may be added in future. 
14 | -------------------------------------------------------------------------------- /doc/disgenet.md: -------------------------------------------------------------------------------- 1 | # `BioClients.disgenet` 2 | 3 | ## DisGeNet 4 | 5 | * 6 | * 7 | * 8 | * 9 | * 10 | -------------------------------------------------------------------------------- /doc/oncotree.md: -------------------------------------------------------------------------------- 1 | # `BioClients.oncotree` 2 | 3 | ## OncoTree: A Cancer Classification System for Precision Oncology 4 | 5 | * 6 | 7 | Ref: OncoTree: A Cancer Classification System for Precision Oncology, Kundra et al., JCO Clinical Cancer Informatics, 2021, https://doi.org/10.1200/CCI.20.00108. 8 | -------------------------------------------------------------------------------- /doc/amp_t2d.md: -------------------------------------------------------------------------------- 1 | # `BioClients.amp_t2d` 2 | 3 | AMP T2D: Accelerating Medicines Partnership Type-2 Diabetes project. 4 | 5 | * 6 | * 7 | 8 | ## Usage 9 | 10 | ``` 11 | $ python3 -m BioClients.amp_t2d.Client -h 12 | ``` 13 | -------------------------------------------------------------------------------- /doc/uniprot.md: -------------------------------------------------------------------------------- 1 | # `BioClients.uniprot` 2 | 3 | ## UniProt 4 | 5 | Access to Uniprot REST API. 6 | 7 | UniprotKB = Uniprot Knowledge Base 8 | 9 | * 10 | * 11 | * 12 | 13 | ``` 14 | python3 -m BioClients.uniprot.Client --uids Q14790 getData 15 | ``` 16 | -------------------------------------------------------------------------------- /doc/maayanlab.md: -------------------------------------------------------------------------------- 1 | # `MaayanLab` 2 | 3 | ## `MaayanLab.harmonizome` 4 | 5 | * 6 | 7 | ## `MaayanLab.archs4` 8 | 9 | Process [HDF5](https://www.hdfgroup.org/) files from download page, 10 | using [h5py](https://docs.h5py.org/en/stable/index.html). 
11 | 12 | * 13 | * 14 | -------------------------------------------------------------------------------- /doc/biomarkerkb.md: -------------------------------------------------------------------------------- 1 | # `BioClients.biomarkerkb` 2 | 3 | ## BiomarkerKB 4 | 5 | BiomarkerKB REST API client. 6 | 7 | * 8 | * 9 | 10 | 11 | ## Example commands 12 | 13 | ``` 14 | python -m BioClients.biomarkerkb.Client -h 15 | ``` 16 | 17 | ``` 18 | python -m BioClients.biomarkerkb.Client get_biomarker_detail --ids "AN6278-1" -v -v 19 | ``` 20 | -------------------------------------------------------------------------------- /doc/jensenlab.md: -------------------------------------------------------------------------------- 1 | # `BioClients.jensenlab` 2 | 3 | ## JensenLab 4 | 5 | * 6 | 7 | Currently focused on [DISEASES](https://diseases.jensenlab.org/). 8 | Three source channels are defined: 9 | 10 | * Experiments 11 | * Knowledge 12 | * Textmining 13 | 14 | ``` 15 | python3 -m BioClients.jensenlab.Client get_disease_genes --ids "DOID:10652" --channel "Knowledge" 16 | ``` 17 | -------------------------------------------------------------------------------- /doc/humanbase.md: -------------------------------------------------------------------------------- 1 | # `BioClients.humanbase` 2 | 3 | ## HumanBase 4 | 5 | Client to HumanBase REST API. 6 | Genome-scale Integrated Analysis of gene Networks in Tissues 7 | GIANT has moved to HumanBase (http://hb.flatironinstitute.org/). 8 | GIANT tissue networks integrate 987 genome-scale datasets, encompassing 9 | ~38,000 conditions from ~14,000 publications and include both expression and 10 | interaction measurements. 
def ReadParamFile(fparam):
    """Read a (possibly multi-document) YAML parameter file into one dict.

    Each document in the file is parsed with ``yaml.BaseLoader`` and its
    top-level keys are merged into a single dict. When the same key occurs
    in more than one document, the value from the later document wins.

    Documents that are not mappings (for example an empty document between
    two ``---`` separators) carry no parameters; they are skipped with a
    warning instead of failing on ``.items()``.

    Args:
        fparam: Path of the YAML file to read.

    Returns:
        dict of parameter name -> value, merged over all documents.
    """
    params = {}
    with open(fparam, 'r') as fh:
        for param in yaml.load_all(fh, Loader=yaml.BaseLoader):
            if not isinstance(param, dict):
                logging.warning(f"Skipping non-mapping YAML document in {fparam}")
                continue
            params.update(param)
    return params
import setuptools

# The README is embedded as the package's long description. Read it
# explicitly as UTF-8 so the build does not depend on the platform's
# default locale encoding.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setuptools.setup(
    name="BioClients",
    version="0.2.31",
    author="Jeremy Yang",
    author_email="jeremyjyang@gmail.com",
    description="Clients for online biomedical resources, usually via REST APIs.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/jeremyjyang/BioClients",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires='>=3.10',
)
def RunQuery(graphql, base_url, fout):
    """Execute one GraphQL query against an endpoint and write the JSON result.

    Args:
        graphql: GraphQL query text; when None an error is logged and
            nothing else happens.
        base_url: URL of the GraphQL endpoint handed to the aiohttp transport.
        fout: Writable text stream receiving the result as indented JSON
            followed by a newline.

    Returns:
        None. Any exception raised while parsing or executing the query, or
        while writing the result, is logged as an error rather than raised.
    """
    if graphql is None:
        logging.error("No query.")
        return None

    gql_client = Client(
        transport=AIOHTTPTransport(url=base_url),
        fetch_schema_from_transport=True,
    )
    logging.debug(f"client.schema: '{gql_client.schema}'")

    try:
        response = gql_client.execute(gql(graphql))
        serialized = json.dumps(response, indent=2)
        fout.write(serialized + "\n")
    except Exception as err:
        logging.error(err)
def Test(email):
    """Smoke-test entrezpy with a small search-then-fetch pipeline.

    Builds a Conduit for the given email, queues a nucleotide search whose
    result feeds a FASTA fetch of at most 10 records, and runs the pipeline.

    Args:
        email: Email address passed to ``entrezpy.conduit.Conduit``.
    """
    search_params = {
        'db': 'nucleotide',
        'term': 'H3N2 [organism] AND HA',
        'rettype': 'count',
        'sort': 'Date Released',
        'mindate': 2000,
        'maxdate': 2019,
        'datetype': 'pdat',
    }
    fetch_params = {'retmax': 10, 'retmode': 'text', 'rettype': 'fasta'}

    conduit = entrezpy.conduit.Conduit(email)
    pipeline = conduit.new_pipeline()
    # The fetch step depends on the search step's query id.
    search_id = pipeline.add_search(search_params)
    pipeline.add_fetch(fetch_params, dependency=search_id)
    conduit.run(pipeline)
def LoadOwlFile(ifile):
    """Load an OWL file with owlready2 and log how many classes it defines.

    Args:
        ifile: Path of the OWL file; it is opened via a ``file://`` IRI.

    Returns:
        The loaded ontology, or None when loading (or counting its classes)
        raised; the exception is logged as an error.
    """
    ontology = None
    try:
        loaded = owlready2.get_ontology(f"file://{ifile}").load()
        n_classes = len(list(loaded.classes()))
        logging.info(f"OWL ontology from {ifile} contains {n_classes} classes.")
        ontology = loaded
    except Exception as err:
        logging.error(err)
    return ontology


def DescribeOwl(ifile):
    """Load the OWL file; the only description produced is LoadOwlFile's log line."""
    LoadOwlFile(ifile)


def ValidateOwl(ifile):
    """Report whether the OWL file can be loaded.

    Returns:
        True when LoadOwlFile returned an ontology, False otherwise; the
        verdict is also logged at INFO level.
    """
    if LoadOwlFile(ifile) is None:
        logging.info(f"OWL file NOT VALIDATED: {ifile}")
        return False
    logging.info(f"OWL file VALIDATED: {ifile}")
    return True
6 | 7 | * 8 | * 9 | 10 | 11 | ## Example commands 12 | 13 | ``` 14 | $ python3 -m BioClients.gtex.Client -h 15 | usage: Client.py [-h] [--ids IDS] [--i IFILE] [--o OFILE] [--dataset DATASET] 16 | [--subject SUBJECT] [--skip SKIP] [--nmax NMAX] [--api_host API_HOST] 17 | [--api_base_path API_BASE_PATH] [-v] 18 | {list_datasets,list_subjects,list_samples,get_gene_expression} 19 | 20 | GTEx REST API client 21 | 22 | positional arguments: 23 | {list_datasets,list_subjects,list_samples,get_gene_expression} 24 | OPERATION (select one) 25 | 26 | options: 27 | -h, --help show this help message and exit 28 | --ids IDS input IDs 29 | --i IFILE input file, IDs 30 | --o OFILE output (TSV) 31 | --dataset DATASET GTEx datasetId 32 | --subject SUBJECT GTEx subjectId 33 | --skip SKIP 34 | --nmax NMAX 35 | --api_host API_HOST 36 | --api_base_path API_BASE_PATH 37 | -v, --verbose 38 | ``` 39 | -------------------------------------------------------------------------------- /BioClients/util/owl/App.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | OWL utility functions. 4 | """ 5 | import sys,os,re,gzip,argparse,logging 6 | 7 | from .. 
if __name__=="__main__":
    # Command-line entry point: describe or validate one OWL file.
    parser = argparse.ArgumentParser(description="OWL utility", epilog="")
    ops = [ "describe_owl", "validate_owl", ]
    parser.add_argument("op", choices=ops, help="OPERATION")
    parser.add_argument("--i", dest="ifile", help="input file (OWL)")
    # --o is still accepted for command-line compatibility, but neither
    # operation writes output, so the file is no longer opened (opening it
    # with "w" only truncated it).
    parser.add_argument("--o", dest="ofile", help="output file")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format="%(levelname)s:%(message)s", level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    # Both operations take the ontology by path; without --i the loader would
    # be asked for "file://None", so fail early with a usage error instead.
    if not args.ifile:
        parser.error("--i IFILE is required.")

    if args.op == "describe_owl":
        util_owl.DescribeOwl(args.ifile)

    elif args.op == "validate_owl":
        util_owl.ValidateOwl(args.ifile)

    else:
        parser.error(f"Invalid operation: {args.op}")
(comma-separated) 25 | --o OFILE output (TSV) 26 | --etype ETYPE evidence codes (|-separated) 27 | --prefix PREFIX CURIE prefix 28 | --nchunk NCHUNK 29 | --nmax NMAX 30 | --skip SKIP 31 | --api_host API_HOST 32 | --api_base_path API_BASE_PATH 33 | -v, --verbose 34 | ``` 35 | -------------------------------------------------------------------------------- /BioClients/reactome/SMBL_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | BRN = Biochemical Network Analysis (pybrn) 4 | """ 5 | import sys,os,re,logging 6 | import numpy 7 | 8 | from .. import reactome 9 | 10 | try: 11 | import brn 12 | except Exception as e: 13 | logging.error("pybrn not installed.") 14 | sys.exit() 15 | # 16 | API_HOST='reactomews.oicr.on.ca:8080' 17 | BASE_PATH='/ReactomeRESTfulAPI/RESTfulWS' 18 | API_BASE_URL='http://'+API_HOST+BASE_PATH 19 | # 20 | 21 | net = brn.fromSBML("data/reactome_reactions_homo_sapiens.2.sbml") 22 | 23 | logging.info('reactions: %d'%len(net.reactions)) 24 | logging.info('species: %d'%len(net.species)) 25 | logging.info('values: %d'%len(net.values)) 26 | 27 | n_reac=0; 28 | for r in net.reactions: 29 | n_reac+=1 30 | logging.info('%3d. %s'%(n_reac,net.showreact(r,printstr=False))) 31 | logging.info('n_reac: %d'%n_reac) 32 | 33 | 34 | n_spec=0; 35 | for s in net.species: 36 | n_spec+=1 37 | id_spec = re.sub(r'[^\d]','',s) 38 | spec = reactome.Utils.GetId(API_BASE_URL,id_spec,'PhysicalEntity') 39 | displayName = spec['displayName'] if 'displayName' in spec else '' 40 | schemaClass = spec['schemaClass'] if 'schemaClass' in spec else '' 41 | logging.info('%3d. 
API_HOST="maayanlab.cloud"
API_BASE_PATH="/sigcom-lincs/metadata-api"
BASE_URL='https://'+API_HOST+API_BASE_PATH


def GetResources(ids, base_url=BASE_URL, fout=None):
    """Fetch SigCom LINCS resource records by ID and tabulate their scalar fields.

    One GET request is made per ID to ``{base_url}/resources/{id}``. The
    columns are the keys of the first record whose values are neither lists
    nor dicts; a later record that lacks one of those keys gets an empty
    (None) cell instead of raising KeyError.

    Args:
        ids: Iterable of resource IDs (strings); each is URL-quoted.
        base_url: Base URL of the metadata API.
        fout: Optional path or text stream; when given and at least one
            record was fetched, the table is written to it as TSV.

    Returns:
        pandas.DataFrame with one row per ID, or None when ``ids`` is empty.
    """
    tags = None
    frames = []
    url_base = base_url + '/resources'
    for id_this in ids:
        url = f"{url_base}/{urllib.parse.quote(id_this)}"
        response = requests.get(url)
        rval = response.json()
        logging.debug(json.dumps(rval, indent=2))
        if not tags:
            tags = [tag for tag, val in rval.items() if not isinstance(val, (list, dict))]
        frames.append(pd.DataFrame({tag: [rval.get(tag)] for tag in tags}))
    # Concatenate once at the end; with no IDs there is nothing to build or write.
    df = pd.concat(frames) if frames else None
    if fout and df is not None:
        df.to_csv(fout, sep="\t", index=False)
    logging.info(f"IDs: {len(ids)}")
    return df
6 | 7 | * 8 | 9 | ### Global Substance Registration System (GSRS) 10 | 11 | * 12 | * 13 | * 14 | 15 | Examples: 16 | 17 | ``` 18 | $ python -m BioClients.ncats.gsrs.Client -h 19 | usage: Client.py [-h] [--i IFILE] [--o OFILE] [--ids IDS] [--query QUERY] 20 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v] 21 | {list_vocabularies,list_substances,search,get_substance,get_substance_names} 22 | 23 | NCATS Global Substance Registration System (GSRS) client 24 | 25 | positional arguments: 26 | {list_vocabularies,list_substances,search,get_substance,get_substance_names} 27 | OPERATION 28 | 29 | options: 30 | -h, --help show this help message and exit 31 | --i IFILE Input IDs 32 | --o OFILE Output (TSV) 33 | --ids IDS Input IDs (comma-separated) 34 | --query QUERY Search query. 35 | --api_host API_HOST 36 | --api_base_path API_BASE_PATH 37 | -v, --verbose 38 | 39 | Example search queries: IBUPRO ASPIRIN OXYTOCIN OXYTO* ASPIRIN AND ESTER COCN 40 | C=1CC=CC=C1C(=O)O 41 | ``` 42 | 43 | -------------------------------------------------------------------------------- /BioClients/biomarkerkb/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Utility functions for BiomarkerKB REST API. 
def ListSamples(f, fout):
    """Write the ``meta/samples`` datasets of an open HDF5 file as a TSV table.

    Every member of ``f["meta"]["samples"]`` that is an ``h5py.Dataset``
    becomes one column named after its key; members of other kinds are
    skipped. Byte-string values in object columns are decoded as UTF-8.

    Args:
        f: Open HDF5 file (or mapping-like equivalent) containing a
            ``meta`` group with a ``samples`` subgroup.
        fout: Path or text stream receiving the tab-separated output.

    Returns:
        None. When ``meta/samples`` is absent an error is logged and
        nothing is written.
    """
    if "meta" not in f or "samples" not in f["meta"]:
        logging.error("No samples found.")
        return
    samples = f["meta"]["samples"]
    df = pd.DataFrame()
    colnames = []
    for i, k in enumerate(list(samples.keys())):
        logging.debug(f"{i+1}. {samples[k].name}")
        if isinstance(samples[k], h5py.Dataset):
            df_this = pd.DataFrame(samples[k])
            df = pd.concat([df, df_this], axis=1)
            # NOTE(review): nesting reconstructed from flattened source; the
            # de-duplication appears to run after each added column - confirm
            # it was not meant to run once after the loop.
            df = df.drop_duplicates()
            # Track only the keys that actually contributed a column, so the
            # names still line up when the group holds non-Dataset members.
            # Assumes each dataset is 1-D (one column) - TODO confirm.
            colnames.append(k)
    df.columns = colnames

    for col, dtype in df.dtypes.items():
        # The builtin `object` replaces the `np.object` alias, which current
        # NumPy releases no longer provide.
        if dtype == object:  # Only process object columns.
            # Decode bytes; keep the original value where decode yields NaN.
            df[col] = df[col].str.decode('utf-8').fillna(df[col])

    logging.info(f"Output rows: {df.shape[0]}; columns: {df.shape[1]}")
    df.to_csv(fout, sep="\t", index=False)
6 | 7 | * 8 | 9 | ``` 10 | $ python3 -m BioClients.cfde.cfchemdb.Client -h 11 | usage: Client.py [-h] [--i IFILE] [--ids IDS] [--xref_type XREF_TYPE] [--o OFILE] 12 | [--dbhost DBHOST] [--dbport DBPORT] [--dbname DBNAME] [--dbusr DBUSR] 13 | [--dbpw DBPW] [--param_file PARAM_FILE] [--dbschema DBSCHEMA] [-v] [-q] 14 | {list_tables,list_columns,list_tables_rowCounts,version,get_structure,list_structures,list_structures2smiles,meta_listdbs} 15 | 16 | CFChemDb PostgreSql client utility 17 | 18 | positional arguments: 19 | {list_tables,list_columns,list_tables_rowCounts,version,get_structure,list_structures,list_structures2smiles,meta_listdbs} 20 | OPERATION (select one) 21 | 22 | optional arguments: 23 | -h, --help show this help message and exit 24 | --i IFILE input ID file 25 | --ids IDS input IDs (comma-separated) 26 | --xref_type XREF_TYPE 27 | xref ID type 28 | --o OFILE output (TSV) 29 | --dbhost DBHOST 30 | --dbport DBPORT 31 | --dbname DBNAME 32 | --dbusr DBUSR 33 | --dbpw DBPW 34 | --param_file PARAM_FILE 35 | --dbschema DBSCHEMA 36 | -v, --verbose 37 | -q, --quiet Suppress progress notification. 38 | ``` 39 | -------------------------------------------------------------------------------- /BioClients/maayanlab/archs4/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ### 3 | 4 | import sys,os,time,argparse,logging 5 | import numpy as np 6 | import pandas as pd 7 | import h5py 8 | 9 | from ...util import hdf 10 | from ... 
def ListResources(base_url, resource, fout):
    """Page through one CDC resource collection and write it to fout as TSV.

    Requests ``{base_url}/{resource}?offset=N&max=100`` repeatedly via
    ``rest.GetURL``. The keys of the first record become the header row;
    every record is then written using those keys, with an empty string for
    any key a record lacks.

    Paging stops when the response lacks the expected ``results`` /
    ``meta.pagination`` structure, when a page is empty, when the reported
    ``count`` reaches ``total``, or when no ``nextUrl`` is given.
    """
    tags = None
    n_rsc = 0
    offset = 0
    nchunk = 100
    url = base_url+'/%s'%resource
    while True:
        url_this = '%s?offset=%d&max=%d'%(url, offset, nchunk)
        rval = rest.GetURL(url_this, parse_json=True)
        try:
            rscs = rval['results']
            pag = rval['meta']['pagination']
            count = pag['count']
            total = pag['total']
            nextUrl = pag['nextUrl']
        except (KeyError, TypeError) as e:
            # Missing key, or rval is None / not a mapping: treat as end of
            # data, but do not swallow unrelated errors or Ctrl-C.
            logging.debug('Stopping at offset %d: unexpected response (%r)'%(offset, e))
            break
        for rsc in rscs:
            if not tags:
                # Freeze the header as a list so it cannot change underneath us.
                tags = list(rsc.keys())
                fout.write('\t'.join(tags)+'\n')
            vals = [str(rsc[tag]) if tag in rsc else '' for tag in tags]
            fout.write('\t'.join(vals)+'\n')
            n_rsc += 1
        # An empty page means no progress is possible; stop rather than loop forever.
        if not rscs or count>=total or not nextUrl:
            break
        offset += nchunk
    logging.info('n_rsc = %d (total %s)'%(n_rsc, resource))
def _stream_name(stream):
    """Return a printable name for a file-like object (falls back to repr)."""
    return getattr(stream, "name", repr(stream))

#############################################################################
def LoadRdfFile(fin, ifmt):
    """Parse an RDF stream into an rdflib.Graph.

    Returns the populated graph, or None if parsing raised (the error is
    logged, not propagated).
    """
    g = rdflib.Graph()
    try:
        g.parse(fin, format=ifmt)
        logging.info(f"RDF graph from {_stream_name(fin)} ({ifmt}) contains {len(g)} triples.")
    except Exception as e:
        logging.error(e)
        return None
    return g

#############################################################################
def ValidateRdf(fin, ifmt):
    """Return True if the stream parses as RDF in format ifmt, else False."""
    g = LoadRdfFile(fin, ifmt)
    if g is not None:
        logging.info(f"RDF file VALIDATED: {_stream_name(fin)} ({ifmt})")
        return True
    logging.info(f"RDF file NOT VALIDATED: {_stream_name(fin)} ({ifmt})")
    return False

#############################################################################
def DescribeRdf(fin, ifmt):
    """Load the RDF stream; the only output is the triple count logged by LoadRdfFile."""
    g = LoadRdfFile(fin, ifmt)

#############################################################################
def ConvertRdf(fin, ifmt, ofmt, fout):
    """Parse RDF from fin (format ifmt) and write it to fout in format ofmt.

    Nothing is written if the input cannot be parsed.
    """
    g = LoadRdfFile(fin, ifmt)
    if g is None:
        # LoadRdfFile already logged the parse error.
        logging.error(f"RDF conversion aborted; input not parsed: {_stream_name(fin)} ({ifmt})")
        return
    rdf_out = g.serialize(format=ofmt)
    # Older rdflib returns bytes here, rdflib 6+ returns str; accept both.
    if isinstance(rdf_out, bytes):
        rdf_out = rdf_out.decode("utf8")
    fout.write(rdf_out)
    logging.info(f"RDF graph to {_stream_name(fout)} ({ofmt}) containing {len(g)} triples.")
9 | 10 | * 11 | * 12 | 13 | ``` 14 | python -m BioClients.chebi.Client -h 15 | usage: Client.py [-h] [--ids IDS] [--i IFILE] [--o OFILE] [--query QUERY] 16 | [--skip SKIP] [--nmax NMAX] [--api_host API_HOST] 17 | [--api_base_path API_BASE_PATH] [-v] 18 | {list_sources,get_entity,get_entity_names,get_entity_chemical_data,get_entity_secondary_ids,get_entity_children,get_entity_parents,get_entity_origins,search} 19 | 20 | ChEBI REST API client 21 | 22 | positional arguments: 23 | {list_sources,get_entity,get_entity_names,get_entity_chemical_data,get_entity_secondary_ids,get_entity_children,get_entity_parents,get_entity_origins,search} 24 | OPERATION (select one) 25 | 26 | options: 27 | -h, --help show this help message and exit 28 | --ids IDS input IDs 29 | --i IFILE input file, IDs 30 | --o OFILE output (TSV) 31 | --query QUERY search query (SMILES) 32 | --skip SKIP 33 | --nmax NMAX 34 | --api_host API_HOST 35 | --api_base_path API_BASE_PATH 36 | -v, --verbose 37 | 38 | Example entity IDs: 16737, 30273,33246,24433 39 | ``` 40 | -------------------------------------------------------------------------------- /doc/ensembl.md: -------------------------------------------------------------------------------- 1 | # `BioClients.ensembl` 2 | 3 | ## EnsEMBL 4 | 5 | Access to Ensembl REST API. 6 | 7 | * 8 | 9 | Including Variant Effect Predictor (VEP): 10 | 11 | * 12 | * 13 | 14 | ## `BioClients.ensembl.biomart` 15 | 16 | Also, the BIOMART ID mapping service. 
17 | 18 | * 19 | 20 | ``` 21 | $ python3 -m BioClients.ensembl.Client -h 22 | usage: Client.py [-h] [--ids IDS] 23 | [--i IFILE] 24 | [--api_host API_HOST] 25 | [--api_base_path API_BASE_PATH] 26 | [--o OFILE] 27 | [--skip SKIP] 28 | [--nmax NMAX] [-v] [-q] 29 | {list_species,get_xrefs,get_info,get_vep,show_version} 30 | 31 | Ensembl REST API client 32 | 33 | positional arguments: 34 | {list_species,get_xrefs,get_info,get_vep,show_version} 35 | operation 36 | 37 | options: 38 | -h, --help show this help message and exit 39 | --ids IDS Ensembl_IDs, comma-separated (ex:ENSG00000000003), or SNP IDs, 40 | comma-separated (ex:rs56116432) 41 | --i IFILE input file, Ensembl IDs or SNP IDs 42 | --api_host API_HOST 43 | --api_base_path API_BASE_PATH 44 | --o OFILE output (TSV) 45 | --skip SKIP 46 | --nmax NMAX 47 | -v, --verbose 48 | -q, --quiet 49 | 50 | Example IDs: ENSG00000157764, ENSG00000160785 51 | ``` 52 | -------------------------------------------------------------------------------- /BioClients/mygene/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | https://mygene.info/ 4 | https://mygene.info/v3/api 5 | https://pypi.org/project/mygene/ 6 | """ 7 | ### 8 | # 9 | import sys,os 10 | import pandas as pd 11 | import mygene as mg 12 | # 13 | FIELDS = 'HGNC,symbol,name,taxid,entrezgene,ensemblgene' 14 | NCHUNK=100; 15 | # 16 | ############################################################################# 17 | def GetGenes(ids, fields=FIELDS, fout=None): 18 | """Get genes by Entrez or Ensembl gene ID.""" 19 | ichunk=0; n_out=0; df=None; 20 | mgi = mg.MyGeneInfo() 21 | while ichunk*NCHUNK 8 | * 9 | * 10 | * 11 | 12 | ``` 13 | $ python3 -m BioClients.clinicaltrials.Client -h 14 | usage: Client.py [-h] [--i IFILE] [--o OFILE] [--ids IDS] 15 | [--query_cond QUERY_COND] [--query_term QUERY_TERM] 16 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v] 17 | 
{version,list_study_fields,list_search_areas,search_studies,get_studies} 18 | 19 | ClinicalTrials.gov API client 20 | 21 | positional arguments: 22 | {version,list_study_fields,list_search_areas,list_enums,search_studies,get_studies} 23 | OPERATION 24 | 25 | options: 26 | -h, --help show this help message and exit 27 | --i IFILE Input NCT_IDs 28 | --o OFILE Output (TSV) 29 | --ids IDS Input NCT_IDs (comma-separated) 30 | --query_cond QUERY_COND 31 | Search query condition 32 | --query_term QUERY_TERM 33 | Search query term 34 | --api_host API_HOST 35 | --api_base_path API_BASE_PATH 36 | -v, --verbose 37 | 38 | See: https://clinicaltrials.gov/data-api/about-api, 39 | https://clinicaltrials.gov/data-api/api, https://clinicaltrials.gov/find- 40 | studies/constructing-complex-search-queries 41 | ``` 42 | -------------------------------------------------------------------------------- /doc/wikidata.md: -------------------------------------------------------------------------------- 1 | # `BioClients.wikidata` 2 | 3 | ## Wikidata 4 | 5 | Wikidata is a collaboratively edited RDF/Sparql knowledge graph 6 | used ___by___ Wikipedia. The structured infobox data in Wikipedia 7 | is from Wikidata. Not to be confused with 8 | [DBPedia](https://en.wikipedia.org/wiki/DBpedia) which is 9 | built ___from___ Wikipedia. 10 | 11 | * 12 | 13 | This module provides access to Wikidata Sparql endpoint. Focus on 14 | biomedical entities and particularly GeneWiki. 
15 | 16 | * 17 | * 18 | 19 | ### GeneWiki 20 | 21 | * 22 | * 23 | 24 | ### Dependencies 25 | 26 | * [WikidataIntegrator](https://github.com/SuLab/WikidataIntegrator) 27 | 28 | ### Usage 29 | 30 | ``` 31 | $ python3 -m BioClients.wikidata.Client -h 32 | usage: Client.py [-h] [--o OFILE] [--rqfile RQFILE] [--rq RQ] [-v] 33 | {query,list_drugTargetPairs,list_geneDiseasePairs} 34 | 35 | Wikidata utilities 36 | 37 | positional arguments: 38 | {list_drugTargetPairs,list_geneDiseasePairs,query,test} 39 | OPERATION 40 | 41 | options: 42 | -h, --help show this help message and exit 43 | --o OFILE output (TSV) 44 | --rqfile RQFILE input Sparql file 45 | --rq RQ input Sparql string 46 | -v, --verbose 47 | ``` 48 | 49 | ``` 50 | $ python3 -m BioClients.wikidata.Client list_geneDiseasePairs 51 | ``` 52 | -------------------------------------------------------------------------------- /BioClients/util/rdf/App.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | RDF utility functions. 4 | """ 5 | import sys,os,re,gzip,argparse,logging 6 | 7 | import rdflib 8 | 9 | from .. 
import rdf as util_rdf 10 | 11 | ############################################################################# 12 | if __name__=="__main__": 13 | parser = argparse.ArgumentParser(description="RDF utility", epilog="") 14 | ops = [ "describe_rdf", "validate_rdf", "convert_rdf", ] 15 | FORMATS = ["text/turtle", "application/rdf+xml", "text/n3", ] 16 | parser.add_argument("op", choices=ops, help="OPERATION") 17 | parser.add_argument("--i", dest="ifile", help="input file (RDF)") 18 | parser.add_argument("--ifmt", choices=FORMATS, default="text/turtle", help="input RDF format") 19 | parser.add_argument("--ofmt", choices=FORMATS, default="text/turtle", help="output RDF format") 20 | parser.add_argument("--o", dest="ofile", help="output file") 21 | parser.add_argument("-v", "--verbose", action="count", default=0) 22 | args = parser.parse_args() 23 | 24 | logging.basicConfig(format="%(levelname)s:%(message)s", level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 25 | 26 | fin = open(args.ifile, "r") if args.ifile else sys.stdin 27 | fout = open(args.ofile, "w") if args.ofile else sys.stdout 28 | 29 | if args.op == "describe_rdf": 30 | util_rdf.DescribeRdf(fin, args.ifmt) 31 | 32 | elif args.op == "validate_rdf": 33 | util_rdf.ValidateRdf(fin, args.ifmt) 34 | 35 | elif args.op == "convert_rdf": 36 | util_rdf.ConvertRdf(fin, args.ifmt, args.ofmt, fout) 37 | 38 | else: 39 | parser.error(f"Invalid operation: {args.op}") 40 | -------------------------------------------------------------------------------- /BioClients/hubmap/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Utility functions for HuBMAP REST API. 
API_HOST='entity.api.hubmapconsortium.org'
API_BASE_PATH=''
API_BASE_URL = f"https://{API_HOST}{API_BASE_PATH}"
#
#############################################################################
def ListEntityTypes(base_url=API_BASE_URL, fout=None):
    """Fetch ``{base_url}/entity-types`` and return the decoded JSON.

    Each returned item is also written to fout, one per line, when fout is
    given. Returns an empty list on any non-200 response.
    """
    response = requests.get(f"{base_url}/entity-types")
    if response.status_code != 200:
        logging.error(f"status_code: {response.status_code}")
        return []
    results = response.json()
    if fout is not None:
        for result in results:
            fout.write(f"{result}\n")
    logging.info(f"Entities: {len(results)}")
    return results

#############################################################################
def GetEntity(ids, base_url=API_BASE_URL, fout=None):
    """Fetch ``{base_url}/entities/{id}`` for each id and tabulate the results.

    Each successful response contributes one row (or several, if the response
    is a JSON list). IDs with a non-200 response are logged and skipped.
    The rows are written to fout as TSV when fout is given.

    Returns a DataFrame of the fetched entities, or None if nothing was fetched.
    """
    rows = []
    for id_this in ids:
        response = requests.get(f"{base_url}/entities/{id_this}")
        if response.status_code != 200:
            logging.error(f"status_code: {response.status_code}")
            continue
        result = response.json()
        logging.debug(json.dumps(result, indent=2))
        # Iterating a JSON object would yield only its keys, so keep each
        # object whole as one record.
        rows.extend(result if isinstance(result, list) else [result])
    df = pd.DataFrame(rows) if rows else None
    if fout is not None and df is not None:
        df.to_csv(fout, sep="\t", index=False)
    logging.info(f"n_out: {len(rows)}")
    return df
8 | 9 | * [Badapple2-API](https://github.com/unmtransinfo/Badapple2-API) 10 | * [Badapple2 API-Docs](https://chiltepin.health.unm.edu/badapple2/apidocs/) 11 | 12 | ``` 13 | python -m BioClients.badapple.Client -h 14 | usage: Client.py [-h] [--smi SMI] [--ids IDS] [--i IFILE] 15 | [--db {badapple2,badapple_classic}] [--o OFILE] [--max_rings MAX_RINGS] 16 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v] 17 | {get_compound2scaffolds,get_scaffold_info,get_scaffold2compounds,get_scaffold2drugs} 18 | 19 | Badapple REST API client utility 20 | 21 | positional arguments: 22 | {get_compound2scaffolds,get_scaffold_info,get_scaffold2compounds,get_scaffold2drugs} 23 | OPERATION 24 | 25 | options: 26 | -h, --help show this help message and exit 27 | --smi SMI input SMILES 28 | --ids IDS input IDs, comma-separated 29 | --i IFILE input SMILES file (with optional appended NAME), or input 30 | IDs file 31 | --db {badapple2,badapple_classic} 32 | default=badapple2 33 | --o OFILE output file (TSV) 34 | --max_rings MAX_RINGS 35 | max rings 36 | --api_host API_HOST 37 | --api_base_path API_BASE_PATH 38 | -v, --verbose 39 | 40 | Example SMILES: OC(=O)C1=C2CCCC(C=C3C=CC(=O)C=C3)=C2NC2=CC=CC=C12 Example scaffold IDs: 41 | 46,50 42 | ``` 43 | -------------------------------------------------------------------------------- /doc/stringdb.md: -------------------------------------------------------------------------------- 1 | # `BioClients.stringdb` 2 | 3 | ## STRINGDB 4 | 5 | ## Usage 6 | 7 | ``` 8 | $ python3 -m BioClients.stringdb.Client -h 9 | 10 | usage: Client.py [-h] [--id ID] [--ids IDS] [--idfile IDFILE] [--o OFILE] 11 | [--species SPECIES] [--minscore MINSCORE] 12 | [--netflavor {evidence,confidence,actions}] 13 | [--imgfmt {image,highres_image,svg}] [--api_host API_HOST] 14 | [--api_base_path API_BASE_PATH] [-v] 15 | {getIds,getInteractionPartners,getNetwork,getNetworkImage,getEnrichment,getPPIEnrichment,getInteractors,getActions,getAbstracts} 16 | 17 | STRING-DB REST 
API client utility 18 | 19 | positional arguments: 20 | {getIds,getInteractionPartners,getNetwork,getNetworkImage,getEnrichment,getPPIEnrichment,getInteractors,getActions,getAbstracts} 21 | operation 22 | 23 | optional arguments: 24 | -h, --help show this help message and exit 25 | --id ID protein ID (ex:DRD1_HUMAN) 26 | --ids IDS protein IDs, comma-separated 27 | --idfile IDFILE input file, protein IDs 28 | --o OFILE output file 29 | --species SPECIES taxon code, ex: 9606 (human) 30 | --minscore MINSCORE signifcance threshold 0-1000 31 | --netflavor {evidence,confidence,actions} 32 | network flavor 33 | --imgfmt {image,highres_image,svg} 34 | image format 35 | --api_host API_HOST 36 | --api_base_path API_BASE_PATH 37 | -v, --verbose 38 | 39 | Example protein IDs: DRD1 DRD1_HUMAN DRD2 DRD2_HUMAN ; Example species: 9606 (human, via taxon identifiers, http://www.uniprot.org/taxonomy) ; Image formats: PNG PNG_highres SVG ; MAY BE DEPRECATED: getInteractors, getActions, getAbstracts 40 | ``` 41 | 42 | -------------------------------------------------------------------------------- /BioClients/bindingdb/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ############################################################################# 3 | ### BindingDb Utilities 4 | ### http://www.bindingdb.org/bind/BindingDBRESTfulAPI.jsp 5 | ### http://www.bindingdb.org/axis2/services/BDBService/getLigandsByUniprots?uniprot=P35355,Q8HZR1&cutoff=1000&code=0&response=application/json 6 | ############################################################################# 7 | import sys,os,re,time,json,logging 8 | import urllib.parse 9 | 10 | from ..util import rest 11 | # 12 | ############################################################################## 13 | def GetLigandsByUniprot(base_url, ids, ic50_max, fout): 14 | n_out=0; tags=None; 15 | for id_this in ids: 16 | rval = 
def GetAnnotations(base_url, mode, pmids, fout):
    """Fetch annotations for each PMID and write them to fout as TSV.

    For every PMID, ``{base_url}/{mode}/{pmid}/JSON`` is requested via
    ``rest.GetURL``. The response may be a single source object or a list of
    them; each entry of a source's ``denotations`` that has an ``obj`` and a
    ``span`` with ``begin`` and ``end`` yields one output row:
    sourcedb, sourceid, begin, end, obj_type, obj.

    ``obj`` is split at its first ':' into type and id; if there is no ':',
    the whole value is the type and the id is empty.
    """
    n_assn = 0
    n_hit = 0
    fout.write('sourcedb\tsourceid\tbegin\tend\tobj_type\tobj\n')
    for pmid in pmids:
        url = base_url+'/%s/%s/JSON'%(mode, pmid)
        rval = rest.GetURL(url, parse_json=True)
        if not rval:
            logging.info('not found: %s'%(pmid))
            continue

        n_assn_this = 0
        sources = rval if type(rval) is list else [rval]
        for source in sources:
            if not isinstance(source, dict):
                # Only JSON objects can carry denotations.
                continue
            sourceDb = source.get('sourcedb', '')
            sourceId = source.get('sourceid', '')
            for ann in source.get('denotations') or []:
                obj = ann.get('obj')
                span = ann.get('span') or {}
                begin = span.get('begin')
                end = span.get('end')
                # Compare against None, not truthiness: offset 0 is a valid position.
                if not obj or begin is None or end is None:
                    continue
                obj_type, _, obj_id = obj.partition(':')
                fout.write('%s\t%s\t%d\t%d\t%s\t%s\n'%(sourceDb, sourceId, begin, end, obj_type, obj_id))
                n_assn_this += 1
        if n_assn_this:
            n_hit += 1
        n_assn += n_assn_this

    logging.info('n_in = %d (PMIDs)'%(len(pmids)))
    logging.info('n_hit = %d (PMIDs with associations)'%(n_hit))
    logging.info('n_miss = %d (PMIDs with NO associations)'%(len(pmids)-n_hit))
    logging.info('n_assn = %d (total associations)'%(n_assn))
def GetData(base_uri, uids, ofmt, fout):
    """Fetch ``{base_uri}/{uid}.{ofmt}`` for each UID and write the text to fout.

    For ofmt 'tab' the first line (header) of every response after the first
    successful one is dropped, so the combined output has a single header.
    Other formats are written as returned. Need to handle xml, rdf better (merge).
    """
    n_prot = 0
    n_err = 0
    for uid in uids:
        rval = rest.GetURL(base_uri+'/%s.%s'%(uid, ofmt))
        if not rval:
            n_err += 1
            continue
        if ofmt == 'tab':
            lines = rval.splitlines()
            if n_prot > 0:
                lines = lines[1:]  # Skip duplicate headers.
            for line in lines:
                fout.write(line+'\n')
        else:
            fout.write(rval+'\n')
        n_prot += 1
    logging.info('n_in: %d; n_prot: %d; n_err: %d'%(len(uids), n_prot, n_err))

#############################################################################
def UIDs2JSON(base_uri, uids, fout):
    """Write metadata for the given UIDs to fout as indented JSON.

    Uses the uniprot library from Bosco Ho (https://github.com/boscoh/uniprot).
    The 'accs', 'sequence', 'go' and 'description' fields are removed from
    every record to keep the output simple. base_uri is accepted but not used.
    """
    import json  # Imported here because the file's import line does not include json.
    import uniprot  ## Bosco Ho (https://github.com/boscoh/uniprot)
    uniprot_data = uniprot.batch_uniprot_metadata(uids, None)
    for record in uniprot_data.values():
        # pop() over a fixed tuple: deleting while iterating the record's own
        # keys raises "dictionary changed size during iteration".
        for key in ('accs', 'sequence', 'go', 'description'):  #keep simple
            record.pop(key, None)
    json_txt = json.dumps(uniprot_data, sort_keys=True, indent=2)
    fout.write(json_txt+'\n')
    return
API_HOST='api.geneontology.org'
API_BASE_PATH='/api'
BASE_URL = 'https://'+API_HOST+API_BASE_PATH
#
##############################################################################
def GetEntities(ids, base_url=BASE_URL, fout=None):
    """For only one type of entity per call (gene, term).

    Requests ``{base_url}/bioentity/{id}`` for each ID. The keys of the first
    entity returned define the columns; later entities are read with the same
    keys (missing values become None). IDs whose request returns nothing, or
    something other than a JSON object, are logged and skipped.

    Returns the DataFrame (one row per entity), also written to fout as TSV
    when fout is given.
    """
    tags = []
    rows = []
    for id_this in ids:
        ent = rest.GetURL(base_url+'/bioentity/'+urllib.parse.quote(id_this), parse_json=True)
        logging.debug(json.dumps(ent, sort_keys=True, indent=2))
        if not isinstance(ent, dict):
            logging.warning('No entity returned for: {}'.format(id_this))
            continue
        if not tags:
            # list(): a dict_keys view cannot be indexed and must not track later changes.
            tags = list(ent.keys())
        rows.append({tag: ent.get(tag) for tag in tags})
    # Build the frame once; concatenating per row is quadratic.
    df = pd.DataFrame(rows, columns=tags)
    logging.info('n_ent: {}'.format(df.shape[0]))
    if fout: df.to_csv(fout, sep="\t", index=False)
    return df

##############################################################################
def GetGeneTerms(ids, base_url=BASE_URL, fout=None):
    """Get the function (GO term) associations of each gene ID.

    Requests ``{base_url}/bioentity/gene/{id}/function`` and collects every
    item of the response's ``associations`` list. The keys of the first
    association define the columns.

    Returns the DataFrame (one row per association), also written to fout as
    TSV when fout is given.
    """
    tags = []
    rows = []
    for id_this in ids:
        rval = rest.GetURL(base_url+'/bioentity/gene/{}/function'.format(urllib.parse.quote(id_this)), parse_json=True)
        # A failed request yields no mapping; treat it as "no associations".
        assns = (rval.get('associations') or []) if isinstance(rval, dict) else []
        for assn in assns:
            logging.debug(json.dumps(assn, sort_keys=True, indent=2))
            if not tags:
                tags = list(assn.keys())
            rows.append({tag: assn.get(tag) for tag in tags})
    df = pd.DataFrame(rows, columns=tags)
    logging.info('n_gene: {}; n_assn: {}'.format(len(ids), df.shape[0]))
    if fout: df.to_csv(fout, sep="\t", index=False)
    return df
def PostgreSqlConnect(dbhost, dbport, dbname, dbusr, dbpw):
    """Open a SQLAlchemy connection to PostgreSQL through psycopg2.

    Returns the connection, or None if psycopg2 cannot be imported or the
    engine cannot be created. Errors raised by ``engine.connect()`` itself
    are not caught.
    """
    try:
        import psycopg2
        # Escape user as well as password: both may contain URL-reserved characters.
        engine = sqlalchemy.create_engine(f"postgresql+psycopg2://{urllib.parse.quote_plus(str(dbusr))}:{urllib.parse.quote_plus(dbpw)}@{dbhost}:{dbport}/{dbname}")
    except Exception as e:
        logging.info(f"{e}")
        logging.error("Failed to connect.")
        return None
    return engine.connect()

##############################################################################
def MySqlConnect(dbhost, dbport, dbname, dbusr, dbpw):
    """Open a SQLAlchemy connection to MySQL with the first usable driver.

    Drivers are tried in this order:
      mysql-connector-python (https://pypi.org/project/mysql-connector-python/),
      PyMySQL (https://pypi.org/project/PyMySQL/),
      mysqlclient (https://mysqlclient.readthedocs.io/).

    Returns the connection, or None if no driver could be imported and used
    to create an engine. Errors raised by ``engine.connect()`` itself are not
    caught.
    """
    import importlib
    # (importable module, SQLAlchemy dialect suffix), in order of preference.
    drivers = (
        ("mysql.connector", "mysqlconnector"),
        ("pymysql", "pymysql"),
        ("MySQLdb", "mysqldb"),
    )
    for module_name, dialect in drivers:
        try:
            importlib.import_module(module_name)
            engine = sqlalchemy.create_engine(f"mysql+{dialect}://{urllib.parse.quote_plus(str(dbusr))}:{urllib.parse.quote_plus(dbpw)}@{dbhost}:{dbport}/{dbname}")
        except Exception as e:
            logging.info(f"{e}")
            continue
        return engine.connect()
    logging.error("Failed to connect.")
    return None
API_HOST="rss.ccs.miami.edu"
API_BASE_PATH="/rss-api"
BASE_URL = 'https://'+API_HOST+API_BASE_PATH
#
##############################################################################
def ListTargets(base_url=BASE_URL, fout=None):
    """Fetch ``{base_url}/target`` and return the targets as a DataFrame.

    The keys of the first target define the columns; a later target lacking
    one of them gets None in that column. A non-200 response yields an empty
    DataFrame. The table is written to fout as TSV when fout is given.
    """
    tags = []
    rows = []
    url = base_url+'/target'
    # NOTE(review): verify=False disables TLS certificate checking. Kept as in
    # the original; confirm whether the server still needs it.
    resp = requests.get(url, verify=False)
    targets = resp.json() if resp.status_code==200 else []
    for target in targets:
        logging.debug(json.dumps(target, sort_keys=True, indent=2))
        if not tags: tags = list(target.keys())
        rows.append({tag: target.get(tag) for tag in tags})
    # Build the frame once; concatenating per row is quadratic.
    df = pd.DataFrame(rows, columns=tags)
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info("n_out: {}".format(df.shape[0]))
    return(df)

##############################################################################
def GetTargetResources(ids, base_url=BASE_URL, fout=None):
    """Fetch ``{base_url}/target/id?id={id}`` for each ID and tabulate the resources.

    The keys of the first resource define the columns. IDs with a non-200
    response contribute no rows. Returns the DataFrame, also written to fout
    as TSV when fout is given.
    """
    tags = []
    rows = []
    for id_this in ids:
        # Escape the ID so reserved characters cannot alter the query string.
        url_this = base_url+f'/target/id?id={urllib.parse.quote(str(id_this), safe="")}'
        # NOTE(review): verify=False disables TLS certificate checking (see ListTargets).
        resp = requests.get(url_this, verify=False)
        resources = resp.json() if resp.status_code==200 else []
        for resource in resources:
            logging.debug(json.dumps(resource, sort_keys=True, indent=2))
            if not tags: tags = list(resource.keys())
            rows.append({tag: resource.get(tag) for tag in tags})
    df = pd.DataFrame(rows, columns=tags)
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info("n_out: {}".format(df.shape[0]))
    return(df)
| Utility app for the CAS REST API. 4 | """ 5 | ### 6 | import sys,os,re,argparse,time,logging 7 | # 8 | from .. import cas 9 | # 10 | ############################################################################## 11 | if __name__=='__main__': 12 | ops = ["get_rn2details", "get_rn2image"] 13 | parser = argparse.ArgumentParser(description="CAS REST client") 14 | parser.add_argument("op", choices=ops,help='OPERATION') 15 | parser.add_argument("--i", dest="ifile", help="input IDs file (CAS Registry Number)") 16 | parser.add_argument("--ids", help="input IDs (CAS Registry Number) (comma-separated)") 17 | parser.add_argument("--o", dest="ofile", help="output (usually TSV)") 18 | parser.add_argument("--api_host", default=cas.API_HOST) 19 | parser.add_argument("--api_base_path", default=cas.API_BASE_PATH) 20 | parser.add_argument("--skip", type=int, default=0) 21 | parser.add_argument("--nmax", type=int, default=0) 22 | parser.add_argument("-v", "--verbose", default=0, action="count") 23 | args = parser.parse_args() 24 | 25 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 26 | 27 | base_url = 'https://'+args.api_host+args.api_base_path 28 | 29 | fout = open(args.ofile, "w") if args.ofile else sys.stdout 30 | 31 | ids=[] 32 | if args.ifile: 33 | fin = open(args.ifile) 34 | while True: 35 | line = fin.readline() 36 | if not line: break 37 | ids.append(line.rstrip()) 38 | fin.close() 39 | elif args.ids: 40 | ids = re.split(r'[,\s]+', args.ids) 41 | logging.info(f"Input IDs: {len(ids)}") 42 | 43 | t0=time.time() 44 | 45 | if args.op == 'get_rn2details': 46 | cas.GetRN2Details(ids, base_url, fout) 47 | 48 | else: 49 | parser.error(f"Invalid operation: {args.op}") 50 | 51 | logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0))))) 52 | 53 | -------------------------------------------------------------------------------- /BioClients/util/neo4j/Utils.py: 
DBHOST="localhost"
DBPORT=7687
DBSCHEME="bolt"
DBUSR="neo4j"
DBPW="neo4j"

#############################################################################
def DbConnect(dbhost=DBHOST, dbport=DBPORT, dbscheme=DBSCHEME, dbusr=DBUSR, dbpw=DBPW, secure=False):
    """Create a py2neo.GraphService for the given server.

    Returns the service object, or None if construction raised (the error is
    logged).
    """
    db = None
    try:
        db = py2neo.GraphService(host=dbhost, port=dbport, scheme=dbscheme, secure=secure, user=dbusr, password=dbpw)
    except Exception as e:
        logging.error(f"{e}")
    return db

#############################################################################
def DbInfo(db, fout):
    """Write the service's uri, kernel_version, default_graph and product to fout (TSV, one per row)."""
    logging.debug(f"db.config: {db.config}")
    df = pd.DataFrame({"uri": [db.uri],
        "kernel_version": [db.kernel_version],
        "default_graph": [db.default_graph],
        "product": [db.product]})
    df.transpose().to_csv(fout, sep="\t")

#############################################################################
def DbSummary(db, fout):
    """Write node count, relationship count and schema of the default graph to fout (TSV, one per row)."""
    g = db.default_graph
    df = pd.DataFrame({"nodes": [len(g.nodes)],
        "relationships": [len(g.relationships)],
        "schema": [g.schema],
        })
    df.transpose().to_csv(fout, sep="\t")

#############################################################################
def DbQuery(db, cql, fmt, fout):
    """Run a Cypher query on the default graph and write the result to fout.

    fmt 'JSON' (case-insensitive) writes the rows as an indented JSON list;
    any other value writes TSV with a header row.
    """
    n_out = 0
    g = db.default_graph
    result = g.run(cql)
    if fmt.upper()=='JSON':
        rows = result.data()
        n_out = len(rows)
        fout.write(json.dumps(rows, indent=2)+'\n')
    else: #TSV
        df = result.to_data_frame()
        n_out = df.shape[0]
        # sep must be a keyword: positional use is deprecated in pandas 2.
        df.to_csv(fout, sep='\t', index=False)
    logging.info(f"rows: {n_out}")
############################################################################# 59 | -------------------------------------------------------------------------------- /BioClients/cdc/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ############################################################################# 3 | ### CDC REST API client 4 | ### https://tools.cdc.gov/api/docs/info.aspx 5 | ### https://tools.cdc.gov/api/v2/resources 6 | ############################################################################# 7 | import sys,os,argparse,re,time,logging 8 | # 9 | from .. import cdc 10 | # 11 | ############################################################################# 12 | if __name__=='__main__': 13 | API_HOST="tools.cdc.gov" 14 | API_BASE_PATH="/api/v2/resources" 15 | ops = ["list_sources", "list_topics", "list_organizations", "list_audiences"] 16 | parser = argparse.ArgumentParser( description='CDC REST API client utility') 17 | parser.add_argument("op", choices=ops, help='OPERATION (select one)') 18 | parser.add_argument("--i", dest="ifile", help="input file") 19 | parser.add_argument("--nmax", help="list: max to return") 20 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 21 | parser.add_argument("--api_host", default=API_HOST) 22 | parser.add_argument("--api_base_path", default=API_BASE_PATH) 23 | parser.add_argument("-v", "--verbose", action="count", default=0) 24 | args = parser.parse_args() 25 | 26 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 27 | 28 | api_base_url='https://'+args.api_host+args.api_base_path 29 | 30 | fout = open(args.ofile, "w+") if args.ofile else sys.stdout 31 | 32 | if args.op=="list_sources": 33 | cdc.ListResources(api_base_url, 'sources', fout) 34 | 35 | elif args.op=="list_topics": 36 | cdc.ListResources(api_base_url, 'topics', fout) 37 | 38 | elif 
args.op=="list_organizations": 39 | cdc.ListResources(api_base_url, 'organizations', fout) 40 | 41 | elif args.op=="list_audiences": 42 | cdc.ListResources(api_base_url, 'audiences', fout) 43 | 44 | else: 45 | parser.error('Operation invalid: {}'.format(args.op)) 46 | -------------------------------------------------------------------------------- /BioClients/util/hdf/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ### 3 | 4 | import sys,os,time,argparse,logging 5 | import numpy as np 6 | import pandas as pd 7 | import h5py 8 | 9 | ### 10 | def h5_type(ob): 11 | return "Group" if type(ob) is h5py.Group else "Dataset" if type(ob) is h5py.Dataset else type(ob) 12 | 13 | ### 14 | def list_all(name, ob): 15 | logging.info(f"{ob.name} ({h5_type(ob)})") 16 | if type(ob) is h5py.Dataset: 17 | logging.info(f"Dataset dtype: {ob.dtype}; shape: {ob.shape}; size: {ob.size}; ndim: {ob.ndim}; nbytes: {ob.nbytes}") 18 | return None 19 | 20 | ############################################################################# 21 | def Summary(f): 22 | logging.debug(list(f.keys())) 23 | for k in list(f.keys()): 24 | if type(f[k]) is h5py.Group: 25 | logging.debug(f"{f.name}:{f[k].name}") 26 | f[k].visititems(list_all) 27 | else: 28 | logging.info(f"{f[k].name} ({h5_type(f[k])})") 29 | 30 | ############################################################################# 31 | if __name__=='__main__': 32 | parser = argparse.ArgumentParser(description='H5 file operations', epilog="") 33 | OPS = ['summary'] 34 | parser.add_argument("op", choices=OPS, help='OPERATION') 35 | parser.add_argument("--i", dest="ifile", required=True, help="input file") 36 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 37 | parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0) 38 | args = parser.parse_args() 39 | 40 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if 
args.verbose>1 else logging.INFO)) 41 | 42 | t0 = time.time(); 43 | 44 | fout = open(args.ofile, "w") if args.ofile else sys.stdout 45 | 46 | finh5 = h5py.File(args.ifile, 'r') 47 | 48 | if args.op == "summary": 49 | Summary(finh5) 50 | 51 | else: 52 | parser.error(f"Unsupported operation: {args.op}") 53 | 54 | logging.info(f"Elapsed: {time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))}") 55 | 56 | -------------------------------------------------------------------------------- /BioClients/geneontology/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | GeneOntology client. 4 | """ 5 | ### 6 | import os,sys,argparse,re,json,time,logging 7 | import urllib.parse 8 | 9 | from .. import geneontology 10 | # 11 | ############################################################################## 12 | if __name__=='__main__': 13 | epilog="""\ 14 | """ 15 | parser = argparse.ArgumentParser(description='GeneOntolgy API client', epilog=epilog) 16 | ops=['list_terms', 'list_genes', 'get_entities', 'get_geneTerms' ] 17 | parser.add_argument("op", choices=ops, help='OPERATION') 18 | parser.add_argument("--i", dest="ifile", help="input file of IDs") 19 | parser.add_argument("--ids", help="ID list (comma-separated)(e.g. 
NCBIGene:84570, GO:0006954)") 20 | parser.add_argument("--o", dest="ofile", help="output file (TSV)") 21 | parser.add_argument("--api_host", default=geneontology.API_HOST) 22 | parser.add_argument("--api_base_path", default=geneontology.API_BASE_PATH) 23 | parser.add_argument("-v", "--verbose", default=0, action="count") 24 | args = parser.parse_args() 25 | 26 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 27 | 28 | BASE_URL = 'https://'+args.api_host+args.api_base_path 29 | 30 | fout = open(args.ofile, "w+") if args.ofile else sys.stdout 31 | 32 | if args.ids: 33 | ids = re.split(r'[,\s]+', args.ids.strip()) 34 | elif args.ifile: 35 | with open(args.ifile) as fin: 36 | while True: 37 | line = fin.readline() 38 | if not line: break 39 | ids.append(line.rstrip()) 40 | 41 | if args.op == 'list_terms': 42 | geneontology.ListTerms(BASE_URL, fout) 43 | 44 | elif args.op == 'list_genes': 45 | geneontology.ListGenes(BASE_URL, fout) 46 | 47 | elif args.op == 'get_entities': 48 | geneontology.GetEntities(ids, BASE_URL, fout) 49 | 50 | elif args.op == 'get_geneTerms': 51 | geneontology.GetGeneTerms(ids, BASE_URL, fout) 52 | 53 | else: 54 | parser.error(f'Invalid operation: {args.op}') 55 | 56 | -------------------------------------------------------------------------------- /BioClients/maayanlab/harmonizome/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | See: http://amp.pharm.mssm.edu/Harmonizome/documentation 4 | """ 5 | ### 6 | import sys,os,re,argparse,time,json,logging 7 | # 8 | from ... 
import maayanlab 9 | # 10 | # 11 | ############################################################################## 12 | if __name__=='__main__': 13 | parser = argparse.ArgumentParser(description='MaayanLab Harmonizome REST API client') 14 | ops = [ 'get_gene', 'get_gene_associations' ] 15 | parser.add_argument("op", choices=ops, help='operation') 16 | parser.add_argument("--i", dest="ifile", help="input IDs") 17 | parser.add_argument("--ids", help="input IDs (comma-separated)") 18 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 19 | parser.add_argument("--api_host", default=maayanlab.harmonizome.API_HOST) 20 | parser.add_argument("--api_base_path", default=maayanlab.harmonizome.API_BASE_PATH) 21 | parser.add_argument("-v", "--verbose", default=0, action="count") 22 | args = parser.parse_args() 23 | 24 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 25 | 26 | base_url = 'https://'+args.api_host+args.api_base_path 27 | 28 | fout = open(args.ofile,"w") if args.ofile else sys.stdout 29 | 30 | t0=time.time() 31 | 32 | ids=[]; 33 | if args.ifile: 34 | fin = open(args.ifile) 35 | while True: 36 | line = fin.readline() 37 | if not line: break 38 | if line.rstrip(): ids.append(line.rstrip()) 39 | fin.close() 40 | elif args.ids: 41 | ids = re.split(r'[,\s]+', args.ids) 42 | logging.info(f"Input queries: {len(ids)}") 43 | 44 | if args.op == "get_gene": 45 | maayanlab.harmonizome.Utils.GetGene(ids, base_url, fout) 46 | 47 | elif args.op == "get_gene_associations": 48 | maayanlab.harmonizome.Utils.GetGeneAssociations(ids, base_url, fout) 49 | 50 | else: 51 | parser.error(f"Invalid operation: {args.op}") 52 | 53 | logging.info(f"elapsed time: {time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))}") 54 | -------------------------------------------------------------------------------- /BioClients/wikidata/Client.py: 
#!/usr/bin/env python3
"""
https://query.wikidata.org/sparql

https://www.wikidata.org/wiki/User:ProteinBoxBot/SPARQL_Examples

PREFIX wd:
PREFIX wdt:
PREFIX bd:
PREFIX up:
PREFIX uniprotkb:

GeneWiki:
 *
"""
###
import sys,os,argparse,logging
import pandas as pd

import wikidataintegrator

from .. import wikidata

#############################################################################
if __name__=="__main__":
    # Command-line front end: parse arguments, resolve the SPARQL text and the
    # output stream, then dispatch to the matching function in the wikidata package.
    parser = argparse.ArgumentParser(description="Wikidata utilities", epilog="")
    ops = ["query", "list_drugTargetPairs", "list_geneDiseasePairs", ]
    parser.add_argument("op", choices=ops, help="OPERATION")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--rqfile", help="input Sparql file")
    parser.add_argument("--rq", help="input Sparql string")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format="%(levelname)s:%(message)s", level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    logging.debug(f"Python: {sys.version.split()[0]}; wikidataintegrator: {wikidataintegrator.__version__}")

    # --rqfile takes precedence over --rq; the file is closed as soon as it is read.
    if args.rqfile:
        with open(args.rqfile) as frq:
            rq = frq.read()
    else:
        rq = args.rq if args.rq else None

    # Validate before opening the output so a bad command line does not
    # create or truncate the output file.
    if args.op == "query" and not rq:
        parser.error(f"--rq or --rqfile required for: {args.op}")

    fout = open(args.ofile, "w") if args.ofile else sys.stdout
    try:
        if args.op == "query":
            wikidata.Query(rq, fout)

        elif args.op == "list_drugTargetPairs":
            wikidata.ListDrugTargetPairs(fout)

        elif args.op == "list_geneDiseasePairs":
            wikidata.ListGeneDiseasePairs(fout)

        else:
            # Defensive only: argparse "choices" already rejects unknown operations.
            parser.error(f"Unknown operation: {args.op}")
    finally:
        # Close (and thereby flush) a file we opened; never close stdout.
        if fout is not sys.stdout:
            fout.close()
/BioClients/idg/rss/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | IDG Resource Submission System 4 | https://rss.ccs.miami.edu/ 5 | """ 6 | ### 7 | import sys,os,re,argparse,time,logging 8 | # 9 | from ...idg import rss 10 | # 11 | ############################################################################## 12 | if __name__=='__main__': 13 | epilog="""\ 14 | """ 15 | parser = argparse.ArgumentParser(description="IDG RSS (Resource Submission System) REST API client)", epilog=epilog) 16 | ops = [ "list_targets", "get_target_resources" ] 17 | parser.add_argument("op", choices=ops, help="operation") 18 | parser.add_argument("--i", dest="ifile", help="input IDs") 19 | parser.add_argument("--ids", help="IDs (comma-separated)") 20 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 21 | parser.add_argument("--api_host", default=rss.Utils.API_HOST) 22 | parser.add_argument("--api_base_path", default=rss.Utils.API_BASE_PATH) 23 | parser.add_argument("-v", "--verbose", default=0, action="count") 24 | args = parser.parse_args() 25 | 26 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 27 | 28 | fout = open(args.ofile, "w") if args.ofile else sys.stdout 29 | 30 | ids=[] 31 | if args.ifile: 32 | fin = open(args.ifile) 33 | while True: 34 | line = fin.readline() 35 | if not line: break 36 | ids.append(line.rstrip()) 37 | fin.close() 38 | elif args.ids: 39 | ids = re.split('[,\s]+', args.ids.strip()) 40 | if ids: logging.info('Input IDs: %d'%(len(ids))) 41 | 42 | t0=time.time() 43 | 44 | base_url = 'https://'+args.api_host+args.api_base_path 45 | 46 | if args.op=='list_targets': 47 | rss.Utils.ListTargets(base_url, fout) 48 | 49 | elif args.op=='get_target_resources': 50 | if not ids: parser.error(f"--i or --ids required for {args.op}") 51 | rss.Utils.GetTargetResources(ids, base_url, fout) 52 | 53 | else: 54 | 
parser.error(f'Unknown operation: {args.op}') 55 | 56 | logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0))))) 57 | -------------------------------------------------------------------------------- /BioClients/wikipathways/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ############################################################################## 3 | ### Utilities for Wikipathways REST API. 4 | ### See: http://www.wikipathways.org/index.php/Help:WikiPathways_Webservice/API 5 | ############################################################################## 6 | import sys,os,re,time,logging 7 | import urllib.parse 8 | # 9 | from ..util import rest 10 | # 11 | ############################################################################## 12 | def ListOrganisms(base_url, fout): 13 | n_all=0; n_out=0; n_err=0; 14 | url=base_url+'/listOrganisms?format=json' 15 | rval=rest.GetURL(url, parse_json=True) 16 | organisms = rval['organisms'] 17 | fout.write('Organism\n') 18 | for organism in organisms: 19 | fout.write('%s\n'%(organism)) 20 | n_out+=1 21 | logging.info('n_out: %d'%(n_out)) 22 | 23 | ############################################################################## 24 | def ListPathways(base_url, params, fout): 25 | n_all=0; n_out=0; n_err=0; 26 | url = base_url+'/listPathways?format=json' 27 | if params['human']: 28 | url+=('&organism=%s'%urllib.parse.quote('Homo sapiens')) 29 | rval=rest.GetURL(url, parse_json=True) 30 | pathways = rval['pathways'] 31 | tags=[]; 32 | for pathway in pathways: 33 | n_all+=1 34 | if n_all==1 or not tags: 35 | tags = sorted(pathway.keys()) 36 | fout.write('\t'.join(tags)+'\n') 37 | vals = [pathway[tag] if tag in pathway else '' for tag in tags] 38 | fout.write((','.join(vals))+'\n') 39 | n_out+=1 40 | logging.info('n_all: %d; n_out: %d; n_err: %d'%(n_all,n_out,n_err)) 41 | 42 | 
############################################################################## 43 | def GetPathway(base_url, id_query, ofmt, fout): 44 | n_all=0; n_out=0; n_err=0; 45 | if ofmt.lower() == 'gpml': 46 | url = base_url+'/getPathway?pwId=%s&revision=0'%id_query 47 | else: 48 | url = base_url+'/index.php?method=getPathwayAs&fileType=%s&pwId=%s&revision=0'%(ofmt.lower(), id_query) 49 | rval = rest.GetURL(url, parse_json=False) 50 | fout.write(rval) 51 | 52 | -------------------------------------------------------------------------------- /BioClients/lincs/sigcom/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ############################################################################# 3 | ### https://maayanlab.cloud/sigcom-lincs/#/API 4 | ############################################################################# 5 | import sys,os,re,argparse,json,logging 6 | # 7 | from ...lincs import sigcom as sigcom_lincs 8 | # 9 | ############################################################################# 10 | if __name__=="__main__": 11 | parser = argparse.ArgumentParser(description='CLUE.IO REST API client utility') 12 | ops = ['getResources', ] 13 | parser.add_argument("op", choices=ops, help='OPERATION') 14 | parser.add_argument("--ids", help="IDs, comma-separated") 15 | parser.add_argument("--i", dest="ifile", help="input file, IDs") 16 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 17 | parser.add_argument("--nmax", type=int, default=1000, help="max results") 18 | parser.add_argument("--skip", type=int, default=0, help="skip results") 19 | parser.add_argument("--api_host", default=sigcom_lincs.Utils.API_HOST) 20 | parser.add_argument("--api_base_path", default=sigcom_lincs.Utils.API_BASE_PATH) 21 | parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0) 22 | args = parser.parse_args() 23 | 24 | logging.basicConfig(format='%(levelname)s:%(message)s', 
level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 25 | 26 | base_url = 'https://'+args.api_host+args.api_base_path 27 | 28 | fout = open(args.ofile, "w+") if args.ofile else sys.stdout 29 | 30 | if args.ifile: 31 | fin = open(args.ifile) 32 | ids=[]; 33 | while True: 34 | line = fin.readline() 35 | if not line: break 36 | ids.append(line.rstrip()) 37 | fin.close() 38 | logging.info(f"input queries: {len(ids)}") 39 | elif args.ids: 40 | ids = re.split('[, ]+', args.ids.strip()) 41 | 42 | if args.op=='getResources': 43 | if not ids: parser.error('--ids or --i required.') 44 | sigcom_lincs.Utils.GetResources(ids, base_url, fout) 45 | 46 | else: 47 | parser.error(f"Unsupported operation: {args.op}") 48 | -------------------------------------------------------------------------------- /BioClients/icite/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | PubMed iCite REST API client 4 | https://icite.od.nih.gov/api 5 | """ 6 | ### 7 | import sys,os,re,argparse,logging 8 | # 9 | from .. 
import icite 10 | 11 | ############################################################################# 12 | if __name__=='__main__': 13 | parser = argparse.ArgumentParser(description='PubMed iCite REST API client utility', epilog='Publication metadata.') 14 | ops = ['get_stats'] 15 | parser.add_argument("op", choices=ops, help='OPERATION') 16 | parser.add_argument("--ids", help="PubMed IDs, comma-separated (ex:25533513)") 17 | parser.add_argument("--i", dest="ifile", help="input file, PubMed IDs") 18 | parser.add_argument("--nmax", help="list: max to return") 19 | parser.add_argument("--year", help="list: year of publication") 20 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 21 | parser.add_argument("--api_host", default=icite.API_HOST) 22 | parser.add_argument("--api_base_path", default=icite.API_BASE_PATH) 23 | parser.add_argument("-v", "--verbose", default=0, action="count") 24 | parser.add_argument("-q", "--quiet", action="store_true", help="Suppress progress notification.") 25 | args = parser.parse_args() 26 | 27 | # logging.PROGRESS = 15 (custom) 28 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>0 else logging.ERROR if args.quiet else 15)) 29 | 30 | base_url='https://'+args.api_host+args.api_base_path 31 | 32 | fout = open(args.ofile, "w", encoding="utf-8") if args.ofile else sys.stdout 33 | 34 | ids=[]; 35 | if args.ifile: 36 | fin = open(args.ifile) 37 | while True: 38 | line = fin.readline() 39 | if not line: break 40 | ids.append(line.rstrip()) 41 | logging.info('Input IDs: %d'%(len(ids))) 42 | fin.close() 43 | elif args.ids: 44 | ids = re.split(r'[\s,]+', args.ids.strip()) 45 | 46 | if args.op == 'get_stats': 47 | if not ids: parser.error(f'Operation requires PMID[s]: {args.op}') 48 | icite.GetStats(ids, base_url, fout) 49 | 50 | else: 51 | parser.error(f"Invalid operation: {args.op}") 52 | 53 | -------------------------------------------------------------------------------- 
/BioClients/entrez/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Utility app for the NIH NCBI Entrez E-Utils via Entrezpy. 4 | """ 5 | ### 6 | import sys,os,re,argparse,time,logging 7 | # 8 | from .. import entrez 9 | # 10 | ############################################################################## 11 | if __name__=='__main__': 12 | OPS = [ "test", "get_esummary", "get_record", ] 13 | parser = argparse.ArgumentParser(description="NIH NCBI Entrez client") 14 | parser.add_argument("op", choices=OPS, help="OPERATION") 15 | parser.add_argument("--i", dest="ifile", help="input IDs file (PMIDs)") 16 | parser.add_argument("--ids", help="input IDs (comma-separated)") 17 | parser.add_argument("--o", dest="ofile", help="output (usually TSV)") 18 | parser.add_argument("--email", dest="email", help="user email address") 19 | #parser.add_argument("--api_host", default=entrez.API_HOST) 20 | #parser.add_argument("--api_base_path", default=entrez.API_BASE_PATH) 21 | parser.add_argument("--skip", type=int, default=0) 22 | parser.add_argument("--nmax", type=int, default=None) 23 | parser.add_argument("-q", "--quiet", action="store_true", help="Suppress progress notification.") 24 | parser.add_argument("-v", "--verbose", default=0, action="count") 25 | args = parser.parse_args() 26 | 27 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>0 else logging.ERROR if args.quiet else 15)) 28 | 29 | #base_url = f"https://{args.api_host}{args.api_base_path}" 30 | 31 | fout = open(args.ofile, "w") if args.ofile else sys.stdout 32 | 33 | ids=[] 34 | if args.ifile: 35 | fin = open(args.ifile) 36 | while True: 37 | line = fin.readline() 38 | if not line: break 39 | ids.append(line.rstrip()) 40 | fin.close() 41 | elif args.ids: 42 | ids = re.split(r'[,\s]+', args.ids) 43 | logging.info(f"Input IDs: {len(ids)}") 44 | 45 | t0=time.time() 46 | 47 | if args.op == 'test': 
48 | entrez.Test(args.email) 49 | 50 | else: 51 | parser.error(f"Invalid operation: {args.op}") 52 | 53 | logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0))))) 54 | 55 | -------------------------------------------------------------------------------- /BioClients/cas/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Utility functions for the CAS REST API. 4 | """ 5 | ### 6 | import sys,os,io,re,csv,json,time,logging,tempfile,tqdm 7 | import requests 8 | import urllib.request,urllib.parse 9 | import pandas as pd 10 | # 11 | # 12 | API_HOST='commonchemistry.cas.org' 13 | API_BASE_PATH='/api' 14 | BASE_URL="https://"+API_HOST+API_BASE_PATH 15 | # 16 | # https://commonchemistry.cas.org/api/detail?uri=substance%2Fpt%2F50000 17 | ############################################################################# 18 | def GetRN2Details(ids, base_url=BASE_URL, fout=None): 19 | n_out=0; n_err=0; tags=None; df=None; tq=None; 20 | for i,id_this in enumerate(ids): 21 | uri = urllib.parse.quote(f"substance/pt/{id_this}") 22 | url = (base_url+f"/detail?uri={uri}") 23 | if tq is None: tq = tqdm.tqdm(total=len(ids), unit="mols") 24 | tq.update(n=1) 25 | response = requests.get(url, headers={"Accept": "application/json"}) 26 | logging.debug(response.text) 27 | if response.status_code==requests.codes.not_found: 28 | continue 29 | if response.status_code!=requests.codes.ok: 30 | logging.error(f"HTTP status_code: {response.status_code}") 31 | mol = response.json() 32 | if not tags: 33 | tags = list(mol.keys()) 34 | for tag in tags[:]: 35 | if type(mol[tag]) in (dict, list, tuple): 36 | tags.remove(tag) 37 | logging.debug(f'Ignoring field: "{tag}"') 38 | if "image" in tags: 39 | tags.remove("image") 40 | logging.debug(f'Ignoring field: "image"') 41 | synonyms = mol["synonyms"] if "synonyms" in mol else [] 42 | replacedRns = mol["replacedRns"] if "replacedRns" in mol else [] 
43 | data_this = {tag:[mol[tag]] for tag in tags} 44 | data_this["synonyms"] = ",".join(synonyms) 45 | data_this["replacedRns"] = ",".join(replacedRns) 46 | df_this = pd.DataFrame(data_this) 47 | if fout is not None: 48 | df_this.to_csv(fout, sep='\t', index=False, header=bool(n_out==0)) 49 | else: 50 | df = pd.concat([df, df_this]) 51 | n_out+=1 52 | tq.close() 53 | logging.info(f"Input IDs: {len(ids)}; Output records: {n_out}") 54 | return df 55 | -------------------------------------------------------------------------------- /BioClients/mygene/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ### 3 | # https://mygene.info/ 4 | # https://pypi.org/project/mygene/ 5 | ### 6 | import sys,os,re,argparse,time,logging 7 | import pandas as pd 8 | import mygene as mg 9 | # 10 | from .. import mygene as bc_mygene 11 | # 12 | ############################################################################# 13 | if __name__=='__main__': 14 | epilog = "See https://mygene.info/, https://pypi.org/project/mygene/. 
Example queries: 'cdk2', 'symbol:cdk2', 'symbol:cdk*'" 15 | ops = ["get", "search"] 16 | parser = argparse.ArgumentParser(description='MyGene API client', epilog=epilog) 17 | parser.add_argument("op", choices=ops, help="OPERATION") 18 | parser.add_argument("--i", dest="ifile", help="input gene IDs or queries") 19 | parser.add_argument("--ids", help="input gene IDs or queries, comma-separated") 20 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 21 | parser.add_argument("--species", default="human", help="species name or taxonomy ID") 22 | parser.add_argument("--fields", default=bc_mygene.FIELDS, help="requested fields") 23 | parser.add_argument("-v", "--verbose", action="count", default=0) 24 | args = parser.parse_args() 25 | 26 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 27 | 28 | fout = open(args.ofile, 'w') if args.ofile else sys.stdout 29 | 30 | t0 = time.time() 31 | logging.info('Python: {}; mygene: {}'.format(sys.version.split()[0], mg.__version__)) 32 | 33 | if args.ifile: 34 | genes = pd.read_table(args.ifile, header=None, names=["ID"]) 35 | ids = list(genes.ID) 36 | elif args.ids: 37 | ids = re.split(r'[,\s]+', args.ids) 38 | else: 39 | parser.error("Input IDs required via --i or --ids.") 40 | 41 | fields = re.split(r'[,\s]+', args.fields) 42 | 43 | if args.op=="get": 44 | bc_mygene.GetGenes(ids, fields, fout) 45 | 46 | elif args.op=="search": 47 | bc_mygene.SearchGenes(ids, args.species, fout) 48 | 49 | else: 50 | parser.error(f"Invalid operation: {args.op}") 51 | 52 | logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))) 53 | 54 | -------------------------------------------------------------------------------- /BioClients/util/obo/Utils.py: -------------------------------------------------------------------------------- 1 | #! 
"""
Convert the [Term] stanzas of an OBO ontology file to tab-separated rows.

Developed and tested with doid.obo (Disease Ontology)
"""
import sys,os,argparse,re,logging

#############################################################################
def _term_to_tsv(reclines, tags):
    """Return the TSV line (with newline) for one [Term] stanza, or None if it must be skipped."""
    row = OBO2CSV_Record(reclines)
    if row is None:
        return None
    vals = []
    is_obsolete = False
    for tag in tags:
        val = row.get(tag, '')
        if tag in ('def', 'synonym'):
            # Keep the leading quoted text; drop the trailing scope/xref part.
            val = re.sub(r'^"([^"]*)".*$', r'\1', val)
        else:
            # Strip quotes only when they enclose the whole value.
            val = re.sub(r'^"(.*)"$', r'\1', val)
        if tag == 'is_obsolete':
            is_obsolete = (val.lower() == "true")
        vals.append(val)
    return None if is_obsolete else '\t'.join(vals)+'\n'

#############################################################################
def OBO2CSV(fin, fout):
    """Read OBO text from fin and write one TSV row per non-obsolete [Term] to fout.

    fin needs a readline() method and fout a write() method. A header row with
    the fixed tag list is written first. Only [Term] stanzas are converted;
    everything outside them is ignored. A stanza ends at a blank line, at the
    next stanza header, or at end of input, so the last term is not lost when
    the file lacks a trailing blank line. Repeated tags are joined with ';'
    by OBO2CSV_Record. Counts of input lines, records and output rows are
    logged at INFO level.
    """
    tags = ['id', 'name', 'namespace', 'alt_id', 'def', 'subset', 'synonym', 'xref', 'is_a', 'is_obsolete']
    n_in = 0
    n_rec = 0
    n_out = 0
    fout.write('\t'.join(tags)+'\n')
    reclines = []
    at_eof = False
    while not at_eof:
        line = fin.readline()
        if line:
            n_in += 1
            line = line.strip()
        else:
            at_eof = True
        is_header = (not at_eof) and line.startswith('[') and line.endswith(']')
        if reclines and (at_eof or is_header or line == ''):
            # The current stanza is complete: convert and emit it.
            n_rec += 1
            tsv = _term_to_tsv(reclines, tags)
            if tsv is not None:
                fout.write(tsv)
                n_out += 1
            reclines = []
        if at_eof:
            break
        if is_header:
            # Only [Term] stanzas are collected; other stanza types are skipped.
            if line == '[Term]':
                reclines = [line]
        elif reclines and line != '':
            reclines.append(line)

    logging.info("input lines: %d; input records: %d ; output lines: %d"%(n_in, n_rec, n_out))

#############################################################################
def OBO2CSV_Record(reclines):
    """Parse the stripped lines of one [Term] stanza into a dict of tag -> value.

    reclines[0] must be the literal '[Term]'; otherwise an error is logged and
    None is returned (also for an empty list). Trailing '! comment' text is
    removed from every line. Lines without a 'tag: value' separator, such as
    comment-only lines, are skipped. xref values that are not of the form
    PREFIX:ID are dropped. Values of a repeated tag are joined with ';'.
    """
    if not reclines or reclines[0] != '[Term]':
        logging.error('reclines[0] = "%s"'%(reclines[0] if reclines else ''))
        return None
    vals = {}
    for line in reclines[1:]:
        line = re.sub(r'\s*!.*$', '', line)
        parts = re.split(r':\s*', line, maxsplit=1)
        if len(parts) != 2:
            continue  # no tag/value separator, nothing to record
        k, v = parts
        if k == 'xref' and not re.match(r'\S+:\S+$', v):
            continue
        previous = vals.get(k, '')
        vals[k] = '%s%s%s'%(previous, (';' if previous else ''), v)
    return vals

#############################################################################
/BioClients/pubchem/ftp/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ############################################################################# 3 | ### For accessing files via FTP site; ftp://ftp.ncbi.nlm.nih.gov/pubchem/ 4 | ############################################################################# 5 | import sys,os,re,time,argparse,logging 6 | 7 | from ... import pubchem 8 | 9 | FTP_URL='ftp://ftp.ncbi.nlm.nih.gov/pubchem' 10 | POLL_WAIT=10 11 | MAX_WAIT=600 12 | 13 | ############################################################################# 14 | if __name__=='__main__': 15 | epilog=""" 16 | FTP_URL: {0}""".format(FTP_URL) 17 | parser = argparse.ArgumentParser(description="access PubChem FTP site", epilog=epilog) 18 | parser.add_argument("--ftp_get", help="path of file") 19 | parser.add_argument("--ftp_ls", help="path of dir") 20 | parser.add_argument("--ftp_url", default=FTP_URL) 21 | parser.add_argument("--skip", type=int, default=0) 22 | parser.add_argument("--nmax", type=int, default=0) 23 | parser.add_argument("--ftp_ntries", type=int, default=20, help="max tries per ftp-get") 24 | parser.add_argument("--sdf2smi", action="store_true", help="convert SDF to SMILES") 25 | parser.add_argument("--o", dest="ofile", help="output file") 26 | parser.add_argument("-v", "--verbose", default=0, action="count") 27 | args = parser.parse_args() 28 | 29 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 30 | 31 | if not (args.ftp_get or args.ftp_ls): 32 | parser.error("--ftp_get or --ftp_ls required.") 33 | 34 | if args.ofile: 35 | fout = open(args.ofile, "w") 36 | else: 37 | fout = sys.stdout 38 | 39 | if args.ftp_get: 40 | url=("%s%s"%(args.ftp_url, args.ftp_get)) 41 | if args.sdf2smi: 42 | nbytes = pubchem.ftp.Utils.GetUrlSDF2SMI(url, fout, ntries=args.ftp_ntries, poll_wait=10) 43 | else: 44 | nbytes = 
if __name__=='__main__':
    # Command-line entry point for the UniProt client: parse arguments, collect
    # UniProt IDs from --i (file, one ID per line) or --uids (comma/space
    # separated), then run the requested operation and report elapsed time.
    parser = argparse.ArgumentParser(description='Uniprot query client; get data for specified IDs')
    ops = ['getData', 'listData']
    ofmts = ['txt', 'tab', 'xml', 'rdf', 'fasta', 'gff']
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--uids", dest="uids", help="UniProt IDs, comma-separated (ex: Q14790)")
    parser.add_argument("--i", dest="ifile", help="input file, UniProt IDs")
    parser.add_argument("--o", dest="ofile", help="output (CSV)")
    # The known formats are advertised in the help text but deliberately not
    # enforced via choices=, so any value accepted before is still accepted.
    parser.add_argument("--ofmt", default='txt', help="output format (known: %s)"%('|'.join(ofmts)))
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    BASE_URI='https://'+args.api_host+args.api_base_path

    t0=time.time()

    # Gather and validate the input BEFORE opening the output file, so that a
    # usage error cannot truncate an existing file passed via --o.
    uids=[]
    if args.ifile:
        # "with" guarantees the input handle is closed; blank lines are skipped
        # so that no empty ID is sent to the API.
        with open(args.ifile) as fin:
            uids = [line.strip() for line in fin if line.strip()]
    elif args.uids:
        uids = re.split(r'[\s,]+', args.uids.strip())
    else:
        parser.error('--i or --uids required.')

    if args.op != 'getData':
        # 'listData' is accepted by argparse but has no handler in this script.
        parser.error('Unknown operation: %s'%args.op)

    fout = open(args.ofile, 'w') if args.ofile else sys.stdout
    try:
        uniprot.GetData(BASE_URI, uids, args.ofmt, fout)
    finally:
        # Close only a file we opened ourselves; never close sys.stdout.
        if fout is not sys.stdout:
            fout.close()

    logging.info('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))
if __name__=='__main__':
    # Command-line entry point for the BiomarkerKB client: parse arguments,
    # collect input IDs from --i (file) or --ids, validate the request, then
    # dispatch the selected operation, writing results to --o or stdout.
    epilog="Example BiomarkerKB ID: AN6278-1"
    parser = argparse.ArgumentParser(description='BiomarkerKB REST API client', epilog=epilog)
    ops = [
        "get_biomarker_detail",
        "list_biomarker",
        "search_biomarker"
        ]
    parser.add_argument("op", choices=ops, help='OPERATION (select one)')
    parser.add_argument("--ids", help="input IDs")
    parser.add_argument("--i", dest="ifile", help="input file, IDs")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--query", help="search query (SMILES)")
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("--api_host", default=biomarkerkb.API_HOST)
    parser.add_argument("--api_base_path", default=biomarkerkb.API_BASE_PATH)
    parser.add_argument("-v","--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url='https://'+args.api_host+args.api_base_path

    ids=[]
    if args.ifile:
        # One ID per line; "with" closes the handle even if reading fails.
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin]
    elif args.ids:
        ids = re.split('[, ]+', args.ids.strip())
    if len(ids)>0: logging.info('Input IDs: %d'%(len(ids)))

    # All validation happens before the output file is opened, so a usage
    # error cannot truncate an existing file passed via --o.
    if args.op[:3]=="get" and not (args.ifile or args.ids):
        parser.error(f"--i or --ids required for operation {args.op}.")

    if args.op in ("list_biomarker", "search_biomarker"):
        # Valid choices with no handler in this script yet: say so, rather
        # than claiming the operation is invalid.
        parser.error(f'Not yet implemented: {args.op}')
    elif args.op != "get_biomarker_detail":
        parser.error(f'Invalid operation: {args.op}')

    fout = open(args.ofile, 'w') if args.ofile else sys.stdout
    try:
        biomarkerkb.GetBiomarkerDetail(ids, args.skip, base_url, fout)
    finally:
        # Close only a file we opened ourselves; never close sys.stdout.
        if fout is not sys.stdout:
            fout.close()
def ListEntities(etype, base_url=API_BASE_URL, fout=None):
    """List all entities of one type from the Bioregistry API.

    Entities: contributors|contexts|collections|registry|metaregistry

    Fetches ``{base_url}/{etype}`` and treats the JSON response as a mapping
    of entity ID to a dict of fields. The output columns are taken from the
    first entity; fields whose value there is a list or dict are ignored.

    Args:
        etype: entity type, used as the final URL path segment.
        base_url: API base URL.
        fout: if given, rows are written to it as TSV (header on first row)
            and None is returned; if None, rows are returned as a DataFrame.

    Returns:
        DataFrame with an "id" column plus one column per scalar field, or
        None when fout is given or the response holds no entities.
    """
    tags = None
    frames = []
    n_out = 0
    response = requests.get(f"{base_url}/{etype}")
    results = response.json()  # parse the body once, reuse for logging
    logging.debug(json.dumps(results, indent=2))
    for id_this, thing in results.items():
        if not tags:
            # Keep only scalar-valued fields as output columns.
            tags = []
            for tag, val in thing.items():
                if isinstance(val, (list, dict)):
                    logging.info(f"Ignoring field: {tag}")
                else:
                    tags.append(tag)
        data = {"id": [id_this]}
        data.update({tag: [thing.get(tag)] for tag in tags})
        df_this = pd.DataFrame(data)
        if fout is None:
            frames.append(df_this)
        else:
            df_this.to_csv(fout, sep="\t", index=False, header=bool(n_out==0))
        n_out += df_this.shape[0]
    logging.info(f"n_out ({etype}): {n_out}")
    # Concatenate once at the end instead of re-copying the frame per row.
    return pd.concat(frames) if frames else None

#############################################################################
def GetReference(ids, prefix, base_url=API_BASE_URL, fout=None):
    """Look up provider URLs for ``prefix:id`` references.

    For each ID, fetches ``{base_url}/reference/{prefix}:{id}`` and emits one
    row per entry of the "providers" mapping in the JSON response. A response
    without "providers" contributes no rows.

    Args:
        ids: iterable of local identifiers.
        prefix: prefix placed before each identifier.
        base_url: API base URL.
        fout: if given, rows are written to it as TSV (header on first row)
            and None is returned; if None, rows are returned as a DataFrame.

    Returns:
        DataFrame with columns prefix, id, provider_name, provider_url, or
        None when fout is given or no provider was found.
    """
    frames = []
    n_out = 0
    for id_this in ids:
        response = requests.get(f"{base_url}/reference/{prefix}:{id_this}")
        result = response.json()  # parse the body once, reuse for logging
        logging.debug(json.dumps(result, indent=2))
        # Default to an empty mapping: the loop below needs .items(), which
        # the previous list default did not provide.
        providers = result.get("providers") or {}
        for provider, url_this in providers.items():
            df_this = pd.DataFrame({"prefix":[prefix], "id":[id_this], "provider_name":[provider], "provider_url":[url_this]})
            if fout is None:
                frames.append(df_this)
            else:
                df_this.to_csv(fout, sep="\t", index=False, header=bool(n_out==0))
            n_out += df_this.shape[0]
    logging.info(f"n_out: {n_out}")
    # Concatenate once at the end instead of re-copying the frame per row.
    return pd.concat(frames) if frames else None
open(args.ofile, "w") if args.ofile else sys.stdout 34 | 35 | ids=[] 36 | if args.ifile: 37 | fin = open(args.ifile) 38 | while True: 39 | line = fin.readline() 40 | if not line: break 41 | ids.append(line.rstrip()) 42 | fin.close() 43 | elif args.ids: 44 | ids = re.split(r'[,\s]+', args.ids) 45 | logging.info(f"Input IDs: {len(ids)}") 46 | 47 | t0=time.time() 48 | 49 | if args.op == 'get_esummary': 50 | pubmed.GetESummary(ids, args.skip, args.nmax, base_url, fout) 51 | 52 | elif args.op == 'get_record': 53 | pubmed.GetRecord(ids, args.skip, args.nmax, base_url, fout) 54 | 55 | else: 56 | parser.error(f"Invalid operation: {args.op}") 57 | 58 | logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0))))) 59 | 60 | -------------------------------------------------------------------------------- /doc/medline.md: -------------------------------------------------------------------------------- 1 | # `BioClients.medline` 2 | 3 | ## Medline Plus 4 | 5 | Note that the Medline Plus Genetics resource has superseded the 6 | Genetics Home Reference (GHR). 
7 | 8 | * [Medline Plus](https://medlineplus.gov/) | [Medline Plus Web Services](https://medlineplus.gov/about/developers/webservices/) 9 | * [Medline Plus Genetics](https://medlineplus.gov/genetics) | [Medline Plus Genetics API](https://medlineplus.gov/about/developers/geneticsdatafilesapi/) 10 | * [Medline Plus Connect](https://medlineplus.gov/connect/overview.html) | [Medline Plus Connect Web Service](https://medlineplus.gov/connect/service.html) 11 | 12 | ## Dependencies 13 | 14 | * Python package `xmltodict` 15 | 16 | ## Example commands 17 | 18 | ``` 19 | $ python3 -m BioClients.medline.genetics.Client -h 20 | usage: Client.py [-h] [--i IFILE] [--o OFILE] [--ids IDS] [--api_host API_HOST] 21 | [--api_base_path API_BASE_PATH] 22 | [--download_host DOWNLOAD_HOST] 23 | [--download_base_path DOWNLOAD_BASE_PATH] 24 | [--summary_url SUMMARY_URL] [-v] 25 | {search,list_conditions,list_genes,get_condition_genes} 26 | 27 | MedlinePlus Genetics REST API client 28 | 29 | positional arguments: 30 | {search,list_conditions,list_genes,get_condition_genes} 31 | OPERATION (select one) 32 | 33 | optional arguments: 34 | -h, --help show this help message and exit 35 | --i IFILE input term file (one per line) 36 | --o OFILE output (TSV) 37 | --ids IDS term list (comma-separated) 38 | --api_host API_HOST 39 | --api_base_path API_BASE_PATH 40 | --download_host DOWNLOAD_HOST 41 | --download_base_path DOWNLOAD_BASE_PATH 42 | --summary_url SUMMARY_URL 43 | -v, --verbose 44 | 45 | Example conditions: allergic-asthma, alzheimer-disease, parkinson-disease, 46 | rapid-onset-dystonia-parkinsonism, type-1-diabetes, type-2-diabetes 47 | ``` 48 | 49 | ``` 50 | python3 -m BioClients.medline.genetics.Client list_conditions 51 | python3 -m BioClients.medline.genetics.Client search --ids "Asthma" 52 | python3 -m BioClients.medline.genetics.Client search --ids "Alzheimer" 53 | python3 -m BioClients.medline.genetics.Client get_condition_genes --ids "parkinson-disease" 54 | ``` 55 | 
-------------------------------------------------------------------------------- /BioClients/jensenlab/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | See: https://api.jensenlab.org/About 4 | """ 5 | ### 6 | import sys,os,re,argparse,time,json,logging 7 | # 8 | from .. import jensenlab 9 | # 10 | ############################################################################## 11 | if __name__=='__main__': 12 | CHANNELS= ['Knowledge', 'Experiments', 'Textmining', 'All'] 13 | parser = argparse.ArgumentParser(description='JensenLab REST API client') 14 | ops = ['get_disease_genes', 'get_comention_genes' ] 15 | parser.add_argument("op", choices=ops, help='OPERATION') 16 | parser.add_argument("--i", dest="ifile", help="input IDs (for diseases should be DOIDs, e.g. \"DOID:10652\"") 17 | parser.add_argument("--channel", choices=CHANNELS, default="Textmining", help="source channel") 18 | parser.add_argument("--ids", help="input IDs (comma-separated)") 19 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 20 | parser.add_argument("--nmax", type=int, default=100, help="max hits") 21 | parser.add_argument("--api_host", default=jensenlab.API_HOST) 22 | parser.add_argument("--api_base_path", default=jensenlab.API_BASE_PATH) 23 | parser.add_argument("-v", "--verbose", default=0, action="count") 24 | args = parser.parse_args() 25 | 26 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 27 | 28 | base_url = 'https://'+args.api_host+args.api_base_path 29 | 30 | fout = open(args.ofile,"w") if args.ofile else sys.stdout 31 | 32 | t0 = time.time() 33 | 34 | ids=[]; 35 | if args.ifile: 36 | with open(args.ifile) as fin: 37 | while True: 38 | line = fin.readline() 39 | if not line: break 40 | if line.rstrip(): ids.append(line.rstrip()) 41 | elif args.ids: 42 | ids = re.split(r'[,\s]+', args.ids) 43 | logging.info('Input queries: 
%d'%(len(ids))) 44 | 45 | if args.op == "get_disease_genes": 46 | jensenlab.GetDiseaseGenes(args.channel, ids, args.nmax, base_url, fout) 47 | 48 | elif args.op == "get_comention_genes": 49 | jensenlab.GetPubmedComentionGenes(ids, base_url, fout) 50 | 51 | else: 52 | parser.error(f"Invalid operation: {args.op}") 53 | 54 | logging.info(('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))) 55 | -------------------------------------------------------------------------------- /doc/pubmed.md: -------------------------------------------------------------------------------- 1 | # `BioClients.pubmed` 2 | 3 | ## PubMed 4 | 5 | * [PubMed](https://pubmed.ncbi.nlm.nih.gov/) 6 | * 7 | * 8 | 9 | ### PubMed Web Services Client 10 | 11 | `get_record` returns selected fields: title, abstract, firstAuthorLastName, journal, and year. 12 | 13 | ``` 14 | $ python3 -m BioClients.pubmed.Client -h 15 | usage: Client.py [-h] [--i IFILE] [--ids IDS] [--o OFILE] [--api_host API_HOST] 16 | [--api_base_path API_BASE_PATH] [--skip SKIP] [--nmax NMAX] [-q] [-v] 17 | {get_esummary,get_record} 18 | 19 | PubMed webservices client 20 | 21 | positional arguments: 22 | {get_esummary,get_record} 23 | OPERATION 24 | 25 | options: 26 | -h, --help show this help message and exit 27 | --i IFILE input IDs file (PMIDs) 28 | --ids IDS input IDs (PMIDs) (comma-separated) 29 | --o OFILE output (usually TSV) 30 | --api_host API_HOST 31 | --api_base_path API_BASE_PATH 32 | --skip SKIP 33 | --nmax NMAX 34 | -q, --quiet Suppress progress notification. 35 | -v, --verbose 36 | ``` 37 | 38 | ### PubMed XML Processing App 39 | 40 | Parse, process Entrez PubMed XML (summaries or full), normally obtained via 41 | Entrez eUtils, eDirect CLI or Perl API. 42 | 43 | Note that other Entrez XML (e.g. PubChem) is very similar. 
def GetGene(ids, base_url=BASE_URL, fout=None):
    """Fetch gene records and write them as TSV.

    Gene symbols expected, e.g. NANOG.

    For each symbol, requests ``{base_url}/gene/{symbol}`` as parsed JSON.
    The header row is the key list of the first record; every record is
    written with those columns, using an empty string for a missing key.

    Args:
        ids: iterable of gene symbols.
        base_url: API base URL.
        fout: writable text stream; defaults to sys.stdout when None.
    """
    if fout is None:
        # The signature advertises fout=None, so it must not crash on write.
        fout = sys.stdout
    n_out = 0
    tags = None
    for id_this in ids:
        gene = rest.Utils.GetURL(base_url+'/gene/{0}'.format(id_this), parse_json=True)
        logging.debug(json.dumps(gene, indent=2))
        if not tags:
            tags = list(gene.keys())
            fout.write("\t".join(tags)+"\n")
        vals = [(str(gene[tag]) if tag in gene else "") for tag in tags]
        fout.write("\t".join(vals)+"\n")
        n_out += 1
    logging.info("n_out: %d"%(n_out))

##############################################################################
def GetGeneAssociations(ids, base_url=BASE_URL, fout=None):
    """Fetch gene records with associations and write one TSV row per association.

    Gene symbols expected, e.g. NANOG.

    For each symbol, requests ``{base_url}/gene/{symbol}?showAssociations=true``
    as parsed JSON. Genes without an "associations" entry are skipped. Gene
    columns are the scalar-valued keys of the first gene that has
    associations; association columns are the keys of the first association.

    Args:
        ids: iterable of gene symbols.
        base_url: API base URL.
        fout: writable text stream; defaults to sys.stdout when None.
    """
    if fout is None:
        # The signature advertises fout=None, so it must not crash on write.
        fout = sys.stdout
    n_out = 0
    gene_tags = []
    assn_tags = []
    for id_this in ids:
        gene = rest.Utils.GetURL(base_url+'/gene/{0}?showAssociations=true'.format(id_this), parse_json=True)
        assns = gene["associations"] if "associations" in gene else []
        if not assns:
            continue
        if not gene_tags:
            # Nested values (lists, dicts) cannot go into a flat TSV cell.
            gene_tags = [tag for tag in gene if not isinstance(gene[tag], (list, dict))]
        for assn in assns:
            logging.debug(json.dumps(assn, indent=2))
            if not assn_tags:
                assn_tags = list(assn.keys())
                fout.write("\t".join(gene_tags+assn_tags)+"\n")
            vals = [(str(gene[tag]) if tag in gene else "") for tag in gene_tags]
            vals += [(str(assn[tag]) if tag in assn else "") for tag in assn_tags]
            fout.write("\t".join(vals)+"\n")
            n_out += 1
    logging.info("n_out: %d"%(n_out))
import tcga 9 | # 10 | API_HOST='api.gdc.cancer.gov' 11 | API_BASE_PATH='' 12 | # 13 | ############################################################################## 14 | if __name__=='__main__': 15 | parser = argparse.ArgumentParser(description='TCGA REST API client') 16 | ops = [ 'list_projects', 'list_cases', 'list_files', 'list_annotations' ] 17 | parser.add_argument("op", choices=ops, help='operation') 18 | parser.add_argument("--i", dest="ifile", help="input IDs") 19 | parser.add_argument("--ids", help="input IDs (comma-separated)") 20 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 21 | parser.add_argument("--skip", type=int, default=0) 22 | parser.add_argument("--nmax", type=int, default=None) 23 | parser.add_argument("--api_host", default=API_HOST) 24 | parser.add_argument("--api_base_path", default=API_BASE_PATH) 25 | parser.add_argument("-v", "--verbose", default=0, action="count") 26 | args = parser.parse_args() 27 | 28 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 29 | 30 | base_url = 'https://'+args.api_host+args.api_base_path 31 | 32 | fout = open(args.ofile,"w") if args.ofile else sys.stdout 33 | 34 | t0=time.time() 35 | 36 | ids=[]; 37 | if args.ifile: 38 | fin = open(args.ifile) 39 | while True: 40 | line = fin.readline() 41 | if not line: break 42 | if line.rstrip(): ids.append(line.rstrip()) 43 | fin.close() 44 | elif args.ids: 45 | ids = re.split(r'[,\s]+', args.ids) 46 | logging.info('Input queries: %d'%(len(ids))) 47 | 48 | if args.op == "list_projects": 49 | tcga.ListProjects(base_url, args.skip, args.nmax, fout) 50 | 51 | elif args.op == "list_cases": 52 | tcga.ListCases(base_url, args.skip, args.nmax, fout) 53 | 54 | elif args.op == "list_files": 55 | tcga.ListFiles(base_url, args.skip, args.nmax, fout) 56 | 57 | elif args.op == "list_annotations": 58 | tcga.ListAnnotations(base_url, args.skip, args.nmax, fout) 59 | 60 | else: 61 | parser.error("Invalid 
class TestAPI(unittest.TestCase):
    """Integration tests for the drugcentral package against a live database.

    Connection parameters are read from ``$HOME/.drugcentral.yaml``. Each test
    opens its own connection in setUp and releases it in tearDown, so no
    connection is opened merely by instantiating or collecting the tests.
    """

    def setUp(self):
        self.params = drugcentral.ReadParamFile(os.environ['HOME']+"/.drugcentral.yaml")
        self.dbcon = drugcentral.Utils.Connect(self.params['DBHOST'], self.params['DBPORT'], self.params['DBNAME'], self.params['DBUSR'], self.params['DBPW'])

    def tearDown(self):
        # NOTE(review): presumably Connect() returns a DB-API connection with
        # close(); guarded with getattr so other return types do not break.
        close = getattr(self.dbcon, "close", None)
        if callable(close):
            close()

    def test_Version(self):
        # The previous check, type(x) is not None, could never fail.
        self.assertIsNotNone(drugcentral.Version(self.dbcon))

    def test_Version_02(self):
        self.assertEqual(drugcentral.Version(self.dbcon).shape[0], 1)

    def test_Describe(self):
        self.assertGreater(drugcentral.Describe(self.dbcon).shape[0], 10)

    def test_Counts(self):
        self.assertGreater(drugcentral.Counts(self.dbcon).shape[0], 10)

    def test_ListStructures(self):
        df = drugcentral.ListStructures(self.dbcon)
        self.assertGreater(df.shape[0], 4000)

    def test_ListStructures2Smiles(self):
        df = drugcentral.ListStructures2Smiles(self.dbcon)
        self.assertGreater(df.shape[0], 4000)

    def test_ListProducts(self):
        df = drugcentral.ListProducts(self.dbcon)
        self.assertGreater(df.shape[0], 4000)

    def test_ListActiveIngredients(self):
        df = drugcentral.ListActiveIngredients(self.dbcon)
        self.assertGreater(df.shape[0], 4000)

    def test_ListIndications(self):
        self.assertGreater(drugcentral.ListIndications(self.dbcon).shape[0], 100)

    def test_SearchIndications(self):
        self.assertGreater(drugcentral.SearchIndications(self.dbcon, "Alzheimer").shape[0], 0)

    def test_GetStructure(self):
        self.assertEqual(drugcentral.GetStructure(self.dbcon, ["1725"]).shape[0], 1)

    def test_GetStructureBySynonym(self):
        self.assertEqual(drugcentral.GetStructureBySynonym(self.dbcon, ["zantac"]).shape[0], 1)

    def test_GetStructureIds(self):
        self.assertGreater(drugcentral.GetStructureIds(self.dbcon, ["1725"]).shape[0], 5)

    def test_GetStructureProducts(self):
        self.assertGreater(drugcentral.GetStructureProducts(self.dbcon, ["1725"]).shape[0], 10)
import glygen 12 | # 13 | ############################################################################## 14 | if __name__=='__main__': 15 | epilog="Example GlyTouCan Accession IDs: G00053MO" 16 | parser = argparse.ArgumentParser(description='GlyGen REST API client', epilog=epilog) 17 | ops = [ 18 | "get_glycans", 19 | "list_glycans", 20 | "search_glycans" 21 | ] 22 | parser.add_argument("op", choices=ops, help='OPERATION (select one)') 23 | parser.add_argument("--ids", help="input IDs") 24 | parser.add_argument("--i", dest="ifile", help="input file, IDs") 25 | parser.add_argument("--o", dest="ofile", help="output (TSV)") 26 | parser.add_argument("--query", help="search query (SMILES)") 27 | parser.add_argument("--skip", type=int, default=0) 28 | parser.add_argument("--nmax", type=int, default=None) 29 | parser.add_argument("--api_host", default=glygen.API_HOST) 30 | parser.add_argument("--api_base_path", default=glygen.API_BASE_PATH) 31 | parser.add_argument("-v","--verbose", action="count", default=0) 32 | args = parser.parse_args() 33 | 34 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 35 | 36 | base_url='https://'+args.api_host+args.api_base_path 37 | 38 | fout = open(args.ofile, 'w') if args.ofile else sys.stdout 39 | 40 | ids=[] 41 | if args.ifile: 42 | fin = open(args.ifile) 43 | while True: 44 | line = fin.readline() 45 | if not line: break 46 | ids.append(line.rstrip()) 47 | fin.close() 48 | elif args.ids: 49 | ids = re.split('[, ]+', args.ids.strip()) 50 | if len(ids)>0: logging.info('Input IDs: %d'%(len(ids))) 51 | 52 | if args.op[:3]=="get" and not (args.ifile or args.ids): 53 | parser.error(f"--i or --ids required for operation {args.op}.") 54 | 55 | if args.op == "get_glycans": 56 | glygen.GetGlycans(ids, args.skip, base_url, fout) 57 | 58 | elif args.op == "list_glycans": 59 | glygen.ListGlycans(args.skip, base_url, fout) 60 | 61 | elif args.op == "search_glycans": 62 | 
parser.error(f'Not yet implemented: {args.op}') 63 | #glygen.SearchGlycans(args.query, base_url, fout) 64 | 65 | else: 66 | parser.error(f'Invalid operation: {args.op}') 67 | -------------------------------------------------------------------------------- /BioClients/medline/connect/Utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ############################################################################## 3 | ### Medline utilities - access SNOMED and ICD codes 4 | ### https://medlineplus.gov/connect/technical.html 5 | ### https://medlineplus.gov/connect/service.html 6 | ############################################################################## 7 | ### https://apps.nlm.nih.gov/medlineplus/services/mpconnect_service.cfm 8 | ### Two required parameters: 9 | ### 1. Code System (one of): 10 | ### ICD-10-CM: mainSearchCriteria.v.cs=2.16.840.1.113883.6.90 11 | ### ICD-9-CM: mainSearchCriteria.v.cs=2.16.840.1.113883.6.103 12 | ### SNOMED_CT: mainSearchCriteria.v.cs=2.16.840.1.113883.6.96 13 | ### NDC: mainSearchCriteria.v.cs=2.16.840.1.113883.6.69 14 | ### RXNORM: mainSearchCriteria.v.cs=2.16.840.1.113883.6.88 15 | ### LOINC: mainSearchCriteria.v.cs=2.16.840.1.113883.6.1 16 | ### 2. Code: 17 | ### mainSearchCriteria.v.c=250.33 18 | ### 19 | ### Content format: 20 | ### XML (default): knowledgeResponseType=text/xml 21 | ### JSON: knowledgeResponseType=application/json 22 | ### JSONP: knowledgeResponseType=application/javascript&callback=CallbackFunction 23 | ### where CallbackFunction is a name you give the call back function. 
def GetCode(codes, codesys, base_url=API_BASE_URL, fout=None):
    """Query the MedlinePlus Connect service for each code in a code system.

    Builds one request per code, asking for a JSON response, with the code
    system given by its identifier from CODESYSTEMS.

    Args:
        codes: iterable of codes within the chosen code system.
        codesys: key of CODESYSTEMS naming the code system.
        base_url: service URL.
        fout: optional writable text stream; when given, each parsed response
            is written to it as one line of JSON.

    Returns:
        List of the parsed responses, one per code, in input order.

    Raises:
        KeyError: if codesys is not a key of CODESYSTEMS.
    """
    if codesys not in CODESYSTEMS:
        raise KeyError("Unknown code system: %s (expected one of: %s)"%(codesys, ", ".join(CODESYSTEMS)))
    url = base_url
    url += ('?knowledgeResponseType=application/json')
    url += ('&mainSearchCriteria.v.cs='+CODESYSTEMS[codesys])
    results = []
    for code in codes:
        # Percent-encode the code so characters such as '&', '#' or spaces
        # cannot corrupt the query string.
        url_this = url+('&mainSearchCriteria.v.c='+urllib.parse.quote(str(code), safe=''))
        rval = rest.Utils.GetURL(url_this, parse_json=True)
        logging.debug(json.dumps(rval, sort_keys=True, indent=2))
        if fout is not None:
            fout.write(json.dumps(rval, sort_keys=True)+"\n")
        results.append(rval)
    logging.info("n_out: %d"%(len(results)))
    return results
if __name__=='__main__':
    # Command-line entry point of the PDB REST API client: parse arguments,
    # collect IDs, run the selected operation and report elapsed time.
    epilog="""
Example entry IDs: 3ERT, 3TTC
Example chemical IDs: CFF
"""
    parser = argparse.ArgumentParser(description='PDB REST API client', epilog=epilog)
    ops = ['list_entrys',
        'get_entrys', 'get_chemicals',
        ]
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--ids", dest="ids", help="PDB entry IDs, comma-separated")
    parser.add_argument("--i", dest="ifile", help="input file, PDB entry IDs")
    parser.add_argument("--druglike", action="store_true", help="druglike chemicals only (organic; !polymer; !monoatomic)")
    parser.add_argument("--o", dest="ofile", help="output file (TSV)")
    parser.add_argument("--api_host", default=pdb_utils.API_HOST)
    parser.add_argument("--api_base_path", default=pdb_utils.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = f"https://{args.api_host}{args.api_base_path}"

    # Read IDs; the file is closed by the context manager, and blank lines
    # or trailing separators do not produce empty IDs.
    ids=[]
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
    elif args.ids:
        ids = [id_this for id_this in re.split('[, ]+', args.ids.strip()) if id_this]

    # Validate before opening the output so a usage error cannot truncate
    # an existing file.
    if args.op in ("get_entrys", "get_chemicals") and not ids:
        parser.error('ID[s] required.')

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    t0=time.time()

    try:
        if args.op == "get_entrys":
            pdb_utils.GetEntrys(ids, base_url, fout)

        elif args.op == "get_chemicals":
            pdb_utils.GetChemicals(ids, args.druglike, base_url, fout)

        elif args.op == "list_entrys":
            pdb_utils.ListEntrys(base_url, fout)

        else:
            parser.error(f"Invalid operation: {args.op}")
    finally:
        if fout is not sys.stdout:
            fout.close()

    logging.info(f"Elapsed time: {time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))}")
API_HOST='api.jensenlab.org'
API_BASE_PATH=''
BASE_URL='https://'+API_HOST+API_BASE_PATH
#
##############################################################################
def _GenesToDataFrame(urls):
    """Fetch each URL and stack the gene records of all responses into one DataFrame."""
    tags=[]; rows=[]
    for url in urls:
        rval = rest.GetURL(url, parse_json=True)
        if not rval:
            logging.warning(f"No data returned: {url}")
            continue
        genes = rval[0] # dict keyed by gene identifier; rval[1] is a flag that is not used here
        for gene in genes.values():
            logging.debug(json.dumps(gene, indent=2))
            # Columns are fixed by the first record; later records missing
            # a field get an empty cell instead of raising KeyError.
            if not tags: tags = list(gene.keys())
            rows.append({tag: gene.get(tag) for tag in tags})
    # Build the frame once rather than concatenating per row (quadratic).
    return pd.DataFrame(rows, columns=(tags or None))

##############################################################################
def GetDiseaseGenes(channel, ids, nmax, base_url=BASE_URL, fout=None):
    """Get genes associated with diseases from one JensenLab channel.

    Args:
        channel: API channel used as the URL path segment (the module
            examples use Textmining, Knowledge and Experiments).
        ids: iterable of disease IDs (e.g. "DOID:10652").
        nmax: value of the ``limit`` query parameter, per disease.
        base_url: API base URL.
        fout: optional output stream or path; written as TSV if given.

    Returns:
        pandas.DataFrame with one row per returned gene record.
    """
    urls = (base_url+f'/{channel}?type1=-26&id1={id_this}&type2=9606&limit={nmax}&format=json' for id_this in ids)
    df = _GenesToDataFrame(urls)
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info("n_out: {}".format(df.shape[0]))
    return df

##############################################################################
def GetPubmedComentionGenes(ids, base_url=BASE_URL, fout=None, nmax=10):
    """Search by co-mentioned terms.

    Args:
        ids: iterable of query terms; each is searched with the ``[tiab]``
            suffix appended.
        base_url: API base URL.
        fout: optional output stream or path; written as TSV if given.
        nmax: value of the ``limit`` query parameter, per term (default 10,
            the value that used to be hard-coded).

    Returns:
        pandas.DataFrame with one row per returned gene record.
    """
    urls = (base_url+f'/Textmining?query={id_this}[tiab]&type2=9606&limit={nmax}&format=json' for id_this in ids)
    df = _GenesToDataFrame(urls)
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info("n_out: {}".format(df.shape[0]))
    return df
API_HOST="data.bioontology.org"
API_BASE_PATH=""
#
#############################################################################
def RecommendOntologies(base_url, api_key, texts, fout, input_type=2):
    """Request ontology recommendations for each input text.

    Args:
        base_url: API base URL.
        api_key: NCBO API key, sent in the Authorization header.
        texts: list of input strings, one request per item.
        fout: output stream or path for TSV; nothing is written if falsy.
        input_type: 1 means the input is text, 2 means it is a list of
            comma-separated keywords. Defaults to 2, the value that used
            to be hard-coded.

    Returns:
        pandas.DataFrame with one row per recommendation; every value is
        stringified, and fields absent from a result are ''.
    """
    tags=[]; rows=[]; n_err=0;
    headers = {"Authorization":f"apikey token={api_key}"}
    for text in texts:
        url_this = base_url+f"/recommender?input={urllib.parse.quote(text)}"
        url_this += f"&input_type={input_type}"
        url_this += "&display_context=false&display_links=false"
        logging.debug(url_this)
        rval = requests.get(url_this, headers=headers)
        if not rval.ok:
            logging.error(f'{rval.status_code} : "{text}"')
            n_err+=1
            continue
        for result in rval.json():
            logging.debug(json.dumps(result, indent=2))
            # Columns are fixed by the first result seen.
            if not tags:
                tags = list(result.keys())
            rows.append({tag:(str(result[tag]) if tag in result else '') for tag in tags})
    # Build the frame once rather than concatenating per row (quadratic).
    df = pd.DataFrame(rows, columns=(tags or None))
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info(f"n_texts: {len(texts)}; n_out: {df.shape[0]}; n_err: {n_err}")
    return df
#############################################################################
def _GetData(url):
    """Fetch url and return the list under "data" in the JSON response ([] if absent or no response)."""
    rval = rest.Utils.GetURL(url, parse_json=True)
    return rval["data"] if rval and "data" in rval else []

#############################################################################
def _WriteTsv(records, fout):
    """Write dict records to fout as TSV; the first record's keys become the header."""
    tags = None; n_out=0;
    for record in records:
        logging.debug(json.dumps(record, indent=2))
        if not tags:
            tags = list(record.keys())
            fout.write('\t'.join(tags)+'\n')
        vals = [str(record[tag]) if tag in record else '' for tag in tags]
        fout.write('\t'.join(vals)+'\n')
        n_out += 1
    logging.info("n_out: %d"%(n_out))

#############################################################################
def ListTissues(base_url, fout):
    """List tissues from the AMP T2D API and write them to fout as TSV."""
    _WriteTsv(_GetData(base_url+'/graph/tissue/list/object'), fout)

#############################################################################
def ListPhenotypes(base_url, fout):
    """List phenotypes from the AMP T2D API and write them to fout as TSV."""
    _WriteTsv(_GetData(base_url+'/graph/phenotype/list/object'), fout)

##############################################################################
def DepictGenePathway(base_url, gene, phenotype, max_pval, fout):
    """Get DEPICT gene-pathway records for a gene and phenotype; write TSV to fout.

    Args:
        base_url: API base URL.
        gene: query gene (e.g. SLC30A8).
        phenotype: phenotype name (e.g. T2D).
        max_pval: p-value threshold, sent as the ``lt_value`` parameter.
        fout: writable text stream.
    """
    # Plain decimal with enough digits that small thresholds such as 5e-8
    # are not rounded to zero, which the earlier 6-decimal format did.
    pval = f"{max_pval:.15f}".rstrip('0').rstrip('.') or '0'
    # NOTE(review): the name "lt_value" and its "&" separator are restored
    # from a garbled "<_value"; confirm against the API documentation.
    # Joining the parts keeps the separator apart from the letters "lt".
    query = '&'.join([f"gene={gene}", f"phenotype={phenotype}", f"lt_value={pval}"])
    _WriteTsv(_GetData(base_url+'/testcalls/depict/genepathway/object?'+query), fout)
V1 is deprecated and will be retired no later than May 2026."_ 16 | - 17 | 18 | 19 | ## Example commands 20 | 21 | ``` 22 | python3 -m BioClients.gwascatalog.Client list_studies_v2 --o gwascatalog_studies.tsv 23 | ``` 24 | 25 | ``` 26 | python3 -m BioClients.gwascatalog.Client get_studyAssociations_v2 --ids "GCST004364,GCST000227" 27 | ``` 28 | 29 | ``` 30 | python3 -m BioClients.gwascatalog.Client get_snps_v2 --ids "rs6085920,rs2273833,rs6684514,rs144991356" 31 | ``` 32 | 33 | ``` 34 | python -m BioClients.gwascatalog.Client -h 35 | usage: Client.py [-h] [--ids IDS] 36 | [--searchtype {pubmedmid,gcst,efotrait,efouri,accessionid,rs}] 37 | [--i IFILE] [--o OFILE] [--skip SKIP] [--nmax NMAX] [--api_host API_HOST] 38 | [--api_base_path API_BASE_PATH] [--api_base_path_v2 API_BASE_PATH_V2] 39 | [-v] [-q] 40 | {get_metadata_v2,list_studies,list_studies_v2,get_studyAssociations,get_studyAssociations_v2,get_snps,get_snps_v2,search_studies,search_studies_v2} 41 | 42 | GWAS Catalog REST API (V1|V2) client 43 | 44 | positional arguments: 45 | {get_metadata_v2,list_studies,list_studies_v2,get_studyAssociations,get_studyAssociations_v2,get_snps,get_snps_v2,search_studies,search_studies_v2} 46 | operation 47 | 48 | options: 49 | -h, --help show this help message and exit 50 | --ids IDS IDs, comma-separated 51 | --searchtype {pubmedmid,gcst,efotrait,efouri,accessionid,rs} 52 | ID type 53 | --i IFILE input file, IDs 54 | --o OFILE output (TSV) 55 | --skip SKIP 56 | --nmax NMAX 57 | --api_host API_HOST 58 | --api_base_path API_BASE_PATH 59 | --api_base_path_v2 API_BASE_PATH_V2 60 | -v, --verbose 61 | -q, --quiet 62 | 63 | Example PMIDs: 28530673; Example GCSTs: GCST004364, GCST000227; Example EFOIDs: 64 | EFO_0004232; Example SNPIDs: rs6085920, rs2273833, rs6684514, rs144991356 65 | ``` 66 | -------------------------------------------------------------------------------- /BioClients/bindingdb/Client.py: 
if __name__=='__main__':
    # Command-line entry point of the BindingDb REST API client.
    epilog = "Example Uniprot IDs: P35355, Q8HZR1"
    API_HOST='www.bindingdb.org'
    API_BASE_PATH='/axis2/services/BDBService'
    ops = ["get_ligands_by_uniprot", "get_targets_by_compound"]
    parser = argparse.ArgumentParser( description='BindingDb REST API client', epilog=epilog)
    parser.add_argument("op", choices=ops, help='OPERATION (select one)')
    parser.add_argument("--i", dest="ifile", help="input file, Uniprot IDs")
    parser.add_argument("--ids", help="Uniprot IDs")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--smiles", help="compound query")
    parser.add_argument("--ic50_max", type=int, default=100)
    parser.add_argument("--sim_min", type=float, default=0.85)
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    api_base_url='http://'+args.api_host+args.api_base_path

    t0=time.time()

    # Read IDs; blank lines and trailing separators do not produce empty IDs.
    ids=[]
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
    elif args.ids:
        ids = [id_this for id_this in re.split('[, ]+', args.ids.strip()) if id_this]
    if len(ids)>0: logging.info('Input IDs: %d'%(len(ids)))

    # Validate before opening the output so a usage error cannot truncate
    # an existing file.
    if args.op=="get_ligands_by_uniprot" and not ids:
        parser.error(f"--i or --ids required for operation {args.op}.")
    if args.op=="get_targets_by_compound" and not args.smiles:
        parser.error(f"--smiles required for operation {args.op}.")

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    try:
        if args.op=="get_ligands_by_uniprot":
            bindingdb.GetLigandsByUniprot(api_base_url, ids, args.ic50_max, fout)

        elif args.op=="get_targets_by_compound":
            bindingdb.GetTargetsByCompound(api_base_url, args.smiles, args.sim_min, fout)

        else:
            parser.error('Operation invalid: {}'.format(args.op))
    finally:
        if fout is not sys.stdout:
            fout.close()

    logging.info('Elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))
if __name__=='__main__':
    # Command-line entry point of the GTEx REST API client.
    parser = argparse.ArgumentParser(description='GTEx REST API client', epilog="")
    parser.add_argument(
        "op",
        choices=["list_datasets", "list_subjects", "list_samples", "get_gene_expression"],
        help='OPERATION (select one)')
    parser.add_argument("--ids", help="input IDs")
    parser.add_argument("--i", dest="ifile", help="input file, IDs")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--dataset", default="gtex_v8", help="GTEx datasetId")
    parser.add_argument("--subject", help="GTEx subjectId")
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("--api_host", default=gtex.API_HOST)
    parser.add_argument("--api_base_path", default=gtex.API_BASE_PATH)
    parser.add_argument("-v","--verbose", action="count", default=0)
    args = parser.parse_args()

    log_level = logging.DEBUG if args.verbose>1 else logging.INFO
    logging.basicConfig(format='%(levelname)s:%(message)s', level=log_level)

    base_url = 'https://'+args.api_host+args.api_base_path

    fout = sys.stdout
    if args.ofile:
        fout = open(args.ofile, 'w')

    # IDs come from the file (one per line) if given, else from --ids.
    ids = []
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [row.rstrip() for row in fin]
    elif args.ids:
        ids = re.split('[, ]+', args.ids.strip())
    if ids:
        logging.info('Input IDs: %d'%(len(ids)))

    # Every "get…" operation needs input IDs.
    if args.op.startswith("get") and not (args.ifile or args.ids):
        parser.error(f"--i or --ids required for operation {args.op}.")

    # Operation name -> deferred call into the gtex package.
    handlers = {
        "list_datasets": lambda: gtex.ListDatasets(base_url, fout),
        "list_subjects": lambda: gtex.ListSubjects(args.dataset, base_url, fout),
        "list_samples": lambda: gtex.ListSamples(args.dataset, args.subject, base_url, fout),
        "get_gene_expression": lambda: gtex.GetGeneExpression(ids, args.dataset, args.skip, base_url, fout),
    }
    run = handlers.get(args.op)
    if run is None:
        parser.error(f'Invalid operation: {args.op}')
    run()
API_HOST='public.type2diabeteskb.org'
API_BASE_PATH='/dccservices'
#
#############################################################################
if __name__=='__main__':
    # Command-line entry point of the AMP T2D REST client.
    ops = ["list_tissues", "list_phenotypes", "depict_genepathway"]
    parser = argparse.ArgumentParser(description="AMP T2D REST client")
    parser.add_argument("op",choices=ops,help='operation')
    parser.add_argument("--i", dest="ifile", help="input IDs file")
    parser.add_argument("--ids", help="input IDs, comma-separated")
    parser.add_argument("--gene", help="query gene (e.g. SLC30A8)")
    parser.add_argument("--phenotype", default="T2D")
    parser.add_argument("--max_pval", type=float, default=.0005)
    parser.add_argument("--api_host", default=API_HOST)
    parser.add_argument("--api_base_path", default=API_BASE_PATH)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--nmax", type=int, default=0)
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    BASE_URL = 'http://'+args.api_host+args.api_base_path

    # ids is always bound, even when neither --i nor --ids is given.
    # None of the current operations reads it.
    ids=[]
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
        logging.info('input IDs: %d'%(len(ids)))
    elif args.ids:
        ids = [id_this for id_this in re.split('[, ]+', args.ids.strip()) if id_this]

    # Validate before opening the output so a usage error cannot truncate
    # an existing file. Without this check the request would carry the
    # literal text "None" as the gene.
    if args.op == 'depict_genepathway' and not args.gene:
        parser.error('--gene required for operation %s.'%args.op)

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    t0=time.time()

    try:
        if args.op == 'list_tissues':
            amp.t2d.ListTissues(BASE_URL, fout)

        elif args.op == 'list_phenotypes':
            amp.t2d.ListPhenotypes(BASE_URL, fout)

        elif args.op == 'depict_genepathway':
            amp.t2d.DepictGenePathway(BASE_URL, args.gene, args.phenotype, args.max_pval, fout)

        else:
            parser.error('Invalid operation: %s'%args.op)
    finally:
        if fout is not sys.stdout:
            fout.close()

    logging.info('elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))
if __name__=='__main__':
    # Command-line entry point of the NCBO REST API client.
    EPILOG="""The National Center for Biomedical Ontology was founded as one of the National Centers for Biomedical Computing, supported by the NHGRI, the NHLBI, and the NIH Common Fund."""
    parser = argparse.ArgumentParser(description='NCBO REST API client utility', epilog=EPILOG)
    OPS = ['recommendOntologies']
    parser.add_argument("op", choices=OPS, help="OPERATION")
    parser.add_argument("--i", dest="ifile", help="input texts")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--text", help="input text")
    parser.add_argument("--api_host", default=ncbo.API_HOST)
    parser.add_argument("--api_base_path", default=ncbo.API_BASE_PATH)
    # expanduser() gives the same path as $HOME where it is set, and does
    # not raise KeyError where it is not.
    parser.add_argument("--param_file", default=os.path.expanduser("~")+"/.ncbo.yaml")
    parser.add_argument("--api_key", help="API key")
    parser.add_argument("-v", "--verbose", default=0, action="count")

    args = parser.parse_args()

    logging.basicConfig(format="%(levelname)s:%(message)s", level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = "https://"+args.api_host+args.api_base_path

    # NOTE(review): ReadParamFile is defined elsewhere; "or {}" only guards
    # against a falsy return value, not against an exception it may raise.
    params = util_yaml.ReadParamFile(args.param_file) or {}
    if args.api_key: params["API_KEY"] = args.api_key
    # .get() so a parameter file without API_KEY yields the usage error
    # below rather than a KeyError traceback.
    if not params.get("API_KEY"):
        parser.error("Please specify valid API_KEY via --api_key or --param_file")

    texts=[];
    if args.ifile:
        with open(args.ifile) as fin:
            texts = [line.rstrip() for line in fin]
        logging.info(f"input texts: {len(texts)}")
    elif args.text:
        texts = [args.text]

    if args.op == "recommendOntologies" and not texts:
        parser.error(f"--i or --text required for operation {args.op}.")

    # Open the output only after validation so a usage error cannot
    # truncate an existing file.
    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    t0 = time.time()

    try:
        if args.op == "recommendOntologies":
            ncbo.RecommendOntologies(base_url, params["API_KEY"], texts, fout)

        else:
            parser.error(f"Invalid operation: {args.op}")
    finally:
        if fout is not sys.stdout:
            fout.close()

    logging.info(("Elapsed time: %s"%(time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0)))))
#############################################################################
def Rq2Df(rq):
    """Run SPARQL query rq against Wikidata and return the bindings as a DataFrame.

    Each result binding becomes one row; each cell holds the binding's
    'value' field.
    """
    logging.debug(f"{rq}")
    r = wdi_core.WDItemEngine.execute_sparql_query(rq)['results']['bindings']
    df = pd.DataFrame([{k:v['value'] for k,v in item.items()} for item in r])
    logging.debug(f"rows: {df.shape[0]}; cols: {df.shape[1]}")
    return(df)

#############################################################################
def Query(rq, fout=None):
    """Run SPARQL query rq, optionally write the result to fout as TSV, and return it."""
    df = Rq2Df(rq)
    # sep is passed by keyword: pandas deprecates positional arguments to
    # to_csv other than the path/buffer.
    if fout is not None: df.to_csv(fout, sep='\t', index=False)
    logging.info(f"n_out: {df.shape[0]}")
    return df

#############################################################################
def ListDrugTargetPairs(fout=None):
    "List drugs with known targets."
    rq = """SELECT DISTINCT ?drug ?drugLabel ?gene_product ?gene_productLabel
WHERE {
  ?drug wdt:P129 ?gene_product .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}"""
    return Query(rq, fout)

#############################################################################
def ListGeneDiseasePairs(fout=None):
    "List genes with associated diseases."
    rq = """SELECT DISTINCT ?gene ?geneLabel ?disease ?diseaseLabel
WHERE {
  ?gene wdt:P2293 ?disease .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}"""
    return Query(rq, fout)

#############################################################################
def Test(fout=None):
    "Run a small example query (items with wdt:P279 wd:Q1049021)."
    rq = """SELECT ?item ?itemLabel
WHERE {
  ?item wdt:P279 wd:Q1049021 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}"""
    return Query(rq, fout)
API_HOST="icite.od.nih.gov"
API_BASE_PATH="/api/pubs"
BASE_URL='https://'+API_HOST+API_BASE_PATH
#
NCHUNK=100;
#
#############################################################################
def GetStats(pmids, base_url=BASE_URL, fout=None):
    """Request multiply by chunk. Lists of PMIDs (e.g. references,
    cited_by) reported as counts.

    Args:
        pmids: sequence of PMIDs (strings or ints); requested NCHUNK at a time.
        base_url: API base URL.
        fout: optional output stream; rows are written as TSV after each
            chunk, with the header written once.

    Returns:
        pandas.DataFrame with one row per returned publication. Columns are
        fixed by the first publication; fields missing later are None.
        Processing stops at the first non-200 response.
    """
    n_out=0; tags=None; rows=[];
    # A progress bar is shown only when the root logger is at DEBUG or
    # lower and there is work to do.
    quiet = bool(logging.getLogger().getEffectiveLevel()>15)
    tq = tqdm.tqdm(total=len(pmids), unit="pmids") if (len(pmids)>0 and not quiet) else None
    try:
        for i_start in range(0, len(pmids), NCHUNK):
            pmids_this = [str(pmid) for pmid in pmids[i_start:i_start+NCHUNK]]
            url_this = (f"""{base_url}?pmids={(','.join(pmids_this))}""")
            response = requests.get(url_this)
            if response.status_code != 200:
                logging.error(f"status_code: {response.status_code}")
                break
            result = response.json()
            logging.debug(json.dumps(result, indent=2))
            pubs = result['data'] if 'data' in result else []
            rows_this = []
            for pub in pubs:
                if not tags: tags = list(pub.keys())
                rows_this.append({tag:(pub[tag] if tag in pub else None) for tag in tags})
            if rows_this and fout:
                pd.DataFrame(rows_this, columns=tags).to_csv(fout, sep="\t", index=False, header=bool(n_out==0))
            rows.extend(rows_this)
            n_out += len(rows_this)
            if tq is not None: tq.update(len(pmids_this))
    finally:
        if tq is not None: tq.close()
    # Build the frame once rather than concatenating per row (quadratic).
    df = pd.DataFrame(rows, columns=(tags or None))
    logging.info(f"n_in: {len(pmids)}; n_out: {n_out}")
    return df

#############################################################################
def GetStats_single(pmids, base_url=BASE_URL, fout=None):
    """Request singly.

    Args:
        pmids: sequence of PMIDs (strings or ints), one request each.
        base_url: API base URL.
        fout: optional output stream or path; the table is written as TSV
            once all requests are done.

    Returns:
        pandas.DataFrame with one row per successfully fetched PMID;
        PMIDs whose request does not return 200 are logged and skipped.
    """
    tags=None; rows=[];
    # Same progress-bar condition as GetStats.
    quiet = bool(logging.getLogger().getEffectiveLevel()>15)
    tq = tqdm.tqdm(total=len(pmids), unit="pmids") if (len(pmids)>0 and not quiet) else None
    try:
        for pmid in pmids:
            if tq is not None: tq.update()
            url = base_url+'/'+str(pmid)
            response = requests.get(url)
            if response.status_code != 200:
                logging.error(f"status_code: {response.status_code}")
                continue
            pub = response.json()
            if not tags: tags = list(pub.keys())
            rows.append({tag:(pub[tag] if tag in pub else None) for tag in tags})
    finally:
        if tq is not None: tq.close()
    df = pd.DataFrame(rows, columns=(tags or None))
    if fout: df.to_csv(fout, sep="\t", index=False)
    logging.info(f"n_in: {len(pmids)}; n_out: {df.shape[0]}")
    return df
import ncats 11 | # 12 | ############################################################################# 13 | if __name__=='__main__': 14 | epilog='''\ 15 | Example search queries: 16 | IBUPRO 17 | ASPIRIN 18 | OXYTOCIN 19 | OXYTO* 20 | ASPIRIN AND ESTER 21 | COCN 22 | C=1CC=CC=C1C(=O)O 23 | ''' 24 | parser = argparse.ArgumentParser(description='NCATS Global Substance Registration System (GSRS) client', epilog=epilog) 25 | ops = [ 26 | 'list_vocabularies', 27 | 'list_substances', 28 | 'search', 29 | 'get_substance', 30 | 'get_substance_names', 31 | ] 32 | parser.add_argument("op", choices=ops, help='OPERATION') 33 | parser.add_argument("--i", dest="ifile", help="Input IDs") 34 | parser.add_argument("--o", dest="ofile", help="Output (TSV)") 35 | parser.add_argument("--ids", help="Input IDs (comma-separated)") 36 | parser.add_argument("--query", help="Search query.") 37 | parser.add_argument("--api_host", default=ncats.gsrs.API_HOST) 38 | parser.add_argument("--api_base_path", default=ncats.gsrs.API_BASE_PATH) 39 | parser.add_argument("-v", "--verbose", default=0, action="count") 40 | args = parser.parse_args() 41 | 42 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 43 | 44 | api_base_url = 'https://'+args.api_host+args.api_base_path 45 | 46 | fout = open(args.ofile, "w+") if args.ofile else sys.stdout 47 | 48 | t0=time.time() 49 | 50 | ids=[] 51 | if args.ifile: 52 | fin = open(args.ifile) 53 | while True: 54 | line = fin.readline() 55 | if not line: break 56 | ids.append(line.rstrip()) 57 | fin.close() 58 | logging.info(f"Input IDs: {len(ids)}") 59 | elif args.ids: 60 | ids = re.split(r'[,\s]+', args.ids) 61 | 62 | if args.op == "list_vocabularies": 63 | ncats.gsrs.Utils.ListVocabularies(api_base_url, fout) 64 | 65 | elif args.op == "list_substances": 66 | ncats.gsrs.Utils.ListSubstances(api_base_url, fout) 67 | 68 | elif args.op == "search": 69 | ncats.gsrs.Utils.Search(args.query, api_base_url, 
#############################################################################
if __name__=='__main__':
    # Command-line front end: connect to a Neo4j server and run one of the
    # operations in `ops`, writing the result to --o or stdout.
    parser = argparse.ArgumentParser(description="Neo4j client (via py2neo API)", epilog="See https://neo4j.com/docs/cypher-manual, https://py2neo.org.")
    ops = [ 'dbinfo', 'query', 'dbsummary' ]
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="input query file (CQL aka Cypher)")
    parser.add_argument("--cql", help="input query (CQL aka Cypher)")
    parser.add_argument("--o", dest="ofile", help="output (TSV|JSON)")
    parser.add_argument("--ofmt", choices=('TSV', 'JSON'), default='TSV')
    parser.add_argument("--dbhost", default=util_neo4j.DBHOST)
    parser.add_argument("--dbport", type=int, default=util_neo4j.DBPORT)
    parser.add_argument("--dbscheme", default=util_neo4j.DBSCHEME)
    parser.add_argument("--dbusr", default=util_neo4j.DBUSR)
    parser.add_argument("--dbpw", default=util_neo4j.DBPW)
    parser.add_argument("--secure", action="store_true", help="secure connection (TLS)")
    parser.add_argument("-v", "--verbose", dest="verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>0 else logging.ERROR))

    if args.op not in ops:
        parser.error(f"Unsupported operation: {args.op}")

    # Resolve the query text first, so a usage error neither opens a database
    # connection nor truncates the output file.
    cql = None
    if args.op == 'query':
        if args.ifile:
            with open(args.ifile) as fin:
                cql = fin.read()
        elif args.cql:
            cql = args.cql
        else:
            parser.error('--cql or --i required for query.')

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout
    try:
        # Every operation needs the same connection.
        db = util_neo4j.DbConnect(dbhost=args.dbhost, dbport=args.dbport, dbscheme=args.dbscheme, dbusr=args.dbusr, dbpw=args.dbpw, secure=args.secure)
        if args.op == 'dbinfo':
            util_neo4j.DbInfo(db, fout)
        elif args.op == 'dbsummary':
            util_neo4j.DbSummary(db, fout)
        else:  # 'query'
            util_neo4j.DbQuery(db, cql, args.ofmt, fout)
    finally:
        # Never close the interpreter's stdout.
        if fout is not sys.stdout:
            fout.close()
the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # PyCharm 138 | .idea/ 139 | -------------------------------------------------------------------------------- /BioClients/chemidplus/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Utility functions for the NLM ChemIDplus REST API. 4 | https://chem.nlm.nih.gov/chemidsearch/api 5 | https://chem.nlm.nih.gov/api/swagger-ui.html 6 | https://chem.nlm.nih.gov/api/v2/api-docs 7 | """ 8 | ### 9 | import sys,os,re,argparse,time,logging 10 | # 11 | from .. 
import chemidplus 12 | # 13 | ############################################################################## 14 | if __name__=='__main__': 15 | ops = ["list_sources", "list_types", "get_id2summary", "get_id2names", 16 | "get_id2numbers", 17 | "get_id2toxlist"] 18 | parser = argparse.ArgumentParser(description="ChemIDPlus REST client") 19 | parser.add_argument("op", choices=ops,help='OPERATION') 20 | parser.add_argument("--i", dest="ifile", help="input IDs file") 21 | parser.add_argument("--ids", help="input IDs (comma-separated)") 22 | parser.add_argument("--id_type", default="auto", help="input ID type") 23 | parser.add_argument("--o", dest="ofile", help="output (usually TSV)") 24 | parser.add_argument("--api_host", default=chemidplus.API_HOST) 25 | parser.add_argument("--api_base_path", default=chemidplus.API_BASE_PATH) 26 | parser.add_argument("--skip", type=int, default=0) 27 | parser.add_argument("--nmax", type=int, default=0) 28 | parser.add_argument("-v", "--verbose", default=0, action="count") 29 | args = parser.parse_args() 30 | 31 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 32 | 33 | base_url = 'https://'+args.api_host+args.api_base_path 34 | 35 | fout = open(args.ofile, "w") if args.ofile else sys.stdout 36 | 37 | ids=[] 38 | if args.ifile: 39 | fin = open(args.ifile) 40 | while True: 41 | line = fin.readline() 42 | if not line: break 43 | ids.append(line.rstrip()) 44 | fin.close() 45 | elif args.ids: 46 | ids = re.split(r'[,\s]+', args.ids) 47 | logging.info(f"Input IDs: {len(ids)}") 48 | 49 | t0=time.time() 50 | 51 | if args.op == 'list_sources': 52 | chemidplus.ListSources(base_url, fout) 53 | 54 | elif args.op == 'list_types': 55 | chemidplus.ListTypes(base_url, fout) 56 | 57 | elif args.op == 'get_id2summary': 58 | chemidplus.GetId2Summary(ids, args.id_type, base_url, fout) 59 | 60 | elif args.op == 'get_id2names': 61 | chemidplus.GetId2Names(ids, args.id_type, base_url, fout) 
# default=lambda(x: f"{x:.3f}"),
#############################################################################
if __name__=='__main__':
    # Command-line tool: read a CSV/TSV file with pandas and write it as a
    # tabulate-formatted table under a Markdown heading.
    FORMATS = ["plain", "simple", "github", "grid", "fancy_grid", "pipe", "orgtbl", "jira", "presto", "pretty", "psql", "rst", "mediawiki", "moinmoin", "youtrack", "html", "unsafehtml", "latex", "latex_raw", "latex_booktabs", "latex_longtable", "textile", "tsv"]
    parser = argparse.ArgumentParser(description='Pandas/Tabulate Csv2Markdown.')
    parser.add_argument("--i", dest="ifile", required=True, help="input (CSV|TSV)")
    parser.add_argument("--o", dest="ofile", help="output (HTML)")
    parser.add_argument("--csv", action="store_true", help="delimiter is comma")
    parser.add_argument("--tsv", action="store_true", help="delimiter is tab")
    parser.add_argument("--title", help="Markdown heading")
    parser.add_argument("--columns", help="Subset of columns to write (comma delimited)")
    parser.add_argument("--nrows", type=int)
    parser.add_argument("--skiprows", type=int)
    parser.add_argument("--numalign", choices=["center","right","left","decimal"], default="center")
    parser.add_argument("--stralign", choices=["center","right","left"], default="left")
    parser.add_argument("--format", choices=FORMATS, default="github", help="tabulate format (tablefmt)")
    parser.add_argument("--na_rep", default="", help="String representation of NaN")
    # NOTE(review): --float_format is accepted but not applied anywhere in this
    # script; confirm the intended mapping (tabulate `floatfmt`?) before wiring it in.
    parser.add_argument("--float_format", help="Function(float) -> string.")
    parser.add_argument("-v", "--verbose", action="count", default=0)
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    # Delimiter: explicit flag wins; otherwise guess from the file name.
    if args.csv:
        delim = ','
    elif args.tsv:
        delim = '\t'
    elif re.search(r'\.csv', args.ifile, re.I):
        delim = ','
    else:
        delim = '\t'

    title = args.title if args.title else f"Csv2Markdown: {os.path.basename(args.ifile)}"
    columns = re.split(r',', args.columns) if args.columns else None

    df = pd.read_csv(args.ifile, sep=delim, nrows=args.nrows, skiprows=args.skiprows)

    if columns is not None:
        df = df[columns]

    if args.na_rep is not None:
        df = df.fillna(args.na_rep)

    # to_markdown() forwards extra keyword arguments to tabulate, so the
    # alignment options chosen on the command line take effect here.
    table_md = df.to_markdown(tablefmt=args.format, numalign=args.numalign, stralign=args.stralign)

    md = f"""
# {title}

{table_md}
"""

    # Open the output only once the table exists, and never close stdout.
    if args.ofile:
        with open(args.ofile, "w") as fout:
            fout.write(md)
    else:
        sys.stdout.write(md)
##############################################################################
if __name__=='__main__':
    # Command-line front end for the Bioregistry REST API helpers in the
    # `bioregistry` package.
    parser = argparse.ArgumentParser(description='Bioregistry REST API client', epilog='')
    ops = [
        'list_collections',
        'list_contexts',
        'list_registry',
        'list_metaregistry',
        'list_contributors',
        'get_reference',
        ]
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--i", dest="ifile", help="input query IDs")
    parser.add_argument("--ids", help="input query IDs (comma-separated)")
    parser.add_argument("--o", dest="ofile", help="output (TSV)")
    parser.add_argument("--etype", default="", help="evidence codes (|-separated)")
    parser.add_argument("--prefix", help="CURIE prefix")
    parser.add_argument("--nchunk", type=int)
    parser.add_argument("--nmax", type=int, default=None)
    parser.add_argument("--skip", type=int, default=0)
    parser.add_argument("--api_host", default=bioregistry.API_HOST)
    parser.add_argument("--api_base_path", default=bioregistry.API_BASE_PATH)
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    base_url = f"https://{args.api_host}{args.api_base_path}"

    # Collect query IDs from a file (one per line) or from --ids; blank
    # entries are dropped so they are not sent as queries.
    ids = []
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [line.strip() for line in fin if line.strip()]
    elif args.ids:
        ids = [id_this for id_this in re.split(r'[,\s]+', args.ids.strip()) if id_this]

    if args.op == "get_reference" and not ids:
        parser.error("get_reference requires IDs (--i or --ids).")

    # list_* operations differ only in the entity name handed to ListEntities.
    list_entities = {
        "list_contributors": "contributors",
        "list_collections": "collections",
        "list_contexts": "contexts",
        "list_metaregistry": "metaregistry",
        "list_registry": "registry",
        }

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    t0 = time.time()

    try:
        if args.op in list_entities:
            bioregistry.ListEntities(list_entities[args.op], base_url, fout)
        elif args.op == "get_reference":
            bioregistry.GetReference(ids, args.prefix, base_url, fout)
        else:
            parser.error("Invalid operation: {0}".format(args.op))
    finally:
        # Never close the interpreter's stdout.
        if fout is not sys.stdout:
            fout.close()

    logging.info(('Elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0)))))
"--verbose", default=0, action="count") 28 | args = parser.parse_args() 29 | 30 | logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO)) 31 | 32 | BASE_URL = 'https://'+args.api_host+args.api_base_path 33 | 34 | fout = open(args.ofile, "w+") if args.ofile else sys.stdout 35 | 36 | ids=[]; 37 | if args.ifile: 38 | fin = open(args.ifile) 39 | while True: 40 | line = fin.readline() 41 | if not line: break 42 | ids.append(line.rstrip()) 43 | logging.info('input IDs: %d'%(len(ids))) 44 | fin.close() 45 | elif args.ids: 46 | ids = re.split(r'\s*,\s*',args.ids.strip()) 47 | 48 | t0 = time.time() 49 | 50 | if re.match(r'^get_', args.op) and not ids: 51 | parser.error('{0} requires IDs.'.format(args.op)) 52 | 53 | if args.op=='get_targets': 54 | idg.GetTargets(BASE_URL, ids, args.idtype, fout) 55 | 56 | elif args.op=='get_targetProperties': 57 | idg.GetTargetProperties(BASE_URL, ids, args.idtype, fout) 58 | 59 | elif args.op=='list_targets': 60 | idg.ListItems('targets', BASE_URL, fout) 61 | 62 | elif args.op=='list_diseases': 63 | idg.ListItems('diseases', BASE_URL, fout) 64 | 65 | elif args.op=='list_ligands': 66 | idg.ListItems('ligands', BASE_URL, fout) 67 | 68 | elif args.op=='search_targets': 69 | logging.error('Not implemented yet.') 70 | 71 | else: 72 | logging.error('Invalid operation: {0}'.format(args.op)) 73 | 74 | logging.info(('Elapsed time: %s'%(time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0))))) 75 | -------------------------------------------------------------------------------- /doc/ubkg.md: -------------------------------------------------------------------------------- 1 | # `BioClients.ubkg` 2 | 3 | ## UBKG - Unified Biomedical Knowledge Graph 4 | 5 | Client to the UBKG REST API. Note that an UMLS API Key is required for access 6 | to UBKG. 
7 | 8 | * [Smart-API:UBKG-API](https://smart-api.info/ui/96e5b5c0b0efeef5b93ea98ac2794837) 9 | * [NIH-NLM UMLS Terminology Services](https://uts.nlm.nih.gov/uts/) 10 | 11 | The Unified Biomedical Knowledge Graph (UBKG) was developed by the University of 12 | Pittsburgh, Children's Hospital of Philadelphia, and others, built upon the NIH-NLM 13 | Unified Medical Language System (UMLS) metathesaurus, composed of numerous leading, 14 | community standard controlled vocabularies. 15 | 16 | The Data Distillery Knowledge Graph (DDKG) is a context and extension of UBKG, developed 17 | by the Common Fund Data Ecosystem (CFDE) Data Distillery Partnership Project team, 18 | including the IDG DCC team at UNM. 19 | 20 | ``` 21 | python -m BioClients.ubkg.Client -h 22 | usage: Client.py [-h] [--o OFILE] [--i IFILE] [--ids IDS] [--term TERM] [--sab SAB] 23 | [--relationship RELATIONSHIP] 24 | [--context {base_context,data_distillery_context,hubmap_sennet_context}] 25 | [--mindepth MINDEPTH] [--maxdepth MAXDEPTH] [--nmax NMAX] [--skip SKIP] 26 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] 27 | [--api_key API_KEY] [--param_file PARAM_FILE] [-v] 28 | {search,info,list_relationship_types,list_node_types,list_node_type_counts,list_property_types,list_sabs,list_sources,list_semantic_types,get_concept2codes,get_concept2concepts,get_concept2definitions,get_concept2paths,get_concept2trees,get_term2concepts} 29 | 30 | UBKG REST API client 31 | 32 | positional arguments: 33 | {search,info,list_relationship_types,list_node_types,list_node_type_counts,list_property_types,list_sabs,list_sources,list_semantic_types,get_concept2codes,get_concept2concepts,get_concept2definitions,get_concept2paths,get_concept2trees,get_term2concepts} 34 | OPERATION 35 | 36 | options: 37 | -h, --help show this help message and exit 38 | --o OFILE output (TSV) 39 | --i IFILE UMLS CUI ID file 40 | --ids IDS UMLS CUI IDs, comma-separated 41 | --term TERM UMLS term, e.g. 
'Asthma' 42 | --sab SAB Standard abbreviation type 43 | --relationship RELATIONSHIP 44 | Relationship type 45 | --context {base_context,data_distillery_context,hubmap_sennet_context} 46 | --mindepth MINDEPTH min path depth 47 | --maxdepth MAXDEPTH max path depth 48 | --nmax NMAX max records 49 | --skip SKIP skip 1st SKIP queries 50 | --api_host API_HOST 51 | --api_base_path API_BASE_PATH 52 | --api_key API_KEY UMLS API Key 53 | --param_file PARAM_FILE 54 | -v, --verbose 55 | 56 | ``` 57 | -------------------------------------------------------------------------------- /doc/chembl.md: -------------------------------------------------------------------------------- 1 | # `BioClients.chembl` 2 | 3 | ## ChEMBL 4 | 5 | Tools for obtaining and processing ChEMBL data. 6 | 7 | * 8 | * 9 | * 10 | 11 | ``` 12 | $ python3 -m BioClients.chembl.Client get_drug_indications -h 13 | usage: Client.py [-h] [--ids IDS] [--i IFILE] [--o OFILE] [--skip SKIP] [--nmax NMAX] 14 | [--dev_phase {0,1,2,3,4}] [--assay_source ASSAY_SOURCE] 15 | [--assay_type ASSAY_TYPE] [--pmin PMIN] [--include_phenotypic] 16 | [--api_host API_HOST] [--api_base_path API_BASE_PATH] [-v] 17 | {status,list_sources,list_targets,list_assays,list_docs,list_mols,list_drugs,list_drug_indications,list_tissues,list_cells,list_mechanisms,list_organisms,list_protein_classes,search_assays,search_mols_by_name,get_mol,get_mol_by_inchikey,get_target,get_target_components,get_target_by_uniprot,get_assay,get_activity_by_mol,get_activity_by_assay,get_activity_by_target,get_activity_properties,get_drug_indications,get_document} 18 | 19 | ChEMBL REST API client 20 | 21 | positional arguments: 22 | 
{status,list_sources,list_targets,list_assays,list_docs,list_mols,list_drugs,list_drug_indications,list_tissues,list_cells,list_mechanisms,list_organisms,list_protein_classes,search_assays,search_mols_by_name,get_mol,get_mol_by_inchikey,get_target,get_target_components,get_target_by_uniprot,get_assay,get_activity_by_mol,get_activity_by_assay,get_activity_by_target,get_activity_properties,get_drug_indications,get_document} 23 | OPERATION (select one) 24 | 25 | options: 26 | -h, --help show this help message and exit 27 | --ids IDS input IDs (e.g. mol, assay, target, document) 28 | --i IFILE input file, IDs 29 | --o OFILE output (TSV) 30 | --skip SKIP 31 | --nmax NMAX 32 | --dev_phase {0,1,2,3,4} 33 | molecule development phase 34 | --assay_source ASSAY_SOURCE 35 | source_id 36 | --assay_type ASSAY_TYPE 37 | {'B': 'Binding', 'F': 'Functional', 'A': 'ADMET', 'T': 38 | 'Toxicity', 'P': 'Physicochemical', 'U': 'Unclassified'} 39 | --pmin PMIN min pChEMBL activity value (9 ~ 1nM *C50) 40 | --include_phenotypic else pChembl required 41 | --api_host API_HOST 42 | --api_base_path API_BASE_PATH 43 | -v, --verbose 44 | 45 | Assay types: {'B': 'Binding', 'F': 'Functional', 'A': 'ADMET', 'T': 'Toxicity', 'P': 46 | 'Physicochemical', 'U': 'Unclassified'}. Example IDs: CHEMBL2 (compound); CHEMBL1642 47 | (compound & drug); CHEMBL240 (target); CHEMBL1824 (target); CHEMBL1217643 (assay); 48 | CHEMBL3215220 (assay, PubChem assay 519, NMMLSC FPR); Q12809 (Uniprot) 49 | ``` 50 | -------------------------------------------------------------------------------- /BioClients/emblebi/unichem/Client.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | EMBL-EBI Unichem 4 | UCI is UniChem Compound Id 5 | """ 6 | ### 7 | import sys,os,re,json,argparse,time,logging 8 | 9 | from ... 
#############################################################################
if __name__=='__main__':
    # Command-line front end for the UniChem helpers in
    # `emblebi.unichem.Utils`.
    epilog='''\
Example InChIkey: LCNDUGHNYMJGIW-UHFFFAOYSA-N
'''
    INCHI_REPRESENTATIONS = ["uci", "inchi", "inchikey", "sourceID"]
    parser = argparse.ArgumentParser(description='EMBL-EBI Unichem client', epilog=epilog)
    ops = ['getFromSourceId', 'listSources', 'getFromInchi']
    parser.add_argument("op", choices=ops, help='OPERATION')
    parser.add_argument("--i", dest="ifile", help="Input IDs")
    parser.add_argument("--o", dest="ofile", help="Output (TSV)")
    parser.add_argument("--ids", help="Input IDs (comma-separated)")
    parser.add_argument("--inchi_representation", choices=INCHI_REPRESENTATIONS, default="inchi", help=(f"[{'|'.join(INCHI_REPRESENTATIONS)}]"))
    parser.add_argument("--src_id_in", type=int, help="")
    parser.add_argument("--src_id_out", type=int, help="")
    parser.add_argument("--search_components", action="store_true", help="InChI search option")
    parser.add_argument("--api_host", default=emblebi.unichem.API_HOST)
    parser.add_argument("--api_base_path", default=emblebi.unichem.API_BASE_PATH)
    parser.add_argument("--skip", type=int, help="")
    parser.add_argument("--nmax", type=int, help="")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    api_base_url = 'https://'+args.api_host+args.api_base_path

    # Collect IDs from a file (one per line) or from --ids; blank entries
    # are dropped so they are not sent as queries.
    ids = []
    if args.ifile:
        with open(args.ifile) as fin:
            ids = [line.rstrip() for line in fin if line.strip()]
        logging.info(f"Input IDs: {len(ids)}")
    elif args.ids:
        ids = [id_this for id_this in re.split(r'[,\s]+', args.ids) if id_this]

    if args.op in ("getFromSourceId", "getFromInchi") and not ids:
        parser.error(f"{args.op} requires IDs (--i or --ids).")

    fout = open(args.ofile, "w+") if args.ofile else sys.stdout

    t0 = time.time()

    try:
        if args.op == "getFromSourceId":
            emblebi.unichem.Utils.GetFromSourceId(ids, args.src_id_in, args.src_id_out, args.skip, args.nmax, api_base_url, fout)

        elif args.op == "getFromInchi":
            emblebi.unichem.Utils.GetFromInchi(ids, args.inchi_representation, args.search_components, args.src_id_in, args.src_id_out, args.skip, args.nmax, api_base_url, fout)

        elif args.op == "listSources":
            emblebi.unichem.Utils.ListSources(api_base_url, fout)

        else:
            parser.error(f"Invalid operation: {args.op}")
    finally:
        # Never close the interpreter's stdout.
        if fout is not sys.stdout:
            fout.close()

    logging.info(f"Elapsed time: {time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0))}")
# Top-level MeSH tree branches: single-letter code -> branch name.
BRANCHES={
    'A':'Anatomy',
    'B':'Organisms',
    'C':'Diseases',
    'D':'Chemicals and Drugs',
    'E':'Analytical, Diagnostic and Therapeutic Techniques, and Equipment',
    'F':'Psychiatry and Psychology',
    'G':'Phenomena and Processes',
    'H':'Disciplines and Occupations',
    'I':'Anthropology, Education, Sociology, and Social Phenomena',
    'J':'Technology, Industry, and Agriculture',
    'K':'Humanities',
    'L':'Information Science',
    'M':'Named Groups',
    'N':'Health Care',
    'V':'Publication Characteristics',
    'Z':'Geographicals'}

#############################################################################
if __name__=='__main__':
    # Command-line front end: convert MeSH descriptor or supplementary-record
    # XML (file or stdin) to TSV via the `mesh` package.
    BRANCH='C'
    EPILOG = f"""
operations:
    desc2csv: descriptors XML input;
    supp2csv: supplementary records XML input;
Branches:
    {"; ".join([f"{k}: {BRANCHES[k]}" for k in sorted(BRANCHES.keys())])}
"""
    parser = argparse.ArgumentParser(description='MeSH XML utility', epilog=EPILOG)
    ops=['desc2csv', 'supp2csv']
    parser.add_argument("op", choices=ops, help='operation')
    parser.add_argument("--i", dest="ifile", help="input MeSH XML file [stdin]")
    parser.add_argument("--o", dest="ofile", help="output file (TSV)")
    parser.add_argument("--branch", choices=BRANCHES, default=BRANCH, help="top-level branch of MeSH tree")
    parser.add_argument("--force", action="store_true", help="ignore UTF-8 encoding errors")
    parser.add_argument("-v", "--verbose", default=0, action="count")
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

    # --force: undecodable bytes are dropped instead of raising UnicodeDecodeError.
    decode_errors = "ignore" if args.force else "strict"
    if args.ifile:
        fin = open(args.ifile, "r", encoding="utf-8", errors=decode_errors)
    else:
        fin = sys.stdin
        if args.force and hasattr(fin, "reconfigure"):
            fin.reconfigure(errors=decode_errors)

    fout = open(args.ofile, "w") if args.ofile else sys.stdout

    try:
        if args.op == "desc2csv":
            mesh.Desc2Csv(args.branch, fin, fout)

        elif args.op == "supp2csv":
            mesh.Supp2Csv(args.branch, fin, fout)

        else:
            parser.error(f"Invalid operation: {args.op}")
    finally:
        # Close only what this script opened; leave stdin/stdout alone.
        if fin is not sys.stdin:
            fin.close()
        if fout is not sys.stdout:
            fout.close()
from .. import pubtator
#
API_HOST="www.ncbi.nlm.nih.gov"
API_BASE_PATH="/CBBresearch/Lu/Demo/RESTful/tmTool.cgi"
#
#############################################################################
if __name__=='__main__':
  # Command-line entry point: collects PubMed IDs from a file or from --ids
  # and passes them to pubtator.GetAnnotations together with the output stream.
  parser = argparse.ArgumentParser(description='PubTator REST API client', epilog='Reports PubMed NER annotations for specified PMID[s].')
  ops=['get_annotations']
  modes = ['Gene', 'Chemical', 'BioConcept']
  parser.add_argument("op", choices=ops, help="operation")
  parser.add_argument("--mode", choices=modes, help='mode', default='BioConcept')
  parser.add_argument("--ids", help="PubMed IDs, comma-separated (ex:25533513)")
  parser.add_argument("--i", dest="ifile", help="input file, PubMed IDs")
  # NOTE: --nmax is parsed but not referenced anywhere in this script.
  parser.add_argument("--nmax", help="list: max to return")
  parser.add_argument("--api_host", default=API_HOST)
  parser.add_argument("--api_base_path", default=API_BASE_PATH)
  parser.add_argument("--o", dest="ofile", help="output (TSV)")
  parser.add_argument("-v", "--verbose", default=0, action="count")
  args = parser.parse_args()

  logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

  BASE_URL='https://'+args.api_host+args.api_base_path

  fout = open(args.ofile, "w+") if args.ofile else sys.stdout

  ids=[]
  if args.ifile:
    # One ID per line; blank lines are skipped so they are not submitted
    # as empty PMIDs. The context manager closes the file on any exit path.
    with open(args.ifile) as fin:
      ids = [line.strip() for line in fin if line.strip()]
    logging.info('Input IDs: %d'%(len(ids)))
  elif args.ids:
    # Drop empty tokens produced by leading/trailing/duplicate separators.
    ids = [tok for tok in re.split(r'[\s,]+', args.ids.strip()) if tok]

  if args.op == 'get_annotations':
    # Stop here when no IDs were supplied; previously the error was only
    # logged and the request was still issued with an empty ID list.
    if not ids: parser.error('Input PMIDs required.')
    pubtator.GetAnnotations(BASE_URL, args.mode, ids, fout)

  else:
    parser.error('Invalid operation: {0}'.format(args.op))
# --------------------------------------------------------------------------------
# /BioClients/oncotree/Utils.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Utility functions for Oncotree REST API.
See: https://oncotree.mskcc.org/
See: https://oncotree.mskcc.org/#/home
"""
import sys,os,re,requests,urllib,json,time,logging,tqdm
import pandas as pd

#
API_HOST="oncotree.mskcc.org"
API_BASE_PATH="/api"
BASE_URL="https://"+API_HOST+API_BASE_PATH
#
HEADERS={"Accept":"application/json"}
#
##############################################################################
def Info(base_url=BASE_URL, fout=None):
  """Fetch `{base_url}/info` and return the decoded JSON response.

  The result is logged at DEBUG level. If `fout` is given, the same
  pretty-printed JSON is written to it (previously `fout` was ignored and
  nothing was returned).

  Raises requests.HTTPError on a 4xx/5xx response.
  """
  response = requests.get(base_url+"/info", headers=HEADERS)
  response.raise_for_status()
  result = response.json()
  text = json.dumps(result, sort_keys=True, indent=2)
  logging.debug(text)
  if fout: fout.write(text+"\n")
  return result

##############################################################################
def ListVersions(base_url=BASE_URL, fout=None):
  """Fetch `{base_url}/versions` and return the records as a DataFrame.

  If `fout` is given, the DataFrame is also written to it as TSV.
  Raises requests.HTTPError on a 4xx/5xx response.
  """
  response = requests.get(base_url+'/versions', headers=HEADERS)
  # Fail loudly on an HTTP error rather than tabulating an error payload.
  response.raise_for_status()
  versions = response.json()
  logging.debug(json.dumps(versions, sort_keys=True, indent=2))
  df = pd.DataFrame.from_records(versions)
  if fout: df.to_csv(fout, sep="\t", index=False)
  logging.info(f"Versions: {len(versions)}")
  return df

##############################################################################
def ListMainTypes(base_url=BASE_URL, fout=None):
  """Fetch `{base_url}/mainTypes` and return them sorted, as a DataFrame.

  The DataFrame has a single column, "main_types". If `fout` is given, it is
  also written to it as TSV. Raises requests.HTTPError on a 4xx/5xx response.
  """
  response = requests.get(base_url+'/mainTypes', headers=HEADERS)
  response.raise_for_status()
  result = response.json()
  logging.debug(json.dumps(result, sort_keys=True, indent=2))
  maintypes = sorted(result)
  df = pd.DataFrame({"main_types": maintypes})
  if fout: df.to_csv(fout, sep="\t", index=False)
  logging.info(f"Main types: {len(maintypes)}")
  return df

##############################################################################
def ListTumorTypes(base_url=BASE_URL, fout=None):
  """Fetch `{base_url}/tumorTypes` and return the records as a DataFrame.

  If `fout` is given, the DataFrame is also written to it as TSV.
  Raises requests.HTTPError on a 4xx/5xx response.
  """
  response = requests.get(base_url+'/tumorTypes', headers=HEADERS)
  # Fail loudly on an HTTP error rather than tabulating an error payload.
  response.raise_for_status()
  tumortypes = response.json()
  logging.debug(json.dumps(tumortypes, sort_keys=True, indent=2))
  df = pd.DataFrame.from_records(tumortypes)
  if fout: df.to_csv(fout, sep="\t", index=False)
  logging.info(f"Tumor types: {len(tumortypes)}")
  return df

##############################################################################
def SearchTumorTypes(qry, qtype, exact, levels, base_url=BASE_URL, fout=None):
  """Query `{base_url}/tumorTypes/search/{qtype}/{qry}` and return a DataFrame.

  `exact` is sent as the `exactMatch` query parameter and `levels` as the
  `levels` query parameter. If `fout` is given, the DataFrame is also written
  to it as TSV. Raises requests.HTTPError on a 4xx/5xx response.
  """
  # The module header only does "import urllib"; import the submodule
  # explicitly so urllib.parse does not depend on another package loading it.
  import urllib.parse
  # Percent-encode the path segments so that characters such as "/", "?" or
  # "#" in the query cannot change the structure of the request URL.
  qtype_enc = urllib.parse.quote(str(qtype), safe="")
  qry_enc = urllib.parse.quote(str(qry), safe="")
  levels_enc = urllib.parse.quote(levels)
  url = base_url+f"/tumorTypes/search/{qtype_enc}/{qry_enc}?exactMatch={str(exact)}&levels={levels_enc}"
  response = requests.get(url, headers=HEADERS)
  response.raise_for_status()
  tumortypes = response.json()
  logging.debug(json.dumps(tumortypes, sort_keys=True, indent=2))
  df = pd.DataFrame.from_records(tumortypes)
  if fout: df.to_csv(fout, sep="\t", index=False)
  logging.info(f"Tumor types: {len(tumortypes)}")
  return df

##############################################################################
# --------------------------------------------------------------------------------
# /BioClients/badapple/Client.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
https://chiltepin.health.unm.edu/badapple2/apidocs/
"""
###
import sys,os,re,argparse,time,logging
#
from .. import badapple
from .. import badapple
#
##############################################################################
if __name__=='__main__':
  # Command-line entry point: gathers SMILES or scaffold IDs from --i, --ids
  # or --smi and dispatches to the matching badapple.* function.
  parser = argparse.ArgumentParser(
    description='Badapple REST API client utility',
    epilog="""\
Example SMILES: OC(=O)C1=C2CCCC(C=C3C=CC(=O)C=C3)=C2NC2=CC=CC=C12
Example scaffold IDs: 46,50
""")
  ops = ['get_compound2scaffolds', 'get_scaffold_info', 'get_scaffold2compounds', 'get_scaffold2drugs', ]
  parser.add_argument("op",choices=ops,help='OPERATION')
  parser.add_argument("--smi", dest="smi", help="input SMILES")
  parser.add_argument("--ids", dest="ids", help="input IDs, comma-separated")
  parser.add_argument("--i", dest="ifile", help="input SMILES file (with optional appended NAME), or input IDs file")
  parser.add_argument("--db", choices=badapple.DATABASES, default="badapple2", help="default=badapple2")
  parser.add_argument("--o", dest="ofile", help="output file (TSV)")
  parser.add_argument("--max_rings", type=int, default=10, help="max rings")
  parser.add_argument("--api_host", default=badapple.API_HOST)
  parser.add_argument("--api_base_path", default=badapple.API_BASE_PATH)
  parser.add_argument("-v", "--verbose", action="count", default=0)

  args = parser.parse_args()

  logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

  base_url='https://'+args.api_host+args.api_base_path

  fout = open(args.ofile,"w") if args.ofile else sys.stdout

  t0=time.time()

  # Start from an empty list so the name is always bound; previously, running
  # without --i/--ids/--smi raised NameError at the dispatch below.
  ids = []
  if args.ifile:
    # One entry per line, blank lines skipped; the file is closed on exit.
    with open(args.ifile) as fin:
      ids = [line.rstrip() for line in fin if line.rstrip()]
    logging.info(f"Input SMILES: {len(ids)}")
  elif args.ids:
    ids = re.split(r'[, ]+', args.ids.strip())
  elif args.smi:
    ids = [args.smi.strip()]

  # Every operation offered in ops takes the input list.
  if not ids:
    parser.error("Input required: --i, --ids or --smi.")

  if args.op == "get_compound2scaffolds":
    badapple.GetCompound2Scaffolds(ids, args.db, args.max_rings, base_url, fout)

  elif args.op == "get_scaffold_info":
    badapple.GetScaffoldInfo(ids, args.db, base_url, fout)

  elif args.op == "get_scaffold2compounds":
    badapple.GetScaffold2Compounds(ids, args.db, base_url, fout)

  elif args.op == "get_scaffold2drugs":
    badapple.GetScaffold2Drugs(ids, args.db, base_url, fout)

  elif args.op == "get_version":
    # Not reachable at present: "get_version" is not listed in ops.
    #badapple.GetVersion(args.db, base_url, fout)
    parser.error("Not implemented.")

  else:
    parser.error("No operation specified.")

  logging.info(f"elapsed time: {time.strftime('%Hh:%Mm:%Ss',time.gmtime(time.time()-t0))}")
# --------------------------------------------------------------------------------
# /BioClients/ensembl/biomart/Client.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
"""
Access to Ensembl BIOMART REST API.
https://m.ensembl.org/info/data/biomart/biomart_restful.html
"""
import sys,os,re,argparse,time,logging

from ... import ensembl
from ... import ensembl
#
##############################################################################
def DemoXMLQuery(base_url, fout):
  """Submit the built-in demo query through ensembl.biomart.Utils.XMLQuery.

  NOTE(review): the query literal below contains only blank lines in this
  copy of the file; the XML markup appears to have been stripped somewhere
  upstream -- restore it from the original source before relying on "demo".
  """
  xmltext = """\












"""
  ensembl.biomart.Utils.XMLQuery(xmltext, base_url, fout)

##############################################################################
if __name__=='__main__':
  # Command-line entry point: dispatches the chosen operation to the
  # ensembl.biomart.Utils functions, writing to --o or stdout.
  parser = argparse.ArgumentParser(prog=sys.argv[0], description="Ensembl BIOMART REST API client", epilog="For XML query file format, see https://m.ensembl.org/info/data/biomart/biomart_restful.html#biomartxml")
  ops = ["xmlQuery", "ensg2ncbi", "ensg2hgnc", "ensg2ncbihgnc", "demo", "show_version"]
  parser.add_argument("op", choices=ops, help='operation')
  parser.add_argument("--ixml", dest="ixmlfile", help="input file, XML query")
  parser.add_argument("--api_host", default=ensembl.biomart.API_HOST)
  parser.add_argument("--api_base_path", default=ensembl.biomart.API_BASE_PATH)
  parser.add_argument("--o", dest="ofile", help="output (TSV)")
  parser.add_argument("-v", "--verbose", action="count", default=0)
  args = parser.parse_args()

  logging.basicConfig(format='%(levelname)s:%(message)s', level=(logging.DEBUG if args.verbose>1 else logging.INFO))

  base_url='http://'+args.api_host+args.api_base_path

  fout = open(args.ofile, "w") if args.ofile else sys.stdout

  t0=time.time()

  if args.op=='xmlQuery':
    if not args.ixmlfile:
      # parser.error() exits the process, so no explicit sys.exit is needed.
      parser.error(f"Input XML file required for {args.op}")
    # Read the query through a context manager so the handle is closed.
    with open(args.ixmlfile) as fin:
      xmltext = fin.read()
    ensembl.biomart.Utils.XMLQuery(xmltext, base_url, fout)

  elif args.op=='ensg2ncbi':
    ensembl.biomart.Utils.ENSG2NCBI(base_url, fout)

  elif args.op=='ensg2hgnc':
    ensembl.biomart.Utils.ENSG2HGNC(base_url, fout)

  elif args.op=='ensg2ncbihgnc':
    ensembl.biomart.Utils.ENSG2NCBIHGNC(base_url, fout)

  elif args.op=='demo':
    DemoXMLQuery(base_url, fout)

  elif args.op=='show_version':
    # "show_version" is an accepted choice but has no handler in this script;
    # say so explicitly instead of reporting a valid choice as invalid.
    parser.error(f"Operation not implemented: {args.op}")

  else:
    parser.error(f'Invalid operation: {args.op}')

  logging.info(('%s: elapsed time: %s'%(os.path.basename(sys.argv[0]), time.strftime('%Hh:%Mm:%Ss', time.gmtime(time.time()-t0)))))
# --------------------------------------------------------------------------------