├── .bumpversion.cfg ├── .coveragerc ├── .gitattributes ├── .gitignore ├── .travis.yml ├── AUTHORS.rst ├── CONTRIBUTING.rst ├── HISTORY.rst ├── LICENSE ├── MANIFEST.IN ├── Makefile ├── README.rst ├── docs ├── Makefile ├── api.rst ├── authors.rst ├── conf.py ├── contributing.rst ├── history.rst ├── index.rst ├── installation.rst ├── make.bat └── usage.rst ├── environment.yml ├── pylintrc ├── requirements.txt ├── setup.cfg ├── setup.py ├── src └── pybiomart │ ├── __init__.py │ ├── base.py │ ├── dataset.py │ ├── mart.py │ └── server.py ├── tests ├── conftest.py ├── data │ ├── config_response.pkl │ ├── datasets_response.pkl │ ├── marts_response.pkl │ └── query_response.pkl ├── test_base.py ├── test_dataset.py ├── test_mart.py └── test_server.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.2.0 3 | 4 | [bumpversion:file:setup.py] 5 | 6 | [bumpversion:file:src/pybiomart/__init__.py] 7 | search = __version__ = '{current_version}' 8 | replace = __version__ = '{new_version}' 9 | 10 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = pybiomart 3 | 4 | [report] 5 | omit = pybiomart/_version.py 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | pybiomart/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | __pycache__ 3 | pybiomart.egg-info 4 | *.pyc 5 | dist 6 | .coverage 7 | .DS_Store 8 | .cache 9 | htmlcov 10 | RELEASE-VERSION 11 | docs/_build 12 | .vscode 13 | .tox 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | # We don't actually use the Travis Python, but this keeps it organized. 5 | - '2.7' 6 | - '3.4' 7 | - '3.5' 8 | 9 | install: 10 | - sudo apt-get update 11 | 12 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 13 | - bash miniconda.sh -b -p $HOME/miniconda 14 | - export PATH="$HOME/miniconda/bin:$PATH" 15 | - hash -r 16 | - conda config --set always_yes yes --set changeps1 no 17 | - conda update -q conda 18 | 19 | # Useful for debugging any issues with conda. 20 | - conda info -a 21 | 22 | # Create conda environment. 23 | - conda create -q -n test python=$TRAVIS_PYTHON_VERSION 24 | - source activate test 25 | 26 | # Install with test dependencies. 27 | - pip install .[dev] 28 | 29 | 30 | script: py.test --cov pybiomart --cov-report term-missing 31 | 32 | after_success: 33 | - coveralls 34 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Credits 3 | ======= 4 | 5 | Development Lead 6 | ---------------- 7 | 8 | * Julian de Ruiter 9 | 10 | Contributors 11 | ------------ 12 | 13 | None yet. Why not be the first?
14 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every 8 | little bit helps, and credit will always be given. 9 | 10 | Types of Contributions 11 | ---------------------- 12 | 13 | Report Bugs 14 | ~~~~~~~~~~~ 15 | 16 | Report bugs at https://github.com/jrderuiter/pybiomart/issues. 17 | 18 | If you are reporting a bug, please include: 19 | 20 | * Your operating system name and version. 21 | * Any details about your local setup that might be helpful in troubleshooting. 22 | * Detailed steps to reproduce the bug. 23 | 24 | Fix Bugs 25 | ~~~~~~~~ 26 | 27 | Look through the GitHub issues for bugs. Anything tagged with "bug" 28 | and "help wanted" is open to whoever wants to implement it. 29 | 30 | Implement Features 31 | ~~~~~~~~~~~~~~~~~~ 32 | 33 | Look through the GitHub issues for features. Anything tagged with "enhancement" 34 | and "help wanted" is open to whoever wants to implement it. 35 | 36 | Write Documentation 37 | ~~~~~~~~~~~~~~~~~~~ 38 | 39 | pybiomart could always use more documentation, whether as part of the 40 | official pybiomart docs, in docstrings, or even on the web in blog posts, 41 | articles, and such. 42 | 43 | Submit Feedback 44 | ~~~~~~~~~~~~~~~ 45 | 46 | The best way to send feedback is to file an issue at https://github.com/jrderuiter/pybiomart/issues. 47 | 48 | If you are proposing a feature: 49 | 50 | * Explain in detail how it would work. 51 | * Keep the scope as narrow as possible, to make it easier to implement. 52 | * Remember that this is a volunteer-driven project, and that contributions 53 | are welcome :) 54 | 55 | Get Started! 56 | ------------ 57 | 58 | Ready to contribute? Here's how to set up `pybiomart` for local development. 59 | 60 | 1. Fork the `pybiomart` repo on GitHub. 61 | 2. Clone your fork locally:: 62 | 63 | $ git clone git@github.com:your_name_here/pybiomart.git 64 | 65 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 66 | 67 | $ mkvirtualenv pybiomart 68 | $ cd pybiomart/ 69 | $ python setup.py develop 70 | 71 | 4. Create a branch for local development:: 72 | 73 | $ git checkout -b name-of-your-bugfix-or-feature 74 | 75 | Now you can make your changes locally. 76 | 77 | 5. When you're done making changes, check that your changes pass the tests:: 78 | 79 | $ python setup.py test or py.test 80 | 81 | 6. Commit your changes and push your branch to GitHub:: 82 | 83 | $ git add . 84 | $ git commit -m "Your detailed description of your changes." 85 | $ git push origin name-of-your-bugfix-or-feature 86 | 87 | 7. Submit a pull request through the GitHub website. 88 | 89 | Pull Request Guidelines 90 | ----------------------- 91 | 92 | Before you submit a pull request, check that it meets these guidelines: 93 | 94 | 1. The pull request should include tests. 95 | 2. If the pull request adds functionality, the docs should be updated. Put 96 | your new functionality into a function with a docstring, and add the 97 | feature to the list in README.rst. 98 | 3. The pull request should work for Python 2.7, 3.4 and 3.5. Check 99 | https://travis-ci.org/jrderuiter/pybiomart/pull_requests 100 | and make sure that the tests pass for all supported Python versions. 
101 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | History 3 | ======= 4 | 5 | 0.2.0 (2017-05-10) 6 | ------------------ 7 | 8 | - Fixed Python 2.7 bug encountered when parsing XML. 9 | - Dropped versioneer for bumpversion. 10 | - Refactored unit tests. 11 | - Added tox for testing against multiple Python versions. 12 | - Updated documentation. 13 | 14 | 0.1.0 15 | ------------------ 16 | 17 | * Initial release. 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2017 Julian de Ruiter 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE.
22 | -------------------------------------------------------------------------------- /MANIFEST.IN: -------------------------------------------------------------------------------- 1 | 2 | include AUTHORS.rst 3 | 4 | include CONTRIBUTING.rst 5 | include HISTORY.rst 6 | include LICENSE 7 | include README.rst 8 | 9 | recursive-include tests * 10 | recursive-exclude * __pycache__ 11 | recursive-exclude * *.py[co] 12 | 13 | recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | define BROWSER_PYSCRIPT 4 | import os, webbrowser, sys 5 | try: 6 | from urllib import pathname2url 7 | except: 8 | from urllib.request import pathname2url 9 | 10 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 11 | endef 12 | export BROWSER_PYSCRIPT 13 | 14 | define PRINT_HELP_PYSCRIPT 15 | import re, sys 16 | 17 | for line in sys.stdin: 18 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 19 | if match: 20 | target, help = match.groups() 21 | print("%-20s %s" % (target, help)) 22 | endef 23 | export PRINT_HELP_PYSCRIPT 24 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 25 | 26 | help: 27 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 28 | 29 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 30 | 31 | clean-build: ## remove build artifacts 32 | rm -fr build/ 33 | rm -fr dist/ 34 | rm -fr .eggs/ 35 | find . -name '*.egg-info' -exec rm -fr {} + 36 | find . -name '*.egg' -exec rm -f {} + 37 | 38 | clean-pyc: ## remove Python file artifacts 39 | find . -name '*.pyc' -exec rm -f {} + 40 | find . -name '*.pyo' -exec rm -f {} + 41 | find . -name '*~' -exec rm -f {} + 42 | find . 
-name '__pycache__' -exec rm -fr {} + 43 | 44 | clean-test: ## remove test and coverage artifacts 45 | rm -f .coverage 46 | rm -fr htmlcov/ 47 | 48 | lint: ## check style with pylint 49 | pylint src/pybiomart tests 50 | 51 | test: clean-pyc ## run tests quickly with the default Python 52 | py.test tests 53 | 54 | tox: clean 55 | docker run -v `pwd`:/app -t -i themattrix/tox-base 56 | 57 | coverage: ## check code coverage quickly with the default Python 58 | py.test tests --cov=pybiomart --cov-report=html 59 | $(BROWSER) htmlcov/index.html 60 | 61 | docs: ## generate and serve Sphinx documentation 62 | rm -rf docs/_build 63 | sphinx-autobuild docs docs/_build 64 | 65 | release: clean ## package and upload a release 66 | python setup.py sdist upload 67 | python setup.py bdist_wheel upload 68 | 69 | dist: clean ## builds source and wheel package 70 | python setup.py sdist 71 | python setup.py bdist_wheel 72 | ls -l dist 73 | 74 | install: clean ## install the package to the active Python's site-packages 75 | python setup.py install 76 | 77 | gh-pages: 78 | git checkout gh-pages 79 | find ./* -not -path '*/\.*' -prune -exec rm -r "{}" \; 80 | git checkout develop docs Makefile src AUTHORS.rst CONTRIBUTING.rst HISTORY.rst README.rst 81 | git reset HEAD 82 | (cd docs && make html) 83 | mv -fv docs/_build/html/* ./ 84 | rm -rf docs Makefile src AUTHORS.rst CONTRIBUTING.rst HISTORY.rst README.rst 85 | touch .nojekyll 86 | git add -A 87 | git commit -m "Generated gh-pages for `git log develop -1 --pretty=short --abbrev-commit`" && git push origin gh-pages ; git checkout develop 88 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PyBiomart 2 | ========= 3 | 4 | .. image:: https://travis-ci.org/jrderuiter/pybiomart.svg?branch=develop 5 | :target: https://travis-ci.org/jrderuiter/pybiomart 6 | 7 | .. image:: https://coveralls.io/repos/github/jrderuiter/pybiomart/badge.svg?branch=develop 8 | :target: https://coveralls.io/github/jrderuiter/pybiomart?branch=develop 9 | 10 | A simple and pythonic biomart interface for Python. 11 | 12 | The intent of pybiomart is to provide a simple interface to biomart, which can be used to easily query biomart databases from Python. In this sense, pybiomart aims to provide functionality similar to packages such as biomaRt (which provides access to biomart from R). 13 | 14 | Documentation 15 | ------------- 16 | 17 | Documentation is available at: `https://jrderuiter.github.io/pybiomart <https://jrderuiter.github.io/pybiomart>`_. 18 | 19 | Examples 20 | -------- 21 | 22 | Retrieving and querying a dataset using the server interface: 23 | 24 | .. code:: python 25 | 26 | from pybiomart import Server 27 | 28 | server = Server(host='http://www.ensembl.org') 29 | 30 | dataset = (server.marts['ENSEMBL_MART_ENSEMBL'] 31 | .datasets['hsapiens_gene_ensembl']) 32 | 33 | dataset.query(attributes=['ensembl_gene_id', 'external_gene_name'], 34 | filters={'chromosome_name': ['1','2']}) 35 | 36 | Retrieving a dataset directly with a known dataset name: 37 | 38 | .. code:: python 39 | 40 | from pybiomart import Dataset 41 | 42 | dataset = Dataset(name='hsapiens_gene_ensembl', 43 | host='http://www.ensembl.org') 44 | 45 | dataset.query(attributes=['ensembl_gene_id', 'external_gene_name'], 46 | filters={'chromosome_name': ['1','2']}) 47 | 48 | License 49 | ------- 50 | 51 | Released under the MIT license.
52 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 
77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pybiomart.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pybiomart.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pybiomart" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pybiomart" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 
172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | pybiomart.Dataset 5 | ----------------- 6 | 7 | .. autoclass:: pybiomart.Dataset 8 | :members: 9 | 10 | pybiomart.Server 11 | ---------------- 12 | 13 | .. autoclass:: pybiomart.Server 14 | :members: 15 | 16 | pybiomart.Mart 17 | -------------- 18 | 19 | .. autoclass:: pybiomart.Mart 20 | :members: 21 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pybiomart documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Mar 20 16:34:07 2016. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | 19 | import sphinx_rtd_theme 20 | 21 | sys.path.insert(0, os.path.abspath('..')) 22 | 23 | from pybiomart import __version__ as pybiomart_version 24 | 25 | # If extensions (or modules to document with autodoc) are in another directory, 26 | # add these directories to sys.path here. If the directory is relative to the 27 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
28 | #sys.path.insert(0, os.path.abspath('.')) 29 | 30 | # -- General configuration ------------------------------------------------ 31 | 32 | # If your documentation needs a minimal Sphinx version, state it here. 33 | #needs_sphinx = '1.0' 34 | 35 | # Add any Sphinx extension module names here, as strings. They can be 36 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 37 | # ones. 38 | extensions = [ 39 | 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.coverage', 40 | 'sphinx.ext.viewcode', 'sphinx.ext.autodoc', 'sphinx.ext.napoleon' 41 | ] 42 | 43 | # Add any paths that contain templates here, relative to this directory. 44 | templates_path = ['_templates'] 45 | 46 | # The suffix(es) of source filenames. 47 | # You can specify multiple suffix as a list of string: 48 | # source_suffix = ['.rst', '.md'] 49 | source_suffix = '.rst' 50 | 51 | # The encoding of source files. 52 | #source_encoding = 'utf-8-sig' 53 | 54 | # The master toctree document. 55 | master_doc = 'index' 56 | 57 | # General information about the project. 58 | project = 'pybiomart' 59 | copyright = '2016, Julian de Ruiter' 60 | author = 'Julian de Ruiter' 61 | 62 | # The version info for the project you're documenting, acts as replacement for 63 | # |version| and |release|, also used in various other places throughout the 64 | # built documents. 65 | # 66 | # The short X.Y version. 67 | # version = versioneer.get_version() 68 | # The full version, including alpha/beta/rc tags. 69 | # release = versioneer.get_version() 70 | 71 | release = pybiomart_version 72 | version = pybiomart_version.split('+')[0] 73 | 74 | # The language for content autogenerated by Sphinx. Refer to documentation 75 | # for a list of supported languages. 76 | # 77 | # This is also used if you do content translation via gettext catalogs. 78 | # Usually you set "language" from the command line for these cases. 79 | language = None 80 | 81 | # There are two options for replacing |today|: either, you set today to some 82 | # non-false value, then it is used: 83 | #today = '' 84 | # Else, today_fmt is used as the format for a strftime call. 85 | #today_fmt = '%B %d, %Y' 86 | 87 | # List of patterns, relative to source directory, that match files and 88 | # directories to ignore when looking for source files. 89 | exclude_patterns = ['_build'] 90 | 91 | # The reST default role (used for this markup: `text`) to use for all 92 | # documents. 93 | #default_role = None 94 | 95 | # If true, '()' will be appended to :func: etc. cross-reference text. 96 | #add_function_parentheses = True 97 | 98 | # If true, the current module name will be prepended to all description 99 | # unit titles (such as .. function::). 100 | #add_module_names = True 101 | 102 | # If true, sectionauthor and moduleauthor directives will be shown in the 103 | # output. They are ignored by default. 104 | #show_authors = False 105 | 106 | # The name of the Pygments (syntax highlighting) style to use. 107 | pygments_style = 'sphinx' 108 | 109 | # A list of ignored prefixes for module index sorting. 110 | #modindex_common_prefix = [] 111 | 112 | # If true, keep warnings as "system message" paragraphs in the built documents. 113 | #keep_warnings = False 114 | 115 | # If true, `todo` and `todoList` produce output, else they produce nothing. 116 | todo_include_todos = False 117 | 118 | # -- Options for HTML output ---------------------------------------------- 119 | 120 | # The theme to use for HTML and HTML Help pages. 
See the documentation for 121 | # a list of builtin themes. 122 | #html_theme = 'alabaster' 123 | html_theme = "sphinx_rtd_theme" 124 | 125 | html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] 126 | 127 | # Theme options are theme-specific and customize the look and feel of a theme 128 | # further. For a list of options available for each theme, see the 129 | # documentation. 130 | #html_theme_options = {} 131 | 132 | # Add any paths that contain custom themes here, relative to this directory. 133 | #html_theme_path = [] 134 | 135 | # The name for this set of Sphinx documents. If None, it defaults to 136 | # " v documentation". 137 | #html_title = None 138 | 139 | # A shorter title for the navigation bar. Default is the same as html_title. 140 | #html_short_title = None 141 | 142 | # The name of an image file (relative to this directory) to place at the top 143 | # of the sidebar. 144 | #html_logo = None 145 | 146 | # The name of an image file (within the static path) to use as favicon of the 147 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 148 | # pixels large. 149 | #html_favicon = None 150 | 151 | # Add any paths that contain custom static files (such as style sheets) here, 152 | # relative to this directory. They are copied after the builtin static files, 153 | # so a file named "default.css" will overwrite the builtin "default.css". 154 | html_static_path = ['_static'] 155 | 156 | # Add any extra paths that contain custom files (such as robots.txt or 157 | # .htaccess) here, relative to this directory. These files are copied 158 | # directly to the root of the documentation. 159 | #html_extra_path = [] 160 | 161 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 162 | # using the given strftime format. 163 | #html_last_updated_fmt = '%b %d, %Y' 164 | 165 | # If true, SmartyPants will be used to convert quotes and dashes to 166 | # typographically correct entities. 167 | #html_use_smartypants = True 168 | 169 | # Custom sidebar templates, maps document names to template names. 170 | #html_sidebars = {} 171 | 172 | # Additional templates that should be rendered to pages, maps page names to 173 | # template names. 174 | #html_additional_pages = {} 175 | 176 | # If false, no module index is generated. 177 | #html_domain_indices = True 178 | 179 | # If false, no index is generated. 180 | #html_use_index = True 181 | 182 | # If true, the index is split into individual pages for each letter. 183 | #html_split_index = False 184 | 185 | # If true, links to the reST sources are added to the pages. 186 | #html_show_sourcelink = True 187 | 188 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 189 | #html_show_sphinx = True 190 | 191 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 192 | #html_show_copyright = True 193 | 194 | # If true, an OpenSearch description file will be output, and all pages will 195 | # contain a tag referring to it. The value of this option must be the 196 | # base URL from which the finished HTML is served. 197 | #html_use_opensearch = '' 198 | 199 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 200 | #html_file_suffix = None 201 | 202 | # Language to be used for generating the HTML full-text search index. 
203 | # Sphinx supports the following languages: 204 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 205 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' 206 | #html_search_language = 'en' 207 | 208 | # A dictionary with options for the search language support, empty by default. 209 | # Now only 'ja' uses this config value 210 | #html_search_options = {'type': 'default'} 211 | 212 | # The name of a javascript file (relative to the configuration directory) that 213 | # implements a search results scorer. If empty, the default will be used. 214 | #html_search_scorer = 'scorer.js' 215 | 216 | # Output file base name for HTML help builder. 217 | htmlhelp_basename = 'pybiomartdoc' 218 | 219 | # -- Options for LaTeX output --------------------------------------------- 220 | 221 | latex_elements = { 222 | # The paper size ('letterpaper' or 'a4paper'). 223 | #'papersize': 'letterpaper', 224 | 225 | # The font size ('10pt', '11pt' or '12pt'). 226 | #'pointsize': '10pt', 227 | 228 | # Additional stuff for the LaTeX preamble. 229 | #'preamble': '', 230 | 231 | # Latex figure (float) alignment 232 | #'figure_align': 'htbp', 233 | } 234 | 235 | # Grouping the document tree into LaTeX files. List of tuples 236 | # (source start file, target name, title, 237 | # author, documentclass [howto, manual, or own class]). 238 | latex_documents = [(master_doc, 'pybiomart.tex', 'pybiomart Documentation', 239 | 'Julian de Ruiter', 'manual'), ] 240 | 241 | # The name of an image file (relative to this directory) to place at the top of 242 | # the title page. 243 | #latex_logo = None 244 | 245 | # For "manual" documents, if this is true, then toplevel headings are parts, 246 | # not chapters. 247 | #latex_use_parts = False 248 | 249 | # If true, show page references after internal links. 250 | #latex_show_pagerefs = False 251 | 252 | # If true, show URL addresses after external links. 253 | #latex_show_urls = False 254 | 255 | # Documents to append as an appendix to all manuals. 256 | #latex_appendices = [] 257 | 258 | # If false, no module index is generated. 259 | #latex_domain_indices = True 260 | 261 | # -- Options for manual page output --------------------------------------- 262 | 263 | # One entry per manual page. List of tuples 264 | # (source start file, name, description, authors, manual section). 265 | man_pages = [(master_doc, 'pybiomart', 'pybiomart Documentation', [author], 1)] 266 | 267 | # If true, show URL addresses after external links. 268 | #man_show_urls = False 269 | 270 | # -- Options for Texinfo output ------------------------------------------- 271 | 272 | # Grouping the document tree into Texinfo files. List of tuples 273 | # (source start file, target name, title, author, 274 | # dir menu entry, description, category) 275 | texinfo_documents = [ 276 | (master_doc, 'pybiomart', 'pybiomart Documentation', author, 'pybiomart', 277 | 'One line description of project.', 'Miscellaneous'), 278 | ] 279 | 280 | # Documents to append as an appendix to all manuals. 281 | #texinfo_appendices = [] 282 | 283 | # If false, no module index is generated. 284 | #texinfo_domain_indices = True 285 | 286 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 287 | #texinfo_show_urls = 'footnote' 288 | 289 | # If true, do not generate a @detailmenu in the "Top" node's menu. 
290 | #texinfo_no_detailmenu = False 291 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | PyBiomart 2 | ========= 3 | 4 | .. image:: https://travis-ci.org/jrderuiter/pybiomart.svg?branch=develop 5 | :target: https://travis-ci.org/jrderuiter/pybiomart 6 | 7 | .. image:: https://coveralls.io/repos/github/jrderuiter/pybiomart/badge.svg?branch=develop 8 | :target: https://coveralls.io/github/jrderuiter/pybiomart?branch=develop 9 | 10 | A simple and pythonic biomart interface for Python. 11 | 12 | The intent of pybiomart is to provide a simple interface to biomart, which can be used to easily query biomart databases from Python. In this sense, pybiomart aims to provide functionality similar to packages such as biomaRt (which provides access to biomart from R). 13 | 14 | .. toctree:: 15 | :maxdepth: 2 16 | :hidden: 17 | 18 | self 19 | installation 20 | usage 21 | api 22 | contributing 23 | authors 24 | history 25 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | Dependencies 6 | ------------ 7 | 8 | - Python 2.7, 3.4+ 9 | - future, pandas, requests, requests-cache 10 | 11 | Stable release 12 | -------------- 13 | 14 | To install pybiomart, run this command in your terminal: 15 | 16 | .. code-block:: console 17 | 18 | $ pip install pybiomart 19 | 20 | This is the preferred method to install pybiomart, as it will always install 21 | the most recent stable release. 22 | 23 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 24 | you through the process. 25 | 26 | .. _pip: https://pip.pypa.io 27 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 28 | 29 | From sources 30 | ------------ 31 | 32 | The sources for pybiomart can be downloaded from the `Github repo`_. 33 | 34 | You can either clone the public repository: 35 | 36 | .. code-block:: console 37 | 38 | $ git clone git://github.com/jrderuiter/pybiomart 39 | 40 | Or download the `tarball`_: 41 | 42 | .. code-block:: console 43 | 44 | $ curl -OL https://github.com/jrderuiter/pybiomart/tarball/master 45 | 46 | Once you have a copy of the source, you can install it with: 47 | 48 | .. code-block:: console 49 | 50 | $ python setup.py install 51 | 52 | 53 | .. _Github repo: https://github.com/jrderuiter/pybiomart 54 | .. _tarball: https://github.com/jrderuiter/pybiomart/tarball/master 55 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 
10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 
112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\pybiomart.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\pybiomart.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 
233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | Datasets 5 | -------- 6 | 7 | The main interface of pybiomart is provided by the *Dataset* class. A *Dataset* instance can be constructed directly if the name of the dataset and the URL of the host are known: 8 | 9 | >>> dataset = Dataset(name='hsapiens_gene_ensembl', 10 | ... host='http://www.ensembl.org') 11 | 12 | Querying 13 | ~~~~~~~~ 14 | 15 | Dataset instances can be used to query the biomart server using their *query* method. This method takes an optional argument *attributes*, which specifies the attributes to be retrieved: 16 | 17 | >>> dataset.query(attributes=['ensembl_gene_id', 'external_gene_name']) 18 | 19 | The *query* method returns a pandas DataFrame instance, which contains a DataFrame representation of the requested attributes. If no attributes are given, the default attributes of the dataset are used. These default attributes can be identified using the *default_attributes* property of the dataset. A list of all available attributes can be obtained from the *attributes* property. Alternatively, a more convenient overview of all attributes can be obtained in DataFrame format using the *list_attributes* method. 20 | 21 | Data Types 22 | ~~~~~~~~~~~ 23 | 24 | When creating a pandas DataFrame from the result of a query, pandas needs to read through all lines to infer the proper data type of each column. To improve performance, users can specify data types for the columns by providing a dictionary with column names as keys and data types as values to dataset.query: 25 | 26 | >>> dataset.query(attributes=['ensembl_gene_id'], dtypes={"Ensembl Gene ID": str}) 27 | 28 | Please see https://stackoverflow.com/questions/24251219/pandas-read-csv-low-memory-and-dtype-options#27232309 for more info. 29 | 30 | 31 | Filtering 32 | ~~~~~~~~~ 33 | 34 | Dataset queries can be filtered to avoid fetching unneeded data from the server, thereby reducing the size of the result (and the required bandwidth): 35 | 36 | >>> dataset.query(attributes=['ensembl_gene_id', 'external_gene_name'], 37 | ... filters={'chromosome_name': ['1','2']}) 38 | 39 | The available filters depend on the dataset. All available filters can be accessed using the *filters* property or the *list_filters* method, the latter of which returns an overview of available filters in a DataFrame format. The type of a filter describes what kind of values can be provided for a filter. For example, boolean filters require a boolean value, string filters require a string value, whilst list filters can take a list of values.
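As a short illustrative example (using only the *list_attributes* and *list_filters* methods described above; the exact attributes and filters returned depend on the dataset and the biomart server being queried), both overviews can be inspected as pandas DataFrames:

>>> dataset.list_attributes().head()
>>> dataset.list_filters().head()

Because these methods return plain pandas DataFrames, standard pandas operations such as *head* or boolean indexing can be used to look up the attribute and filter names needed for a query.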
40 | 41 | Servers and Marts 42 | ----------------- 43 | 44 | If the exact dataset is not known, the *Server* and *Mart* classes can be used to explore the available marts and datasets on a biomart server. A server instance can be constructed using an optional host URL (the URL http://www.biomart.org is used by default). This instance can then be used to identify all available marts, either via the *marts* property or the *list_marts* method: 45 | 46 | >>> server = Server(host='http://www.ensembl.org') 47 | >>> server.list_marts() 48 | 49 | Marts can be accessed by using the mart name as an index for the *marts* property, or directly as an index on the server instance. This mart instance can then similarly be used to identify datasets available in the mart, using the mart's *datasets* property or its *list_datasets* method: 50 | 51 | >>> mart = server['ENSEMBL_MART_ENSEMBL'] 52 | >>> mart.list_datasets() 53 | 54 | Datasets can be retrieved from a mart instance by using the dataset name as an index on the mart object, or alternatively as an index for its *datasets* property: 55 | 56 | >>> dataset = mart['hsapiens_gene_ensembl'] 57 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pybiomart 2 | dependencies: 3 | - future 4 | - pandas 5 | - requests 6 | - pip: 7 | - requests_cache 8 | - git+https://github.com/jrderuiter/pybiomart.git 9 | -------------------------------------------------------------------------------- /pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # Specify a configuration file. 4 | #rcfile= 5 | 6 | # Python code to execute, usually for sys.path manipulation such as 7 | # pygtk.require(). 8 | #init-hook= 9 | 10 | # Add files or directories to the blacklist. They should be base names, not 11 | # paths. 12 | ignore=CVS 13 | 14 | # Add files or directories matching the regex patterns to the blacklist. The 15 | # regex matches against base names, not paths. 16 | ignore-patterns= 17 | 18 | # Pickle collected data for later comparisons. 19 | persistent=yes 20 | 21 | # List of plugins (as comma separated values of python modules names) to load, 22 | # usually to register additional checkers. 23 | load-plugins= 24 | 25 | # Use multiple processes to speed up Pylint. 26 | jobs=1 27 | 28 | # Allow loading of arbitrary C extensions. Extensions are imported into the 29 | # active Python interpreter and may run arbitrary code. 30 | unsafe-load-any-extension=no 31 | 32 | # A comma-separated list of package or module names from where C extensions may 33 | # be loaded. Extensions are loading into the active Python interpreter and may 34 | # run arbitrary code 35 | extension-pkg-whitelist= 36 | 37 | # Allow optimization of some AST trees. This will activate a peephole AST 38 | # optimizer, which will apply various small optimizations. For instance, it can 39 | # be used to obtain the result of joining multiple strings with the addition 40 | # operator. Joining a lot of strings can lead to a maximum recursion error in 41 | # Pylint and this flag can prevent that. It has one side effect, the resulting 42 | # AST will be different than the one from reality. This option is deprecated 43 | # and it will be removed in Pylint 2.0.
44 | optimize-ast=no 45 | 46 | 47 | [MESSAGES CONTROL] 48 | 49 | # Only show warnings with the listed confidence levels. Leave empty to show 50 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED 51 | confidence= 52 | 53 | # Enable the message, report, category or checker with the given id(s). You can 54 | # either give multiple identifier separated by comma (,) or put this option 55 | # multiple time (only on the command line, not in the configuration file where 56 | # it should appear only once). See also the "--disable" option for examples. 57 | #enable= 58 | 59 | # Disable the message, report, category or checker with the given id(s). You 60 | # can either give multiple identifiers separated by comma (,) or put this 61 | # option multiple times (only on the command line, not in the configuration 62 | # file where it should appear only once).You can also use "--disable=all" to 63 | # disable everything first and then reenable specific checks. For example, if 64 | # you want to run only the similarities checker, you can use "--disable=all 65 | # --enable=similarities". If you want to run only the classes checker, but have 66 | # no Warning level messages displayed, use"--disable=all --enable=classes 67 | # --disable=W" 68 | disable=unused-import,locally-enabled,locally-disabled 69 | # ,dict-view-method,backtick,cmp-builtin,parameter-unpacking,reduce-builtin,coerce-builtin,delslice-method,using-cmp-argument,apply-builtin,hex-method,old-octal-literal,old-division,long-builtin,no-absolute-import,oct-method,useless-suppression,reload-builtin,nonzero-method,old-raise-syntax,input-builtin,standarderror-builtin,raw_input-builtin,long-suffix,intern-builtin,import-star-module-level,unicode-builtin,raising-string,indexing-exception,dict-iter-method,round-builtin,cmp-method,map-builtin-not-iterating,filter-builtin-not-iterating,unpacking-in-except,zip-builtin-not-iterating,file-builtin,range-builtin-not-iterating,execfile-builtin,unichr-builtin,coerce-method,old-ne-operator,buffer-builtin,setslice-method,metaclass-assignment,next-method-called,print-statement,getslice-method,xrange-builtin,suppressed-message,basestring-builtin 70 | 71 | 72 | [REPORTS] 73 | 74 | # Set the output format. Available formats are text, parseable, colorized, msvs 75 | # (visual studio) and html. You can also give a reporter class, eg 76 | # mypackage.mymodule.MyReporterClass. 77 | output-format=text 78 | 79 | # Put messages in a separate file for each module / package specified on the 80 | # command line instead of printing them on stdout. Reports (if any) will be 81 | # written in a file name "pylint_global.[txt|html]". This option is deprecated 82 | # and it will be removed in Pylint 2.0. 83 | files-output=no 84 | 85 | # Tells whether to display a full report or only the messages 86 | reports=yes 87 | 88 | # Python expression which should return a note less than 10 (10 is the highest 89 | # note). You have access to the variables errors warning, statement which 90 | # respectively contain the number of errors / warnings messages and the total 91 | # number of statements analyzed. This is used by the global evaluation report 92 | # (RP0004). 93 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 94 | 95 | # Template used to display messages. This is a python new-style format string 96 | # used to format the message information. 
See doc for all details 97 | #msg-template= 98 | 99 | 100 | [BASIC] 101 | 102 | # Good variable names which should always be accepted, separated by a comma 103 | good-names=i,j,k,ex,Run,_,x,mu,iv,df,ax 104 | 105 | # Bad variable names which should always be refused, separated by a comma 106 | bad-names=foo,bar,baz,toto,tutu,tata 107 | 108 | # Colon-delimited sets of names that determine each other's naming style when 109 | # the name regexes allow several styles. 110 | name-group= 111 | 112 | # Include a hint for the correct naming format with invalid-name 113 | include-naming-hint=no 114 | 115 | # List of decorators that produce properties, such as abc.abstractproperty. Add 116 | # to this list to register other decorators that produce valid properties. 117 | property-classes=abc.abstractproperty 118 | 119 | # Regular expression matching correct module names 120 | module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 121 | 122 | # Naming hint for module names 123 | module-name-hint=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ 124 | 125 | # Regular expression matching correct constant names 126 | const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 127 | 128 | # Naming hint for constant names 129 | const-name-hint=(([A-Z_][A-Z0-9_]*)|(__.*__))$ 130 | 131 | # Regular expression matching correct class attribute names 132 | class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 133 | 134 | # Naming hint for class attribute names 135 | class-attribute-name-hint=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ 136 | 137 | # Regular expression matching correct class names 138 | class-rgx=[A-Z_][a-zA-Z0-9]+$ 139 | 140 | # Naming hint for class names 141 | class-name-hint=[A-Z_][a-zA-Z0-9]+$ 142 | 143 | # Regular expression matching correct argument names 144 | argument-rgx=[a-z_][a-z0-9_]{2,30}$ 145 | 146 | # Naming hint for argument names 147 | argument-name-hint=[a-z_][a-z0-9_]{2,30}$ 148 | 149 | # Regular expression matching correct attribute names 150 | attr-rgx=[a-z_][a-z0-9_]{2,30}$ 151 | 152 | # Naming hint for attribute names 153 | attr-name-hint=[a-z_][a-z0-9_]{2,30}$ 154 | 155 | # Regular expression matching correct variable names 156 | variable-rgx=[a-z_][a-z0-9_]{2,30}$ 157 | 158 | # Naming hint for variable names 159 | variable-name-hint=[a-z_][a-z0-9_]{2,30}$ 160 | 161 | # Regular expression matching correct inline iteration names 162 | inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ 163 | 164 | # Naming hint for inline iteration names 165 | inlinevar-name-hint=[A-Za-z_][A-Za-z0-9_]*$ 166 | 167 | # Regular expression matching correct method names 168 | method-rgx=[a-z_][a-z0-9_]{2,30}$ 169 | 170 | # Naming hint for method names 171 | method-name-hint=[a-z_][a-z0-9_]{2,30}$ 172 | 173 | # Regular expression matching correct function names 174 | function-rgx=[a-z_][a-z0-9_]{2,30}$ 175 | 176 | # Naming hint for function names 177 | function-name-hint=[a-z_][a-z0-9_]{2,30}$ 178 | 179 | # Regular expression which should only match function or class names that do 180 | # not require a docstring. 181 | no-docstring-rgx=^_ 182 | 183 | # Minimum line length for functions/classes that require docstrings, shorter 184 | # ones are exempt. 185 | docstring-min-length=-1 186 | 187 | 188 | [ELIF] 189 | 190 | # Maximum number of nested blocks for function / method body 191 | max-nested-blocks=5 192 | 193 | 194 | [FORMAT] 195 | 196 | # Maximum number of characters on a single line. 197 | max-line-length=100 198 | 199 | # Regexp for a line that is allowed to be longer than the limit. 
200 | ignore-long-lines=^\s*(# )??$ 201 | 202 | # Allow the body of an if to be on the same line as the test if there is no 203 | # else. 204 | single-line-if-stmt=no 205 | 206 | # List of optional constructs for which whitespace checking is disabled. `dict- 207 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 208 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 209 | # `empty-line` allows space-only lines. 210 | no-space-check=trailing-comma,dict-separator 211 | 212 | # Maximum number of lines in a module 213 | max-module-lines=1000 214 | 215 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 216 | # tab). 217 | indent-string=' ' 218 | 219 | # Number of spaces of indent required inside a hanging or continued line. 220 | indent-after-paren=4 221 | 222 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 223 | expected-line-ending-format= 224 | 225 | 226 | [LOGGING] 227 | 228 | # Logging modules to check that the string format arguments are in logging 229 | # function parameter format 230 | logging-modules=logging 231 | 232 | 233 | [MISCELLANEOUS] 234 | 235 | # List of note tags to take in consideration, separated by a comma. 236 | notes=FIXME,XXX,TODO 237 | 238 | 239 | [SIMILARITIES] 240 | 241 | # Minimum lines number of a similarity. 242 | min-similarity-lines=4 243 | 244 | # Ignore comments when computing similarities. 245 | ignore-comments=yes 246 | 247 | # Ignore docstrings when computing similarities. 248 | ignore-docstrings=yes 249 | 250 | # Ignore imports when computing similarities. 251 | ignore-imports=no 252 | 253 | 254 | [SPELLING] 255 | 256 | # Spelling dictionary name. Available dictionaries: none. To make it working 257 | # install python-enchant package. 258 | spelling-dict= 259 | 260 | # List of comma separated words that should not be checked. 261 | spelling-ignore-words= 262 | 263 | # A path to a file that contains private dictionary; one word per line. 264 | spelling-private-dict-file= 265 | 266 | # Tells whether to store unknown words to indicated private dictionary in 267 | # --spelling-private-dict-file option instead of raising a message. 268 | spelling-store-unknown-words=no 269 | 270 | 271 | [TYPECHECK] 272 | 273 | # Tells whether missing members accessed in mixin class should be ignored. A 274 | # mixin class is detected if its name ends with "mixin" (case insensitive). 275 | ignore-mixin-members=yes 276 | 277 | # List of module names for which member attributes should not be checked 278 | # (useful for modules/projects where namespaces are manipulated during runtime 279 | # and thus existing member attributes cannot be deduced by static analysis. It 280 | # supports qualified module names, as well as Unix pattern matching. 281 | ignored-modules= 282 | 283 | # List of class names for which member attributes should not be checked (useful 284 | # for classes with dynamically set attributes). This supports the use of 285 | # qualified names. 286 | ignored-classes=optparse.Values,thread._local,_thread._local 287 | 288 | # List of members which are set dynamically and missed by pylint inference 289 | # system, and so shouldn't trigger E1101 when accessed. Python regular 290 | # expressions are accepted. 291 | generated-members=pysam.*,numpy.*,np.*,pytest.* 292 | 293 | # List of decorators that produce context managers, such as 294 | # contextlib.contextmanager. Add to this list to register other decorators that 295 | # produce valid context managers. 
296 | contextmanager-decorators=contextlib.contextmanager 297 | 298 | 299 | [VARIABLES] 300 | 301 | # Tells whether we should check for unused import in __init__ files. 302 | init-import=no 303 | 304 | # A regular expression matching the name of dummy variables (i.e. expectedly 305 | # not used). 306 | dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy 307 | 308 | # List of additional names supposed to be defined in builtins. Remember that 309 | # you should avoid to define new builtins when possible. 310 | additional-builtins= 311 | 312 | # List of strings which can identify a callback function by name. A callback 313 | # name must start or end with one of those strings. 314 | callbacks=cb_,_cb 315 | 316 | # List of qualified module names which can have objects that can redefine 317 | # builtins. 318 | redefining-builtins-modules=six.moves,future.builtins 319 | 320 | 321 | [CLASSES] 322 | 323 | # List of method names used to declare (i.e. assign) instance attributes. 324 | defining-attr-methods=__init__,__new__,setUp 325 | 326 | # List of valid names for the first argument in a class method. 327 | valid-classmethod-first-arg=cls 328 | 329 | # List of valid names for the first argument in a metaclass class method. 330 | valid-metaclass-classmethod-first-arg=mcs 331 | 332 | # List of member names, which should be excluded from the protected access 333 | # warning. 334 | exclude-protected=_asdict,_fields,_replace,_source,_make 335 | 336 | 337 | [DESIGN] 338 | 339 | # Maximum number of arguments for function / method 340 | max-args=5 341 | 342 | # Argument names that match this expression will be ignored. Default to name 343 | # with leading underscore 344 | ignored-argument-names=_.* 345 | 346 | # Maximum number of locals for function / method body 347 | max-locals=15 348 | 349 | # Maximum number of return / yield for function / method body 350 | max-returns=6 351 | 352 | # Maximum number of branch for function / method body 353 | max-branches=12 354 | 355 | # Maximum number of statements in function / method body 356 | max-statements=50 357 | 358 | # Maximum number of parents for a class (see R0901). 359 | max-parents=7 360 | 361 | # Maximum number of attributes for a class (see R0902). 362 | max-attributes=7 363 | 364 | # Minimum number of public methods for a class (see R0903). 365 | min-public-methods=2 366 | 367 | # Maximum number of public methods for a class (see R0904). 368 | max-public-methods=20 369 | 370 | # Maximum number of boolean expressions in a if statement 371 | max-bool-expr=5 372 | 373 | 374 | [IMPORTS] 375 | 376 | # Deprecated modules which should not be used, separated by a comma 377 | deprecated-modules=optparse 378 | 379 | # Create a graph of every (i.e. internal and external) dependencies in the 380 | # given file (report RP0402 must not be disabled) 381 | import-graph= 382 | 383 | # Create a graph of external dependencies in the given file (report RP0402 must 384 | # not be disabled) 385 | ext-import-graph= 386 | 387 | # Create a graph of internal dependencies in the given file (report RP0402 must 388 | # not be disabled) 389 | int-import-graph= 390 | 391 | # Force import order to recognize a module as part of the standard 392 | # compatibility libraries. 393 | known-standard-library= 394 | 395 | # Force import order to recognize a module as part of a third party library. 396 | known-third-party=enchant 397 | 398 | # Analyse import fallback blocks. 
This can be used to support both Python 2 and 399 | # 3 compatible code, which means that the block might have code that exists 400 | # only in one or another interpreter, leading to false positives when analysed. 401 | analyse-fallback-blocks=no 402 | 403 | 404 | [EXCEPTIONS] 405 | 406 | # Exceptions that will emit a warning when being caught. Defaults to 407 | # "Exception" 408 | overgeneral-exceptions=Exception 409 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | future==0.15.2 2 | pandas==0.18.0 3 | requests==2.9.1 4 | requests-cache==0.4.12 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 0 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # -*- coding: utf-8 -*- 4 | 5 | # pylint: disable=invalid-name 6 | 7 | import setuptools 8 | 9 | REQUIREMENTS = ['future', 'pandas', 'requests', 'requests_cache'] 10 | 11 | EXTRAS_REQUIRE = { 12 | 'dev': [ 13 | 'sphinx', 'sphinx-autobuild', 'sphinx-rtd-theme', 'bumpversion', 14 | 'pytest>=2.7', 'pytest-mock', 'pytest-helpers-namespace', 'pytest-cov', 15 | 'python-coveralls' 16 | ] 17 | } 18 | 19 | setuptools.setup( 20 | name='pybiomart', 21 | version='0.2.0', 22 | url='https://github.com/jrderuiter/pybiomart', 23 | author='Julian de Ruiter', 24 | author_email='julianderuiter@gmail.com', 25 | description='A simple pythonic interface to biomart.', 26 | license='MIT', 27 | packages=setuptools.find_packages('src'), 28 | package_dir={'': 'src'}, 29 | zip_safe=True, 30 | classifiers=[ 31 | 'Intended Audience :: Developers', 32 | 'Intended Audience :: Science/Research', 33 | 'License :: OSI Approved :: MIT License', 34 | 'Operating System :: OS Independent', 35 | 'Programming Language :: Python :: 2.7', 36 | 'Programming Language :: Python :: 3.4', 37 | 'Programming Language :: Python :: 3.5' 38 | ], 39 | install_requires=REQUIREMENTS, 40 | extras_require=EXTRAS_REQUIRE) 41 | -------------------------------------------------------------------------------- /src/pybiomart/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .server import Server 4 | from .mart import Mart 5 | from .dataset import Dataset 6 | 7 | __author__ = 'Julian de Ruiter' 8 | __email__ = 'julianderuiter@gmail.com' 9 | __version__ = '0.2.0' 10 | -------------------------------------------------------------------------------- /src/pybiomart/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | # pylint: disable=wildcard-import,redefined-builtin,unused-wildcard-import 4 | from builtins import * 5 | # pylint: enable=wildcard-import,redefined-builtin,unused-wildcard-import 6 | 7 | import requests 8 | import requests_cache 9 | 10 | DEFAULT_HOST = 'http://www.biomart.org' 11 | DEFAULT_PATH = '/biomart/martservice' 12 | DEFAULT_PORT = 80 13 | DEFAULT_SCHEMA = 'default' 14 | 15 | requests_cache.install_cache('.pybiomart') 16 | 17 | 18 | class ServerBase(object): 19 | """Base class that handles requests to the biomart server. 
20 | 21 | Attributes: 22 | host (str): Host to connect to for the biomart service. 23 | path (str): Path to the biomart service on the host. 24 | port (int): Port to connect to on the host. 25 | url (str): Url used to connect to the biomart service. 26 | use_cache (bool): Whether to cache requests to biomart. 27 | 28 | """ 29 | 30 | def __init__(self, host=None, path=None, port=None, use_cache=True): 31 | """ServerBase constructor. 32 | 33 | Args: 34 | host (str): Url of host to connect to. 35 | path (str): Path on the host to access to the biomart service. 36 | port (int): Port to use for the connection. 37 | use_cache (bool): Whether to cache requests. 38 | 39 | """ 40 | # Use defaults if arg is None. 41 | host = host or DEFAULT_HOST 42 | path = path or DEFAULT_PATH 43 | port = port or DEFAULT_PORT 44 | 45 | # Add http prefix and remove trailing slash. 46 | host = self._add_http_prefix(host) 47 | host = self._remove_trailing_slash(host) 48 | 49 | # Ensure path starts with slash. 50 | if not path.startswith('/'): 51 | path = '/' + path 52 | 53 | self._host = host 54 | self._path = path 55 | self._port = port 56 | self._use_cache = use_cache 57 | 58 | @property 59 | def host(self): 60 | """Host to connect to for the biomart service.""" 61 | return self._host 62 | 63 | @property 64 | def path(self): 65 | """Path to the biomart service on the host.""" 66 | return self._path 67 | 68 | @property 69 | def port(self): 70 | """Port to connect to on the host.""" 71 | return self._port 72 | 73 | @property 74 | def url(self): 75 | """Url used to connect to the biomart service.""" 76 | return '{}:{}{}'.format(self._host, self._port, self._path) 77 | 78 | @property 79 | def use_cache(self): 80 | """Whether to cache requests to biomart.""" 81 | return self._use_cache 82 | 83 | @staticmethod 84 | def _add_http_prefix(url, prefix='http://'): 85 | if not url.startswith(('http://', 'https://')): 86 | url = prefix + url 87 | return url 88 | 89 | @staticmethod 90 | def _remove_trailing_slash(url): 91 | if url.endswith('/'): 92 | url = url[:-1] 93 | return url 94 | 95 | def get(self, **params): 96 | """Performs get request to the biomart service. 97 | 98 | Args: 99 | **params (dict of str: any): Arbitrary keyword arguments, which 100 | are added as parameters to the get request to biomart. 101 | 102 | Returns: 103 | requests.models.Response: Response from biomart for the request.
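Example (illustrative): >>> server_base = ServerBase(host='http://www.ensembl.org') >>> response = server_base.get(type='registry')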
104 | 105 | """ 106 | if self._use_cache: 107 | r = requests.get(self.url, params=params) 108 | else: 109 | with requests_cache.disabled(): 110 | r = requests.get(self.url, params=params) 111 | r.raise_for_status() 112 | return r 113 | 114 | 115 | class BiomartException(Exception): 116 | """Basic exception class for biomart exceptions.""" 117 | pass 118 | -------------------------------------------------------------------------------- /src/pybiomart/dataset.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | # pylint: disable=wildcard-import,redefined-builtin,unused-wildcard-import 4 | from builtins import * 5 | # pylint: enable=wildcard-import,redefined-builtin,unused-wildcard-import 6 | from future.utils import native_str 7 | 8 | from io import StringIO 9 | from xml.etree import ElementTree 10 | 11 | import pandas as pd 12 | 13 | # pylint: disable=import-error 14 | from .base import ServerBase, BiomartException, DEFAULT_SCHEMA 15 | 16 | # pylint: enable=import-error 17 | 18 | class Dataset(ServerBase): 19 | """Class representing a biomart dataset. 20 | 21 | This class is responsible for handling queries to biomart 22 | datasets. Queries can select a subset of attributes and can be filtered 23 | using any available filters. A list of valid attributes is available in 24 | the attributes property. If no attributes are given, a set of default 25 | attributes is used. A list of valid filters is available in the filters 26 | property. The type of value that can be specified for a given filter 27 | depends on the filter as some filters accept single values, whilst others 28 | can take lists of values. 29 | 30 | Args: 31 | name (str): Id of the dataset. 32 | display_name (str): Display name of the dataset. 33 | host (str): Url of host to connect to. 34 | path (str): Path on the host to access to the biomart service. 35 | port (int): Port to use for the connection. 36 | use_cache (bool): Whether to cache requests. 37 | virtual_schema (str): The virtual schema of the dataset. 
38 | 39 | Examples: 40 | Directly connecting to a dataset: 41 | >>> dataset = Dataset(name='hsapiens_gene_ensembl', 42 | >>> host='http://www.ensembl.org') 43 | 44 | Querying the dataset: 45 | >>> dataset.query(attributes=['ensembl_gene_id', 46 | >>> 'external_gene_name'], 47 | >>> filters={'chromosome_name': ['1','2']}) 48 | 49 | Listing available attributes: 50 | >>> dataset.attributes 51 | >>> dataset.list_attributes() 52 | 53 | Listing available filters: 54 | >>> dataset.filters 55 | >>> dataset.list_filters() 56 | 57 | """ 58 | 59 | def __init__(self, 60 | name, 61 | display_name='', 62 | host=None, 63 | path=None, 64 | port=None, 65 | use_cache=True, 66 | virtual_schema=DEFAULT_SCHEMA): 67 | super().__init__(host=host, path=path, port=port, use_cache=use_cache) 68 | 69 | self._name = name 70 | self._display_name = display_name 71 | self._virtual_schema = virtual_schema 72 | 73 | self._filters = None 74 | self._attributes = None 75 | self._default_attributes = None 76 | 77 | @property 78 | def name(self): 79 | """Name of the dataset (used as dataset id).""" 80 | return self._name 81 | 82 | @property 83 | def display_name(self): 84 | """Display name of the dataset.""" 85 | return self._display_name 86 | 87 | @property 88 | def filters(self): 89 | """List of filters available for the dataset.""" 90 | if self._filters is None: 91 | self._filters, self._attributes = self._fetch_configuration() 92 | return self._filters 93 | 94 | @property 95 | def attributes(self): 96 | """List of attributes available for the dataset (cached).""" 97 | if self._attributes is None: 98 | self._filters, self._attributes = self._fetch_configuration() 99 | return self._attributes 100 | 101 | @property 102 | def default_attributes(self): 103 | """List of default attributes for the dataset.""" 104 | if self._default_attributes is None: 105 | self._default_attributes = { 106 | name: attr 107 | for name, attr in self.attributes.items() 108 | if attr.default is True 109 | } 110 | return self._default_attributes 111 | 112 | def list_attributes(self): 113 | """Lists available attributes in a readable DataFrame format. 114 | 115 | Returns: 116 | pd.DataFrame: Frame listing available attributes. 117 | """ 118 | 119 | def _row_gen(attributes): 120 | for attr in attributes.values(): 121 | yield (attr.name, attr.display_name, attr.description) 122 | 123 | return pd.DataFrame.from_records( 124 | _row_gen(self.attributes), 125 | columns=['name', 'display_name', 'description']) 126 | 127 | def list_filters(self): 128 | """Lists available filters in a readable DataFrame format. 129 | 130 | Returns: 131 | pd.DataFrame: Frame listing available filters. 132 | """ 133 | 134 | def _row_gen(attributes): 135 | for attr in attributes.values(): 136 | yield (attr.name, attr.type, attr.description) 137 | 138 | return pd.DataFrame.from_records( 139 | _row_gen(self.filters), columns=['name', 'type', 'description']) 140 | 141 | def _fetch_configuration(self): 142 | # Get datasets using biomart. 143 | response = self.get(type='configuration', dataset=self._name) 144 | 145 | # Check response for problems. 146 | if 'Problem retrieving configuration' in response.text: 147 | raise BiomartException('Failed to retrieve dataset configuration, ' 148 | 'check the dataset name and schema.') 149 | 150 | # Get filters and attributes from xml. 
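# The configuration XML describes filters as FilterDescription nodes and attributes as AttributeDescription nodes (grouped into AttributePage elements); below these are parsed into Filter and Attribute objects keyed by their internalName, with only attributes on the first page eligible to be defaults.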
151 | xml = ElementTree.fromstring(response.content) 152 | 153 | filters = {f.name: f for f in self._filters_from_xml(xml)} 154 | attributes = {a.name: a for a in self._attributes_from_xml(xml)} 155 | 156 | return filters, attributes 157 | 158 | @staticmethod 159 | def _filters_from_xml(xml): 160 | for node in xml.iter('FilterDescription'): 161 | attrib = node.attrib 162 | yield Filter( 163 | name=attrib['internalName'], type=attrib.get('type', '')) 164 | 165 | @staticmethod 166 | def _attributes_from_xml(xml): 167 | for page_index, page in enumerate(xml.iter('AttributePage')): 168 | for desc in page.iter('AttributeDescription'): 169 | attrib = desc.attrib 170 | 171 | # Default attributes can only be from the first page. 172 | default = (page_index == 0 and 173 | attrib.get('default', '') == 'true') 174 | 175 | yield Attribute( 176 | name=attrib['internalName'], 177 | display_name=attrib.get('displayName', ''), 178 | description=attrib.get('description', ''), 179 | default=default) 180 | 181 | def query(self, 182 | attributes=None, 183 | filters=None, 184 | only_unique=True, 185 | use_attr_names=False, 186 | dtypes = None 187 | ): 188 | """Queries the dataset to retrieve the contained data. 189 | 190 | Args: 191 | attributes (list[str]): Names of attributes to fetch in query. 192 | Attribute names must correspond to valid attributes. See 193 | the attributes property for a list of valid attributes. 194 | filters (dict[str,any]): Dictionary of filters --> values 195 | to filter the dataset by. Filter names and values must 196 | correspond to valid filters and filter values. See the 197 | filters property for a list of valid filters. 198 | only_unique (bool): Whether to return only rows containing 199 | unique values (True) or to include duplicate rows (False). 200 | use_attr_names (bool): Whether to use the attribute names 201 | as column names in the result (True) or the attribute 202 | display names (False). 203 | dtypes (dict[str,any]): Dictionary of attributes --> data types 204 | to describe to pandas how the columns should be handled 205 | 206 | Returns: 207 | pandas.DataFrame: DataFrame containing the query results. 208 | 209 | """ 210 | 211 | # Example query from Ensembl biomart: 212 | # 213 | # 214 | # 215 | # 217 | # 218 | # 219 | # 220 | # 221 | # 222 | # 223 | # 224 | # 225 | 226 | # Setup query element. 227 | root = ElementTree.Element('Query') 228 | root.set('virtualSchemaName', self._virtual_schema) 229 | root.set('formatter', 'TSV') 230 | root.set('header', '1') 231 | root.set('uniqueRows', native_str(int(only_unique))) 232 | root.set('datasetConfigVersion', '0.6') 233 | 234 | # Add dataset element. 235 | dataset = ElementTree.SubElement(root, 'Dataset') 236 | dataset.set('name', self.name) 237 | dataset.set('interface', 'default') 238 | 239 | # Default to default attributes if none requested. 240 | if attributes is None: 241 | attributes = list(self.default_attributes.keys()) 242 | 243 | # Add attribute elements. 244 | for name in attributes: 245 | try: 246 | attr = self.attributes[name] 247 | self._add_attr_node(dataset, attr) 248 | except KeyError: 249 | raise BiomartException( 250 | 'Unknown attribute {}, check dataset attributes ' 251 | 'for a list of valid attributes.'.format(name)) 252 | 253 | if filters is not None: 254 | # Add filter elements. 
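# Filter values are serialized by _add_filter_node below: boolean filters set the 'excluded' attribute ('0' to include, '1' to exclude), list or tuple values are joined into a comma-separated 'value' attribute (e.g. ['1', '2'] becomes value="1,2"), and any other value is passed through str().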
255 | for name, value in filters.items(): 256 | try: 257 | filter_ = self.filters[name] 258 | self._add_filter_node(dataset, filter_, value) 259 | except KeyError: 260 | raise BiomartException( 261 | 'Unknown filter {}, check dataset filters ' 262 | 'for a list of valid filters.'.format(name)) 263 | 264 | # Fetch response. 265 | response = self.get(query=ElementTree.tostring(root)) 266 | 267 | # Raise exception if an error occurred. 268 | if 'Query ERROR' in response.text: 269 | raise BiomartException(response.text) 270 | 271 | # Parse results into a DataFrame. 272 | try: 273 | result = pd.read_csv(StringIO(response.text), sep='\t', dtype=dtypes) 274 | # A TypeError is raised if a data type is not understood by pandas. 275 | except TypeError: 276 | raise ValueError('Invalid data type used in dtypes') 277 | 278 | if use_attr_names: 279 | # Rename columns with attribute names instead of display names. 280 | column_map = { 281 | self.attributes[attr].display_name: attr 282 | for attr in attributes 283 | } 284 | result.rename(columns=column_map, inplace=True) 285 | 286 | return result 287 | 288 | @staticmethod 289 | def _add_attr_node(root, attr): 290 | attr_el = ElementTree.SubElement(root, 'Attribute') 291 | attr_el.set('name', attr.name) 292 | 293 | @staticmethod 294 | def _add_filter_node(root, filter_, value): 295 | """Adds filter xml node to root.""" 296 | filter_el = ElementTree.SubElement(root, 'Filter') 297 | filter_el.set('name', filter_.name) 298 | 299 | # Set filter value depending on type. 300 | if filter_.type == 'boolean': 301 | # Boolean case. 302 | if value is True or value.lower() in {'included', 'only'}: 303 | filter_el.set('excluded', '0') 304 | elif value is False or value.lower() == 'excluded': 305 | filter_el.set('excluded', '1') 306 | else: 307 | raise ValueError('Invalid value for boolean filter ({})' 308 | .format(value)) 309 | elif isinstance(value, list) or isinstance(value, tuple): 310 | # List case. 311 | filter_el.set('value', ','.join(map(str, value))) 312 | else: 313 | # Default case. 314 | filter_el.set('value', str(value)) 315 | 316 | def __repr__(self): 317 | return ('<biomart.Dataset name={!r}, display_name={!r}>' 318 | .format(self._name, self._display_name)) 319 | 320 | 321 | class Attribute(object): 322 | """Biomart dataset attribute. 323 | 324 | Attributes: 325 | name (str): Attribute name. 326 | display_name (str): Attribute display name. 327 | description (str): Attribute description. 328 | 329 | """ 330 | 331 | def __init__(self, name, display_name='', description='', default=False): 332 | """Attribute constructor. 333 | 334 | Args: 335 | name (str): Attribute name. 336 | display_name (str): Attribute display name. 337 | description (str): Attribute description. 338 | default (bool): Whether the attribute is a default 339 | attribute of the corresponding datasets.
340 | 341 | """ 342 | self._name = name 343 | self._display_name = display_name 344 | self._description = description 345 | self._default = default 346 | 347 | @property 348 | def name(self): 349 | """Name of the attribute.""" 350 | return self._name 351 | 352 | @property 353 | def display_name(self): 354 | """Display name of the attribute.""" 355 | return self._display_name 356 | 357 | @property 358 | def description(self): 359 | """Description of the attribute.""" 360 | return self._description 361 | 362 | @property 363 | def default(self): 364 | """Whether this is a default attribute.""" 365 | return self._default 366 | 367 | def __repr__(self): 368 | return (('') 370 | .format(self._name, self._display_name, self._description)) 371 | 372 | 373 | class Filter(object): 374 | """Biomart dataset filter. 375 | 376 | Attributes: 377 | name (str): Filter name. 378 | type (str): Type of the filter (boolean, int, etc.). 379 | description (str): Filter description. 380 | 381 | """ 382 | 383 | def __init__(self, name, type, description=''): 384 | """ Filter constructor. 385 | 386 | Args: 387 | name (str): Filter name. 388 | type (str): Type of the filter (boolean, int, etc.). 389 | description (str): Filter description. 390 | 391 | """ 392 | self._name = name 393 | self._type = type 394 | self._description = description 395 | 396 | @property 397 | def name(self): 398 | """Filter name.""" 399 | return self._name 400 | 401 | @property 402 | def type(self): 403 | """Filter type.""" 404 | return self._type 405 | 406 | @property 407 | def description(self): 408 | """Filter description.""" 409 | return self._description 410 | 411 | def __repr__(self): 412 | return ('' 413 | .format(self.name, self.type)) 414 | -------------------------------------------------------------------------------- /src/pybiomart/mart.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | # pylint: disable=wildcard-import,redefined-builtin,unused-wildcard-import 4 | from builtins import * 5 | # pylint: enable=wildcard-import,redefined-builtin,unused-wildcard-import 6 | 7 | from io import StringIO 8 | 9 | import pandas as pd 10 | 11 | # pylint: disable=import-error 12 | from .base import ServerBase, DEFAULT_SCHEMA 13 | from .dataset import Dataset 14 | # pylint: enable=import-error 15 | 16 | 17 | class Mart(ServerBase): 18 | 19 | """Class representing a biomart mart. 20 | 21 | Used to represent specific mart instances on the server. Provides 22 | functionality for listing and loading the datasets that are available 23 | in the corresponding mart. 24 | 25 | Args: 26 | name (str): Name of the mart. 27 | database_name (str): ID of the mart on the host. 28 | display_name (str): Display name of the mart. 29 | host (str): Url of host to connect to. 30 | path (str): Path on the host to access to the biomart service. 31 | port (int): Port to use for the connection. 32 | use_cache (bool): Whether to cache requests. 33 | virtual_schema (str): The virtual schema of the dataset. 
34 | 35 | Examples: 36 | 37 | Listing datasets: 38 | >>> server = Server(host='http://www.ensembl.org') 39 | >>> mart = server['ENSEMBL_MART_ENSEMBL'] 40 | >>> mart.list_datasets() 41 | 42 | Selecting a dataset: 43 | >>> dataset = mart['hsapiens_gene_ensembl'] 44 | 45 | """ 46 | 47 | RESULT_COLNAMES = ['type', 'name', 'display_name', 'unknown', 'unknown2', 48 | 'unknown3', 'unknown4', 'virtual_schema', 'unknown5'] 49 | 50 | def __init__(self, name, database_name, display_name, 51 | host=None, path=None, port=None, use_cache=True, 52 | virtual_schema=DEFAULT_SCHEMA, extra_params=None): 53 | super().__init__(host=host, path=path, 54 | port=port, use_cache=use_cache) 55 | 56 | self._name = name 57 | self._database_name = database_name 58 | self._display_name = display_name 59 | 60 | self._virtual_schema = virtual_schema 61 | self._extra_params = extra_params 62 | 63 | self._datasets = None 64 | 65 | def __getitem__(self, name): 66 | return self.datasets[name] 67 | 68 | @property 69 | def name(self): 70 | """Name of the mart (used as id).""" 71 | return self._name 72 | 73 | @property 74 | def display_name(self): 75 | """Display name of the mart.""" 76 | return self._display_name 77 | 78 | @property 79 | def database_name(self): 80 | """Database name of the mart on the host.""" 81 | return self._database_name 82 | 83 | @property 84 | def datasets(self): 85 | """List of datasets in this mart.""" 86 | if self._datasets is None: 87 | self._datasets = self._fetch_datasets() 88 | return self._datasets 89 | 90 | def list_datasets(self): 91 | """Lists available datasets in a readable DataFrame format. 92 | 93 | Returns: 94 | pd.DataFrame: Frame listing available datasets. 95 | """ 96 | def _row_gen(attributes): 97 | for attr in attributes.values(): 98 | yield (attr.name, attr.display_name) 99 | 100 | return pd.DataFrame.from_records( 101 | _row_gen(self.datasets), 102 | columns=['name', 'display_name']) 103 | 104 | def _fetch_datasets(self): 105 | # Get datasets using biomart. 106 | response = self.get(type='datasets', mart=self._name) 107 | 108 | # Read result frame from response. 109 | result = pd.read_csv(StringIO(response.text), sep='\t', 110 | header=None, names=self.RESULT_COLNAMES) 111 | 112 | # Convert result to a dict of datasets.
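# Each TSV row describes one dataset; keying the dict by dataset name is what lets lookups such as mart['mmusculus_gene_ensembl'] resolve via __getitem__ above.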
113 | datasets = (self._dataset_from_row(row) 114 | for _, row in result.iterrows()) 115 | 116 | return {d.name: d for d in datasets} 117 | 118 | def _dataset_from_row(self, row): 119 | return Dataset(name=row['name'], display_name=row['display_name'], 120 | host=self.host, path=self.path, 121 | port=self.port, use_cache=self.use_cache, 122 | virtual_schema=row['virtual_schema']) 123 | 124 | def __repr__(self): 125 | return (('') 127 | .format(self._name, self._display_name, 128 | self._database_name)) 129 | -------------------------------------------------------------------------------- /src/pybiomart/server.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | # pylint: disable=wildcard-import,redefined-builtin,unused-wildcard-import 4 | from builtins import * 5 | # pylint: enable=wildcard-import,redefined-builtin,unused-wildcard-import 6 | 7 | from xml.etree.ElementTree import fromstring as xml_from_string 8 | 9 | import pandas as pd 10 | 11 | # pylint: disable=import-error 12 | from .base import ServerBase 13 | from .mart import Mart 14 | 15 | # pylint: enable=import-error 16 | 17 | 18 | class Server(ServerBase): 19 | """Class representing a biomart server. 20 | 21 | Typically used as main entry point to the biomart server. Provides 22 | functionality for listing and loading the marts that are available 23 | on the server. 24 | 25 | Args: 26 | host (str): Url of host to connect to. 27 | path (str): Path on the host to access to the biomart service. 28 | port (int): Port to use for the connection. 29 | use_cache (bool): Whether to cache requests. 30 | 31 | Examples: 32 | Connecting to a server and listing available marts: 33 | >>> server = Server(host='http://www.ensembl.org') 34 | >>> server.list_marts() 35 | 36 | Retrieving a mart: 37 | >>> mart = server['ENSEMBL_MART_ENSEMBL'] 38 | 39 | """ 40 | 41 | _MART_XML_MAP = { 42 | 'name': 'name', 43 | 'database_name': 'database', 44 | 'display_name': 'displayName', 45 | 'host': 'host', 46 | 'path': 'path', 47 | 'virtual_schema': 'serverVirtualSchema' 48 | } 49 | 50 | def __init__(self, host=None, path=None, port=None, use_cache=True): 51 | super().__init__(host=host, path=path, port=port, use_cache=use_cache) 52 | self._marts = None 53 | 54 | def __getitem__(self, name): 55 | return self.marts[name] 56 | 57 | @property 58 | def marts(self): 59 | """List of available marts.""" 60 | if self._marts is None: 61 | self._marts = self._fetch_marts() 62 | return self._marts 63 | 64 | def list_marts(self): 65 | """Lists available marts in a readable DataFrame format. 66 | 67 | Returns: 68 | pd.DataFrame: Frame listing available marts. 
69 | """ 70 | 71 | def _row_gen(attributes): 72 | for attr in attributes.values(): 73 | yield (attr.name, attr.display_name) 74 | 75 | return pd.DataFrame.from_records( 76 | _row_gen(self.marts), columns=['name', 'display_name']) 77 | 78 | def _fetch_marts(self): 79 | response = self.get(type='registry') 80 | 81 | xml = xml_from_string(response.content) 82 | marts = [ 83 | self._mart_from_xml(child) 84 | for child in xml.findall('MartURLLocation') 85 | ] 86 | 87 | return {m.name: m for m in marts} 88 | 89 | def _mart_from_xml(self, node): 90 | params = {k: node.attrib[v] for k, v in self._MART_XML_MAP.items()} 91 | params['extra_params'] = { 92 | k: v 93 | for k, v in node.attrib.items() 94 | if k not in set(self._MART_XML_MAP.values()) 95 | } 96 | return Mart(use_cache=self.use_cache, **params) 97 | 98 | def __repr__(self): 99 | return ('' 100 | .format(self.host, self.path, self.port)) 101 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | import pickle 3 | import pkg_resources 4 | 5 | import pytest 6 | 7 | from pybiomart import Server 8 | 9 | BASE_DIR = path.dirname(__file__) 10 | 11 | 12 | @pytest.helpers.register 13 | def data_path(relative_path, relative_to=BASE_DIR): 14 | """Returns data path to test file.""" 15 | 16 | return path.join(relative_to, 'data', relative_path) 17 | 18 | 19 | class MockResponse(object): 20 | """Mock response class.""" 21 | 22 | def __init__(self, text=''): 23 | self.text = text 24 | self.content = text.encode('utf-8') 25 | 26 | def raise_for_status(self): 27 | """Mock raise_for_status function.""" 28 | pass 29 | 30 | 31 | @pytest.helpers.register 32 | def mock_response(text=''): 33 | """Helper function for creating a mock response.""" 34 | return MockResponse(text=text) 35 | 36 | 37 | @pytest.fixture 38 | def server_marts_response(): 39 | """Returns a cached Server response containing marts.""" 40 | 41 | # Code for saving cached request. 42 | # server = Server(host='http://www.ensembl.org') 43 | # req = server.get(type='registry') 44 | # with open('server_request.pkl', 'wb') as file_: 45 | # pickle.dump(req.text, file=file_, protocol=2) 46 | 47 | # Load cached request. 48 | file_path = pytest.helpers.data_path('marts_response.pkl') 49 | 50 | with open(file_path, 'rb') as file_: 51 | return MockResponse(text=pickle.load(file_)) 52 | 53 | 54 | @pytest.fixture 55 | def mock_mart(mocker, server_marts_response): 56 | """Returns an example mart, built using a cached response.""" 57 | 58 | mocker.patch.object(Server, 'get', return_value=server_marts_response) 59 | 60 | server = Server(host='http://www.ensembl.org') 61 | return server['ENSEMBL_MART_ENSEMBL'] 62 | 63 | 64 | @pytest.fixture 65 | def mart_datasets_response(): 66 | """Returns a cached Mart response containing datasets.""" 67 | 68 | # Code for saving pickle. 69 | # req = mart.get(type='datasets', mart=mart._name) 70 | # with open('mart_request.pkl', 'wb') as file_: 71 | # pickle.dump(req, file=file_, protocol=2) 72 | 73 | # Load cached request. 
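# The pickled response text is wrapped in a MockResponse (via pytest.helpers.mock_response) so the fixture behaves like a requests response without any network access.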
74 | file_path = pytest.helpers.data_path('datasets_response.pkl') 75 | 76 | with open(file_path, 'rb') as file_: 77 | return pytest.helpers.mock_response(text=pickle.load(file_)) 78 | 79 | 80 | @pytest.fixture 81 | def mock_dataset(mocker, mock_mart, mart_datasets_response): 82 | """Returns an example dataset, built using a cached response.""" 83 | 84 | mocker.patch.object(mock_mart, 'get', return_value=mart_datasets_response) 85 | return mock_mart.datasets['mmusculus_gene_ensembl'] 86 | 87 | 88 | @pytest.fixture 89 | def mock_dataset_with_config(mocker, mock_dataset, dataset_config_response): 90 | """Returns an example dataset, mocked to return a configuration.""" 91 | 92 | mocker.patch.object( 93 | mock_dataset, 'get', return_value=dataset_config_response) 94 | mock_dataset.attributes 95 | return mock_dataset 96 | 97 | 98 | @pytest.fixture 99 | def dataset_config_response(): 100 | """Returns a cached Dataset config response.""" 101 | 102 | # Dumped using the following code. 103 | # req = dataset.get(type='configuration', dataset=dataset_.name) 104 | # with open('config_response.pkl', 'wb') as file_: 105 | # pickle.dump(req, file=file_, protocol=2) 106 | 107 | # Load cached request. 108 | file_path = pytest.helpers.data_path('config_response.pkl') 109 | 110 | with open(file_path, 'rb') as file_: 111 | return pytest.helpers.mock_response(pickle.load(file_)) 112 | 113 | 114 | @pytest.fixture 115 | def dataset_query_response(): 116 | """Returns a cached Dataset query response.""" 117 | 118 | # Dumped from inside query using the below code. 119 | # import pickle 120 | # with open('query_response.pkl', 'wb') as file_: 121 | # pickle.dump(response, file=file_, protocol=2) 122 | 123 | # Load cached request. 124 | file_path = pytest.helpers.data_path('query_response.pkl') 125 | 126 | with open(file_path, 'rb') as file_: 127 | return pytest.helpers.mock_response(pickle.load(file_)) 128 | -------------------------------------------------------------------------------- /tests/data/config_response.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrderuiter/pybiomart/7802d45fe88549ab0512d6f37f815fc43b172b39/tests/data/config_response.pkl -------------------------------------------------------------------------------- /tests/data/datasets_response.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrderuiter/pybiomart/7802d45fe88549ab0512d6f37f815fc43b172b39/tests/data/datasets_response.pkl -------------------------------------------------------------------------------- /tests/data/marts_response.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrderuiter/pybiomart/7802d45fe88549ab0512d6f37f815fc43b172b39/tests/data/marts_response.pkl -------------------------------------------------------------------------------- /tests/data/query_response.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrderuiter/pybiomart/7802d45fe88549ab0512d6f37f815fc43b172b39/tests/data/query_response.pkl -------------------------------------------------------------------------------- /tests/test_base.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import requests 3 | 4 | from pybiomart import base 5 | 6 | # pylint: disable=redefined-outer-name, no-self-use 7 | 8 | 9 | @pytest.fixture 10 | def 
default_url(): 11 | """Default URL fixture.""" 12 | return '{}:{}{}'.format(base.DEFAULT_HOST, base.DEFAULT_PORT, 13 | base.DEFAULT_PATH) 14 | 15 | 16 | # pylint: disable=no-self-use 17 | class TestBase(object): 18 | """Tests for ServerBase class.""" 19 | 20 | def test_basic(self, default_url): 21 | """Tests default instantation.""" 22 | 23 | base_obj = base.ServerBase() 24 | 25 | assert base_obj.host == base.DEFAULT_HOST 26 | assert base_obj.path == base.DEFAULT_PATH 27 | assert base_obj.port == base.DEFAULT_PORT 28 | assert base_obj.use_cache 29 | assert base_obj.url == default_url 30 | 31 | def test_params(self): 32 | """Tests instantation with custom args.""" 33 | 34 | base_obj = base.ServerBase( 35 | host='http://www.ensembl.org', 36 | path='/other/path', 37 | port=8080, 38 | use_cache=False) 39 | 40 | assert base_obj.host == 'http://www.ensembl.org' 41 | assert base_obj.path == '/other/path' 42 | assert base_obj.port == 8080 43 | assert not base_obj.use_cache 44 | assert base_obj.url == 'http://www.ensembl.org:8080/other/path' 45 | 46 | def test_missing_http(self): 47 | """Tests url with missing http.""" 48 | 49 | base_obj = base.ServerBase(host='www.ensembl.org') 50 | assert base_obj.host == 'http://www.ensembl.org' 51 | 52 | def test_trailing_slash(self): 53 | """Tests url with trailing slash.""" 54 | 55 | base_obj = base.ServerBase(host='http://www.ensembl.org/') 56 | assert base_obj.host == 'http://www.ensembl.org' 57 | 58 | def test_get(self, mocker, default_url): 59 | """Tests get invocation.""" 60 | 61 | req = pytest.helpers.mock_response() 62 | 63 | mock_get = mocker.patch.object(requests, 'get', return_value=req) 64 | 65 | base_obj = base.ServerBase() 66 | base_obj.get() 67 | 68 | mock_get.assert_called_once_with(default_url, params={}) 69 | 70 | def test_get_with_params(self, mocker, default_url): 71 | """Tests get invocation with custom parameters.""" 72 | 73 | req = pytest.helpers.mock_response() 74 | 75 | mock_get = mocker.patch.object(requests, 'get', return_value=req) 76 | 77 | base_obj = base.ServerBase() 78 | base_obj.get(test=True) 79 | 80 | mock_get.assert_called_once_with(default_url, params={'test': True}) 81 | -------------------------------------------------------------------------------- /tests/test_dataset.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pybiomart import Dataset 4 | from pybiomart.server import Server 5 | 6 | # pylint: disable=redefined-outer-name, no-self-use 7 | 8 | 9 | @pytest.fixture 10 | def query_params(): 11 | """Example query parameters.""" 12 | 13 | return { 14 | 'attributes': ['ensembl_gene_id'], 15 | 'filters': { 16 | 'chromosome_name': ['1'] 17 | } 18 | } 19 | 20 | 21 | class TestDatasetStatic(object): 22 | """Static (offline) tests for Dataset class.""" 23 | 24 | def test_attibutes(self, mock_dataset): 25 | """Tests basic attributes.""" 26 | assert mock_dataset.name == 'mmusculus_gene_ensembl' 27 | assert mock_dataset.display_name == 'Mus musculus genes (GRCm38.p4)' 28 | 29 | def test_fetch_configuration(self, mocker, mock_dataset, 30 | dataset_config_response): 31 | """Tests fetching of filters/attributes.""" 32 | 33 | mock_get = mocker.patch.object( 34 | mock_dataset, 'get', return_value=dataset_config_response) 35 | 36 | assert len(mock_dataset.filters) > 0 37 | assert len(mock_dataset.attributes) > 0 38 | 39 | mock_get.assert_called_once_with( 40 | type='configuration', dataset=mock_dataset.name) 41 | 42 | def test_fetch_attribute(self, mocker, mock_dataset, 
43 | dataset_config_response): 44 | """Tests attributes of example attribute.""" 45 | 46 | mocker.patch.object( 47 | mock_dataset, 'get', return_value=dataset_config_response) 48 | 49 | # Test example attribute. 50 | attr = mock_dataset.attributes['ensembl_gene_id'] 51 | assert attr.name == 'ensembl_gene_id' 52 | assert attr.display_name == 'Ensembl Gene ID' 53 | assert attr.description == 'Ensembl Stable ID of the Gene' 54 | assert attr.default 55 | 56 | def test_fetch_filters(self, mocker, mock_dataset, 57 | dataset_config_response): 58 | """Tests attributes of example filter.""" 59 | 60 | mocker.patch.object( 61 | mock_dataset, 'get', return_value=dataset_config_response) 62 | 63 | # Test example filter. 64 | filt = mock_dataset.filters['chromosome_name'] 65 | assert filt.name == 'chromosome_name' 66 | assert filt.type == 'list' 67 | assert filt.description == '' 68 | 69 | def test_query(self, mocker, mock_dataset_with_config, query_params, 70 | dataset_query_response): 71 | """Tests example query.""" 72 | 73 | mock_dataset = mock_dataset_with_config 74 | 75 | mock_get = mocker.patch.object( 76 | mock_dataset, 'get', return_value=dataset_query_response) 77 | 78 | # Perform query. 79 | res = mock_dataset.query(**query_params) 80 | 81 | # Check query result. 82 | assert len(res) > 0 83 | assert 'Ensembl Gene ID' in res 84 | 85 | # Check query xml. 86 | query = b""" 88 | 89 | 90 | 91 | """ 92 | query = b''.join(query.split(b'\n')) 93 | 94 | mock_get.assert_called_once_with(query=query) 95 | 96 | def test_query_attr_name(self, mocker, mock_dataset_with_config, 97 | query_params, dataset_query_response): 98 | """Tests example query, renaming columns to names.""" 99 | 100 | mock_dataset = mock_dataset_with_config 101 | 102 | mocker.patch.object( 103 | mock_dataset, 'get', return_value=dataset_query_response) 104 | 105 | # Perform query. 106 | res = mock_dataset.query(use_attr_names=True, **query_params) 107 | 108 | # Check query result. 109 | assert len(res) > 0 110 | assert 'ensembl_gene_id' in res 111 | 112 | def test_query_data_types(self, mocker, mock_dataset_with_config, 113 | query_params, dataset_query_response): 114 | """Tests example query with data types specified.""" 115 | 116 | mock_dataset = mock_dataset_with_config 117 | 118 | mock_get = mocker.patch.object( 119 | mock_dataset, 'get', return_value=dataset_query_response) 120 | 121 | data_types = {'Ensembl Gene ID': str} 122 | query_params['dtypes'] = data_types 123 | 124 | # Perform query. 125 | res = mock_dataset.query(**query_params) 126 | 127 | # Check query result. 128 | assert len(res) > 0 129 | assert 'Ensembl Gene ID' in res 130 | 131 | def test_query_non_valid_data_types(self, mocker, mock_dataset_with_config, 132 | query_params, dataset_query_response): 133 | """Tests example query with non valid data types specified.""" 134 | 135 | mock_dataset = mock_dataset_with_config 136 | 137 | mock_get = mocker.patch.object( 138 | mock_dataset, 'get', return_value=dataset_query_response) 139 | 140 | data_types = {'Ensembl Gene ID': 'hello'} 141 | query_params['dtypes'] = data_types 142 | 143 | # Perform query. 
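# An unrecognized dtype makes pandas raise a TypeError inside Dataset.query, which query re-raises as a ValueError (see dataset.py).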
144 | with pytest.raises(ValueError): 145 | res = mock_dataset.query(**query_params) 146 | 147 | 148 | 149 | class TestDatasetLive(object): 150 | """Live unit tests for dataset.""" 151 | 152 | def test_ensembl(self): 153 | """Tests example query to ensembl.""" 154 | 155 | dataset = Dataset( 156 | name='hsapiens_gene_ensembl', 157 | host='http://www.ensembl.org', 158 | use_cache=False) 159 | 160 | result = dataset.query( 161 | attributes=['ensembl_gene_id', 'external_gene_name']) 162 | 163 | assert result.shape[0] > 0 164 | assert result.shape[1] == 2 165 | -------------------------------------------------------------------------------- /tests/test_mart.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pybiomart import mart 4 | from pybiomart.server import Server 5 | 6 | # pylint: disable=redefined-outer-name, no-self-use 7 | 8 | 9 | class TestMartStatic(object): 10 | """Offline unit tests for Mart (using a static response).""" 11 | 12 | def test_attributes(self, mock_mart): 13 | """Tests setting of basic mart attributes.""" 14 | 15 | assert mock_mart.name == 'ENSEMBL_MART_ENSEMBL' 16 | assert mock_mart.display_name == 'Ensembl Genes 84' 17 | assert mock_mart.database_name == 'ensembl_mart_84' 18 | 19 | def test_datasets(self, mocker, mock_mart, mart_datasets_response): 20 | """Tests retrieval of datasets.""" 21 | 22 | mock_get = mocker.patch.object( 23 | mock_mart, 'get', return_value=mart_datasets_response) 24 | 25 | assert len(mock_mart.datasets) > 0 26 | mock_get.assert_called_once_with( 27 | type='datasets', mart='ENSEMBL_MART_ENSEMBL') 28 | 29 | def test_get_item(self, mocker, mock_mart, mart_datasets_response): 30 | """Tests accessing a specific dataset.""" 31 | 32 | mocker.patch.object( 33 | mock_mart, 'get', return_value=mart_datasets_response) 34 | dataset = mock_mart['mmusculus_gene_ensembl'] 35 | 36 | assert dataset.name == 'mmusculus_gene_ensembl' 37 | -------------------------------------------------------------------------------- /tests/test_server.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from pybiomart.server import Server 4 | 5 | # pylint: disable=redefined-outer-name, no-self-use 6 | 7 | 8 | class TestServerStatic(object): 9 | """Offline unit tests for Server (using a static response).""" 10 | 11 | def test_marts(self, mocker, server_marts_response): 12 | """Test fetching marts.""" 13 | 14 | mock_get = mocker.patch.object( 15 | Server, 'get', return_value=server_marts_response) 16 | 17 | server = Server(host='http://www.ensembl.org') 18 | 19 | assert len(server.marts) > 0 20 | mock_get.assert_called_once_with(type='registry') 21 | 22 | def test_get_item(self, mocker, server_marts_response): 23 | """Test getting mart as key.""" 24 | 25 | mock_get = mocker.patch.object( 26 | Server, 'get', return_value=server_marts_response) 27 | 28 | server = Server(host='http://www.ensembl.org') 29 | mart = server['ENSEMBL_MART_ENSEMBL'] 30 | 31 | assert mart.name == 'ENSEMBL_MART_ENSEMBL' 32 | mock_get.assert_called_once_with(type='registry') 33 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27,py34,py35 3 | skipsdist = {env:TOXBUILD:true} 4 | 5 | [testenv] 6 | passenv = LANG 7 | whitelist_externals = rm 8 | commands= 9 | {env:TOXBUILD:rm -f .pybiomart.sqlite} 10 | {env:TOXBUILD:pip install .[dev]} 11 | 
{env:TOXBUILD:py.test tests} 12 | --------------------------------------------------------------------------------