├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── conda_recipe.yaml ├── docs ├── Makefile ├── changelog.rst ├── cite.rst ├── conf.py ├── how_to_use.rst ├── index.rst ├── install.rst ├── intro.rst ├── make.bat └── support.rst ├── pyjaspar ├── .DS_Store ├── __init__.py ├── data │ ├── .DS_Store │ ├── JASPAR2014.sqlite │ ├── JASPAR2016.sqlite │ ├── JASPAR2018.sqlite │ ├── JASPAR2020.sqlite │ ├── JASPAR2022.sqlite │ ├── JASPAR2024.sqlite │ └── __init__.py └── utils.py ├── pyjaspar_notebook.ipynb ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # MAC 10 | .DS_Store 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | 43 | # Sphinx documentation 44 | docs/_build/ 45 | 46 | # PyBuilder 47 | .pybuilder/ 48 | target/ 49 | 50 | # Jupyter Notebook 51 | .ipynb_checkpoints 52 | 53 | # IPython 54 | profile_default/ 55 | ipython_config.py -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.4" 4 | - "3.5" 5 | - "3.6" 6 | - "3.7" 7 | 8 | # command to install dependencies 9 | install: 10 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then 11 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; 12 | else 13 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; 14 | fi 15 | - bash miniconda.sh -b -p $HOME/miniconda 16 | - export PATH="$HOME/miniconda/bin:$PATH" 17 | - hash -r 18 | - conda config --set always_yes yes --set changeps1 no 19 | - conda update -q conda 20 | - conda info -a 21 | 22 | - pip install --user -r requirements.txt 23 | - python setup.py sdist install --user 24 | 25 | # command to run tests 26 | script: 27 | - python --version -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. 
By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 
675 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include LICENSE 3 | include pyjaspar/data/*.sqlite 4 | exclude .gitignore -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | pyJASPAR 2 | -------- 3 | 4 | A Pythonic interface to JASPAR transcription factor motifs 5 | 6 | **pyJASPAR** uses *Biopython* and *SQLite3* to provide a serverless interface to the `JASPAR database `_ to query and access TF motif profiles across various releases of JASPAR. 7 | 8 | 9 | .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4509415.svg 10 | :target: https://doi.org/10.5281/zenodo.4509415 11 | 12 | .. image:: https://travis-ci.org/asntech/pyjaspar.svg?branch=main 13 | :target: https://travis-ci.org/asntech/pyjaspar 14 | 15 | .. image:: https://img.shields.io/pypi/pyversions/pyjaspar.svg 16 | :target: https://www.python.org 17 | 18 | .. image:: https://img.shields.io/pypi/v/pyjaspar.svg 19 | :target: https://pypi.python.org/pypi/pyjaspar 20 | 21 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/version.svg 22 | :target: https://anaconda.org/bioconda/pyjaspar 23 | 24 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/downloads.svg 25 | :target: https://bioconda.github.io/recipes/pyjaspar/README.html 26 | 27 | .. image:: https://img.shields.io/github/issues/asntech/pyjaspar.svg 28 | :target: https://github.com/asntech/pyjaspar/issues 29 | 30 | 31 | pyJASPAR provides access to the following releases of the JASPAR database: *JASPAR2024*, *JASPAR2022*, *JASPAR2020*, *JASPAR2018*, *JASPAR2016*, *JASPAR2014*. 32 | 33 | **Note**: This is a serverless SQLite wrapper around the Biopython JASPAR module `Bio.motifs.jaspar.db`, which requires JASPAR MySQL database server connection details. 
34 | 35 | 36 | Documentation 37 | ------------- 38 | 39 | **A detailed documentation is available in different formats:** `HTML `_ | `PDF `_ | `ePUB `_ 40 | 41 | 42 | Installation 43 | ------------ 44 | 45 | Quick installation using conda 46 | ================================ 47 | pyJASPAR is available on `Bioconda `_ for installation via ``conda``. 48 | 49 | .. code-block:: bash 50 | 51 | conda install -c bioconda pyjaspar 52 | 53 | 54 | Install using pip 55 | ================== 56 | pyJASPAR is also available on `PyPi `_ for installation via ``pip``. 57 | 58 | .. code-block:: bash 59 | 60 | pip install pyjaspar 61 | 62 | 63 | pyJASPAR uses BioPython and it supports python ``3.x``. 64 | 65 | Install pyjaspar from source 66 | ============================= 67 | You can install a development version by using ``git`` from GitHub. 68 | 69 | 70 | Install development version from `GitHub` 71 | ========================================== 72 | If you have `git` installed, use this: 73 | 74 | .. code-block:: bash 75 | 76 | git clone https://github.com/asntech/pyjaspar.git 77 | cd pyjaspar 78 | python setup.py sdist install 79 | 80 | How to use pyJASPAR? 81 | -------------------- 82 | 83 | Once you have installed pyjaspar, you can create jaspardb class object: 84 | 85 | .. 
code-block:: pycon 86 | 87 | >>> from pyjaspar import jaspardb 88 | 89 | #Create the JASPAR2024 release object 90 | >>> jdb_obj = jaspardb(release='JASPAR2024') 91 | 92 | #Fetch motif by ID 93 | >>> motif = jdb_obj.fetch_motif_by_id('MA0095.2') 94 | >>> print(motif.name) 95 | YY1 96 | 97 | #Fetch motifs by TF name 98 | >>> motifs = jdb_obj.fetch_motifs_by_name('KLF4') 99 | >>> print(len(motifs)) 100 | 1 101 | 102 | # Get a dictionary of frequency count matrices 103 | >>> print(motifs[0].counts) 104 | {'A': [2465.0, 2105.0, 7021.0, 1173.0, 45602.0, 852.0, 1617.0, 1202.0], 105 | 'C': [49209.0, 47865.0, 45405.0, 52875.0, 161.0, 52366.0, 51112.0, 51045.0], 106 | 'G': [1583.0, 1214.0, 1422.0, 793.0, 6598.0, 1470.0, 1870.0, 1005.0], 107 | 'T': [2560.0, 4633.0, 1969.0, 976.0, 3456.0, 1129.0, 1218.0, 2565.0]} 108 | 109 | #Get CORE vertebrates non-redundant collection 110 | >>> motifs = jdb_obj.fetch_motifs( 111 | collection = ['CORE'], 112 | tax_group = ['Vertebrates'], 113 | all_versions = False) 114 | >>> print(len(motifs)) 115 | 879 116 | ## loop through the motifs list and perform analysis 117 | >>> for motif in motifs: 118 | pass 119 | 120 | **Note**: The above methods return `Bio.motifs.jaspar.Motif` objects. You can find more details `here `_ 121 | 122 | 123 | Find available releases 124 | ======================= 125 | .. code-block:: pycon 126 | 127 | >>> print(jdb_obj.get_releases()) 128 | ['JASPAR2024','JASPAR2022','JASPAR2020', 'JASPAR2018', 'JASPAR2016', 'JASPAR2014'] 129 | 130 | 131 | Cite 132 | ===== 133 | - Aziz Khan. pyJASPAR: a Pythonic interface to JASPAR transcription factor motifs. (2021). doi:10.5281/zenodo.4509415 134 | 135 | .. 
code-block:: bash 136 | 137 | @software{aziz_khan_2021_4509415, 138 | author = {Aziz Khan}, 139 | title = {{pyJASPAR: a Pythonic interface to JASPAR transcription factor motifs}}, 140 | month = feb, 141 | year = 2021, 142 | publisher = {Zenodo}, 143 | version = {v2.0.0}, 144 | doi = {10.5281/zenodo.4509415}, 145 | url = {https://doi.org/10.5281/zenodo.4509415} 146 | } 147 | -------------------------------------------------------------------------------- /conda_recipe.yaml: -------------------------------------------------------------------------------- 1 | {% set version = "1.0.0" %} 2 | 3 | package: 4 | name: pyjaspar 5 | version: '{{ version }}' 6 | 7 | source: 8 | url: https://pypi.io/packages/source/p/pyjaspar/pyjaspar-{{ version }}.tar.gz 9 | sha256: "97f1e7cc184186a7dc806db9bc9e91b2a858d1a0b54cec96f3d63d1c512a0db2" 10 | 11 | build: 12 | number: 0 13 | noarch: python 14 | script: {{ PYTHON }} -m pip install . --ignore-installed --no-deps -vv 15 | 16 | requirements: 17 | host: 18 | - python >=3.6 19 | - pip 20 | - biopython 21 | run: 22 | - python >=3.6 23 | - biopython 24 | 25 | test: 26 | imports: 27 | - pyjaspar 28 | 29 | about: 30 | home: https://github.com/asntech/pyjaspar 31 | license: GPLv3 32 | license_family: GPL 33 | license_file: LICENSE 34 | summary: "pyJASPAR: a serverless interface to Biopython to access different versions of JASPAR database" 35 | description: "A serverless interface to Biopython to query and access JASPAR motifs from different releases of JASPAR database using sqlite3." 36 | doc_url: 'https://pyjaspar.rtfd.io' 37 | 38 | extra: 39 | recipe-maintainers: 40 | - asntech -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 
6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Changelog 3 | ========= 4 | 5 | Version 3.0.0 6 | ------------- 7 | Released date: September 24, 2023 8 | 9 | Notes: Added the 10th release of JASPAR (JASPAR2024) to the package. 10 | 11 | 12 | Version 2.0.0 13 | ------------- 14 | Released date: September 08, 2021 15 | 16 | Notes: Added the 9th release of JASPAR (JASPAR2022) to the package. 17 | 18 | Version 1.6.0 19 | ------------- 20 | Released date: July 02, 2021 21 | 22 | Notes: Both tf_family and tf_class are now string array. -------------------------------------------------------------------------------- /docs/cite.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | How to cite? 3 | ============ 4 | 5 | If you used **pyJASPAR**, please cite: 6 | 7 | - Aziz Khan. pyJASPAR: a Pythonic interface to JASPAR transcription factor motifs. (2021). doi:10.5281/zenodo.4509415 8 | 9 | And for the specific release of JASPAR database, please cite one of these: 10 | 11 | **JASPAR2020** 12 | 13 | - Fornes O, Castro-Mondragon JA, Khan A, et al. JASPAR 2020: update of the open-access database of transcription factor binding profiles. Nucleic Acids Res. 2020; 48(D1):D87-D92. 
doi: 10.1093/nar/gkz1001 14 | 15 | **JASPAR2018** 16 | 17 | - Khan A, Fornes O, Stigliani A, et al. JASPAR 2018: update of the open-access database of transcription factor binding profiles and its web framework. Nucleic Acids Res. 2018; 46:D260–D266. doi: 10.1093/nar/gkx1126 18 | 19 | **JASPAR2016** 20 | 21 | - Mathelier, A., Fornes, O., Arenillas, et al. JASPAR 2016: a major expansion and update of the open-access database of transcription factor binding profiles. Nucleic Acids Res. 2016; 44:D110-D115. 22 | 23 | **JASPAR2014** 24 | 25 | - Mathelier, A., Zhao, X., Zhang, A. W., et al. JASPAR 2014: an extensively expanded and updated open-access database of transcription factor binding profiles. Nucleic Acids Res. 2014; 42:D142-D147. 26 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
12 | # 13 | # import os 14 | # import sys 15 | # sys.path.insert(0, os.path.abspath('.')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'pyJASPAR' 21 | copyright = '2021, Aziz Khan' 22 | author = 'Aziz Khan' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = 'v3.0.0' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | 'sphinx.ext.autodoc', 35 | 'sphinx.ext.doctest', 36 | 'sphinx.ext.napoleon', 37 | 'sphinx.ext.viewcode', 38 | ] 39 | 40 | # Add any paths that contain templates here, relative to this directory. 41 | templates_path = ['_templates'] 42 | 43 | # List of patterns, relative to source directory, that match files and 44 | # directories to ignore when looking for source files. 45 | # This pattern also affects html_static_path and html_extra_path. 46 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 47 | 48 | 49 | # -- Options for HTML output ------------------------------------------------- 50 | 51 | # The theme to use for HTML and HTML Help pages. See the documentation for 52 | # a list of builtin themes. 53 | # 54 | import sphinx_rtd_theme 55 | html_theme = "sphinx_rtd_theme" 56 | #html_theme = "bizstyle" 57 | #html_theme = 'alabaster' 58 | 59 | # Add any paths that contain custom static files (such as style sheets) here, 60 | # relative to this directory. They are copied after the builtin static files, 61 | # so a file named "default.css" will overwrite the builtin "default.css". 62 | html_static_path = ['_static'] -------------------------------------------------------------------------------- /docs/how_to_use.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | How to use? 
3 | ============ 4 | 5 | Once you have installed `pyjaspar`, you can load the module and connect to the latest release of JASPAR: 6 | 7 | .. code-block:: pycon 8 | 9 | >>> from pyjaspar import jaspardb 10 | 11 | Connect to the JASPAR 12 | ---------------------- 13 | The next step is to connect to the version of JASPAR you're interested in by creating a jaspardb class object. 14 | For example, here we're using JASPAR2018. 15 | 16 | .. code-block:: pycon 17 | 18 | >>> jdb_obj = jaspardb(release='JASPAR2018') 19 | 20 | You can also check the JASPAR version you are connected to using: 21 | 22 | .. code-block:: pycon 23 | 24 | >>> print(jdb_obj.release) 25 | JASPAR2018 26 | 27 | By default, it is set to the latest release/version of the JASPAR database. For example: 28 | 29 | .. code-block:: pycon 30 | 31 | >>> jdb_obj = jaspardb() 32 | >>> print(jdb_obj.release) 33 | JASPAR2020 34 | 35 | 36 | You can also connect to a local copy of the JASPAR SQLite database by setting `sqlite_db_path` to its absolute path. For example: 37 | 38 | .. code-block:: pycon 39 | 40 | >>> jdb_obj = jaspardb(sqlite_db_path='/path/to/jaspar.sqlite') 41 | 42 | 43 | Get available releases 44 | ---------------------- 45 | You can find the available releases/versions of JASPAR using the `get_releases` method. 46 | 47 | 48 | .. code-block:: pycon 49 | 50 | >>> print(jdb_obj.get_releases()) 51 | ['JASPAR2022', 'JASPAR2020', 'JASPAR2018', 'JASPAR2016', 'JASPAR2014'] 52 | 53 | 54 | Get motif by using JASPAR ID 55 | ---------------------------- 56 | You can get the motif details for a specific TF using the JASPAR ID. If you skip the version of the motif, it will return the latest version. 57 | 58 | .. code-block:: pycon 59 | 60 | >>> motif = jdb_obj.fetch_motif_by_id('MA0095.2') 61 | 62 | Printing the motif will show all the associated meta-information stored in the JASPAR database, including the matrix counts. 63 | 64 | .. 
code-block:: pycon 65 | 66 | >>> print(motif) 67 | TF name YY1 68 | Matrix ID MA0095.2 69 | Collection CORE 70 | TF class ['C2H2 zinc finger factors'] 71 | TF family ['More than 3 adjacent zinc finger factors'] 72 | Species 9606 73 | Taxonomic group vertebrates 74 | Accession ['P25490'] 75 | Data type used ChIP-seq 76 | Medline 18950698 77 | Matrix: 78 | 0 1 2 3 4 5 6 7 8 9 10 11 79 | A: 1126.00 6975.00 6741.00 2506.00 7171.00 0.00 11.00 13.00 812.00 867.00 899.00 1332.00 80 | C: 4583.00 0.00 99.00 1117.00 0.00 12.00 0.00 0.00 5637.00 1681.00 875.00 4568.00 81 | G: 801.00 181.00 268.00 3282.00 0.00 0.00 7160.00 7158.00 38.00 2765.00 4655.00 391.00 82 | T: 661.00 15.00 63.00 266.00 0.00 7159.00 0.00 0.00 684.00 1858.00 742.00 880.00 83 | 84 | 85 | Get the count matrix using `.counts` 86 | 87 | 88 | .. code-block:: pycon 89 | 90 | >>> print(motif.counts) 91 | 0 1 2 3 4 5 6 7 8 9 10 11 92 | A: 1126.00 6975.00 6741.00 2506.00 7171.00 0.00 11.00 13.00 812.00 867.00 899.00 1332.00 93 | C: 4583.00 0.00 99.00 1117.00 0.00 12.00 0.00 0.00 5637.00 1681.00 875.00 4568.00 94 | G: 801.00 181.00 268.00 3282.00 0.00 0.00 7160.00 7158.00 38.00 2765.00 4655.00 391.00 95 | T: 661.00 15.00 63.00 266.00 0.00 7159.00 0.00 0.00 684.00 1858.00 742.00 880.00 96 | 97 | 98 | Get motifs by TF name 99 | ----------------------- 100 | You can use the `fetch_motifs_by_name` function to find motifs by TF name. This method returns a list of motifs for the same TF name across taxonomic groups. For example, the search below returns two CTCF motifs, one in vertebrates and another in insects. 101 | 102 | .. 
code-block:: pycon 103 | 104 | >>> motifs = jdb_obj.fetch_motifs_by_name("CTCF") 105 | >>> print(len(motifs)) 106 | 2 107 | >>> print(motifs) 108 | TF name CTCF 109 | Matrix ID MA0139.1 110 | Collection CORE 111 | TF class ['C2H2 zinc finger factors'] 112 | TF family ['More than 3 adjacent zinc finger factors'] 113 | Species 9606 114 | Taxonomic group vertebrates 115 | Accession ['P49711'] 116 | Data type used ChIP-seq 117 | Medline 17512414 118 | Matrix: 119 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 120 | A: 87.00 167.00 281.00 56.00 8.00 744.00 40.00 107.00 851.00 5.00 333.00 54.00 12.00 56.00 104.00 372.00 82.00 117.00 402.00 121 | C: 291.00 145.00 49.00 800.00 903.00 13.00 528.00 433.00 11.00 0.00 3.00 12.00 0.00 8.00 733.00 13.00 482.00 322.00 181.00 122 | G: 76.00 414.00 449.00 21.00 0.00 65.00 334.00 48.00 32.00 903.00 566.00 504.00 890.00 775.00 5.00 507.00 307.00 73.00 266.00 123 | T: 459.00 187.00 134.00 36.00 2.00 91.00 11.00 324.00 18.00 3.00 9.00 341.00 8.00 71.00 67.00 17.00 37.00 396.00 59.00 124 | 125 | 126 | TF name CTCF 127 | Matrix ID MA0531.1 128 | Collection CORE 129 | TF class ['C2H2 zinc finger factors'] 130 | TF family ['More than 3 adjacent zinc finger factors'] 131 | Species 7227 132 | Taxonomic group insects 133 | Accession ['Q9VS55'] 134 | Data type used ChIP-chip 135 | Medline 17616980 136 | Matrix: 137 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 138 | A: 306.00 313.00 457.00 676.00 257.00 1534.00 202.00 987.00 2.00 0.00 2.00 124.00 1.00 79.00 231.00 139 | C: 876.00 1147.00 383.00 784.00 714.00 1.00 0.00 0.00 4.00 0.00 0.00 1645.00 0.00 1514.00 773.00 140 | G: 403.00 219.00 826.00 350.00 87.00 192.00 1700.00 912.00 311.00 1902.00 1652.00 3.00 1807.00 8.00 144.00 141 | T: 317.00 223.00 236.00 92.00 844.00 175.00 0.00 3.00 1585.00 0.00 248.00 130.00 94.00 301.00 754.00 142 | 143 | 144 | Search motifs based on meta-info 145 | --------------------------------- 146 | A more commonly used function, `fetch_motifs`, helps you to get motifs 
which match a specified set of criteria. 147 | You can query the database based on the available meta-information in the database. 148 | 149 | For example, here we are getting the widely used CORE collection for vertebrates. It returns a list of 746 non-redundant motifs for the JASPAR2020 release. 150 | 151 | .. code-block:: pycon 152 | 153 | >>> motifs = jdb_obj.fetch_motifs( 154 | collection = 'CORE', 155 | tax_group = ['vertebrates'] 156 | ) 157 | >>> print(len(motifs)) 158 | 746 159 | 160 | You can loop through these motifs and perform your analysis. 161 | 162 | .. code-block:: pycon 163 | 164 | >>> for motif in motifs: 165 | print(motif.matrix_id) 166 | MA0004.1 167 | MA0006.1 168 | - 169 | - 170 | - 171 | MA0528.2 172 | MA0609.2 173 | 174 | Here is a list of the meta-info arguments that the `fetch_motifs` method takes to filter the motifs. 175 | 176 | .. csv-table:: 177 | :header: "Argument", "Description" 178 | :widths: 10, 80 179 | 180 | "`matrix_id`","Takes precedence over all other selection criteria except 'all'. Only motifs with the given JASPAR matrix ID(s) are returned. A matrix ID may be specified as just a base ID or full JASPAR IDs including version number. If only a base ID is provided for specific motif(s), then just the latest version of those motif(s) are returned unless 'all_versions' is also specified." 181 | "`collection`","Only motifs from the specified JASPAR collection(s) are returned. NOTE - if not specified, the collection defaults to CORE for all other selection criteria except 'all' and 'matrix_id'. To apply the other selection criteria across all JASPAR collections, explicitly set collection=None." 182 | "`tf_name`","Only motifs with the given name(s) are returned." 183 | "`tf_class`","Only motifs of the given TF class(es) are returned." 184 | "`tf_family`","Only motifs from the given TF families are returned." 185 | "`tax_group`","Only motifs belonging to the given taxonomic supergroups are returned (e.g. 
'vertebrates', 'insects', 'nematodes' etc.)" 186 | "`species`","Only motifs derived from the given species are returned. Species are specified as taxonomy IDs." 187 | "`data_type`","Only motifs generated with the given data type (e.g. 'ChIP-seq', 'PBM', 'SELEX' etc.) are returned." 188 | "`pazar_id`","Only motifs with the given PAZAR TF ID are returned." 189 | "`medline`","Only motifs with the given medline (PubMed IDs) are returned." 190 | "`min_ic`","Only motifs whose profile matrices have at least this information content (specificity) are returned." 191 | "`min_length`","Only motifs whose profiles are of at least this length are returned." 192 | "`min_sites`","Only motifs compiled from at least these many binding sites are returned." 193 | "`all_versions`","Unless specified, just the latest version of motifs determined by the other selection criteria are returned. Otherwise all versions of the selected motifs are returned." 194 | "`all`","Takes precedence over all other selection criteria. Every motif is returned. If 'all_versions' is also specified, all versions of every motif are returned, otherwise just the latest version of every motif is returned." 195 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | pyJASPAR Documentation 3 | ======================= 4 | 5 | **Welcome to pyJASPAR**! — a serverless interface to Biopython to query and access JASPAR motifs from different releases of JASPAR database using sqlite3. 6 | 7 | .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4509415.svg 8 | :target: https://doi.org/10.5281/zenodo.4509415 9 | 10 | .. image:: https://travis-ci.org/asntech/pyjaspar.svg?branch=main 11 | :target: https://travis-ci.org/asntech/pyjaspar 12 | 13 | .. image:: https://img.shields.io/pypi/v/pyjaspar.svg 14 | :target: https://pypi.python.org/pypi/pyjaspar 15 | 16 | .. 
image:: https://anaconda.org/bioconda/pyjaspar/badges/version.svg 17 | :target: https://anaconda.org/bioconda/pyjaspar 18 | 19 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/downloads.svg 20 | :target: https://bioconda.github.io/recipes/pyjaspar/README.html 21 | 22 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/installer/conda.svg 23 | :target: https://conda.anaconda.org/bioconda 24 | 25 | .. image:: https://img.shields.io/github/issues/asntech/pyjaspar.svg 26 | :target: https://github.com/asntech/pyjaspar/issues 27 | 28 | 29 | .. toctree:: 30 | :maxdepth: 2 31 | :caption: Table of contents 32 | 33 | intro 34 | install 35 | how_to_use 36 | support 37 | cite 38 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | How to Install? 3 | =============== 4 | pyJASPAR is available on `PyPi `_, through `Bioconda `_, and the source code is available on `GitHub `_. If you already have a working installation of Python, the easiest way to install the required Python modules is by installing pyJASPAR using ``pip``. 5 | 6 | If you're setting up Python for the first time, we recommend installing it using the `Conda or Miniconda Python distribution `_. This comes with several helpful scientific and data processing libraries, and is available for platforms including Windows, Mac OSX and Linux. 7 | 8 | You can use one of the following ways to install pyJASPAR. 9 | 10 | 11 | Install using Conda 12 | ==================== 13 | We highly recommend installing pyJASPAR using Conda, as this will take care of the dependencies. If you already have Conda or Miniconda installed, go ahead and use the command below. 14 | 15 | .. code-block:: bash 16 | 17 | conda install -c bioconda pyjaspar 18 | 19 | .. note:: This will install all the dependencies and you are ready to use **pyJASPAR**. 
20 | 21 | Install using pip 22 | ================== 23 | You can install pyJASPAR from PyPi using pip. 24 | 25 | .. code-block:: bash 26 | 27 | pip install pyjaspar 28 | 29 | .. note:: Make sure you're using Python v3.6 or later. 30 | 31 | 32 | 33 | Install from source 34 | =================== 35 | You can install a development version by using ``git`` from our GitHub repository at https://github.com/asntech/pyjaspar. 36 | 37 | .. code-block:: bash 38 | 39 | git clone https://github.com/asntech/pyjaspar.git 40 | cd pyjaspar 41 | python setup.py sdist install 42 | -------------------------------------------------------------------------------- /docs/intro.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | What is pyJASPAR? 3 | ================= 4 | 5 | pyJASPAR is a Python module and a serverless interface to Biopython to query and access JASPAR motifs from different releases of the JASPAR database using sqlite3. 6 | 7 | .. note:: This is a serverless SQLite wrapper around the Biopython JASPAR module `Bio.motifs.jaspar.db`, which requires JASPAR MySQL database server connection details. 8 | 9 | 10 | Currently, pyJASPAR provides access to JASPAR database releases including: 11 | 12 | - `JASPAR2024` - http://jaspar.genereg.net/ 13 | - `JASPAR2022` - http://jaspar2022.genereg.net/ 14 | - `JASPAR2020` - http://jaspar2020.genereg.net/ 15 | - `JASPAR2018` - http://jaspar2018.genereg.net/ 16 | - `JASPAR2016` - http://jaspar2016.genereg.net/ 17 | - `JASPAR2014` - http://jaspar2014.genereg.net/ 18 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 
11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/support.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Support 3 | ======== 4 | 5 | If you have questions, or find a bug in the program, please write to us at ``azez.khan[at]gmail.com``. 6 | 7 | You can also report issues to our `GitHub repo `_ 8 | -------------------------------------------------------------------------------- /pyjaspar/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/.DS_Store -------------------------------------------------------------------------------- /pyjaspar/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2013 by David Arenillas and Anthony Mathelier. All rights reserved. 2 | # Revisions 2021 copyright by Aziz Khan. All rights reserved. 3 | # This code is part of the Biopython distribution and governed by its 4 | # license. Please see the LICENSE file that should have been included 5 | # as part of this package.
"""Provides read and query access to multiple releases of JASPAR database.

This module requires biopython to be installed.

Example, substitute the database release/version name::

    from pyjaspar import jaspardb

    jdb_obj = jaspardb(release='JASPAR2020')
    motif = jdb_obj.fetch_motif_by_id('MA0095')
    print(motif)
    TF name YY1
    Matrix ID MA0095.2
    Collection CORE
    TF class ['C2H2 zinc finger factors']
    TF family ['More than 3 adjacent zinc finger factors']
    Species 9606
    Taxonomic group vertebrates
    Accession ['P25490']
    Data type used ChIP-seq
    Medline 18950698
    PAZAR ID TF0000069
    Matrix:
            0       1       2       3       4       5       6       7       8       9      10      11
    A: 1126.00 6975.00 6741.00 2506.00 7171.00    0.00   11.00   13.00  812.00  867.00  899.00 1332.00
    C: 4583.00    0.00   99.00 1117.00    0.00   12.00    0.00    0.00 5637.00 1681.00  875.00 4568.00
    G:  801.00  181.00  268.00 3282.00    0.00    0.00 7160.00 7158.00   38.00 2765.00 4655.00  391.00
    T:  661.00   15.00   63.00  266.00    0.00 7159.00    0.00    0.00  684.00 1858.00  742.00  880.00

    motifs = jdb_obj.fetch_motifs(
        collection = 'CORE',
        tax_group = ['vertebrates', 'insects'],
        tf_class = 'Homeo domain factors',
        tf_family = ['TALE-type homeo domain factors', 'POU domain factors'],
        min_ic = 12
    )
    for motif in motifs:
        pass # do something with the motif
"""

__version__ = '3.0.0'

import warnings
from Bio import BiopythonWarning

import sqlite3

from Bio.motifs import jaspar

from .utils import *

# Maps a release name to the SQLite file shipped in the package data folder.
jaspar_releases = {
    'JASPAR2024': 'JASPAR2024.sqlite',
    'JASPAR2022': 'JASPAR2022.sqlite',
    'JASPAR2020': 'JASPAR2020.sqlite',
    'JASPAR2018': 'JASPAR2018.sqlite',
    'JASPAR2016': 'JASPAR2016.sqlite',
    'JASPAR2014': 'JASPAR2014.sqlite',
}

JASPAR_LATEST_RELEASE = "JASPAR2024"

JASPAR_DFLT_COLLECTION = "CORE"


class jaspardb(object):
    """Class representing a JASPAR SQLite database.

    This is adapted from the biopython JASPAR5 MYSQL DB.

    """

    def __init__(self, release=JASPAR_LATEST_RELEASE, sqlite_db_path=None):
        """Construct a jaspardb instance and connect to specified DB.

        By default it connects to the JASPAR_LATEST_RELEASE, which can be
        overridden by using sqlite_db_path.

        Arguments:
         - release - JASPAR release name (e.g. JASPAR2018, JASPAR2020).
           By default the latest available release.
         - sqlite_db_path - path to the JASPAR SQLite file (this will skip
           release)

        Attributes set:
         - conn - JASPAR SQLite connection, or None when the release is
           unknown or the connection could not be opened (the problem is
           printed rather than raised, as before).

        """
        self.sqlite_db_path = sqlite_db_path
        self.release = release
        # Always define conn so that __str__ works after a failed connect.
        self.conn = None

        if sqlite_db_path:
            try:
                self.conn = sqlite3.connect(sqlite_db_path)
                self.release = sqlite_db_path
            except sqlite3.Error as e:
                print(e)
            return

        try:
            release_value = jaspar_releases[release]
        except KeyError:
            print(f"{release} is not available. Available releases are:")
            print(self.get_releases())
            return

        try:
            self.conn = sqlite3.connect(get_jaspardb_path(release_value))
        except sqlite3.Error as e:
            print(e)

    def __str__(self):
        """Return a string represention of the JASPAR DB SQLite connection."""
        return "JASPAR release:%s:%s" % (self.release, self.conn)

    def get_releases(self):
        """Return available JASPAR releases/version.

        Returns:
         - A list of JASPAR available releases

        """
        return list(jaspar_releases)

    def fetch_motif_by_id(self, id):
        """Fetch a single JASPAR motif from the DB by it's JASPAR matrix ID.

        Example id 'MA0001.1'.

        Arguments:
         - id - JASPAR matrix ID. This may be a fully specified ID including
           the version number (e.g. MA0049.2) or just the base ID (e.g.
           MA0049). If only a base ID is provided, the latest version is
           returned.

        Returns:
         - A Bio.motifs.jaspar.Motif object, or None if no motif matches.

        **NOTE:** The perl TFBS module allows you to specify the type of matrix
        to return (PFM, PWM, ICM) but matrices are always stored in JASPAR as
        PFMs so this does not really belong here. Once a PFM is fetched the
        pwm() and pssm() methods can be called to return the normalized and
        log-odds matrices.

        """
        # separate stable ID and version number
        (base_id, version) = jaspar.split_jaspar_id(id)
        if not version:
            # if ID contains no version portion, fetch the latest version
            version = self._fetch_latest_version(base_id)

        # fetch internal JASPAR matrix ID - also a check for validity
        int_id = None
        if version:
            int_id = self._fetch_internal_id(base_id, version)

        # fetch JASPAR motif using internal ID
        motif = None
        if int_id:
            motif = self._fetch_motif_by_internal_id(int_id)

        return motif

    def fetch_motifs_by_name(self, name):
        """Fetch a list of JASPAR motifs from a JASPAR DB by the given TF name(s).

        Arguments:
        name - a single name or list of names
        Returns:
        A list of Bio.motifs.jaspar.Motif objects

        Notes:
        Names are not guaranteed to be unique. There may be more than one
        motif with the same name. Therefore even if name specifies a single
        name, a list of motifs is returned. This just calls
        self.fetch_motifs(collection = None, tf_name = name).

        This behaviour is different from the TFBS perl module's
        get_Matrix_by_name() method which always returns a single matrix,
        issuing a warning message and returning the first matrix retrieved
        in the case where multiple matrices have the same name.

        """
        return self.fetch_motifs(collection=None, tf_name=name)

    def fetch_motifs(
        self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None,
        tf_family=None, matrix_id=None, tax_group=None, species=None,
        pazar_id=None, data_type=None, medline=None, min_ic=0, min_length=0,
        min_sites=0, all=False, all_versions=False
    ):
        """Fetch jaspar.Record (list) of motifs using selection criteria.

        Arguments::

            Except where obvious, all selection criteria arguments may be
            specified as a single value or a list of values. Motifs must
            meet ALL the specified selection criteria to be returned with
            the precedent exceptions noted below.

            all         - Takes precedent of all other selection criteria.
                          Every motif is returned.
            matrix_id   - Takes precedence over all other selection criteria
                          except 'all'. Only motifs with the given JASPAR
                          matrix ID(s) are returned. A matrix ID may be
                          specified as just a base ID or full JASPAR IDs
                          including version number. If only a base ID is
                          provided for specific motif(s), then just the latest
                          version of those motif(s) are returned unless
                          'all_versions' is also specified.
            collection  - Only motifs from the specified JASPAR collection(s)
                          are returned. NOTE - if not specified, the collection
                          defaults to CORE for all other selection criteria
                          except 'all' and 'matrix_id'. To apply the other
                          selection criteria across all JASPAR collections,
                          explicitly set collection=None.
            tf_name     - Only motifs with the given name(s) are returned.
            tf_class    - Only motifs of the given TF class(es) are returned.
            tf_family   - Only motifs from the given TF families are returned.
            tax_group   - Only motifs belonging to the given taxonomic
                          supergroups are returned (e.g. 'vertebrates',
                          'insects', 'nematodes' etc.)
            species     - Only motifs derived from the given species are
                          returned. Species are specified as taxonomy IDs.
            data_type   - Only motifs generated with the given data type (e.g.
                          ('ChIP-seq', 'PBM', 'SELEX' etc.) are returned.
            pazar_id    - Only motifs with the given PAZAR TF ID are returned.
            medline     - Only motifs with the given medline (PubmMed IDs) are
                          returned.
            min_ic      - Only motifs whose profile matrices have at least this
                          information content (specificty) are returned.
            min_length  - Only motifs whose profiles are of at least this
                          length are returned.
            min_sites   - Only motifs compiled from at least these many binding
                          sites are returned.
            all_versions- Unless specified, just the latest version of motifs
                          determined by the other selection criteria are
                          returned. Otherwise all versions of the selected
                          motifs are returned.

        Returns:
         - A Bio.motifs.jaspar.Record (list) of motifs.

        """
        # Fetch the internal IDs of the motifs using the criteria provided
        int_ids = self._fetch_internal_id_list(
            collection=collection,
            tf_name=tf_name,
            tf_class=tf_class,
            tf_family=tf_family,
            matrix_id=matrix_id,
            tax_group=tax_group,
            species=species,
            pazar_id=pazar_id,
            data_type=data_type,
            medline=medline,
            all=all,
            all_versions=all_versions
        )

        record = jaspar.Record()

        # Now further filter motifs returned above based on any specified
        # matrix specific criteria.
        for int_id in int_ids:
            motif = self._fetch_motif_by_internal_id(int_id)
            if motif is None:
                # Already warned about by _fetch_motif_by_internal_id.
                continue

            # Filter motifs to those with matrix IC greater than min_ic
            if min_ic and motif.pssm.mean() < min_ic:
                continue

            # Filter motifs to those with minimum length of min_length
            if min_length and motif.length < min_length:
                continue

            # Filter motifs to those composed of at least this many sites.
            # Column sums are assumed equal, so only the first column is
            # summed.
            if min_sites:
                # Older Biopython exposes an Alphabet object with .letters,
                # newer releases use a plain string; support both.
                letters = getattr(motif.alphabet, "letters", motif.alphabet)
                num_sites = sum(motif.counts[nt][0] for nt in letters)
                if num_sites < min_sites:
                    continue

            record.append(motif)

        return record

    def _fetch_latest_version(self, base_id):
        """Get the latest version number for the given base_id (PRIVATE)."""
        cur = self.conn.cursor()
        cur.execute(
            "select VERSION from MATRIX where BASE_ID = ? COLLATE NOCASE"
            " order by VERSION desc limit 1",
            (base_id,))

        row = cur.fetchone()

        latest = None
        if row:
            latest = row[0]
        else:
            warnings.warn(
                "Failed to fetch latest version number for JASPAR motif"
                f" with base ID '{base_id}'. No JASPAR motif with this"
                " base ID appears to exist in the database.",
                BiopythonWarning)

        return latest

    def _fetch_internal_id(self, base_id, version):
        """Fetch the internal id for a base id + version (PRIVATE).

        Also checks if this combo exists or not.
        """
        cur = self.conn.cursor()
        # COLLATE NOCASE belongs on the textual BASE_ID comparison.
        cur.execute(
            "select id from MATRIX where BASE_ID = ? COLLATE NOCASE"
            " and VERSION = ?",
            (base_id, version))

        row = cur.fetchone()

        int_id = None
        if row:
            int_id = row[0]
        else:
            warnings.warn(
                "Failed to fetch internal database ID for JASPAR motif"
                f" with matrix ID '{base_id}.{version}'. No JASPAR motif"
                " with this matrix ID appears to exist.",
                BiopythonWarning)

        return int_id

    def _fetch_motif_by_internal_id(self, int_id):
        """Fetch a motif and its annotation by internal ID (PRIVATE).

        Returns a Bio.motifs.jaspar.Motif, or None (with a warning) when no
        MATRIX row has this internal ID.
        """
        cur = self.conn.cursor()
        cur.execute(
            "SELECT BASE_ID, VERSION, COLLECTION, NAME FROM MATRIX"
            " WHERE ID = ? COLLATE NOCASE",
            (int_id,))

        row = cur.fetchone()

        # This should never happen as it is an internal method. If it does
        # we should probably raise an exception
        if not row:
            warnings.warn(
                f"Could not fetch JASPAR motif with internal ID = {int_id}",
                BiopythonWarning)
            return None

        base_id, version, collection, name = row[0], row[1], row[2], row[3]
        matrix_id = f"{base_id}.{version}"

        # fetch the counts matrix
        counts = self._fetch_counts_matrix(int_id)

        # Create new JASPAR motif
        motif = jaspar.Motif(
            matrix_id, name, collection=collection, counts=counts
        )

        # fetch species. Many JASPAR motifs (especially those not in the
        # CORE collection) have no taxonomy IDs, so no warning is issued.
        cur.execute(
            "select TAX_ID from MATRIX_SPECIES where id = ?", (int_id,))
        motif.species = [r[0] for r in cur.fetchall()]

        # fetch protein accession numbers (may legitimately be empty too)
        cur.execute(
            "select ACC FROM MATRIX_PROTEIN where id = ? COLLATE NOCASE",
            (int_id,))
        motif.acc = [r[0] for r in cur.fetchall()]

        # fetch remaining annotation as tags from the ANNOTATION table
        cur.execute(
            "select TAG, VAL from MATRIX_ANNOTATION where id = ?", (int_id,))

        # Since jaspar 2018 tf_family and tf_class are returned as lists
        tf_family = []
        tf_class = []
        # Tags stored as a single value, mapped to their Motif attribute.
        # Any other tag is ignored.
        single_valued = {
            "tax_group": "tax_group",
            "type": "data_type",
            "pazar_tf_id": "pazar_id",
            "medline": "medline",
            "comment": "comment",
        }
        for attr, val in cur.fetchall():
            if attr == "class":
                tf_class.append(val)
            elif attr == "family":
                tf_family.append(val)
            elif attr in single_valued:
                setattr(motif, single_valued[attr], val)

        motif.tf_family = tf_family
        motif.tf_class = tf_class

        return motif

    def _fetch_counts_matrix(self, int_id):
        """Fetch the counts matrix from the JASPAR DB by the internal ID (PRIVATE).

        Returns a GenericPositionMatrix over the alphabet "ACGT".
        """
        counts = {}
        cur = self.conn.cursor()

        for base in "ACGT":
            cur.execute(
                "SELECT val from MATRIX_DATA WHERE ID = ? AND row = ?"
                " ORDER BY col",
                (int_id, base))
            counts[base] = [float(r[0]) for r in cur.fetchall()]

        return GenericPositionMatrix("ACGT", counts)

    @staticmethod
    def _as_list(value):
        """Wrap a single selection value in a list (PRIVATE)."""
        if isinstance(value, (list, tuple, set)):
            return list(value)
        return [value]

    @staticmethod
    def _match_clause(column, values, params):
        """Build a case-insensitive, parameterized match clause (PRIVATE).

        Appends the values (as strings) to ``params`` and returns the SQL
        fragment with one ``?`` placeholder per value.
        """
        values = [str(v) for v in jaspardb._as_list(values)]
        params.extend(values)
        if len(values) == 1:
            return f"{column} COLLATE NOCASE = ?"
        placeholders = ", ".join("?" * len(values))
        return f"{column} COLLATE NOCASE in ({placeholders})"

    def _fetch_internal_id_list(
        self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None,
        tf_family=None, matrix_id=None, tax_group=None, species=None,
        pazar_id=None, data_type=None, medline=None, all=False,
        all_versions=False
    ):
        """Fetch list of internal JASPAR motif IDs (PRIVATE).

        Fetch a list of internal JASPAR motif IDs based on various passed
        parameters which may then be used to fetch the rest of the motif data.

        Caller:
            fetch_motifs()

        Arguments:
            See arguments sections of fetch_motifs()

        Returns:
            A list of internal JASPAR motif IDs which match the given
            selection criteria arguments.

        The SQL query is built from the selection arguments:

        1: First add table joins and sub-clauses for criteria corresponding to
           named fields from the MATRIX and MATRIX_SPECIES tables such as
           collection, name, species.

        2: Then add joins/sub-clauses for tag/value parameters from the
           MATRIX_ANNOTATION table.

        All values are passed as bound parameters, and text comparisons use
        COLLATE NOCASE because SQLite compares text case-sensitively by
        default.

        Matrix-based filtering (ic, number of sites etc.) is left to the
        calling fetch_motifs() method.

        """
        int_ids = []

        cur = self.conn.cursor()

        # Special case 1: fetch ALL motifs. Highest priority.
        # Ignore all other selection arguments.
        # NOTE(review): all_versions is not consulted here, so every version
        # of every motif is returned; behaviour kept as-is.
        if all:
            cur.execute("select ID from MATRIX")
            return [row[0] for row in cur.fetchall()]

        # Special case 2: fetch specific motifs by their JASPAR IDs. This
        # has higher priority than any other except the above 'all' case.
        # Ignore all other selection arguments.
        if matrix_id:
            # A bare string is one ID, not a sequence of characters.
            for id in self._as_list(matrix_id):
                (base_id, version) = jaspar.split_jaspar_id(id)

                if all_versions:
                    # ignore any version given; return every version
                    cur.execute(
                        "select ID from MATRIX where BASE_ID = ?"
                        " COLLATE NOCASE",
                        (base_id,))
                    int_ids.extend(row[0] for row in cur.fetchall())
                    continue

                # only the latest version, or the requested version
                if not version:
                    version = self._fetch_latest_version(base_id)

                int_id = None
                if version:
                    int_id = self._fetch_internal_id(base_id, version)

                if int_id:
                    int_ids.append(int_id)

            return int_ids

        tables = ["MATRIX m"]
        where_clauses = []
        # Bound parameters, in the same order as the '?' placeholders.
        params = []

        # Select by MATRIX.COLLECTION
        if collection:
            where_clauses.append(
                self._match_clause("m.COLLECTION", collection, params))

        # Select by MATRIX.NAME
        if tf_name:
            where_clauses.append(
                self._match_clause("m.NAME", tf_name, params))

        # Select by MATRIX_SPECIES.TAX_ID
        # NOTE: species are numeric taxonomy IDs but stored as varchars
        # in the DB, hence they are bound as strings.
        if species:
            tables.append("MATRIX_SPECIES ms")
            where_clauses.append("m.ID = ms.ID")
            where_clauses.append(
                self._match_clause("ms.TAX_ID", species, params))

        # Tag based selection from MATRIX_ANNOTATION. Only the most commonly
        # used tags are selectable; each one needs its own join alias.
        #
        #     Attribute   Tag
        #     tf_class    class
        #     tf_family   family
        #     pazar_id    pazar_tf_id
        #     medline     medline
        #     data_type   type
        #     tax_group   tax_group
        tag_filters = (
            ("class", tf_class),
            ("family", tf_family),
            ("pazar_tf_id", pazar_id),
            ("medline", medline),
            ("type", data_type),
            ("tax_group", tax_group),
        )
        for idx, (tag, wanted) in enumerate(tag_filters, start=1):
            if not wanted:
                continue
            alias = f"ma{idx}"
            tables.append(f"MATRIX_ANNOTATION {alias}")
            where_clauses.append(f"m.ID = {alias}.ID")
            # tag is one of the fixed names above, never user input
            where_clauses.append(f"{alias}.TAG = '{tag}'")
            where_clauses.append(
                self._match_clause(f"{alias}.VAL", wanted, params))

        sql = "select distinct(m.ID) from " + ", ".join(tables)
        if where_clauses:
            sql += " where " + " and ".join(where_clauses)

        cur.execute(sql, params)

        for row in cur.fetchall():
            id = row[0]
            # Unless all versions were asked for, keep only the latest one.
            if all_versions or self._is_latest_version(id):
                int_ids.append(id)

        if len(int_ids) < 1:
            warnings.warn("Zero motifs returned with current select critera",
                          BiopythonWarning)

        return int_ids

    def _is_latest_version(self, int_id):
        """Check if the internal ID represents the latest JASPAR matrix (PRIVATE).

        Does this internal ID represent the latest version of the JASPAR
        matrix (collapse on base ids)
        """
        cur = self.conn.cursor()

        cur.execute("select count(*) from MATRIX where "
                    "BASE_ID = (select BASE_ID from MATRIX where ID = ?) "
                    "and VERSION > (select VERSION from MATRIX where ID = ?) COLLATE NOCASE",
                    (int_id, int_id))

        row = cur.fetchone()

        # True when no matrix with the same base id has a higher version
        return row[0] == 0
class GenericPositionMatrix(dict):
    """Base class for the support of position matrix operations.

    The matrix is a dict mapping each letter of ``alphabet`` to a list of
    per-position values; ``length`` is the number of positions.
    """

    def __init__(self, alphabet, values):
        """Initialize the class.

        Arguments:
         - alphabet - iterable of letters, e.g. "ACGT"
         - values - mapping from each letter to a sequence of values

        Raises an Exception if the letters have sequences of different
        lengths.
        """
        self.length = None
        for letter in alphabet:
            if self.length is None:
                self.length = len(values[letter])
            elif self.length != len(values[letter]):
                raise Exception("data has inconsistent lengths")
            self[letter] = list(values[letter])
        self.alphabet = alphabet

    def __str__(self):
        """Return a string containing nucleotides and counts of the alphabet in the Matrix."""
        words = ["%6d" % i for i in range(self.length)]
        # Pad the header by the width of the "X: " row label so that the
        # position numbers line up with the value columns.
        line = "   " + " ".join(words)
        lines = [line]
        for letter in self.alphabet:
            words = ["%6.2f" % value for value in self[letter]]
            line = "%c: " % letter + " ".join(words)
            lines.append(line)
        text = "\n".join(lines) + "\n"
        return text

    def _select_letters(self, key):
        """Resolve a row key to (letters, is_single_letter) (PRIVATE).

        A slice or tuple of indices selects several letters; an int index
        or a one-character string selects exactly one.
        """
        if isinstance(key, slice):
            start, stop, stride = key.indices(len(self.alphabet))
            letters = [self.alphabet[i] for i in range(start, stop, stride)]
            return letters, False
        if isinstance(key, int):
            return [self.alphabet[key]], True
        if isinstance(key, tuple):
            return [self.alphabet[i] for i in key], False
        if isinstance(key, str):
            if len(key) == 1:
                return [key], True
            raise KeyError(key)
        raise KeyError("Cannot understand key %s" % str(key))

    def __getitem__(self, key):
        """Return the position matrix of index key.

        Supported keys:
         - letter or int index: the list of values for that letter
         - slice or tuple of indices: dict of letter -> list
         - (row key, column key): a single value, a tuple of values, a dict
           of letter -> value, or a dict of letter -> list. When the row key
           selects every letter and the column key is a slice, a new matrix
           of this class is returned.
        """
        if isinstance(key, tuple):
            if len(key) == 2:
                row_key, col_key = key
                letters, one_letter = self._select_letters(row_key)
                if isinstance(col_key, slice):
                    columns = range(*col_key.indices(self.length))
                    one_column = False
                elif isinstance(col_key, int):
                    one_column = True
                else:
                    raise KeyError("Cannot understand key %s" % str(col_key))

                if one_letter and one_column:
                    return dict.__getitem__(self, letters[0])[col_key]
                if one_letter:
                    values = dict.__getitem__(self, letters[0])
                    return tuple(values[i] for i in columns)
                if one_column:
                    return {
                        letter: dict.__getitem__(self, letter)[col_key]
                        for letter in letters
                    }
                d = {}
                for letter in letters:
                    values = dict.__getitem__(self, letter)
                    d[letter] = [values[i] for i in columns]
                # Compare like with like: the alphabet may be a str, which
                # never equals the list that sorted() returns.
                if sorted(letters) == sorted(self.alphabet):
                    return self.__class__(self.alphabet, d)
                return d
            elif len(key) == 1:
                key = key[0]
            else:
                raise KeyError("keys should be 1- or 2-dimensional")

        letters, one_letter = self._select_letters(key)
        if one_letter:
            return dict.__getitem__(self, letters[0])
        return {letter: dict.__getitem__(self, letter) for letter in letters}

    def _extreme_letters(self, pick):
        """Return the letter chosen by ``pick`` at each position (PRIVATE).

        ``pick`` is ``max`` or ``min``; ties go to the letter that comes
        first in the alphabet.
        """
        return "".join(
            pick(self.alphabet, key=lambda letter: self[letter][i])
            for i in range(self.length)
        )

    def _degenerate_letters(self):
        """Return the degenerate consensus as a plain string (PRIVATE)."""
        # Following the rules adapted from
        # D. R. Cavener: "Comparison of the consensus sequence flanking
        # translational start sites in Drosophila and vertebrates."
        # Nucleic Acids Research 15(4): 1353-1361. (1987).
        # The same rules are used by TRANSFAC.
        degenerate_nucleotide = {
            "A": "A",
            "C": "C",
            "G": "G",
            "T": "T",
            "AC": "M",
            "AG": "R",
            "AT": "W",
            "CG": "S",
            "CT": "Y",
            "GT": "K",
            "ACG": "V",
            "ACT": "H",
            "AGT": "D",
            "CGT": "B",
            "ACGT": "N",
        }
        sequence = []
        for i in range(self.length):
            def get(nucleotide):
                return self[nucleotide][i]
            nucleotides = sorted(self, key=get, reverse=True)
            counts = [self[c][i] for c in nucleotides]
            # Follow the Cavener rules:
            if counts[0] > sum(counts[1:]) and counts[0] > 2 * counts[1]:
                key = nucleotides[0]
            elif 4 * sum(counts[:2]) > 3 * sum(counts):
                key = "".join(sorted(nucleotides[:2]))
            elif counts[3] == 0:
                key = "".join(sorted(nucleotides[:3]))
            else:
                key = "ACGT"
            sequence.append(degenerate_nucleotide.get(key, key))
        return "".join(sequence)

    @property
    def consensus(self):
        """Return the consensus sequence."""
        # Imported here because Seq is not among this module's explicit
        # imports.
        from Bio.Seq import Seq
        return Seq(self._extreme_letters(max))

    @property
    def anticonsensus(self):
        """Return the anticonsensus sequence."""
        from Bio.Seq import Seq
        return Seq(self._extreme_letters(min))

    @property
    def degenerate_consensus(self):
        """Return the degenerate consensus sequence."""
        from Bio.Seq import Seq
        return Seq(self._degenerate_letters())

    @property
    def gc_content(self):
        """Compute the fraction GC content."""
        gc_total = 0.0
        total = 0.0
        for letter in self.alphabet:
            letter_sum = sum(self[letter])
            if letter in "CG":
                gc_total += letter_sum
            total += letter_sum
        return gc_total / total

    def reverse_complement(self):
        """Compute reverse complement."""
        values = {}
        if self.alphabet == "ACGU":
            values["A"] = self["U"][::-1]
            values["U"] = self["A"][::-1]
        else:
            values["A"] = self["T"][::-1]
            values["T"] = self["A"][::-1]
        values["G"] = self["C"][::-1]
        values["C"] = self["G"][::-1]
        alphabet = self.alphabet
        return self.__class__(alphabet, values)
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/.DS_Store -------------------------------------------------------------------------------- /pyjaspar/data/JASPAR2014.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2014.sqlite -------------------------------------------------------------------------------- /pyjaspar/data/JASPAR2016.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2016.sqlite -------------------------------------------------------------------------------- /pyjaspar/data/JASPAR2018.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2018.sqlite -------------------------------------------------------------------------------- /pyjaspar/data/JASPAR2020.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2020.sqlite -------------------------------------------------------------------------------- /pyjaspar/data/JASPAR2022.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2022.sqlite -------------------------------------------------------------------------------- /pyjaspar/data/JASPAR2024.sqlite: -------------------------------------------------------------------------------- 
import os


def get_jaspardb_path(fn, sub_dir=None):
    """
    Return the full path of a file inside the pyjaspar data directory.

    The path is built from ``data_dir()``, the optional ``sub_dir`` and
    ``fn``; a ValueError is raised when nothing exists at that path.
    This code is adapted from https://github.com/daler/pybedtools

    """
    components = [data_dir()]
    if sub_dir:
        components.append(sub_dir)
    components.append(fn)
    path = os.path.join(*components)
    if os.path.exists(path):
        return path
    raise ValueError("%s does not exist" % path)


def data_dir():
    """
    Returns the ``data`` directory located next to this module, which
    contains the sqlite files.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'data')
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 22, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from pyjaspar import jaspardb" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Connect to the version of JASPAR you're interested in. This will return a jaspardb class object.\n", 31 | "For example, here we're connecting to JASPAR2024." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 23, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "jdb_obj = jaspardb(release='JASPAR2024')" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "You can also check the JASPAR version you are connected to using:" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 24, 53 | "metadata": { 54 | "scrolled": true 55 | }, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "JASPAR2024\n" 62 | ] 63 | } 64 | ], 65 | "source": [ 66 | "print(jdb_obj.release)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "By default it is set to the latest release/version of the JASPAR database. For example:" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 25, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "JASPAR2024\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "jdb_obj = jaspardb()\n", 91 | "print(jdb_obj.release)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Get available releases\n", 99 | "You can find the available releases/versions of JASPAR using:" 
100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 26, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "['JASPAR2024', 'JASPAR2022', 'JASPAR2020', 'JASPAR2018', 'JASPAR2016', 'JASPAR2014']\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "print(jdb_obj.get_releases())" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "### Get motif by using JASPAR ID\n", 124 | "You can get the motif details for a specific TF using its JASPAR ID. If you skip the version of the motif, it will return the latest version. " 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 27, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "motif = jdb_obj.fetch_motif_by_id('MA0006.1')" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "Printing the motif will show all the associated meta-information stored in the JASPAR database, including the matrix counts." 
141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 28, 146 | "metadata": {}, 147 | "outputs": [ 148 | { 149 | "name": "stdout", 150 | "output_type": "stream", 151 | "text": [ 152 | "TF name\tAhr::Arnt\n", 153 | "Matrix ID\tMA0006.1\n", 154 | "Collection\tCORE\n", 155 | "TF class\t['Basic helix-loop-helix factors (bHLH)', 'Basic helix-loop-helix factors (bHLH)']\n", 156 | "TF family\t['PAS domain factors', 'PAS domain factors']\n", 157 | "Species\t10090\n", 158 | "Taxonomic group\tvertebrates\n", 159 | "Accession\t['P30561', 'P53762']\n", 160 | "Data type used\tSELEX\n", 161 | "Medline\t7592839\n", 162 | "Comments\tdimer\n", 163 | "Matrix:\n", 164 | " 0 1 2 3 4 5\n", 165 | "A: 3.00 0.00 0.00 0.00 0.00 0.00\n", 166 | "C: 8.00 0.00 23.00 0.00 0.00 0.00\n", 167 | "G: 2.00 23.00 0.00 23.00 0.00 24.00\n", 168 | "T: 11.00 1.00 1.00 1.00 24.00 0.00\n", 169 | "\n", 170 | "\n", 171 | "\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "print(motif)" 177 | ] 178 | }, 179 | { 180 | "cell_type": "markdown", 181 | "metadata": {}, 182 | "source": [ 183 | "Get the count matrix using `.counts`" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 29, 189 | "metadata": { 190 | "scrolled": true 191 | }, 192 | "outputs": [ 193 | { 194 | "name": "stdout", 195 | "output_type": "stream", 196 | "text": [ 197 | "[3.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n" 198 | ] 199 | } 200 | ], 201 | "source": [ 202 | "print(motif.counts['A'])" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "### Search motifs by TF name\n", 210 | "You can use the `fetch_motifs_by_name` function to find motifs by TF name. This method returns a list of motifs for the same TF name across taxonomic group. For example, below search will return two CTCF motifs one in vertebrates and another in plants taxon." 
211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 12, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "motifs = jdb_obj.fetch_motifs_by_name(\"CTCF\")" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": 13, 225 | "metadata": {}, 226 | "outputs": [ 227 | { 228 | "name": "stdout", 229 | "output_type": "stream", 230 | "text": [ 231 | "4\n" 232 | ] 233 | } 234 | ], 235 | "source": [ 236 | "print(len(motifs))" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": 14, 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "name": "stdout", 246 | "output_type": "stream", 247 | "text": [ 248 | "TF name\tCTCF\n", 249 | "Matrix ID\tMA0531.2\n", 250 | "Collection\tCORE\n", 251 | "TF class\t['C2H2 zinc finger factors']\n", 252 | "TF family\t['More than 3 adjacent zinc fingers']\n", 253 | "Species\t7227\n", 254 | "Taxonomic group\tinsects\n", 255 | "Accession\t['Q9VS55']\n", 256 | "Data type used\tChIP-chip\n", 257 | "Medline\t17616980\n", 258 | "Matrix:\n", 259 | " 0 1 2 3 4 5 6 7 8 9\n", 260 | "A: 257.00 1534.00 202.00 987.00 2.00 0.00 2.00 124.00 1.00 79.00\n", 261 | "C: 714.00 1.00 0.00 0.00 4.00 0.00 0.00 1645.00 0.00 1514.00\n", 262 | "G: 87.00 192.00 1700.00 912.00 311.00 1902.00 1652.00 3.00 1807.00 8.00\n", 263 | "T: 844.00 175.00 0.00 3.00 1585.00 0.00 248.00 130.00 94.00 301.00\n", 264 | "\n", 265 | "\n", 266 | "\n", 267 | "TF name\tCTCF\n", 268 | "Matrix ID\tMA0139.2\n", 269 | "Collection\tCORE\n", 270 | "TF class\t['C2H2 zinc finger factors']\n", 271 | "TF family\t['More than 3 adjacent zinc fingers']\n", 272 | "Species\t9606\n", 273 | "Taxonomic group\tvertebrates\n", 274 | "Accession\t['P49711']\n", 275 | "Data type used\tChIP-seq\n", 276 | "Medline\t17512414\n", 277 | "Comments\tTF has several motif variants.\n", 278 | "Matrix:\n", 279 | " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14\n", 280 | "A: 281.00 56.00 8.00 744.00 40.00 107.00 851.00 5.00 333.00 54.00 12.00 56.00 104.00 
372.00 82.00\n", 281 | "C: 49.00 800.00 903.00 13.00 528.00 433.00 11.00 0.00 3.00 12.00 0.00 8.00 733.00 13.00 482.00\n", 282 | "G: 449.00 21.00 0.00 65.00 334.00 48.00 32.00 903.00 566.00 504.00 890.00 775.00 5.00 507.00 307.00\n", 283 | "T: 134.00 36.00 2.00 91.00 11.00 324.00 18.00 3.00 9.00 341.00 8.00 71.00 67.00 17.00 37.00\n", 284 | "\n", 285 | "\n", 286 | "\n", 287 | "TF name\tCTCF\n", 288 | "Matrix ID\tMA1929.2\n", 289 | "Collection\tCORE\n", 290 | "TF class\t['C2H2 zinc finger factors']\n", 291 | "TF family\t['More than 3 adjacent zinc fingers']\n", 292 | "Species\t9606\n", 293 | "Taxonomic group\tvertebrates\n", 294 | "Accession\t['P49711']\n", 295 | "Data type used\tChIP-seq\n", 296 | "Medline\t34326481\n", 297 | "Comments\tTF has several motif variants. Extended motif with zinc finger 8 (5bp)\n", 298 | "Matrix:\n", 299 | " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n", 300 | "A: 905.00 914.00 163.00 287.00 4559.00 1658.00 592.00 1133.00 720.00 775.00 2055.00 1473.00 1639.00 1959.00 1569.00 827.00 1483.00 1649.00 730.00 224.00 4283.00 433.00 1251.00 5212.00 210.00 2494.00 495.00 232.00 347.00 1034.00 2737.00\n", 301 | "C: 3839.00 405.00 132.00 5468.00 627.00 1155.00 986.00 777.00 3973.00 3913.00 1851.00 1915.00 2308.00 1412.00 1431.00 2470.00 1392.00 791.00 5068.00 6321.00 357.00 3551.00 2748.00 244.00 114.00 86.00 254.00 76.00 381.00 4715.00 466.00\n", 302 | "G: 1235.00 319.00 6252.00 241.00 390.00 3322.00 571.00 1773.00 764.00 544.00 1141.00 2079.00 1441.00 1429.00 2192.00 942.00 2269.00 3152.00 518.00 79.00 935.00 2483.00 618.00 787.00 6387.00 4085.00 3487.00 6326.00 5406.00 246.00 3084.00\n", 303 | "T: 787.00 5128.00 219.00 770.00 1190.00 631.00 4617.00 3083.00 1309.00 1534.00 1719.00 1299.00 1378.00 1966.00 1574.00 2527.00 1622.00 1174.00 450.00 142.00 1191.00 299.00 2149.00 523.00 55.00 101.00 2530.00 132.00 632.00 771.00 479.00\n", 304 | "\n", 305 | "\n", 306 | "\n", 307 | "TF name\tCTCF\n", 308 | "Matrix 
ID\tMA1930.2\n", 309 | "Collection\tCORE\n", 310 | "TF class\t['C2H2 zinc finger factors']\n", 311 | "TF family\t['More than 3 adjacent zinc fingers']\n", 312 | "Species\t9606\n", 313 | "Taxonomic group\tvertebrates\n", 314 | "Accession\t['P49711']\n", 315 | "Data type used\tChIP-seq\n", 316 | "Medline\t34326481\n", 317 | "Comments\tTF has several motif variants. Extended motif with zinc finger 8 (6bp)\n", 318 | "Matrix:\n", 319 | " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32\n", 320 | "A: 293.00 242.00 41.00 86.00 1986.00 445.00 158.00 346.00 453.00 497.00 714.00 536.00 467.00 664.00 563.00 593.00 452.00 560.00 604.00 178.00 38.00 1906.00 127.00 348.00 2179.00 32.00 1140.00 169.00 62.00 119.00 335.00 1021.00 279.00\n", 321 | "C: 1497.00 107.00 35.00 2129.00 127.00 366.00 278.00 324.00 1333.00 1129.00 794.00 855.00 1138.00 604.00 730.00 902.00 781.00 458.00 300.00 2074.00 2515.00 97.00 1409.00 1016.00 54.00 9.00 14.00 71.00 14.00 138.00 1833.00 164.00 1307.00\n", 322 | "G: 400.00 144.00 2476.00 25.00 155.00 1456.00 179.00 892.00 328.00 419.00 623.00 770.00 561.00 376.00 541.00 494.00 393.00 1041.00 1242.00 155.00 9.00 275.00 961.00 175.00 212.00 2527.00 1399.00 1208.00 2472.00 2117.00 86.00 1161.00 740.00\n", 323 | "T: 386.00 2083.00 24.00 336.00 308.00 309.00 1961.00 1014.00 462.00 531.00 445.00 415.00 410.00 932.00 742.00 587.00 950.00 517.00 430.00 169.00 14.00 298.00 79.00 1037.00 131.00 8.00 23.00 1128.00 28.00 202.00 322.00 230.00 250.00\n", 324 | "\n", 325 | "\n", 326 | "\n" 327 | ] 328 | } 329 | ], 330 | "source": [ 331 | "print(motifs)" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "### Search motifs with \n", 339 | "A more commonly used function is `fetch_motifs` helps you to get motifs which match a specified set of criteria.\n", 340 | "You can query the database based on the available meta-information in the database.\n", 341 | "\n", 342 | "For example, 
here we are getting the widely used CORE collection for vertebrates. It returns a list of non-redundant motifs. " 343 | ] 344 | }, 345 | { 346 | "cell_type": "code", 347 | "execution_count": 15, 348 | "metadata": {}, 349 | "outputs": [], 350 | "source": [ 351 | "motifs = jdb_obj.fetch_motifs(\n", 352 | "collection = ['CORE'],\n", 353 | "tax_group = ['Vertebrates'],\n", 354 | "all_versions = False,\n", 355 | ")" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 16, 361 | "metadata": {}, 362 | "outputs": [ 363 | { 364 | "name": "stdout", 365 | "output_type": "stream", 366 | "text": [ 367 | "879\n" 368 | ] 369 | } 370 | ], 371 | "source": [ 372 | "print(len(motifs))" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 25, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "for motif in motifs:\n", 382 | " #print(motif.matrix_id)\n", 383 | " pass # do something with the motif" 384 | ] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": {}, 389 | "source": [ 390 | "Get the number of non-redundant motifs from CORE collection per-release." 
#!/usr/bin/env python

"""
Setup script for pyJASPAR: a serverless interface to Biopython to access
different versions of the JASPAR database.

This code is free software; you can redistribute it and/or modify it under the terms of the
GNU General Public License (see the file LICENSE included with the distribution).

@author: Aziz Khan
@email: azez.khan@gmail.com
"""
import codecs
import os

# setuptools provides its own setup(); distutils is deprecated (PEP 632) and
# no longer part of the standard library from Python 3.12 on.
from setuptools import find_packages, setup

CLASSIFIERS = [
    'Intended Audience :: Developers',
    'Intended Audience :: Science/Research',
    # Matches license='GPL' below and the GPLv3 text shipped in LICENSE.
    'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Programming Language :: Python :: 3.6',
    'Programming Language :: Python :: 3.7',
    'Programming Language :: Python :: 3.8',
    'Topic :: Scientific/Engineering :: Bio-Informatics',
    'Topic :: Software Development :: Libraries :: Python Modules',
]

install_requires = [
    'wheel',
    'biopython',
]


def read(rel_path):
    """Return the text of the file at ``rel_path``, relative to this script."""
    here = os.path.abspath(os.path.dirname(__file__))
    with codecs.open(os.path.join(here, rel_path), 'r') as fp:
        return fp.read()


def get_version(rel_path):
    """Extract the ``__version__`` string from the file at ``rel_path``.

    The file is scanned as text rather than imported, so the package does
    not have to be importable while it is being built.

    Raises:
        RuntimeError: if no line starts with ``__version__``.
    """
    for line in read(rel_path).splitlines():
        if line.startswith('__version__'):
            delim = '"' if '"' in line else "'"
            return line.split(delim)[1]
    raise RuntimeError("Unable to find version string.")


def readme(fname):
    """Return the text of ``fname``, located next to this script."""
    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(os.path.dirname(__file__), fname)) as fp:
        return fp.read()


setup(
    name="pyjaspar",
    description="A serverless interface to Biopython to access different versions of JASPAR database",
    version=get_version("pyjaspar/__init__.py"),
    author="Aziz Khan",
    license='GPL',
    platforms='linux/unix',
    author_email="azez.khan@gmail.com",
    url="https://github.com/asntech/pyjaspar",
    long_description=readme("README.rst"),
    long_description_content_type='text/x-rst',
    package_dir={'pyjaspar': 'pyjaspar'},

    packages=['pyjaspar',
              'pyjaspar.data'
              ],

    # package_data globs are resolved relative to the package directory
    # (pyjaspar/), so the pattern must not repeat the package name.
    package_data={'pyjaspar': ['data/*.sqlite', ]},
    include_package_data=True,
    install_requires=install_requires,
    classifiers=CLASSIFIERS,
)