├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── conda_recipe.yaml
├── docs
│   ├── Makefile
│   ├── changelog.rst
│   ├── cite.rst
│   ├── conf.py
│   ├── how_to_use.rst
│   ├── index.rst
│   ├── install.rst
│   ├── intro.rst
│   ├── make.bat
│   └── support.rst
├── pyjaspar
│   ├── .DS_Store
│   ├── __init__.py
│   ├── data
│   │   ├── .DS_Store
│   │   ├── JASPAR2014.sqlite
│   │   ├── JASPAR2016.sqlite
│   │   ├── JASPAR2018.sqlite
│   │   ├── JASPAR2020.sqlite
│   │   ├── JASPAR2022.sqlite
│   │   ├── JASPAR2024.sqlite
│   │   └── __init__.py
│   └── utils.py
├── pyjaspar_notebook.ipynb
├── requirements.txt
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 |
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # MAC
10 | .DS_Store
11 |
12 | # Distribution / packaging
13 | .Python
14 | build/
15 | develop-eggs/
16 | dist/
17 | downloads/
18 | eggs/
19 | .eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | wheels/
26 | share/python-wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | MANIFEST
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 |
43 | # Sphinx documentation
44 | docs/_build/
45 |
46 | # PyBuilder
47 | .pybuilder/
48 | target/
49 |
50 | # Jupyter Notebook
51 | .ipynb_checkpoints
52 |
53 | # IPython
54 | profile_default/
55 | ipython_config.py
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - "3.4"
4 | - "3.5"
5 | - "3.6"
6 | - "3.7"
7 |
8 | # command to install dependencies
9 | install:
10 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
11 | wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
12 | else
13 | wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
14 | fi
15 | - bash miniconda.sh -b -p $HOME/miniconda
16 | - export PATH="$HOME/miniconda/bin:$PATH"
17 | - hash -r
18 | - conda config --set always_yes yes --set changeps1 no
19 | - conda update -q conda
20 | - conda info -a
21 |
22 | - pip install --user -r requirements.txt
23 | - python setup.py sdist install --user
24 |
25 | # command to run tests (currently only prints the Python version; no test suite is invoked)
26 | script:
27 | - python --version
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU General Public License is a free, copyleft license for
11 | software and other kinds of works.
12 |
13 | The licenses for most software and other practical works are designed
14 | to take away your freedom to share and change the works. By contrast,
15 | the GNU General Public License is intended to guarantee your freedom to
16 | share and change all versions of a program--to make sure it remains free
17 | software for all its users. We, the Free Software Foundation, use the
18 | GNU General Public License for most of our software; it applies also to
19 | any other work released this way by its authors. You can apply it to
20 | your programs, too.
21 |
22 | When we speak of free software, we are referring to freedom, not
23 | price. Our General Public Licenses are designed to make sure that you
24 | have the freedom to distribute copies of free software (and charge for
25 | them if you wish), that you receive source code or can get it if you
26 | want it, that you can change the software or use pieces of it in new
27 | free programs, and that you know you can do these things.
28 |
29 | To protect your rights, we need to prevent others from denying you
30 | these rights or asking you to surrender the rights. Therefore, you have
31 | certain responsibilities if you distribute copies of the software, or if
32 | you modify it: responsibilities to respect the freedom of others.
33 |
34 | For example, if you distribute copies of such a program, whether
35 | gratis or for a fee, you must pass on to the recipients the same
36 | freedoms that you received. You must make sure that they, too, receive
37 | or can get the source code. And you must show them these terms so they
38 | know their rights.
39 |
40 | Developers that use the GNU GPL protect your rights with two steps:
41 | (1) assert copyright on the software, and (2) offer you this License
42 | giving you legal permission to copy, distribute and/or modify it.
43 |
44 | For the developers' and authors' protection, the GPL clearly explains
45 | that there is no warranty for this free software. For both users' and
46 | authors' sake, the GPL requires that modified versions be marked as
47 | changed, so that their problems will not be attributed erroneously to
48 | authors of previous versions.
49 |
50 | Some devices are designed to deny users access to install or run
51 | modified versions of the software inside them, although the manufacturer
52 | can do so. This is fundamentally incompatible with the aim of
53 | protecting users' freedom to change the software. The systematic
54 | pattern of such abuse occurs in the area of products for individuals to
55 | use, which is precisely where it is most unacceptable. Therefore, we
56 | have designed this version of the GPL to prohibit the practice for those
57 | products. If such problems arise substantially in other domains, we
58 | stand ready to extend this provision to those domains in future versions
59 | of the GPL, as needed to protect the freedom of users.
60 |
61 | Finally, every program is threatened constantly by software patents.
62 | States should not allow patents to restrict development and use of
63 | software on general-purpose computers, but in those that do, we wish to
64 | avoid the special danger that patents applied to a free program could
65 | make it effectively proprietary. To prevent this, the GPL assures that
66 | patents cannot be used to render the program non-free.
67 |
68 | The precise terms and conditions for copying, distribution and
69 | modification follow.
70 |
71 | TERMS AND CONDITIONS
72 |
73 | 0. Definitions.
74 |
75 | "This License" refers to version 3 of the GNU General Public License.
76 |
77 | "Copyright" also means copyright-like laws that apply to other kinds of
78 | works, such as semiconductor masks.
79 |
80 | "The Program" refers to any copyrightable work licensed under this
81 | License. Each licensee is addressed as "you". "Licensees" and
82 | "recipients" may be individuals or organizations.
83 |
84 | To "modify" a work means to copy from or adapt all or part of the work
85 | in a fashion requiring copyright permission, other than the making of an
86 | exact copy. The resulting work is called a "modified version" of the
87 | earlier work or a work "based on" the earlier work.
88 |
89 | A "covered work" means either the unmodified Program or a work based
90 | on the Program.
91 |
92 | To "propagate" a work means to do anything with it that, without
93 | permission, would make you directly or secondarily liable for
94 | infringement under applicable copyright law, except executing it on a
95 | computer or modifying a private copy. Propagation includes copying,
96 | distribution (with or without modification), making available to the
97 | public, and in some countries other activities as well.
98 |
99 | To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies. Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 |
103 | An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License. If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 |
112 | 1. Source Code.
113 |
114 | The "source code" for a work means the preferred form of the work
115 | for making modifications to it. "Object code" means any non-source
116 | form of a work.
117 |
118 | A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 |
123 | The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form. A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 |
134 | The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities. However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work. For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 |
147 | The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 |
151 | The Corresponding Source for a work in source code form is that
152 | same work.
153 |
154 | 2. Basic Permissions.
155 |
156 | All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met. This License explicitly affirms your unlimited
159 | permission to run the unmodified Program. The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work. This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 |
164 | You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force. You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright. Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 |
175 | Conveying under any other circumstances is permitted solely under
176 | the conditions stated below. Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 |
179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 |
181 | No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 |
187 | When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 |
195 | 4. Conveying Verbatim Copies.
196 |
197 | You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 |
205 | You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 |
208 | 5. Conveying Modified Source Versions.
209 |
210 | You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 |
214 | a) The work must carry prominent notices stating that you modified
215 | it, and giving a relevant date.
216 |
217 | b) The work must carry prominent notices stating that it is
218 | released under this License and any conditions added under section
219 | 7. This requirement modifies the requirement in section 4 to
220 | "keep intact all notices".
221 |
222 | c) You must license the entire work, as a whole, under this
223 | License to anyone who comes into possession of a copy. This
224 | License will therefore apply, along with any applicable section 7
225 | additional terms, to the whole of the work, and all its parts,
226 | regardless of how they are packaged. This License gives no
227 | permission to license the work in any other way, but it does not
228 | invalidate such permission if you have separately received it.
229 |
230 | d) If the work has interactive user interfaces, each must display
231 | Appropriate Legal Notices; however, if the Program has interactive
232 | interfaces that do not display Appropriate Legal Notices, your
233 | work need not make them do so.
234 |
235 | A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit. Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 |
245 | 6. Conveying Non-Source Forms.
246 |
247 | You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 |
252 | a) Convey the object code in, or embodied in, a physical product
253 | (including a physical distribution medium), accompanied by the
254 | Corresponding Source fixed on a durable physical medium
255 | customarily used for software interchange.
256 |
257 | b) Convey the object code in, or embodied in, a physical product
258 | (including a physical distribution medium), accompanied by a
259 | written offer, valid for at least three years and valid for as
260 | long as you offer spare parts or customer support for that product
261 | model, to give anyone who possesses the object code either (1) a
262 | copy of the Corresponding Source for all the software in the
263 | product that is covered by this License, on a durable physical
264 | medium customarily used for software interchange, for a price no
265 | more than your reasonable cost of physically performing this
266 | conveying of source, or (2) access to copy the
267 | Corresponding Source from a network server at no charge.
268 |
269 | c) Convey individual copies of the object code with a copy of the
270 | written offer to provide the Corresponding Source. This
271 | alternative is allowed only occasionally and noncommercially, and
272 | only if you received the object code with such an offer, in accord
273 | with subsection 6b.
274 |
275 | d) Convey the object code by offering access from a designated
276 | place (gratis or for a charge), and offer equivalent access to the
277 | Corresponding Source in the same way through the same place at no
278 | further charge. You need not require recipients to copy the
279 | Corresponding Source along with the object code. If the place to
280 | copy the object code is a network server, the Corresponding Source
281 | may be on a different server (operated by you or a third party)
282 | that supports equivalent copying facilities, provided you maintain
283 | clear directions next to the object code saying where to find the
284 | Corresponding Source. Regardless of what server hosts the
285 | Corresponding Source, you remain obligated to ensure that it is
286 | available for as long as needed to satisfy these requirements.
287 |
288 | e) Convey the object code using peer-to-peer transmission, provided
289 | you inform other peers where the object code and Corresponding
290 | Source of the work are being offered to the general public at no
291 | charge under subsection 6d.
292 |
293 | A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 |
297 | A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling. In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage. For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product. A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 |
310 | "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source. The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 |
318 | If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information. But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 |
329 | The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed. Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 |
337 | Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 |
343 | 7. Additional Terms.
344 |
345 | "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law. If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 |
354 | When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it. (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.) You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 |
361 | Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 |
365 | a) Disclaiming warranty or limiting liability differently from the
366 | terms of sections 15 and 16 of this License; or
367 |
368 | b) Requiring preservation of specified reasonable legal notices or
369 | author attributions in that material or in the Appropriate Legal
370 | Notices displayed by works containing it; or
371 |
372 | c) Prohibiting misrepresentation of the origin of that material, or
373 | requiring that modified versions of such material be marked in
374 | reasonable ways as different from the original version; or
375 |
376 | d) Limiting the use for publicity purposes of names of licensors or
377 | authors of the material; or
378 |
379 | e) Declining to grant rights under trademark law for use of some
380 | trade names, trademarks, or service marks; or
381 |
382 | f) Requiring indemnification of licensors and authors of that
383 | material by anyone who conveys the material (or modified versions of
384 | it) with contractual assumptions of liability to the recipient, for
385 | any liability that these contractual assumptions directly impose on
386 | those licensors and authors.
387 |
388 | All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10. If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term. If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 |
398 | If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 |
403 | Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 |
407 | 8. Termination.
408 |
409 | You may not propagate or modify a covered work except as expressly
410 | provided under this License. Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 |
415 | However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 |
422 | Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 |
429 | Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License. If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 |
435 | 9. Acceptance Not Required for Having Copies.
436 |
437 | You are not required to accept this License in order to receive or
438 | run a copy of the Program. Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance. However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work. These actions infringe copyright if you do
443 | not accept this License. Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 |
446 | 10. Automatic Licensing of Downstream Recipients.
447 |
448 | Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License. You are not responsible
451 | for enforcing compliance by third parties with this License.
452 |
453 | An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations. If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 |
463 | You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License. For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 |
471 | 11. Patents.
472 |
473 | A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based. The
475 | work thus licensed is called the contributor's "contributor version".
476 |
477 | A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version. For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 |
487 | Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 |
492 | In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement). To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 |
499 | If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients. "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 |
513 | If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 |
521 | A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License. You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 |
536 | Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 |
540 | 12. No Surrender of Others' Freedom.
541 |
542 | If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License. If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all. For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 |
552 | 13. Use with the GNU Affero General Public License.
553 |
554 | Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work. The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 |
563 | 14. Revised Versions of this License.
564 |
565 | The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time. Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 |
570 | Each version is given a distinguishing version number. If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation. If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 |
579 | If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 |
584 | Later license versions may give you additional or different
585 | permissions. However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 |
589 | 15. Disclaimer of Warranty.
590 |
591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 |
600 | 16. Limitation of Liability.
601 |
602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 |
612 | 17. Interpretation of Sections 15 and 16.
613 |
614 | If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 |
621 | END OF TERMS AND CONDITIONS
622 |
623 | How to Apply These Terms to Your New Programs
624 |
625 | If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 |
629 | To do so, attach the following notices to the program. It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 |
634 |
635 | Copyright (C)
636 |
637 | This program is free software: you can redistribute it and/or modify
638 | it under the terms of the GNU General Public License as published by
639 | the Free Software Foundation, either version 3 of the License, or
640 | (at your option) any later version.
641 |
642 | This program is distributed in the hope that it will be useful,
643 | but WITHOUT ANY WARRANTY; without even the implied warranty of
644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
645 | GNU General Public License for more details.
646 |
647 | You should have received a copy of the GNU General Public License
648 | along with this program. If not, see https://www.gnu.org/licenses/.
649 |
650 | Also add information on how to contact you by electronic and paper mail.
651 |
652 | If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 |
655 | Copyright (C)
656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 | This is free software, and you are welcome to redistribute it
658 | under certain conditions; type `show c' for details.
659 |
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License. Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 |
664 | You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | https://www.gnu.org/licenses/.
668 |
669 | The GNU General Public License does not permit incorporating your program
670 | into proprietary programs. If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library. If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License. But first, please read
674 | https://www.gnu.org/licenses/why-not-lgpl.html.
675 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst
2 | include LICENSE
3 | include pyjaspar/data/*.sqlite
4 | exclude .gitignore
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | pyJASPAR
2 | --------
3 |
4 | A Pythonic interface to JASPAR transcription factor motifs
5 |
6 | **pyJASPAR** uses *Biopython* and *SQLite3* to provide a serverless interface to `JASPAR database `_ to query and access TF motif profiles across various releases of JASPAR.
7 |
8 |
9 | .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4509415.svg
10 | :target: https://doi.org/10.5281/zenodo.4509415
11 |
12 | .. image:: https://travis-ci.org/asntech/pyjaspar.svg?branch=main
13 | :target: https://travis-ci.org/asntech/pyjaspar
14 |
15 | .. image:: https://img.shields.io/pypi/pyversions/pyjaspar.svg
16 | :target: https://www.python.org
17 |
18 | .. image:: https://img.shields.io/pypi/v/pyjaspar.svg
19 | :target: https://pypi.python.org/pypi/pyjaspar
20 |
21 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/version.svg
22 | :target: https://anaconda.org/bioconda/pyjaspar
23 |
24 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/downloads.svg
25 | :target: https://bioconda.github.io/recipes/pyjaspar/README.html
26 |
27 | .. image:: https://img.shields.io/github/issues/asntech/pyjaspar.svg
28 | :target: https://github.com/asntech/pyjaspar/issues
29 |
30 |
31 | pyJASPAR provides access to the following releases of JASPAR database: *JASPAR2024*, *JASPAR2022*, *JASPAR2020*, *JASPAR2018*, *JASPAR2016*, *JASPAR2014*.
32 |
33 | **Note**: This is a serverless SQLite wrapper around the Biopython JASPAR module `Bio.motifs.jaspar.db` which requires JASPAR MySQL database server connection details.
34 |
35 |
36 | Documentation
37 | -------------
38 |
39 | **A detailed documentation is available in different formats:** `HTML `_ | `PDF `_ | `ePUB `_
40 |
41 |
42 | Installation
43 | ------------
44 |
45 | Quick installation using conda
46 | ================================
47 | pyJASPAR is available on `Bioconda `_ for installation via ``conda``.
48 |
49 | .. code-block:: bash
50 |
51 | conda install -c bioconda pyjaspar
52 |
53 |
54 | Install using pip
55 | ==================
56 | pyJASPAR is also available on `PyPi `_ for installation via ``pip``.
57 |
58 | .. code-block:: bash
59 |
60 | pip install pyjaspar
61 |
62 |
63 | pyJASPAR uses BioPython and it supports python ``3.x``.
64 |
65 | Install pyjaspar from source
66 | =============================
67 | You can install a development version by using ``git`` from GitHub.
68 |
69 |
70 | Install development version from `GitHub`
71 | ==========================================
72 | If you have `git` installed, use this:
73 |
74 | .. code-block:: bash
75 |
76 | git clone https://github.com/asntech/pyjaspar.git
77 | cd pyjaspar
78 | python setup.py sdist install
79 |
80 | How to use pyJASPAR?
81 | --------------------
82 |
83 | Once you have installed pyjaspar, you can create jaspardb class object:
84 |
85 | .. code-block:: pycon
86 |
87 | >>> from pyjaspar import jaspardb
88 |
89 | #Create the JASPAR2024 release object
90 | >>> jdb_obj = jaspardb(release='JASPAR2024')
91 |
92 | #Fetch motif by ID
93 | >>> motif = jdb_obj.fetch_motif_by_id('MA0095.2')
94 | >>> print(motif.name)
95 | YY1
96 |
97 | #Fetch motifs by TF name
98 | >>> motifs = jdb_obj.fetch_motifs_by_name('KLF4')
99 | >>> print(len(motifs))
100 | 1
101 |
102 | # Get a dictionary of frequency count matrices
103 | >>> print(motifs[0].counts)
104 | {'A': [2465.0, 2105.0, 7021.0, 1173.0, 45602.0, 852.0, 1617.0, 1202.0],
105 | 'C': [49209.0, 47865.0, 45405.0, 52875.0, 161.0, 52366.0, 51112.0, 51045.0],
106 | 'G': [1583.0, 1214.0, 1422.0, 793.0, 6598.0, 1470.0, 1870.0, 1005.0],
107 | 'T': [2560.0, 4633.0, 1969.0, 976.0, 3456.0, 1129.0, 1218.0, 2565.0]}
108 |
109 | #Get CORE vertebrates non-redundant collection
110 | >>> motifs = jdb_obj.fetch_motifs(
111 | collection = ['CORE'],
112 | tax_group = ['Vertebrates'],
113 | all_versions = False)
114 | >>> print(len(motifs))
115 | 879
116 | ## loop through the motifs list and perform analysis
117 | >>> for motif in motifs:
118 | pass
119 |
120 | **Note**: Above methods return `Bio.motifs.jaspar.Motif` object. You can find more details `here `_
121 |
122 |
123 | Find available releases
124 | =======================
125 | .. code-block:: pycon
126 |
127 | >>> print(jdb_obj.get_releases())
128 | ['JASPAR2024','JASPAR2022','JASPAR2020', 'JASPAR2018', 'JASPAR2016', 'JASPAR2014']
129 |
130 |
131 | Cite
132 | =====
133 | - Aziz Khan. pyJASPAR: a Pythonic interface to JASPAR transcription factor motifs. (2021). doi:10.5281/zenodo.4509415
134 |
135 | .. code-block:: bash
136 |
137 | @software{aziz_khan_2021_4509415,
138 | author = {Aziz Khan},
139 | title = {{pyJASPAR: a Pythonic interface to JASPAR transcription factor motifs}},
140 | month = feb,
141 | year = 2021,
142 | publisher = {Zenodo},
143 | version = {v2.0.0},
144 | doi = {10.5281/zenodo.4509415},
145 | url = {https://doi.org/10.5281/zenodo.4509415}
146 | }
147 |
--------------------------------------------------------------------------------
/conda_recipe.yaml:
--------------------------------------------------------------------------------
1 | {% set version = "1.0.0" %}
2 |
3 | package:
4 | name: pyjaspar
5 | version: '{{ version }}'
6 |
7 | source:
8 | url: https://pypi.io/packages/source/p/pyjaspar/pyjaspar-{{ version }}.tar.gz
9 | sha256: "97f1e7cc184186a7dc806db9bc9e91b2a858d1a0b54cec96f3d63d1c512a0db2"
10 |
11 | build:
12 | number: 0
13 | noarch: python
14 | script: {{ PYTHON }} -m pip install . --ignore-installed --no-deps -vv
15 |
16 | requirements:
17 | host:
18 | - python >=3.6
19 | - pip
20 | - biopython
21 | run:
22 | - python >=3.6
23 | - biopython
24 |
25 | test:
26 | imports:
27 | - pyjaspar
28 |
29 | about:
30 | home: https://github.com/asntech/pyjaspar
31 | license: GPLv3
32 | license_family: GPL
33 | license_file: LICENSE
34 | summary: "pyJASPAR: a serverless interface to Biopython to access different versions of JASPAR database"
35 | description: "A serverless interface to Biopython to query and access JASPAR motifs from different releases of JASPAR database using sqlite3."
36 | doc_url: 'https://pyjaspar.rtfd.io'
37 |
38 | extra:
39 | recipe-maintainers:
40 | - asntech
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/changelog.rst:
--------------------------------------------------------------------------------
1 | =========
2 | Changelog
3 | =========
4 |
5 | Version 3.0.0
6 | -------------
7 | Released date: September 24, 2023
8 |
9 | Notes: Added the 10th release of JASPAR (JASPAR2024) to the package.
10 |
11 |
12 | Version 2.0.0
13 | -------------
14 | Released date: September 08, 2021
15 |
16 | Notes: Added the 9th release of JASPAR (JASPAR2022) to the package.
17 |
18 | Version 1.6.0
19 | -------------
20 | Released date: July 02, 2021
21 |
22 | Notes: Both tf_family and tf_class are now string arrays.
--------------------------------------------------------------------------------
/docs/cite.rst:
--------------------------------------------------------------------------------
1 | ============
2 | How to cite?
3 | ============
4 |
5 | If you used **pyJASPAR**, please cite:
6 |
7 | - Aziz Khan. pyJASPAR: a Pythonic interface to JASPAR transcription factor motifs. (2021). doi:10.5281/zenodo.4509415
8 |
9 | And for the specific release of JASPAR database, please cite one of these:
10 |
11 | **JASPAR2020**
12 |
13 | - Fornes O, Castro-Mondragon JA, Khan A, et al. JASPAR 2020: update of the open-access database of transcription factor binding profiles. Nucleic Acids Res. 2020; 48(D1):D87-D92. doi: 10.1093/nar/gkz1001
14 |
15 | **JASPAR2018**
16 |
17 | - Khan A, Fornes O, Stigliani A, et al. JASPAR 2018: update of the open-access database of transcription factor binding profiles and its web framework. Nucleic Acids Res. 2018; 46:D260–D266. doi: 10.1093/nar/gkx1126
18 |
19 | **JASPAR2016**
20 |
21 | - Mathelier, A., Fornes, O., Arenillas, et al. JASPAR 2016: a major expansion and update of the open-access database of transcription factor binding profiles. Nucleic Acids Res. 2016; 44:D110-D115.
22 |
23 | **JASPAR2014**
24 |
25 | - Mathelier, A., Zhao, X., Zhang, A. W., et al. JASPAR 2014: an extensively expanded and updated open-access database of transcription factor binding profiles. Nucleic Acids Res. 2014; 42:D142-D147.
26 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = 'pyJASPAR'
21 | copyright = '2021, Aziz Khan'
22 | author = 'Aziz Khan'
23 |
24 | # The full version, including alpha/beta/rc tags
25 | release = 'v3.0.0'
26 |
27 |
28 | # -- General configuration ---------------------------------------------------
29 |
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = [
34 | 'sphinx.ext.autodoc',
35 | 'sphinx.ext.doctest',
36 | 'sphinx.ext.napoleon',
37 | 'sphinx.ext.viewcode',
38 | ]
39 |
40 | # Add any paths that contain templates here, relative to this directory.
41 | templates_path = ['_templates']
42 |
43 | # List of patterns, relative to source directory, that match files and
44 | # directories to ignore when looking for source files.
45 | # This pattern also affects html_static_path and html_extra_path.
46 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
47 |
48 |
49 | # -- Options for HTML output -------------------------------------------------
50 |
51 | # The theme to use for HTML and HTML Help pages. See the documentation for
52 | # a list of builtin themes.
53 | #
54 | import sphinx_rtd_theme
55 | html_theme = "sphinx_rtd_theme"
56 | #html_theme = "bizstyle"
57 | #html_theme = 'alabaster'
58 |
59 | # Add any paths that contain custom static files (such as style sheets) here,
60 | # relative to this directory. They are copied after the builtin static files,
61 | # so a file named "default.css" will overwrite the builtin "default.css".
62 | html_static_path = ['_static']
--------------------------------------------------------------------------------
/docs/how_to_use.rst:
--------------------------------------------------------------------------------
1 | ============
2 | How to use?
3 | ============
4 |
5 | Once you have installed `pyjaspar`, you can load the module and connect to the latest release of JASPAR:
6 |
7 | .. code-block:: pycon
8 |
9 | >>> from pyjaspar import jaspardb
10 |
11 | Connect to the JASPAR
12 | ----------------------
13 | Next step is to connect to the version of JASPAR you're interested in by creating a jaspardb class object.
14 | For example, here we're using JASPAR2018.
15 |
16 | .. code-block:: pycon
17 |
18 | >>> jdb_obj = jaspardb(release='JASPAR2018')
19 |
20 | You can also check JASPAR version you are connected to using:
21 |
22 | .. code-block:: pycon
23 |
24 | >>> print(jdb_obj.release)
25 | JASPAR2018
26 |
27 | By default it is set to latest release/version of JASPAR database. For example.
28 |
29 | .. code-block:: pycon
30 |
31 | >>> jdb_obj = jaspardb()
32 | >>> print(jdb_obj.release)
33 | JASPAR2024
34 |
35 |
36 | You can also connect to a local copy of JASPAR SQLite database by setting absolute path `sqlite_db_path`. For example.
37 |
38 | .. code-block:: pycon
39 |
40 | >>> jdb_obj = jaspardb(sqlite_db_path='/path/to/jaspar.sqlite')
41 |
42 |
43 | Get available releases
44 | ----------------------
45 | You can find the available releases/version of JASPAR using `get_releases` method.
46 |
47 |
48 | .. code-block:: pycon
49 |
50 | >>> print(jdb_obj.get_releases())
51 | ['JASPAR2024', 'JASPAR2022', 'JASPAR2020', 'JASPAR2018', 'JASPAR2016', 'JASPAR2014']
52 |
53 |
54 | Get motif by using JASPAR ID
55 | ----------------------------
56 | You can get the motif details for a specific TF using its JASPAR ID. If you skip the version of the motif, it will return the latest version.
57 |
58 | .. code-block:: pycon
59 |
60 | >>> motif = jdb_obj.fetch_motif_by_id('MA0095.2')
61 |
62 | Printing the motif will show all the associated meta-information stored in the JASPAR database, including the matrix counts.
63 |
64 | .. code-block:: pycon
65 |
66 | >>> print(motif)
67 | TF name YY1
68 | Matrix ID MA0095.2
69 | Collection CORE
70 | TF class ['C2H2 zinc finger factors']
71 | TF family ['More than 3 adjacent zinc finger factors']
72 | Species 9606
73 | Taxonomic group vertebrates
74 | Accession ['P25490']
75 | Data type used ChIP-seq
76 | Medline 18950698
77 | Matrix:
78 | 0 1 2 3 4 5 6 7 8 9 10 11
79 | A: 1126.00 6975.00 6741.00 2506.00 7171.00 0.00 11.00 13.00 812.00 867.00 899.00 1332.00
80 | C: 4583.00 0.00 99.00 1117.00 0.00 12.00 0.00 0.00 5637.00 1681.00 875.00 4568.00
81 | G: 801.00 181.00 268.00 3282.00 0.00 0.00 7160.00 7158.00 38.00 2765.00 4655.00 391.00
82 | T: 661.00 15.00 63.00 266.00 0.00 7159.00 0.00 0.00 684.00 1858.00 742.00 880.00
83 |
84 |
85 | Get the count matrix using `.counts`
86 |
87 |
88 | .. code-block:: pycon
89 |
90 | >>> print(motif.counts)
91 | 0 1 2 3 4 5 6 7 8 9 10 11
92 | A: 1126.00 6975.00 6741.00 2506.00 7171.00 0.00 11.00 13.00 812.00 867.00 899.00 1332.00
93 | C: 4583.00 0.00 99.00 1117.00 0.00 12.00 0.00 0.00 5637.00 1681.00 875.00 4568.00
94 | G: 801.00 181.00 268.00 3282.00 0.00 0.00 7160.00 7158.00 38.00 2765.00 4655.00 391.00
95 | T: 661.00 15.00 63.00 266.00 0.00 7159.00 0.00 0.00 684.00 1858.00 742.00 880.00
96 |
97 |
98 | Get motifs by TF name
99 | -----------------------
100 | You can use the `fetch_motifs_by_name` function to find motifs by TF name. This method returns a list of motifs for the same TF name across taxonomic groups. For example, the search below returns two CTCF motifs, one in vertebrates and another in insects.
101 |
102 | .. code-block:: pycon
103 |
104 | >>> motifs = jdb_obj.fetch_motifs_by_name("CTCF")
105 | >>> print(len(motifs))
106 | 2
107 | >>> print(motifs)
108 | TF name CTCF
109 | Matrix ID MA0139.1
110 | Collection CORE
111 | TF class ['C2H2 zinc finger factors']
112 | TF family ['More than 3 adjacent zinc finger factors']
113 | Species 9606
114 | Taxonomic group vertebrates
115 | Accession ['P49711']
116 | Data type used ChIP-seq
117 | Medline 17512414
118 | Matrix:
119 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
120 | A: 87.00 167.00 281.00 56.00 8.00 744.00 40.00 107.00 851.00 5.00 333.00 54.00 12.00 56.00 104.00 372.00 82.00 117.00 402.00
121 | C: 291.00 145.00 49.00 800.00 903.00 13.00 528.00 433.00 11.00 0.00 3.00 12.00 0.00 8.00 733.00 13.00 482.00 322.00 181.00
122 | G: 76.00 414.00 449.00 21.00 0.00 65.00 334.00 48.00 32.00 903.00 566.00 504.00 890.00 775.00 5.00 507.00 307.00 73.00 266.00
123 | T: 459.00 187.00 134.00 36.00 2.00 91.00 11.00 324.00 18.00 3.00 9.00 341.00 8.00 71.00 67.00 17.00 37.00 396.00 59.00
124 |
125 |
126 | TF name CTCF
127 | Matrix ID MA0531.1
128 | Collection CORE
129 | TF class ['C2H2 zinc finger factors']
130 | TF family ['More than 3 adjacent zinc finger factors']
131 | Species 7227
132 | Taxonomic group insects
133 | Accession ['Q9VS55']
134 | Data type used ChIP-chip
135 | Medline 17616980
136 | Matrix:
137 | 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
138 | A: 306.00 313.00 457.00 676.00 257.00 1534.00 202.00 987.00 2.00 0.00 2.00 124.00 1.00 79.00 231.00
139 | C: 876.00 1147.00 383.00 784.00 714.00 1.00 0.00 0.00 4.00 0.00 0.00 1645.00 0.00 1514.00 773.00
140 | G: 403.00 219.00 826.00 350.00 87.00 192.00 1700.00 912.00 311.00 1902.00 1652.00 3.00 1807.00 8.00 144.00
141 | T: 317.00 223.00 236.00 92.00 844.00 175.00 0.00 3.00 1585.00 0.00 248.00 130.00 94.00 301.00 754.00
142 |
143 |
144 | Search motifs based on meta-info
145 | ---------------------------------
146 | A more commonly used function is `fetch_motifs`, which helps you to get motifs that match a specified set of criteria.
147 | You can query the database based on the available meta-information in the database.
148 |
149 | For example, here we are getting the widely used CORE collection for vertebrates. It returns a list of 746 non-redundant motifs for the JASPAR2020 release.
150 |
151 | .. code-block:: pycon
152 |
153 | >>> motifs = jdb_obj.fetch_motifs(
154 | collection = 'CORE',
155 | tax_group = ['vertebrates']
156 | )
157 | >>> print(len(motifs))
158 | 746
159 |
160 | You can loop through these motifs and perform your analysis.
161 |
162 | .. code-block:: pycon
163 |
164 | >>> for motif in motifs:
165 | print(motif.matrix_id)
166 | MA0004.1
167 | MA0006.1
168 | -
169 | -
170 | -
171 | MA0528.2
172 | MA0609.2
173 |
174 | Here is a list of the meta-info that the `fetch_motifs` method takes as arguments to filter the motifs.
175 |
176 | .. csv-table::
177 | :header: "Argument", "Description"
178 | :widths: 10, 80
179 |
180 | "`matrix_id`","Takes precedence over all other selection criteria except 'all'. Only motifs with the given JASPAR matrix ID(s) are returned. A matrix ID may be specified as just a base ID or full JASPAR IDs including version number. If only a base ID is provided for specific motif(s), then just the latest version of those motif(s) are returned unless 'all_versions' is also specified."
181 | "`collection`","Only motifs from the specified JASPAR collection(s) are returned. NOTE - if not specified, the collection defaults to CORE for all other selection criteria except 'all' and 'matrix_id'. To apply the other selection criteria across all JASPAR collections, explicitly set collection=None."
182 | "`tf_name`","Only motifs with the given name(s) are returned."
183 | "`tf_class`","Only motifs of the given TF class(es) are returned."
184 | "`tf_family`","Only motifs from the given TF families are returned."
185 | "`tax_group`","Only motifs belonging to the given taxonomic supergroups are returned (e.g. 'vertebrates', 'insects', 'nematodes' etc.)"
186 | "`species`","Only motifs derived from the given species are returned. Species are specified as taxonomy IDs."
187 | "`data_type`","Only motifs generated with the given data type (e.g. 'ChIP-seq', 'PBM', 'SELEX' etc.) are returned."
188 | "`pazar_id`","Only motifs with the given PAZAR TF ID are returned."
189 | "`medline`","Only motifs with the given medline (PubMed IDs) are returned."
190 | "`min_ic`","Only motifs whose profile matrices have at least this information content (specificity) are returned."
191 | "`min_length`","Only motifs whose profiles are of at least this length are returned."
192 | "`min_sites`","Only motifs compiled from at least these many binding sites are returned."
193 | "`all_versions`","Unless specified, just the latest version of motifs determined by the other selection criteria are returned. Otherwise all versions of the selected motifs are returned."
194 | "`all`","Takes precedence over all other selection criteria. Every motif is returned. If 'all_versions' is also specified, all versions of every motif are returned, otherwise just the latest version of every motif is returned."
195 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | =======================
2 | pyJASPAR Documentation
3 | =======================
4 |
5 | **Welcome to pyJASPAR**! — a serverless interface to Biopython to query and access JASPAR motifs from different releases of JASPAR database using sqlite3.
6 |
7 | .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.4509415.svg
8 | :target: https://doi.org/10.5281/zenodo.4509415
9 |
10 | .. image:: https://travis-ci.org/asntech/pyjaspar.svg?branch=main
11 | :target: https://travis-ci.org/asntech/pyjaspar
12 |
13 | .. image:: https://img.shields.io/pypi/v/pyjaspar.svg
14 | :target: https://pypi.python.org/pypi/pyjaspar
15 |
16 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/version.svg
17 | :target: https://anaconda.org/bioconda/pyjaspar
18 |
19 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/downloads.svg
20 | :target: https://bioconda.github.io/recipes/pyjaspar/README.html
21 |
22 | .. image:: https://anaconda.org/bioconda/pyjaspar/badges/installer/conda.svg
23 | :target: https://conda.anaconda.org/bioconda
24 |
25 | .. image:: https://img.shields.io/github/issues/asntech/pyjaspar.svg
26 | :target: https://github.com/asntech/pyjaspar/issues
27 |
28 |
29 | .. toctree::
30 | :maxdepth: 2
31 | :caption: Table of contents
32 |
33 | intro
34 | install
35 | how_to_use
36 | support
37 | cite
38 |
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
1 | ===============
2 | How to Install?
3 | ===============
4 | pyJASPAR is available on `PyPi `_, through `Bioconda `_, and source code available on `GitHub `_. If you already have a working installation of Python, the easiest way to install the required Python modules is by installing pyJASPAR using ``pip``.
5 |
6 | If you're setting up Python for the first time, we recommend to install it using the `Conda or Miniconda Python distribution `_. This comes with several helpful scientific and data processing libraries, and available for platforms including Windows, Mac OSX and Linux.
7 |
8 | You can use one of the following ways to install pyJASPAR.
9 |
10 |
11 | Install using Conda
12 | ====================
13 | We highly recommend installing pyJASPAR using Conda, as this will take care of the dependencies. If you already have Conda or Miniconda installed, go ahead and use the command below.
14 |
15 | .. code-block:: bash
16 |
17 | conda install -c bioconda pyjaspar
18 |
19 | .. note:: This will install all the dependencies and you are ready to use **pyJASPAR**.
20 |
21 | Install using pip
22 | ==================
23 | You can install pyJASPAR from PyPi using pip.
24 |
25 | .. code-block:: bash
26 |
27 | pip install pyjaspar
28 |
29 | .. note:: Make sure you're using Python v3.6 or later.
30 |
31 |
32 |
33 | Install from source
34 | ===================
35 | You can install a development version by using ``git`` from our GitHub repository at https://github.com/asntech/pyjaspar.
36 |
37 | .. code-block:: bash
38 |
39 | git clone https://github.com/asntech/pyjaspar.git
40 | cd pyjaspar
41 | python setup.py sdist install
42 |
--------------------------------------------------------------------------------
/docs/intro.rst:
--------------------------------------------------------------------------------
1 | =================
2 | What is pyJASPAR?
3 | =================
4 |
5 | pyJASPAR is a python module and a serverless interface to Biopython to query and access JASPAR motifs from different releases of JASPAR database using sqlite3.
6 |
7 | .. note:: This is a serverless SQLite wrapper around the Biopython JASPAR module `Bio.motifs.jaspar.db`, which requires JASPAR MySQL database server connection details.
8 |
9 |
10 | Currently, pyJASPAR provides access to JASPAR database releases including:
11 |
12 | - `JASPAR2024` - http://jaspar.genereg.net/
13 | - `JASPAR2022` - http://jaspar2022.genereg.net/
14 | - `JASPAR2020` - http://jaspar2020.genereg.net/
15 | - `JASPAR2018` - http://jaspar2018.genereg.net/
16 | - `JASPAR2016` - http://jaspar2016.genereg.net/
17 | - `JASPAR2014` - http://jaspar2014.genereg.net/
18 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
@ECHO OFF

REM Work from the directory containing this script so the relative
REM SOURCEDIR/BUILDDIR paths below resolve; undone by popd at :end.
pushd %~dp0

REM Command file for Sphinx documentation

REM Use the sphinx-build found on PATH unless the caller set SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

REM No target given on the command line: show the Sphinx help instead.
if "%1" == "" goto help

REM Probe the command with its output discarded; errorlevel 9009 is treated
REM as "command not found" and explained to the user before exiting.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

REM Forward the requested target (first argument) to sphinx-build.
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
--------------------------------------------------------------------------------
/docs/support.rst:
--------------------------------------------------------------------------------
1 | ========
2 | Support
3 | ========
4 |
5 | If you have questions, or found any bug in the program, please write to us at ``azez.khan[at]gmail.com``.
6 |
7 | You can also report issues on our `GitHub repo <https://github.com/asntech/pyjaspar/issues>`_.
8 |
--------------------------------------------------------------------------------
/pyjaspar/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/.DS_Store
--------------------------------------------------------------------------------
/pyjaspar/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2013 by David Arenillas and Anthony Mathelier. All rights reserved.
2 | # Revisions 2021 copyright by Aziz Khan. All rights reserved.
3 | # This code is part of the Biopython distribution and governed by its
4 | # license. Please see the LICENSE file that should have been included
5 | # as part of this package.
6 | """Provides read and query access to multiple releases of JASPAR database.
7 |
8 | This module requires Biopython to be installed.
9 |
10 | Example, substitute the database release/version name::
11 |
12 | from pyjaspar import jaspardb
13 |
14 | jdb_obj = jaspardb(release='JASPAR2020')
15 | motif = jdb_obj.fetch_motif_by_id('MA0095')
16 | print(motif)
17 | TF name YY1
18 | Matrix ID MA0095.2
19 | Collection CORE
20 | TF class ['C2H2 zinc finger factors']
21 | TF family ['More than 3 adjacent zinc finger factors']
22 | Species 9606
23 | Taxonomic group vertebrates
24 | Accession ['P25490']
25 | Data type used ChIP-seq
26 | Medline 18950698
27 | PAZAR ID TF0000069
28 | Matrix:
29 | 0 1 2 3 4 5 6 7 8 9 10 11
30 | A: 1126.00 6975.00 6741.00 2506.00 7171.00 0.00 11.00 13.00 812.00 867.00 899.00 1332.00
31 | C: 4583.00 0.00 99.00 1117.00 0.00 12.00 0.00 0.00 5637.00 1681.00 875.00 4568.00
32 | G: 801.00 181.00 268.00 3282.00 0.00 0.00 7160.00 7158.00 38.00 2765.00 4655.00 391.00
33 | T: 661.00 15.00 63.00 266.00 0.00 7159.00 0.00 0.00 684.00 1858.00 742.00 880.00
34 |
35 | motifs = jdb_obj.fetch_motifs(
36 | collection = 'CORE',
37 | tax_group = ['vertebrates', 'insects'],
38 | tf_class = 'Homeo domain factors',
39 | tf_family = ['TALE-type homeo domain factors', 'POU domain factors'],
40 | min_ic = 12
41 | )
42 | for motif in motifs:
43 | pass # do something with the motif
44 | """
45 |
46 | __version__ = '3.0.0'
47 |
48 | import warnings
49 | from Bio import BiopythonWarning
50 |
51 | import sqlite3
52 |
53 | from Bio.motifs import jaspar
54 |
55 | from .utils import *
56 |
# Years of the JASPAR releases this module knows about, newest first.
_RELEASE_YEARS = (2024, 2022, 2020, 2018, 2016, 2014)

# Release name -> file name of the SQLite database for that release.
jaspar_releases = {
    f"JASPAR{year}": f"JASPAR{year}.sqlite" for year in _RELEASE_YEARS
}

# Release used when the caller does not name one.
JASPAR_LATEST_RELEASE = "JASPAR2024"

# Collection used by the fetch methods when the caller does not name one.
JASPAR_DFLT_COLLECTION = "CORE"
69 |
70 | class jaspardb(object):
71 | """Class representing a JASPAR SQLite database.
72 |
73 | This is adapted from the biopython JASPAR5 MYSQL DB.
74 |
75 | """
76 |
77 | def __init__(self, release=JASPAR_LATEST_RELEASE, sqlite_db_path=None):
78 | """Construct a jaspardb instance and connect to specified DB.
79 |
80 | By default it connects to the JASPAR_LATEST_RELEASE which can be over written by using sqlite_db_path
81 |
82 | Arguments:
83 | - release - JASPAR release name ( e.g. JASPAR2018, JASPAR2020) By default latest available release.
84 | - sqlite_db_path - path to the JASPAR SQLite file (this will skip release)
85 | - conn - JASPAR SQLite connection
86 |
87 | """
88 | self.sqlite_db_path = sqlite_db_path
89 | self.release = release
90 |
91 | if sqlite_db_path:
92 | try:
93 | self.conn = sqlite3.connect(sqlite_db_path)
94 | self.release = sqlite_db_path
95 | except Error as e:
96 | print(e)
97 | else:
98 | try:
99 | release_value = jaspar_releases[release]
100 | try:
101 | self.conn = sqlite3.connect(get_jaspardb_path(release_value))
102 | except Error as e:
103 | print(e)
104 | except KeyError:
105 | print(f"{release} is not available. Available releases are:")
106 | print(self.get_releases())
107 |
108 | def __str__(self):
109 | """Return a string represention of the JASPAR DB SQLite connection."""
110 | return r"JASPAR release:%s:%s" % (self.release, self.conn)
111 |
112 | def get_releases(self):
113 | """Return available JASPAR releases/version.
114 |
115 | Returns:
116 | - A list of JASPAR available releases
117 |
118 | """
119 | releases = []
120 | for key, value in jaspar_releases.items():
121 | #print(key)
122 | releases.append(key)
123 | #print("Available JASPAR releases are: {releases}")
124 | return releases
125 |
126 |
127 | def fetch_motif_by_id(self, id):
128 | """Fetch a single JASPAR motif from the DB by it's JASPAR matrix ID.
129 |
130 | Example id 'MA0001.1'.
131 |
132 | Arguments:
133 | - id - JASPAR matrix ID. This may be a fully specified ID including
134 | the version number (e.g. MA0049.2) or just the base ID (e.g.
135 | MA0049). If only a base ID is provided, the latest version is
136 | returned.
137 |
138 | Returns:
139 | - A Bio.motifs.jaspar.Motif object
140 |
141 | **NOTE:** The perl TFBS module allows you to specify the type of matrix
142 | to return (PFM, PWM, ICM) but matrices are always stored in JASPAR as
143 | PFMs so this does not really belong here. Once a PFM is fetched the
144 | pwm() and pssm() methods can be called to return the normalized and
145 | log-odds matrices.
146 |
147 | """
148 | # separate stable ID and version number
149 | (base_id, version) = jaspar.split_jaspar_id(id)
150 | if not version:
151 | # if ID contains no version portion, fetch the latest version
152 | version = self._fetch_latest_version(base_id)
153 |
154 | # fetch internal JASPAR matrix ID - also a check for validity
155 | int_id = None
156 | if version:
157 | int_id = self._fetch_internal_id(base_id, version)
158 |
159 | # fetch JASPAR motif using internal ID
160 | motif = None
161 | if int_id:
162 | motif = self._fetch_motif_by_internal_id(int_id)
163 |
164 | return motif
165 |
166 | def fetch_motifs_by_name(self, name):
167 | """Fetch a list of JASPAR motifs from a JASPAR DB by the given TF name(s).
168 |
169 | Arguments:
170 | name - a single name or list of names
171 | Returns:
172 | A list of Bio.motifs.jaspar.Motif objects
173 |
174 | Notes:
175 | Names are not guaranteed to be unique. There may be more than one
176 | motif with the same name. Therefore even if name specifies a single
177 | name, a list of motifs is returned. This just calls
178 | self.fetch_motifs(collection = None, tf_name = name).
179 |
180 | This behaviour is different from the TFBS perl module's
181 | get_Matrix_by_name() method which always returns a single matrix,
182 | issuing a warning message and returning the first matrix retrieved
183 | in the case where multiple matrices have the same name.
184 |
185 | """
186 | return self.fetch_motifs(collection=None, tf_name=name)
187 |
188 | def fetch_motifs(
189 | self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None,
190 | tf_family=None, matrix_id=None, tax_group=None, species=None,
191 | pazar_id=None, data_type=None, medline=None, min_ic=0, min_length=0,
192 | min_sites=0, all=False, all_versions=False
193 | ):
194 | """Fetch jaspar.Record (list) of motifs using selection criteria.
195 |
196 | Arguments::
197 |
198 | Except where obvious, all selection criteria arguments may be
199 | specified as a single value or a list of values. Motifs must
200 | meet ALL the specified selection criteria to be returned with
201 | the precedent exceptions noted below.
202 |
203 | all - Takes precedent of all other selection criteria.
204 | Every motif is returned. If 'all_versions' is also
205 | specified, all versions of every motif are returned,
206 | otherwise just the latest version of every motif is
207 | returned.
208 | matrix_id - Takes precedence over all other selection criteria
209 | except 'all'. Only motifs with the given JASPAR
210 | matrix ID(s) are returned. A matrix ID may be
211 | specified as just a base ID or full JASPAR IDs
212 | including version number. If only a base ID is
213 | provided for specific motif(s), then just the latest
214 | version of those motif(s) are returned unless
215 | 'all_versions' is also specified.
216 | collection - Only motifs from the specified JASPAR collection(s)
217 | are returned. NOTE - if not specified, the collection
218 | defaults to CORE for all other selection criteria
219 | except 'all' and 'matrix_id'. To apply the other
220 | selection criteria across all JASPAR collections,
221 | explicitly set collection=None.
222 | tf_name - Only motifs with the given name(s) are returned.
223 | tf_class - Only motifs of the given TF class(es) are returned.
224 | tf_family - Only motifs from the given TF families are returned.
225 | tax_group - Only motifs belonging to the given taxonomic
226 | supergroups are returned (e.g. 'vertebrates',
227 | 'insects', 'nematodes' etc.)
228 | species - Only motifs derived from the given species are
229 | returned. Species are specified as taxonomy IDs.
230 | data_type - Only motifs generated with the given data type (e.g.
231 | ('ChIP-seq', 'PBM', 'SELEX' etc.) are returned.
232 | NOTE - must match exactly as stored in the database.
233 | pazar_id - Only motifs with the given PAZAR TF ID are returned.
234 | medline - Only motifs with the given medline (PubmMed IDs) are
235 | returned.
236 | min_ic - Only motifs whose profile matrices have at least this
237 | information content (specificty) are returned.
238 | min_length - Only motifs whose profiles are of at least this
239 | length are returned.
240 | min_sites - Only motifs compiled from at least these many binding
241 | sites are returned.
242 | all_versions- Unless specified, just the latest version of motifs
243 | determined by the other selection criteria are
244 | returned. Otherwise all versions of the selected
245 | motifs are returned.
246 |
247 | Returns:
248 | - A Bio.motifs.jaspar.Record (list) of motifs.
249 |
250 | """
251 | # Fetch the internal IDs of the motifs using the criteria provided
252 | int_ids = self._fetch_internal_id_list(
253 | collection=collection,
254 | tf_name=tf_name,
255 | tf_class=tf_class,
256 | tf_family=tf_family,
257 | matrix_id=matrix_id,
258 | tax_group=tax_group,
259 | species=species,
260 | pazar_id=pazar_id,
261 | data_type=data_type,
262 | medline=medline,
263 | all=all,
264 | all_versions=all_versions
265 | )
266 |
267 | record = jaspar.Record()
268 |
269 | """
270 | Now further filter motifs returned above based on any specified
271 | matrix specific criteria.
272 | """
273 | for int_id in int_ids:
274 | motif = self._fetch_motif_by_internal_id(int_id)
275 |
276 | # Filter motifs to those with matrix IC greater than min_ic
277 | if min_ic:
278 | if motif.pssm.mean() < min_ic:
279 | continue
280 |
281 | # Filter motifs to those with minimum length of min_length
282 | if min_length:
283 | if motif.length < min_length:
284 | continue
285 |
286 | # XXX We could also supply a max_length filter.
287 |
288 | """
289 | Filter motifs to those composed of at least this many sites.
290 | The perl TFBS module assumes column sums may be different but
291 | this should be strictly enforced here we will ignore this and
292 | just use the first column sum.
293 | """
294 | if min_sites:
295 | num_sites = sum(
296 | motif.counts[nt][0] for nt in motif.alphabet.letters
297 | )
298 | if num_sites < min_sites:
299 | continue
300 |
301 | record.append(motif)
302 |
303 | return record
304 |
305 | def _fetch_latest_version(self, base_id):
306 | """Get the latest version number for the given base_id (PRIVATE)."""
307 | cur = self.conn.cursor()
308 | cur.execute("select VERSION from MATRIX where BASE_id = ? order by VERSION desc limit 1", (base_id,))
309 |
310 | row = cur.fetchone()
311 |
312 | latest = None
313 | if row:
314 | latest = row[0]
315 | else:
316 | warnings.warn(
317 | "Failed to fetch latest version number for JASPAR motif"
318 | f" with base ID '{base_id}'. No JASPAR motif with this"
319 | " base ID appears to exist in the database.",
320 | BiopythonWarning)
321 |
322 | return latest
323 |
324 | def _fetch_internal_id(self, base_id, version):
325 | """Fetch the internal id for a base id + version (PRIVATE).
326 |
327 | Also checks if this combo exists or not.
328 | """
329 | cur = self.conn.cursor()
330 | cur.execute("select id from MATRIX where BASE_id = ? and VERSION = ? COLLATE NOCASE", (base_id, version))
331 |
332 | row = cur.fetchone()
333 |
334 | int_id = None
335 | if row:
336 | int_id = row[0]
337 | else:
338 | warnings.warn(
339 | "Failed to fetch internal database ID for JASPAR motif"
340 | f" with matrix ID '{base_id}.{version}'. No JASPAR motif"
341 | " with this matrix ID appears to exist.",
342 | BiopythonWarning)
343 |
344 | return int_id
345 |
346 | def _fetch_motif_by_internal_id(self, int_id):
347 | """Fetch basic motif information (PRIVATE)."""
348 | cur = self.conn.cursor()
349 | cur.execute("SELECT BASE_ID, VERSION, COLLECTION, NAME FROM MATRIX WHERE ID = ? COLLATE NOCASE", (int_id,))
350 |
351 | row = cur.fetchone()
352 |
353 | # This should never happen as it is an internal method. If it does
354 | # we should probably raise an exception
355 | if not row:
356 | warnings.warn(
357 | f"Could not fetch JASPAR motif with internal ID = {int_id}",
358 | BiopythonWarning)
359 | return None
360 |
361 | base_id = row[0]
362 | version = row[1]
363 | collection = row[2]
364 | name = row[3]
365 |
366 | matrix_id = "".join([base_id, ".", str(version)])
367 |
368 | # fetch the counts matrix
369 | counts = self._fetch_counts_matrix(int_id)
370 |
371 | # Create new JASPAR motif
372 | motif = jaspar.Motif(
373 | matrix_id, name, collection=collection, counts=counts
374 | )
375 |
376 | # fetch species
377 | cur.execute("select TAX_ID from MATRIX_SPECIES where id = ?", (int_id,))
378 | tax_ids = []
379 | rows = cur.fetchall()
380 | for row in rows:
381 | tax_ids.append(row[0])
382 |
383 | # Many JASPAR motifs (especially those not in the CORE collection)
384 | # do not have taxonomy IDs. So this warning would get annoying.
385 | # if not tax_ids:
386 | # warnings.warn("Could not fetch any taxonomy IDs for JASPAR motif"
387 | # " {0}".format(motif.matrix_id), BiopythonWarning)
388 |
389 | motif.species = tax_ids
390 |
391 | # fetch protein accession numbers
392 | cur.execute("select ACC FROM MATRIX_PROTEIN where id = ? COLLATE NOCASE", (int_id,))
393 | accs = []
394 | rows = cur.fetchall()
395 | for row in rows:
396 | accs.append(row[0])
397 |
398 | # Similarly as for taxonomy IDs, it would get annoying to print
399 | # warnings for JASPAR motifs which do not have accession numbers.
400 |
401 | motif.acc = accs
402 |
403 | # fetch remaining annotation as tags from the ANNOTATION table
404 | cur.execute("select TAG, VAL from MATRIX_ANNOTATION where id = ?", (int_id,))
405 |
406 | #Since jaspar 2018 tf_family and tf_class are return as array
407 | tf_family = []
408 | tf_class = []
409 | rows = cur.fetchall()
410 | for row in rows:
411 | attr = row[0]
412 | val = row[1]
413 | if attr == "class":
414 | tf_class.append(val)
415 | elif attr == "family":
416 | tf_family.append(val)
417 | elif attr == "tax_group":
418 | motif.tax_group = val
419 | elif attr == "type":
420 | motif.data_type = val
421 | elif attr == "pazar_tf_id":
422 | motif.pazar_id = val
423 | elif attr == "medline":
424 | motif.medline = val
425 | elif attr == "comment":
426 | motif.comment = val
427 | else:
428 | """
429 | TODO If we were to implement additional abitrary tags
430 | motif.tag(attr, val)
431 | """
432 | pass
433 |
434 | motif.tf_family = tf_family
435 | motif.tf_class = tf_class
436 |
437 | return motif
438 |
439 | def _fetch_counts_matrix(self, int_id):
440 | """Fetch the counts matrix from the JASPAR DB by the internal ID (PRIVATE).
441 |
442 | Returns a Bio.motifs.matrix.GenericPositionMatrix
443 | """
444 | counts = {}
445 | cur = self.conn.cursor()
446 |
447 | for base in "ACGT":
448 | base_counts = []
449 |
450 | cur.execute("SELECT val from MATRIX_DATA WHERE ID = ? AND row = ? ORDER BY col", (int_id, base))
451 |
452 | rows = cur.fetchall()
453 | for row in rows:
454 | base_counts.append(row[0])
455 |
456 | counts[base] = [float(x) for x in base_counts]
457 |
458 | return GenericPositionMatrix("ACGT", counts)
459 |
460 | def _fetch_internal_id_list(
461 | self, collection=JASPAR_DFLT_COLLECTION, tf_name=None, tf_class=None,
462 | tf_family=None, matrix_id=None, tax_group=None, species=None,
463 | pazar_id=None, data_type=None, medline=None, all=False,
464 | all_versions=False
465 | ):
466 | """Fetch list of internal JASPAR motif IDs.
467 |
468 | Fetch a list of internal JASPAR motif IDs based on various passed
469 | parameters which may then be used to fetch the rest of the motif data.
470 |
471 | Caller:
472 | fetch_motifs()
473 |
474 | Arguments:
475 | See arguments sections of fetch_motifs()
476 |
477 | Returns:
478 | A list of internal JASPAR motif IDs which match the given
479 | selection criteria arguments.
480 |
481 |
482 | Build an SQL query based on the selection arguments provided.
483 |
484 | 1: First add table joins and sub-clauses for criteria corresponding to
485 | named fields from the MATRIX and MATRIX_SPECIES tables such as
486 | collection, matrix ID, name, species etc.
487 |
488 | 2: Then add joins/sub-clauses for tag/value parameters from the
489 | MATRIX_ANNOTATION table.
490 |
491 | For the surviving matrices, the responsibility to do matrix-based
492 | feature filtering such as ic, number of sites etc, fall on the
493 | calling fetch_motifs() method.
494 |
495 | """
496 | int_ids = []
497 |
498 | cur = self.conn.cursor()
499 |
500 | """
501 | Special case 1: fetch ALL motifs. Highest priority.
502 | Ignore all other selection arguments.
503 | """
504 | if all:
505 | cur.execute("select ID from MATRIX")
506 | rows = cur.fetchall()
507 |
508 | for row in rows:
509 | int_ids.append(row[0])
510 |
511 | return int_ids
512 |
513 | """
514 | Special case 2: fetch specific motifs by their JASPAR IDs. This
515 | has higher priority than any other except the above 'all' case.
516 | Ignore all other selection arguments.
517 | """
518 | if matrix_id:
519 | """
520 | These might be either stable IDs or stable_ID.version.
521 | If just stable ID and if all_versions == 1, return all versions,
522 | otherwise just the latest
523 | """
524 | if all_versions:
525 | for id in matrix_id:
526 | # ignore vesion here, this is a stupidity filter
527 | (base_id, version) = jaspar.split_jaspar_id(id)
528 | cur.execute("select ID from MATRIX where BASE_ID = ? COLLATE NOCASE", (base_id,))
529 |
530 | rows = cur.fetchall()
531 | for row in rows:
532 | int_ids.append(row[0])
533 | else:
534 | # only the lastest version, or the requested version
535 | for id in matrix_id:
536 | (base_id, version) = jaspar.split_jaspar_id(id)
537 |
538 | if not version:
539 | version = self._fetch_latest_version(base_id)
540 |
541 | int_id = None
542 | if version:
543 | int_id = self._fetch_internal_id(base_id, version)
544 |
545 | if int_id:
546 | int_ids.append(int_id)
547 |
548 | return int_ids
549 |
550 | tables = ["MATRIX m"]
551 | where_clauses = []
552 |
553 | # Select by MATRIX.COLLECTION
554 | if collection:
555 | if isinstance(collection, list):
556 | # Multiple collections passed in as a list
557 | clause = "m.COLLECTION in ('"
558 | clause = "".join([clause, "','".join([c.upper() for c in collection])])
559 | clause = "".join([clause, "')"])
560 | else:
561 | # A single collection - typical usage
562 | clause = "m.COLLECTION = '%s'" % collection.upper()
563 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
564 | #clause = "%s COLLATE NOCASE" % clause
565 | where_clauses.append(clause)
566 |
567 | # Select by MATRIX.NAME
568 | if tf_name:
569 | if isinstance(tf_name, list):
570 | # Multiple names passed in as a list
571 | clause = "m.NAME in ('"
572 | clause = "".join([clause, "','".join(tf_name)])
573 | clause = "".join([clause, "')"])
574 | else:
575 | # A single name
576 | clause = "m.NAME = '%s'" % tf_name
577 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
578 | #clause = "%s COLLATE NOCASE" % clause
579 | where_clauses.append(clause)
580 |
581 | # Select by MATRIX_SPECIES.TAX_ID
582 | if species:
583 | tables.append("MATRIX_SPECIES ms")
584 | where_clauses.append("m.ID = ms.ID")
585 |
586 | """
587 | NOTE: species are numeric taxonomy IDs but stored as varchars
588 | in the DB.
589 | """
590 | if isinstance(species, list):
591 | # Multiple tax IDs passed in as a list
592 | clause = "ms.TAX_ID in ('"
593 | clause = "".join([clause, "','".join(str(s) for s in species)])
594 | clause = "".join([clause, "')"])
595 | else:
596 | # A single tax ID
597 | clause = "ms.TAX_ID = '%s'" % str(species)
598 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
599 | #clause = "%s COLLATE NOCASE" % clause
600 | where_clauses.append(clause)
601 |
602 | """
603 | Tag based selection from MATRIX_ANNOTATION
604 | Differs from perl TFBS module in that the matrix class explicitly
605 | has a tag attribute corresponding to the tags in the database. This
606 | provides tremendous flexibility in adding new tags to the DB and
607 | being able to select based on those tags with out adding new code.
608 | In the JASPAR Motif class we have elected to use specific attributes
609 | for the most commonly used tags and here correspondingly only allow
610 | selection on these attributes.
611 |
612 | The attributes corresponding to the tags for which selection is
613 | provided are:
614 |
615 | Attribute Tag
616 | tf_class class
617 | tf_family family
618 | pazar_id pazar_tf_id
619 | medline medline
620 | data_type type
621 | tax_group tax_group
622 | """
623 |
624 | # Select by TF class(es) (MATRIX_ANNOTATION.TAG="class")
625 | if tf_class:
626 | tables.append("MATRIX_ANNOTATION ma1")
627 | where_clauses.append("m.ID = ma1.ID")
628 |
629 | clause = "ma1.TAG = 'class'"
630 | if isinstance(tf_class, list):
631 | # A list of TF classes
632 | clause = "".join([clause, " and ma1.VAL in ('"])
633 | clause = "".join([clause, "','".join(tf_class)])
634 | clause = "".join([clause, "')"])
635 | else:
636 | # A single TF class
637 | clause = "".join([clause, " and ma1.VAL = '%s' " % tf_class])
638 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
639 | #clause = "%s COLLATE NOCASE" % clause
640 | where_clauses.append(clause)
641 |
642 | # Select by TF families (MATRIX_ANNOTATION.TAG="family")
643 | if tf_family:
644 | tables.append("MATRIX_ANNOTATION ma2")
645 | where_clauses.append("m.ID = ma2.ID")
646 |
647 | clause = "ma2.TAG = 'family'"
648 | if isinstance(tf_family, list):
649 | # A list of TF families
650 | clause = "".join([clause, " and ma2.VAL in ('"])
651 | clause = "".join([clause, "','".join(tf_family)])
652 | clause = "".join([clause, "')"])
653 | else:
654 | # A single TF family
655 | clause = "".join([clause, " and ma2.VAL = '%s' " % tf_family])
656 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
657 | #clause = "%s COLLATE NOCASE" % clause
658 | where_clauses.append(clause)
659 |
660 | # Select by PAZAR TF ID(s) (MATRIX_ANNOTATION.TAG="pazar_tf_id")
661 | if pazar_id:
662 | tables.append("MATRIX_ANNOTATION ma3")
663 | where_clauses.append("m.ID = ma3.ID")
664 |
665 | clause = "ma3.TAG = 'pazar_tf_id'"
666 | if isinstance(pazar_id, list):
667 | # A list of PAZAR IDs
668 | clause = "".join([clause, " and ma3.VAL in ('"])
669 | clause = "".join([clause, "','".join(pazar_id)])
670 | clause = "".join([clause, "')"])
671 | else:
672 | # A single PAZAR ID
673 | clause = "".join([" and ma3.VAL = '%s' " % pazar_id])
674 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
675 | #clause = "%s COLLATE NOCASE" % clause
676 | where_clauses.append(clause)
677 |
678 | # Select by PubMed ID(s) (MATRIX_ANNOTATION.TAG="medline")
679 | if medline:
680 | tables.append("MATRIX_ANNOTATION ma4")
681 | where_clauses.append("m.ID = ma4.ID")
682 |
683 | clause = "ma4.TAG = 'medline'"
684 | if isinstance(medline, list):
685 | # A list of PubMed IDs
686 | clause = "".join([clause, " and ma4.VAL in ('"])
687 | clause = "".join([clause, "','".join(medline)])
688 | clause = "".join([clause, "')"])
689 | else:
690 | # A single PubMed ID
691 | clause = "".join([" and ma4.VAL = '%s' " % medline])
692 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
693 | #clause = "%s COLLATE NOCASE" % clause
694 | where_clauses.append(clause)
695 |
696 | # Select by data type(s) used to compile the matrix
697 | # (MATRIX_ANNOTATION.TAG="type")
698 | if data_type:
699 | tables.append("MATRIX_ANNOTATION ma5")
700 | where_clauses.append("m.ID = ma5.ID")
701 |
702 | clause = "ma5.TAG = 'type'"
703 | if isinstance(data_type, list):
704 | # A list of data types
705 | clause = "".join([clause, " and ma5.VAL in ('"])
706 | clause = "".join([clause, "','".join(data_type)])
707 | clause = "".join([clause, "')"])
708 | else:
709 | # A single data type
710 | clause = "".join([" and ma5.VAL = '%s' " % data_type])
711 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
712 | #clause = "%s COLLATE NOCASE" % clause
713 | where_clauses.append(clause)
714 |
715 | # Select by taxonomic supergroup(s) (MATRIX_ANNOTATION.TAG="tax_group")
716 | if tax_group:
717 | tables.append("MATRIX_ANNOTATION ma6")
718 | where_clauses.append("m.ID = ma6.ID")
719 |
720 | clause = "ma6.TAG = 'tax_group'"
721 | if isinstance(tax_group, list):
722 | # A list of tax IDs
723 | clause = "".join([clause, " and ma6.VAL in ('"])
724 | clause = "".join([clause, "','".join([tg.lower() for tg in tax_group])])
725 | clause = "".join([clause, "')"])
726 | else:
727 | # A single tax ID
728 | clause = "".join([clause, " and ma6.VAL = '%s' " % tax_group.lower()])
729 | ##SQLite is case sensitive therefore COLLATE NOCASE is set.
730 | #clause = "%s COLLATE NOCASE" % clause
731 | where_clauses.append(clause)
732 |
733 | sql = "".join(["select distinct(m.ID) from ", ", ".join(tables)])
734 |
735 | if where_clauses:
736 | sql = "".join([sql, " where ", " and ".join(where_clauses)])
737 |
738 | ### SQLite is casesensitivitive
739 | sql = "%s COLLATE NOCASE" % sql
740 | #print(sql)
741 |
742 | cur.execute(sql)
743 | rows = cur.fetchall()
744 |
745 | for row in rows:
746 | id = row[0]
747 | if all_versions:
748 | int_ids.append(id)
749 | else:
750 | # is the latest version?
751 | if self._is_latest_version(id):
752 | int_ids.append(id)
753 |
754 | if len(int_ids) < 1:
755 | warnings.warn("Zero motifs returned with current select critera",
756 | BiopythonWarning)
757 |
758 | return int_ids
759 |
760 | def _is_latest_version(self, int_id):
761 | """Check if the internal ID represents the latest JASPAR matrix (PRIVATE).
762 |
763 | Does this internal ID represent the latest version of the JASPAR
764 | matrix (collapse on base ids)
765 | """
766 | cur = self.conn.cursor()
767 |
768 | cur.execute("select count(*) from MATRIX where "
769 | "BASE_ID = (select BASE_ID from MATRIX where ID = ?) "
770 | "and VERSION > (select VERSION from MATRIX where ID = ?) COLLATE NOCASE",
771 | (int_id, int_id))
772 |
773 | row = cur.fetchone()
774 |
775 | count = row[0]
776 |
777 | if count == 0:
778 | # no matrices with higher version ID and same base id
779 | return True
780 |
781 | return False
782 |
class GenericPositionMatrix(dict):
    """Base class for the support of position matrix operations.

    The matrix is stored as a dict that maps every letter of ``alphabet`` to
    a list holding one value per position. ``length`` is the number of
    positions and ``alphabet`` the letters in display order.
    """

    def __init__(self, alphabet, values):
        """Initialize the class.

        ``values`` maps each letter of ``alphabet`` to a sequence of numbers.
        Every sequence must have the same length; otherwise an Exception is
        raised. The sequences are copied into new lists.
        """
        self.length = None
        for letter in alphabet:
            if self.length is None:
                self.length = len(values[letter])
            elif self.length != len(values[letter]):
                raise Exception("data has inconsistent lengths")
            self[letter] = list(values[letter])
        self.alphabet = alphabet

    def __str__(self):
        """Return a string containing nucleotides and counts of the alphabet in the Matrix."""
        # The header is padded to the width of the "X: " row prefix so that
        # the position numbers line up with the value columns below them.
        header = " " * 3 + " ".join("%6d" % i for i in range(self.length))
        lines = [header]
        for letter in self.alphabet:
            words = ["%6.2f" % value for value in self[letter]]
            lines.append("%c: " % letter + " ".join(words))
        return "\n".join(lines) + "\n"

    def _letters_for_key(self, key):
        """Resolve a letter selector (PRIVATE).

        Returns ``(letters, scalar)``: ``letters`` is the list of selected
        letters and ``scalar`` is True when the selector names exactly one
        letter (an int index or a one-character string) rather than a group
        (a slice or a tuple of indices). Raises KeyError for anything else.
        """
        if isinstance(key, slice):
            start, stop, stride = key.indices(len(self.alphabet))
            return [self.alphabet[i] for i in range(start, stop, stride)], False
        if isinstance(key, int):
            return [self.alphabet[key]], True
        if isinstance(key, tuple):
            return [self.alphabet[i] for i in key], False
        if isinstance(key, str):
            if len(key) == 1:
                return [key], True
            raise KeyError(key)
        raise KeyError("Cannot understand key %s" % str(key))

    def __getitem__(self, key):
        """Return the position matrix of index key.

        A one-dimensional key selects letters: a one-character string or an
        int index returns that letter's list of values; a slice or a tuple
        of indices returns a plain dict of letter -> list.

        A two-dimensional key ``(letters, positions)`` additionally selects
        positions with an int or a slice. One letter and one position gives
        a single value; one letter and a slice gives a tuple; several
        letters and one position gives a dict of letter -> value; several
        letters and a slice gives a new matrix of the same class when every
        letter of the alphabet is selected, and a plain dict otherwise.

        Raises KeyError for keys that cannot be interpreted.
        """
        if isinstance(key, tuple):
            if len(key) == 2:
                key1, key2 = key
                letters, single_letter = self._letters_for_key(key1)
                if isinstance(key2, slice):
                    start2, stop2, stride2 = key2.indices(self.length)
                    positions = range(start2, stop2, stride2)
                    single_position = False
                elif isinstance(key2, int):
                    single_position = True
                else:
                    raise KeyError("Cannot understand key %s" % str(key2))
                if single_letter:
                    row = dict.__getitem__(self, letters[0])
                    if single_position:
                        return row[key2]
                    return tuple(row[i] for i in positions)
                if single_position:
                    return {
                        letter: dict.__getitem__(self, letter)[key2]
                        for letter in letters
                    }
                d = {}
                for letter in letters:
                    row = dict.__getitem__(self, letter)
                    d[letter] = [row[i] for i in positions]
                # Compare as sorted lists: the alphabet may be a string, and
                # a list never compares equal to a string.
                if sorted(letters) == sorted(self.alphabet):
                    return self.__class__(self.alphabet, d)
                return d
            if len(key) != 1:
                raise KeyError("keys should be 1- or 2-dimensional")
            key = key[0]
        letters, single_letter = self._letters_for_key(key)
        if single_letter:
            return dict.__getitem__(self, letters[0])
        return {letter: dict.__getitem__(self, letter) for letter in letters}

    @property
    def consensus(self):
        """Return the consensus sequence.

        For each position the letter with the largest value is taken; on a
        tie the letter that comes first in the alphabet wins. The letters
        are joined and wrapped in ``Seq``.
        """
        chosen = []
        for i in range(self.length):
            maximum = float("-inf")
            for letter in self.alphabet:
                count = self[letter][i]
                if count > maximum:
                    maximum = count
                    sequence_letter = letter
            chosen.append(sequence_letter)
        return Seq("".join(chosen))

    @property
    def anticonsensus(self):
        """Return the anticonsensus sequence.

        For each position the letter with the smallest value is taken; on a
        tie the letter that comes first in the alphabet wins. The letters
        are joined and wrapped in ``Seq``.
        """
        chosen = []
        for i in range(self.length):
            minimum = float("inf")
            for letter in self.alphabet:
                count = self[letter][i]
                if count < minimum:
                    minimum = count
                    sequence_letter = letter
            chosen.append(sequence_letter)
        return Seq("".join(chosen))

    @property
    def degenerate_consensus(self):
        """Return the degenerate consensus sequence."""
        # Following the rules adapted from
        # D. R. Cavener: "Comparison of the consensus sequence flanking
        # translational start sites in Drosophila and vertebrates."
        # Nucleic Acids Research 15(4): 1353-1361. (1987).
        # The same rules are used by TRANSFAC.
        degenerate_nucleotide = {
            "A": "A",
            "C": "C",
            "G": "G",
            "T": "T",
            "AC": "M",
            "AG": "R",
            "AT": "W",
            "CG": "S",
            "CT": "Y",
            "GT": "K",
            "ACG": "V",
            "ACT": "H",
            "AGT": "D",
            "CGT": "B",
            "ACGT": "N",
        }
        chosen = []
        for i in range(self.length):
            # Rank the letters by their value at this position, largest first.
            nucleotides = sorted(
                self, key=lambda nucleotide, i=i: self[nucleotide][i], reverse=True
            )
            counts = [self[c][i] for c in nucleotides]
            # Follow the Cavener rules:
            if counts[0] > sum(counts[1:]) and counts[0] > 2 * counts[1]:
                key = nucleotides[0]
            elif 4 * sum(counts[:2]) > 3 * sum(counts):
                key = "".join(sorted(nucleotides[:2]))
            elif counts[3] == 0:
                key = "".join(sorted(nucleotides[:3]))
            else:
                key = "ACGT"
            # Keys missing from the table are emitted unchanged.
            chosen.append(degenerate_nucleotide.get(key, key))
        return Seq("".join(chosen))

    @property
    def gc_content(self):
        """Compute the fraction GC content.

        This is the sum of the C and G values divided by the sum of all
        values. Raises ZeroDivisionError when every value is zero.
        """
        gc_total = 0.0
        total = 0.0
        for i in range(self.length):
            for letter in self.alphabet:
                value = self[letter][i]
                if letter in "CG":
                    gc_total += value
                total += value
        return gc_total / total

    def reverse_complement(self):
        """Compute reverse complement.

        Returns a new matrix of the same class in which each letter holds
        the reversed values of its complement (A<->T, or A<->U when the
        alphabet is "ACGU", and C<->G).
        """
        values = {}
        if self.alphabet == "ACGU":
            values["A"] = self["U"][::-1]
            values["U"] = self["A"][::-1]
        else:
            values["A"] = self["T"][::-1]
            values["T"] = self["A"][::-1]
        values["G"] = self["C"][::-1]
        values["C"] = self["G"][::-1]
        return self.__class__(self.alphabet, values)
1002 |
--------------------------------------------------------------------------------
/pyjaspar/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/.DS_Store
--------------------------------------------------------------------------------
/pyjaspar/data/JASPAR2014.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2014.sqlite
--------------------------------------------------------------------------------
/pyjaspar/data/JASPAR2016.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2016.sqlite
--------------------------------------------------------------------------------
/pyjaspar/data/JASPAR2018.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2018.sqlite
--------------------------------------------------------------------------------
/pyjaspar/data/JASPAR2020.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2020.sqlite
--------------------------------------------------------------------------------
/pyjaspar/data/JASPAR2022.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2022.sqlite
--------------------------------------------------------------------------------
/pyjaspar/data/JASPAR2024.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/JASPAR2024.sqlite
--------------------------------------------------------------------------------
/pyjaspar/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/asntech/pyjaspar/eaa9e8137406918a9a22eaeffdf979d8c98d57b0/pyjaspar/data/__init__.py
--------------------------------------------------------------------------------
/pyjaspar/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 |
def get_jaspardb_path(fn, sub_dir=None):
    """
    Return the path of a sqlite file inside the pyjaspar data directory.

    ``fn`` is the file name; ``sub_dir``, when given and non-empty, is a
    sub-directory of the data directory to look in. A ValueError is raised
    if the resulting path does not exist.

    This code is adapted from https://github.com/daler/pybedtools
    """
    components = [data_dir()]
    if sub_dir:
        components.append(sub_dir)
    components.append(fn)
    path = os.path.join(*components)
    if os.path.exists(path):
        return path
    raise ValueError("%s does not exist" % path)
19 |
20 |
def data_dir():
    """
    Return the ``data`` directory located next to this module.

    The path is built from this module's ``__file__``; it is not checked
    for existence.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'data')
29 |
--------------------------------------------------------------------------------
/pyjaspar_notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# pyJASPAR Notebook"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Once you have installed pyJASPAR, you can load the module and connect to the latest release of JASPAR."
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 22,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "from pyjaspar import jaspardb"
24 | ]
25 | },
26 | {
27 | "cell_type": "markdown",
28 | "metadata": {},
29 | "source": [
30 | "Connect to the version of JASPAR you're interested in. This will return a jaspardb class object.\n",
31 | "For example, here we're getting JASPAR2024."
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": 23,
37 | "metadata": {},
38 | "outputs": [],
39 | "source": [
40 | "jdb_obj = jaspardb(release='JASPAR2024')"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "You can also check the JASPAR version you are connected to using:"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 24,
53 | "metadata": {
54 | "scrolled": true
55 | },
56 | "outputs": [
57 | {
58 | "name": "stdout",
59 | "output_type": "stream",
60 | "text": [
61 | "JASPAR2024\n"
62 | ]
63 | }
64 | ],
65 | "source": [
66 | "print(jdb_obj.release)"
67 | ]
68 | },
69 | {
70 | "cell_type": "markdown",
71 | "metadata": {},
72 | "source": [
73 | "By default, it is set to the latest release/version of the JASPAR database. For example:"
74 | ]
75 | },
76 | {
77 | "cell_type": "code",
78 | "execution_count": 25,
79 | "metadata": {},
80 | "outputs": [
81 | {
82 | "name": "stdout",
83 | "output_type": "stream",
84 | "text": [
85 | "JASPAR2024\n"
86 | ]
87 | }
88 | ],
89 | "source": [
90 | "jdb_obj = jaspardb()\n",
91 | "print(jdb_obj.release)"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "### Get available releases\n",
99 | "You can list the available releases/versions of JASPAR using:"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 26,
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "name": "stdout",
109 | "output_type": "stream",
110 | "text": [
111 | "['JASPAR2024', 'JASPAR2022', 'JASPAR2020', 'JASPAR2018', 'JASPAR2016', 'JASPAR2014']\n"
112 | ]
113 | }
114 | ],
115 | "source": [
116 | "print(jdb_obj.get_releases())"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "### Get motif by using JASPAR ID\n",
124 | "You can get the motif details for a specific TF using its JASPAR ID. If you skip the version of the motif, it will return the latest version. "
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 27,
130 | "metadata": {},
131 | "outputs": [],
132 | "source": [
133 | "motif = jdb_obj.fetch_motif_by_id('MA0006.1')"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "Printing the motif will show all the associated meta-information stored in the JASPAR database, including the matrix counts."
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": 28,
146 | "metadata": {},
147 | "outputs": [
148 | {
149 | "name": "stdout",
150 | "output_type": "stream",
151 | "text": [
152 | "TF name\tAhr::Arnt\n",
153 | "Matrix ID\tMA0006.1\n",
154 | "Collection\tCORE\n",
155 | "TF class\t['Basic helix-loop-helix factors (bHLH)', 'Basic helix-loop-helix factors (bHLH)']\n",
156 | "TF family\t['PAS domain factors', 'PAS domain factors']\n",
157 | "Species\t10090\n",
158 | "Taxonomic group\tvertebrates\n",
159 | "Accession\t['P30561', 'P53762']\n",
160 | "Data type used\tSELEX\n",
161 | "Medline\t7592839\n",
162 | "Comments\tdimer\n",
163 | "Matrix:\n",
164 | " 0 1 2 3 4 5\n",
165 | "A: 3.00 0.00 0.00 0.00 0.00 0.00\n",
166 | "C: 8.00 0.00 23.00 0.00 0.00 0.00\n",
167 | "G: 2.00 23.00 0.00 23.00 0.00 24.00\n",
168 | "T: 11.00 1.00 1.00 1.00 24.00 0.00\n",
169 | "\n",
170 | "\n",
171 | "\n"
172 | ]
173 | }
174 | ],
175 | "source": [
176 | "print(motif)"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "Get the count matrix using `.counts`"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 29,
189 | "metadata": {
190 | "scrolled": true
191 | },
192 | "outputs": [
193 | {
194 | "name": "stdout",
195 | "output_type": "stream",
196 | "text": [
197 | "[3.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n"
198 | ]
199 | }
200 | ],
201 | "source": [
202 | "print(motif.counts['A'])"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {},
208 | "source": [
209 | "### Search motifs by TF name\n",
210 | "You can use the `fetch_motifs_by_name` function to find motifs by TF name. This method returns a list of motifs for the same TF name across taxonomic groups. For example, the search below returns all CTCF motifs (four in the output shown here: one from insects and three from vertebrates)."
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": 12,
216 | "metadata": {},
217 | "outputs": [],
218 | "source": [
219 | "motifs = jdb_obj.fetch_motifs_by_name(\"CTCF\")"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 13,
225 | "metadata": {},
226 | "outputs": [
227 | {
228 | "name": "stdout",
229 | "output_type": "stream",
230 | "text": [
231 | "4\n"
232 | ]
233 | }
234 | ],
235 | "source": [
236 | "print(len(motifs))"
237 | ]
238 | },
239 | {
240 | "cell_type": "code",
241 | "execution_count": 14,
242 | "metadata": {},
243 | "outputs": [
244 | {
245 | "name": "stdout",
246 | "output_type": "stream",
247 | "text": [
248 | "TF name\tCTCF\n",
249 | "Matrix ID\tMA0531.2\n",
250 | "Collection\tCORE\n",
251 | "TF class\t['C2H2 zinc finger factors']\n",
252 | "TF family\t['More than 3 adjacent zinc fingers']\n",
253 | "Species\t7227\n",
254 | "Taxonomic group\tinsects\n",
255 | "Accession\t['Q9VS55']\n",
256 | "Data type used\tChIP-chip\n",
257 | "Medline\t17616980\n",
258 | "Matrix:\n",
259 | " 0 1 2 3 4 5 6 7 8 9\n",
260 | "A: 257.00 1534.00 202.00 987.00 2.00 0.00 2.00 124.00 1.00 79.00\n",
261 | "C: 714.00 1.00 0.00 0.00 4.00 0.00 0.00 1645.00 0.00 1514.00\n",
262 | "G: 87.00 192.00 1700.00 912.00 311.00 1902.00 1652.00 3.00 1807.00 8.00\n",
263 | "T: 844.00 175.00 0.00 3.00 1585.00 0.00 248.00 130.00 94.00 301.00\n",
264 | "\n",
265 | "\n",
266 | "\n",
267 | "TF name\tCTCF\n",
268 | "Matrix ID\tMA0139.2\n",
269 | "Collection\tCORE\n",
270 | "TF class\t['C2H2 zinc finger factors']\n",
271 | "TF family\t['More than 3 adjacent zinc fingers']\n",
272 | "Species\t9606\n",
273 | "Taxonomic group\tvertebrates\n",
274 | "Accession\t['P49711']\n",
275 | "Data type used\tChIP-seq\n",
276 | "Medline\t17512414\n",
277 | "Comments\tTF has several motif variants.\n",
278 | "Matrix:\n",
279 | " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14\n",
280 | "A: 281.00 56.00 8.00 744.00 40.00 107.00 851.00 5.00 333.00 54.00 12.00 56.00 104.00 372.00 82.00\n",
281 | "C: 49.00 800.00 903.00 13.00 528.00 433.00 11.00 0.00 3.00 12.00 0.00 8.00 733.00 13.00 482.00\n",
282 | "G: 449.00 21.00 0.00 65.00 334.00 48.00 32.00 903.00 566.00 504.00 890.00 775.00 5.00 507.00 307.00\n",
283 | "T: 134.00 36.00 2.00 91.00 11.00 324.00 18.00 3.00 9.00 341.00 8.00 71.00 67.00 17.00 37.00\n",
284 | "\n",
285 | "\n",
286 | "\n",
287 | "TF name\tCTCF\n",
288 | "Matrix ID\tMA1929.2\n",
289 | "Collection\tCORE\n",
290 | "TF class\t['C2H2 zinc finger factors']\n",
291 | "TF family\t['More than 3 adjacent zinc fingers']\n",
292 | "Species\t9606\n",
293 | "Taxonomic group\tvertebrates\n",
294 | "Accession\t['P49711']\n",
295 | "Data type used\tChIP-seq\n",
296 | "Medline\t34326481\n",
297 | "Comments\tTF has several motif variants. Extended motif with zinc finger 8 (5bp)\n",
298 | "Matrix:\n",
299 | " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n",
300 | "A: 905.00 914.00 163.00 287.00 4559.00 1658.00 592.00 1133.00 720.00 775.00 2055.00 1473.00 1639.00 1959.00 1569.00 827.00 1483.00 1649.00 730.00 224.00 4283.00 433.00 1251.00 5212.00 210.00 2494.00 495.00 232.00 347.00 1034.00 2737.00\n",
301 | "C: 3839.00 405.00 132.00 5468.00 627.00 1155.00 986.00 777.00 3973.00 3913.00 1851.00 1915.00 2308.00 1412.00 1431.00 2470.00 1392.00 791.00 5068.00 6321.00 357.00 3551.00 2748.00 244.00 114.00 86.00 254.00 76.00 381.00 4715.00 466.00\n",
302 | "G: 1235.00 319.00 6252.00 241.00 390.00 3322.00 571.00 1773.00 764.00 544.00 1141.00 2079.00 1441.00 1429.00 2192.00 942.00 2269.00 3152.00 518.00 79.00 935.00 2483.00 618.00 787.00 6387.00 4085.00 3487.00 6326.00 5406.00 246.00 3084.00\n",
303 | "T: 787.00 5128.00 219.00 770.00 1190.00 631.00 4617.00 3083.00 1309.00 1534.00 1719.00 1299.00 1378.00 1966.00 1574.00 2527.00 1622.00 1174.00 450.00 142.00 1191.00 299.00 2149.00 523.00 55.00 101.00 2530.00 132.00 632.00 771.00 479.00\n",
304 | "\n",
305 | "\n",
306 | "\n",
307 | "TF name\tCTCF\n",
308 | "Matrix ID\tMA1930.2\n",
309 | "Collection\tCORE\n",
310 | "TF class\t['C2H2 zinc finger factors']\n",
311 | "TF family\t['More than 3 adjacent zinc fingers']\n",
312 | "Species\t9606\n",
313 | "Taxonomic group\tvertebrates\n",
314 | "Accession\t['P49711']\n",
315 | "Data type used\tChIP-seq\n",
316 | "Medline\t34326481\n",
317 | "Comments\tTF has several motif variants. Extended motif with zinc finger 8 (6bp)\n",
318 | "Matrix:\n",
319 | " 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32\n",
320 | "A: 293.00 242.00 41.00 86.00 1986.00 445.00 158.00 346.00 453.00 497.00 714.00 536.00 467.00 664.00 563.00 593.00 452.00 560.00 604.00 178.00 38.00 1906.00 127.00 348.00 2179.00 32.00 1140.00 169.00 62.00 119.00 335.00 1021.00 279.00\n",
321 | "C: 1497.00 107.00 35.00 2129.00 127.00 366.00 278.00 324.00 1333.00 1129.00 794.00 855.00 1138.00 604.00 730.00 902.00 781.00 458.00 300.00 2074.00 2515.00 97.00 1409.00 1016.00 54.00 9.00 14.00 71.00 14.00 138.00 1833.00 164.00 1307.00\n",
322 | "G: 400.00 144.00 2476.00 25.00 155.00 1456.00 179.00 892.00 328.00 419.00 623.00 770.00 561.00 376.00 541.00 494.00 393.00 1041.00 1242.00 155.00 9.00 275.00 961.00 175.00 212.00 2527.00 1399.00 1208.00 2472.00 2117.00 86.00 1161.00 740.00\n",
323 | "T: 386.00 2083.00 24.00 336.00 308.00 309.00 1961.00 1014.00 462.00 531.00 445.00 415.00 410.00 932.00 742.00 587.00 950.00 517.00 430.00 169.00 14.00 298.00 79.00 1037.00 131.00 8.00 23.00 1128.00 28.00 202.00 322.00 230.00 250.00\n",
324 | "\n",
325 | "\n",
326 | "\n"
327 | ]
328 | }
329 | ],
330 | "source": [
331 | "print(motifs)"
332 | ]
333 | },
334 | {
335 | "cell_type": "markdown",
336 | "metadata": {},
337 | "source": [
338 | "### Search motifs with specific criteria\n",
339 | "A more commonly used function, `fetch_motifs`, helps you get motifs that match a specified set of criteria.\n",
340 | "You can query the database based on the available meta-information in the database.\n",
341 | "\n",
342 | "For example, here we are getting the widely used CORE collection for vertebrates. It returns a list of non-redundant motifs. "
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": 15,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "motifs = jdb_obj.fetch_motifs(\n",
352 | "collection = ['CORE'],\n",
353 | "tax_group = ['Vertebrates'],\n",
354 | "all_versions = False,\n",
355 | ")"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 16,
361 | "metadata": {},
362 | "outputs": [
363 | {
364 | "name": "stdout",
365 | "output_type": "stream",
366 | "text": [
367 | "879\n"
368 | ]
369 | }
370 | ],
371 | "source": [
372 | "print(len(motifs))"
373 | ]
374 | },
375 | {
376 | "cell_type": "code",
377 | "execution_count": 25,
378 | "metadata": {},
379 | "outputs": [],
380 | "source": [
381 | "for motif in motifs:\n",
382 | " #print(motif.matrix_id)\n",
383 | " pass # do something with the motif"
384 | ]
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "metadata": {},
389 | "source": [
390 | "Get the number of non-redundant motifs from the CORE collection per release."
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 17,
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "name": "stdout",
400 | "output_type": "stream",
401 | "text": [
402 | "JASPAR2024\n",
403 | "2346\n",
404 | "JASPAR2022\n",
405 | "1956\n",
406 | "JASPAR2020\n",
407 | "1646\n",
408 | "JASPAR2018\n",
409 | "1404\n",
410 | "JASPAR2016\n",
411 | "1082\n",
412 | "JASPAR2014\n",
413 | "593\n"
414 | ]
415 | }
416 | ],
417 | "source": [
418 | "for release in jdb_obj.get_releases():\n",
419 | " print(release)\n",
420 | " jdb_obj = jaspardb(release=release)\n",
421 | " motifs = jdb_obj.fetch_motifs(\n",
422 | " collection = [\"CORE\"],\n",
423 | " all_versions = False,\n",
424 | " #species = '10090' # this is the mouse tax ID\n",
425 | " )\n",
426 | " print(len(motifs))"
427 | ]
428 | },
429 | {
430 | "cell_type": "code",
431 | "execution_count": null,
432 | "metadata": {},
433 | "outputs": [],
434 | "source": []
435 | }
436 | ],
437 | "metadata": {
438 | "kernelspec": {
439 | "display_name": "Python 3",
440 | "language": "python",
441 | "name": "python3"
442 | },
443 | "language_info": {
444 | "codemirror_mode": {
445 | "name": "ipython",
446 | "version": 3
447 | },
448 | "file_extension": ".py",
449 | "mimetype": "text/x-python",
450 | "name": "python",
451 | "nbconvert_exporter": "python",
452 | "pygments_lexer": "ipython3",
453 | "version": "3.8.6"
454 | }
455 | },
456 | "nbformat": 4,
457 | "nbformat_minor": 2
458 | }
459 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | wheel
2 | biopython
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | This is a setup script for pyJASPAR: a serverless interface to Biopython to access different versions of the JASPAR database
5 |
6 | This code is free software; you can redistribute it and/or modify it under the terms of the
7 | BSD License (see the file LICENSE included with the distribution).
8 |
9 | @author: Aziz Khan
10 | @email: azez.khan@gmail.com
11 | """
12 | import os
13 | from distutils.core import setup
14 | from setuptools import find_packages
15 | #from pyjaspar import __version__ as VERSION
16 | import codecs
17 |
# Trove classifiers passed to setup(classifiers=...).
# NOTE(review): the license classifier below says MIT, while setup() passes
# license='GPL' and the module docstring mentions the BSD License -- confirm
# against the LICENSE file and make the three agree.
CLASSIFIERS = [
    'Intended Audience :: Developers',
    'Intended Audience :: Science/Research',
    'License :: OSI Approved :: MIT License',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Programming Language :: Python :: 3.6',
    'Programming Language :: Python :: 3.7',
    'Programming Language :: Python :: 3.8',
    'Topic :: Scientific/Engineering :: Bio-Informatics',
    'Topic :: Software Development :: Libraries :: Python Modules',
]

# Runtime dependencies passed to setup(install_requires=...); the same two
# names are listed in requirements.txt.
install_requires = [
    'wheel',
    'biopython',
]
35 |
36 |
def read(rel_path):
    """Return the text content of ``rel_path``.

    ``rel_path`` is resolved against the directory containing this script.
    The file is decoded as UTF-8 so the result does not depend on the
    locale of the machine running the build.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    with open(os.path.join(here, rel_path), encoding='utf-8') as fp:
        return fp.read()
41 |
def get_version(rel_path):
    """Extract the version string from the file at ``rel_path``.

    The first line starting with ``__version__`` is split on its quote
    character (double quote if the line contains one, single quote
    otherwise) and the text after the first quote is returned. A
    RuntimeError is raised when no such line exists.
    """
    for text_line in read(rel_path).splitlines():
        if not text_line.startswith('__version__'):
            continue
        quote = "'"
        if '"' in text_line:
            quote = '"'
        return text_line.split(quote)[1]
    raise RuntimeError("Unable to find version string.")
49 |
50 |
51 | #def readme():
52 | # with open('README.rst') as f:
53 | # return f.read()
54 |
def readme(fname):
    """Return the text of ``fname``, located next to this script.

    The file is opened in a ``with`` block so the handle is closed
    promptly, and decoded as UTF-8 so the result does not depend on the
    locale of the machine running the build.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path, encoding='utf-8') as handle:
        return handle.read()
57 |
# Register the distribution. The version is parsed from pyjaspar/__init__.py
# and the long description is read from README.rst.
setup(
    name="pyjaspar",
    description="A serverless interface to Biopython to access different versions of JASPAR database",
    version=get_version("pyjaspar/__init__.py"),
    author="Aziz Khan",
    license='GPL',
    platforms='linux/unix',
    author_email="azez.khan@gmail.com",
    url="https://github.com/asntech/pyjaspar",
    long_description=readme("README.rst"),
    long_description_content_type='text/x-rst',
    package_dir={'pyjaspar': 'pyjaspar'},

    packages=['pyjaspar',
              'pyjaspar.data'
              ],

    # package_data globs are resolved relative to the package's own
    # directory, so the sqlite files are addressed as data/*.sqlite.
    package_data={'pyjaspar': ['data/*.sqlite']},
    include_package_data=True,
    install_requires=install_requires,
    classifiers=CLASSIFIERS,
)
80 |
--------------------------------------------------------------------------------