├── .gitignore
├── LICENSE
├── README.md
├── cdk_pywrapper
│   ├── __init__.py
│   ├── cdk
│   │   └── cdk_bridge.java
│   ├── cdk_pywrapper.py
│   ├── chemlib.py
│   ├── config.py
│   └── tests
│       ├── __init__.py
│       └── cdk_pywrapper_test.py
├── requirements.txt
├── setup.cfg
├── setup.py
└── version.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Python template
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | *.py[cod]
6 | *$py.class
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | env/
14 | venv/
15 | venv2/
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *,cover
50 | .hypothesis/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # IPython Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | venv/
87 | ENV/
88 |
89 | # Spyder project settings
90 | .spyderproject
91 |
92 | # Rope project settings
93 | .ropeproject
94 |
95 | .gitignore
96 | .idea/
97 | VERSION
98 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU AFFERO GENERAL PUBLIC LICENSE
2 | Version 3, 19 November 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 | Preamble
9 |
10 | The GNU Affero General Public License is a free, copyleft license for
11 | software and other kinds of works, specifically designed to ensure
12 | cooperation with the community in the case of network server software.
13 |
14 | The licenses for most software and other practical works are designed
15 | to take away your freedom to share and change the works. By contrast,
16 | our General Public Licenses are intended to guarantee your freedom to
17 | share and change all versions of a program--to make sure it remains free
18 | software for all its users.
19 |
20 | When we speak of free software, we are referring to freedom, not
21 | price. Our General Public Licenses are designed to make sure that you
22 | have the freedom to distribute copies of free software (and charge for
23 | them if you wish), that you receive source code or can get it if you
24 | want it, that you can change the software or use pieces of it in new
25 | free programs, and that you know you can do these things.
26 |
27 | Developers that use our General Public Licenses protect your rights
28 | with two steps: (1) assert copyright on the software, and (2) offer
29 | you this License which gives you legal permission to copy, distribute
30 | and/or modify the software.
31 |
32 | A secondary benefit of defending all users' freedom is that
33 | improvements made in alternate versions of the program, if they
34 | receive widespread use, become available for other developers to
35 | incorporate. Many developers of free software are heartened and
36 | encouraged by the resulting cooperation. However, in the case of
37 | software used on network servers, this result may fail to come about.
38 | The GNU General Public License permits making a modified version and
39 | letting the public access it on a server without ever releasing its
40 | source code to the public.
41 |
42 | The GNU Affero General Public License is designed specifically to
43 | ensure that, in such cases, the modified source code becomes available
44 | to the community. It requires the operator of a network server to
45 | provide the source code of the modified version running there to the
46 | users of that server. Therefore, public use of a modified version, on
47 | a publicly accessible server, gives the public access to the source
48 | code of the modified version.
49 |
50 | An older license, called the Affero General Public License and
51 | published by Affero, was designed to accomplish similar goals. This is
52 | a different license, not a version of the Affero GPL, but Affero has
53 | released a new version of the Affero GPL which permits relicensing under
54 | this license.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | TERMS AND CONDITIONS
60 |
61 | 0. Definitions.
62 |
63 | "This License" refers to version 3 of the GNU Affero General Public License.
64 |
65 | "Copyright" also means copyright-like laws that apply to other kinds of
66 | works, such as semiconductor masks.
67 |
68 | "The Program" refers to any copyrightable work licensed under this
69 | License. Each licensee is addressed as "you". "Licensees" and
70 | "recipients" may be individuals or organizations.
71 |
72 | To "modify" a work means to copy from or adapt all or part of the work
73 | in a fashion requiring copyright permission, other than the making of an
74 | exact copy. The resulting work is called a "modified version" of the
75 | earlier work or a work "based on" the earlier work.
76 |
77 | A "covered work" means either the unmodified Program or a work based
78 | on the Program.
79 |
80 | To "propagate" a work means to do anything with it that, without
81 | permission, would make you directly or secondarily liable for
82 | infringement under applicable copyright law, except executing it on a
83 | computer or modifying a private copy. Propagation includes copying,
84 | distribution (with or without modification), making available to the
85 | public, and in some countries other activities as well.
86 |
87 | To "convey" a work means any kind of propagation that enables other
88 | parties to make or receive copies. Mere interaction with a user through
89 | a computer network, with no transfer of a copy, is not conveying.
90 |
91 | An interactive user interface displays "Appropriate Legal Notices"
92 | to the extent that it includes a convenient and prominently visible
93 | feature that (1) displays an appropriate copyright notice, and (2)
94 | tells the user that there is no warranty for the work (except to the
95 | extent that warranties are provided), that licensees may convey the
96 | work under this License, and how to view a copy of this License. If
97 | the interface presents a list of user commands or options, such as a
98 | menu, a prominent item in the list meets this criterion.
99 |
100 | 1. Source Code.
101 |
102 | The "source code" for a work means the preferred form of the work
103 | for making modifications to it. "Object code" means any non-source
104 | form of a work.
105 |
106 | A "Standard Interface" means an interface that either is an official
107 | standard defined by a recognized standards body, or, in the case of
108 | interfaces specified for a particular programming language, one that
109 | is widely used among developers working in that language.
110 |
111 | The "System Libraries" of an executable work include anything, other
112 | than the work as a whole, that (a) is included in the normal form of
113 | packaging a Major Component, but which is not part of that Major
114 | Component, and (b) serves only to enable use of the work with that
115 | Major Component, or to implement a Standard Interface for which an
116 | implementation is available to the public in source code form. A
117 | "Major Component", in this context, means a major essential component
118 | (kernel, window system, and so on) of the specific operating system
119 | (if any) on which the executable work runs, or a compiler used to
120 | produce the work, or an object code interpreter used to run it.
121 |
122 | The "Corresponding Source" for a work in object code form means all
123 | the source code needed to generate, install, and (for an executable
124 | work) run the object code and to modify the work, including scripts to
125 | control those activities. However, it does not include the work's
126 | System Libraries, or general-purpose tools or generally available free
127 | programs which are used unmodified in performing those activities but
128 | which are not part of the work. For example, Corresponding Source
129 | includes interface definition files associated with source files for
130 | the work, and the source code for shared libraries and dynamically
131 | linked subprograms that the work is specifically designed to require,
132 | such as by intimate data communication or control flow between those
133 | subprograms and other parts of the work.
134 |
135 | The Corresponding Source need not include anything that users
136 | can regenerate automatically from other parts of the Corresponding
137 | Source.
138 |
139 | The Corresponding Source for a work in source code form is that
140 | same work.
141 |
142 | 2. Basic Permissions.
143 |
144 | All rights granted under this License are granted for the term of
145 | copyright on the Program, and are irrevocable provided the stated
146 | conditions are met. This License explicitly affirms your unlimited
147 | permission to run the unmodified Program. The output from running a
148 | covered work is covered by this License only if the output, given its
149 | content, constitutes a covered work. This License acknowledges your
150 | rights of fair use or other equivalent, as provided by copyright law.
151 |
152 | You may make, run and propagate covered works that you do not
153 | convey, without conditions so long as your license otherwise remains
154 | in force. You may convey covered works to others for the sole purpose
155 | of having them make modifications exclusively for you, or provide you
156 | with facilities for running those works, provided that you comply with
157 | the terms of this License in conveying all material for which you do
158 | not control copyright. Those thus making or running the covered works
159 | for you must do so exclusively on your behalf, under your direction
160 | and control, on terms that prohibit them from making any copies of
161 | your copyrighted material outside their relationship with you.
162 |
163 | Conveying under any other circumstances is permitted solely under
164 | the conditions stated below. Sublicensing is not allowed; section 10
165 | makes it unnecessary.
166 |
167 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168 |
169 | No covered work shall be deemed part of an effective technological
170 | measure under any applicable law fulfilling obligations under article
171 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172 | similar laws prohibiting or restricting circumvention of such
173 | measures.
174 |
175 | When you convey a covered work, you waive any legal power to forbid
176 | circumvention of technological measures to the extent such circumvention
177 | is effected by exercising rights under this License with respect to
178 | the covered work, and you disclaim any intention to limit operation or
179 | modification of the work as a means of enforcing, against the work's
180 | users, your or third parties' legal rights to forbid circumvention of
181 | technological measures.
182 |
183 | 4. Conveying Verbatim Copies.
184 |
185 | You may convey verbatim copies of the Program's source code as you
186 | receive it, in any medium, provided that you conspicuously and
187 | appropriately publish on each copy an appropriate copyright notice;
188 | keep intact all notices stating that this License and any
189 | non-permissive terms added in accord with section 7 apply to the code;
190 | keep intact all notices of the absence of any warranty; and give all
191 | recipients a copy of this License along with the Program.
192 |
193 | You may charge any price or no price for each copy that you convey,
194 | and you may offer support or warranty protection for a fee.
195 |
196 | 5. Conveying Modified Source Versions.
197 |
198 | You may convey a work based on the Program, or the modifications to
199 | produce it from the Program, in the form of source code under the
200 | terms of section 4, provided that you also meet all of these conditions:
201 |
202 | a) The work must carry prominent notices stating that you modified
203 | it, and giving a relevant date.
204 |
205 | b) The work must carry prominent notices stating that it is
206 | released under this License and any conditions added under section
207 | 7. This requirement modifies the requirement in section 4 to
208 | "keep intact all notices".
209 |
210 | c) You must license the entire work, as a whole, under this
211 | License to anyone who comes into possession of a copy. This
212 | License will therefore apply, along with any applicable section 7
213 | additional terms, to the whole of the work, and all its parts,
214 | regardless of how they are packaged. This License gives no
215 | permission to license the work in any other way, but it does not
216 | invalidate such permission if you have separately received it.
217 |
218 | d) If the work has interactive user interfaces, each must display
219 | Appropriate Legal Notices; however, if the Program has interactive
220 | interfaces that do not display Appropriate Legal Notices, your
221 | work need not make them do so.
222 |
223 | A compilation of a covered work with other separate and independent
224 | works, which are not by their nature extensions of the covered work,
225 | and which are not combined with it such as to form a larger program,
226 | in or on a volume of a storage or distribution medium, is called an
227 | "aggregate" if the compilation and its resulting copyright are not
228 | used to limit the access or legal rights of the compilation's users
229 | beyond what the individual works permit. Inclusion of a covered work
230 | in an aggregate does not cause this License to apply to the other
231 | parts of the aggregate.
232 |
233 | 6. Conveying Non-Source Forms.
234 |
235 | You may convey a covered work in object code form under the terms
236 | of sections 4 and 5, provided that you also convey the
237 | machine-readable Corresponding Source under the terms of this License,
238 | in one of these ways:
239 |
240 | a) Convey the object code in, or embodied in, a physical product
241 | (including a physical distribution medium), accompanied by the
242 | Corresponding Source fixed on a durable physical medium
243 | customarily used for software interchange.
244 |
245 | b) Convey the object code in, or embodied in, a physical product
246 | (including a physical distribution medium), accompanied by a
247 | written offer, valid for at least three years and valid for as
248 | long as you offer spare parts or customer support for that product
249 | model, to give anyone who possesses the object code either (1) a
250 | copy of the Corresponding Source for all the software in the
251 | product that is covered by this License, on a durable physical
252 | medium customarily used for software interchange, for a price no
253 | more than your reasonable cost of physically performing this
254 | conveying of source, or (2) access to copy the
255 | Corresponding Source from a network server at no charge.
256 |
257 | c) Convey individual copies of the object code with a copy of the
258 | written offer to provide the Corresponding Source. This
259 | alternative is allowed only occasionally and noncommercially, and
260 | only if you received the object code with such an offer, in accord
261 | with subsection 6b.
262 |
263 | d) Convey the object code by offering access from a designated
264 | place (gratis or for a charge), and offer equivalent access to the
265 | Corresponding Source in the same way through the same place at no
266 | further charge. You need not require recipients to copy the
267 | Corresponding Source along with the object code. If the place to
268 | copy the object code is a network server, the Corresponding Source
269 | may be on a different server (operated by you or a third party)
270 | that supports equivalent copying facilities, provided you maintain
271 | clear directions next to the object code saying where to find the
272 | Corresponding Source. Regardless of what server hosts the
273 | Corresponding Source, you remain obligated to ensure that it is
274 | available for as long as needed to satisfy these requirements.
275 |
276 | e) Convey the object code using peer-to-peer transmission, provided
277 | you inform other peers where the object code and Corresponding
278 | Source of the work are being offered to the general public at no
279 | charge under subsection 6d.
280 |
281 | A separable portion of the object code, whose source code is excluded
282 | from the Corresponding Source as a System Library, need not be
283 | included in conveying the object code work.
284 |
285 | A "User Product" is either (1) a "consumer product", which means any
286 | tangible personal property which is normally used for personal, family,
287 | or household purposes, or (2) anything designed or sold for incorporation
288 | into a dwelling. In determining whether a product is a consumer product,
289 | doubtful cases shall be resolved in favor of coverage. For a particular
290 | product received by a particular user, "normally used" refers to a
291 | typical or common use of that class of product, regardless of the status
292 | of the particular user or of the way in which the particular user
293 | actually uses, or expects or is expected to use, the product. A product
294 | is a consumer product regardless of whether the product has substantial
295 | commercial, industrial or non-consumer uses, unless such uses represent
296 | the only significant mode of use of the product.
297 |
298 | "Installation Information" for a User Product means any methods,
299 | procedures, authorization keys, or other information required to install
300 | and execute modified versions of a covered work in that User Product from
301 | a modified version of its Corresponding Source. The information must
302 | suffice to ensure that the continued functioning of the modified object
303 | code is in no case prevented or interfered with solely because
304 | modification has been made.
305 |
306 | If you convey an object code work under this section in, or with, or
307 | specifically for use in, a User Product, and the conveying occurs as
308 | part of a transaction in which the right of possession and use of the
309 | User Product is transferred to the recipient in perpetuity or for a
310 | fixed term (regardless of how the transaction is characterized), the
311 | Corresponding Source conveyed under this section must be accompanied
312 | by the Installation Information. But this requirement does not apply
313 | if neither you nor any third party retains the ability to install
314 | modified object code on the User Product (for example, the work has
315 | been installed in ROM).
316 |
317 | The requirement to provide Installation Information does not include a
318 | requirement to continue to provide support service, warranty, or updates
319 | for a work that has been modified or installed by the recipient, or for
320 | the User Product in which it has been modified or installed. Access to a
321 | network may be denied when the modification itself materially and
322 | adversely affects the operation of the network or violates the rules and
323 | protocols for communication across the network.
324 |
325 | Corresponding Source conveyed, and Installation Information provided,
326 | in accord with this section must be in a format that is publicly
327 | documented (and with an implementation available to the public in
328 | source code form), and must require no special password or key for
329 | unpacking, reading or copying.
330 |
331 | 7. Additional Terms.
332 |
333 | "Additional permissions" are terms that supplement the terms of this
334 | License by making exceptions from one or more of its conditions.
335 | Additional permissions that are applicable to the entire Program shall
336 | be treated as though they were included in this License, to the extent
337 | that they are valid under applicable law. If additional permissions
338 | apply only to part of the Program, that part may be used separately
339 | under those permissions, but the entire Program remains governed by
340 | this License without regard to the additional permissions.
341 |
342 | When you convey a copy of a covered work, you may at your option
343 | remove any additional permissions from that copy, or from any part of
344 | it. (Additional permissions may be written to require their own
345 | removal in certain cases when you modify the work.) You may place
346 | additional permissions on material, added by you to a covered work,
347 | for which you have or can give appropriate copyright permission.
348 |
349 | Notwithstanding any other provision of this License, for material you
350 | add to a covered work, you may (if authorized by the copyright holders of
351 | that material) supplement the terms of this License with terms:
352 |
353 | a) Disclaiming warranty or limiting liability differently from the
354 | terms of sections 15 and 16 of this License; or
355 |
356 | b) Requiring preservation of specified reasonable legal notices or
357 | author attributions in that material or in the Appropriate Legal
358 | Notices displayed by works containing it; or
359 |
360 | c) Prohibiting misrepresentation of the origin of that material, or
361 | requiring that modified versions of such material be marked in
362 | reasonable ways as different from the original version; or
363 |
364 | d) Limiting the use for publicity purposes of names of licensors or
365 | authors of the material; or
366 |
367 | e) Declining to grant rights under trademark law for use of some
368 | trade names, trademarks, or service marks; or
369 |
370 | f) Requiring indemnification of licensors and authors of that
371 | material by anyone who conveys the material (or modified versions of
372 | it) with contractual assumptions of liability to the recipient, for
373 | any liability that these contractual assumptions directly impose on
374 | those licensors and authors.
375 |
376 | All other non-permissive additional terms are considered "further
377 | restrictions" within the meaning of section 10. If the Program as you
378 | received it, or any part of it, contains a notice stating that it is
379 | governed by this License along with a term that is a further
380 | restriction, you may remove that term. If a license document contains
381 | a further restriction but permits relicensing or conveying under this
382 | License, you may add to a covered work material governed by the terms
383 | of that license document, provided that the further restriction does
384 | not survive such relicensing or conveying.
385 |
386 | If you add terms to a covered work in accord with this section, you
387 | must place, in the relevant source files, a statement of the
388 | additional terms that apply to those files, or a notice indicating
389 | where to find the applicable terms.
390 |
391 | Additional terms, permissive or non-permissive, may be stated in the
392 | form of a separately written license, or stated as exceptions;
393 | the above requirements apply either way.
394 |
395 | 8. Termination.
396 |
397 | You may not propagate or modify a covered work except as expressly
398 | provided under this License. Any attempt otherwise to propagate or
399 | modify it is void, and will automatically terminate your rights under
400 | this License (including any patent licenses granted under the third
401 | paragraph of section 11).
402 |
403 | However, if you cease all violation of this License, then your
404 | license from a particular copyright holder is reinstated (a)
405 | provisionally, unless and until the copyright holder explicitly and
406 | finally terminates your license, and (b) permanently, if the copyright
407 | holder fails to notify you of the violation by some reasonable means
408 | prior to 60 days after the cessation.
409 |
410 | Moreover, your license from a particular copyright holder is
411 | reinstated permanently if the copyright holder notifies you of the
412 | violation by some reasonable means, this is the first time you have
413 | received notice of violation of this License (for any work) from that
414 | copyright holder, and you cure the violation prior to 30 days after
415 | your receipt of the notice.
416 |
417 | Termination of your rights under this section does not terminate the
418 | licenses of parties who have received copies or rights from you under
419 | this License. If your rights have been terminated and not permanently
420 | reinstated, you do not qualify to receive new licenses for the same
421 | material under section 10.
422 |
423 | 9. Acceptance Not Required for Having Copies.
424 |
425 | You are not required to accept this License in order to receive or
426 | run a copy of the Program. Ancillary propagation of a covered work
427 | occurring solely as a consequence of using peer-to-peer transmission
428 | to receive a copy likewise does not require acceptance. However,
429 | nothing other than this License grants you permission to propagate or
430 | modify any covered work. These actions infringe copyright if you do
431 | not accept this License. Therefore, by modifying or propagating a
432 | covered work, you indicate your acceptance of this License to do so.
433 |
434 | 10. Automatic Licensing of Downstream Recipients.
435 |
436 | Each time you convey a covered work, the recipient automatically
437 | receives a license from the original licensors, to run, modify and
438 | propagate that work, subject to this License. You are not responsible
439 | for enforcing compliance by third parties with this License.
440 |
441 | An "entity transaction" is a transaction transferring control of an
442 | organization, or substantially all assets of one, or subdividing an
443 | organization, or merging organizations. If propagation of a covered
444 | work results from an entity transaction, each party to that
445 | transaction who receives a copy of the work also receives whatever
446 | licenses to the work the party's predecessor in interest had or could
447 | give under the previous paragraph, plus a right to possession of the
448 | Corresponding Source of the work from the predecessor in interest, if
449 | the predecessor has it or can get it with reasonable efforts.
450 |
451 | You may not impose any further restrictions on the exercise of the
452 | rights granted or affirmed under this License. For example, you may
453 | not impose a license fee, royalty, or other charge for exercise of
454 | rights granted under this License, and you may not initiate litigation
455 | (including a cross-claim or counterclaim in a lawsuit) alleging that
456 | any patent claim is infringed by making, using, selling, offering for
457 | sale, or importing the Program or any portion of it.
458 |
459 | 11. Patents.
460 |
461 | A "contributor" is a copyright holder who authorizes use under this
462 | License of the Program or a work on which the Program is based. The
463 | work thus licensed is called the contributor's "contributor version".
464 |
465 | A contributor's "essential patent claims" are all patent claims
466 | owned or controlled by the contributor, whether already acquired or
467 | hereafter acquired, that would be infringed by some manner, permitted
468 | by this License, of making, using, or selling its contributor version,
469 | but do not include claims that would be infringed only as a
470 | consequence of further modification of the contributor version. For
471 | purposes of this definition, "control" includes the right to grant
472 | patent sublicenses in a manner consistent with the requirements of
473 | this License.
474 |
475 | Each contributor grants you a non-exclusive, worldwide, royalty-free
476 | patent license under the contributor's essential patent claims, to
477 | make, use, sell, offer for sale, import and otherwise run, modify and
478 | propagate the contents of its contributor version.
479 |
480 | In the following three paragraphs, a "patent license" is any express
481 | agreement or commitment, however denominated, not to enforce a patent
482 | (such as an express permission to practice a patent or covenant not to
483 | sue for patent infringement). To "grant" such a patent license to a
484 | party means to make such an agreement or commitment not to enforce a
485 | patent against the party.
486 |
487 | If you convey a covered work, knowingly relying on a patent license,
488 | and the Corresponding Source of the work is not available for anyone
489 | to copy, free of charge and under the terms of this License, through a
490 | publicly available network server or other readily accessible means,
491 | then you must either (1) cause the Corresponding Source to be so
492 | available, or (2) arrange to deprive yourself of the benefit of the
493 | patent license for this particular work, or (3) arrange, in a manner
494 | consistent with the requirements of this License, to extend the patent
495 | license to downstream recipients. "Knowingly relying" means you have
496 | actual knowledge that, but for the patent license, your conveying the
497 | covered work in a country, or your recipient's use of the covered work
498 | in a country, would infringe one or more identifiable patents in that
499 | country that you have reason to believe are valid.
500 |
501 | If, pursuant to or in connection with a single transaction or
502 | arrangement, you convey, or propagate by procuring conveyance of, a
503 | covered work, and grant a patent license to some of the parties
504 | receiving the covered work authorizing them to use, propagate, modify
505 | or convey a specific copy of the covered work, then the patent license
506 | you grant is automatically extended to all recipients of the covered
507 | work and works based on it.
508 |
509 | A patent license is "discriminatory" if it does not include within
510 | the scope of its coverage, prohibits the exercise of, or is
511 | conditioned on the non-exercise of one or more of the rights that are
512 | specifically granted under this License. You may not convey a covered
513 | work if you are a party to an arrangement with a third party that is
514 | in the business of distributing software, under which you make payment
515 | to the third party based on the extent of your activity of conveying
516 | the work, and under which the third party grants, to any of the
517 | parties who would receive the covered work from you, a discriminatory
518 | patent license (a) in connection with copies of the covered work
519 | conveyed by you (or copies made from those copies), or (b) primarily
520 | for and in connection with specific products or compilations that
521 | contain the covered work, unless you entered into that arrangement,
522 | or that patent license was granted, prior to 28 March 2007.
523 |
524 | Nothing in this License shall be construed as excluding or limiting
525 | any implied license or other defenses to infringement that may
526 | otherwise be available to you under applicable patent law.
527 |
528 | 12. No Surrender of Others' Freedom.
529 |
530 | If conditions are imposed on you (whether by court order, agreement or
531 | otherwise) that contradict the conditions of this License, they do not
532 | excuse you from the conditions of this License. If you cannot convey a
533 | covered work so as to satisfy simultaneously your obligations under this
534 | License and any other pertinent obligations, then as a consequence you may
535 | not convey it at all. For example, if you agree to terms that obligate you
536 | to collect a royalty for further conveying from those to whom you convey
537 | the Program, the only way you could satisfy both those terms and this
538 | License would be to refrain entirely from conveying the Program.
539 |
540 | 13. Remote Network Interaction; Use with the GNU General Public License.
541 |
542 | Notwithstanding any other provision of this License, if you modify the
543 | Program, your modified version must prominently offer all users
544 | interacting with it remotely through a computer network (if your version
545 | supports such interaction) an opportunity to receive the Corresponding
546 | Source of your version by providing access to the Corresponding Source
547 | from a network server at no charge, through some standard or customary
548 | means of facilitating copying of software. This Corresponding Source
549 | shall include the Corresponding Source for any work covered by version 3
550 | of the GNU General Public License that is incorporated pursuant to the
551 | following paragraph.
552 |
553 | Notwithstanding any other provision of this License, you have
554 | permission to link or combine any covered work with a work licensed
555 | under version 3 of the GNU General Public License into a single
556 | combined work, and to convey the resulting work. The terms of this
557 | License will continue to apply to the part which is the covered work,
558 | but the work with which it is combined will remain governed by version
559 | 3 of the GNU General Public License.
560 |
561 | 14. Revised Versions of this License.
562 |
563 | The Free Software Foundation may publish revised and/or new versions of
564 | the GNU Affero General Public License from time to time. Such new versions
565 | will be similar in spirit to the present version, but may differ in detail to
566 | address new problems or concerns.
567 |
568 | Each version is given a distinguishing version number. If the
569 | Program specifies that a certain numbered version of the GNU Affero General
570 | Public License "or any later version" applies to it, you have the
571 | option of following the terms and conditions either of that numbered
572 | version or of any later version published by the Free Software
573 | Foundation. If the Program does not specify a version number of the
574 | GNU Affero General Public License, you may choose any version ever published
575 | by the Free Software Foundation.
576 |
577 | If the Program specifies that a proxy can decide which future
578 | versions of the GNU Affero General Public License can be used, that proxy's
579 | public statement of acceptance of a version permanently authorizes you
580 | to choose that version for the Program.
581 |
582 | Later license versions may give you additional or different
583 | permissions. However, no additional obligations are imposed on any
584 | author or copyright holder as a result of your choosing to follow a
585 | later version.
586 |
587 | 15. Disclaimer of Warranty.
588 |
589 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597 |
598 | 16. Limitation of Liability.
599 |
600 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608 | SUCH DAMAGES.
609 |
610 | 17. Interpretation of Sections 15 and 16.
611 |
612 | If the disclaimer of warranty and limitation of liability provided
613 | above cannot be given local legal effect according to their terms,
614 | reviewing courts shall apply local law that most closely approximates
615 | an absolute waiver of all civil liability in connection with the
616 | Program, unless a warranty or assumption of liability accompanies a
617 | copy of the Program in return for a fee.
618 |
619 | END OF TERMS AND CONDITIONS
620 |
621 | How to Apply These Terms to Your New Programs
622 |
623 | If you develop a new program, and you want it to be of the greatest
624 | possible use to the public, the best way to achieve this is to make it
625 | free software which everyone can redistribute and change under these terms.
626 |
627 | To do so, attach the following notices to the program. It is safest
628 | to attach them to the start of each source file to most effectively
629 | state the exclusion of warranty; and each file should have at least
630 | the "copyright" line and a pointer to where the full notice is found.
631 |
632 | <one line to give the program's name and a brief idea of what it does.>
633 | Copyright (C) <year> <name of author>
634 |
635 | This program is free software: you can redistribute it and/or modify
636 | it under the terms of the GNU Affero General Public License as published
637 | by the Free Software Foundation, either version 3 of the License, or
638 | (at your option) any later version.
639 |
640 | This program is distributed in the hope that it will be useful,
641 | but WITHOUT ANY WARRANTY; without even the implied warranty of
642 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643 | GNU Affero General Public License for more details.
644 |
645 | You should have received a copy of the GNU Affero General Public License
646 | along with this program. If not, see <https://www.gnu.org/licenses/>.
647 |
648 | Also add information on how to contact you by electronic and paper mail.
649 |
650 | If your software can interact with users remotely through a computer
651 | network, you should also make sure that it provides a way for users to
652 | get its source. For example, if your program is a web application, its
653 | interface could display a "Source" link that leads users to an archive
654 | of the code. There are many ways you could offer source, and different
655 | solutions will be better for different programs; see section 13 for the
656 | specific requirements.
657 |
658 | You should also get your employer (if you work as a programmer) or school,
659 | if any, to sign a "copyright disclaimer" for the program, if necessary.
660 | For more information on this, and how to apply and follow the GNU AGPL, see
661 | <https://www.gnu.org/licenses/>.
662 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Python Wrapper for the Chemistry Development Kit
2 |
3 | ### tl;dr
4 | * A Python wrapper for the CDK (which is written in Java)
5 | * Primary purpose:
6 | * Generate diverse chemical compound identifiers (SMILES, InChI)
7 | * Inter-convert between these identifiers
8 | * Fully compatible with Python 3.x
9 |
10 | ### Motivation
11 | The chemistry world only has a small number of open tools, e.g. [OpenBabel](http://openbabel.org) and the
12 | [Chemistry Development Kit](http://cdk.sourceforge.net) ([github](https://github.com/cdk)).
13 |
14 | I have been using OpenBabel for some time now and, although it is a great tool offering many options,
15 | I found several issues which make it hard to use:
16 | * Generating InChI (keys) from SMILES often either does not work or struggles with stereochemistry.
17 | * InChI cannot be used as input format.
18 |
19 | ### Installation
20 |
21 | ```bash
22 | git clone https://github.com/sebotic/cdk_pywrapper.git
23 | cd cdk_pywrapper
24 |
25 | pip install .
26 |
27 | ```
28 |
29 | This will install the package on your local system, download the CDK, and build cdk_bridge.java.
30 | After that, cdk_pywrapper should be ready to use, as in the example below.
31 |
32 | Don't forget to use e.g. sudo for global installation or pip3 for Python 3.
33 |
34 | I will also host this on PyPI soon, so no repo cloning will be required. I have tested it on Linux and macOS; I am not sure whether it works on Windows.
35 |
36 | ### Example
37 |
38 | ```python
39 | from cdk_pywrapper.cdk_pywrapper import Compound
40 |
41 | smiles = 'CCN1C2=CC=CC=C2SC1=CC=CC=CC3=[N+](C4=CC=CC=C4S3)CC.[I-]'
42 | cmpnd = Compound(compound_string=smiles, identifier_type='smiles')
43 | ikey = cmpnd.get_inchi_key()
44 | print(ikey)
45 |
46 | ```
47 | Output: 'MNQDKWZEUULFPX-UHFFFAOYSA-M'
48 |
49 |
50 |
--------------------------------------------------------------------------------
/cdk_pywrapper/__init__.py:
--------------------------------------------------------------------------------
1 | # import cdk_pywrapper.cdk_pywrapper
2 | # import cdk_pywrapper.config
3 |
# Module-level string constants exposed by the package.
# NOTE(review): nothing in this file shows what they are used for — confirm they are still needed.
gw, ade = 'twest', 'maose'
--------------------------------------------------------------------------------
/cdk_pywrapper/cdk/cdk_bridge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * A py4j bridge for the CDK
3 | * Also has a class for substructure search and SVG xml generation
4 | * Copyright 2018 Sebastian Burgstaller-Muehlbacher
5 | * Licensed under AGPLv3
6 | */
7 |
8 | import py4j.GatewayServer;
9 | import org.openscience.cdk.*;
10 | import org.openscience.cdk.DefaultChemObjectBuilder;
11 | import org.openscience.cdk.interfaces.IChemObjectBuilder;
12 | import org.openscience.cdk.interfaces.IAtomContainer;
13 | import org.openscience.cdk.interfaces.IChemObject;
14 |
15 | import org.openscience.cdk.smiles.SmilesParser;
16 | import org.openscience.cdk.smiles.SmiFlavor;
17 | import org.openscience.cdk.smiles.SmilesGenerator;
18 |
19 | import org.openscience.cdk.exception.InvalidSmilesException;
20 | import org.openscience.cdk.exception.CDKException;
21 | import org.openscience.cdk.smiles.smarts.SmartsPattern;
22 | import org.openscience.cdk.isomorphism.Pattern;
23 | import org.openscience.cdk.isomorphism.Mappings;
24 | import org.openscience.cdk.depict.DepictionGenerator;
25 |
26 | import org.openscience.cdk.aromaticity.Aromaticity;
27 | import org.openscience.cdk.aromaticity.ElectronDonation;
28 | import org.openscience.cdk.graph.Cycles;
29 | import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
30 |
31 | import java.util.*;
32 | import java.io.IOException;
33 | import java.awt.Color;
34 | import java.util.concurrent.ConcurrentHashMap;
35 |
36 | class CDKBridge {
37 |
38 | public static void main(String[] args) {
39 | CDKBridge app = new CDKBridge();
40 | GatewayServer server = new GatewayServer(app);
41 | server.start();
42 | System.out.println("Server process started sucessfully");
43 | }
44 | }
45 |
46 | class SearchHandler {
47 |
48 | ConcurrentHashMap moleculeContainers;
49 | Pattern pattern;
50 | int totalCount = 0;
51 |
52 | public SearchHandler() {
53 | this.moleculeContainers = new ConcurrentHashMap();
54 | // this.buildSubstructureIndex(molecules);
55 | }
56 |
57 |
58 |
59 | public String getSVG(IAtomContainer c, Iterable substructures) {
60 | Color color = Color.orange;
61 |
62 | DepictionGenerator dg = new DepictionGenerator()
63 | .withHighlight(substructures, color)
64 | .withAtomColors()
65 | .withOuterGlowHighlight(4.0);
66 |
67 | try {
68 | return dg.depict(c).toSvgStr();
69 |
70 | } catch (CDKException e) {
71 | System.err.println(e.getMessage());
72 | return "";
73 | }
74 | }
75 |
76 | public ArrayList searchPattern(String p, HashMap mols) {
77 |
78 | ConcurrentHashMap molecules = new ConcurrentHashMap<>(mols);
79 | IChemObjectBuilder builder = DefaultChemObjectBuilder.getInstance();
80 | SmilesParser parser = new SmilesParser(builder);
81 |
82 |
83 | try {
84 | IAtomContainer patternMol = parser.parseSmiles(p);
85 | Aromaticity aromaticity = new Aromaticity(ElectronDonation.daylight(),
86 | Cycles.all());
87 |
88 | aromaticity.apply(patternMol);
89 |
90 | patternMol = AtomContainerManipulator.copyAndSuppressedHydrogens(patternMol);
91 |
92 | SmilesGenerator sg = new SmilesGenerator(SmiFlavor.UseAromaticSymbols);
93 | p = sg.create(patternMol);
94 |
95 |
96 | } catch (InvalidSmilesException e) {
97 | System.err.println(e.getMessage());
98 | } catch (CDKException e) {
99 |
100 | }
101 |
102 | try {
103 | this.pattern = SmartsPattern.create(p);
104 | } catch (IOException e) {
105 | System.err.println(e.getMessage());
106 |
107 | return new ArrayList();
108 | }
109 |
110 | ConcurrentHashMap ma = new ConcurrentHashMap<>();
111 |
112 | molecules.forEach(1, (k, v) -> {
113 | if (this.totalCount < 200) {
114 | try {
115 |
116 | IAtomContainer ac = parser.parseSmiles(v);
117 |
118 | Mappings mappings = pattern.matchAll(ac);
119 | int match_count = mappings.countUnique();
120 |
121 | if (match_count > 0) {
122 | Iterable substructures = mappings.toChemObjects();
123 | String svg = this.getSVG(ac, substructures);
124 |
125 | ArrayList tmp = new ArrayList(3);
126 | tmp.add(0, k);
127 | tmp.add(1, String.valueOf(match_count));
128 | tmp.add(2, svg);
129 | ma.put(k, tmp);
130 |
131 | this.totalCount += 1;
132 | }
133 |
134 |
135 | } catch (InvalidSmilesException e) {
136 | System.err.println(e.getMessage());
137 | }
138 | }
139 | }
140 | );
141 |
142 |
143 | ArrayList matches = new ArrayList(500);
144 | for (Map.Entry entry : ma.entrySet()) {
145 | matches.add(entry.getValue());
146 |
147 | }
148 |
149 | // try {
150 | // Pattern pattern = SmartsPattern.create(p);
151 | //
152 | // int totalCount = 0;
153 | // for (Map.Entry entry : moleculeContainers.entrySet()) {
154 | // String key = entry.getKey();
155 | // IAtomContainer ac = entry.getValue();
156 | // Mappings mappings = pattern.matchAll(ac);
157 | // int match_count = mappings.countUnique();
158 | //
159 | // if (match_count > 0) {
160 | // Iterable substructures = mappings.toChemObjects();
161 | // String svg = this.getSVG(ac, substructures);
162 | //
163 | // ArrayList tmp = new ArrayList(3);
164 | // tmp.add(0, key);
165 | // tmp.add(1, String.valueOf(match_count));
166 | // tmp.add(2, svg);
167 | // matches.add(tmp);
168 | //
169 | // totalCount += 1;
170 | // }
171 | //
172 | // if (totalCount > 200) {
173 | // return matches;
174 | // }
175 | // }
176 | //
177 | // } catch (IOException e) {
178 | // System.err.println(e.getMessage());
179 | // }
180 |
181 | return matches;
182 | }
183 | }
184 |
185 |
--------------------------------------------------------------------------------
/cdk_pywrapper/cdk_pywrapper.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import sys
3 | import time
4 | import os
5 | import atexit
6 | import platform
7 | import copy
8 | import psutil
9 |
10 | import py4j
11 | from py4j.java_gateway import JavaGateway, GatewayParameters
12 | from py4j.java_collections import SetConverter, MapConverter, ListConverter
13 | from py4j.protocol import Py4JJavaError
14 |
15 | # import cdk_pywrapper.config as config
16 | import cdk_pywrapper
# Debug output: print the package search path of the imported cdk_pywrapper package.
print(cdk_pywrapper.__path__)

# make sure host paths are set correctly,
# TODO: test if this can reasonably be replaced by finding full path using 'which' shell command
host_os = platform.system()
# Defaults are bare command names; the Darwin and Linux branches below overwrite them
# with absolute paths, the Windows branch leaves them as they are.
ps_path = 'ps'
java_path = 'java'
grep_path = 'grep'

# Separator between Java classpath entries; switched to ';' in the Windows branch.
cp_sep = ':'

if host_os == 'Darwin':
    # Take the package directory, drop its last four '/'-separated components and look
    # for the CDK files in 'share/cdk' below the remaining prefix.
    cdk_path = os.path.join(*cdk_pywrapper.__path__[0].split('/')[:-4])
    cdk_jar_path = os.path.join('/', cdk_path, 'share', 'cdk')

    # Same derivation for py4j; the jar file name contains the installed py4j version.
    py4j_path = os.path.join(*py4j.__path__[0].split('/')[:-4])
    py4j_jar_path = os.path.join('/', py4j_path, 'share', 'py4j', 'py4j' + py4j.__version__ + '.jar')

    ps_path = '/bin/ps'
    java_path = '/usr/bin/java'
    grep_path = '/usr/bin/grep'
elif host_os == 'Linux':
    # Identical to the Darwin branch except for the location of 'ps'.
    cdk_path = os.path.join(*cdk_pywrapper.__path__[0].split('/')[:-4])
    cdk_jar_path = os.path.join('/', cdk_path, 'share', 'cdk')

    py4j_path = os.path.join(*py4j.__path__[0].split('/')[:-4])
    py4j_jar_path = os.path.join('/', py4j_path, 'share', 'py4j', 'py4j' + py4j.__version__ + '.jar')

    ps_path = '/usr/bin/ps'
    java_path = '/usr/bin/java'
    grep_path = '/usr/bin/grep'
elif host_os == 'Windows':
    cp_sep = ';'
    # On Windows only the last three backslash-separated components are dropped, and the
    # drive letter is split off first so that it can be put back in front of the prefix.
    drive, path = os.path.splitdrive(cdk_pywrapper.__path__[0])
    cdk_path = os.path.join(drive + '\\', *path.split('\\')[:-3])
    cdk_jar_path = os.path.join(cdk_path, 'share', 'cdk')

    drive, path = os.path.splitdrive(py4j.__path__[0])
    py4j_path = os.path.join(drive + '\\', *path.split('\\')[:-3])
    py4j_jar_path = os.path.join(py4j_path, 'share', 'py4j', 'py4j' + py4j.__version__ + '.jar')

# Debug output of the resolved locations.
# NOTE(review): the original indentation was lost; these prints are assumed to run for every
# platform rather than only inside the Windows branch — confirm.
# None of the four names is assigned when host_os is not Darwin, Linux or Windows.
print(cdk_path)
print(cdk_jar_path)
print(py4j_path)
print(py4j_jar_path)
62 |
63 | # from py4j.clientserver import ClientServer, JavaParameters, PythonParameters
64 |
# set dev classpaths
# NOTE(review): __debug__ is False only when Python runs with -O, so this override takes
# effect only in optimised runs — confirm that this is the intended "dev" switch.
if not __debug__:
    cdk_jar_path = os.path.join('.', 'cdk')

# Module metadata.
__author__ = 'Sebastian Burgstaller-Muehlbacher'
__license__ = 'AGPLv3'
__copyright__ = 'Sebastian Burgstaller-Muehlbacher'
72 |
73 |
# Set to True below when an already running CDKBridge process is found; also read by
# cleanup_gateway() to decide whether the gateway may be shut down at exit.
server_process_running = False
# with subprocess.Popen(['{} aux | {} CDK'.format(ps_path, grep_path)], shell=True, stdout=subprocess.PIPE) as proc:
# line = proc.stdout.read()
# print(line)
# if 'CDKBridge' in str(line):
# print('process running')
# server_process_running = True

# Scan all processes; a process counts as the bridge server when one of the entries of its
# command line is exactly 'CDKBridge'.
for proc in psutil.process_iter():
    pinfo = proc.as_dict(attrs=['pid', 'name', 'username', 'cmdline'])
    if 'cmdline' in pinfo and pinfo['cmdline'] and 'CDKBridge' in pinfo['cmdline']:
        server_process_running = True
# NOTE(review): the original indentation was lost; this status line is assumed to be printed
# once after the scan, not once per matching process — confirm.
print('Server process already running:', server_process_running)


# if not any([True if 'CDKBridge' in p.cmdline() else False for p in psutil.process_iter()]):
if not server_process_running:
    # compile and start py4j server
    # print(os.getcwd())
    # subprocess.check_call(["javac -cp '{}:.{}' ../cdk/cdk_bridge.java".format(py4j_path, cdk_path)], shell=True)

    # subprocess.check_call(["javac -cp '{}:{}' ../cdk_pywrapper/cdk/cdk_bridge.java".format(py4j_jar_path,
    # '../cdk_pywrapper/cdk/cdk-2.1.1.jar')], shell=True)
    # # print('compiled sucessfully')
    print('starting server process')
    # p = subprocess.Popen(['{} -cp {};{};{}\\ CDKBridge'.format(java_path, py4j_jar_path,
    # os.path.join(cdk_jar_path, 'cdk-2.2.jar'),
    # cdk_jar_path)], shell=True)

    if host_os == 'Linux' or host_os == 'Darwin':
        # The whole command is one string handed to the shell. The classpath consists of the
        # py4j jar, the CDK jar and the CDK directory itself.
        p = subprocess.Popen([java_path +
                              ' -cp ' +
                              ' {}:{}:{} '.format(py4j_jar_path,
                                                  os.path.join(cdk_jar_path, 'cdk-2.2.jar'),
                                                  cdk_jar_path) +
                              ' CDKBridge'],
                             shell=True)

    elif host_os == 'Windows':
        # Same three classpath entries, joined with cp_sep and with a trailing backslash
        # after the CDK directory; here the command is passed as separate arguments.
        p = subprocess.Popen([java_path,
                              '-cp',
                              '{}{}{}{}{}\\'.format(py4j_jar_path,
                                                    cp_sep,
                                                    os.path.join(cdk_jar_path, 'cdk-2.2.jar'),
                                                    cp_sep,
                                                    cdk_jar_path),
                              'CDKBridge'],
                             shell=True)

    # wait 5 sec to start up JVM and server
    # NOTE(review): assumed to apply only when a server was started above — confirm.
    time.sleep(5)
125 |
126 | # connect to the JVM
127 | # gateway = JavaGateway(gateway_parameters=GatewayParameters(auto_convert=True))
128 | # gateway = ClientServer(
129 | # java_parameters=JavaParameters(),
130 | # python_parameters=PythonParameters())
131 |
132 |
133 | # shorten paths
134 | # cdk = gateway.jvm.org.openscience.cdk
135 | # java = gateway.jvm.java
136 | # javax = gateway.jvm.javax
137 |
138 | # map exceptions
139 | # InvalidSmilesException = cdk.exception.InvalidSmilesException
140 | # CDKException = cdk.exception.CDKException
141 | # NullPointerException = java.lang.NullPointerException
142 |
# Module-wide py4j client used by Compound; auto_convert=True makes py4j convert Python
# collections passed as arguments into Java collections automatically.
gateway = JavaGateway(gateway_parameters=GatewayParameters(auto_convert=True))
144 |
145 |
146 | # make sure the Java gateway server is shut down at exit of Python, but don't shut down if it has already been running
@atexit.register
def cleanup_gateway():
    """
    Shut down the module-level gateway when the interpreter exits.

    Nothing is done if a server process was already running when this module was
    imported, so that a server started elsewhere keeps running.
    """
    if server_process_running:
        return
    gateway.shutdown()
151 |
152 |
def search_substructure(pattern, molecules):
    """
    Search a set of molecules for a substructure in a JVM launched just for this call.

    :param pattern: the substructure to look for, passed on to SearchHandler.searchPattern
    :type pattern: str
    :param molecules: mapping of compound id to the compound's SMILES string
    :type molecules: dict
    :return: one dict per matching compound with the keys 'id', 'match_count' and 'svg'
    """
    # Built from the module-level cp_sep / java_path instead of one hard-coded variant per
    # operating system; the last entry is the CDK directory with a trailing path separator.
    classpath = cp_sep.join((py4j_jar_path,
                             os.path.join(cdk_jar_path, 'cdk-2.2.jar'),
                             cdk_jar_path + os.sep))
    g = JavaGateway.launch_gateway(classpath=classpath, java_path=java_path)

    try:
        search_handler = g.jvm.SearchHandler()
        matches = search_handler.searchPattern(pattern, MapConverter().convert(molecules, g._gateway_client))

        # Convert to plain Python values while the JVM is still alive; str and int results
        # do not reference the gateway any more, so no further copying is needed.
        return [{'id': str(compound_id), 'match_count': int(match_count), 'svg': str(svg)}
                for compound_id, match_count, svg in matches]
    finally:
        # Always stop the launched JVM, also when the search raised.
        g.shutdown()
173 |
174 |
175 | class Compound(object):
def __init__(self, compound_string, identifier_type, suppress_hydrogens=False, add_explicit_hydrogens=False):
    """
    Build a CDK atom container from a SMILES string, an InChI string or an existing container.

    :param compound_string: the SMILES or InChI string, or a CDK atom container object
    :param identifier_type: one of 'smiles', 'inchi' or 'atom_container'
    :type identifier_type: str
    :param suppress_hydrogens: replace the container by a copy with suppressed hydrogens
    :type suppress_hydrogens: bool
    :param add_explicit_hydrogens: add implicit hydrogens and convert them to explicit atoms
    :type add_explicit_hydrogens: bool
    :raises ValueError: if the identifier type is unknown or the JVM rejects the input
    """
    allowed_types = ['smiles', 'inchi', 'atom_container']
    # Left in place for backward compatibility: unless Python runs with -O, an unknown
    # type is reported as AssertionError before the ValueError check below is reached.
    assert(identifier_type in allowed_types)

    self.cdk = gateway.jvm.org.openscience.cdk
    self.java = gateway.jvm.java

    self.identifier_type = identifier_type
    self.mol_container = None
    self.inchi_factory = self.cdk.inchi.InChIGeneratorFactory.getInstance()

    if self.identifier_type not in allowed_types:
        raise ValueError('Not a valid identifier type')
    try:
        # Created before the branch: the explicit-hydrogen step needs the builder for
        # every identifier type, including 'atom_container'.
        builder = self.cdk.DefaultChemObjectBuilder.getInstance()

        if identifier_type == 'atom_container':
            self.compound_string = compound_string
            self.mol_container = self.compound_string
        else:
            self.compound_string = compound_string.strip()
            if self.identifier_type == 'inchi':
                s = self.inchi_factory.getInChIToStructure(self.compound_string, builder)
                self.mol_container = s.getAtomContainer()
            elif self.identifier_type == 'smiles':
                smiles_parser = self.cdk.smiles.SmilesParser(builder)
                self.mol_container = smiles_parser.parseSmiles(self.compound_string)

        manipulator = self.cdk.tools.manipulator.AtomContainerManipulator

        if suppress_hydrogens:
            self.mol_container = manipulator.copyAndSuppressedHydrogens(self.mol_container)

        if add_explicit_hydrogens:
            # Atom types have to be perceived before implicit hydrogens can be added.
            manipulator.percieveAtomTypesAndConfigureAtoms(self.mol_container)
            self.cdk.tools.CDKHydrogenAdder.getInstance(builder).addImplicitHydrogens(self.mol_container)
            manipulator.convertImplicitToExplicitHydrogens(self.mol_container)

    except Py4JJavaError as e:
        print(e)
        # Keep the Java error attached as the cause of the ValueError.
        raise ValueError('Invalid {} provided!'.format(self.identifier_type)) from e
218 |
def get_smiles(self, smiles_type='isomeric'):
    """
    Generate a SMILES string for the molecule.

    :param smiles_type: 'isomeric', 'unique', 'generic' or 'use_aromatic_symbols';
        any other value selects the absolute SMILES generator
    :type smiles_type: str
    :return: the SMILES created by the selected CDK generator
    """
    smiles_pkg = self.cdk.smiles
    generator_cls = smiles_pkg.SmilesGenerator

    if smiles_type == 'use_aromatic_symbols':
        # need to add aromaticity information first before generating aromatic smiles
        arom_pkg = self.cdk.aromaticity
        model = arom_pkg.Aromaticity(arom_pkg.ElectronDonation.daylight(),
                                     self.cdk.graph.Cycles.all())
        try:
            model.apply(self.mol_container)
        except Exception as e:
            # A failure is only printed; the SMILES is generated regardless.
            print(e)
        generator = generator_cls(smiles_pkg.SmiFlavor.UseAromaticSymbols)
    elif smiles_type == 'isomeric':
        generator = generator_cls(smiles_pkg.SmiFlavor.Isomeric)
    elif smiles_type == 'unique':
        generator = generator_cls.unique()
    elif smiles_type == 'generic':
        generator = generator_cls.generic()
    else:
        generator = generator_cls.absolute()

    return generator.create(self.mol_container)
243 |
def get_inchi_key(self):
    """
    Generate the InChI key of the molecule.

    :return: the value of getInchiKey() of an InChI generator created for the molecule
    """
    return self.inchi_factory.getInChIGenerator(self.mol_container).getInchiKey()
247 |
def get_inchi(self):
    """
    Generate the InChI string of the molecule.

    :return: the value of getInchi() of an InChI generator created for the molecule
    """
    return self.inchi_factory.getInChIGenerator(self.mol_container).getInchi()
251 |
def get_tautomers(self):
    """
    Generate the tautomers of the molecule with CDK's InChITautomerGenerator.

    The InChI keys, InChIs and SMILES of all tautomers are printed as a side effect.

    :return: a list with the tautomers as returned by the generator
    """
    generator = self.cdk.tautomers.InChITautomerGenerator()
    tautomers = generator.getTautomers(self.mol_container)

    # Wrap every tautomer so that its identifiers can be printed.
    wrapped = []
    for container in tautomers:
        wrapped.append(Compound(compound_string=container, identifier_type='atom_container'))

    inchi_keys = [compound.get_inchi_key() for compound in wrapped]
    print(inchi_keys)
    print(*[compound.get_inchi() for compound in wrapped], sep='\n')
    print(*[compound.get_smiles() for compound in wrapped], sep='\n')
    return list(tautomers)
262 |
def get_stereocenters(self):
    """
    List the atoms that CDK's Stereocenters class reports as stereocenters.

    :return: a list of tuples (element type, stereocenter type, atom index, element symbol);
        both types are converted to str
    """
    centers = self.cdk.stereo.Stereocenters.of(self.mol_container)
    return [
        (
            str(centers.elementType(index)),
            str(centers.stereocenterType(index)),
            index,
            self.mol_container.getAtom(index).getSymbol(),
        )
        for index in range(self.mol_container.getAtomCount())
        if centers.isStereocenter(index)
    ]
279 |
def get_configuration_class(self):
    """
    Print a description of every stereo element of the molecule.

    For each element the following lines are printed: the raw configuration class, the
    value of getStereo(), a label for the configuration class (if it is a known one),
    a label for the configuration order (if it is a known one), the raw configuration
    order and a separator line.
    """
    # IStereoElement constant names and the labels printed for them, in lookup order.
    class_labels = (
        ('TH', 'tetrahedral'),
        ('CT', 'cis-trans'),
        ('Octahedral', 'octaheral'),
        ('AL', 'extended tetrahedral'),
        ('AT', 'atropisomeric'),
        ('SP', 'square planar'),
        ('SPY', 'square pyramidal'),
        ('TBPY', 'trigonal bipyramidal'),
        ('PBPY', 'pentagonal bipyramidal'),
        ('HBPY8', 'hexagonal bipyramidal'),
        ('HBPY9', 'heptagonal bipyramidal'),
    )
    order_labels = (
        ('LEFT', 'left'),
        ('RIGHT', 'right'),
        ('OPPOSITE', 'opposite'),
        ('TOGETHER', 'together'),
    )

    for element in self.mol_container.stereoElements():
        config_class = element.getConfigClass()
        print(config_class)

        print(element.getStereo())

        # Only the first constant that compares equal is reported.
        for constant_name, label in class_labels:
            if config_class == getattr(self.cdk.interfaces.IStereoElement, constant_name):
                print(label)
                break

        configuration = element.getConfigOrder()
        for constant_name, label in order_labels:
            if configuration == getattr(self.cdk.interfaces.IStereoElement, constant_name):
                print(label)
                break
        print(configuration)
        print('---------------------------------')
322 |
def get_chirality(self):
    """
    Classify the molecule as 'racemate', 'achiral' or 'chiral'.

    The number of stereo element configurations is compared with the number of
    tetracoordinate carbon stereocenters:

    * different counts give 'racemate'
    * no such stereocenter, or both 'R' and 'S' configurations together with point
      symmetry, give 'achiral'
    * everything else gives 'chiral'

    :return: 'racemate', 'achiral' or 'chiral'
    """
    labels = [entry[0] for entry in self.get_configuration_order()]
    carbon_centers = sum(
        1
        for kind, _center_type, _atom_index, symbol in self.get_stereocenters()
        if kind == 'Tetracoordinate' and symbol == 'C'
    )

    if len(labels) != carbon_centers:
        return 'racemate'
    if carbon_centers == 0:
        return 'achiral'
    # Point symmetry is only checked when both R and S configurations are present.
    if {'R', 'S'}.issubset(labels) and self.has_point_symmetry():
        return 'achiral'
    return 'chiral'
340 |
def get_configuration_order(self):
    """
    Determine the CIP chirality of every stereo element of the molecule.

    :return: a list of tuples (CIP chirality as str, index of the focus atom)
    """
    cip_tool = self.cdk.geometry.cip.CIPTool
    # that is not the IUPAC naming convention atom number but a CDK internal representation
    return [
        (str(cip_tool.getCIPChirality(self.mol_container, element)), element.getFocus().getIndex())
        for element in self.mol_container.stereoElements()
    ]
352 |
def has_point_symmetry(self):
    """
    Heuristic symmetry test based on CDK's SignatureQuotientGraph of the molecule.

    With an even atom count the result is True if the graph has at most half as many
    vertices as the molecule has atoms and exactly as many edges as vertices. With an
    odd atom count it is True if the graph has at most (atom count - 1) / 2 vertices
    and fewer edges than vertices.

    :return: True or False
    """
    atom_count = self.mol_container.getAtomCount()
    quotient_graph = self.cdk.signature.SignatureQuotientGraph(self.mol_container)
    vertex_count = quotient_graph.getVertexCount()

    if atom_count % 2 == 0:
        return vertex_count <= atom_count / 2 and vertex_count == quotient_graph.getEdgeCount()
    return (atom_count - 1) / 2 >= vertex_count > quotient_graph.getEdgeCount()
362 |
def get_monoisotopic_mass(self):
    """
    Calculate the value of CDK's WeightDescriptor for the molecule.

    NOTE(review): the descriptor used is WeightDescriptor — confirm that it really
    yields the monoisotopic mass the method name promises.

    :return: the descriptor value converted with Java's toString()
    """
    descriptor = self.cdk.qsar.descriptors.molecular.WeightDescriptor()
    result = descriptor.calculate(self.mol_container)
    return result.getValue().toString()
368 |
def get_natural_mass(self):
    """
    Calculate the natural exact mass of the molecule.

    :return: the result of AtomContainerManipulator.getNaturalExactMass
    """
    return self.cdk.tools.manipulator.AtomContainerManipulator().getNaturalExactMass(self.mol_container)
372 |
def get_mw(self):
    """
    Calculate the molecular weight of the molecule.

    :return: the result of AtomContainerManipulator.getMolecularWeight
    """
    manipulator = self.cdk.tools.manipulator.AtomContainerManipulator()
    return manipulator.getMolecularWeight(self.mol_container)
375 |
def get_tpsa(self):
    """
    Calculate the value of CDK's TPSADescriptor for the molecule.

    :return: the descriptor value converted with Java's toString()
    """
    descriptor = self.cdk.qsar.descriptors.molecular.TPSADescriptor()
    result = descriptor.calculate(self.mol_container)
    return result.getValue().toString()
378 |
def get_rotable_bond_count(self):
    """
    Calculate the value of CDK's RotatableBondsCountDescriptor for the molecule.

    :return: the descriptor value converted with Java's toString()
    """
    descriptor = self.cdk.qsar.descriptors.molecular.RotatableBondsCountDescriptor()
    result = descriptor.calculate(self.mol_container)
    return result.getValue().toString()
382 |
def get_hbond_acceptor_count(self):
    """
    Calculate the value of CDK's HBondAcceptorCountDescriptor for the molecule.

    :return: the descriptor value converted with Java's toString()
    """
    descriptor = self.cdk.qsar.descriptors.molecular.HBondAcceptorCountDescriptor()
    result = descriptor.calculate(self.mol_container)
    return result.getValue().toString()
386 |
def get_hbond_donor_count(self):
    """
    Calculate the value of CDK's HBondDonorCountDescriptor for the molecule.

    :return: the descriptor value converted with Java's toString()
    """
    descriptor = self.cdk.qsar.descriptors.molecular.HBondDonorCountDescriptor()
    result = descriptor.calculate(self.mol_container)
    return result.getValue().toString()
390 |
def get_xlogp(self):
    """
    Calculate the value of CDK's XLogPDescriptor for the molecule.

    :return: the descriptor value converted with Java's toString()
    """
    descriptor = self.cdk.qsar.descriptors.molecular.XLogPDescriptor()
    result = descriptor.calculate(self.mol_container)
    return result.getValue().toString()
394 |
def get_ro5_failures(self):
    """
    Calculate the value of CDK's RuleOfFiveDescriptor for the molecule.

    :return: the descriptor value converted with Java's toString()
    """
    descriptor = self.cdk.qsar.descriptors.molecular.RuleOfFiveDescriptor()
    result = descriptor.calculate(self.mol_container)
    return result.getValue().toString()
398 |
def get_acidic_group_count(self):
    """
    Calculate the value of CDK's AcidicGroupCountDescriptor for the molecule.

    The descriptor is initialised with the default chem object builder before it is used.

    :return: the descriptor value converted with Java's toString()
    """
    descriptor = self.cdk.qsar.descriptors.molecular.AcidicGroupCountDescriptor()
    builder = self.cdk.DefaultChemObjectBuilder.getInstance()
    descriptor.initialise(builder)
    result = descriptor.calculate(self.mol_container)
    return result.getValue().toString()
403 |
def get_mol2(self, filename=''):
    """
    Convert the molecule to the mol2 format and optionally write the result to a file.

    Coordinates are generated with CDK's StructureDiagramGenerator before writing.

    :param filename: path of the file to write; nothing is written if it is empty
    :type filename: str
    :return: the mol2 representation as a string
    """
    layout = self.cdk.layout.StructureDiagramGenerator(self.mol_container)
    layout.generateCoordinates()

    # Let CDK write into an in-memory Java StringWriter and read the text back.
    buffer = self.java.io.StringWriter()
    serializer = self.cdk.io.Mol2Writer(buffer)
    serializer.writeMolecule(self.mol_container)
    serializer.close()
    mol2_text = buffer.toString()

    if not filename:
        return mol2_text

    with open(filename, "w") as out_file:
        out_file.write(mol2_text)
    return mol2_text
427 |
def get_molfile(self, filename=''):
    """
    Serialise the molecule as a molfile V2000 (MDLV2000) and, if a file name is given, also store it on disk.
    :param filename: path of the file the molfile text is written to; nothing is written when empty
    :type filename: str
    :return: the molfile V2000 (MDLV2000) representation as a string
    """
    # run the structure diagram layout before handing the container to the writer
    self.cdk.layout.StructureDiagramGenerator(self.mol_container).generateCoordinates()

    buffer = self.java.io.StringWriter()
    molfile_out = self.cdk.io.MDLV2000Writer(buffer)
    molfile_out.writeMolecule(self.mol_container)
    molfile_out.close()

    serialised = buffer.toString()
    if not filename:
        return serialised

    with open(filename, "w") as handle:
        handle.write(serialised)
    return serialised
451 |
def get_fingerprint(self):
    """
    Compute the bit fingerprint of this molecule with CDK's Fingerprinter.

    The fingerprint size and its bit set are printed to stdout on every call.
    :return: the object returned by Fingerprinter.getBitFingerprint()
    """
    bit_fingerprint = self.cdk.fingerprint.Fingerprinter().getBitFingerprint(self.mol_container)
    print('Fingerprint size:', bit_fingerprint.size())
    print(bit_fingerprint.asBitSet())
    return bit_fingerprint
460 |
def get_bitmap_fingerprint(self):
    """
    Compute the bit fingerprint of this molecule with CDK's Fingerprinter and expose it as a bit set.
    :return: the result of asBitSet() on the computed fingerprint
    """
    return self.cdk.fingerprint.Fingerprinter().getBitFingerprint(self.mol_container).asBitSet()
465 |
def get_tanimoto(self, other_molecule):
    """
    Tanimoto similarity between this molecule and another one, based on get_fingerprint().
    :param other_molecule: object offering a get_fingerprint() method, e.g. another instance of this class
    :return: the result of CDK's Tanimoto.calculate() for both fingerprints
    """
    own_fingerprint = self.get_fingerprint()
    other_fingerprint = other_molecule.get_fingerprint()
    return self.cdk.similarity.Tanimoto.calculate(own_fingerprint, other_fingerprint)
468 |
def get_tanimoto_from_bitset(self, other_molecule):
    """
    Tanimoto similarity between this molecule and another one, based on get_bitmap_fingerprint().
    :param other_molecule: object offering a get_bitmap_fingerprint() method, e.g. another instance of this class
    :return: the result of CDK's Tanimoto.calculate() for both bit sets
    """
    own_bits = self.get_bitmap_fingerprint()
    other_bits = other_molecule.get_bitmap_fingerprint()
    return self.cdk.similarity.Tanimoto.calculate(own_bits, other_bits)
471 |
def get_molecule_signature(self):
    """
    Build a CDK MoleculeSignature for this molecule.
    :return: the signature's canonical string (toCanonicalString())
    """
    return self.cdk.signature.MoleculeSignature(self.mol_container).toCanonicalString()
475 |
def substructure_search(self, smarts='O=CO'):
    """
    Match a SMARTS pattern against this molecule and print the atoms of every match to stdout.
    :param smarts: the SMARTS query
    :type smarts: str
    :return: always an empty string; the matches are only printed
    """
    query_tool = self.cdk.smiles.smarts.SMARTSQueryTool(smarts, self.cdk.DefaultChemObjectBuilder.getInstance())

    if not query_tool.matches(self.mol_container):
        return ''

    match_total = query_tool.countMatches()
    atom_mappings = query_tool.getMatchingAtoms()
    for position in range(match_total):
        print(atom_mappings.get(position))

    return ''
487 |
def get_svg(self, file_name=None, substructures=None):
    """
    Depict the molecule as SVG, optionally highlighting substructures in orange with an outer glow.
    :param file_name: if given, the SVG is written to this file; '.svg' is appended unless the name already
        ends with that extension (case-insensitive)
    :param substructures: chem objects to highlight; no highlighting is configured when empty or None
    :return: the SVG as a string, or an empty string when the depiction was written to a file
    """
    if substructures:
        highlight_color = self.java.awt.Color.orange
        generator = self.cdk.depict.DepictionGenerator()
        generator = generator.withHighlight(substructures, highlight_color)
        generator = generator.withAtomColors()
        generator = generator.withOuterGlowHighlight(4.0)
    else:
        generator = self.cdk.depict.DepictionGenerator().withAtomColors()

    if not file_name:
        return generator.depict(self.mol_container).toSvgStr()

    extension = file_name.split('.')[-1].lower()
    if extension != 'svg':
        file_name += '.svg'

    generator.depict(self.mol_container).writeTo(file_name)
    return ''
507 |
def get_molecular_weight(self):
    """
    Run CDK's WeightDescriptor on this molecule.
    :return: the descriptor value converted to a string via toString()
    """
    outcome = self.cdk.qsar.descriptors.molecular.WeightDescriptor().calculate(self.mol_container)
    return outcome.getValue().toString()
511 |
@staticmethod
def search_substructure(search_string, molecules, svg_return_count=10):
    """
    A slow version of a substructure search going back and forth between Java and Python.

    :param search_string: SMARTS pattern to search for
    :type search_string: str
    :param molecules: iterable of (compound_id, smiles) pairs to screen
    :param svg_return_count: maximum number of hits that get an SVG depiction with the match highlighted;
        later hits are returned with an empty 'svg' value
    :type svg_return_count: int
    :return: list of dicts with the keys 'compound_id', 'smiles' and 'svg', one per matching molecule
    """
    cdk = gateway.jvm.org.openscience.cdk
    pattern = cdk.smiles.smarts.SmartsPattern.create(search_string)
    results = []

    for compound_id, smiles in molecules:
        try:
            mol = Compound(compound_string=smiles, identifier_type='smiles')
        except ValueError:
            # molecules that cannot be built from their SMILES are skipped
            continue

        mappings = pattern.matchAll(mol.mol_container)
        if mappings.countUnique() <= 0:
            continue

        svg = ''
        # strictly fewer than svg_return_count hits so far, so at most svg_return_count SVGs are rendered
        if len(results) < svg_return_count:
            svg = mol.get_svg(substructures=mappings.toChemObjects())

        results.append({
            'compound_id': compound_id,
            'smiles': smiles,
            'svg': svg
        })

    return results
542 |
543 |
def main():
    """
    Smoke test: build one compound from an InChI and one from a SMILES, print SMILES, InChI key and InChI
    for each, then wait five seconds before returning.
    """
    test_inchi = 'InChI=1S/C23H18ClF2N3O3S/c1-2-9-33(31,32)29-19-8-7-18(25)20(21(19)26)22(30)17-12-28-23-16(17)10-14(11-27-23)13-3-5-15(24)6-4-13/h3-8,10-12,29H,2,9H2,1H3,(H,27,28)'

    # alternative SMILES test input that is currently not used: C[BH]1H[BH](C)H1
    samples = (
        (test_inchi, 'inchi'),
        ("CC(=O)Cl", 'smiles'),
    )

    for compound_string, identifier_type in samples:
        cmpnd = Compound(compound_string=compound_string, identifier_type=identifier_type)
        print(cmpnd.get_smiles())
        print(cmpnd.get_inchi_key())
        print(cmpnd.get_inchi())

    print('ran through')
    time.sleep(5)


if __name__ == '__main__':
    sys.exit(main())
563 |
--------------------------------------------------------------------------------
/cdk_pywrapper/chemlib.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import sys
3 | import simplejson
4 | import time
5 | import re
6 | import wikidataintegrator.wdi_core as wdi_core
7 | import wikidataintegrator.wdi_login as wdi_login
8 | import pprint
9 | import pandas as pd
10 | import numpy as np
11 | import os
12 |
13 | import chemspipy
14 | # sys.path.append('/home/sebastian/PycharmProjects/cdk_pywrapper/')
15 | from cdk_pywrapper.cdk_pywrapper import Compound
16 |
17 | """
18 | A Python library for PubChem RDF
19 | """
20 |
21 | __author__ = 'Sebastian Burgstaller-Muehlbacher'
22 | __license__ = 'AGPLv3'
23 | __copyright__ = 'Sebastian Burgstaller-Muehlbacher'
24 |
25 |
class UNIIMolecule(object):
    """
    One record of the UNII table and its conversion into Wikidata statements.

    The table './unii_data/unii_data_ndfrt.csv' is read once, when the class body executes. Rows that carry a
    SMILES but no InChI key get the key generated through Compound, and the completed table is written back to
    the same file.
    """

    # Identifier columns are read as text. The builtin str is used because the np.str alias was deprecated in
    # NumPy 1.20 and removed in NumPy 1.24.
    unii_data = pd.read_csv('./unii_data/unii_data_ndfrt.csv', low_memory=False, index_col=0,
                            dtype={
                                'UNII': str,
                                'RXCUI': str,
                                'INN_ID': str,
                                'ITIS': str,
                                'NCBI': str,
                                'RxNorm_CUI': str,  # same as RXCUI, but from NDF-RT
                            })

    # fill in missing InChI keys for every row that has a SMILES
    for count, row in unii_data.iterrows():
        smiles = row['SMILES']
        ikey = row['INCHIKEY']
        if pd.notnull(smiles) and pd.isnull(ikey):
            cmpnd = Compound(compound_string=smiles, identifier_type='smiles')
            unii_data.loc[count, 'INCHIKEY'] = cmpnd.get_inchi_key()

        if count % 10000 == 0:
            print('processed to UNII ID', count)

    unii_data.to_csv('./unii_data/unii_data_ndfrt.csv')

    # spelled-out Greek letters as they appear in (lower-cased) UNII names, mapped to the actual letters
    _GREEK_CODES = {
        '.alpha.': '\u03B1',
        '.beta.': '\u03B2',
        '.gamma.': '\u03B3',
        '.delta.': '\u03B4',
        '.epsilon.': '\u03B5',
        '.zeta.': '\u03B6',
        '.eta.': '\u03B7',
        '.theta.': '\u03B8',
        '.iota.': '\u03B9',
        '.kappa.': '\u03BA',
        '.lambda.': '\u03BB',
        '.mu.': '\u03BC',
        '.nu.': '\u03BD',
        '.xi.': '\u03BE',
        '.omicron.': '\u03BF',
        '.pi.': '\u03C0',
        '.rho.': '\u03C1',
        '.sigma.': '\u03C3',
        '.tau.': '\u03C4',
        '.upsilon.': '\u03C5',
        '.phi.': '\u03C6',
        '.chi.': '\u03C7',
        '.psi.': '\u03C8',
        '.omega.': '\u03C9',
    }

    # one of the letters e, z, d, l, n, h, r, o, s (or 'dl') standing alone between non-letters / the string start
    _SINGLE_LETTER_TOKEN = re.compile('(^|[^a-z])([ezdlnhros]{1}|dl{1})[^a-z]{1}')

    def __init__(self, unii=None, inchi_key=None):
        """
        Select the table row belonging to a UNII or, if no UNII is given, to an InChI key.
        :param unii: UNII identifier; used for the lookup whenever it is truthy
        :param inchi_key: InChI key used for the lookup when unii is not given
        :raises ValueError: if the lookup does not match exactly one row
        """
        print('unii inchi key', inchi_key)
        if unii:
            ind = UNIIMolecule.unii_data['UNII'].values == unii
        else:
            ind = UNIIMolecule.unii_data['INCHIKEY'].values == inchi_key

        self.data = UNIIMolecule.unii_data.loc[ind, :]

        if len(self.data.index) != 1:
            raise ValueError('Provided ID did not return a unique UNII')

        self.data_index = self.data.index[0]

    def _optional_value(self, column):
        """Return this record's value in column, or None when the cell is empty."""
        value = self.data.loc[self.data_index, column]
        return value if pd.notnull(value) else None

    @property
    def stdinchikey(self):
        """InChI key generated from the SMILES if there is one, else the stored key, else None."""
        smiles = self.smiles
        if smiles is not None:
            return Compound(compound_string=smiles, identifier_type='smiles').get_inchi_key()
        return self._optional_value('INCHIKEY')

    @property
    def stdinchi(self):
        """InChI generated from the SMILES, or None for records without a SMILES."""
        smiles = self.smiles
        if smiles is None:
            return None
        return Compound(compound_string=smiles, identifier_type='smiles').get_inchi()

    @property
    def preferred_name(self):
        """The 'PT' column passed through label_converter, or None when it is empty."""
        name = self._optional_value('PT')
        return UNIIMolecule.label_converter(name) if name is not None else None

    @property
    def smiles(self):
        return self._optional_value('SMILES')

    @property
    def molecule_type(self):
        return self._optional_value('UNII_TYPE')

    @property
    def unii(self):
        return self.data.loc[self.data_index, 'UNII']

    @property
    def cas(self):
        return self._optional_value('RN')

    @property
    def einecs(self):
        return self._optional_value('EC')

    @property
    def rxnorm(self):
        return self._optional_value('RXCUI')

    @property
    def ndfrt(self):
        return self._optional_value('NUI')

    @property
    def umls(self):
        return self._optional_value('UMLS_CUI')

    def to_wikidata(self):
        """
        Build the Wikidata statements for this record, each referenced to the UNII entry.

        The record needs a SMILES: it is handed to Compound and the InChI derived from it is sliced.
        Empty values are skipped; isomeric SMILES are left out when they equal the canonical SMILES, and SMILES
        and InChI are left out when they exceed 400 characters.
        :return: list of wikidataintegrator statement objects
        """
        item_label = self.preferred_name or self.unii

        refs = [[
            wdi_core.WDItemID(value='Q6593799', prop_nr='P248', is_reference=True),  # stated in
            wdi_core.WDExternalID(value=self.unii, prop_nr='P652', is_reference=True),  # source element
            wdi_core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
            wdi_core.WDMonolingualText(value=item_label[0:400], language='en', prop_nr='P1476', is_reference=True),
            wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # retrieved
        ]]
        print('UNII Main label is', item_label)

        smiles = self.smiles
        cmpnd = Compound(compound_string=smiles, identifier_type='smiles')
        isomeric_smiles = cmpnd.get_smiles(smiles_type='isomeric')
        canonical_smiles = cmpnd.get_smiles(smiles_type='generic')

        # stdinchikey and stdinchi each build a new Compound, so they are evaluated exactly once
        inchi_key = self.stdinchikey
        inchi = self.stdinchi[6:]  # drop the first six characters (the 'InChI=' prefix)

        elements = {
            'P652': self.unii,
            'P233': canonical_smiles,
            'P2017': isomeric_smiles,
            'P235': inchi_key,
            'P234': inchi,
            'P231': self.cas,
            'P232': self.einecs,
            'P2892': self.umls,
            'P2115': self.ndfrt,
            'P3345': self.rxnorm
        }

        dtypes = {
            'P652': wdi_core.WDExternalID,
            'P683': wdi_core.WDExternalID,
            'P661': wdi_core.WDExternalID,
            'P2153': wdi_core.WDExternalID,
            'P233': wdi_core.WDString,
            'P2017': wdi_core.WDString,
            'P235': wdi_core.WDExternalID,
            'P234': wdi_core.WDExternalID,
            'P274': wdi_core.WDString,
            'P231': wdi_core.WDExternalID,
            'P232': wdi_core.WDExternalID,
            'P2892': wdi_core.WDExternalID,
            'P2115': wdi_core.WDExternalID,
            'P3345': wdi_core.WDExternalID
        }

        smiles_too_long = len(smiles) > 400

        # do not add isomeric smiles if no isomeric info is available
        if canonical_smiles == isomeric_smiles or smiles_too_long:
            del elements['P2017']

        # do not try to add InChI longer than 400 chars
        if len(inchi) > 400:
            del elements['P234']

        if smiles_too_long:
            del elements['P233']

        data = []
        for prop_nr, value in elements.items():
            if not value:
                continue

            print('{}:'.format(prop_nr), value)
            values = value if isinstance(value, (list, set)) else [value]
            data.extend(dtypes[prop_nr](prop_nr=prop_nr, value=x, references=refs) for x in values)

        return data

    @staticmethod
    def label_converter(label):
        """
        Turn a UNII name into a display label.

        The name is lower-cased, '.alpha.'-style codes become Greek letters, stand-alone single-letter tokens
        (see _SINGLE_LETTER_TOKEN) are upper-cased again and the parts of a ', '-separated name are put into
        reverse order and concatenated without a separator.
        :param label: the raw name
        :type label: str
        :return: the converted label
        """
        label = label.lower()

        for code, letter in UNIIMolecule._GREEK_CODES.items():
            label = label.replace(code, letter)

        # Upper-case one token per pass and rescan from the start: neighbouring tokens share their delimiter
        # (as in 'n,n-'), which a single non-overlapping substitution would miss. The match is spliced in
        # directly instead of going through re.sub, so a backslash in the label is never read as an escape.
        while True:
            hit = UNIIMolecule._SINGLE_LETTER_TOKEN.search(label)
            if hit is None:
                break
            label = label[:hit.start()] + hit.group(0).upper() + label[hit.end():]

        return ''.join(reversed(label.split(', ')))
251 |
252 |
class DrugBankMolecule(object):
    """
    One entry of the DrugBank vocabulary table and its conversion into Wikidata statements.

    Columns used: DrugBank ID, Accession Numbers, Common name, CAS, UNII, Synonyms, Standard InChI Key.
    The table 'drugbank vocabulary.csv' is read once, when the class body executes.
    """

    drugbank_data = pd.read_csv('drugbank vocabulary.csv', low_memory=False)

    # Keep the first row per InChI key plus every row without an InChI key. duplicated() counts missing keys as
    # equal to each other, so the key-less rows are re-admitted through the isnull() mask. Each row is selected
    # at most once, which keeps every DrugBank ID unique in the table.
    drugbank_data = drugbank_data.loc[
        ~drugbank_data.duplicated(subset=['Standard InChI Key'], keep='first')
        | drugbank_data['Standard InChI Key'].isnull(), :]

    def __init__(self, db=None, inchi_key=None):
        """
        Select the table row belonging to a DrugBank ID or, if none is given, to an InChI key.
        :param db: DrugBank ID as stored in the 'DrugBank ID' column; used whenever it is truthy
        :param inchi_key: InChI key used for the lookup when db is not given
        :raises ValueError: if the lookup does not match exactly one row
        """
        print('unii inchi key', inchi_key)
        if db:
            ind = DrugBankMolecule.drugbank_data['DrugBank ID'].values == db
        else:
            ind = DrugBankMolecule.drugbank_data['Standard InChI Key'].values == inchi_key

        self.data = DrugBankMolecule.drugbank_data.loc[ind, :]

        if len(self.data.index) != 1:
            raise ValueError('Provided ID did not return a unique DrugBank ID')

        self.data_index = self.data.index[0]

    def _optional_value(self, column):
        """Return this record's value in column, or None when the cell is empty."""
        value = self.data.loc[self.data_index, column]
        return value if pd.notnull(value) else None

    @property
    def stdinchikey(self):
        return self._optional_value('Standard InChI Key')

    @property
    def stdinchi(self):
        # CC0 data does not provide InChI, instead could create a PubChemMolecule using the InChI key provided and
        # use that
        return None

    @property
    def preferred_name(self):
        return self._optional_value('Common name')

    @property
    def synonyms(self):
        """The entries of the 'Synonyms' column (separated by ' | '), or an empty list."""
        synonyms = self._optional_value('Synonyms')
        return synonyms.split(' | ') if synonyms is not None else []

    @property
    def smiles(self):
        # same applies as for InChIs
        return None

    @property
    def molecule_type(self):
        # return either 'approved', 'experimental', 'retracted', 'biotech', 'antibody'. Based on what the accession
        # numbers say
        return None

    @property
    def accession_numbers(self):
        """The '|'-separated entries of the 'Accession Numbers' column without padding, or an empty list."""
        acc_nrs = self._optional_value('Accession Numbers')
        if acc_nrs is None:
            return []
        return [acc_nr.strip() for acc_nr in acc_nrs.split('|')]

    @property
    def unii(self):
        return self._optional_value('UNII')

    @property
    def cas(self):
        return self._optional_value('CAS')

    @property
    def drugbank(self):
        """The DrugBank ID without its first two characters (item labels put 'DB' back in front)."""
        return self.data.loc[self.data_index, 'DrugBank ID'][2:]

    def to_wikidata(self):
        """
        Build the Wikidata statements (UNII, DrugBank ID, CAS) for this entry, each referenced to DrugBank.

        Structure-derived properties are not emitted: smiles and stdinchi are not available from this table.
        :return: list of wikidataintegrator statement objects; empty values are skipped
        """
        item_label = self.preferred_name or 'DB' + self.drugbank

        refs = [[
            wdi_core.WDItemID(value='Q1122544', prop_nr='P248', is_reference=True),  # stated in
            wdi_core.WDExternalID(value=self.drugbank, prop_nr='P715', is_reference=True),  # source element
            wdi_core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
            wdi_core.WDMonolingualText(value=item_label[0:400], language='en', prop_nr='P1476', is_reference=True),
            wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # retrieved
        ]]
        print('DrugBank Main label is', item_label)

        elements = {
            'P652': self.unii,
            'P715': self.drugbank,
            'P231': self.cas,
        }

        dtypes = {
            'P652': wdi_core.WDExternalID,
            'P715': wdi_core.WDExternalID,
            'P683': wdi_core.WDExternalID,
            'P661': wdi_core.WDExternalID,
            'P2153': wdi_core.WDExternalID,
            'P233': wdi_core.WDString,
            'P2017': wdi_core.WDString,
            'P235': wdi_core.WDExternalID,
            'P234': wdi_core.WDExternalID,
            'P274': wdi_core.WDString,
            'P231': wdi_core.WDExternalID,
            'P232': wdi_core.WDExternalID
        }

        data = []
        for prop_nr, value in elements.items():
            if not value:
                continue

            print('{}:'.format(prop_nr), value)
            values = value if isinstance(value, (list, set)) else [value]
            data.extend(dtypes[prop_nr](prop_nr=prop_nr, value=x, references=refs) for x in values)

        return data
395 |
396 |
class ChEBIMolecule(object):
    """
    One ChEBI compound and its conversion into Wikidata statements.

    All tables are loaded from chebi_data_path when the class body executes: the ChEBI ID / InChI table, the
    names, the compounds and the database accessions. Compounds flagged as zwitterions (by name) or lacking a
    primary name are removed from the InChI table. If that table has no 'InChI key' column yet, the keys are
    generated through Compound and the table is written back to disk.
    """
    chebi_data_path = './chebi_data/'
    chebi_data = pd.read_csv(os.path.join(chebi_data_path, 'chebiId_inchi.tsv'), low_memory=False, index_col=0,
                             sep='\t')

    # names.tsv columns: ID COMPOUND_ID TYPE SOURCE NAME ADAPTED LANGUAGE
    # builtin str instead of np.str: the alias was deprecated in NumPy 1.20 and removed in NumPy 1.24
    chebi_names = pd.read_csv(os.path.join(chebi_data_path, 'names.tsv'), low_memory=False, index_col=None, sep='\t',
                              dtype={'ID': str, 'COMPOUND_ID': str}, na_filter=False)

    # IDs of all compounds having 'zwitterion' in any of their names ...
    zwitterion_id_list = set(
        chebi_names.loc[chebi_names['NAME'].str.contains('zwitterion', regex=False, na=False), 'COMPOUND_ID']
        .astype(np.int64))

    compounds = pd.read_csv(os.path.join(chebi_data_path, 'compounds.tsv'), low_memory=False, index_col=0, sep='\t')

    # ... plus all compounds whose primary name is missing or contains 'zwitterion'
    zwitterion_id_list.update(
        compounds.index[compounds['NAME'].isnull()
                        | compounds['NAME'].str.contains('zwitterion', regex=False, na=False)])

    # not every collected ID has a row in the InChI table, so IDs without a row are ignored instead of raising
    chebi_data = chebi_data.drop(list(zwitterion_id_list), errors='ignore')

    if 'InChI key' not in chebi_data:
        print('Generating InChI keys ...')
        for index, row in chebi_data.iterrows():
            cmpnd = Compound(compound_string=row['InChI'], identifier_type='inchi')
            chebi_data.loc[index, 'InChI key'] = cmpnd.get_inchi_key()

            if index % 1000 == 0:
                print('processed to ChEBI ID', index)

        chebi_data.to_csv(os.path.join(chebi_data_path, 'chebiId_inchi.tsv'), sep='\t')

    # database_accession.tsv columns: ID COMPOUND_ID SOURCE TYPE ACCESSION_NUMBER
    db_accessions = pd.read_csv(os.path.join(chebi_data_path, 'database_accession.tsv'), low_memory=False,
                                index_col=None, sep='\t', dtype={'ID': str, 'COMPOUND_ID': str}, na_filter=False)

    # remove CAS numbers provided by KEGG, as they are frequently incorrect
    db_accessions = db_accessions.loc[~(db_accessions['SOURCE'].isin(['KEGG COMPOUND']) &
                                        db_accessions['TYPE'].isin(['CAS Registry Number'])), :]

    def __init__(self, chebi_id=None, inchi_key=None):
        """
        Select the compound belonging to a ChEBI ID or, if none is given, to an InChI key.
        :param chebi_id: ChEBI ID, anything np.int64() accepts; used whenever it is truthy
        :param inchi_key: InChI key used for the lookup when chebi_id is not given
        :raises ValueError: if the lookup does not match exactly one row of the InChI table
        """
        if chebi_id:
            ind = ChEBIMolecule.chebi_data.index == np.int64(chebi_id)
        else:
            ind = ChEBIMolecule.chebi_data['InChI key'].values == inchi_key

        self._canonical_smiles = None
        self._isomeric_smiles = None

        self.data = ChEBIMolecule.chebi_data.loc[ind, :]

        if len(self.data.index) != 1:
            raise ValueError('No unique found for ChEBI ID')

        self.data_index = self.data.index[0]

        names = ChEBIMolecule.chebi_names
        accessions = ChEBIMolecule.db_accessions
        base_data = ChEBIMolecule.compounds
        self.all_names = names.loc[names['COMPOUND_ID'] == self.chebi, :]
        self.accessions = accessions.loc[accessions['COMPOUND_ID'] == self.chebi, :]
        self.chebi_base_data = base_data.loc[base_data.index == np.int64(self.chebi), :]

    def _accession_numbers(self, accession_types):
        """Return the set of this compound's accession numbers whose TYPE is one of accession_types."""
        selected = self.accessions['TYPE'].isin(accession_types)
        return set(self.accessions.loc[selected, 'ACCESSION_NUMBER'])

    def _english_names(self, name_type):
        """Return the list of this compound's names of the given TYPE whose LANGUAGE is 'en'."""
        selected = (self.all_names['TYPE'] == name_type) & (self.all_names['LANGUAGE'] == 'en')
        return list(self.all_names.loc[selected, 'NAME'])

    @property
    def stdinchikey(self):
        return self.data.loc[self.data_index, 'InChI key']

    @property
    def stdinchi(self):
        return self.data.loc[self.data_index, 'InChI']

    @property
    def preferred_name(self):
        """The NAME of this compound in the compounds table, or None if it has no row there."""
        pref_names = self.chebi_base_data['NAME']
        return pref_names.iloc[0] if len(pref_names) > 0 else None

    @property
    def synonyms(self):
        return self._english_names('SYNONYM')

    @property
    def canonical_smiles(self):
        """Generic SMILES derived from the InChI; derived (together with the isomeric SMILES) on first use."""
        if not self._canonical_smiles:
            cmpnd = Compound(compound_string=self.stdinchi, identifier_type='inchi')
            self._canonical_smiles = cmpnd.get_smiles(smiles_type='generic')
            self._isomeric_smiles = cmpnd.get_smiles(smiles_type='isomeric')
        return self._canonical_smiles

    @canonical_smiles.setter
    def canonical_smiles(self, value):
        self._canonical_smiles = value

    @property
    def isomeric_smiles(self):
        """Isomeric SMILES derived from the InChI; filled in by the canonical_smiles getter when missing."""
        if not self._isomeric_smiles:
            # accessing canonical_smiles derives and caches both SMILES flavours
            _ = self.canonical_smiles
        return self._isomeric_smiles

    @isomeric_smiles.setter
    def isomeric_smiles(self, value):
        self._isomeric_smiles = value

    @property
    def chebi(self):
        """The ChEBI ID as a string."""
        return str(self.data_index)

    @property
    def cas(self):
        return self._accession_numbers(['CAS Registry Number'])

    @property
    def hmdb(self):
        return self._accession_numbers(['HMDB accession'])

    @property
    def beilstein(self):
        return self._accession_numbers(['Beilstein Registry Number', 'Reaxys Registry Number'])

    @property
    def kegg(self):
        return self._accession_numbers(['KEGG COMPOUND accession', 'KEGG DRUG accession'])

    @property
    def knapsack(self):
        return self._accession_numbers(['KNApSAcK accession'])

    @property
    def who_inn(self):
        return self._english_names('INN')

    def to_wikidata(self):
        """
        Build the Wikidata statements for this compound, each referenced to the ChEBI entry.

        Empty values are skipped; isomeric SMILES are left out when they equal the canonical SMILES, and SMILES
        and InChI are left out when they exceed 400 characters.
        :return: list of wikidataintegrator statement objects
        """
        item_label = self.preferred_name or 'ChEBI:' + self.chebi

        refs = [[
            wdi_core.WDItemID(value='Q902623', prop_nr='P248', is_reference=True),  # stated in
            wdi_core.WDExternalID(value=self.chebi, prop_nr='P683', is_reference=True),  # source element
            wdi_core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
            wdi_core.WDMonolingualText(value=item_label[0:400], language='en', prop_nr='P1476', is_reference=True),
            wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # retrieved
        ]]
        print('ChEBI Main label is', item_label)

        canonical_smiles = self.canonical_smiles
        isomeric_smiles = self.isomeric_smiles
        inchi = self.stdinchi[6:]  # drop the first six characters (the 'InChI=' prefix)

        elements = {
            'P683': self.chebi,
            'P233': canonical_smiles,
            'P2017': isomeric_smiles,
            'P235': self.stdinchikey,
            'P234': inchi,
            'P231': self.cas,
            'P665': self.kegg,
            'P2057': self.hmdb,
            'P1579': self.beilstein,
            'P2064': self.knapsack,
            'P2275': self.who_inn
        }

        dtypes = {
            'P652': wdi_core.WDExternalID,
            'P683': wdi_core.WDExternalID,
            'P661': wdi_core.WDExternalID,
            'P2153': wdi_core.WDExternalID,
            'P233': wdi_core.WDString,
            'P2017': wdi_core.WDString,
            'P235': wdi_core.WDExternalID,
            'P234': wdi_core.WDExternalID,
            'P274': wdi_core.WDString,
            'P231': wdi_core.WDExternalID,
            'P232': wdi_core.WDExternalID,
            'P665': wdi_core.WDExternalID,
            'P2057': wdi_core.WDExternalID,
            'P1579': wdi_core.WDExternalID,
            'P2064': wdi_core.WDExternalID,
            'P2275': wdi_core.WDMonolingualText
        }

        # do not add isomeric smiles if no isomeric info is available
        if canonical_smiles == isomeric_smiles or len(isomeric_smiles) > 400:
            del elements['P2017']

        # do not try to add InChI longer than 400 chars
        if len(inchi) > 400:
            del elements['P234']

        if len(canonical_smiles) > 400:
            del elements['P233']

        data = []
        for prop_nr, value in elements.items():
            if not value:
                continue

            print('{}:'.format(prop_nr), value)
            values = value if isinstance(value, (list, set)) else [value]
            data.extend(dtypes[prop_nr](prop_nr=prop_nr, value=x, references=refs) for x in values)

        return data
613 |
614 |
class GTPLMolecule(object):
    """
    One ligand from './iuphar/ligands.csv' and its conversion into Wikidata statements.

    The ligand table is read on every instantiation.
    """

    # markup removed from names by label_converter
    # presumably ligand names carry inline HTML for sub-/superscripts and italics; verify against ligands.csv
    _MARKUP_TAGS = ('<sub>', '</sub>', '<sup>', '</sup>', '<i>', '</i>')

    # a well-formed named character reference such as '&alpha;' or '&reg;'
    _NAMED_ENTITY = re.compile(r'&[A-Za-z][A-Za-z0-9]*;')

    def __init__(self, gtpl_id=None, cid=None, sid=None, inchi_key=None):
        """
        Select the ligand by the first truthy identifier among gtpl_id, cid and sid, else by InChI key.
        :param gtpl_id: value of the 'Ligand id' column
        :param cid: value of the 'PubChem CID' column
        :param sid: value of the 'PubChem SID' column
        :param inchi_key: value of the 'InChIKey' column
        :raises ValueError: if the lookup does not match exactly one ligand
        """
        # builtin str instead of np.str: the alias was deprecated in NumPy 1.20 and removed in NumPy 1.24
        gtp_data = pd.read_csv('./iuphar/ligands.csv', low_memory=False,
                               dtype={'PubChem SID': str, 'PubChem CID': str, 'Ligand id': str})

        # remove all labelled or radioactive compounds as they have the same inchi key as unlabelled compounds
        gtp_data = gtp_data.loc[pd.isnull(gtp_data['Labelled'].values), :]

        print('gtpl inchi', inchi_key)
        if gtpl_id:
            ind = gtp_data['Ligand id'].values == gtpl_id
        elif cid:
            ind = gtp_data['PubChem CID'].values == cid
        elif sid:
            # substance IDs live in their own column
            ind = gtp_data['PubChem SID'].values == sid
        else:
            ind = gtp_data['InChIKey'].values == inchi_key

        self.data = gtp_data.loc[ind, :]

        if len(self.data.index) != 1:
            raise ValueError('Provided ID did not return a unique GTPL ID')

        self.data_index = self.data.index[0]

    @property
    def stdinchikey(self):
        return self.data.loc[self.data_index, 'InChIKey']

    @property
    def stdinchi(self):
        return self.data.loc[self.data_index, 'InChI']

    @property
    def preferred_name(self):
        """The 'Name' column passed through label_converter."""
        return GTPLMolecule.label_converter(self.data.loc[self.data_index, 'Name'])

    @property
    def synonyms(self):
        """The '|'-separated entries of the 'Synonyms' column, each passed through label_converter."""
        synonyms = self.data.loc[self.data_index, 'Synonyms']
        if pd.isnull(synonyms):
            return []
        return [GTPLMolecule.label_converter(synonym) for synonym in synonyms.split('|')]

    @property
    def smiles(self):
        return self.data.loc[self.data_index, 'SMILES']

    @property
    def molecule_type(self):
        return self.data.loc[self.data_index, 'Type']

    @property
    def gtpl_id(self):
        return self.data.loc[self.data_index, 'Ligand id']

    def to_wikidata(self):
        """
        Build the Wikidata statements for this ligand, each referenced to the ligand entry.

        Empty values are skipped; isomeric SMILES are left out when they equal the canonical SMILES, and SMILES
        and InChI are left out when they exceed 400 characters.
        :return: list of wikidataintegrator statement objects
        """
        item_label = self.preferred_name or 'GTPL' + self.gtpl_id

        refs = [[
            wdi_core.WDItemID(value='Q17091219', prop_nr='P248', is_reference=True),  # stated in
            wdi_core.WDExternalID(value=self.gtpl_id, prop_nr='P595', is_reference=True),  # source element
            wdi_core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
            wdi_core.WDMonolingualText(value=item_label[0:400], language='en', prop_nr='P1476', is_reference=True),
            wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # retrieved
        ]]
        print('GTPL Main label is', item_label)

        smiles = self.smiles
        cmpnd = Compound(compound_string=smiles, identifier_type='smiles')
        isomeric_smiles = cmpnd.get_smiles(smiles_type='isomeric')
        canonical_smiles = cmpnd.get_smiles(smiles_type='generic')

        inchi = self.stdinchi[6:]  # drop the first six characters (the 'InChI=' prefix)

        elements = {
            'P595': self.gtpl_id,
            'P233': canonical_smiles,
            'P2017': isomeric_smiles,
            'P235': self.stdinchikey,
            'P234': inchi,
        }

        dtypes = {
            'P595': wdi_core.WDExternalID,
            'P683': wdi_core.WDExternalID,
            'P661': wdi_core.WDExternalID,
            'P2153': wdi_core.WDExternalID,
            'P233': wdi_core.WDString,
            'P2017': wdi_core.WDString,
            'P235': wdi_core.WDExternalID,
            'P234': wdi_core.WDExternalID,
            'P274': wdi_core.WDString
        }

        smiles_too_long = len(smiles) > 400

        # do not add isomeric smiles if no isomeric info is available
        if canonical_smiles == isomeric_smiles or smiles_too_long:
            del elements['P2017']

        # do not try to add InChI longer than 400 chars
        if len(inchi) > 400:
            del elements['P234']

        if smiles_too_long:
            del elements['P233']

        data = []
        for prop_nr, value in elements.items():
            if not value:
                continue

            print('{}:'.format(prop_nr), value)
            values = value if isinstance(value, (list, set)) else [value]
            data.extend(dtypes[prop_nr](prop_nr=prop_nr, value=x, references=refs) for x in values)

        return data

    @staticmethod
    def label_converter(label):
        """
        Turn a ligand name into a plain-text label.

        Named character references terminated by a semicolon (Greek letters, the registered sign, plus-minus, ...)
        are replaced by the characters they stand for, and the tags listed in _MARKUP_TAGS are removed. Text
        containing neither is returned unchanged.
        :param label: the raw name
        :type label: str
        :return: the converted label
        """
        import html

        # only semicolon-terminated references are decoded, so a bare '&' inside a name is left alone
        label = GTPLMolecule._NAMED_ENTITY.sub(lambda hit: html.unescape(hit.group(0)), label)

        for tag in GTPLMolecule._MARKUP_TAGS:
            label = label.replace(tag, '')

        return label
800 |
801 |
802 |
class ChEMBLMolecule(object):
    """Molecule record downloaded from the ChEMBL web service, convertible to Wikidata statements."""

    # longest string value to_wikidata() will emit for SMILES / InChI statements
    MAX_STRING_LENGTH = 400

    def __init__(self, chembl_id=None, inchi_key=None):
        """
        Download the ChEMBL molecule record for one identifier.

        :param chembl_id: ChEMBL identifier; takes precedence over inchi_key when both are given
        :param inchi_key: InChI key, used only when chembl_id is None
        :raises ValueError: if no identifier is given or ChEMBL answers with HTTP 404
        """
        ci = chembl_id if chembl_id is not None else inchi_key
        if not ci:
            raise ValueError('Either chembl_id or inchi_key is required')

        url = 'https://www.ebi.ac.uk/chembl/api/data/molecule/{}.json'.format(ci.upper())
        r = requests.get(url)
        if r.status_code == 404:
            # report the identifier that was actually looked up (it may be the InChI key)
            raise ValueError('ChEMBL ID {} not found in ChEMBL'.format(ci))
        # any other HTTP error should surface as such instead of as a JSON decoding failure
        r.raise_for_status()
        self.compound = r.json()

    @property
    def stdinchikey(self):
        """Standard InChI key from the record's 'molecule_structures' section."""
        return self.compound['molecule_structures']['standard_inchi_key']

    @property
    def stdinchi(self):
        """Standard InChI from the record's 'molecule_structures' section."""
        return self.compound['molecule_structures']['standard_inchi']

    @property
    def preferred_name(self):
        """The record's 'pref_name' value."""
        return self.compound['pref_name']

    @property
    def smiles(self):
        """Canonical SMILES from the record's 'molecule_structures' section."""
        return self.compound['molecule_structures']['canonical_smiles']

    @property
    def chembl_id(self):
        """The record's 'molecule_chembl_id' value."""
        return self.compound['molecule_chembl_id']

    @property
    def monoisotopic_mass(self):
        """The 'mw_monoisotopic' value from the record's 'molecule_properties' section."""
        return self.compound['molecule_properties']['mw_monoisotopic']

    @property
    def chebi(self):
        """The record's 'chebi_par_id' value, or None when the key is absent."""
        return self.compound.get('chebi_par_id')

    @classmethod
    def _fits(cls, value):
        """Return True when value is None or no longer than MAX_STRING_LENGTH characters."""
        return value is None or len(value) <= cls.MAX_STRING_LENGTH

    def to_wikidata(self):
        """
        Build the Wikidata statements for this molecule, each referenced to ChEMBL.

        :return: list of wdi_core statement objects: the mass (P2067) first, then one statement
                 per non-empty identifier
        """
        item_label = self.preferred_name or self.chembl_id

        refs = [[
            wdi_core.WDItemID(value='Q6120337', prop_nr='P248', is_reference=True),  # stated in
            wdi_core.WDExternalID(value=self.chembl_id, prop_nr='P592', is_reference=True),  # source element
            wdi_core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
            wdi_core.WDMonolingualText(value=item_label[0:400], language='en', prop_nr='P1476', is_reference=True),
            wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # retrieved
        ]]
        print('ChEMBL Main label is', item_label)

        cmpnd = Compound(compound_string=self.smiles, identifier_type='smiles')
        isomeric_smiles = cmpnd.get_smiles(smiles_type='isomeric')
        canonical_smiles = cmpnd.get_smiles(smiles_type='generic')
        inchi = self.stdinchi[6:]

        dtypes = {
            'P592': wdi_core.WDExternalID,
            'P683': wdi_core.WDExternalID,
            'P661': wdi_core.WDExternalID,
            'P2153': wdi_core.WDExternalID,
            'P233': wdi_core.WDString,
            'P2017': wdi_core.WDString,
            'P235': wdi_core.WDExternalID,
            'P234': wdi_core.WDExternalID,
            'P274': wdi_core.WDString
        }

        # The length limit is applied to the strings that are actually written, not to the
        # SMILES ChEMBL delivered, because the generated forms can differ in length.
        elements = {'P592': self.chembl_id}
        if self._fits(canonical_smiles):
            elements['P233'] = canonical_smiles
        # do not add isomeric smiles if no isomeric info is available
        if canonical_smiles != isomeric_smiles and self._fits(isomeric_smiles):
            elements['P2017'] = isomeric_smiles
        elements['P235'] = self.stdinchikey
        # do not try to add InChI longer than the limit
        if self._fits(inchi):
            elements['P234'] = inchi
        elements['P683'] = str(self.chebi) if self.chebi else None

        data = [
            wdi_core.WDQuantity(value=self.monoisotopic_mass, prop_nr='P2067', upper_bound=self.monoisotopic_mass,
                                lower_bound=self.monoisotopic_mass, unit='http://www.wikidata.org/entity/Q483261',
                                references=refs)
        ]

        for k, v in elements.items():
            if not v:
                continue

            print('{}:'.format(k), v)
            # a list/set yields one statement per member, a scalar exactly one statement
            values = v if isinstance(v, (list, set)) else [v]
            for x in values:
                data.append(dtypes[k](prop_nr=k, value=x, references=refs))

        return data
907 |
class ChemSpiderMolecule(object):
    """Thin wrapper around a chemspipy compound that can emit Wikidata statements."""

    # API token handed to chemspipy.ChemSpider; assign it on the class before use
    token = ''

    def __init__(self, csid=None, mol=None):
        """
        Wrap a ChemSpider compound.

        :param csid: ChemSpider id; when truthy the compound is fetched through chemspipy
        :param mol: compound object stored as-is when no csid is given
        """
        if not csid:
            self.compound = mol
            return

        client = chemspipy.ChemSpider(ChemSpiderMolecule.token)
        self.compound = client.get_compound(csid)

    @property
    def stdinchikey(self):
        """The wrapped compound's stdinchikey attribute."""
        return self.compound.stdinchikey

    @property
    def stdinchi(self):
        """The wrapped compound's stdinchi attribute."""
        return self.compound.stdinchi

    @property
    def common_name(self):
        """The wrapped compound's common_name, or None when reading it raises KeyError."""
        try:
            name = self.compound.common_name
        except KeyError:
            name = None
        return name

    @property
    def smiles(self):
        """The wrapped compound's smiles attribute."""
        return self.compound.smiles

    @property
    def csid(self):
        """The wrapped compound's csid, converted to a string."""
        return str(self.compound.csid)

    @property
    def monoisotopic_mass(self):
        """The wrapped compound's monoisotopic_mass attribute."""
        return self.compound.monoisotopic_mass

    def to_wikidata(self):
        """
        Build the Wikidata statements for this compound, each referenced to ChemSpider.

        :return: list of wdi_core statement objects; the mass statement (P2067) comes first
                 and is only present when the monoisotopic mass is non-zero
        """
        item_label = self.common_name or self.csid

        references = [[
            wdi_core.WDItemID(value='Q2311683', prop_nr='P248', is_reference=True),  # stated in
            wdi_core.WDExternalID(value=self.csid, prop_nr='P661', is_reference=True),  # source element
            wdi_core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
            wdi_core.WDMonolingualText(value=item_label[0:200], language='en', prop_nr='P1476', is_reference=True),
            wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # retrieved
        ]]
        print('Main label is', item_label)

        # a SMILES string that cannot be converted leaves both SMILES statements out
        canonical_smiles = None
        isomeric_smiles = None
        try:
            converted = Compound(compound_string=self.smiles, identifier_type='smiles')
            isomeric_smiles = converted.get_smiles(smiles_type='isomeric')
            canonical_smiles = converted.get_smiles(smiles_type='generic')
        except ValueError as err:
            print(err)
            print('Error when trying to convert ChemSpider SMILES')
            canonical_smiles = None
            isomeric_smiles = None

        statement_types = {
            'P661': wdi_core.WDExternalID,
            'P2153': wdi_core.WDExternalID,
            'P233': wdi_core.WDString,
            'P2017': wdi_core.WDString,
            'P235': wdi_core.WDExternalID,
            'P234': wdi_core.WDExternalID,
            'P274': wdi_core.WDString
        }

        elements = {
            'P661': self.csid,
            'P233': canonical_smiles,
            'P2017': isomeric_smiles,
            'P235': self.stdinchikey,
            'P234': self.stdinchi[6:],
        }

        # isomeric SMILES is dropped when it adds nothing or the source SMILES is over 400 chars
        if canonical_smiles == isomeric_smiles or len(self.smiles) > 400:
            elements.pop('P2017')

        # InChI longer than 400 chars is not added
        if len(self.stdinchi[6:]) > 400:
            elements.pop('P234')

        if len(self.smiles) > 400:
            elements.pop('P233')

        statements = []
        if float(self.monoisotopic_mass) != 0:
            statements = [
                wdi_core.WDQuantity(value=self.monoisotopic_mass, prop_nr='P2067',
                                    upper_bound=self.monoisotopic_mass, lower_bound=self.monoisotopic_mass,
                                    unit='http://www.wikidata.org/entity/Q483261', references=references)
            ]

        for prop, content in elements.items():
            if not content:
                continue

            print('{}:'.format(prop), content)
            make_statement = statement_types[prop]
            if isinstance(content, (list, set)):
                for member in content:
                    statements.append(make_statement(prop_nr=prop, value=member, references=references))
            else:
                statements.append(make_statement(prop_nr=prop, value=content, references=references))

        return statements

    @staticmethod
    def search(search_string):
        """
        Run a ChemSpider search and wrap every hit.

        :param search_string: query passed to chemspipy's search
        :return: list of ChemSpiderMolecule objects, one per search result
        """
        client = chemspipy.ChemSpider(ChemSpiderMolecule.token)
        return [ChemSpiderMolecule(mol=hit) for hit in client.search(search_string)]
1042 |
1043 |
class PubChemMolecule(object):
    """
    A PubChem compound, resolved either by CID or by InChI key.

    Assigning ``cid`` downloads the compound's RDF record and fills in the InChI key, the
    substance ids and the cross-database identifiers found among its synonyms. SMILES, InChI,
    exact mass and molecular formula are fetched on first access and then cached.
    """

    headers = {
        'accept': 'application/json',
        'content-type': 'application/json',
        'charset': 'utf-8'
    }

    base_url = 'http://pubchem.ncbi.nlm.nih.gov/rest/rdf/{}'

    _RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
    _HAS_VALUE = 'http://semanticscience.org/resource/has-value'
    _DATABASE_ID_TYPE = 'http://semanticscience.org/resource/CHEMINF_000467'
    _EINECS_TYPE = 'http://semanticscience.org/resource/CHEMINF_000447'
    _CHEBI_TYPE = 'http://semanticscience.org/resource/CHEMINF_000407'
    _UNII_TYPE = 'http://semanticscience.org/resource/CHEMINF_000563'

    # synonym RDF type -> name of the instance attribute that receives the identifier list
    _IDENTIFIER_ATTRS = {
        'http://semanticscience.org/resource/CHEMINF_000446': 'cas',
        _EINECS_TYPE: 'einecs',
        'http://semanticscience.org/resource/CHEMINF_000412': 'chembl',
        'http://semanticscience.org/resource/CHEMINF_000409': 'kegg',
        _CHEBI_TYPE: 'chebi',
        _UNII_TYPE: 'unii',
    }

    # generic database ids are told apart by their prefix: (prefix, attribute name)
    _PREFIX_ATTRS = (
        ('DTXSID', 'dtxsid'),
        ('ZINC', 'zinc'),
    )

    _INDEX_NUMBERS = str.maketrans('0123456789', '₀₁₂₃₄₅₆₇₈₉')

    def __init__(self, cid=None, inchi_key=None, inchi=None, sid=None, mol_type='canonical'):
        """
        Resolve a compound and download its basic data from PubChem.

        :param cid: PubChem compound id, with or without a 'CID' prefix; wins over inchi_key
        :param inchi_key: InChI key used to look up the CID when no cid is given
        :param inchi: InChI to store up front instead of fetching it lazily
        :param sid: substance id(s) to store when given
        :param mol_type: 'canonical' or 'zwitterion'; chooses among several CIDs sharing an InChI key
        :raises ValueError: for an unknown mol_type or when neither cid nor inchi_key is given
        :raises InChIKeyMissingError: when PubChem has no compound for the InChI key
        """
        # validate before any network traffic; a plain assert would vanish under `python -O`
        if mol_type not in ('canonical', 'zwitterion'):
            raise ValueError("mol_type must be 'canonical' or 'zwitterion'")
        if not cid and not inchi_key:
            raise ValueError('Either cid or inchi_key is required')
        self.mol_type = mol_type

        self.dtxsid = None
        self.einecs = None
        self.cas = None
        self.zinc = None
        self.chembl = None
        self.kegg = None
        self.chebi = None
        self.unii = None

        self._cid = None
        self._sids = None
        self._inchi_key = None
        self._inchi = None
        self._canonical_smiles = None
        self._isomeric_smiles = None
        self._exact_mass = None
        self._molecular_formula = None
        self._aids = None

        print('cid parameter value', cid)
        if cid:
            self.cid = cid
        if sid:
            self.sids = sid
        if inchi_key:
            self.stdinchikey = inchi_key
        if inchi:
            self.inchi = inchi

        if not cid:
            cids = self._retrieve_pubchem_cids(self.stdinchikey)
            if not cids:
                raise InChIKeyMissingError('InChI key not found in PubChem!')
            self.cid = cids[0] if len(cids) == 1 else self._determine_mol_type(cids)

        self.synonyms = PubChemMolecule._get_synonyms(self.cid)
        self.main_label = self.synonyms[0] if self.synonyms else ''

    @property
    def canonical_smiles(self):
        """Canonical SMILES, fetched from PubChem on first access."""
        if not self._canonical_smiles:
            self._canonical_smiles = PubChemMolecule._get_descriptors(self.cid, 'Canonical_SMILES')
        return self._canonical_smiles

    @canonical_smiles.setter
    def canonical_smiles(self, value):
        self._canonical_smiles = value

    @property
    def isomeric_smiles(self):
        """Isomeric SMILES, fetched from PubChem on first access."""
        if not self._isomeric_smiles:
            self._isomeric_smiles = PubChemMolecule._get_descriptors(self.cid, 'Isomeric_SMILES')
        return self._isomeric_smiles

    @isomeric_smiles.setter
    def isomeric_smiles(self, value):
        self._isomeric_smiles = value

    @property
    def exact_mass(self):
        """Get exact mass of a PubChem compound."""
        if not self._exact_mass:
            self._exact_mass = PubChemMolecule._get_descriptors(self.cid, 'Exact_Mass')
        return self._exact_mass

    @exact_mass.setter
    def exact_mass(self, value):
        """Set exact mass of a PubChem compound."""
        self._exact_mass = value

    @property
    def molecular_formula(self):
        """Molecular formula, fetched from PubChem on first access."""
        if not self._molecular_formula:
            self._molecular_formula = PubChemMolecule._get_descriptors(self.cid, 'Molecular_Formula')
        return self._molecular_formula

    @molecular_formula.setter
    def molecular_formula(self, value):
        self._molecular_formula = value

    @property
    def cid(self):
        """The compound id in 'CID<number>' form, or None when unset."""
        return self._cid

    @cid.setter
    def cid(self, value):
        """
        Store the CID and (re)load the compound's RDF record.

        :param value: compound id as int or str, with or without a 'CID' prefix; a falsy value
                      only clears the stored id
        :raises ValueError: if an unprefixed value is not an integer
        """
        if not value:
            self._cid = value
            return

        text = str(value)
        if text.lower().startswith('cid'):
            # normalise the prefix; the RDF record is looked up under 'compound/CID...'
            text = 'CID' + text[3:]
        else:
            # make sure that the provided cid is an integer, will raise a ValueError if not
            int(text)
            text = 'CID{}'.format(text)

        self._cid = text
        self._load_compound_data()

    def _load_compound_data(self):
        """Download the RDF record of the current CID and derive InChI key, sids and identifiers."""
        base_data = PubChemMolecule._retrieve_basic_compound_info(self._cid)

        # Statements with this compound as subject. They are removed from base_data so that the
        # loop further down only sees entries that refer to the compound. The values collected
        # here are not stored on the instance.
        subj_data = base_data.pop('compound/' + self._cid)

        active_ingredient_of = set()
        has_roles = set()
        has_parent = set()
        subj_mapping = {
            'vocabulary#FDAApprovedDrugs': has_roles,
            'vocabulary#is_active_ingredient_of': active_ingredient_of,
            'http://purl.obolibrary.org/obo/has-role': has_roles,
            'vocabulary#has_parent': has_parent
        }

        for predicate, objects in subj_data.items():
            if predicate not in subj_mapping:
                continue

            # NOTE(review): only the first object of each predicate is read -- confirm intended
            target = objects[0]['value']
            if target.startswith('compound/CID'):
                target = target.split('/')[-1]
            subj_mapping[predicate].add(target)

        # Entries with this compound as object, grouped by predicate. Only the substance ids
        # are kept on the instance afterwards.
        sids = set()
        relation_sets = {
            'vocabulary#has_parent': set(),  # parent_of
            'http://semanticscience.org/resource/CHEMINF_000455': set(),  # isotopologues
            'http://semanticscience.org/resource/CHEMINF_000461': set(),  # stereoisomers
            'http://semanticscience.org/resource/CHEMINF_000462': set(),  # same connectivity
            'http://semanticscience.org/resource/CHEMINF_000477': sids,
            'http://semanticscience.org/resource/CHEMINF_000478': set(),  # part_of
            'http://semanticscience.org/resource/has-attribute': set(),
        }

        for key, statements in base_data.items():
            if key.startswith('inchikey'):
                self.stdinchikey = key.split('/')[-1]
                continue

            if key.startswith('synonym/MD5_'):
                self._apply_synonym(key)

            local_id = key.split('/')[-1]
            for predicate in statements:
                if predicate in relation_sets:
                    relation_sets[predicate].add(local_id)

        self.sids = list(sids)

    def _apply_synonym(self, key):
        """Fetch one synonym record and store the database identifiers it carries."""
        res = requests.get(url=self.base_url.format(key + '.json'), headers=self.headers).json()

        values = [x['value'] for x in res[key][self._HAS_VALUE]]
        types = [x['value'] for x in res[key][self._RDF_TYPE]]

        # retrieve database identifiers
        if values and len(types) == 1 and types[0] == self._DATABASE_ID_TYPE:
            for prefix, attr in self._PREFIX_ATTRS:
                if values[0].startswith(prefix):
                    setattr(self, attr, values[0])

        for type_uri in types:
            attr = self._IDENTIFIER_ATTRS.get(type_uri)
            if attr is None:
                continue

            # process identifier strings from PubChem, if needed; each type starts from the
            # raw values so that one type's clean-up never leaks into another's
            if type_uri == self._EINECS_TYPE:
                identifiers = [v.split(' ').pop() for v in values]
            elif type_uri == self._CHEBI_TYPE:
                identifiers = list({v.split(':').pop() for v in values})
            elif type_uri == self._UNII_TYPE:
                identifiers = list({v.upper() for v in values})
            else:
                identifiers = values

            # NOTE(review): a later synonym of the same type replaces the earlier value -- confirm
            setattr(self, attr, identifiers)

    @property
    def sids(self):
        """Substance ids linked to the compound (None until a CID or sid has been set)."""
        return self._sids

    @sids.setter
    def sids(self, value):
        self._sids = value

    @property
    def aids(self):
        """Stored assay ids (None unless assigned)."""
        return self._aids

    @aids.setter
    def aids(self, value):
        self._aids = value

    @property
    def stdinchikey(self):
        """The stored InChI key."""
        return self._inchi_key

    @stdinchikey.setter
    def stdinchikey(self, value):
        self._inchi_key = value

    @property
    def inchi(self):
        """InChI string, fetched from PubChem on first access."""
        if not self._inchi:
            self._inchi = PubChemMolecule._get_descriptors(self.cid, 'IUPAC_InChI')
        return self._inchi

    @inchi.setter
    def inchi(self, value):
        self._inchi = value

    @property
    def assay_ids(self):
        """Mapping of substance id to assay ids, queried from PubChem on every access."""
        return PubChemMolecule._get_assay_ids(self.sids)

    def _determine_mol_type(self, cids):
        """
        Pick one CID out of several that share an InChI key.

        'canonical' prefers the fewest '+'/'-' characters in the isomeric SMILES, 'zwitterion'
        the most; ties go to the candidate with the largest RDF record.

        :param cids: candidate CIDs
        :return: the chosen CID
        """
        print(cids)
        charge_counts = []
        for candidate in cids:
            ismiles = PubChemMolecule._get_descriptors(candidate, 'Isomeric_SMILES')
            charge_counts.append(ismiles.count('+') + ismiles.count('-'))

        charge = min(charge_counts) if self.mol_type == 'canonical' else max(charge_counts)

        if charge_counts.count(charge) == 1:
            return cids[charge_counts.index(charge)]

        # tie: only the tied candidates are downloaded, all others are sized 0
        record_sizes = [
            len(simplejson.dumps(PubChemMolecule._retrieve_basic_compound_info(candidate)))
            if count == charge else 0
            for candidate, count in zip(cids, charge_counts)
        ]
        return cids[record_sizes.index(max(record_sizes))]

    @staticmethod
    def _get_synonyms(cid):
        """
        Fetch the synonym list of a compound.

        :param cid: compound id in 'CID<number>' form
        :return: list of synonyms; empty when PubChem reports a fault or lists none
        """
        url = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{}/synonyms/json'.format(cid[3:])
        payload = requests.get(url, headers=PubChemMolecule.headers).json()
        if 'Fault' in payload:
            return []
        return payload['InformationList']['Information'][0].get('Synonym', [])

    @staticmethod
    def _retrieve_basic_compound_info(cid):
        """Download and return the parsed RDF/JSON record of a compound ('CID<number>')."""
        cmpnd_url = 'https://pubchem.ncbi.nlm.nih.gov/rest/rdf/compound/{}.json'.format(cid)
        print(cmpnd_url)

        return requests.get(cmpnd_url, headers=PubChemMolecule.headers).json()

    @staticmethod
    def _get_descriptors(cid, descr_type):
        """
        Fetch one descriptor value of a compound.

        :param cid: compound id in 'CID<number>' form
        :param descr_type: descriptor name as used in the URL, e.g. 'Exact_Mass'
        :return: the descriptor's first 'has-value' entry
        """
        descriptor = '{}_{}'.format(cid, descr_type)
        url = 'https://pubchem.ncbi.nlm.nih.gov/rest/rdf/descriptor/{}.json'.format(descriptor)

        descr_json = requests.get(url, headers=PubChemMolecule.headers).json()
        return descr_json['descriptor/' + descriptor][PubChemMolecule._HAS_VALUE][0]['value']

    @staticmethod
    def _get_assay_ids(sids):
        """
        Query the assays that the given substances take part in.

        :param sids: list of substance ids; queried in blocks of 20
        :return: dict mapping substance id to a set of assay ids
        """
        url = 'http://pubchem.ncbi.nlm.nih.gov/rest/rdf/query'
        assay_ids = dict()
        sids = sids or []

        for start in range(0, len(sids), 20):
            sid_block = sids[start:start + 20]
            params = {
                'graph': 'substance',
                'pred': 'obo:BFO_0000056',
                'subj': ','.join(['substance:{}'.format(x) for x in sid_block]),
                'format': 'json'
            }

            bindings = []
            try:
                response = requests.get(url, params=params, headers=PubChemMolecule.headers)
                print(response.url)
                bindings = response.json()['results']['bindings']
                print('length response items', len(bindings))
            except ValueError as e:
                # every JSON decode error (json, simplejson, requests) derives from ValueError
                print(e)
                print('Error retrieving PubChem Assay Ids')

            for binding in bindings:
                if 'subject' not in binding:
                    continue

                assay_id = binding['object']['value'].split('/')[-1].split('_')[0]
                sid = binding['subject']['value'].split('/')[-1]
                assay_ids.setdefault(sid, set()).add(assay_id)

        print(assay_ids)
        return assay_ids

    @staticmethod
    def _retrieve_pubchem_cids(ikey):
        """
        Look up the CIDs registered for an InChI key.

        :param ikey: InChI key
        :return: list of 'CID<number>' strings; empty when the key is unknown
        """
        url = 'http://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/{}.json'.format(ikey)

        try:
            r = requests.get(url, headers=PubChemMolecule.headers).json()
        except ValueError:
            # a non-JSON answer is treated as "unknown InChI key"
            print('PubChem does not have this InChI key', ikey)
            return []

        record = r.get('inchikey/{}'.format(ikey), {})
        attribute_of = record.get('http://semanticscience.org/resource/is-attribute-of', [])
        return [x['value'].split('/')[-1] for x in attribute_of]

    def to_wikidata(self):
        """
        Build the Wikidata statements for this compound, each referenced to PubChem.

        :return: list of wdi_core statement objects: the exact mass (P2067) first, then one
                 statement per non-empty identifier value
        """
        item_label = self.cid if self.main_label == '' else self.main_label

        pubchem_ref = [[
            wdi_core.WDItemID(value='Q278487', prop_nr='P248', is_reference=True),  # stated in
            wdi_core.WDExternalID(value=self.cid[3:], prop_nr='P662', is_reference=True),  # source element
            wdi_core.WDItemID(value='Q1860', prop_nr='P407', is_reference=True),  # language of work
            wdi_core.WDMonolingualText(value=item_label[0:400], language='en', prop_nr='P1476', is_reference=True),
            wdi_core.WDTime(time=time.strftime('+%Y-%m-%dT00:00:00Z'), prop_nr='P813', is_reference=True)  # retrieved
        ]]
        print('Main label is', self.main_label)

        elements = {
            'P662': self.cid[3:],
            'P233': self.canonical_smiles,
            'P2017': self.isomeric_smiles,
            'P235': self.stdinchikey,
            'P234': self.inchi[6:],
            'P274': PubChemMolecule.convert_to_index_numbers(self.molecular_formula),
            'P3117': self.dtxsid,
            'P231': self.cas,
            'P232': self.einecs,
            'P2084': self.zinc,
            'P592': self.chembl,
            'P665': self.kegg,
            'P683': self.chebi,
            'P652': self.unii,
        }

        dtypes = {
            'P662': wdi_core.WDExternalID,
            'P2153': wdi_core.WDExternalID,
            'P233': wdi_core.WDString,
            'P2017': wdi_core.WDString,
            'P235': wdi_core.WDExternalID,
            'P234': wdi_core.WDExternalID,
            'P274': wdi_core.WDString,
            'P232': wdi_core.WDExternalID,
            'P231': wdi_core.WDExternalID,
            'P3117': wdi_core.WDExternalID,
            'P2084': wdi_core.WDExternalID,
            'P592': wdi_core.WDExternalID,
            'P665': wdi_core.WDExternalID,
            'P683': wdi_core.WDExternalID,
            'P652': wdi_core.WDExternalID,
        }

        # do not add isomeric smiles if canonical smiles is the same
        if self.canonical_smiles == self.isomeric_smiles or len(self.isomeric_smiles) > 400:
            del elements['P2017']

        # do not try to add InChI longer than 400 chars
        if len(self.inchi[6:]) > 400:
            del elements['P234']

        if len(self.canonical_smiles) > 400:
            del elements['P233']

        data = [
            wdi_core.WDQuantity(value=self.exact_mass, prop_nr='P2067', upper_bound=self.exact_mass,
                                lower_bound=self.exact_mass, unit='http://www.wikidata.org/entity/Q483261',
                                references=pubchem_ref)
        ]

        for k, v in elements.items():
            if not v:
                continue

            print('{}:'.format(k), v)
            # a list/set yields one statement per member, a scalar exactly one statement
            values = v if isinstance(v, (list, set)) else [v]
            for x in values:
                data.append(dtypes[k](prop_nr=k, value=x, references=pubchem_ref))

        return data

    @staticmethod
    def convert_to_index_numbers(formula_string):
        """
        Converts the numbers in a normal string into unicode index numbers (as used in chemical formulas)
        :param formula_string: a string containing numbers which should be converted to index numbers
        :type formula_string: str
        :return: returns a unicode string with numbers converted to index numbers
        """
        return formula_string.translate(PubChemMolecule._INDEX_NUMBERS)
1517 |
1518 |
class InChIKeyMissingError(Exception):
    """Error carrying a single payload in ``value``; its text form is the repr of that payload."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return '{!r}'.format(self.value)
1525 |
1526 |
def main():
    """
    Add PubChem data to every Wikidata item that has an InChI key (P235) but no PubChem CID (P662).

    Each item is resolved through PubChemMolecule and written back to Wikidata. InChI keys
    unknown to PubChem are counted; any other failure is logged and the run continues.
    """
    # NOTE(review): credentials are empty in the source; fill them in before running
    login_obj = wdi_login.WDLogin(user='', pwd='')

    query = '''
    SELECT * WHERE {
        ?cmpnd wdt:P235 ?pc .
        FILTER NOT EXISTS{
            #{?cmpnd wdt:P279 wd:Q11173 .} UNION
            #{?cmpnd wdt:P31 wd:Q11173 .} UNION
            {?cmpnd wdt:P662 ?x .}
        }
    }
    '''

    results = wdi_core.WDItemEngine.execute_sparql_query(query=query)

    cid_not_found_count = 0
    for item in results['results']['bindings']:
        start = time.time()
        ikey = item['pc']['value']
        try:
            print('--' * 10)
            print(ikey)
            cmpnd = PubChemMolecule(inchi_key=ikey)
            print(cmpnd.cid)
            print(cmpnd.canonical_smiles)
            print(cmpnd.isomeric_smiles)
            print(cmpnd.inchi)
            print(cmpnd.exact_mass)
            print(cmpnd.molecular_formula)
            print(cmpnd.main_label)
            print(cmpnd.sids)
            # PubChemMolecule holds no session object, so there is nothing to close here; the
            # former `cmpnd.s.close()` raised AttributeError and aborted every item before
            # the write below was reached.

            wd_item = wdi_core.WDItemEngine(item_name='ddk', domain='drugs', data=cmpnd.to_wikidata(),
                                            append_value=['P31'])
            print(wd_item.wd_item_id)
            pprint.pprint(wd_item.entity_metadata)
            wd_item.write(login_obj)
        except InChIKeyMissingError as e:
            print(ikey, e)
            cid_not_found_count += 1
        except Exception as e:
            # top-level boundary: log the failure and carry on with the next item
            print(e)

            wdi_core.WDItemEngine.log(
                'ERROR', '{main_data_id}, "{exception_type}", "{message}", {wd_id}, {duration}'
                .format(
                    main_data_id='{}'.format(ikey),
                    exception_type=type(e),
                    message=e.__str__(),
                    wd_id='',
                    duration=time.time() - start
                ))

    print('not found count', cid_not_found_count)
1604 |
1605 |
1606 |
1607 |
# Run main() only when the file is executed as a script; main() has no return statement,
# so sys.exit receives None.
if __name__ == '__main__':
    sys.exit(main())
1610 |
--------------------------------------------------------------------------------
/cdk_pywrapper/config.py:
--------------------------------------------------------------------------------
# Path to the py4j server jar file. Find it with 'find /usr/ -type f -name py4j*jar'
# or, better, list the files installed by pip: pip3 show -f py4j
py4j_path = '/usr/local/share/py4j/py4j0.10.7.jar'
# Path to the CDK jar file.
cdk_path = './cdk/cdk-1.5.13.jar'
5 |
--------------------------------------------------------------------------------
/cdk_pywrapper/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sebotic/cdk_pywrapper/94f0f6f337cf3162b303d95b6d06da03e61bdee3/cdk_pywrapper/tests/__init__.py
--------------------------------------------------------------------------------
/cdk_pywrapper/tests/cdk_pywrapper_test.py:
--------------------------------------------------------------------------------
1 | from cdk_pywrapper.cdk_pywrapper import Compound
2 | import sys
3 |
4 | __author__ = 'Sebastian Burgstaller-Muehlbacher'
5 | __license__ = 'AGPLv3'
6 | __copyright__ = 'Sebastian Burgstaller-Muehlbacher'
7 |
8 | '''A main method with a list of InChIs. These are then used to generate SMILES and InChI keys.'''
9 |
10 |
11 | def main():
12 | test_inchis = [
13 | 'InChI=1S/C23H18ClF2N3O3S/c1-2-9-33(31,32)29-19-8-7-18(25)20(21(19)26)22(30)17-12-28-23-16(17)10-14(11-27-23)13-3-5-15(24)6-4-13/h3-8,10-12,29H,2,9H2,1H3,(H,27,28)',
14 | 'InChI=1S/C33H42N4O6/c1-7-20-19(6)32(42)37-27(20)14-25-18(5)23(10-12-31(40)41)29(35-25)15-28-22(9-11-30(38)39)17(4)24(34-28)13-26-16(3)21(8-2)33(43)36-26/h15,26-27,35H,7-14H2,1-6H3,(H,36,43)(H,37,42)(H,38,39)(H,40,41)/b28-15-/t26-,27-/m0/s1',
15 | 'InChI=1S/C21H25ClFN3O3/c1-2-28-20-10-19(24)18(22)9-17(20)21(27)25-11-16-13-26(7-8-29-16)12-14-3-5-15(23)6-4-14/h3-6,9-10,16H,2,7-8,11-13,24H2,1H3,(H,25,27)',
16 | 'InChI=1S/C16H12FN3O3/c1-19-14-7-6-10(20(22)23)8-12(14)16(18-9-15(19)21)11-4-2-3-5-13(11)17/h2-8H,9H2,1H3',
17 | 'InChI=1S/C10H17N3O6S/c11-5(10(18)19)1-2-7(14)13-6(4-20)9(17)12-3-8(15)16/h5-6,20H,1-4,11H2,(H,12,17)(H,13,14)(H,15,16)(H,18,19)/t5-,6-/m0/s1',
18 | 'InChI=1S/C13H16N2O/c1-8-13-11(5-6-14-8)10-4-3-9(16-2)7-12(10)15-13/h3-4,7-8,14-15H,5-6H2,1-2H3',
19 | 'InChI=1S/C27H44O2/c1-19-10-13-23(28)18-22(19)12-11-21-9-7-17-27(5)24(14-15-25(21)27)20(2)8-6-16-26(3,4)29/h11-12,20,23-25,28-29H,1,6-10,13-18H2,2-5H3/b21-11+,22-12-/t20-,23+,24-,25+,27-/m1/s1',
20 | 'InChI=1S/C40H56/c1-31(19-13-21-33(3)25-27-37-35(5)23-15-29-39(37,7)8)17-11-12-18-32(2)20-14-22-34(4)26-28-38-36(6)24-16-30-40(38,9)10/h11-14,17-23,25-28,37H,15-16,24,29-30H2,1-10H3/b12-11+,19-13+,20-14+,27-25+,28-26+,31-17+,32-18+,33-21+,34-22+/t37-/m0/s1',
21 | 'InChI=1S/C11H14N4O5/c1-14-3-13-9-6(10(14)19)12-4-15(9)11-8(18)7(17)5(2-16)20-11/h3-5,7-8,11,16-18H,2H2,1H3',
22 | 'InChI=1S/C27H44O2/c1-18(2)8-6-9-19(3)24-13-14-25-21(10-7-15-27(24,25)5)11-12-22-16-23(28)17-26(29)20(22)4/h11-12,18-19,23-26,28-29H,4,6-10,13-17H2,1-3,5H3/b21-11+,22-12-/t19-,23-,24-,25+,26+,27-/m1/s1',
23 | 'InChI=1S/C9H14N5O4P/c1-6(18-5-19(15,16)17)2-14-4-13-7-8(10)11-3-12-9(7)14/h3-4,6H,2,5H2,1H3,(H2,10,11,12)(H2,15,16,17)/t6-/m1/s1',
24 | 'InChI=1S/C51H79NO13/c1-30-16-12-11-13-17-31(2)42(61-8)28-38-21-19-36(7)51(60,65-38)48(57)49(58)52-23-15-14-18-39(52)50(59)64-43(33(4)26-37-20-22-40(53)44(27-37)62-9)29-41(54)32(3)25-35(6)46(56)47(63-10)45(55)34(5)24-30/h11-13,16-17,25,30,32-34,36-40,42-44,46-47,53,56,60H,14-15,18-24,26-29H2,1-10H3/b13-11+,16-12+,31-17+,35-25+/t30-,32-,33-,34-,36-,37+,38+,39+,40-,42+,43+,44-,46-,47+,51-/m1/s1'
25 |
26 | ]
27 |
28 | for inchi in test_inchis:
29 |
30 | cmpnd = Compound(compound_string=inchi, identifier_type='inchi')
31 | print(cmpnd.get_smiles())
32 | print(cmpnd.get_inchi_key())
33 | print(cmpnd.get_inchi())
34 | print(cmpnd.get_mol2())
35 | print(cmpnd.get_fingerprint())
36 | print('----------------------------')
37 |
38 | # group of compounds with same connectivity but different configuration:
39 | # https://pubchem.ncbi.nlm.nih.gov/rest/rdf/inchikey/MNQDKWZEUULFPX-UHFFFAOYSA-M.html
40 | smiles = [
41 | '[Ba++].[O-][Fe]([O-])(=O)=O',
42 | 'CCN1C2=CC=CC=C2SC1=CC=CC=CC3=[N+](C4=CC=CC=C4S3)CC.[I-]',
43 | 'CCN\\1C2=CC=CC=C2S/C1=C\C=C\C=C\C3=[N+](C4=CC=CC=C4S3)CC.[I-]',
44 | 'CCN\\1C2=CC=CC=C2S/C1=C/C=C/C=C/C3=[N+](C4=CC=CC=C4S3)CC.[I-]',
45 | 'CCN\\1C2=CC=CC=C2S/C1=C\\C=C\\C=C/C3=[N+](C4=CC=CC=C4S3)CC.[I-]',
46 | 'CCN\\1C2=CC=CC=C2S/C1=C/C=C/C=CC3=[N+](C4=CC=CC=C4S3)CC.[I-]',
47 | 'CC1=CC=CC=C1OCC2=CC=CC=C2/C(=N\OC)/C(=O)OC',
48 | 'CCCCCC/C=C\CCCCCCCC(=O)O',
49 | 'CC(C)(C)c1nc(c2cccc(NS(=O)(=O)c3c(F)cccc3F)c2F)c(s1)c4ccnc(N)n4',
50 | 'CC(C)(C)C1=NC(C2=CC=CC(NS(=O)(=O)N(C)(CC))=C2F)=C(S1)C4=CC=NC(N)=N4',
51 | 'C1=CC2=C(C=C1O)C(=CN2)C[C@@H](C(=O)[O-])[NH3+]',
52 | 'CN/C(=C\[N+](=O)[O-])/NCCSCC1=CC=C(O1)CN(C)C',
53 | 'CN/C(=C/[N+](=O)[O-])/NCCSCC1=CC=C(O1)CN(C)C',
54 | 'COCCOC[C@H](CC1(CCCC1)C(=O)N[C@@H]2CC[C@@H](CC2)C(=O)O)C(=O)Oc3ccc4CCCc4c3',
55 | 'C1=C(N=C(S1)N=C(N)N)CSCC/C(=N/S(=O)(=O)N)/N',
56 | 'C[C@]([C@H]1C[C@@]23CC[C@@]1([C@H]4[C@@]25CCN([C@@H]3CC6=C5C(=C(C=C6)O)O4)CC7CC7)OC)(C(C)(C)CC)O',
57 | 'CC(=O)O[Hg]c1cc(ccc1O)C(CC(C)(C)C)(C)C',
58 | 'CC(=O)O.CC(C)(C)CC(C)(C)[C]1C=CC(=C=C1)[O-].[Hg+]',
59 | 'N/C(N)=C([N+]([O-])=O)\[N+]([O-])=O',
60 | 'CC(C)C1=C(C(=C(N1CC[C@H](C[C@H](CC(=O)O)O)O)C2=CC=C(C=C2)F)C3=CC=CC=C3)C(=O)NC4=CC=CC=C4',
61 | 'c1cc(ccc1/N=N/c2ccc(c(c2)OS(=O)O)N)OS(=O)O.[Na+].[Na+]',
62 | 'Clc1ccc2Nc4ccccc4C(=N\c2c1)/N3CCNCC3',
63 | '[Yb][Yb][Yb][Ag][Ag]',
64 | 'N[C@@H](CSSC[C@H](N)C(O)=O)C(O)=O'
65 | 'CC1(C\\2CCC1(C(=O)/C2=C/c3ccc(cc3)C=O)CS(=O)(=O)[O-])C.[Na+]',
66 | 'CNC(=O)C1=CC=CC=C1NC2=NC(=NC=C2Cl)NC3=CC=C(C=C3)N4CCN(CCCN)CC4',
67 | 'OC(=O)CN/C(=N\c1ccc(C#N)cc1)NC2CCCCCCCC2',
68 | 'N[C@@]12C[C@]3(O[N+]([O-])=O)C[C@@](C2)(CC)C[C@@](C1)(CC)C3',
69 | 'C1C2CC3CC1(ON(OO))CC(C2)(C3)N',
70 | '[N+](=O)([O-])OC12CC3(CC(CC(C1)(C3)N)(C2)CC)CC',
71 | 'COc1cc(c(cc1C(=O)N[C@@H]2CC[N@@]3CCC[C@H]2C3)Cl)N',
72 | 'OCN(C(=O)N(CO)C)',
73 | '[O-][n+]1cc[n+](c2c1cccc2)[O-]',
74 | '[2H]C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C([2H])([2H])C(O)=O',
75 | 'CS(O)(=O)=O.[H][C@@]12CC(C)C(C(=O)CN3CCN(CC3)c3cc(nc(n3)N3CCCC3)N3CCCC3)[C@@]1(C)CC=C1[C@@]2([H])CCC2=CC(=O)C=C[C@]12C',
76 | 'OCCCC(O)=O',
77 | 'Cc1nnc(s1)SCC2=C(N3[C@@H]([C@@H](C3=O)NC(=O)Cn4cnnn4)SC2)C(=O)[O-]',
78 | 'CC(=O)Oc1ccc(cc1)C(c1ccc(OC(C)=O)cc1)c1ccccn1'
79 |
80 | ]
81 |
82 | for smile in smiles:
83 | try:
84 | cmpnd = Compound(compound_string=smile, identifier_type='smiles')
85 | print(cmpnd.get_smiles(smiles_type='isomeric'))
86 | print(cmpnd.get_smiles(smiles_type='unique'))
87 | print(cmpnd.get_smiles(smiles_type='absolute'))
88 | print(cmpnd.get_smiles(smiles_type='generic'))
89 | print(cmpnd.get_inchi_key())
90 | print(cmpnd.get_inchi())
91 | print(cmpnd.get_mol2())
92 | print(cmpnd.get_fingerprint())
93 | print(cmpnd.get_tanimoto(Compound(compound_string='C1C2CC3CC1(ON(OO))CC(C2)(C3)N', identifier_type='smiles')))
94 | print(cmpnd.get_tanimoto_from_bitset(Compound(compound_string='C1C2CC3CC1(ON(OO))CC(C2)(C3)N', identifier_type='smiles')))
95 | print(cmpnd.get_molfile())
96 | print('----------------------------')
97 |
98 | except ValueError as e:
99 | print(e)
100 |
101 | cmpnd = Compound(compound_string='InChI=1S/C5H10N2O3/c6-3(5(9)10)1-2-4(7)8/h3H,1-2,6H2,(H2,7,8)(H,9,10)/p-1',
102 | identifier_type='inchi')
103 | print(cmpnd.get_smiles(smiles_type='generic'))
104 | print(cmpnd.get_inchi_key())
105 | print(cmpnd.get_inchi())
106 |
107 | # cdk_pywrapper.gateway.shutdown()
108 |
109 |
110 | if __name__ == '__main__':  # run the demo only when executed as a script, not on import
111 | sys.exit(main())  # whatever main() returns is handed to sys.exit() as the exit status
112 |
113 |
114 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | py4j
2 | psutil
3 | wget
4 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | version-file: version.txt
4 |
5 | [check-manifest]
6 | ignore =
7 | .travis.yml
8 | PKG-INFO
9 | *.egg-info
10 | *.egg-info/*
11 | setup.cfg
12 | .hgtags
13 | .hgignore
14 | .gitignore
15 | .bzrignore
16 | *.mo
17 | .git/*
18 |
19 |
20 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import platform
2 | from setuptools import setup, find_packages
3 | from subprocess import check_output
4 | import subprocess
5 | import py4j
6 | import os
7 | import wget
8 |
9 | host_os = platform.system()
10 |
11 | cdk_version = 'cdk-2.2'
12 | cdk_jar_path = os.path.join('.', 'cdk_pywrapper', 'cdk')
13 | cdk_jar = os.path.join(cdk_jar_path, cdk_version + '.jar')
14 |
15 | fn = wget.download('https://github.com/cdk/cdk/releases/download/{0}/{0}.jar'.format(cdk_version), out=cdk_jar_path)
16 | print('successfully downloaded', fn)
17 |
18 | if host_os == 'Linux' or host_os == 'Darwin':
19 | py4j_path = os.path.join(*py4j.__path__[0].split('/')[:-4])
20 | py4j_jar_path = os.path.join('/', py4j_path, 'share', 'py4j', 'py4j' + py4j.__version__ + '.jar')
21 | cp_sep = ':'
22 |
23 | subprocess.check_call([
24 | 'javac ' +
25 | ' -cp ' +
26 | ' {}{}{} '.format(py4j_jar_path,
27 | cp_sep,
28 | cdk_jar) +
29 | os.path.join('.', 'cdk_pywrapper', 'cdk', 'cdk_bridge.java')
30 | ],
31 | shell=True)
32 |
33 | if host_os == 'Windows':
34 | cp_sep = ';'
35 | drive, path = os.path.splitdrive(py4j.__path__[0])
36 | py4j_path = os.path.join(drive + '\\', *path.split('\\')[:-3])
37 | py4j_jar_path = os.path.join(py4j_path, 'share', 'py4j', 'py4j' + py4j.__version__ + '.jar')
38 |
39 | subprocess.check_call([
40 | 'javac',
41 | '-cp',
42 | '{}{}{}'.format(py4j_jar_path,
43 | cp_sep,
44 | cdk_jar),
45 | os.path.join('.', 'cdk_pywrapper', 'cdk', 'cdk_bridge.java')
46 | ],
47 | shell=True)
48 |
49 | MAJOR_VERSION = 0
50 | MINOR_VERSION = 0
51 | MICRO_VERSION = 1
52 |
53 | REPO_URL = 'https://github.com/sebotic/cdk_pywrapper'
54 |
55 | setup(
56 | name='cdk_pywrapper',
57 | version="{}.{}.{}".format(MAJOR_VERSION, MINOR_VERSION, MICRO_VERSION),
58 | data_files=[("share/cdk", [cdk_jar, './cdk_pywrapper/cdk/CDKBridge.class',
59 | './cdk_pywrapper/cdk/SearchHandler.class'])],
60 | author='Sebastian Burgstaller-Muehlbacher',
61 | author_email='sburgs@scripps.edu',
62 | description='Python wrapper for the CDK (Chemistry Development Kit)',
63 | license='AGPLv3',
64 | keywords='chemistry, CDK, Chemistry Development Kit',
65 | url=REPO_URL,
66 | # packages=find_packages(),
67 | packages=['cdk_pywrapper'],
68 | # include_package_data=True,
69 | # long_description=read('README.md'),
70 | classifiers=[
71 | "Programming Language :: Python",
72 | "Programming Language :: Python :: 3",
73 | "Programming Language :: Python :: 2.7",
74 | "Development Status :: 4 - Beta",
75 | "Operating System :: POSIX",
76 | "Operating System :: MacOS :: MacOS X",
77 | "Operating System :: Microsoft :: Windows",
78 | "Intended Audience :: Science/Research",
79 | "Topic :: Utilities",
80 | "Topic :: Scientific/Engineering :: Bio-Informatics",
81 | ],
82 | install_requires=[
83 | 'py4j'
84 | ],
85 | )
86 |
--------------------------------------------------------------------------------
/version.txt:
--------------------------------------------------------------------------------
1 | 0.0.2
2 |
--------------------------------------------------------------------------------