├── alvadescpy ├── __init__.py ├── functions.py └── wrapper.py ├── pyproject.toml ├── .github └── workflows │ └── publish_to_pypi.yml ├── LICENSE └── README.md /alvadescpy/__init__.py: -------------------------------------------------------------------------------- 1 | from alvadescpy.wrapper import alvadesc 2 | from alvadescpy.functions import smiles_to_descriptors 3 | from alvadescpy.wrapper import CONFIG 4 | import pkg_resources 5 | __version__ = pkg_resources.get_distribution('alvadescpy') 6 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "alvadescpy" 7 | version = "0.1.4" 8 | authors = [ 9 | { name="Travis Kessler", email="travis.j.kessler@gmail.com" }, 10 | ] 11 | description = "Python wrapper for alvaDesc software" 12 | readme = "README.md" 13 | requires-python = ">=3.11" 14 | classifiers = [ 15 | "Programming Language :: Python :: 3.11", 16 | "License :: OSI Approved :: MIT License", 17 | "Operating System :: OS Independent", 18 | ] 19 | 20 | [project.urls] 21 | "Homepage" = "https://github.com/ecrl/alvadescpy" 22 | "Bug Tracker" = "https://github.com/ecrl/alvadescpy/issues" -------------------------------------------------------------------------------- /.github/workflows/publish_to_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Upload new alvaDescPy version to PyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | deploy: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Checkout 15 | uses: actions/checkout@v3 16 | - name: Set up Python 3.11 17 | uses: actions/setup-python@v3 18 | with: 19 | python-version: '3.11' 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install 
--upgrade pip 23 | pip install build 24 | - name: Build package 25 | run: python -m build 26 | - name: Publish package to PyPI 27 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 28 | with: 29 | user: __token__ 30 | password: ${{ secrets.PYPI_API_TOKEN }} -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 UMass Lowell Energy and Combustion Research Laboratory 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# alvadescpy/functions.py
# v.0.1.2
# Developed in 2019 by Travis Kessler
#
# contains functions for common alvaDesc tasks
#

# stdlib. imports
from typing import TypeVar

# alvadescpy imports
from alvadescpy import alvadesc

# custom argument and return variables
str_or_list = TypeVar('str_or_list', str, list)
list_or_dict = TypeVar('list_or_dict', dict, list)


def _single_smiles_to_descriptors(smi: str, descriptors: str_or_list,
                                  labels: bool) -> list_or_dict:
    ''' _single_smiles_to_descriptors: runs `alvadesc` for one SMILES string
    and returns the entry for that molecule

    Args:
        smi (str): SMILES string for a given molecule
        descriptors (str, list): forwarded unchanged to `alvadesc`
        labels (bool): forwarded unchanged to `alvadesc`

    Returns:
        list, dict: first element of the list returned by `alvadesc`

    Raises:
        IndexError: if `alvadesc` returned an empty list for this SMILES
    '''

    results = alvadesc(ismiles=smi, descriptors=descriptors, labels=labels)
    if not results:
        # still an IndexError (what a bare `[0]` would raise), but with a
        # message that says which molecule produced no output
        raise IndexError(
            'alvaDesc returned no output for SMILES: {}'.format(smi)
        )
    return results[0]


def smiles_to_descriptors(smiles: str_or_list,
                          descriptors: str_or_list = 'ALL',
                          labels: bool = True) -> list_or_dict:
    ''' smiles_to_descriptors: returns molecular descriptors for a given
    molecule (represented by its SMILES string), or for each molecule in a
    list/tuple of SMILES strings

    Args:
        smiles (str, list): SMILES string for a given molecule, or a
            list/tuple of SMILES strings
        descriptors (str, list): `ALL` for all descriptors, or list containing
            individual descriptors (str's)
        labels (bool): if `True`, labels are included in return value (dict);
            if `False`, no labels are included in return value (list)

    Returns:
        list, dict: for a single SMILES string, returns a list of descriptor
            values if `labels` is False, else a dict; for a list/tuple of
            SMILES strings, returns a list with one such entry per string,
            in input order

    Raises:
        IndexError: if alvaDesc produced no output for a SMILES string
    '''

    # isinstance (rather than an exact type comparison) also accepts list
    # subclasses and tuples; alvaDesc is invoked once per SMILES string
    if isinstance(smiles, (list, tuple)):
        return [
            _single_smiles_to_descriptors(smi, descriptors, labels)
            for smi in smiles
        ]
    return _single_smiles_to_descriptors(smiles, descriptors, labels)
wrapper for alvaDesc software 4 | 5 | [![GitHub version](https://badge.fury.io/gh/ecrl%2Falvadescpy.svg)](https://badge.fury.io/gh/ecrl%2Falvadescpy) 6 | [![PyPI version](https://badge.fury.io/py/alvadescpy.svg)](https://badge.fury.io/py/alvadescpy) 7 | [![GitHub license](https://img.shields.io/badge/license-MIT-blue.svg)](https://raw.githubusercontent.com/ecrl/alvadescpy/master/LICENSE.txt) 8 | 9 | alvaDescPy provides a Python wrapper for the [alvaDesc](https://www.alvascience.com/alvadesc/) molecular descriptor calculation software. It was created to allow direct access to the alvaDesc command-line interface via Python. 10 | 11 | Important Notice: 12 | 13 | Please note that alvaDescPy is an independent project and was not developed by Alvascience. You can find the official alvaDesc Python interface on Alvascience website: https://www.alvascience.com/python-alvadesc/. 14 | 15 | ## Installation 16 | 17 | Installation via pip: 18 | 19 | ``` 20 | $ pip install alvadescpy 21 | ``` 22 | 23 | Installation via cloned repository: 24 | 25 | ``` 26 | $ git clone https://github.com/ecrl/alvadescpy 27 | $ cd alvadescpy 28 | $ pip install . 29 | ``` 30 | 31 | There are currently no additional dependencies for alvaDescPy, however it requires a valid, licensed installation of [alvaDesc](https://www.alvascience.com/alvadesc/). 32 | 33 | ## Basic Usage 34 | 35 | alvaDescPy assumes the location of alvaDesc's command-line interface is located at your OS's default location. 
If alvaDesc is installed in a different location, you can change the path: 36 | 37 | ```python 38 | from alvadescpy import CONFIG 39 | 40 | CONFIG['alvadesc_path'] = '\\path\\to\\alvaDescCLI' 41 | ``` 42 | 43 | alvaDescPy provides direct access to all alvaDesc command line arguments via the "alvadesc" function: 44 | 45 | ```python 46 | from alvadescpy import alvadesc 47 | 48 | # providing an XML script file 49 | alvadesc(script='my_script.xml') 50 | 51 | # supplying a SMILES string returns a list of descriptors 52 | descriptors = alvadesc(ismiles='CCC', descriptors='ALL') 53 | 54 | # a list of Python dictionaries (one per molecule) is returned if labels are desired 55 | descriptors = alvadesc(ismiles='CCC', descriptors='ALL', labels=True) 56 | 57 | # specific descriptors can be calculated 58 | descriptors = alvadesc(ismiles='CCC', descriptors=['MW', 'AMW'], labels=True) 59 | 60 | # input/output files (and input type) can be specified 61 | alvadesc( 62 | input_file='mols.mdl', 63 | inputtype='MDL', 64 | descriptors='ALL', 65 | output='descriptors.txt' 66 | ) 67 | 68 | # various fingerprints can be calculated 69 | ecfp = alvadesc(ismiles='CCC', ecfp=True) 70 | pfp = alvadesc(ismiles='CCC', pfp=True) 71 | maccsfp = alvadesc(ismiles='CCC', maccsfp=True) 72 | 73 | # fingerprint hash size, min/max fragment length, bits/pattern and other 74 | # options can be specified 75 | ecfp = alvadesc( 76 | ismiles='CCC', 77 | ecfp=True, 78 | fpsize=2048, 79 | fpmin=1, 80 | fpmax=4, 81 | bits=4, 82 | fpoptions='- Additional Options -' 83 | ) 84 | 85 | # alvaDesc uses a number of threads equal to the maximum number of CPUs, but 86 | # can be changed 87 | descriptors=alvadesc(ismiles='CCC', descriptors='ALL', threads=4) 88 | ``` 89 | 90 | alvaDescPy also provides the "smiles_to_descriptors" function: 91 | 92 | ```python 93 | from alvadescpy import smiles_to_descriptors 94 | 95 | # returns a list of descriptor values 96 | descriptors = smiles_to_descriptors('CCC', descriptors='ALL', labels=False) 97 | 98 | # returns a dictionary of
descriptor labels, values 99 | descriptors = smiles_to_descriptors('CCC', descriptors='ALL', labels=True) 100 | 101 | # returns a dictionary containing MW, AMW labels, values 102 | descriptors = smiles_to_descriptors( 103 | 'CCC', 104 | descriptors=['MW', 'AMW'], 105 | labels=True 106 | ) 107 | ``` 108 | 109 | ## Contributing, Reporting Issues and Other Support 110 | 111 | To contribute to alvaDescPy, make a pull request. Contributions should include tests for new features added, as well as extensive documentation. 112 | 113 | To report problems with the software or feature requests, file an issue. When reporting problems, include information such as error messages, your OS/environment and Python version. 114 | 115 | For additional support/questions, contact Travis Kessler (Travis_Kessler@student.uml.edu). 116 | -------------------------------------------------------------------------------- /alvadescpy/wrapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # alvadescpy/wrapper.py 5 | # v.0.1.2 6 | # Developed in 2019 by Travis Kessler 7 | # 8 | # contains `alvadesc` function, a wrapper for alvaDesc software 9 | # 10 | 11 | # stdlib. 
from subprocess import PIPE, Popen
from typing import TypeVar
import platform

# path to alvaDesc command line interface executable; the default depends on
# the operating system and can be overridden by assigning to
# CONFIG['alvadesc_path']
CONFIG = {
    'alvadesc_path': None
}
plt = platform.system()
if plt == 'Windows':
    CONFIG['alvadesc_path'] = (
        'C:\\Program Files\\Alvascience\\alvaDesc\\alvaDescCLI.exe'
    )
elif plt == 'Darwin':
    CONFIG['alvadesc_path'] = (
        '/Applications/alvaDesc.app/Contents/MacOS/alvaDescCLI'
    )
elif plt == 'Linux':
    CONFIG['alvadesc_path'] = '/usr/bin/alvaDescCLI'
else:
    raise RuntimeError('Unknown/unsupported operating system: {}'.format(plt))

# custom argument variable (either str or list)
str_or_list = TypeVar('str_or_list', str, list)


def _sub_call(command: list) -> str:
    ''' _sub_call: calls alvaDesc via subprocess.Popen

    Args:
        command (list): argument vector to execute; the first element is the
            path to the executable

    Returns:
        str: the process's standard output, decoded as UTF-8

    Raises:
        FileNotFoundError: if the executable cannot be found; the message
            includes the currently configured `alvadesc_path`
    '''

    try:
        p = Popen(command, stdout=PIPE, stderr=PIPE)
    except FileNotFoundError as exception:
        # chain the original error so its traceback is kept
        raise FileNotFoundError('{}\n alvaDescCLI not found at {}'.format(
            exception, CONFIG['alvadesc_path']
        )) from exception
    # any other exception propagates unchanged, keeping its type/traceback
    return p.communicate()[0].decode('utf-8')


def _to_number(token: str):
    ''' _to_number: returns `token` converted to float if possible, otherwise
    returns `token` unchanged
    '''

    try:
        return float(token)
    except ValueError:
        return token


def _parse_output(raw: str, with_labels: bool) -> list:
    ''' _parse_output: converts tab-separated text into per-molecule records

    Args:
        raw (str): text to parse, one record per line, values separated by
            tabs
        with_labels (bool): if `True`, the first line is treated as a header
            of names

    Returns:
        list: list of dicts (name -> value) if `with_labels` is True, else a
            list of lists of values; empty list if `raw` contains no lines
    '''

    lines = raw.replace('\r\n', '\n').split('\n')
    # a trailing newline yields one empty final element; drop only that, so
    # a last line without a newline terminator is not lost
    if lines and lines[-1] == '':
        lines.pop()
    if not lines:
        return []

    names = None
    if with_labels:
        names = lines[0].split('\t')
        lines = lines[1:]

    rows = [[_to_number(v) for v in line.split('\t')] for line in lines]
    if names is None:
        return rows
    # zip stops at the shorter sequence, so a short row cannot raise
    return [dict(zip(names, row)) for row in rows]


def alvadesc(script: str = None, ismiles: str = None, input_file: str = None,
             inputtype: str = None, descriptors: str_or_list = None,
             labels: bool = False, ecfp: bool = False, pfp: bool = False,
             fpsize: int = 1024, fpmin: int = 0, fpmax: int = 2,
             count: bool = True, bits: int = 2, fpoptions: str = None,
             maccsfp: bool = False, output: str = None,
             threads: int = None) -> list:
    ''' alvadesc: calls alvaDesc's command line interface; supports all
    arguments

    Args:
        script (str): path to script file containing all available options; if
            supplied, nothing else should be supplied
        ismiles (str): use a single SMILES string as input
        input_file (str): uses a set of molecules in this file as inputs
        inputtype (str): if `input_file` is supplied, this is mandatory (e.g.
            `SMILES`, `MDL`, `SYBYL`, `HYPERCHEM`)
        descriptors (str, list): `ALL` for all descriptors, or a list for
            specific descriptors
        labels (bool): if `True`, adds descriptor and molecule labels
        ecfp (bool): if `True`, calculates extended connectivity fingerprint
        pfp (bool): if `True`, calculates path fingerprint
        fpsize (int): size of hashed fingerprint (default 1024)
        fpmin (int): minimum fragment length for hashed fingerprint (default 0)
        fpmax (int): maximum fragments for hashed fingerprint (default 2)
        count (bool): if `True`, counts fragments for hashed fingerprint
            (default True)
        bits (int): bits per pattern for hashed fingerprint (default 2)
        fpoptions (str): atom types for hashed fingerprint (default Atom type,
            Aromaticity, Charge, Connectivity (total), Bond order)
        maccsfp (bool): if `True`, calculates MACCS116 fingerprint
        output (str): if not `None`, saves descriptors to this file
        threads (int): number of threads used in the calculation (default:
            equal to the maximum number of CPUs)

    Returns:
        list: if `labels` is True, returns a list of dicts, where each dict
            corresponds to a single molecule; if `labels` is False, returns a
            list of lists, where each sublist contains a molecule's descriptor
            values; an empty list if alvaDesc wrote nothing to standard
            output; `None` if `script` is supplied

    Raises:
        ValueError: if both `ismiles` and `input_file` are supplied, if
            `input_file` is supplied without `inputtype`, if more than one
            fingerprint type is requested, or if `descriptors` is neither
            `ALL` nor a list/tuple
        FileNotFoundError: if the alvaDesc executable is not found at
            `CONFIG['alvadesc_path']`
    '''

    if script is not None:
        # the executable must lead the argument vector; the script file
        # carries every other option, so nothing is parsed or returned
        _sub_call([CONFIG['alvadesc_path'], '--script={}'.format(script)])
        return

    if ismiles is not None and input_file is not None:
        raise ValueError('`ismiles` and `input_file` cannot both be supplied')

    if input_file is not None and inputtype is None:
        raise ValueError('Must supply `inputtype` if supplying `input_file`')

    command = [CONFIG['alvadesc_path']]

    if ismiles is not None:
        command.append('--iSMILES={}'.format(ismiles))

    if input_file is not None:
        command.append('--input={}'.format(input_file))
        command.append('--inputtype={}'.format(inputtype))

    if output is not None:
        command.append('--output={}'.format(output))

    if threads is not None:
        command.append('--threads={}'.format(threads))

    fp_flags = [
        flag for flag, enabled in (
            ('--ecfp', ecfp), ('--pfp', pfp), ('--maccsfp', maccsfp)
        ) if enabled is True
    ]
    if fp_flags:
        if len(fp_flags) > 1:
            raise ValueError('Only one type of fingerprint can be calculated')
        command.append(fp_flags[0])
        command.append('--size={}'.format(fpsize))
        command.append('--min={}'.format(fpmin))
        command.append('--max={}'.format(fpmax))
        command.append('--bits={}'.format(bits))
        if count is not True:
            command.append('--count=FALSE')
        if fpoptions is not None:
            command.append('--fpoptions={}'.format(fpoptions))

    with_labels = labels is True
    if with_labels:
        command.append('--labels')

    if descriptors is not None:
        if isinstance(descriptors, str) and descriptors == 'ALL':
            command.append('--descriptors=ALL')
        elif isinstance(descriptors, (list, tuple)):
            command.append('--descriptors={}'.format(
                ','.join('{}'.format(desc) for desc in descriptors)
            ))
        else:
            raise ValueError('Unknown `descriptors` argument: {}'.format(
                descriptors
            ))

    return _parse_output(_sub_call(command), with_labels)