├── .gitignore ├── .travis.yml ├── MANIFEST.in ├── PyCFMID ├── PyCFMID.py ├── Windows │ ├── ISOTOPE.DAT │ ├── README.txt │ ├── cfm-annotate.exe │ ├── cfm-id-precomputed.exe │ ├── cfm-id.exe │ ├── cfm-predict.exe │ ├── cfm-train.exe │ ├── compute-stats.exe │ ├── fraggraph-gen.exe │ ├── license.txt │ └── lpsolve55.dll ├── __init__.py ├── ei_config.log ├── ei_config.txt ├── esi_config.log ├── esi_config.txt ├── esi_config_neg.log └── esi_config_neg.txt ├── ReadMe.md ├── Test └── test.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | Input/ 2 | Output/ 3 | Candidate/ 4 | 5 | # PyCharm 6 | .idea/ 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | build/ 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | *.egg-info/ 31 | .installed.cfg 32 | *.egg 33 | MANIFEST 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # celery beat schedule file 89 | celerybeat-schedule 90 | 91 | # SageMath parsed files 92 | *.sage.py 93 | 94 | # Environments 95 | .env 96 | .venv 97 | env/ 98 | venv/ 99 | ENV/ 100 | env.bak/ 101 | venv.bak/ 102 | 103 | # Spyder project settings 104 | .spyderproject 105 | .spyproject 106 | 107 | # Rope project settings 108 | .ropeproject 109 | 110 | # mkdocs documentation 111 | /site 112 | 113 | # mypy 114 | .mypy_cache/ 115 | .dmypy.json 116 | dmypy.json 117 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | - "3.7" 5 | 6 | install: 7 | - pip install -r requirements.txt 8 | - pip install git+git://github.com/hcji/PyCFMID@master 9 | 10 | script: nosetests --with-coverage -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include PyCFMID/ei_config.log 2 | include PyCFMID/ei_config.txt 3 | include PyCFMID/esi_config.log 4 | include PyCFMID/esi_config.txt 5 | include PyCFMID/esi_config_neg.log 6 | include 
# PyCFMID/esi_config_neg.txt 7 | include PyCFMID/Windows/* -------------------------------------------------------------------------------- /PyCFMID/PyCFMID.py: --------------------------------------------------------------------------------
"""Python wrappers around the CFM-ID command-line executables bundled with PyCFMID.

Each wrapper builds the argument list for one executable, runs it, and parses
the text file the executable writes into pandas DataFrames.
"""
import os
import platform
import json
import requests
import pubchempy as pc
from bs4 import BeautifulSoup
import subprocess
import pandas as pd
import numpy as np
import ssl
import PyCFMID

# NOTE(review): this replaces the default HTTPS context factory with the
# unverified one for the whole process, i.e. certificate verification is
# turned off for code relying on that default. Kept because it is an existing
# import-time side effect; flagging it rather than silently changing it.
ssl._create_default_https_context = ssl._create_unverified_context

package_path = PyCFMID.__path__[0]
cwd = os.path.join(package_path, 'Windows')


def _ensure_directory(name):
    """Create directory *name* under the current working directory and return its path."""
    path = os.path.join(os.getcwd(), name)
    # exist_ok covers the "already there" case the old bare ``except: pass``
    # was written for, without hiding permission or other OS errors.
    os.makedirs(path, exist_ok=True)
    return path


def _resolve_model_files(ion_source, ionization_mode, param_file, config_file):
    """Fill in the bundled parameter/config files for any that were left as ''.

    'ESI' with mode '+' selects ``esi_config``, 'ESI' with any other mode
    selects ``esi_config_neg``, and any other ion source selects ``ei_config``.
    """
    if ion_source == 'ESI':
        config = 'esi_config' if ionization_mode == '+' else 'esi_config_neg'
    else:
        config = 'ei_config'
    if param_file == '':
        param_file = os.path.join(package_path, config + '.log')
    if config_file == '':
        config_file = os.path.join(package_path, config + '.txt')
    return param_file, config_file


def _run_program(name, args):
    """Run the bundled executable *name* with *args* and return its exit status.

    The command is passed as a list so that paths or arguments containing
    spaces reach the program as single arguments.
    """
    program = os.path.join(package_path, platform.platform().split('-')[0], name)
    command = [os.path.join(os.getcwd(), program)]
    command.extend(str(arg) for arg in args)
    # As before, the executables are started from the module-level ``cwd``
    # directory (the bundled 'Windows' folder).
    return subprocess.call(command, cwd=cwd)


def check_output_file(output_file=None):
    """Return *output_file*, defaulting to ``<cwd>/Output/output.txt``.

    The ``Output`` directory is created when the default is used.
    """
    if output_file is None:
        output_file = os.path.join(_ensure_directory('Output'), 'output.txt')
    return output_file


def check_input_file(input_dir=None):
    """Return *input_dir*, defaulting to ``<cwd>/Input`` (created if missing)."""
    if input_dir is None:
        input_dir = _ensure_directory('Input')
    return input_dir


def fraggraph_gen(smiles, max_depth=2, ionization_mode='+', fullgraph=True, output_file=None):
    """Run ``fraggraph-gen.exe`` for *smiles* and return the parsed result.

    Args:
        smiles: Structure passed as the first program argument.
        max_depth: Passed as the second program argument.
        ionization_mode: Passed as the third program argument.
        fullgraph: If true the program is asked for 'fullgraph', otherwise
            for 'fragonly'.
        output_file: File the program writes; see ``check_output_file``.

    Returns:
        The dict produced by ``parser_fraggraph_gen``.
    """
    output_file = check_output_file(output_file)
    graph_mode = 'fullgraph' if fullgraph else 'fragonly'
    _run_program('fraggraph-gen.exe',
                 [smiles, max_depth, ionization_mode, graph_mode, output_file])
    return parser_fraggraph_gen(output_file)


def parser_fraggraph_gen(output_file):
    """Parse a fraggraph-gen output file.

    The first line holds the fragment count ``n``; the next ``n`` lines are
    space-separated ``index mass smiles`` records; one line is skipped; the
    remaining lines are space-separated ``from to smiles`` records.

    Returns:
        ``{'fragments': DataFrame(index, mass, smiles),
        'losses': DataFrame(from, to, smiles)}``.
    """
    with open(output_file) as handle:
        lines = [line.replace('\n', '') for line in handle]
    nfrags = int(lines[0])
    # Split every record once instead of once per column.
    frag_rows = [lines[i].split(' ') for i in range(1, nfrags + 1)]
    # Blank lines in the loss section are ignored rather than failing in int('').
    loss_rows = [lines[i].split(' ') for i in range(nfrags + 2, len(lines)) if lines[i]]
    fragments = pd.DataFrame({
        'index': [int(row[0]) for row in frag_rows],
        'mass': [float(row[1]) for row in frag_rows],
        'smiles': [row[2] for row in frag_rows],
    })
    losses = pd.DataFrame({
        'from': [int(row[0]) for row in loss_rows],
        'to': [int(row[1]) for row in loss_rows],
        'smiles': [row[2] for row in loss_rows],
    })
    return {'fragments': fragments, 'losses': losses}


def cfm_predict(smiles, prob_thresh=0.001, ion_source='ESI', ionization_mode='+', param_file='', config_file='', annotate_fragments=False, output_file=None, apply_postproc=True, suppress_exceptions=False):
    """Run ``cfm-predict.exe`` for *smiles* and return the predicted spectra.

    Args:
        smiles: Structure passed as the first program argument.
        prob_thresh: Passed through to the program.
        ion_source, ionization_mode: Select the bundled model files when
            *param_file* / *config_file* are ''.
        param_file, config_file: Explicit model files; '' means "use bundled".
        annotate_fragments, apply_postproc, suppress_exceptions: Passed to the
            program as 1/0 flags.
        output_file: File the program writes; see ``check_output_file``.

    Returns:
        The dict produced by ``parser_cfm_predict``.
    """
    output_file = check_output_file(output_file)
    param_file, config_file = _resolve_model_files(ion_source, ionization_mode, param_file, config_file)
    _run_program('cfm-predict.exe', [
        smiles,
        prob_thresh,
        param_file,
        config_file,
        1 if annotate_fragments else 0,
        output_file,
        1 if apply_postproc else 0,
        1 if suppress_exceptions else 0,
    ])
    return parser_cfm_predict(output_file)


def parser_cfm_predict(output_file):
    """Parse a cfm-predict output file into one DataFrame per energy section.

    Lines equal to ``energy0`` / ``energy1`` / ``energy2`` switch the current
    section, blank lines are skipped, and every other line is read as
    whitespace-separated floats forming one ``(mz, intensity)`` row. Rows seen
    before any header go to the low-energy section.

    Returns:
        ``{'low_energy': df, 'medium_energy': df, 'high_energy': df}`` with
        columns ``mz`` and ``intensity``.
    """
    sections = {'energy0': 'low_energy', 'energy1': 'medium_energy', 'energy2': 'high_energy'}
    rows = {'low_energy': [], 'medium_energy': [], 'high_energy': []}
    current = 'low_energy'
    with open(output_file) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if line in sections:
                current = sections[line]
            elif line:
                rows[current].append([float(value) for value in line.split()])
    # Build each frame once; appending row by row with .loc is quadratic.
    return {name: pd.DataFrame(peaks, columns=['mz', 'intensity']) for name, peaks in rows.items()}


def cfm_id(spectrum_file, candidate_file, num_highest=-1, ppm_mass_tol=10, abs_mass_tol=0.01, prob_thresh=0.001, ion_source='ESI', ionization_mode='+', param_file='', config_file='', score_type='Jaccard', apply_postprocessing=True, output_file=None):
    """Run ``cfm-id.exe`` to score candidates against a spectrum file.

    Args:
        spectrum_file: Spectrum file path (see ``write_spectrum``).
        candidate_file: Candidate list file path.
        num_highest, ppm_mass_tol, abs_mass_tol, prob_thresh, score_type:
            Passed through to the program.
        ion_source, ionization_mode: Select the bundled model files when
            *param_file* / *config_file* are ''.
        param_file, config_file: Explicit model files; '' means "use bundled".
        apply_postprocessing: Passed to the program as a 1/0 flag.
        output_file: File the program writes; see ``check_output_file``.

    Returns:
        The DataFrame produced by ``parser_cfm_id``.
    """
    output_file = check_output_file(output_file)
    param_file, config_file = _resolve_model_files(ion_source, ionization_mode, param_file, config_file)
    _run_program('cfm-id.exe', [
        spectrum_file,
        'AN_ID',  # fixed second argument, unchanged from the original command line
        candidate_file,
        num_highest,
        ppm_mass_tol,
        abs_mass_tol,
        prob_thresh,
        param_file,
        config_file,
        score_type,
        1 if apply_postprocessing else 0,
        output_file,
    ])
    return parser_cfm_id(output_file)


def cfm_id_database(spectrum_dataframe, formula, energy_level='high', database='biodb', input_dir=None, num_highest=-1, ppm_mass_tol=10, abs_mass_tol=0.01, prob_thresh=0.001, ion_source='ESI', ionization_mode='+', param_file='', config_file='', score_type='Jaccard', apply_postprocessing=True, output_file=None, structureDB=None):
    """Look up candidates for *formula* and rank them against a spectrum.

    Args:
        spectrum_dataframe: Peaks; written out with ``write_spectrum``.
        formula: Molecular formula used for the candidate search.
        energy_level: Forwarded to ``write_spectrum``.
        database: 'biodb' (search *structureDB*), 'pubchem' (search PubChem),
            or a path to a space-separated, header-less ``ID Smiles`` file.
        input_dir: Directory for the generated spectrum/candidate files; see
            ``check_input_file``.
        structureDB: DataFrame searched when *database* is 'biodb'; see
            ``search_biodatabase``.
        Remaining arguments are forwarded to ``cfm_id``.

    Returns:
        ``{'candidates': DataFrame, 'result': DataFrame}``.

    Raises:
        ValueError: If *database* is 'biodb' and *structureDB* is not given.
    """
    if database == 'biodb' and structureDB is None:
        # Previously the candidate file path was passed as the structure
        # table, so this branch always died with an opaque TypeError.
        raise ValueError("database='biodb' requires the structureDB DataFrame "
                         "(columns 'Formula' and 'SMILES')")
    input_dir = check_input_file(input_dir)
    output_file = check_output_file(output_file)
    param_file, config_file = _resolve_model_files(ion_source, ionization_mode, param_file, config_file)
    spectrum_file = os.path.join(input_dir, 'spectrum.txt')
    candidate_file = os.path.join(input_dir, 'candidate.txt')
    spectrum_file = write_spectrum(spectrum_dataframe, spectrum_file, energy_level)
    if database == 'biodb':
        candidates = search_biodatabase(formula, structureDB, candidate_file)
    elif database == 'pubchem':
        candidates = search_pubchem(formula, candidate_file)
    else:
        candidates = pd.read_csv(database, sep=' ', header=None)
        candidates.columns = ['ID', 'Smiles']
        candidate_file = database
    result = cfm_id(spectrum_file, candidate_file, num_highest, ppm_mass_tol, abs_mass_tol, prob_thresh, ion_source, ionization_mode, param_file, config_file, score_type, apply_postprocessing, output_file)
    return {'candidates': candidates, 'result': result}


def write_spectrum(spectrum_dataframe, spectrum_file, energy_level='high'):
    """Write the first two columns of *spectrum_dataframe* as a spectrum file.

    All three section headers (``energy0``, ``energy1``, ``energy2``) are
    always written; the peaks are placed only under the section matching
    *energy_level* ('low', 'medium' or 'high'). Any other value yields three
    empty sections.

    Returns:
        *spectrum_file*.
    """
    first = spectrum_dataframe.iloc[:, 0].values
    second = spectrum_dataframe.iloc[:, 1].values
    peak_lines = [str(a) + ' ' + str(b) + '\n' for a, b in zip(first, second)]
    with open(spectrum_file, 'w') as handle:
        for header, level in (('energy0', 'low'), ('energy1', 'medium'), ('energy2', 'high')):
            handle.write(header + '\n')
            if energy_level == level:
                handle.writelines(peak_lines)
    return spectrum_file


def parser_cfm_id(output_file):
    """Read a cfm-id result file into a DataFrame.

    The file is whitespace-separated without a header; the first column
    becomes the index and the rest are named ``Score``, ``ID``, ``Smiles``.
    """
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True.
    output = pd.read_csv(output_file, sep=r'\s+', header=None, index_col=0)
    output.columns = ['Score', 'ID', 'Smiles']
    return output


def search_biodatabase(formula, structureDB, output_file=None):
    """Select the rows of *structureDB* whose 'Formula' equals *formula*.

    The matches are also written to *output_file* as space-separated
    ``ID Smiles`` lines (ID is the row's index label, Smiles its 'SMILES').

    Returns:
        The matching rows of *structureDB*.
    """
    output_file = check_output_file(output_file)
    result = structureDB[structureDB['Formula'] == formula]
    output = pd.DataFrame({'ID': result.index, 'Smiles': result['SMILES']})
    output.to_csv(output_file, header=False, index=False, sep=' ')
    return result


def search_pubchem(formula, output_file=None, timeout=999):
    """Fetch PubChem compounds for *formula*, merged by canonical SMILES.

    CIDs are looked up with pubchempy and their InChIKey / CanonicalSMILES
    properties requested from PUG REST in batches of 100. Compounds sharing a
    SMILES are merged into one row whose 'PubChem' field lists all CIDs
    separated by ', '. The candidates are also written to *output_file* as
    space-separated ``ID Smiles`` lines.

    Args:
        formula: Molecular formula to search.
        output_file: Candidate file to write; see ``check_output_file``.
        timeout: Timeout in seconds for each HTTP request.

    Returns:
        DataFrame with columns ``InChIKey``, ``SMILES`` and ``PubChem``.

    Raises:
        requests.HTTPError: If a property request returns an error status.
    """
    output_file = check_output_file(output_file)
    # get pubchem cid based on formula
    cids = list(pc.get_cids(formula, 'formula', list_return='flat'))
    smiles = []
    inchikey = []
    all_cids = []
    row_of_smiles = {}  # SMILES -> position in the lists above
    # search pubchem via formula with pug, 100 CIDs per request
    for start in range(0, len(cids), 100):
        idstring = ','.join(str(cid) for cid in cids[start:start + 100])
        url = "http://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/" + idstring + "/property/InChIKey,CanonicalSMILES/JSON"
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # The body is JSON; decode it directly instead of round-tripping it
        # through an HTML parser.
        for properties in response.json()['PropertyTable']['Properties']:
            compound_smiles = properties['CanonicalSMILES']
            row = row_of_smiles.get(compound_smiles)
            if row is None:
                row_of_smiles[compound_smiles] = len(smiles)
                smiles.append(compound_smiles)
                inchikey.append(properties['InChIKey'])
                all_cids.append(str(properties['CID']))
            else:
                all_cids[row] = all_cids[row] + ', ' + str(properties['CID'])
    result = pd.DataFrame({'InChIKey': inchikey, 'SMILES': smiles, 'PubChem': all_cids})
    output = pd.DataFrame({'ID': result.index, 'Smiles': result['SMILES']})
    output.to_csv(output_file, header=False, index=False, sep=' ')
    return result

# -------------------------------------------------------------------------------- /PyCFMID/Windows/ISOTOPE.DAT: -------------------------------------------------------------------------------- 1 | X 2 2 | 1 0.9 3 | 2 0.1 4 | 5 | H 2 6 | 1.0078246 0.99985 7 | 2.0141021 0.00015 8 | 9 | He 2 10 | 3.01603 0.00000138 11 | 4.00260 0.99999862 12 | 13 | Li 2 14 | 6.015121 0.075 15 | 7.016003 0.925 16 | 17 | Be 1 18 | 9.012182 1.0 19 | 20 | B 2 21 | 10.012937 0.199 22 | 11.009305 0.801 23 | 24 | C 2 25 | 12.0000000 0.988930 26 | 13.0033554 0.011070 27 | 28 | N 2 29 | 14.0030732 0.996337 30 | 15.0001088 0.003663 31 | 32 | O 3 33 | 15.9949141 0.997590 34 | 16.9991322 0.000374 35 | 17.9991616 0.002036 36 | 37 | F 1 38 | 18.9984032 1.0 39 | 40 | Ne 3 41 | 19.992435 0.9048 42 | 20.993843 0.0027 43 | 21.991383 0.0925 44 | 45 | Na 1 46 | 22.989767 1.0 47 | 48 | Mg 3 49 | 23.985042 0.7899 50 | 24.985837 0.1000 51 | 25.982593 0.1101 52 | 53 | Al 1 54 | 26.981539 1.0 55 | 56 | Si 3 57 | 27.976927 0.9223 58 | 28.976495 0.0467 59 | 29.973770 0.0310 60 | 61 | P 1 62 | 30.973762 1.0 63 | 64 | S 4 65 | 31.972070 0.9502 66 | 32.971456 0.0075 67 | 33.967866 0.0421 68 | 35.967080 0.0002 69 | 70 | Cl 2 71 | 34.9688531 0.755290 72 | 36.9659034 0.244710 73 | 74 | Ar 3 75 | 35.967545 0.00337 76 | 37.962732 0.00063 77 | 39.962384 0.99600 78 | 79
| K 3 80 | 38.963707 0.932581 81 | 39.963999 0.000117 82 | 40.961825 0.067302 83 | 84 | Ca 6 85 | 39.962591 0.96941 86 | 41.958618 0.00647 87 | 42.958766 0.00135 88 | 43.955480 0.02086 89 | 45.953689 0.00004 90 | 47.952533 0.00187 91 | 92 | Sc 1 93 | 44.955910 1.0 94 | 95 | Ti 5 96 | 45.952629 0.080 97 | 46.951764 0.073 98 | 47.947947 0.738 99 | 48.947871 0.055 100 | 49.944792 0.054 101 | 102 | V 2 103 | 49.947161 0.00250 104 | 50.943962 0.99750 105 | 106 | Cr 4 107 | 49.946046 0.04345 108 | 51.940509 0.83790 109 | 52.940651 0.09500 110 | 53.938882 0.02365 111 | 112 | Mn 1 113 | 54.938047 1.0 114 | 115 | Fe 4 116 | 53.939612 0.0590 117 | 55.934939 0.9172 118 | 56.935396 0.0210 119 | 57.933277 0.0028 120 | 121 | Co 1 122 | 58.933198 1.0 123 | 124 | Ni 5 125 | 57.935346 0.6827 126 | 59.930788 0.2610 127 | 60.931058 0.0113 128 | 61.928346 0.0359 129 | 63.927968 0.0091 130 | 131 | Cu 2 132 | 62.939598 0.6917 133 | 64.927793 0.3083 134 | 135 | Zn 5 136 | 63.929145 0.486 137 | 65.926034 0.279 138 | 66.927129 0.041 139 | 67.924846 0.188 140 | 69.925325 0.006 141 | 142 | Ga 2 143 | 68.925580 0.60108 144 | 70.924700 0.39892 145 | 146 | Ge 5 147 | 69.924250 0.205 148 | 71.922079 0.274 149 | 72.923463 0.078 150 | 73.921177 0.365 151 | 75.921401 0.078 152 | 153 | As 1 154 | 74.921594 1.0 155 | 156 | Se 6 157 | 73.922475 0.009 158 | 75.919212 0.091 159 | 76.919912 0.076 160 | 77.9190 0.236 161 | 79.916520 0.499 162 | 81.916698 0.089 163 | 164 | Br 2 165 | 78.918336 0.5069 166 | 80.916289 0.4931 167 | 168 | Kr 6 169 | 77.914 0.0035 170 | 79.916380 0.0225 171 | 81.913482 0.116 172 | 82.914135 0.115 173 | 83.911507 0.570 174 | 85.910616 0.173 175 | 176 | Rb 2 177 | 84.911794 0.7217 178 | 86.909187 0.2783 179 | 180 | Sr 4 181 | 83.913430 0.0056 182 | 85.909267 0.0986 183 | 86.908884 0.0700 184 | 87.905619 0.8258 185 | 186 | Y 1 187 | 88.905849 1.0 188 | 189 | Zr 5 190 | 89.904703 0.5145 191 | 90.905644 0.1122 192 | 91.905039 0.1715 193 | 93.906314 0.1738 194 | 95.908275 0.0280 195 
| 196 | Nb 1 197 | 92.906377 1.0 198 | 199 | Mo 7 200 | 91.906808 0.1484 201 | 93.905085 0.0925 202 | 94.905840 0.1592 203 | 95.904678 0.1668 204 | 96.906020 0.0955 205 | 97.905406 0.2413 206 | 99.907477 0.0963 207 | 208 | Tc 1 209 | 98.0 1.0 210 | 211 | Ru 7 212 | 95.907599 0.0554 213 | 97.905287 0.0186 214 | 98.905939 0.127 215 | 99.904219 0.126 216 | 100.905582 0.171 217 | 101.904348 0.316 218 | 103.905424 0.186 219 | 220 | Rh 1 221 | 102.905500 1.0 222 | 223 | Pd 6 224 | 101.905634 0.0102 225 | 103.904029 0.1114 226 | 104.905079 0.2233 227 | 105.903478 0.2733 228 | 107.903895 0.2646 229 | 109.905167 0.1172 230 | 231 | Ag 2 232 | 106.905092 0.51839 233 | 108.904757 0.48161 234 | 235 | Cd 8 236 | 105.906461 0.0125 237 | 107.904176 0.0089 238 | 109.903005 0.1249 239 | 110.904182 0.1280 240 | 111.902758 0.2413 241 | 112.904400 0.1222 242 | 113.903357 0.2873 243 | 115.904754 0.0749 244 | 245 | In 2 246 | 112.904061 0.043 247 | 114.903880 0.957 248 | 249 | Sn 10 250 | 111.904826 0.0097 251 | 113.902784 0.0065 252 | 114.903348 0.0036 253 | 115.901747 0.1453 254 | 116.902956 0.0768 255 | 117.901609 0.2422 256 | 118.903310 0.0858 257 | 119.902200 0.3259 258 | 121.903440 0.0463 259 | 123.905274 0.0579 260 | 261 | Sb 2 262 | 120.903821 0.574 263 | 122.904216 0.426 264 | 265 | Te 8 266 | 119.904048 0.00095 267 | 121.903054 0.0259 268 | 122.904271 0.00905 269 | 123.902823 0.0479 270 | 124.904433 0.0712 271 | 125.903314 0.1893 272 | 127.904463 0.3170 273 | 129.906229 0.3387 274 | 275 | I 1 276 | 126.904473 1.0 277 | 278 | Xe 9 279 | 123.905894 0.0010 280 | 125.904281 0.0009 281 | 127.903531 0.0191 282 | 128.904780 0.264 283 | 129.903509 0.041 284 | 130.905072 0.212 285 | 131.904144 0.269 286 | 133.905395 0.104 287 | 135.907214 0.089 288 | 289 | Cs 1 290 | 132.905429 1.0 291 | 292 | Ba 7 293 | 129.906282 0.00106 294 | 131.905042 0.00101 295 | 133.904486 0.0242 296 | 134.905665 0.06593 297 | 135.904553 0.0785 298 | 136.905812 0.1123 299 | 137.905232 0.7170 300 | 301 | La 2 302 
| 137.90711 0.00090 303 | 138.906347 0.99910 304 | 305 | Ce 4 306 | 135.907140 0.0019 307 | 137.905985 0.0025 308 | 139.905433 0.8843 309 | 141.909241 0.1113 310 | 311 | Pr 1 312 | 140.907647 1.0 313 | 314 | Nd 7 315 | 141.907719 0.2713 316 | 142.909810 0.1218 317 | 143.910083 0.2380 318 | 144.912570 0.0830 319 | 145.913113 0.1719 320 | 147.916889 0.0576 321 | 149.920887 0.0564 322 | 323 | Pm 1 324 | 145.0 1.0 325 | 326 | Sm 7 327 | 143.911998 0.031 328 | 146.914895 0.150 329 | 147.914820 0.113 330 | 148.917181 0.138 331 | 149.917273 0.074 332 | 151.919729 0.267 333 | 153.922206 0.227 334 | 335 | Eu 2 336 | 150.919847 0.478 337 | 152.921225 0.522 338 | 339 | Gd 7 340 | 151.919786 0.0020 341 | 153.920861 0.0218 342 | 154.922618 0.1480 343 | 155.922118 0.2047 344 | 156.923956 0.1565 345 | 157.924099 0.2484 346 | 159.927049 0.2186 347 | 348 | Tb 1 349 | 158.925342 1.0 350 | 351 | Dy 7 352 | 155.925277 0.0006 353 | 157.924403 0.0010 354 | 159.925193 0.0234 355 | 160.926930 0.189 356 | 161.926795 0.255 357 | 162.928728 0.249 358 | 163.929171 0.282 359 | 360 | Ho 1 361 | 164.930319 1.0 362 | 363 | Er 6 364 | 161.928775 0.0014 365 | 163.929198 0.0161 366 | 165.930290 0.336 367 | 166.932046 0.2295 368 | 167.932368 0.268 369 | 169.935461 0.149 370 | 371 | Tm 1 372 | 168.934212 1.0 373 | 374 | Yb 7 375 | 167.933894 0.0013 376 | 169.934759 0.0305 377 | 170.936323 0.143 378 | 171.936378 0.219 379 | 172.938208 0.1612 380 | 173.938859 0.318 381 | 175.942564 0.127 382 | 383 | Lu 2 384 | 174.940770 0.9741 385 | 175.942679 0.0259 386 | 387 | Hf 6 388 | 173.940044 0.00162 389 | 175.941406 0.05206 390 | 176.943217 0.18606 391 | 177.943696 0.27297 392 | 178.945812 0.13629 393 | 179.946545 0.35100 394 | 395 | Ta 2 396 | 179.947462 0.00012 397 | 180.947992 0.99988 398 | 399 | W 5 400 | 179.946701 0.0012 401 | 181.948202 0.263 402 | 182.950220 0.1428 403 | 183.950928 0.307 404 | 185.954357 0.286 405 | 406 | Re 2 407 | 184.952951 0.3740 408 | 186.955744 0.6260 409 | 410 | Os 7 411 | 
183.952488 0.0002 412 | 185.953830 0.0158 413 | 186.955741 0.016 414 | 187.955860 0.133 415 | 188.958137 0.161 416 | 189.958436 0.264 417 | 191.961467 0.410 418 | 419 | Ir 2 420 | 190.960584 0.373 421 | 192.962917 0.627 422 | 423 | Pt 6 424 | 189.959917 0.0001 425 | 191.961019 0.0079 426 | 193.962655 0.329 427 | 194.964766 0.338 428 | 195.964926 0.253 429 | 197.967869 0.072 430 | 431 | Au 1 432 | 196.966543 1.0 433 | 434 | Hg 7 435 | 195.965807 0.0015 436 | 197.966743 0.100 437 | 198.968254 0.169 438 | 199.968300 0.231 439 | 200.970277 0.132 440 | 201.970617 0.298 441 | 203.973467 0.0685 442 | 443 | Tl 2 444 | 202.972320 0.29524 445 | 204.974401 0.70476 446 | 447 | Pb 4 448 | 203.973020 0.014 449 | 205.974440 0.241 450 | 206.975872 0.221 451 | 207.976627 0.524 452 | 453 | Bi 1 454 | 208.980374 1.0 455 | 456 | Po 1 457 | 209.0 1.0 458 | 459 | At 1 460 | 210.0 1.0 461 | 462 | Rn 1 463 | 222.0 1.0 464 | 465 | Fr 1 466 | 223.0 1.0 467 | 468 | Ra 1 469 | 226.025 1.0 470 | 471 | Ac 1 472 | 227.028 1.0 473 | 474 | Th 1 475 | 232.038054 1.0 476 | 477 | Pa 1 478 | 231.0359 1.0 479 | 480 | U 3 481 | 234.040946 0.000055 482 | 235.043924 0.00720 483 | 238.050784 0.992745 484 | 485 | Np 1 486 | 237.048 1.0 487 | 488 | Pu 1 489 | 244.0 1.0 490 | 491 | Am 1 492 | 243.0 1.0 493 | 494 | Cm 1 495 | 247.0 1.0 496 | 497 | Bk 1 498 | 247.0 1.0 499 | 500 | Cf 1 501 | 251.0 1.0 502 | 503 | Es 1 504 | 252.0 1.0 505 | 506 | Fm 1 507 | 257.0 1.0 508 | 509 | Md 1 510 | 258.0 1.0 511 | 512 | No 1 513 | 259.0 1.0 514 | 515 | Lr 1 516 | 260.0 1.0 517 | 518 | -------------------------------------------------------------------------------- /PyCFMID/Windows/README.txt: -------------------------------------------------------------------------------- 1 | /*######################################################################### 2 | # CFM-ID Competitive Fragmentation Modeling for Metabolite Identification 3 | # 4 | # Copyright (c) 2013, Felicity Allen 5 | # All rights reserved. 
6 | # 7 | # These executables are part of the cfm-id project. 8 | # This project is covered by the terms of the GNU Lesser General Public 9 | # License, which is included in the file license.txt. 10 | # 11 | # These executables link or use source code (emass only) from the following 12 | # components, with their respective license conditions: 13 | # ########################################################################*/ 14 | 15 | *** RDKit **** 16 | 17 | Copyright (c) 2006-2011 18 | Rational Discovery LLC, Greg Landrum, and Julie Penzotti 19 | 20 | All rights reserved. 21 | 22 | Redistribution and use in source and binary forms, with or without 23 | modification, are permitted provided that the following conditions are 24 | met: 25 | 26 | * Redistributions of source code must retain the above copyright 27 | notice, this list of conditions and the following disclaimer. 28 | * Redistributions in binary form must reproduce the above 29 | copyright notice, this list of conditions and the following 30 | disclaimer in the documentation and/or other materials provided 31 | with the distribution. 32 | * Neither the name of Rational Discovery nor the names of its 33 | contributors may be used to endorse or promote products derived 34 | from this software without specific prior written permission. 35 | 36 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 37 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 38 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 39 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 40 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 41 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 42 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 43 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 44 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 45 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 46 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 47 | 48 | *** LPSolve *** 49 | 50 | https://sourceforge.net/projects/lpsolve 51 | 52 | LPSolve is covered by the terms of the GNU Lesser General Public 53 | License v2.1, which is included in the file license.txt. 54 | 55 | *** Boost *** 56 | 57 | Boost Software License - Version 1.0 - August 17th, 2003 58 | 59 | Permission is hereby granted, free of charge, to any person or organization 60 | obtaining a copy of the software and accompanying documentation covered by 61 | this license (the "Software") to use, reproduce, display, distribute, 62 | execute, and transmit the Software, and to prepare derivative works of the 63 | Software, and to permit third-parties to whom the Software is furnished to 64 | do so, all subject to the following: 65 | 66 | The copyright notices in the Software and this entire statement, including 67 | the above license grant, this restriction and the following disclaimer, 68 | must be included in all copies of the Software, in whole or in part, and 69 | all derivative works of the Software, unless such copies or derivative 70 | works are solely in the form of machine-executable object code generated by 71 | a source language processor. 72 | 73 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 74 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 75 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT 76 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 77 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 78 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 79 | DEALINGS IN THE SOFTWARE. 80 | 81 | *** emass *** 82 | 83 | This collective work is Copyright (C)2005 by Perttu Haimi 84 | Individual portions may be copyright by individual 85 | contributors, and are included in this collective work with 86 | permission of the copyright owners. 87 | 88 | All rights reserved. 89 | 90 | Redistribution and use in source and binary forms, 91 | with or without modification, are permitted provided 92 | that the following conditions are met: 93 | 94 | * Redistributions of source code must retain the 95 | above copyright notice, this list of conditions 96 | and the following disclaimer. 97 | * Redistributions in binary form must reproduce 98 | the above copyright notice, this list of conditions 99 | and the following disclaimer in the documentation 100 | and/or other materials provided with the distribution. 101 | * Neither the author nor the names of any contributors 102 | may be used to endorse or promote products derived 103 | from this software without specific prior written 104 | permission. 105 | 106 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 107 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 108 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 109 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 110 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR 111 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 112 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 113 | NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 114 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 115 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 116 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 117 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 118 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 119 | 120 | *** libLBFGS *** 121 | 122 | The MIT License 123 | 124 | Copyright (c) 1990 Jorge Nocedal 125 | Copyright (c) 2007-2010 Naoaki Okazaki 126 | 127 | Permission is hereby granted, free of charge, to any person obtaining a 128 | copy of this software and associated documentation files (the "Software"), 129 | to deal in the Software without restriction, including without limitation 130 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 131 | and/or sell copies of the Software, and to permit persons to whom the 132 | Software is furnished to do so, subject to the following conditions: 133 | 134 | The above copyright notice and this permission notice shall be included in 135 | all copies or substantial portions of the Software. 136 | 137 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 138 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 139 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 140 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 141 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 142 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 143 | THE SOFTWARE. 
144 | 145 | 146 | -------------------------------------------------------------------------------- /PyCFMID/Windows/cfm-annotate.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/Windows/cfm-annotate.exe -------------------------------------------------------------------------------- /PyCFMID/Windows/cfm-id-precomputed.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/Windows/cfm-id-precomputed.exe -------------------------------------------------------------------------------- /PyCFMID/Windows/cfm-id.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/Windows/cfm-id.exe -------------------------------------------------------------------------------- /PyCFMID/Windows/cfm-predict.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/Windows/cfm-predict.exe -------------------------------------------------------------------------------- /PyCFMID/Windows/cfm-train.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/Windows/cfm-train.exe -------------------------------------------------------------------------------- /PyCFMID/Windows/compute-stats.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/Windows/compute-stats.exe 
-------------------------------------------------------------------------------- /PyCFMID/Windows/fraggraph-gen.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/Windows/fraggraph-gen.exe -------------------------------------------------------------------------------- /PyCFMID/Windows/license.txt: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 2.1, February 1999 3 | 4 | Copyright (C) 1991, 1999 Free Software Foundation, Inc. 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | [This is the first released version of the Lesser GPL. It also counts 10 | as the successor of the GNU Library Public License, version 2, hence 11 | the version number 2.1.] 12 | 13 | Preamble 14 | 15 | The licenses for most software are designed to take away your 16 | freedom to share and change it. By contrast, the GNU General Public 17 | Licenses are intended to guarantee your freedom to share and change 18 | free software--to make sure the software is free for all its users. 19 | 20 | This license, the Lesser General Public License, applies to some 21 | specially designated software packages--typically libraries--of the 22 | Free Software Foundation and other authors who decide to use it. You 23 | can use it too, but we suggest you first think carefully about whether 24 | this license or the ordinary General Public License is the better 25 | strategy to use in any particular case, based on the explanations below. 26 | 27 | When we speak of free software, we are referring to freedom of use, 28 | not price. 
Our General Public Licenses are designed to make sure that 29 | you have the freedom to distribute copies of free software (and charge 30 | for this service if you wish); that you receive source code or can get 31 | it if you want it; that you can change the software and use pieces of 32 | it in new free programs; and that you are informed that you can do 33 | these things. 34 | 35 | To protect your rights, we need to make restrictions that forbid 36 | distributors to deny you these rights or to ask you to surrender these 37 | rights. These restrictions translate to certain responsibilities for 38 | you if you distribute copies of the library or if you modify it. 39 | 40 | For example, if you distribute copies of the library, whether gratis 41 | or for a fee, you must give the recipients all the rights that we gave 42 | you. You must make sure that they, too, receive or can get the source 43 | code. If you link other code with the library, you must provide 44 | complete object files to the recipients, so that they can relink them 45 | with the library after making changes to the library and recompiling 46 | it. And you must show them these terms so they know their rights. 47 | 48 | We protect your rights with a two-step method: (1) we copyright the 49 | library, and (2) we offer you this license, which gives you legal 50 | permission to copy, distribute and/or modify the library. 51 | 52 | To protect each distributor, we want to make it very clear that 53 | there is no warranty for the free library. Also, if the library is 54 | modified by someone else and passed on, the recipients should know 55 | that what they have is not the original version, so that the original 56 | author's reputation will not be affected by problems that might be 57 | introduced by others. 58 | 59 | Finally, software patents pose a constant threat to the existence of 60 | any free program. 
We wish to make sure that a company cannot 61 | effectively restrict the users of a free program by obtaining a 62 | restrictive license from a patent holder. Therefore, we insist that 63 | any patent license obtained for a version of the library must be 64 | consistent with the full freedom of use specified in this license. 65 | 66 | Most GNU software, including some libraries, is covered by the 67 | ordinary GNU General Public License. This license, the GNU Lesser 68 | General Public License, applies to certain designated libraries, and 69 | is quite different from the ordinary General Public License. We use 70 | this license for certain libraries in order to permit linking those 71 | libraries into non-free programs. 72 | 73 | When a program is linked with a library, whether statically or using 74 | a shared library, the combination of the two is legally speaking a 75 | combined work, a derivative of the original library. The ordinary 76 | General Public License therefore permits such linking only if the 77 | entire combination fits its criteria of freedom. The Lesser General 78 | Public License permits more lax criteria for linking other code with 79 | the library. 80 | 81 | We call this license the "Lesser" General Public License because it 82 | does Less to protect the user's freedom than the ordinary General 83 | Public License. It also provides other free software developers Less 84 | of an advantage over competing non-free programs. These disadvantages 85 | are the reason we use the ordinary General Public License for many 86 | libraries. However, the Lesser license provides advantages in certain 87 | special circumstances. 88 | 89 | For example, on rare occasions, there may be a special need to 90 | encourage the widest possible use of a certain library, so that it becomes 91 | a de-facto standard. To achieve this, non-free programs must be 92 | allowed to use the library. 
A more frequent case is that a free 93 | library does the same job as widely used non-free libraries. In this 94 | case, there is little to gain by limiting the free library to free 95 | software only, so we use the Lesser General Public License. 96 | 97 | In other cases, permission to use a particular library in non-free 98 | programs enables a greater number of people to use a large body of 99 | free software. For example, permission to use the GNU C Library in 100 | non-free programs enables many more people to use the whole GNU 101 | operating system, as well as its variant, the GNU/Linux operating 102 | system. 103 | 104 | Although the Lesser General Public License is Less protective of the 105 | users' freedom, it does ensure that the user of a program that is 106 | linked with the Library has the freedom and the wherewithal to run 107 | that program using a modified version of the Library. 108 | 109 | The precise terms and conditions for copying, distribution and 110 | modification follow. Pay close attention to the difference between a 111 | "work based on the library" and a "work that uses the library". The 112 | former contains code derived from the library, whereas the latter must 113 | be combined with the library in order to run. 114 | 115 | GNU LESSER GENERAL PUBLIC LICENSE 116 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 117 | 118 | 0. This License Agreement applies to any software library or other 119 | program which contains a notice placed by the copyright holder or 120 | other authorized party saying it may be distributed under the terms of 121 | this Lesser General Public License (also called "this License"). 122 | Each licensee is addressed as "you". 123 | 124 | A "library" means a collection of software functions and/or data 125 | prepared so as to be conveniently linked with application programs 126 | (which use some of those functions and data) to form executables. 
127 | 128 | The "Library", below, refers to any such software library or work 129 | which has been distributed under these terms. A "work based on the 130 | Library" means either the Library or any derivative work under 131 | copyright law: that is to say, a work containing the Library or a 132 | portion of it, either verbatim or with modifications and/or translated 133 | straightforwardly into another language. (Hereinafter, translation is 134 | included without limitation in the term "modification".) 135 | 136 | "Source code" for a work means the preferred form of the work for 137 | making modifications to it. For a library, complete source code means 138 | all the source code for all modules it contains, plus any associated 139 | interface definition files, plus the scripts used to control compilation 140 | and installation of the library. 141 | 142 | Activities other than copying, distribution and modification are not 143 | covered by this License; they are outside its scope. The act of 144 | running a program using the Library is not restricted, and output from 145 | such a program is covered only if its contents constitute a work based 146 | on the Library (independent of the use of the Library in a tool for 147 | writing it). Whether that is true depends on what the Library does 148 | and what the program that uses the Library does. 149 | 150 | 1. You may copy and distribute verbatim copies of the Library's 151 | complete source code as you receive it, in any medium, provided that 152 | you conspicuously and appropriately publish on each copy an 153 | appropriate copyright notice and disclaimer of warranty; keep intact 154 | all the notices that refer to this License and to the absence of any 155 | warranty; and distribute a copy of this License along with the 156 | Library. 157 | 158 | You may charge a fee for the physical act of transferring a copy, 159 | and you may at your option offer warranty protection in exchange for a 160 | fee. 161 | 162 | 2. 
You may modify your copy or copies of the Library or any portion 163 | of it, thus forming a work based on the Library, and copy and 164 | distribute such modifications or work under the terms of Section 1 165 | above, provided that you also meet all of these conditions: 166 | 167 | a) The modified work must itself be a software library. 168 | 169 | b) You must cause the files modified to carry prominent notices 170 | stating that you changed the files and the date of any change. 171 | 172 | c) You must cause the whole of the work to be licensed at no 173 | charge to all third parties under the terms of this License. 174 | 175 | d) If a facility in the modified Library refers to a function or a 176 | table of data to be supplied by an application program that uses 177 | the facility, other than as an argument passed when the facility 178 | is invoked, then you must make a good faith effort to ensure that, 179 | in the event an application does not supply such function or 180 | table, the facility still operates, and performs whatever part of 181 | its purpose remains meaningful. 182 | 183 | (For example, a function in a library to compute square roots has 184 | a purpose that is entirely well-defined independent of the 185 | application. Therefore, Subsection 2d requires that any 186 | application-supplied function or table used by this function must 187 | be optional: if the application does not supply it, the square 188 | root function must still compute square roots.) 189 | 190 | These requirements apply to the modified work as a whole. If 191 | identifiable sections of that work are not derived from the Library, 192 | and can be reasonably considered independent and separate works in 193 | themselves, then this License, and its terms, do not apply to those 194 | sections when you distribute them as separate works. 
But when you 195 | distribute the same sections as part of a whole which is a work based 196 | on the Library, the distribution of the whole must be on the terms of 197 | this License, whose permissions for other licensees extend to the 198 | entire whole, and thus to each and every part regardless of who wrote 199 | it. 200 | 201 | Thus, it is not the intent of this section to claim rights or contest 202 | your rights to work written entirely by you; rather, the intent is to 203 | exercise the right to control the distribution of derivative or 204 | collective works based on the Library. 205 | 206 | In addition, mere aggregation of another work not based on the Library 207 | with the Library (or with a work based on the Library) on a volume of 208 | a storage or distribution medium does not bring the other work under 209 | the scope of this License. 210 | 211 | 3. You may opt to apply the terms of the ordinary GNU General Public 212 | License instead of this License to a given copy of the Library. To do 213 | this, you must alter all the notices that refer to this License, so 214 | that they refer to the ordinary GNU General Public License, version 2, 215 | instead of to this License. (If a newer version than version 2 of the 216 | ordinary GNU General Public License has appeared, then you can specify 217 | that version instead if you wish.) Do not make any other change in 218 | these notices. 219 | 220 | Once this change is made in a given copy, it is irreversible for 221 | that copy, so the ordinary GNU General Public License applies to all 222 | subsequent copies and derivative works made from that copy. 223 | 224 | This option is useful when you wish to copy part of the code of 225 | the Library into a program that is not a library. 226 | 227 | 4. 
You may copy and distribute the Library (or a portion or 228 | derivative of it, under Section 2) in object code or executable form 229 | under the terms of Sections 1 and 2 above provided that you accompany 230 | it with the complete corresponding machine-readable source code, which 231 | must be distributed under the terms of Sections 1 and 2 above on a 232 | medium customarily used for software interchange. 233 | 234 | If distribution of object code is made by offering access to copy 235 | from a designated place, then offering equivalent access to copy the 236 | source code from the same place satisfies the requirement to 237 | distribute the source code, even though third parties are not 238 | compelled to copy the source along with the object code. 239 | 240 | 5. A program that contains no derivative of any portion of the 241 | Library, but is designed to work with the Library by being compiled or 242 | linked with it, is called a "work that uses the Library". Such a 243 | work, in isolation, is not a derivative work of the Library, and 244 | therefore falls outside the scope of this License. 245 | 246 | However, linking a "work that uses the Library" with the Library 247 | creates an executable that is a derivative of the Library (because it 248 | contains portions of the Library), rather than a "work that uses the 249 | library". The executable is therefore covered by this License. 250 | Section 6 states terms for distribution of such executables. 251 | 252 | When a "work that uses the Library" uses material from a header file 253 | that is part of the Library, the object code for the work may be a 254 | derivative work of the Library even though the source code is not. 255 | Whether this is true is especially significant if the work can be 256 | linked without the Library, or if the work is itself a library. The 257 | threshold for this to be true is not precisely defined by law. 
258 | 259 | If such an object file uses only numerical parameters, data 260 | structure layouts and accessors, and small macros and small inline 261 | functions (ten lines or less in length), then the use of the object 262 | file is unrestricted, regardless of whether it is legally a derivative 263 | work. (Executables containing this object code plus portions of the 264 | Library will still fall under Section 6.) 265 | 266 | Otherwise, if the work is a derivative of the Library, you may 267 | distribute the object code for the work under the terms of Section 6. 268 | Any executables containing that work also fall under Section 6, 269 | whether or not they are linked directly with the Library itself. 270 | 271 | 6. As an exception to the Sections above, you may also combine or 272 | link a "work that uses the Library" with the Library to produce a 273 | work containing portions of the Library, and distribute that work 274 | under terms of your choice, provided that the terms permit 275 | modification of the work for the customer's own use and reverse 276 | engineering for debugging such modifications. 277 | 278 | You must give prominent notice with each copy of the work that the 279 | Library is used in it and that the Library and its use are covered by 280 | this License. You must supply a copy of this License. If the work 281 | during execution displays copyright notices, you must include the 282 | copyright notice for the Library among them, as well as a reference 283 | directing the user to the copy of this License. 
Also, you must do one 284 | of these things: 285 | 286 | a) Accompany the work with the complete corresponding 287 | machine-readable source code for the Library including whatever 288 | changes were used in the work (which must be distributed under 289 | Sections 1 and 2 above); and, if the work is an executable linked 290 | with the Library, with the complete machine-readable "work that 291 | uses the Library", as object code and/or source code, so that the 292 | user can modify the Library and then relink to produce a modified 293 | executable containing the modified Library. (It is understood 294 | that the user who changes the contents of definitions files in the 295 | Library will not necessarily be able to recompile the application 296 | to use the modified definitions.) 297 | 298 | b) Use a suitable shared library mechanism for linking with the 299 | Library. A suitable mechanism is one that (1) uses at run time a 300 | copy of the library already present on the user's computer system, 301 | rather than copying library functions into the executable, and (2) 302 | will operate properly with a modified version of the library, if 303 | the user installs one, as long as the modified version is 304 | interface-compatible with the version that the work was made with. 305 | 306 | c) Accompany the work with a written offer, valid for at 307 | least three years, to give the same user the materials 308 | specified in Subsection 6a, above, for a charge no more 309 | than the cost of performing this distribution. 310 | 311 | d) If distribution of the work is made by offering access to copy 312 | from a designated place, offer equivalent access to copy the above 313 | specified materials from the same place. 314 | 315 | e) Verify that the user has already received a copy of these 316 | materials or that you have already sent this user a copy. 
317 | 318 | For an executable, the required form of the "work that uses the 319 | Library" must include any data and utility programs needed for 320 | reproducing the executable from it. However, as a special exception, 321 | the materials to be distributed need not include anything that is 322 | normally distributed (in either source or binary form) with the major 323 | components (compiler, kernel, and so on) of the operating system on 324 | which the executable runs, unless that component itself accompanies 325 | the executable. 326 | 327 | It may happen that this requirement contradicts the license 328 | restrictions of other proprietary libraries that do not normally 329 | accompany the operating system. Such a contradiction means you cannot 330 | use both them and the Library together in an executable that you 331 | distribute. 332 | 333 | 7. You may place library facilities that are a work based on the 334 | Library side-by-side in a single library together with other library 335 | facilities not covered by this License, and distribute such a combined 336 | library, provided that the separate distribution of the work based on 337 | the Library and of the other library facilities is otherwise 338 | permitted, and provided that you do these two things: 339 | 340 | a) Accompany the combined library with a copy of the same work 341 | based on the Library, uncombined with any other library 342 | facilities. This must be distributed under the terms of the 343 | Sections above. 344 | 345 | b) Give prominent notice with the combined library of the fact 346 | that part of it is a work based on the Library, and explaining 347 | where to find the accompanying uncombined form of the same work. 348 | 349 | 8. You may not copy, modify, sublicense, link with, or distribute 350 | the Library except as expressly provided under this License. 
Any 351 | attempt otherwise to copy, modify, sublicense, link with, or 352 | distribute the Library is void, and will automatically terminate your 353 | rights under this License. However, parties who have received copies, 354 | or rights, from you under this License will not have their licenses 355 | terminated so long as such parties remain in full compliance. 356 | 357 | 9. You are not required to accept this License, since you have not 358 | signed it. However, nothing else grants you permission to modify or 359 | distribute the Library or its derivative works. These actions are 360 | prohibited by law if you do not accept this License. Therefore, by 361 | modifying or distributing the Library (or any work based on the 362 | Library), you indicate your acceptance of this License to do so, and 363 | all its terms and conditions for copying, distributing or modifying 364 | the Library or works based on it. 365 | 366 | 10. Each time you redistribute the Library (or any work based on the 367 | Library), the recipient automatically receives a license from the 368 | original licensor to copy, distribute, link with or modify the Library 369 | subject to these terms and conditions. You may not impose any further 370 | restrictions on the recipients' exercise of the rights granted herein. 371 | You are not responsible for enforcing compliance by third parties with 372 | this License. 373 | 374 | 11. If, as a consequence of a court judgment or allegation of patent 375 | infringement or for any other reason (not limited to patent issues), 376 | conditions are imposed on you (whether by court order, agreement or 377 | otherwise) that contradict the conditions of this License, they do not 378 | excuse you from the conditions of this License. If you cannot 379 | distribute so as to satisfy simultaneously your obligations under this 380 | License and any other pertinent obligations, then as a consequence you 381 | may not distribute the Library at all. 
For example, if a patent 382 | license would not permit royalty-free redistribution of the Library by 383 | all those who receive copies directly or indirectly through you, then 384 | the only way you could satisfy both it and this License would be to 385 | refrain entirely from distribution of the Library. 386 | 387 | If any portion of this section is held invalid or unenforceable under any 388 | particular circumstance, the balance of the section is intended to apply, 389 | and the section as a whole is intended to apply in other circumstances. 390 | 391 | It is not the purpose of this section to induce you to infringe any 392 | patents or other property right claims or to contest validity of any 393 | such claims; this section has the sole purpose of protecting the 394 | integrity of the free software distribution system which is 395 | implemented by public license practices. Many people have made 396 | generous contributions to the wide range of software distributed 397 | through that system in reliance on consistent application of that 398 | system; it is up to the author/donor to decide if he or she is willing 399 | to distribute software through any other system and a licensee cannot 400 | impose that choice. 401 | 402 | This section is intended to make thoroughly clear what is believed to 403 | be a consequence of the rest of this License. 404 | 405 | 12. If the distribution and/or use of the Library is restricted in 406 | certain countries either by patents or by copyrighted interfaces, the 407 | original copyright holder who places the Library under this License may add 408 | an explicit geographical distribution limitation excluding those countries, 409 | so that distribution is permitted only in or among countries not thus 410 | excluded. In such case, this License incorporates the limitation as if 411 | written in the body of this License. 412 | 413 | 13. 
The Free Software Foundation may publish revised and/or new 414 | versions of the Lesser General Public License from time to time. 415 | Such new versions will be similar in spirit to the present version, 416 | but may differ in detail to address new problems or concerns. 417 | 418 | Each version is given a distinguishing version number. If the Library 419 | specifies a version number of this License which applies to it and 420 | "any later version", you have the option of following the terms and 421 | conditions either of that version or of any later version published by 422 | the Free Software Foundation. If the Library does not specify a 423 | license version number, you may choose any version ever published by 424 | the Free Software Foundation. 425 | 426 | 14. If you wish to incorporate parts of the Library into other free 427 | programs whose distribution conditions are incompatible with these, 428 | write to the author to ask for permission. For software which is 429 | copyrighted by the Free Software Foundation, write to the Free 430 | Software Foundation; we sometimes make exceptions for this. Our 431 | decision will be guided by the two goals of preserving the free status 432 | of all derivatives of our free software and of promoting the sharing 433 | and reuse of software generally. 434 | 435 | NO WARRANTY 436 | 437 | 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO 438 | WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 439 | EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR 440 | OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY 441 | KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE 442 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 443 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE 444 | LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME 445 | THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
446 | 447 | 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN 448 | WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY 449 | AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU 450 | FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR 451 | CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE 452 | LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING 453 | RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A 454 | FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF 455 | SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 456 | DAMAGES. 457 | 458 | END OF TERMS AND CONDITIONS 459 | 460 | How to Apply These Terms to Your New Libraries 461 | 462 | If you develop a new library, and you want it to be of the greatest 463 | possible use to the public, we recommend making it free software that 464 | everyone can redistribute and change. You can do so by permitting 465 | redistribution under these terms (or, alternatively, under the terms of the 466 | ordinary General Public License). 467 | 468 | To apply these terms, attach the following notices to the library. It is 469 | safest to attach them to the start of each source file to most effectively 470 | convey the exclusion of warranty; and each file should have at least the 471 | "copyright" line and a pointer to where the full notice is found. 472 | 473 | 474 | Copyright (C) 475 | 476 | This library is free software; you can redistribute it and/or 477 | modify it under the terms of the GNU Lesser General Public 478 | License as published by the Free Software Foundation; either 479 | version 2.1 of the License, or (at your option) any later version. 480 | 481 | This library is distributed in the hope that it will be useful, 482 | but WITHOUT ANY WARRANTY; without even the implied warranty of 483 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU 484 | Lesser General Public License for more details. 485 | 486 | You should have received a copy of the GNU Lesser General Public 487 | License along with this library; if not, write to the Free Software 488 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 489 | 490 | Also add information on how to contact you by electronic and paper mail. 491 | 492 | You should also get your employer (if you work as a programmer) or your 493 | school, if any, to sign a "copyright disclaimer" for the library, if 494 | necessary. Here is a sample; alter the names: 495 | 496 | Yoyodyne, Inc., hereby disclaims all copyright interest in the 497 | library `Frob' (a library for tweaking knobs) written by James Random Hacker. 498 | 499 | , 1 April 1990 500 | Ty Coon, President of Vice 501 | 502 | That's all there is to it! 503 | -------------------------------------------------------------------------------- /PyCFMID/Windows/lpsolve55.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/Windows/lpsolve55.dll -------------------------------------------------------------------------------- /PyCFMID/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hcji/PyCFMID/b8e450aec108f9a60635502333ab8ebad173ca85/PyCFMID/__init__.py -------------------------------------------------------------------------------- /PyCFMID/ei_config.txt: -------------------------------------------------------------------------------- 1 | lambda 1.0 2 | em_converge_thresh 0.001 3 | model_depth 2 4 | use_single_energy_cfm 1 5 | spectrum_depth 2 6 | spectrum_weight 1 7 | num_em_restarts 3 8 | ionization_mode 3 9 | abs_mass_tol 0.5 10 | use_lbfgs_for_ga 1 11 | ga_converge_thresh 0.0001 12 | include_isotopes 1 13 | em_init_type 3 14 | fg_depth 2 15 | allow_frag_detours 0 16 | 
theta_function 2 17 | theta_nn_hlayer_num_nodes 20 18 | theta_nn_hlayer_num_nodes 4 19 | theta_nn_layer_act_func_ids 1 20 | theta_nn_layer_act_func_ids 1 21 | theta_nn_layer_act_func_ids 0 22 | starting_step_size 0.0001 23 | ga_max_iterations 20 24 | fragraph_compute_timeout_in_secs 600 25 | include_h_losses 1 26 | -------------------------------------------------------------------------------- /PyCFMID/esi_config.txt: -------------------------------------------------------------------------------- 1 | lambda 1.0 2 | em_converge_thresh 10.0 3 | ga_converge_thresh 1.0 4 | model_depth 6 5 | spectrum_depth 2 6 | spectrum_depth 4 7 | spectrum_depth 6 8 | spectrum_weight 1 9 | spectrum_weight 1 10 | spectrum_weight 1 11 | num_em_restarts 3 12 | starting_step_size 0.01 13 | ipfp_converge_thresh 0.005 14 | osc_ipfp_converge_thresh 0.999 15 | 16 | -------------------------------------------------------------------------------- /PyCFMID/esi_config_neg.txt: -------------------------------------------------------------------------------- 1 | lambda 1.0 2 | em_converge_thresh 10.0 3 | ga_converge_thresh 1.0 4 | model_depth 2 5 | use_single_energy_cfm 1 6 | spectrum_depth 2 7 | spectrum_depth 2 8 | spectrum_depth 2 9 | spectrum_weight 1 10 | spectrum_weight 1 11 | spectrum_weight 1 12 | num_em_restarts 1 13 | starting_step_size 0.01 14 | ipfp_converge_thresh 0.005 15 | osc_ipfp_converge_thresh 0.999 16 | ionization_mode 2 -------------------------------------------------------------------------------- /ReadMe.md: -------------------------------------------------------------------------------- 1 | # PyCFMID 2 | *** 3 | 4 | This package is a Python wrapper for [CFM-ID](https://sourceforge.net/projects/cfm-id/), which provides a method for accurately and efficiently identifying metabolites in spectra generated by electrospray tandem mass spectrometry (ESI-MS/MS).
The program 5 | uses Competitive Fragmentation Modeling to produce a probabilistic generative model for the MS/MS fragmentation process and machine 6 | learning techniques to adapt the model parameters from data. 7 | 8 | ## Note 9 | 1. This is not an official wrapper. 10 | 2. Only the Windows platform is currently supported because of a compile error on Linux (issue #1). 11 | 3. The CFM-ID version is 2.4, downloaded from https://sourceforge.net/projects/cfm-id/. 12 | 4. The CFM-ID 3.x version is only available through the web server at http://cfmid3.wishartlab.com. 13 | 5. The official CFM-ID 4.x version is already available via Docker at https://hub.docker.com/r/wishartlab/cfmid. 14 | 15 | ### Install 16 | 17 | pip install git+git://github.com/hcji/PyCFMID@master 18 | 19 | ## Usage 20 | 21 | ### fraggraph_gen 22 | This program produces a complete fragmentation graph or list of feasible fragments for an input molecule. It systematically breaks bonds within the molecule and checks for valid resulting fragments as described in section 2.1.1 of the above publication. 23 | 24 | from PyCFMID.PyCFMID import fraggraph_gen 25 | frags = fraggraph_gen(smiles, max_depth=2, ionization_mode='+', fullgraph=True, output_file=None) 26 | 27 | **smiles**: The SMILES string for the input molecule to fragment. 28 | **max_depth**: The depth to which the program should recurse when computing the tree. E.g., depth 1 would be just the original molecule and its immediate descendants, depth 2 would allow those descendants to break one more time, etc. 29 | **ionization_mode**: Whether to generate fragments using positive ESI or EI, or negative ESI ionization. + for positive mode ESI [M+H], - for negative mode ESI [M-H], * for positive mode EI [M+]. 30 | **fullgraph**: This specifies the type of output. fullgraph (default) will also return a list of the connections between fragments and their corresponding neutral losses. Otherwise, it will return a list of unique feasible fragments with their masses. 
31 | **output_file**: (optional) The name and path of a file to write the output to. If this argument is not provided, it will make a directory in the working path. 32 | 33 | ### cfm_predict 34 | This program predicts spectra for an input molecule given a pre-trained CFM model. 35 | 36 | from PyCFMID.PyCFMID import cfm_predict 37 | spectra = cfm_predict(smiles, prob_thresh=0.001, param_file='', config_file='', annotate_fragments=False, output_file=None, apply_postproc=True, suppress_exceptions=False) 38 | 39 | **smiles**: The SMILES string for the input molecule to fragment. 40 | **prob_thresh**: The probability below which to prune unlikely fragmentations during fragmentation graph generation (default 0.001). 41 | **ion_source**: The ion source of the mass spectrometer. Usually, 'EI' for GC-MS, and 'ESI' for LC-MS/MS. Will not be used if param_file is given. 42 | **param_file**: (optional) The filename where the parameters of a trained cfm model can be found (if not given, assumes param_output.log in the current directory). This file is the output of cfm-train. 43 | **config_file**: (optional) The filename where the configuration parameters of the cfm model can be found (if not given, assumes param_config.txt in the current directory). This needs to match the file passed to cfm-train during training. 44 | **annotate_fragments**: (optional) Whether to include fragment information in the output spectra. 45 | **output_file**: (optional) The name and path of a file to write the output to. If this argument is not provided, it will make a directory in the working path. 46 | **apply_postproc**: (optional) Whether or not to post-process predicted spectra to take the top 80% of energy (at least 5 peaks), or the highest 30 peaks (whichever comes first). If turned off, will output a peak for every possible fragment of the input molecule, as long as the prob_thresh argument above is set to 0.0. 
47 | **suppress_exceptions**: (optional) Suppress most exceptions so that the program returns normally even when it fails to produce a result. 48 | 49 | ### cfm_id 50 | Given an input spectrum and a list of candidate SMILES strings, this program computes the predicted spectrum for each candidate and compares it to the input spectrum. It returns a ranking of the candidates according to how closely they match. The spectrum prediction is done using a pre-trained CFM model. 51 | 52 | from PyCFMID.PyCFMID import cfm_id 53 | result = cfm_id(spectrum_file, candidate_file, num_highest=-1, ppm_mass_tol=10, abs_mass_tol=0.01, prob_thresh=0.001, param_file='', config_file='', score_type='Jaccard', apply_postprocessing=True, output_file=None) 54 | 55 | **spectrum_file**: The filename where the input spectrum can be found. See [example_spectra.txt](https://sourceforge.net/p/cfm-id/code/HEAD/tree/cfm/example_spec.txt) as an example. 56 | **candidate_file**: The filename where the input list of candidate structures can be found as line-separated 'id smiles_or_inchi' pairs. See [example_candidates.txt](https://sourceforge.net/p/cfm-id/code/HEAD/tree/cfm/example_candidates.txt) as an example. 57 | **num_highest**: (optional) The number of (ranked) candidates to return or -1 for all (if not given, returns all in ranked order). 58 | **ppm_mass_tol**: (optional) The mass tolerance in ppm to use when matching peaks within the dot product comparison - will use higher resulting tolerance of ppm and abs (if not given defaults to 10 ppm). 59 | **abs_mass_tol**: (optional) The mass tolerance in abs Da to use when matching peaks within the dot product comparison - will use higher resulting tolerance of ppm and abs (if not given defaults to 0.01 Da). 60 | **prob_thresh**: The probability below which to prune unlikely fragmentations during fragmentation graph generation (default 0.001). 61 | **ion_source**: The ion source of the mass spectrometer. Usually, 'EI' for GC-MS, and 'ESI' for LC-MS/MS. 
Will not be used if param_file is given. 62 | **param_file**: (optional) The filename where the parameters of a trained cfm model can be found (if not given, assumes param_output.log in the current directory). This file is the output of cfm-train. 63 | **config_file**: (optional) The filename where the configuration parameters of the cfm model can be found (if not given, assumes param_config.txt in the current directory). This needs to match the file passed to cfm-train during training. 64 | **score_type**: (optional) The type of scoring function to use when comparing spectra. Options: Jaccard (default), DotProduct. 65 | **output_file**: (optional) The name and path of a file to write the output to. If this argument is not provided, it will make a directory in the working path. 66 | **apply_postprocessing**: (optional) Whether or not to post-process predicted spectra to take the top 80% of energy (at least 5 peaks), or the highest 30 peaks (whichever comes first). If turned off, will output a peak for every possible fragment of the input molecule, as long as the prob_thresh argument above is set to 0.0. 67 | 68 | ### cfm_id_database 69 | Given an input spectrum, this program retrieves candidates automatically and computes the predicted spectrum for each candidate and compares it to the input spectrum. It returns a ranking of the candidates according to how closely they match. The spectrum prediction is done using a pre-trained CFM model. 70 | 71 | from PyCFMID.PyCFMID import cfm_id_database 72 | cfm_id_database(spectrum_dataframe, formula, energy_level='high', database='biodb', input_dir=None, num_highest=-1, ppm_mass_tol=10, abs_mass_tol=0.01, prob_thresh=0.001, param_file='', config_file='', score_type='Jaccard', apply_postprocessing=True, output_file=None) 73 | 74 | **spectrum_dataframe**: A two-column dataframe with m/z and intensity of a spectrum. 75 | **formula**: The formula of the candidates. 76 | **energy_level**: The energy level of the mass spectrometry. 
Options: high (default), medium, low. 77 | **database**: 'biodb' for biological database, 'pubchem' for PubChem database, or a file path for a custom candidate list. 78 | other parameters are the same as **cfm_id** 79 | -------------------------------------------------------------------------------- /Test/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun May 19 17:06:01 2019 4 | 5 | @author: hcji 6 | """ 7 | 8 | import pandas as pd 9 | from PyCFMID.PyCFMID import fraggraph_gen, cfm_predict, cfm_id_database 10 | 11 | fragments = fraggraph_gen('CCCCN') 12 | pred_ms = cfm_predict('CCCCN') 13 | 14 | spectra = pd.DataFrame({'mz': [223.106608, 251.101730], 'intensity':[100.000000, 40.722900]}) 15 | cfm_id_database(spectra, formula='C17H22FN3O4S', database='pubchem') -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Installation 2 | bs4 3 | requests 4 | pubchempy 5 | pandas 6 | 7 | # test 8 | coverage -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='PyCFMID', 4 | version='0.0.2', 5 | description="Python interface to CFM-ID", 6 | license='MIT', 7 | author='Hongchao Ji', 8 | author_email='ji.hongchao@foxmail.com', 9 | url='https://github.com/hcji/PyCFMID', 10 | long_description_content_type="text/markdown", 11 | packages=find_packages(), 12 | install_requires=['requests', 'pubchempy', 'bs4'], 13 | include_package_data = True, 14 | classifiers=[ 15 | 'Development Status :: 4 - Beta', 16 | 'Programming Language :: Python :: 3.6', 17 | 'Programming Language :: Python :: 3.7' 18 | ] 19 | ) 
--------------------------------------------------------------------------------