├── LICENSE.txt ├── README.md ├── pyms ├── AUTHORS ├── Baseline │ ├── TopHat.py │ └── __init__.py ├── COPYING ├── Deconvolution │ ├── BillerBiemann │ │ ├── Function.py │ │ └── __init__.py │ └── __init__.py ├── Display │ ├── Class.py │ ├── Function.py │ └── __init__.py ├── Experiment │ ├── Class.py │ ├── IO.py │ └── __init__.py ├── GCMS │ ├── Class.py │ ├── Function.py │ ├── IO │ │ ├── ANDI │ │ │ ├── Function.py │ │ │ └── __init__.py │ │ ├── JCAMP │ │ │ ├── Function.py │ │ │ └── __init__.py │ │ ├── MZML │ │ │ ├── Function.py │ │ │ └── __init__.py │ │ └── __init__.py │ └── __init__.py ├── Gapfill │ ├── Class.py │ ├── Function.py │ └── __init__.py ├── LICENSE ├── Noise │ ├── Analysis.py │ ├── SavitzkyGolay.py │ ├── Window.py │ └── __init__.py ├── Peak │ ├── Class.py │ ├── Function.py │ ├── IO.py │ ├── List │ │ ├── DPA │ │ │ ├── #Class.py# │ │ │ ├── Class.py │ │ │ ├── Class_old.py │ │ │ ├── Function.py │ │ │ ├── Function_new.py │ │ │ ├── Function_new.py~ │ │ │ ├── Utils.py │ │ │ └── __init__.py │ │ ├── Function.py │ │ ├── Utils.py │ │ └── __init__.py │ └── __init__.py ├── Simulator │ ├── Function.py │ └── __init__.py ├── Utils │ ├── DP.py │ ├── Error.py │ ├── IO.py │ ├── Math.py │ ├── Time.py │ ├── Utils.py │ └── __init__.py └── __init__.py └── setup.py /LICENSE.txt: -------------------------------------------------------------------------------- 1 | ############################################################################# 2 | # # 3 | # PyMS software for processing of metabolomic mass-spectrometry data # 4 | # Copyright (C) 2005-2015 Vladimir Likic # 5 | # # 6 | # This program is free software; you can redistribute it and/or modify # 7 | # it under the terms of the GNU General Public License version 2 as # 8 | # published by the Free Software Foundation. # 9 | # # 10 | # This program is distributed in the hope that it will be useful, # 11 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 12 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # 13 | # GNU General Public License for more details. # 14 | # # 15 | # You should have received a copy of the GNU General Public License # 16 | # along with this program; if not, write to the Free Software # 17 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # 18 | # # 19 | ############################################################################# 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyms 2 | Original Publication: 3 | https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-13-115 4 | 5 | PyMS was originally stored on googlecode, and moved to github when google code closed. The same group - Metabolomics Australia at the Bio21 Institute, University of Melbourne, manages the github repo. For any further information on the history of PyMS please email the maintainer: Sean O'Callaghan, spoc@unimelb.edu.au. 
6 | 7 | The main PyMS documentation is available here: https://github.com/ma-bio21/pyms-docs 8 | Test code available here: https://github.com/ma-bio21/pyms-test 9 | -------------------------------------------------------------------------------- /pyms/AUTHORS: -------------------------------------------------------------------------------- 1 | Current developers: 2 | Vladimir Likic -- project leader 3 | Sean O'Callaghan -- Jan/10-present 4 | Milica Ng -- Mar/08-present 5 | Luke Hodkinson -- Jan/09-present 6 | 7 | Former project members: 8 | Andrew Isaac -- Dec/08-present 9 | Qiao Wang -- Dec/08-Jan/09 10 | Lewis Lee -- Nov/05-May/06 11 | Woon Wai Keen -- Dec/06-Dec/07 12 | Tim Erwin -- Dec/07-Jul/08 13 | 14 | External contributors: 15 | Uwe Schmitt -- Savitzky-Golay noise filter 16 | 17 | -------------------------------------------------------------------------------- /pyms/Baseline/TopHat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Top-hat baseline corrector 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# default structural element as a fraction of total number of points
_STRUCT_ELM_FRAC = 0.2


def tophat(ic, struct=None):

    """
    @summary: Top-hat baseline correction on Ion Chromatogram

        The intensity array is copied, filtered with
        scipy.ndimage.white_tophat using a flat structural element, and
        stored in a deep copy of 'ic'. The input object is not modified.

    @param ic: The input ion chromatogram
    @type ic: pyms.GCMS.Class.IonChromatogram
    @param struct: Top-hat structural element as time string. If None,
        the element spans _STRUCT_ELM_FRAC of the number of points
    @type struct: StringType

    @return: Top-hat corrected ion chromatogram (a deep copy of 'ic')
    @rtype: pyms.GCMS.Class.IonChromatogram

    @author: Woon Wai Keen
    @author: Vladimir Likic
    """

    if not is_ionchromatogram(ic):
        error("'ic' not an IonChromatogram object")
    else:
        ia = copy.deepcopy(ic.get_intensity_array())

    if struct is None:
        struct_pts = int(round(ia.size * _STRUCT_ELM_FRAC))
    else:
        struct_pts = ic_window_points(ic, struct)

    # print " -> Top-hat: structural element is %d point(s)" % ( struct_pts )

    # flat structural element of 'struct_pts' points, passed as footprint
    str_el = numpy.repeat([1], struct_pts)
    ia = ndimage.white_tophat(ia, size=None, footprint=str_el)

    ic_bc = copy.deepcopy(ic)
    ic_bc.set_intensity_array(ia)

    return ic_bc


def tophat_im(im, struct=None):

    """
    @summary: Top-hat baseline correction on Intensity Matrix

        Wraps around the tophat() function above: every ion chromatogram
        of a deep copy of 'im' is corrected in turn. The input matrix is
        not modified.

    @param im: The input Intensity Matrix
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param struct: Top-hat structural element as time string
    @type struct: StringType

    @return: Top-hat corrected Intensity Matrix (a deep copy of 'im')
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Sean O'Callaghan
    """

    # only the number of m/z channels is needed to walk the matrix
    _, n_mz = im.get_size()

    im_smooth = copy.deepcopy(im)

    for ii in range(n_mz):
        ic = im_smooth.get_ic_at_index(ii)
        im_smooth.set_ic_at_index(ii, tophat(ic, struct))

    return im_smooth
8 | 9 | Redistribution and use in source and binary forms, with or without modification, 10 | are permitted provided that the following conditions are met: 11 | 12 | * Redistributions of source code must retain the above copyright notice, 13 | this list of conditions and the following disclaimer. 14 | * Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | * Neither the name of the nor the names of its contributors 18 | may be used to endorse or promote products derived from this software without 19 | specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | 32 | -------------------------------------------------------------------------------- /pyms/Deconvolution/BillerBiemann/Function.py: -------------------------------------------------------------------------------- 1 | """ 2 | BillerBiemann deconvolution algorithm 3 | Provides a class to perform Biller and Biemann deconvolution 4 | """ 5 | 6 | ############################################################################# 7 | # # 8 | # PyMS software for processing of metabolomic mass-spectrometry data # 9 | # Copyright (C) 2005-2012 Vladimir Likic # 10 | # # 11 | # This program is free software; you can redistribute it and/or modify # 12 | # it under the terms of the GNU General Public License version 2 as # 13 | # published by the Free Software Foundation. # 14 | # # 15 | # This program is distributed in the hope that it will be useful, # 16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 18 | # GNU General Public License for more details. # 19 | # # 20 | # You should have received a copy of the GNU General Public License # 21 | # along with this program; if not, write to the Free Software # 22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# If psyco is installed, use it to speed up running time
try:
    import psyco
    psyco.full()
except ImportError:
    # psyco is an optional accelerator; carry on without it
    pass

#######################
# structure
# 1) find local maxima per ion, store intensity and scan index
# 2) sum across N scans to compensate for scan type
# 3) sum ions belonging to each maxima scan
#######################


def BillerBiemann(im, points=3, scans=1):

    """
    @summary: BillerBiemann Deconvolution

        Deconvolution based on the algorithm of Biller and Biemann (1974)

        One Peak is created for every scan whose row in the maxima
        matrix (see get_maxima_matrix) has a positive total intensity.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans (Default 3)
    @type points: IntType
    @param scans: To compensate for spectra skewing,
        peaks from 'scans' scans are combined (Default 1).
    @type scans: IntType

    @return: List of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    rt_list = im.get_time_list()
    mass_list = im.get_mass_list()
    maxima_im = get_maxima_matrix(im, points, scans)

    peak_list = []
    for row, maxima in enumerate(maxima_im):
        if sum(maxima) > 0:
            ms = MassSpectrum(mass_list, maxima)
            peak = Peak(rt_list[row], ms)
            peak.set_pt_bounds([0, row, 0])  # store IM index for convenience
            peak_list.append(peak)

    return peak_list


def rel_threshold(pl, percent=2):

    """
    @summary: Remove ions with relative intensities less than the given
        relative percentage of the maximum intensity.

        Works on a deep copy of 'pl'; the input peaks are not modified.

    @param pl: A list of Peak objects
    @type pl: ListType
    @param percent: Threshold for relative percentage of intensity (Default 2%)
    @type percent: FloatType

    @return: A new list of Peak objects with threshold ions
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_number(percent) or percent <= 0:
        error("'percent' must be a number > 0")

    new_pl = []
    for p in copy.deepcopy(pl):
        ms = p.get_mass_spectrum()
        ia = ms.mass_spec
        # assume max(ia) big so /100 1st
        cutoff = (max(ia)/100.0)*float(percent)
        for i, intensity in enumerate(ia):
            if intensity < cutoff:
                ia[i] = 0
        ms.mass_spec = ia
        p.set_mass_spectrum(ms)
        new_pl.append(p)
    return new_pl


def num_ions_threshold(pl, n, cutoff):

    """
    @summary: Remove Peaks where there are less than a given number of ion
        intensities above the given threshold

        The returned peaks are deep copies of the retained input peaks.

    @param pl: A list of Peak objects
    @type pl: ListType
    @param n: Minimum number of ions that must have intensities above the cutoff
    @type n: IntType
    @param cutoff: The minimum intensity threshold
    @type cutoff: FloatType

    @return: A new list of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    new_pl = []
    for p in copy.deepcopy(pl):
        ia = p.get_mass_spectrum().mass_spec
        # count ions at or above the cutoff
        ions = sum(1 for intensity in ia if intensity >= cutoff)
        if ions >= n:
            new_pl.append(p)
    return new_pl


def sum_maxima(im, points=3, scans=1):

    """
    @summary: Reconstruct the TIC as sum of maxima

        The maxima matrix is built without combining scans; 'scans' is
        applied here as the width of the window of rows summed for each
        scan.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans
    @type points: IntType
    @param scans: To compensate for spectra skewing,
        peaks from 'scans' scans are combined.
    @type scans: IntType

    @return: The reconstructed TIC
    @rtype: pyms.GCMS.Class.IonChromatogram

    @author: Andrew Isaac
    """

    maxima_im = get_maxima_matrix(im, points)
    numrows = len(maxima_im)
    half = int(scans/2)

    sums = []
    for row in range(numrows):
        val = 0
        for ii in range(scans):
            scan = row - half + ii
            # rows of the window that fall outside the matrix are ignored
            if 0 <= scan < numrows:
                val += maxima_im[scan].sum()
        sums.append(val)
    tic = IonChromatogram(numpy.array(sums), im.get_time_list())

    return tic


def _find_maxima_indices(ion_intensities, points):

    """
    @summary: Window search behind get_maxima_indices, without the
        input type checks.

    @param ion_intensities: A sequence of intensities for a single ion
    @param points: Peak if maxima over 'points' number of scans

    @return: A list of scan indices
    @rtype: ListType
    """

    points = int(points)
    half = int(points/2)
    if half < 1:
        # a window without neighbours would call max() on empty slices
        error("'points' must be an integer >= 2")
    points = 2*half+1  # ensure odd number of points

    # find peak inflection points
    # use a 'points' point window
    # for a plateau after a rise, need to check if it is the left edge of
    # a peak
    peak_point = []
    edge = -1
    for index in range(len(ion_intensities)-points+1):
        centre = index+half
        mid = ion_intensities[centre]
        left_max = max(ion_intensities[index:centre])
        right_max = max(ion_intensities[centre+1:index+points])
        # max in middle
        if mid > left_max and mid > right_max:
            peak_point.append(centre)
            edge = -1  # ignore previous rising edge
        # flat from rise (left of peak?)
        if mid > left_max and mid == right_max:
            edge = centre  # ignore previous rising edge, update latest
        # fall from flat
        if mid == left_max and mid > right_max:
            if edge > -1:
                # peak is the mid point of the plateau
                peak_point.append(int((edge+centre)/2))
            edge = -1

    return peak_point


def get_maxima_indices(ion_intensities, points=3):

    """
    @summary: Find local maxima.

        A point is a maximum if it is strictly larger than every other
        point of the 'points' wide window centred on it; for a plateau
        that is entered by a rise and left by a fall, the mid point of
        the plateau is reported. An empty input gives an empty list.

    @param ion_intensities: A list of intensities for a single ion
    @type ion_intensities: ListType
    @param points: Peak if maxima over 'points' number of scans
        (even values are rounded up to the next odd value)
    @type points: IntType

    @return: A list of scan indices
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_list(ion_intensities):
        error("'ion_intensities' must be a List of numbers")
    if len(ion_intensities) == 0:
        # nothing to search; avoids indexing the first element below
        return []
    if not is_number(ion_intensities[0]):
        error("'ion_intensities' must be a List of numbers")

    return _find_maxima_indices(ion_intensities, points)


def get_maxima_list(ic, points=3):

    """
    @summary: List of retention time and intensity of local maxima for ion

    @param ic: An IonChromatogram object
    @type ic: pyms.GCMS.Class.IonChromatogram
    @param points: Peak if maxima over 'points' number of scans
    @type points: IntType

    @return: A list of [retention time, intensity] pairs of local maxima
        for ion
    @rtype: ListType

    @author: Andrew Isaac
    """

    mlist = []
    for index in get_maxima_indices(ic.get_intensity_array(), points):
        rt = ic.get_time_at_index(index)
        intens = ic.get_intensity_at_index(index)
        mlist.append([rt, intens])
    return mlist


def get_maxima_list_reduced(ic, mp_rt, points=13, window=3):

    """
    @summary: List of retention time and intensity of local maxima for ion
        Only peaks around a specific retention time are recorded
        created for use with gap filling algorithm

    @param ic: An IonChromatogram object
    @type ic: pyms.GCMS.Class.IonChromatogram
    @param mp_rt: The retention time of the missing peak
    @type mp_rt: floatType
    @param points: Peak if maxima over 'points' number of scans
    @type points: IntType
    @param window: The window around the mp_rt where peaks should
        be recorded; maxima with mp_rt - window < rt < mp_rt + window
        are kept
    @type window: intType

    @return: A list of [retention time, intensity] pairs of local maxima
        for ion
    @rtype: ListType

    @author: Andrew Isaac
    """

    lower = float(mp_rt) - window
    upper = float(mp_rt) + window

    mlist = []
    for index in get_maxima_indices(ic.get_intensity_array(), points):
        rt = ic.get_time_at_index(index)
        if lower < rt < upper:
            intens = ic.get_intensity_at_index(index)
            mlist.append([rt, intens])
    return mlist


def get_maxima_matrix(im, points=3, scans=1):

    """
    @summary: Get matrix of local maxima for each ion

        For every ion (column) the intensities at its local maxima are
        copied into an otherwise zero matrix. Then, for every scan, the
        rows inside a window of 'scans' scans are added to the row of
        that window with the largest total intensity and set to zero.

    @param im: An IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param points: Peak if maxima over 'points' number of scans
    @type points: IntType
    @param scans: To compensate for spectra skewing,
        peaks from 'scans' scans are combined (Default 1).
    @type scans: IntType

    @return: A matrix of each ion and scan and intensity at ion peaks
    @rtype: numpy.ndarray

    @author: Andrew Isaac
    """

    numrows, numcols = im.get_size()
    # zeroed matrix, size numrows*numcols
    maxima_im = numpy.zeros((numrows, numcols))
    raw_im = numpy.array(im.get_matrix_list())

    for col in range(numcols):  # assume all rows have same width
        # 1st, find maxima; 2nd, fill intensities
        for row in _find_maxima_indices(raw_im[:, col], points):
            maxima_im[row, col] = raw_im[row, col]

    # combine spectra within 'scans' scans.
    half = int(scans/2)
    for row in range(numrows):
        first = row - half  # first row of the window, may be off the matrix
        best = 0
        loc = 0
        # find the row with the largest tic among the scans in the window
        for ii in range(scans):
            if 0 <= first + ii < numrows:
                tic = maxima_im[first + ii].sum()
                if tic > best:
                    best = tic
                    loc = ii

        target = first + loc
        if target < 0:
            # No row in the window had a positive tic, so 'loc' kept its
            # default and points above the matrix. A negative index would
            # wrap around to the bottom rows (or raise IndexError).
            continue

        # move and add others to best
        for ii in range(scans):
            source = first + ii
            if ii != loc and 0 <= source < numrows:
                maxima_im[target] += maxima_im[source]
                maxima_im[source] = 0

    return maxima_im
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Deconvolution/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Deconvolution routines 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Display/Class.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class to Display Ion Chromatograms and TIC 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
class Display(object):
    """
    @summary: Class to display Ion Chromatograms and Total
              Ion Chromatograms from GCMS.Class.IonChromatogram

              Uses matplotlib module pyplot to do plotting

    @author: Sean O'Callaghan
    @author: Vladimir Likic
    """

    def __init__(self):
        """
        @summary: Initialises an instance of Display class
        """

        # Container to store plots
        self.__tic_ic_plots = []

        # color dictionary for plotting of ics; blue reserved
        # for TIC
        self.__col_ic = {0:'r', 1:'g', 2:'k', 3:'y', 4:'m', 5:'c'}
        self.__col_count = 0  # counter to keep track of colors

        # Peak list container
        self.__peak_list = []

        # Plotting Variables
        self.__fig = plt.figure()
        self.__ax = self.__fig.add_subplot(111)

    def plot_ics(self, ics, labels=None):

        """
        @summary: Adds an Ion Chromatogram or a
        list of Ion Chromatograms to plot list

        All chromatograms are drawn against the time list of the
        first one, cycling through the colours of the colour dictionary.

        @param ics: List of Ion Chromatograms m/z channels
        for plotting
        @type ics: list of pyms.GCMS.Class.IonChromatogram

        @param labels: Labels for plot legend
        @type labels: list of StringType
        """

        if not isinstance(ics, list):
            if isinstance(ics, IonChromatogram):
                ics = [ics]
            else:
                error("ics argument must be an IonChromatogram "
                      "or a list of Ion Chromatograms")

        if labels is not None and not isinstance(labels, list):
            labels = [labels]
        # TODO: take care of case where one element of ics is
        # not an IonChromatogram

        time_list = ics[0].get_time_list()
        intensity_list = [ic.get_intensity_array() for ic in ics]

        for i, intensities in enumerate(intensity_list):
            colour = self.__col_ic[self.__col_count]
            if labels is None:
                plot = plt.plot(time_list, intensities, colour)
            else:
                plot = plt.plot(time_list, intensities, colour,
                                label=labels[i])
            self.__tic_ic_plots.append(plot)
            # wrap around after the last colour
            self.__col_count = (self.__col_count + 1) % len(self.__col_ic)

    def plot_tic(self, tic, label=None):

        """
        @summary: Adds Total Ion Chromatogram to plot list

        @param tic: Total Ion Chromatogram
        @type tic: pyms.GCMS.Class.IonChromatogram

        @param label: label for plot legend
        @type label: StringType
        """

        if not isinstance(tic, IonChromatogram):
            error("TIC is not an Ion Chromatogram object")

        intensity_list = tic.get_intensity_array()
        time_list = tic.get_time_list()

        self.__tic_ic_plots.append(
            plt.plot(time_list, intensity_list, label=label))

    def plot_peaks(self, peak_list, label="Peaks"):

        """
        @summary: Plots the locations of peaks as found
        by PyMS.

        Each peak is drawn as a marker at its retention time, at a
        height equal to the sum of its mass spectrum intensities.

        @param peak_list: List of peaks
        @type peak_list: list of pyms.Peak.Class.Peak

        @param label: label for plot legend
        @type label: StringType
        """

        if not isinstance(peak_list, list):
            error("peak_list is not a list")

        # Keep a reference in self.__peak_list for onclick event handling
        self.__peak_list = peak_list

        time_list = [peak.get_rt() for peak in peak_list]
        height_list = [sum(peak.get_mass_spectrum().mass_spec)
                       for peak in peak_list]

        self.__tic_ic_plots.append(
            plt.plot(time_list, height_list, 'o', label=label))

    def get_5_largest(self, intensity_list, count=10):

        """
        @summary: Computes the indices of the largest ion intensities
        for writing to console

        Despite the historical name, the 'count' (default 10) largest
        intensities are reported. Equal intensities are all reported, in
        order of increasing index. If fewer than 'count' intensities are
        given, fewer indices are returned.

        @param intensity_list: List of Ion intensities
        @type intensity_list: listType
        @param count: Maximum number of indices to return
        @type count: IntType

        @return: Indices into 'intensity_list', largest intensity first
        @rtype: listType
        """

        # stable sort: ties keep their original (ascending index) order
        order = sorted(range(len(intensity_list)),
                       key=lambda i: intensity_list[i], reverse=True)
        return order[:count]

    def plot_mass_spec(self, rt, mass_list, intensity_list):

        """
        @summary: Plots the mass spec given a list of masses and intensities

        @param rt: The retention time for labelling of the plot
        @type rt: floatType

        @param mass_list: list of masses of the MassSpectrum object
        @type mass_list: listType

        @param intensity_list: List of intensities of the MassSpectrum object
        @type intensity_list: listType
        """

        new_fig = plt.figure()
        new_ax = new_fig.add_subplot(111)

        # x axis range runs from the minimum to the maximum m/z channel
        min_mz = min(mass_list)
        max_mz = max(mass_list)

        label = "Mass spec for peak at time " + "%5.2f" % rt

        plt.bar(mass_list, intensity_list, label=label, width=0.01)
        plt.xlim(min_mz, max_mz)
        new_ax.set_title(label)

        plt.show()

    def onclick(self, event):

        """
        @summary: Prints the highest intensity m/z channels for the
        selected peak (see get_5_largest).
        The peak is selected by clicking on it. If a button other than
        the left one is clicked, a new plot of the mass spectrum is displayed

        @param event: a mouse click by the user
        """

        intensity_list = []
        mass_list = []

        # xdata is None when the click is outside the axes: no peak there
        if event.xdata is not None:
            for peak in self.__peak_list:
                rt = peak.get_rt()
                if 0.9999*rt < event.xdata < 1.0001*rt:
                    ms = peak.get_mass_spectrum()
                    intensity_list = ms.mass_spec
                    mass_list = ms.mass_list

        if len(intensity_list) != 0:
            print("mass\t intensity")
            for i in self.get_5_largest(intensity_list):
                print("%s \t%s" % (mass_list[i], intensity_list[i]))
        else:  # if the selected point is not close enough to peak
            print("No Peak at this point")

        # Check if a button other than left was pressed, if so plot mass
        # spectrum. Also check that a peak was selected, not just whitespace
        if event.button != 1 and len(intensity_list) != 0:
            self.plot_mass_spec(event.xdata, mass_list, intensity_list)

    def do_plotting(self, plot_label=None):

        """
        @summary: Plots TIC and IC(s) if they have been created
        by plot_tic() or plot_ics(). Adds detected peaks
        if they have been added by plot_peaks()

        @param plot_label: Optional to supply a label or other
        definition of data origin
        @type plot_label: StringType

        """

        # if no plots have been created advise user
        if len(self.__tic_ic_plots) == 0:
            print('No plots have been created')
            print('Please call a plotting function before')
            print('calling do_plotting()')

        if plot_label is not None:
            self.__ax.set_title(plot_label)

        self.__ax.legend()

        self.__fig.canvas.draw()

        # If no peak list plot, no mouse click event
        if len(self.__peak_list) != 0:
            self.__fig.canvas.mpl_connect('button_press_event', self.onclick)
        plt.show()
# 17 | # # 18 | # You should have received a copy of the GNU General Public License # 19 | # along with this program; if not, write to the Free Software # 20 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # 21 | # # 22 | ############################################################################# 23 | 24 | 25 | import matplotlib.pyplot as plt 26 | 27 | import sys 28 | sys.path.append('/x/PyMS/') 29 | 30 | from pyms.Utils.Error import error 31 | from pyms.GCMS.Class import IonChromatogram 32 | 33 | 34 | def plot_ic(ic, line_label=" ", plot_title=" "): 35 | """ 36 | @summary: Plots an Ion Chromatogram or List of same 37 | 38 | @param ic: The ion chromatogram 39 | @type ic: pyms.GCMS.Class.IonChromatogram 40 | 41 | @param line_label: plot legend 42 | @type line_label: stringType 43 | 44 | @param plot_title: A label for the plot 45 | @type plot_title: String Type 46 | 47 | @author: Sean O'Callaghan 48 | """ 49 | 50 | #Plotting Variables 51 | fig = plt.figure() 52 | ax = fig.add_subplot(111) 53 | 54 | 55 | if not isinstance(ic, IonChromatogram): 56 | error("ics argument must be an IonChromatogram\ 57 | or a list of Ion Chromatograms") 58 | 59 | time_list = ic.get_time_list() 60 | 61 | 62 | 63 | intensity_list = ic.get_intensity_array() 64 | 65 | ic_plot = plt.plot(time_list, intensity_list, label=line_label) 66 | 67 | t = ax.set_title(plot_title) 68 | l = ax.legend() 69 | 70 | fig.canvas.draw 71 | plt.show() 72 | 73 | 74 | 75 | def plot_ms(mass_spec, plot_title=" "): 76 | 77 | """ 78 | @summary: Plots the mass spec given a list of masses and intensities 79 | 80 | @param mass_spec: The mass spectrum at a given time/index 81 | @type mass_spec: GCMS.Class.MassSpectrum 82 | 83 | @param plot_title: A label for the plot 84 | @type plot_title: String Type 85 | 86 | @author: Sean O'Callaghan 87 | """ 88 | 89 | fig = plt.figure() 90 | ax = fig.add_subplot(111) 91 | 92 | mass_list = mass_spec.mass_list 93 | intensity_list = mass_spec.mass_spec 94 | 95 | # to set x 
axis range find minimum and maximum m/z channels 96 | max_mz = mass_list[0] 97 | min_mz = mass_list[0] 98 | 99 | for i in range(len(mass_list)): 100 | if mass_list[i] > max_mz: 101 | max_mz = mass_list[i] 102 | 103 | for i in range(len(mass_list)): 104 | if mass_list[i] < min_mz: 105 | min_mz = mass_list[i] 106 | 107 | mass_spec_plot = plt.bar(mass_list, intensity_list,\ 108 | width=0.01) 109 | 110 | x_axis_range = plt.xlim(min_mz, max_mz) 111 | t = ax.set_title(plot_title) 112 | plt.show() 113 | -------------------------------------------------------------------------------- /pyms/Display/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for Displaying ICs and TIC 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Experiment/Class.py: -------------------------------------------------------------------------------- 1 | """ 2 | Models a GC-MS experiment represented by a list of signal peaks 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | from pyms.Utils.Error import error 26 | from pyms.Utils.Utils import is_str 27 | from pyms.Peak.Class import Peak 28 | from pyms.Peak.List.Utils import is_peak_list, sele_peaks_by_rt 29 | 30 | class Experiment: 31 | 32 | """ 33 | @summary: Models an experiment object 34 | 35 | @author: Vladimir Likic 36 | @author: Andrew Isaac 37 | """ 38 | 39 | def __init__(self, expr_code, peak_list): 40 | 41 | """ 42 | @summary: Models an experiment 43 | 44 | @param expr_code: Unique identifier for the experiment 45 | @type expr_code: StringType 46 | @param peak_list: A list of peak objects 47 | @type peak_list: ListType 48 | """ 49 | 50 | if not is_str(expr_code): 51 | error("'expr_code' must be a string") 52 | if not is_peak_list(peak_list): 53 | error("'peak_list' must be a list of Peak objects") 54 | 55 | self.__expr_code = expr_code 56 | self.__peak_list = peak_list 57 | 58 | def get_expr_code(self): 59 | 60 | """ 61 | @summary: Returns the expr_code of the experiment 62 | 63 | @return: The expr_code of the experiment 64 | @rtype: StringType 65 | """ 66 | 67 | return self.__expr_code 68 | 69 | def get_peak_list(self): 70 | 71 | """ 72 | @summary: Returns the peak list 73 | 74 | @return: A list of peak objects 75 | @rtype: ListType 76 | """ 77 | 78 | return self.__peak_list 79 | 80 | def sele_rt_range(self, rt_range): 81 | 82 | """ 83 | @summary: Discards all peaks which have the retention time outside 84 | the specified range 85 | 86 | @param rt_range: Min, max retention time given as a list [rt_min,rt_max] 87 | @type rt_range: ListType 88 | 89 | @return: none 90 | @rtype: NoneType 91 | """ 92 | 93 | peaks_sele = sele_peaks_by_rt(self.__peak_list, rt_range) 94 | self.__peak_list = peaks_sele 95 | -------------------------------------------------------------------------------- /pyms/Experiment/IO.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Functions related to experiment input/output 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | import string, cPickle 26 | 27 | from pyms.Utils.Error import error 28 | from pyms.Experiment.Class import Experiment 29 | from pyms.Utils.Utils import is_str 30 | from pyms.Utils.IO import file_lines 31 | from pyms.GCMS.Class import IntensityMatrix 32 | 33 | def load_expr(file_name): 34 | 35 | """ 36 | @summary: Loads an experiment saved with 'store_expr' 37 | 38 | @param file_name: Experiment file name 39 | @type file_name: StringType 40 | 41 | @return: The experiment intensity matrix and peak list 42 | @rtype: pyms.Experiment.Class.Experiment 43 | 44 | @author: Vladimir Likic 45 | @author: Andrew Isaac 46 | """ 47 | 48 | if not is_str(file_name): 49 | error("'file_name' not a string") 50 | 51 | fp = open(file_name,'rb') 52 | expr = cPickle.load(fp) 53 | fp.close() 54 | 55 | if not isinstance(expr, Experiment): 56 | error("'file_name' is not an Experiment object") 57 | 58 | return expr 59 | 60 | def store_expr(file_name, expr): 61 | 62 | """ 63 | @summary: stores an expriment to a file 64 | 65 | @param file_name: The name of the file 66 | @type file_name: StringType 67 | @param expr: An experiment object 68 | @type expr: pyms.Experiment.Class.Experiment 69 | 70 | @return: none 71 | @rtype: NoneType 72 | 73 | @author: Vladimir Likic 74 | @author: Andrew Isaac 75 | """ 76 | 77 | if not isinstance(expr, Experiment): 78 | error("argument not an instance of the class 'Experiment'") 79 | 80 | if not is_str(file_name): 81 | error("'file_name' not a string") 82 | 83 | fp = open(file_name,'wb') 84 | cPickle.dump(expr, fp, 1) 85 | fp.close() 86 | 87 | def read_expr_list(file_name): 88 | 89 | """ 90 | @summary: Reads the set of experiment files and returns a list of 91 | Experiment objects 92 | 93 | @param file_name: The name of the file which lists experiment 94 | dump file names, one file per line 95 | @type file_name: StringType 96 | 97 | @return: A list of Experiment 
instances 98 | @rtype: ListType 99 | 100 | @author: Vladimir Likic 101 | """ 102 | 103 | if not is_str(file_name): 104 | error("file_name argument must be a string") 105 | try: 106 | fp = open(file_name, 'r') 107 | except IOError: 108 | error("error opening file '%s' for reading" % file_name) 109 | 110 | exprfiles = fp.readlines() 111 | fp.close() 112 | 113 | exprl = [] 114 | 115 | for exprfile in exprfiles: 116 | 117 | exprfile = string.strip(exprfile) 118 | expr = load_expr(exprfile) 119 | 120 | exprl.append(expr) 121 | 122 | return exprl 123 | -------------------------------------------------------------------------------- /pyms/Experiment/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for modelling experiments 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/GCMS/IO/ANDI/Function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for reading manufacturer specific ANDI-MS data files 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | import math, copy 26 | 27 | import numpy 28 | 29 | from pyms.GCMS.Class import GCMS_data 30 | from pyms.GCMS.Class import Scan 31 | from pyms.Utils.Error import error, stop 32 | from pyms.Utils.Utils import is_str, is_int, is_float, is_number, is_list 33 | from pyms.Utils.Time import time_str_secs 34 | from pycdf import * 35 | 36 | try: 37 | from mpi4py import MPI 38 | except: 39 | pass 40 | 41 | def ANDI_reader(file_name): 42 | 43 | """ 44 | @summary: A reader for ANDI-MS NetCDF files, returns 45 | a GC-MS data object 46 | 47 | @param file_name: The name of the ANDI-MS file 48 | @type file_name: StringType 49 | 50 | @author: Qiao Wang 51 | @author: Andrew Isaac 52 | @author: Vladimir Likic 53 | """ 54 | 55 | ## TODO: use 'point_count' and allow for zero len scans 56 | 57 | # the keys used to retrieve certain data from the NetCDF file 58 | __MASS_STRING = "mass_values" 59 | __INTENSITY_STRING = "intensity_values" 60 | __TIME_STRING = "scan_acquisition_time" 61 | 62 | if not is_str(file_name): 63 | error("'file_name' must be a string") 64 | try: 65 | file = CDF(file_name) 66 | except CDFError: 67 | error("Cannot open file '%s'" % file_name) 68 | 69 | try:# avoid printing from each rank 70 | comm = MPI.COMM_WORLD 71 | rank = comm.Get_rank() 72 | size = comm.Get_size() 73 | 74 | if rank ==0: 75 | file_names = [] 76 | 77 | for i in range(1,size): 78 | recv_buffer = "" 79 | file_n = comm.recv(recv_buffer, i) 80 | file_names.append(file_n) 81 | 82 | print " -> Reading netCDF files:" 83 | print file_name 84 | for file_n in file_names: 85 | print file_n 86 | else: 87 | comm.send(file_name, dest=0) 88 | except: 89 | print " -> Reading netCDF file '%s'" % (file_name) 90 | 91 | 92 | 93 | 94 | 95 | scan_list = [] 96 | mass = file.var(__MASS_STRING) 97 | intensity = file.var(__INTENSITY_STRING) 98 | mass_values = mass.get().tolist() 99 | mass_list = [] 100 | mass_previous = 
mass_values[0] 101 | mass_list.append(mass_previous) 102 | intensity_values = intensity.get().tolist() 103 | intensity_list = [] 104 | intensity_previous = intensity_values[0] 105 | intensity_list.append(intensity_previous) 106 | if not len(mass_values) == len(intensity_values): 107 | error("length of mass_list is not equal to length of intensity_list !") 108 | for i in range(len(mass_values) - 1): 109 | # assume masses in ascending order until new scan 110 | if mass_previous <= mass_values[i + 1]: 111 | #print mass_values[i+1] 112 | mass_list.append(mass_values[i + 1]) 113 | mass_previous = mass_values[i + 1] 114 | intensity_list.append(intensity_values[i + 1]) 115 | intensity_previous = intensity_values[i + 1] 116 | # new scan 117 | else: 118 | scan_list.append(Scan(mass_list, intensity_list)) 119 | #print "Added scan" 120 | mass_previous = mass_values[i + 1] 121 | intensity_previous = intensity_values[i + 1] 122 | mass_list = [] 123 | intensity_list = [] 124 | mass_list.append(mass_previous) 125 | intensity_list.append(intensity_previous) 126 | # store final scan 127 | scan_list.append(Scan(mass_list, intensity_list)) 128 | time = file.var(__TIME_STRING) 129 | time_list = time.get().tolist() 130 | 131 | # sanity check 132 | if not len(time_list) == len(scan_list): 133 | error("number of time points (%d) does not equal the number of scans (%d)"%(len(time_list), len(scan_list))) 134 | 135 | data = GCMS_data(time_list, scan_list) 136 | 137 | return data 138 | 139 | def ANDI_writer(file_name, im): 140 | 141 | """ 142 | @summary: A reader for ANDI-MS NetCDF files, returns 143 | a GC-MS data object 144 | 145 | @param file_name: The name of the ANDI-MS file 146 | @type file_name: StringType 147 | @param im: The IntensityMatrix 148 | @type file_name: pyms.GCMS.Class.IntensityMatrix 149 | 150 | @author: Andrew Isaac 151 | """ 152 | 153 | # netCDF header info for compatability 154 | # attributes 155 | #dataset_completeness 0 CHAR 6 C1+C2 156 | #dataset_origin 4 CHAR 16 
Santa Clara, CA 157 | #experiment_date_time_stamp 7 CHAR 20 20081218044500+1100 158 | #experiment_title 6 CHAR 7 mix ma 159 | #experiment_type 10 CHAR 25 Centroided Mass Spectrum 160 | #external_file_ref_0 9 CHAR 8 MA_5C.M 161 | #languages 3 CHAR 8 English 162 | #ms_template_revision 1 CHAR 6 1.0.1 163 | #netcdf_file_date_time_stamp 5 CHAR 20 20090114001531+1100 164 | #netcdf_revision 2 CHAR 6 2.3.2 165 | #number_of_times_calibrated 12 INT 1 0 166 | #number_of_times_processed 11 INT 1 1 167 | #operator_name 8 CHAR 12 Dave and Su 168 | #raw_data_intensity_format 25 CHAR 6 Float 169 | #raw_data_mass_format 23 CHAR 6 Float 170 | #raw_data_time_format 24 CHAR 6 Short 171 | #sample_state 13 CHAR 12 Other State 172 | #test_detector_type 18 CHAR 20 Electron Multiplier 173 | #test_ionization_mode 16 CHAR 16 Electron Impact 174 | #test_ionization_polarity 17 CHAR 18 Positive Polarity 175 | #test_ms_inlet 15 CHAR 17 Capillary Direct 176 | #test_resolution_type 19 CHAR 20 Constant Resolution 177 | #test_scan_direction 21 CHAR 3 Up 178 | #test_scan_function 20 CHAR 10 Mass Scan 179 | #test_scan_law 22 CHAR 7 Linear 180 | #test_separation_type 14 CHAR 18 No Chromatography 181 | 182 | # dimensions 183 | #_128_byte_string 6 128 184 | #_16_byte_string 3 16 185 | #_255_byte_string 7 255 186 | #_2_byte_string 0 2 187 | #_32_byte_string 4 32 188 | #_4_byte_string 1 4 189 | #_64_byte_string 5 64 190 | #_8_byte_string 2 8 191 | #error_number 10 1 192 | #instrument_number 12 1 193 | #point_number 9 554826 X 194 | #range 8 2 195 | #scan_number 11 9865 196 | 197 | # variables 198 | #a_d_coaddition_factor 2 SHORT 0 scan_number(9865) 199 | #a_d_sampling_rate 1 DOUBLE 0 scan_number(9865) 200 | #actual_scan_number 7 INT 0 scan_number(9865) 201 | #error_log 0 CHAR 0 error_number(1), _64_byte_string(64) 202 | #flag_count 15 INT 0 scan_number(9865) 203 | #instrument_app_version 27 CHAR 0 instrument_number(1), 204 | #_32_byte_string(32) 205 | #instrument_comments 28 CHAR 0 instrument_number(1), 
206 | #_32_byte_string(32) 207 | #instrument_fw_version 25 CHAR 0 instrument_number(1), 208 | #_32_byte_string(32) 209 | #instrument_id 20 CHAR 0 instrument_number(1), 210 | #_32_byte_string(32) 211 | #instrument_mfr 21 CHAR 0 instrument_number(1), 212 | #_32_byte_string(32) 213 | #instrument_model 22 CHAR 0 instrument_number(1), 214 | #_32_byte_string(32) 215 | #instrument_name 19 CHAR 0 instrument_number(1), 216 | #_32_byte_string(32) 217 | #instrument_os_version 26 CHAR 0 instrument_number(1), 218 | #_32_byte_string(32) 219 | #instrument_serial_no 23 CHAR 0 instrument_number(1), 220 | #_32_byte_string(32) 221 | #instrument_sw_version 24 CHAR 0 instrument_number(1), 222 | #_32_byte_string(32) 223 | #intensity_values 18 FLOAT 3 point_number(554826) 224 | #inter_scan_time 5 DOUBLE 0 scan_number(9865) 225 | #mass_range_max 10 DOUBLE 0 scan_number(9865) 226 | #mass_range_min 9 DOUBLE 0 scan_number(9865) 227 | #mass_values 16 FLOAT 2 point_number(554826) 228 | #point_count 14 INT 0 scan_number(9865) 229 | #resolution 6 DOUBLE 0 scan_number(9865) 230 | #scan_acquisition_time 3 DOUBLE 0 scan_number(9865) 231 | #scan_duration 4 DOUBLE 0 scan_number(9865) 232 | #scan_index 13 INT 0 scan_number(9865) 233 | #time_range_max 12 DOUBLE 0 scan_number(9865) 234 | #time_range_min 11 DOUBLE 0 scan_number(9865) 235 | #time_values 17 FLOAT 2 point_number(554826) 236 | #total_intensity 8 DOUBLE 1 scan_number(9865) 237 | 238 | # variable information 239 | #intensity_values attributes 240 | 241 | #name idx type len value 242 | #-------------------- --- ---- --- ----- 243 | #add_offset 1 DOUBLE 1 0.0 244 | #scale_factor 2 DOUBLE 1 1.0 245 | #units 0 CHAR 26 Arbitrary Intensity Units 246 | 247 | #mass_values attributes 248 | 249 | #name idx type len value 250 | #-------------------- --- ---- --- ----- 251 | #scale_factor 1 DOUBLE 1 1.0 252 | #units 0 CHAR 4 M/Z 253 | 254 | #time_values attributes 255 | 256 | #name idx type len value 257 | #-------------------- --- ---- --- ----- 258 | 
#scale_factor 1 DOUBLE 1 1.0 259 | #units 0 CHAR 8 Seconds 260 | 261 | #total_intensity attributes 262 | 263 | #name idx type len value 264 | #-------------------- --- ---- --- ----- 265 | #units 0 CHAR 26 Arbitrary Intensity Units 266 | 267 | # netCDF dimension names 268 | __POINT_NUMBER = "point_number" 269 | __SCAN_NUMBER = "scan_number" 270 | 271 | # the keys used to create certain data from the NetCDF file 272 | __MASS_STRING = "mass_values" 273 | __INTENSITY_STRING = "intensity_values" 274 | __TIME_STRING = "scan_acquisition_time" 275 | __POINT_COUNT = "point_count" 276 | 277 | if not is_str(file_name): 278 | error("'file_name' must be a string") 279 | try: 280 | # Open netCDF file in overwrite mode, creating it if inexistent. 281 | nc = CDF(file_name, NC.WRITE|NC.TRUNC|NC.CREATE) 282 | # Automatically set define and data modes. 283 | nc.automode() 284 | except CDFError: 285 | error("Cannot create file '%s'" % file_name) 286 | 287 | mass_list = im.get_mass_list() 288 | time_list = im.get_time_list() 289 | 290 | # direct access, don't modify 291 | intensity_matrix = im.intensity_matrix 292 | 293 | # compress by ignoring zero intensities 294 | # included for consistency with imported netCDF format 295 | mass_values = [] 296 | intensity_values = [] 297 | point_count_values = [] 298 | for row in xrange(len(intensity_matrix)): 299 | pc = 0 # point count 300 | for col in xrange(len(intensity_matrix[0])): # all rows same len 301 | if (intensity_matrix[row][col] > 0): 302 | mass_values.append(mass_list[col]) 303 | intensity_values.append(intensity_matrix[row][col]) 304 | pc += 1 305 | point_count_values.append(pc) 306 | 307 | # sanity checks 308 | if not len(time_list) == len(point_count_values): 309 | error("number of time points does not equal the number of scans") 310 | 311 | # create dimensions 312 | # total number of data points 313 | dim_point_number = nc.def_dim(__POINT_NUMBER, len(mass_values)) 314 | # number of scans 315 | dim_scan_number = 
nc.def_dim(__SCAN_NUMBER, len(point_count_values)) 316 | 317 | # create variables 318 | # points 319 | var_mass_values = nc.def_var(__MASS_STRING, NC.FLOAT, dim_point_number) 320 | var_intensity_values = nc.def_var(__INTENSITY_STRING, NC.FLOAT, 321 | dim_point_number) 322 | # scans 323 | var_time_list = nc.def_var(__TIME_STRING, NC.DOUBLE, dim_scan_number) 324 | var_point_count_values = nc.def_var(__POINT_COUNT, NC.INT, 325 | dim_scan_number) 326 | 327 | # populate variables 328 | # points 329 | var_mass_values[:] = mass_values 330 | var_intensity_values[:] = intensity_values 331 | # scans 332 | var_time_list[:] = time_list 333 | var_point_count_values[:] = point_count_values 334 | 335 | # close file 336 | nc.close() 337 | 338 | -------------------------------------------------------------------------------- /pyms/GCMS/IO/ANDI/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for the manipulation of spectral libraries 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/GCMS/IO/JCAMP/Function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for I/O of data in JCAMP-DX format 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | from pyms.GCMS.Class import GCMS_data 26 | from pyms.GCMS.Class import Scan 27 | from pyms.Utils.IO import file_lines 28 | from pyms.Utils.Utils import is_str 29 | from pyms.Utils.Error import error 30 | 31 | import numpy 32 | 33 | def JCAMP_reader(file_name): 34 | 35 | """ 36 | @summary: Generic reader for JCAMP DX files, produces GC-MS data 37 | object 38 | 39 | @author: Qiao Wang 40 | @author: Andrew Isaac 41 | @author: Vladimir Likic 42 | """ 43 | 44 | if not is_str(file_name): 45 | error("'file_name' not a string") 46 | 47 | print " -> Reading JCAMP file '%s'" % (file_name) 48 | lines_list = open(file_name,'r') 49 | data = [] 50 | page_idx = 0 51 | xydata_idx = 0 52 | time_list = [] 53 | scan_list = [] 54 | 55 | for line in lines_list: 56 | if not len(line.strip()) == 0: 57 | prefix = line.find('#') 58 | # key word or information 59 | if prefix == 0: 60 | fields = line.split('=') 61 | if fields[0].find("##PAGE") >= 0: 62 | time = float(fields[2].strip()) #rt for the scan to be submitted 63 | time_list.append(time) 64 | page_idx = page_idx + 1 65 | elif fields[0].find("##DATA TABLE") >= 0: 66 | xydata_idx = xydata_idx + 1 67 | # data 68 | elif prefix == -1: 69 | if page_idx > 1 or xydata_idx > 1: 70 | if len(data) % 2 == 1: 71 | error("data not in pair !") 72 | mass = [] 73 | intensity = [] 74 | for i in range(len(data) / 2): 75 | mass.append(data[i * 2]) 76 | intensity.append(data[i * 2 + 1]) 77 | if not len(mass) == len(intensity): 78 | error("len(mass) is not equal to len(intensity)") 79 | scan_list.append(Scan(mass, intensity)) 80 | data = [] 81 | data_sub = line.strip().split(',') 82 | for item in data_sub: 83 | if not len(item.strip()) == 0: 84 | data.append(float(item.strip())) 85 | if page_idx > 1: 86 | page_idx = 1 87 | if xydata_idx > 1: 88 | xydata_idx = 1 89 | else: 90 | data_sub = line.strip().split(',') 91 | for item in data_sub: 92 | if not 
len(item.strip()) == 0: 93 | data.append(float(item.strip())) 94 | 95 | if len(data) % 2 == 1: 96 | error("data not in pair !") 97 | # get last scan 98 | mass = [] 99 | intensity = [] 100 | for i in range(len(data) / 2): 101 | mass.append(data[i * 2]) 102 | intensity.append(data[i * 2 + 1]) 103 | 104 | if not len(mass) == len(intensity): 105 | error("len(mass) is not equal to len(intensity)") 106 | scan_list.append(Scan(mass, intensity)) 107 | 108 | # sanity check 109 | if not len(time_list) == len(scan_list): 110 | error("number of time points does not equal the number of scans") 111 | 112 | data = GCMS_data(time_list, scan_list) 113 | 114 | return data 115 | -------------------------------------------------------------------------------- /pyms/GCMS/IO/JCAMP/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for I/O of JCAMP DX files 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/GCMS/IO/MZML/Function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for reading mzML data files 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
def mzML_reader(file_name):

    """
    @summary: A reader for mzML files, returns
        a GC-MS data object

    Only spectra that carry a scan start time (accession MS:1000016)
    contribute a scan; spectra without one are skipped so that the
    time list and the scan list stay the same length.

    @param file_name: The name of the mzML file
    @type file_name: StringType

    @return: GC-MS data object built from the time and scan lists
    @rtype: pyms.GCMS.Class.GCMS_data

    @author: Sean O'Callaghan
    """

    if not is_str(file_name):
        error("'file_name' must be a string")
    try:
        mzml_file = pymzml.run.Reader(file_name)
    except Exception:
        # 'Exception' rather than a bare except, so that Ctrl-C and
        # SystemExit are not reported as an unreadable file
        error("Cannot open file '%s'" % file_name)

    try:
        # avoid printing from each rank: rank 0 collects the file names
        # of all other ranks and prints them in one go
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

        if rank == 0:
            file_names = []

            for i in range(1, size):
                recv_buffer = ""
                file_n = comm.recv(recv_buffer, i)
                file_names.append(file_n)

            print(" -> Reading mzML files:")
            print(file_name)
            for file_n in file_names:
                print(file_n)
        else:
            comm.send(file_name, dest=0)
    except Exception:
        # best-effort fallback, e.g. when mpi4py could not be imported
        # and 'MPI' is therefore undefined
        print(" -> Reading mzML file '%s'" % (file_name))

    scan_list = []
    time_list = []

    for spectrum in mzml_file:
        mass_list = []
        intensity_list = []

        for mz, i in spectrum.peaks:
            mass_list.append(mz)
            intensity_list.append(i)

        for element in spectrum.xmlTree:
            # For some reason there are spectra with no time value;
            # these never reach the append below and are ignored
            if element.get('accession') == "MS:1000016":  # time value
                # We need time in seconds not minutes
                # NOTE(review): assumes the stored value is in minutes -
                # confirm against the unit recorded in the mzML file
                time_list.append(60*float(element.get('value')))
                scan_list.append(Scan(mass_list, intensity_list))

    data = GCMS_data(time_list, scan_list)

    return data
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/GCMS/IO/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Input/output functions for GC-MS data files 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
class MissingPeak(object):
    '''
    @summary: One peak reported as missing ('NA') for a sample in the
        area matrix written by PyMS, plus the values that gap filling
        recovers for it.

    @author: Jairus Bowne
    @author: Sean O'Callaghan
    '''

    def __init__(self, ci, qual_ion_1, qual_ion_2, rt=0.0):
        '''
        @summary: Stores the ions and retention time that identify the
            peak. The gap-filled area and exact retention time both
            start out as the string 'na'.

        @param ci: Common ion for the peak across samples in an experiment
        @type ci: IntType

        @param qual_ion_1: First qualifying ion of the peak
        @type qual_ion_1: IntType

        @param qual_ion_2: Second qualifying ion of the peak
        @type qual_ion_2: IntType

        @param rt: Retention time of the peak. May or may not be set
        @type rt: FloatType
        '''

        # values still to be determined by gap filling
        self.__ci_area = 'na'
        self.__exact_rt = 'na'

        # identity of the peak
        self.__rt = rt
        self.__qual_2 = qual_ion_2
        self.__qual_1 = qual_ion_1
        self.__ci = ci

    def get_ci(self):
        '''
        @summary: Gives the ion shared by this peak across the experiment

        @return: Common ion for the peak
        @rtype: IntType

        @author: Jairus Bowne
        '''

        common_ion = self.__ci
        return common_ion

    def get_qual_ion1(self):
        '''
        @summary: Gives the first qualifying (most abundant) ion

        @return: Most abundant ion
        @rtype: IntType

        @author: Jairus Bowne
        '''

        # TODO: Consider the abundance of ions when some (i.e. 73, 147)
        # have been im.null_mass()'d. Is there a way to determine whether
        # that has been done to generate the original peak list?
        first_ion = self.__qual_1
        return first_ion

    def get_qual_ion2(self):
        '''
        @summary: Gives the second qualifying (second most abundant) ion

        @return: Second most abundant ion
        @rtype: IntType

        @author: Jairus Bowne
        '''

        second_ion = self.__qual_2
        return second_ion

    def set_ci_area(self, ci_area):
        """
        @summary: Records the common ion area found by the gap fill
            algorithm

        @param ci_area: The area of the common ion
        @type ci_area: intType
        """
        self.__ci_area = ci_area

    def get_ci_area(self):
        """
        @summary: Gives the stored common ion area

        @return: The area of the common ion ('na' until it has been set)
        @rtype: intType
        """
        stored_area = self.__ci_area
        return stored_area

    def get_rt(self):
        """
        @summary: Gives the retention time passed in at construction

        @return: the retention time of the peak
        @rtype: floatType
        """
        stored_rt = self.__rt
        return stored_rt

    def set_exact_rt(self, rt):
        """
        @summary: Records the exact retention time of the peak

        @param rt: The retention time of the apex of the peak
        @type rt: floatType
        """
        self.__exact_rt = rt

    def get_exact_rt(self):
        """
        @summary: Gives the stored exact retention time

        @return: the exact retention time ('na' until it has been set)
        @rtype: floatType
        """
        stored_exact_rt = self.__exact_rt
        return stored_exact_rt


class Sample(object):
    """
    @summary: The missing peaks of one sample, kept in insertion order

    @author: Sean O'Callaghan
    """

    def __init__(self, sample_name, matrix_position):
        """
        @summary: Creates a sample that holds no missing peaks yet

        @param sample_name: the experiment code/name
        @type sample_name: stringType

        @param matrix_position: position along x-axis
            where sample is located
        @type matrix_position: intType
        """
        self.__missing_peak_list = []
        self.__matrix_position = matrix_position
        self.__sample_name = sample_name

    def get_name(self):
        """
        @summary: Gives the name the sample was created with

        @return: The name of the sample
        @rtype: stringType
        """
        name = self.__sample_name
        return name

    def add_missing_peak(self, missing_peak):
        """
        @summary: Appends a MissingPeak object to the sample; the
            argument is stored as given, without validation

        @param missing_peak: The missing peak object to be added
        @type missing_peak: pyms.Gapfill.Class.MissingPeak
        """
        self.__missing_peak_list.append(missing_peak)

    def get_missing_peaks(self):
        """
        @summary: Gives the internal list of MissingPeak objects
            (the list itself, not a copy)

        @return: list of pyms.Gapfill.Class.MissingPeak
        @rtype: listType
        """
        peaks = self.__missing_peak_list
        return peaks

    def get_mp_rt_area_dict(self):
        """
        @summary: Maps the retention time of every missing peak to its
            common ion area; for equal retention times the peak added
            last wins

        @return: a dict containing rt:area pairs
        @rtype: dictType
        """
        return dict(
            (peak.get_rt(), peak.get_ci_area())
            for peak in self.__missing_peak_list
        )

    def get_mp_rt_exact_rt_dict(self):
        """
        @summary: Maps the (average) retention time of every missing
            peak to its exact retention time; for equal retention times
            the peak added last wins

        @return: a dict of average_rt:exact_rt pairs
        @rtype: dictType
        """
        return dict(
            (peak.get_rt(), peak.get_exact_rt())
            for peak in self.__missing_peak_list
        )
# .csv reader (cloned from gcqc project)
def file2matrix(filename):
    '''
    @summary: Convert a .csv file to a matrix (list of lists)

    Every cell that float() accepts is stored as a float; all other
    cells (e.g. 'NA', UIDs, sample names) are kept as strings.

    @param filename: Filename (.csv) to convert (area.csv, area_ci.csv)
    @type filename: StringType

    @return: Data matrix
    @rtype: ListType (List of lists)
    '''

    matrix = []
    with open(filename) as fp:
        reader = csv.reader(fp, delimiter=",", quotechar="\"")
        for row in reader:
            newrow = []
            for each in row:
                try:
                    each = float(each)
                except ValueError:
                    # not a number: keep the original string
                    pass
                newrow.append(each)
            matrix.append(newrow)

    return matrix


def mp_finder(inputmatrix):
    """
    @summary: setup sample objects with missing peak objects
        Finds the 'NA's in the transformed area_ci.csv file and
        makes Sample objects with them

    @param inputmatrix: Data matrix derived from the area_ci.csv file
    @type inputmatrix: listType

    @return: list of Sample objects
    @rtype: list of pyms.Gapfill.Class.Sample
    """

    sample_list = []
    header = inputmatrix[0]

    # the header cell is looked up with and without a leading space
    try:
        ci_pos = header.index(' "Quant Ion"')
    except ValueError:
        ci_pos = header.index('"Quant Ion"')

    uid_pos = header.index('UID')

    # Set up the sample objects
    # NOTE(review): the slice starts AT the Quant Ion column, so the first
    # Sample is named after that column rather than a data file. The same
    # slice is used for the areas below, so the indices stay consistent.
    for i, sample_name in enumerate(header[ci_pos:]):
        print(sample_name)
        sample = Sample(sample_name, i+3)
        sample_list.append(sample)

    for line in inputmatrix[1:]:
        uid = line[uid_pos]
        common_ion = line[ci_pos]

        # first two UID fields are the qualifying ions, the last is the RT
        uid_fields = uid.split("-")
        qual_ion_1 = uid_fields[0]
        qual_ion_2 = uid_fields[1]
        rt = uid_fields[-1]

        for i, area in enumerate(line[ci_pos:]):
            if area == 'NA':
                missing_peak = MissingPeak(common_ion, qual_ion_1,
                                           qual_ion_2, rt)
                sample_list[i].add_missing_peak(missing_peak)

    return sample_list


def missing_peak_finder(sample, filename, points=13, null_ions=[73, 147],\
    crop_ions=[50,540], threshold=1000, rt_window=1, filetype='mzml'):
    """
    @summary: Integrates raw data around missing peak locations
        to fill in NAs in the data matrix

    The results are stored on the MissingPeak objects of 'sample'
    (common ion area and exact retention time); nothing is returned.

    @param sample: The sample object containing missing peaks
    @type sample: pyms.Gapfill.Class.Sample

    @param filename: Name of the raw data file
    @type filename: stringType

    @param points: Window (number of scans) passed to the
        Savitzky-Golay smoothing of each ion chromatogram (Default 13)
    @type points: intType

    @param null_ions: Ions to be deleted in the matrix
    @type null_ions: listType

    @param crop_ions: Range of Ions to be considered
    @type crop_ions: listType

    @param threshold: Minimum intensity of IonChromatogram allowable to fill
        missing peak
    @type threshold: intType

    @param rt_window: Window in seconds around average RT to look for
        missing peak
    @type rt_window: floatType

    @param filetype: either 'mzml' or 'cdf' (case-insensitive)
    @type filetype: stringType

    @raise ValueError: if 'filetype' is neither 'mzml' nor 'cdf'

    @author: Sean O'Callaghan
    """

    # the list defaults are only read, never modified, in this function

    print("Sample: %s File: %s" % (sample.get_name(), filename))

    file_kind = filetype.lower()
    if file_kind == 'cdf':
        data = ANDI_reader(filename)
    elif file_kind == 'mzml':
        data = mzML_reader(filename)
    else:
        # previously only a message was printed and the function then
        # failed further down on the unassigned 'data'
        raise ValueError("file type not valid: '%s' "
                         "(expected 'mzml' or 'cdf')" % filetype)

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # get the size of the intensity matrix
    n_scan, n_mz = im.get_size()

    # smooth (twice) and baseline-correct every ion chromatogram
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    for mp in sample.get_missing_peaks():

        mp_rt = mp.get_rt()
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        print("ci =  %s" % (common_ion,))
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        print("qi1 =  %s" % (qual_ion_1,))
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        print("qi2 =  %s" % (qual_ion_2,))

        # Convert the RT window from seconds to a number of scans by
        # comparing the scan indices at rt and at rt - rt_window
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt)-rt_window)
        rt_window_points = points_1 - points_2
        print("rt_window =  %s" % (rt_window_points,))

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt,
                                              rt_window_points)

        # keep maxima whose common ion exceeds the threshold and whose
        # two qualifying ions both exceed half of it
        large_peaks = []
        for rt, intens in maxima_list:
            if intens > threshold:
                q1_index = qi1_ion_chrom.get_index_at_time(rt)
                q2_index = qi2_ion_chrom.get_index_at_time(rt)

                q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
                q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

                if q1_intensity > threshold/2 and q2_intensity > threshold/2:
                    large_peaks.append([rt, intens])

        print('found %d peaks above threshold'%len(large_peaks))

        areas = []
        for peak in large_peaks:
            apex = ci_ion_chrom.get_index_at_time(peak[0])
            ia = ci_ion_chrom.get_intensity_array().tolist()
            area, left, right, l_share, r_share = ion_area(ia, apex, 0)
            areas.append(area)

        if areas:
            biggest_area = max(areas)
            mp.set_ci_area(biggest_area)
            # NOTE(review): this stores the average RT (converted to
            # minutes), not the RT of the peak that gave the largest
            # area - confirm that this is intended
            mp.set_exact_rt("{:.3f}".format(float(mp_rt)/60.0))
            print("found area: %s at rt: %s" % (biggest_area, mp_rt))
        else:
            print("Missing peak at rt =  %s" % (mp_rt,))
            mp.set_ci_area('na')


def transposed(lists):
    """
    @summary: transposes a list of lists

    Rows of unequal length are padded with None, as the Python 2
    map() based implementation did. The result is always a real list,
    also on Python 3.

    @param lists: the list of lists to be transposed
    @type lists: listType

    @return: the transposed list of lists
    @rtype: listType
    """

    if not lists:
        return []

    # imported here so that the module-level imports stay untouched
    try:
        from itertools import izip_longest as zip_longest  # Python 2
    except ImportError:
        from itertools import zip_longest  # Python 3

    return [list(row) for row in zip_longest(*lists)]


def _write_filled_matrix(sample_list, in_file, out_file, n_id_cols,
                         first_sample_col, get_fill_dict):
    """
    @summary: Shared worker of write_filled_csv() and
        write_filled_rt_csv(): copies a PyMS .csv matrix and replaces
        'NA' cells with values looked up by retention time

    @param sample_list: A list of sample objects
    @type sample_list: list of Class.Sample
    @param in_file: the .csv file to read
    @type in_file: stringType
    @param out_file: the .csv file to write
    @type out_file: stringType
    @param n_id_cols: number of leading columns copied unchanged
    @type n_id_cols: intType
    @param first_sample_col: index of the first sample column
    @type first_sample_col: intType
    @param get_fill_dict: called with a sample, returns its rt:value dict
    @type get_fill_dict: functionType
    """

    old_matrix = file2matrix(in_file)

    # work column-wise; list() keeps the result indexable on Python 3
    columns = list(zip(*old_matrix))

    # the RT of each data row is the last '-' separated field of its UID
    rt_list = [uid.split('-')[-1] for uid in columns[0][1:]]

    new_columns = list(columns[0:n_id_cols])

    for column in columns[first_sample_col:]:
        sample_name = column[0]

        # start from an empty dict so that a column without a matching
        # sample never reuses the values of the previous column
        fill_dict = {}
        for sample in sample_list:
            if sample_name in sample.get_name():
                fill_dict = get_fill_dict(sample)

        new_column = [sample_name]
        for i, part in enumerate(column[1:]):
            if part == 'NA':
                # keep 'NA' when nothing was found: dropping the cell
                # would shift every following value of this column up
                new_column.append(fill_dict.get(str(rt_list[i]), part))
            else:
                new_column.append(part)

        new_columns.append(new_column)

    new_matrix = transposed(new_columns)

    # every cell is followed by a comma, including the last one of a row
    with open(out_file, 'w') as fp_new:
        for line in new_matrix:
            fp_new.write("".join(str(part) + ',' for part in line))
            fp_new.write("\n")


def write_filled_csv(sample_list, area_file, filled_area_file):
    """
    @summary: creates a new area_ci.csv file, replacing NAs with
        values from the sample_list objects where possible

    The first two columns are copied unchanged and the columns from
    index 3 onwards are treated as samples. 'NA' cells without a
    gap-filled value stay 'NA'.

    NOTE(review): the column at index 2 is not written to the output -
    confirm that dropping it is intended.

    @param sample_list: A list of sample objects
    @type sample_list: list of Class.Sample

    @param area_file: the file 'area_ci.csv' from PyMS output
    @type area_file: stringType

    @param filled_area_file: the new output file which has NA
        values replaced
    @type filled_area_file: stringType
    """

    _write_filled_matrix(sample_list, area_file, filled_area_file, 2, 3,
                         lambda sample: sample.get_mp_rt_area_dict())


def write_filled_rt_csv(sample_list, rt_file, filled_rt_file):
    """
    @summary: creates a new rt.csv file, replacing NAs with
        values from the sample_list objects where possible

    The first column is copied unchanged and the columns from index 2
    onwards are treated as samples. 'NA' cells without a gap-filled
    value stay 'NA'.

    NOTE(review): the column at index 1 is not written to the output -
    confirm that dropping it is intended.

    @param sample_list: A list of sample objects
    @type sample_list: list of Class.Sample

    @param rt_file: the file 'rt.csv' from PyMS output
    @type rt_file: stringType

    @param filled_rt_file: the new output file which has NA
        values replaced
    @type filled_rt_file: stringType
    """

    _write_filled_matrix(sample_list, rt_file, filled_rt_file, 1, 2,
                         lambda sample: sample.get_mp_rt_exact_rt_dict())
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Noise/Analysis.py: -------------------------------------------------------------------------------- 1 | """ 2 | Noise analysis functions 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
_DEFAULT_WINDOW = 256
_DEFAULT_N_WINDOWS = 1024

def window_analyzer(ic, window=_DEFAULT_WINDOW, n_windows=_DEFAULT_N_WINDOWS, rand_seed=None ):

    """
    @summary: A simple estimator of the signal noise based on randomly
        placed windows and median absolute deviation

    The noise value is estimated by repeatedly picking random
    windows (of a specified width) and calculating median absolute
    deviation (MAD). The noise estimate is given by the minimum MAD.
    Each window position is analyzed at most once. If 'n_windows' is
    not smaller than the number of possible positions, every position
    is analyzed exactly once instead of drawing at random.

    @param ic: An IonChromatogram object
    @type ic: pyms.GCMS.Class.IonChromatogram
    @param window: Window width selection
    @type window: IntType or StringType
    @param n_windows: The number of windows to calculate
    @type n_windows: IntType
    @param rand_seed: Seed for the random number generator; None
        leaves the seeding to the random module
    @type rand_seed: IntType

    @return: The noise estimate
    @rtype: FloatType

    @raise ValueError: if the window is wider than the ion chromatogram

    @author: Vladimir Likic
    """

    if not is_ionchromatogram(ic):
        error("argument must be an IonChromatogram object")

    ia = ic.get_intensity_array() # fetch the intensities

    # create an instance of the Random class
    if rand_seed is not None:
        generator = random.Random(rand_seed)
    else:
        generator = random.Random()

    window_pts = window_sele_points(ic, window)

    # last valid start index of a window; maxi+1 distinct positions exist
    maxi = ia.size - window_pts
    if maxi < 0 and n_windows > 0:
        raise ValueError("window of %d points is wider than the ion "
            "chromatogram (%d points)" % (window_pts, ia.size))
    n_positions = maxi + 1

    # upper bound: no window can be noisier than the full signal range
    noise_level = math.fabs(ia.max()-ia.min())

    if n_windows >= n_positions:
        # Asking for more distinct windows than exist used to loop
        # forever; analyzing each position once gives the same minimum
        # the random search would eventually reach
        positions = range(n_positions)
    else:
        # rejection sampling keeps the sequence of draws (and with it
        # the result for a given seed) the same as before
        seen_positions = set()
        positions = []
        while len(positions) < n_windows:
            # generator.randrange(): last point not included in range
            try_pos = generator.randrange(0, maxi+1)
            # only process the window if not analyzed previously
            if try_pos not in seen_positions:
                seen_positions.add(try_pos)
                positions.append(try_pos)

    for start in positions:
        crnt_mad = MAD(ia[start:start + window_pts])
        if crnt_mad < noise_level:
            noise_level = crnt_mad

    return noise_level
__DEFAULT_WINDOW = 7
__DEFAULT_POLYNOMIAL_DEGREE = 2

def savitzky_golay(ic, window=__DEFAULT_WINDOW, \
        degree=__DEFAULT_POLYNOMIAL_DEGREE):

    """
    @summary: Applies Savitzky-Golay filter on ion chromatogram

    The input is not modified; a deep copy carrying the smoothed
    intensities is returned.

    @param ic: The input ion chromatogram
    @type ic: pyms.GCMS.Class.IonChromatogram
    @param window: The window selection parameter. This can be an integer
        or time string. If integer, taken as the number of points. If a
        string, must of the form "<NUMBER>s" or "<NUMBER>m", specifying
        a time in seconds or minutes, respectively
    @type window: IntType or StringType
    @param degree: degree of the fitting polynomial for the Savitzky-Golay
        filter
    @type degree: IntType

    @return: Smoothed ion chromatogram
    @rtype: pyms.GCMS.Class.IonChromatogram

    @author: Uwe Schmitt
    @author: Vladimir Likic
    """

    # 'error' is not among this module's imports, so the checks below
    # used to fail with a NameError instead of reporting the problem
    from pyms.Utils.Error import error

    if not is_ionchromatogram(ic):
        error("'ic' not an IonChromatogram object")

    if not is_int(degree):
        error("'degree' not an integer")

    ia = ic.get_intensity_array()

    # number of points on each side of the centre point
    wing_length = ic_window_points(ic, window, half_window=True)

    coeff = __calc_coeff(wing_length, degree)
    ia_denoise = __smooth(ia, coeff)

    ic_denoise = copy.deepcopy(ic)
    ic_denoise.set_intensity_array(ia_denoise)

    return ic_denoise

def savitzky_golay_im(im, window=__DEFAULT_WINDOW, \
        degree=__DEFAULT_POLYNOMIAL_DEGREE):
    """
    @summary: Applies Savitzky-Golay filter on Intensity
        Matrix

        Simply wraps around the Savitzky Golay
        function above; the input matrix is not modified

    @param im: The input IntensityMatrix
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param window: The window selection parameter.
    @type window: IntType or StringType

    @param degree: degree of the fitting polynomial for the Savitzky-Golay
        filter
    @type degree: IntType

    @return: Smoothed IntensityMatrix
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Sean O'Callaghan
    @author: Vladimir Likic
    """

    n_scan, n_mz = im.get_size()

    im_smooth = copy.deepcopy(im)

    # smooth the ion chromatogram of every m/z channel in the copy
    for ii in range(n_mz):
        ic = im_smooth.get_ic_at_index(ii)
        ic_smooth = savitzky_golay(ic, window, degree)
        im_smooth.set_ic_at_index(ii, ic_smooth)

    return im_smooth

def __calc_coeff(num_points, pol_degree, diff_order=0):

    """
    @summary: Calculates filter coefficients for symmetric savitzky-golay
        filter

    See: http://www.nrbook.com/a/bookcpdf/c14-8.pdf

    @param num_points: Means that 2*num_points+1 values contribute to
        the smoother
    @type num_points: IntType
    @param pol_degree: The degree of fitting polynomial
    @type pol_degree: IntType
    @param diff_order: The degree of implicit differentiation. 0 means
        that filter results in smoothing of function, 1 means that filter
        results in smoothing the first derivative of function, and so on.
        Always use 0

    @return: Filter coefficients
    @rtype: numpy.ndarray

    @author: Uwe Schmitt
    @copyright: Uwe Schmitt
    """

    # setup normal matrix: A[i,j] = (i-num_points)**j
    offsets = numpy.arange(-num_points, num_points+1, dtype=float)
    powers = numpy.arange(pol_degree+1)
    A = offsets[:, numpy.newaxis] ** powers

    # calculate diff_order-th row of inv(A^T A)
    ATA = numpy.dot(A.transpose(), A)
    rhs = numpy.zeros((pol_degree+1,), float)
    rhs[diff_order] = 1
    D = numpy.linalg.cholesky(ATA)
    wvec = __resub(D, rhs)

    # calculate filter-coefficients: coeff[k] = sum_m wvec[m]*offset_k**m
    coeff = numpy.dot(A, wvec)

    return coeff

def __resub(D, rhs):

    """
    @summary: Solves D D^T x = rhs by resubstitution

    D is lower triangle-matrix from cholesky-decomposition

    @param D: Lower triangular Cholesky factor
    @type D: numpy.ndarray
    @param rhs: Right-hand side vector
    @type rhs: numpy.ndarray

    @return: The solution vector x
    @rtype: numpy.ndarray

    @author: Uwe Schmitt
    @copyright: Uwe Schmitt
    """

    M = D.shape[0]
    x1 = numpy.zeros((M,), float)
    x2 = numpy.zeros((M,), float)

    # resub step 1: forward substitution, D x1 = rhs
    for l in range(M):
        acc = rhs[l]
        for n in range(l):
            acc -= D[l,n]*x1[n]
        x1[l] = acc/D[l,l]

    # resub step 2: back substitution, D^T x2 = x1
    for l in range(M-1,-1,-1):
        acc = x1[l]
        for n in range(l+1,M):
            acc -= D[n,l]*x2[n]
        x2[l] = acc/D[l,l]

    return x2

def __smooth(signal, coeff):

    """
    @summary: Applies coefficients calculated by __calc_coeff()
        to signal

    The full convolution is trimmed by the half width of the filter
    on both sides, so the result has the length of 'signal'.

    @param signal: The values to be smoothed
    @type signal: numpy.ndarray
    @param coeff: Filter coefficients (odd number of values)
    @type coeff: numpy.ndarray

    @return: The smoothed values
    @rtype: numpy.ndarray

    @author: Uwe Schmitt
    @copyright: Uwe Schmitt
    """

    # half width of the filter; '//' keeps it an integer on Python 3,
    # where '/' would give a float that cannot be used as a slice index
    N = (numpy.size(coeff) - 1) // 2
    res = numpy.convolve(signal, coeff)
    # explicit end index: res[N:-N] would be empty for N == 0
    return res[N:len(res) - N]
__DEFAULT_WINDOW = 3


def window_smooth(ic, window=__DEFAULT_WINDOW, median=False):

    """
    @summary: Applies window smoothing on ion chromatogram

    The input chromatogram is left untouched; a deep copy carrying the
    smoothed intensity array is returned.

    @param ic: The input ion chromatogram
    @type ic: pyms.GCMS.Class.IonChromatogram
    @param window: The window selection parameter. This can be an integer
        or time string. If integer, taken as the number of points. If a
        string, it must specify a time in seconds ("s") or minutes ("m")
    @type window: IntType or StringType
    @param median: An indicator whether the mean or median window smoothing
        to be used
    @type median: Booleantype

    @return: Smoothed ion chromatogram
    @rtype: pyms.GCMS.Class.IonChromatogram

    @author: Vladimir Likic
    """

    if not is_ionchromatogram(ic):
        # 'error' is not imported at module level in this file; import it
        # here so the failure is reported instead of raising NameError
        from pyms.Utils.Error import error
        error("'ic' not an IonChromatogram object")

    ia = ic.get_intensity_array()

    # number of points on either side of the central point
    wing_length = ic_window_points(ic, window, half_window=True)

    if median:
        ia_denoise = __median_window(ia, wing_length)
    else:
        ia_denoise = __mean_window(ia, wing_length)

    ic_denoise = copy.deepcopy(ic)
    ic_denoise.set_intensity_array(ia_denoise)

    return ic_denoise


def window_smooth_im(im, window=__DEFAULT_WINDOW, median=False):

    """
    @summary: Applies window smoothing on Intensity Matrix

    Simply wraps around window_smooth(), smoothing every ion
    chromatogram of a deep copy of the matrix in turn

    @param im: The input Intensity Matrix
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param window: The window selection parameter.
    @type window: IntType or StringType
    @param median: An indicator whether the mean or median window smoothing
        to be used
    @type median: Booleantype

    @return: Smoothed Intensity Matrix
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Sean O'Callaghan
    @author: Vladimir Likic
    """

    n_scan, n_mz = im.get_size()

    im_smooth = copy.deepcopy(im)

    for ii in range(n_mz):
        ic = im_smooth.get_ic_at_index(ii)
        ic_smooth = window_smooth(ic, window, median)
        im_smooth.set_ic_at_index(ii, ic_smooth)

    return im_smooth


def __mean_window(ia, wing_length):

    """
    @summary: Applies mean-window averaging on the array of intensities.

    Near the array edges the window is clipped to the available points.

    @param ia: Intensity array
    @type ia: numpy.ndarray
    @param wing_length: An integer value representing the number of
        points on either side of a point in the ion chromatogram
    @type wing_length: IntType

    @return: Smoothed intensity array (floating point)
    @rtype: numpy.ndarray

    @author: Vladimir Likic
    """

    # float buffer: an integer buffer would silently truncate the means
    ia_denoise = numpy.zeros(ia.size, dtype=float)

    for index in range(ia.size):
        left = max(index - wing_length, 0)
        right = index + wing_length + 1  # slicing clips at the array end
        ia_denoise[index] = ia[left:right].mean()

    return ia_denoise


def __median_window(ia, wing_length):

    """
    @summary: Applies median-window averaging on the array of intensities.

    Near the array edges the window is clipped to the available points.

    @param ia: Intensity array
    @type ia: numpy.ndarray
    @param wing_length: An integer value representing the number of
        points on either side of a point in the ion chromatogram
    @type wing_length: IntType

    @return: Smoothed intensity array (floating point)
    @rtype: numpy.ndarray

    @author: Vladimir Likic
    """

    # float buffer: an integer buffer would silently truncate the medians
    ia_denoise = numpy.zeros(ia.size, dtype=float)

    for index in range(ia.size):
        left = max(index - wing_length, 0)
        right = index + wing_length + 1  # slicing clips at the array end
        ia_denoise[index] = median(ia[left:right])

    return ia_denoise
""" 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Peak/Function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions related to Peak modification 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. 
# If psyco is installed, use it to speed up running time
try:
    import psyco
    psyco.full()
except ImportError:
    pass


def peak_sum_area(im, peak, single_ion=False, max_bound=0):

    """
    @summary: Calculate the sum of the raw ion areas based on
        detected boundaries.

    @param im: The originating IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param peak: The Peak object
    @type peak: pyms.Peak.Class.Peak
    @param single_ion: whether single ion areas should be returned
    @type single_ion: BooleanType
    @param max_bound: Optional value to limit size of detected bound
    @type max_bound: IntType

    @return: Sum of peak apex ions in detected bounds. If 'single_ion'
        is true, a tuple of that sum and a dictionary of mass:area pairs
    @rtype: FloatType or TupleType

    @author: Andrew Isaac
    @author: Sean O'Callaghan
    """

    sum_area = 0
    # Use internal values (not copy)
    mat = im.intensity_matrix
    ms = peak.get_mass_spectrum()
    rt = peak.get_rt()
    apex = im.get_index_at_time(rt)

    # get peak masses with non-zero intensity
    mass_ii = [ii for ii in range(len(ms.mass_list))
               if ms.mass_spec[ii] > 0]

    area_dict = {}
    # get stats on boundaries
    for ii in mass_ii:
        # get ion chromatogram as list
        ia = [scan[ii] for scan in mat]
        area, left, right, l_share, r_share = ion_area(ia, apex, max_bound)
        # need actual mass for single ion areas
        area_dict[ms.mass_list[ii]] = area
        sum_area += area

    if single_ion:
        return sum_area, area_dict
    return sum_area


def peak_top_ion_areas(im, peak, n_top_ions=5, max_bound=0):

    """
    @summary: Calculate and return the ion areas of the 'n_top_ions'
        most abundant ions in the peak

    @param im: The originating IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param peak: The Peak object
    @type peak: pyms.Peak.Class.Peak
    @param n_top_ions: Number of most abundant ions to be used
    @type n_top_ions: IntType
    @param max_bound: Optional value to limit size of detected bound
    @type max_bound: IntType

    @return: dictionary of ion:ion_area pairs
    @rtype: dictType

    @author: Sean O'Callaghan
    """

    rt = peak.get_rt()
    apex = im.get_index_at_time(rt)

    ion_areas = {}  # Dictionary to store ion:ion_area pairs

    for ion in top_ions_v2(peak, n_top_ions):
        ion_chrom = im.get_ic_at_mass(ion)
        # need ia as a list not numpy array so use .tolist()
        ia = ion_chrom.get_intensity_array().tolist()
        area, left, right, l_share, r_share = ion_area(ia, apex, max_bound)
        ion_areas[ion] = area

    return ion_areas


def top_ions_v1(peak, num_ions=5):

    """
    @summary: Computes the highest 'num_ions' intensity ions

    Ions are reported in mass-list order. Every ion whose intensity
    equals one of the top intensities is included, so ties can yield
    more than 'num_ions' ions.

    @param peak: the peak to be processed
    @type peak: pyms.Peak.Class.Peak
    @param num_ions: the number of ions to be recorded
    @type num_ions: IntType

    @return: a list of the highest intensity ions
    @rtype: ListType

    @author: Sean O'Callaghan
    """

    ms = peak.get_mass_spectrum()
    intensity_list = ms.mass_spec
    mass_list = ms.mass_list

    # a slice of [-0:] would select every intensity, not none
    if num_ions <= 0:
        return []

    top_intensities = set(sorted(intensity_list)[-num_ions:])

    return [mass for mass, intensity in zip(mass_list, intensity_list)
            if intensity in top_intensities]


def top_ions_v2(peak, num_ions=5):

    """
    @summary: Computes the highest #num_ions intensity ions

    @param peak: the peak to be processed
    @type peak: pyms.Peak.Class.Peak
    @param num_ions: the number of ions to be recorded
    @type num_ions: IntType

    @return: a list of the #num_ions highest intensity ions, ordered
        by increasing intensity
    @rtype: ListType

    @author: Sean O'Callaghan
    """

    ms = peak.get_mass_spectrum()
    intensity_list = ms.mass_spec
    mass_list = ms.mass_list

    # a slice of [-0:] would select every ion, not none
    if num_ions <= 0:
        return []

    # sort (intensity, mass) pairs and keep the tail
    ranked = sorted(zip(intensity_list, mass_list))

    return [mass for _, mass in ranked[-num_ions:]]


def ion_area(ia, apex, max_bound=0, tol=0.5):

    """
    @summary: Find bounds of peak by summing intensities until change in
        sum is less than 'tol' percent of the current area.

    @param ia: Intensities for a given mass (list or numpy array)
    @type ia: ListType
    @param apex: Index of the peak apex.
    @type apex: IntType
    @param max_bound: Optional value to limit size of detected bound
    @type max_bound: IntType
    @param tol: Percentage tolerance of added area to current area.
    @type tol: FloatType

    @return: Area, left and right boundary offset, shared left, shared right
    @rtype: TupleType

    @author: Andrew Isaac
    """

    # Left area
    # copy into a list so that plain sequences and numpy arrays both work
    lhs = list(ia[:apex + 1])
    lhs.reverse()  # reverse, as search to right is bounds safe
    l_area, left, l_share = half_area(lhs, max_bound, tol)

    # Right area
    rhs = ia[apex:]
    r_area, right, r_share = half_area(rhs, max_bound, tol)
    r_area -= ia[apex]  # counted apex twice for tolerance, now ignore

    # Put it all together
    return l_area + r_area, left, right, l_share, r_share


def half_area(ia, max_bound=0, tol=0.5):

    """
    @summary: Find bound of peak by summing intensities until change in
        sum is less than 'tol' percent of the current area.

    @param ia: List of intensities from Peak apex for a given mass
    @type ia: ListType
    @param max_bound: Optional value to limit size of detected bound
    @type max_bound: IntType
    @param tol: Percentage tolerance of added area to current area.
    @type tol: FloatType

    @return: Half peak area, boundary offset, shared (True if shared ion)
    @rtype: TupleType

    @author: Andrew Isaac
    """

    tol = tol / 200.0  # halve and convert from percent

    # Default number of points to sum new area across, for smoothing
    wide = 3

    # start at 0, compare average value of 'wide' points to the right,
    # centre 'wide' points on edge point,
    # and keep moving right until:
    #     i) tolerance reached
    #     ii) edge area starts increasing
    #     iii) bound reached

    # initialise areas and bounds
    shared = False
    area = ia[0]
    edge = float(sum(ia[0:wide])) / wide
    old_edge = 2 * edge  # bigger than expected edge
    index = 1
    if max_bound < 1:
        limit = len(ia)
    else:
        limit = min(max_bound + 1, len(ia))

    while edge > area * tol and edge < old_edge and index < limit:
        old_edge = edge
        area += ia[index]
        edge = float(sum(ia[index:index + wide])) / wide  # bounds safe
        index += 1

    # a rising edge means the ion is shared with a neighbouring peak
    if edge >= old_edge:
        shared = True
    index -= 1

    return area, index, shared


def median_bounds(im, peak, shared=True):

    """
    @summary: Calculates the median of the left and right bounds found
        for each apexing peak mass

    @param im: The originating IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param peak: The Peak object
    @type peak: pyms.Peak.Class.Peak
    @param shared: Include shared ions shared with neighbouring peak
    @type shared: BooleanType

    @return: median left and right boundary offset in points
    @rtype: TupleType

    @author: Andrew Isaac
    """

    mat = im.get_matrix_list()
    ms = peak.get_mass_spectrum()
    rt = peak.get_rt()
    apex = im.get_index_at_time(rt)
    # check if RT based index is similar to stored index
    # NOTE(review): with integer indices this strict comparison only holds
    # when tmp[1] == apex, making the assignment a no-op; confirm whether
    # an inclusive +/-1 range was intended
    tmp = peak.get_pt_bounds()
    if is_list(tmp) and apex - 1 < tmp[1] < apex + 1:
        apex = tmp[1]

    # get peak masses with non-zero intensity
    mass_ii = [ii for ii in range(len(ms.mass_list))
               if ms.mass_spec[ii] > 0]

    # get stats on boundaries
    left_list = []
    right_list = []
    for ii in mass_ii:
        # get ion chromatogram as list
        ia = [scan[ii] for scan in mat]
        area, left, right, l_share, r_share = ion_area(ia, apex)
        if shared or not l_share:
            left_list.append(left)
        if shared or not r_share:
            right_list.append(right)

    # return medians
    # NB if shared=False, lists may be empty
    l_med = 0
    r_med = 0
    if len(left_list) > 0:
        l_med = median(left_list)
    if len(right_list) > 0:
        r_med = median(right_list)

    return l_med, r_med
def store_peaks(peak_list, file_name):

    """
    @summary: Store the list of peak objects

    @param peak_list: A list of peak objects
    @type peak_list: pyms.Peaks.Class.Peak
    @param file_name: File name to store peak list
    @type file_name: StringType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' must be a string")

    # pickle protocol 1 is a binary format, so the file must be opened in
    # binary mode; 'with' closes the file even if dumping fails
    with open(file_name, 'wb') as fp:
        cPickle.dump(peak_list, fp, 1)


def load_peaks(file_name):

    """
    @summary: Loads the peak_list stored with 'store_peaks'

    @param file_name: File name of peak list
    @type file_name: StringType

    @return: The list of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' not a string")

    # NOTE(security): unpickling can execute arbitrary code; only load
    # files that come from a trusted source
    with open(file_name, 'rb') as fp:
        peak_list = cPickle.load(fp)

    if not is_list(peak_list):
        error("'file_name' is not a List")

    # an empty list is a valid (empty) peak list; anything it does contain
    # must be a Peak
    if not all(isinstance(item, Peak) for item in peak_list):
        error("'peak_list' must be a list of Peak objects")

    return peak_list
# If psyco is installed, use it to speed up running time
try:
    import psyco
    psyco.full()
except ImportError:
    pass


def align_with_tree(T, min_peaks=1):

    """
    @summary: Aligns a list of alignments using the supplied guide tree

    @param T: The pairwise alignment object
    @type T: pyms.Peak.List.DPA.Class.PairwiseAlignment
    @param min_peaks: If greater than 1, passed to filter_min_peaks() of
        the final alignment
    @type min_peaks: IntType

    @return: The final alignment consisting of aligned input alignments
    @rtype: pyms.Peak.List.DPA.Class.Alignment

    @author: Woon Wai Keen
    @author: Vladimir Likic
    """

    # parenthesised single-argument form prints the same text whether
    # 'print' is a statement or a function
    print(" Aligning %d items with guide tree (D=%.2f, gap=%.2f)" %
          (len(T.algts), T.D, T.gap))

    # For everything else, we align according to the guide tree provided by
    # Pycluster. From Pycluster documentation:
    #   Each item and subnode is represented by an integer. For hierarchical
    #   clustering of n items, we number the original items {0, ... , n-1},
    #   nodes are numbered {-1, ... , -(n-1)}. Note that the number of nodes
    #   is one less than the number of items.

    # extend As to length 2n to hold the n items, n-1 nodes, and 1 root
    As = copy.deepcopy(T.algts) + [None for _ in range(len(T.algts))]

    # align the alignments into positions -1, ... ,-(n-1)
    total = len(T.tree)
    index = 0

    for node in T.tree:
        index = index - 1
        As[index] = align(As[node.left], As[node.right], T.D, T.gap)
        total = total - 1
        print(" -> %d item(s) remaining" % total)

    # the final alignment is in the root. Filter min peaks and return
    final_algt = As[index]

    # useful for within state alignment only
    if min_peaks > 1:
        final_algt.filter_min_peaks(min_peaks)

    return final_algt


def exprl2alignment(exprl):

    """
    @summary: Converts experiments into alignments

    @param exprl: The list of experiments to be converted into an alignment
        objects
    @type exprl: ListType

    @return: A list with one alignment object per experiment
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_list(exprl):
        error("the argument is not a list")

    algts = []

    for item in exprl:
        if not isinstance(item, Experiment):
            error("list items must be 'Experiment' instances")
        else:
            algts.append(Class.Alignment(item))

    return algts


def align(a1, a2, D, gap):

    """
    @summary: Aligns two alignments

    @param a1: The first alignment
    @type a1: pyms.Peak.List.Class.Alignment
    @param a2: The second alignment
    @type a2: pyms.Peak.List.Class.Alignment
    @param D: Retention time tolerance
    @type D: FloatType
    @param gap: Gap penalty
    @type gap: FloatType

    @return: Aligned alignments
    @rtype: pyms.Peak.List.Class.Alignment

    @author: Woon Wai Keen
    @author: Vladimir Likic
    """

    # calculate score matrix for two alignments
    M = score_matrix(a1, a2, D)

    # run dynamic programming
    result = dp(M, gap)

    # make composite alignment from the results
    ma = merge_alignments(a1, a2, result['trace'])

    # calculate the similarity score
    ma.similarity = alignment_similarity(result['trace'], M, gap)

    return ma


def merge_alignments(A1, A2, traces):

    """
    @summary: Merges two alignments with gaps added in from DP traceback

    @param A1: First alignment
    @param A2: Second alignment
    @param traces: DP traceback

    @return: A single alignment from A1 and A2

    @author: Woon Wai Keen
    @author: Vladimir Likic
    @author: Qiao Wang
    """

    # local import: functools is not among this module's imports
    import functools

    # Create object to hold new merged alignment and fill in its expr_codes
    ma = Class.Alignment(None)
    ma.expr_code = A1.expr_code + A2.expr_code

    # create empty lists of dimension |A1| + |A2|
    rows1 = A1.peakpos
    rows2 = A2.peakpos
    n_rows1 = len(rows1)
    merged = [[] for _ in range(n_rows1 + len(rows2))]

    idx1 = idx2 = 0

    # trace can either be 0, 1, or 2
    # if it is 0, there are no gaps. otherwise, if it is 1 or 2,
    # there is a gap in A2 or A1 respectively.
    for trace in traces:

        if trace == 0:
            take1, take2 = True, True
        elif trace == 1:
            take1, take2 = True, False
        elif trace == 2:
            take1, take2 = False, True
        else:
            # any other trace value contributes nothing
            continue

        for i, row in enumerate(rows1):
            merged[i].append(row[idx1] if take1 else None)

        # rows of A2 follow the rows of A1; use an explicit offset rather
        # than the loop variable left over from the loop above
        for j, row in enumerate(rows2):
            merged[n_rows1 + j].append(row[idx2] if take2 else None)

        if take1:
            idx1 = idx1 + 1
        if take2:
            idx2 = idx2 + 1

    ma.peakalgt = numpy.transpose(merged)
    # sort according to average peak
    ma.peakalgt = list(ma.peakalgt)
    ma.peakalgt.sort(key=functools.cmp_to_key(Utils.alignment_compare))
    ma.peakpos = numpy.transpose(ma.peakalgt)

    return ma


def alignment_similarity(traces, score_matrix, gap):

    """
    @summary: Calculates similarity score between two alignments (new method)

    @param traces: Traceback from DP algorithm
    @param score_matrix: Score matrix of the two alignments
    @param gap: Gap penalty

    @return: Similarity score (i.e. more similar => higher score)

    @author: Woon Wai Keen
    @author: Vladimir Likic
    """

    # the matrix holds dissimilarities (0 is best), so invert it
    score_matrix = 1. - score_matrix
    similarity = 0.
    idx1 = idx2 = 0

    # Trace can either be 0, 1, or 2
    # If it is 0, there is a match and we add to the sum the score between
    # these two aligned peaks.
    #
    # Otherwise, if it is 1 or 2, and there is a gap in A2 or A1
    # respectively. We then subtract the gap penalty from the sum.
    for trace in traces:
        if trace == 0:
            similarity = similarity + score_matrix[idx1][idx2]
            idx1 = idx1 + 1
            idx2 = idx2 + 1
        elif trace == 1:
            similarity = similarity - gap
            idx1 = idx1 + 1
        elif trace == 2:
            similarity = similarity - gap
            idx2 = idx2 + 1

    return similarity


def score_matrix(a1, a2, D):

    """
    @summary: Calculates the score matrix between two alignments

    @param a1: The first alignment
    @type a1: pyms.Peak.List.Class.Alignment
    @param a2: The second alignment
    @type a2: pyms.Peak.List.Class.Alignment
    @param D: Retention time tolerance
    @type D: FloatType

    @return: Matrix of position_similarity() scores, one row per position
        of 'a1' and one column per position of 'a2'
    @rtype: numpy.ndarray

    @author: Qiao Wang
    @author: Andrew Isaac
    """

    scores = numpy.zeros((len(a1.peakalgt), len(a2.peakalgt)))

    for row, algt1pos in enumerate(a1.peakalgt):
        for col, algt2pos in enumerate(a2.peakalgt):
            scores[row][col] = position_similarity(algt1pos, algt2pos, D)

    return scores


def position_similarity(pos1, pos2, D):

    """
    @summary: Calculates the similarity between the two alignment
        positions. A score of 0 is best and 1 is worst.

    @param pos1: The position of the first alignment
    @param pos2: The position of the second alignment
    @param D: Retention time tolerance

    @return: The similarity value for the current position
    @rtype: FloatType

    @author: Qiao Wang
    @author: Vladimir Likic
    @author: Andrew Isaac
    """

    score = 0.0
    count = 0

    ## Attempt to speed up by only calculating 'in-range' values
    ## set tolerance to 1/1000
    _TOL = 0.001
    cutoff = D*math.sqrt(-2.0*math.log(_TOL))

    for a in pos1:
        if a is None:
            continue
        aspec = a.mass_spec
        art = a.rt
        # spectrum of 'a' as an array; built at most once per b-loop
        mass_spect1 = None
        for b in pos2:
            if b is None:
                continue
            brt = b.rt
            # in range?
            if abs(art-brt) > cutoff:
                score += 1.0  # NB score of 1 is worst
            else:
                if mass_spect1 is None:
                    mass_spect1 = numpy.array(aspec, dtype='d')
                    mass_spect1_sum = numpy.sum(mass_spect1**2, axis=0)
                mass_spect2 = numpy.array(b.mass_spec, dtype='d')
                mass_spect2_sum = numpy.sum(mass_spect2**2, axis=0)
                try:
                    top = numpy.dot(mass_spect1, mass_spect2)
                except ValueError:
                    error("Mass Spectra are of different length\n\n" +
                          " Use IntensityMatrix.crop_mass() to set\n"
                          + " same length for all Mass Spectra")
                bot = numpy.sqrt(mass_spect1_sum*mass_spect2_sum)
                # cosine of the angle between the two spectra
                if bot > 0:
                    cos = top/bot
                else:
                    cos = 0
                # Gaussian weight on the retention time difference
                rtime = numpy.exp(-((art-brt)/float(D))**2 / 2.0)
                score = score + (1.0 - (cos*rtime))
            count = count + 1

    if count == 0:
        score = 1.0  # NB score of 1 is worst
    else:
        score = score/float(count)

    return score
def alignment_compare(x, y):

    """
    @summary: A helper function for sorting peak positions in an alignment

    Compares two alignment positions by the mean retention time of their
    peaks. Entries that evaluate as false (e.g. None for a missing peak)
    are ignored. Each position is expected to hold at least one peak;
    the mean is undefined otherwise.

    @param x: The first alignment position (a sequence of peaks)
    @param y: The second alignment position (a sequence of peaks)

    @return: -1 if 'x' has the smaller mean retention time, 1 if it has
        the larger one, and 0 if the means are equal
    @rtype: IntType
    """

    rts_x = [peak.get_rt() for peak in filter(None, x)]
    rts_y = [peak.get_rt() for peak in filter(None, y)]

    avg_x = numpy.sum(rts_x)/len(rts_x)
    avg_y = numpy.sum(rts_y)/len(rts_y)

    if avg_x < avg_y:
        return -1
    if avg_x > avg_y:
        return 1
    # equal means must compare as equal, so that swapping the arguments
    # gives a consistent answer
    return 0
can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Peak/List/Function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions related to Peak modification 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
def composite_peak(peak_list, minutes=False):

    """
    @summary: Create a peak that consists of a composite spectrum from all
        spectra in the list of peaks.

    Each spectrum is scaled so that its largest intensity is 100 (a
    spectrum with no positive intensity is treated as all zeros). The
    scaled spectra and the retention times are then averaged. None
    entries in 'peak_list' are skipped.

    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param minutes: Passed unchanged to the Peak constructor as its
        'minutes' argument
    @type minutes: BooleanType

    @return: Peak Object with combined mass spectra of 'peak_list', or
        None if 'peak_list' contains no peaks
    @rtype: pyms.Peak.Class.Peak

    @author: Andrew Isaac
    """

    avg_spec = None
    mass_list = None
    rt_sum = 0
    count = 0

    for peak in peak_list:
        if peak is None:
            continue
        ms = peak.get_mass_spectrum()
        spec = numpy.array(ms.mass_spec, dtype='d')
        if avg_spec is None:
            # the first peak fixes the length and the mass list
            avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
            mass_list = ms.mass_list
        # scale all intensities to [0,100]
        max_spec = max(spec)/100.0
        if max_spec > 0:
            spec = spec/max_spec
        else:
            spec = spec*0
        rt_sum += peak.get_rt()
        avg_spec += spec
        count += 1

    if count == 0:
        return None

    # float() guards against integer division of integer retention times
    avg_rt = rt_sum/float(count)
    avg_spec = (avg_spec/count).tolist()  # list more compact than ndarray
    new_ms = MassSpectrum(mass_list, avg_spec)
    return Peak(avg_rt, new_ms, minutes)

def fill_peaks(data, peak_list, D, minutes=False):

    """
    @summary: Gets the best matching Retention Time and spectra from 'data'
        for each peak in the peak list.

    For every peak the scans within the search window are scored by the
    cosine between the scan and the peak spectrum, multiplied by a
    Gaussian weight of the retention time difference. The best scoring
    scan becomes the new peak.

    @param data: A data IntensityMatrix that has the same mass range as the
        peaks in the peak list
    @type data: pyms.GCMS.Class.IntensityMatrix
    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param D: Peak width standard deviation in seconds. Determines search
        window width.
    @type D: FloatType
    @param minutes: Passed unchanged to the Peak constructor as its
        'minutes' argument
    @type minutes: BooleanType

    @return: List of Peak Objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    # Test for best match in range where RT weight is greater than _TOL
    _TOL = 0.001
    cutoff = D*math.sqrt(-2.0*math.log(_TOL))

    # Penalise for neighboring peaks
    # reweight so RT weight at nearest peak is _PEN
    _PEN = 0.5
    pen_scale = math.sqrt(-2.0*math.log(_PEN))

    datamat = data.get_matrix_list()
    mass_list = data.get_mass_list()
    datatimes = data.get_time_list()
    minrt = min(datatimes)
    maxrt = max(datatimes)

    peak_rts = [peak.get_rt() for peak in peak_list]
    n_peaks = len(peak_list)
    new_peak_list = []

    for ii in range(n_peaks):
        spec = peak_list[ii].get_mass_spectrum().mass_spec
        spec = numpy.array(spec, dtype='d')
        rt = peak_rts[ii]
        spec_SS = numpy.sum(spec**2, axis=0)

        # Distances to the neighbours that actually exist. The first and
        # last peaks have only one neighbour; a lone peak has none.
        gaps = []
        if ii > 0:
            gaps.append(abs(rt - peak_rts[ii-1]))
        if ii < n_peaks-1:
            gaps.append(abs(rt - peak_rts[ii+1]))

        # adjust weighting for neighbours, never wider than D
        Dclose = D
        if gaps:
            Dnear = min(gaps)/pen_scale
            if Dnear > 0:
                Dclose = min(D, Dnear)

        # Get bounds, clipped to the time range of the data
        rtlow = max(rt - cutoff, minrt)
        lowii = data.get_index_at_time(rtlow)
        rtup = min(rt + cutoff, maxrt)
        upii = data.get_index_at_time(rtup)

        # Get sub matrix of scans in bounds
        submat = numpy.array(datamat[lowii:upii+1], dtype='d')
        subrts = numpy.array(datatimes[lowii:upii+1], dtype='d')

        submat_SS = numpy.sum(submat**2, axis=1)

        # dot product of the peak spectrum with each scan (row)
        toparr = numpy.dot(submat, spec)
        botarr = numpy.sqrt(spec_SS*submat_SS)

        # scaled dot product of each scan; an all-zero scan (or peak
        # spectrum) scores 0 rather than 0/0 = NaN, which argmax would
        # otherwise pick as the maximum
        cosarr = numpy.zeros(len(botarr), dtype='d')
        defined = botarr > 0
        cosarr[defined] = toparr[defined]/botarr[defined]

        # RT weight of each scan
        rtimearr = numpy.exp(-((subrts-rt)/float(Dclose))**2 / 2.0)

        # weighted scores
        scorearr = cosarr*rtimearr

        # index of best score
        best_ii = scorearr.argmax()

        # Add new peak
        bestrt = subrts[best_ii]
        bestspec = submat[best_ii].tolist()
        ms = MassSpectrum(mass_list, bestspec)
        new_peak_list.append(Peak(bestrt, ms, minutes))

    return new_peak_list
def is_peak_list(peaks):

    """
    @summary: Returns True if 'peaks' is a valid peak list, False
        otherwise

    A valid peak list is a list in which every item is a Peak instance.

    @param peaks: A list of peak objects
    @type peaks: ListType

    @return: A boolean indicator
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    if not is_list(peaks):
        return False

    for item in peaks:
        if not isinstance(item, Peak):
            return False

    return True

def sele_peaks_by_rt(peaks, rt_range):

    """
    @summary: Selects peaks from a retention time range

    Only peaks with a retention time strictly between the two limits
    are returned.

    @param peaks: A list of peak objects
    @type peaks: ListType
    @param rt_range: A list of two time strings, specifying lower and
        upper retention times
    @type rt_range: ListType

    @return: A list of peak objects
    @rtype: ListType
    """

    if not is_peak_list(peaks):
        error("'peaks' not a peak list")

    if not is_list(rt_range):
        error("'rt_range' not a list")

    if len(rt_range) != 2:
        error("'rt_range' must have exactly two elements")

    lower, upper = rt_range[0], rt_range[1]

    if not (is_str(lower) and is_str(upper)):
        error("lower/upper retention time limits must be strings")

    # convert the time strings to seconds
    rt_lo = time_str_secs(lower)
    rt_hi = time_str_secs(upper)

    if not rt_lo < rt_hi:
        error("lower retention time limit must be less than upper")

    return [peak for peak in peaks if rt_lo < peak.get_rt() < rt_hi]
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Peak/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Functions for modelling signal peaks 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Simulator/Function.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides functions for simulation of GCMS data 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
def gcms_sim(time_list, mass_list, peak_list):

    """
    @summary: Simulator of GCMS data

    Every ion of every peak is added to the intensity matrix as a
    Gaussian chromatogram centred on the scan nearest below the peak's
    retention time. A progress line with the peak width is printed for
    each peak.

    @param time_list: the list of scan times (at least two, taken to be
        evenly spaced)
    @type time_list: listType

    @param mass_list: the list of m/z channels
    @type mass_list: listType

    @param peak_list: A list of peaks
    @type peak_list: list of pyms.Peak.Class.Peak

    @return: A simulated Intensity Matrix object
    @rtype: pyms.GCMS.Class.IntensityMatrix

    @author: Sean O'Callaghan
    """

    n_mz = len(mass_list)
    n_scan = len(time_list)

    t1 = time_list[0]
    period = time_list[1] - t1

    # initialise a 2D numpy array for intensity matrix
    i_array = numpy.zeros((n_scan, n_mz), 'd')

    for peak in peak_list:
        ms = peak.get_mass_spectrum()
        index = int((peak.get_rt() - t1)/period)
        height = sum(ms.mass_spec)
        # For a Gaussian, area = height * sigma * sqrt(2*pi), hence
        # standard deviation = area/(height * sqrt(2*pi))
        sigma = peak.get_area() / (height * (math.sqrt(2*math.pi)))
        print("- width %s" % sigma)
        # NOTE(review): column i of the matrix is taken to be the i-th
        # mass of the peak, i.e. the peak's mass list is assumed to line
        # up with 'mass_list' -- confirm against callers
        for i in range(len(ms.mass_list)):
            ion_height = ms.mass_spec[i]
            ic = chromatogram(n_scan, index, sigma, ion_height)
            i_array[:, i] += ic

    im = IntensityMatrix(time_list, mass_list, i_array)

    return im

def chromatogram(n_scan, x_zero, sigma, peak_scale):

    """
    @summary: Returns a simulated ion chromatogram of a pure component
        The ion chromatogram contains a single gaussian peak.

    @param n_scan: the number of scans
    @type n_scan: intType

    @param x_zero: The apex of the peak (scan index)
    @type x_zero: intType

    @param sigma: The standard deviation of the distribution
    @type sigma: floatType

    @param peak_scale: the intensity of the peak at the apex
    @type peak_scale: floatType

    @return: an array of intensities, one per scan
    @rtype: numpy.ndarray

    @author: Sean O'Callaghan
    """

    ic = numpy.zeros((n_scan), 'd')

    for i in range(n_scan):
        ic[i] = gaussian(float(i), x_zero, sigma, peak_scale)

    return ic

def gaussian(point, mean, sigma, scale):

    """
    @summary: calculates a point on a gaussian density function

    f = s*exp(-((x-x0)^2)/(2*w^2));

    @param point: The point currently being computed
    @type point: floatType

    @param mean: The apex of the peak
    @type mean: intType

    @param sigma: The standard deviation of the gaussian
    @type sigma: floatType

    @param scale: The height of the apex
    @type scale: floatType

    @return: the value of the scaled gaussian at 'point'
    @rtype: floatType

    @author: Sean O'Callaghan
    """

    return scale*math.exp((-(point-mean)**2)/(2*(sigma**2)))

def add_gaussc_noise(im, scale):

    """
    @summary: adds constant-scale gaussian noise to every ion
        chromatogram of an IntensityMatrix object, in place

    @param im: the intensity matrix object
    @type im: pyms.GCMS.Class.IntensityMatrix

    @param scale: the scale of the normal distribution from
        which the noise is drawn
    @type scale: floatType

    @author: Sean O'Callaghan
    """

    n_mz = im.get_size()[1]

    for i in range(n_mz):
        ic = im.get_ic_at_index(i)
        add_gaussc_noise_ic(ic, scale)
        im.set_ic_at_index(i, ic)

def add_gaussv_noise(im, scale, cutoff, prop):

    """
    @summary: adds intensity-dependent gaussian noise to every ion
        chromatogram of an IntensityMatrix object, in place

    @param im: the intensity matrix object
    @type im: pyms.GCMS.Class.IntensityMatrix

    @param scale: the scale of the normal distribution from
        which the noise is drawn for intensities below the cutoff
    @type scale: floatType

    @param cutoff: The level below which the intensity of the ic at that
        point has no effect on the scale of the noise distribution
    @type cutoff: intType

    @param prop: For intensity values above the cutoff, the scale is
        multiplied by the ic value multiplied by prop
    @type prop: floatType

    @author: Sean O'Callaghan
    """

    n_mz = im.get_size()[1]

    for i in range(n_mz):
        ic = im.get_ic_at_index(i)
        add_gaussv_noise_ic(ic, scale, cutoff, prop)
        im.set_ic_at_index(i, ic)

def add_gaussc_noise_ic(ic, scale):

    """
    @summary: adds noise drawn from a normal distribution
        with constant scale to an ion chromatogram, in place

    @param ic: The ion Chromatogram
    @type ic: pyms.GCMS.IonChromatogram

    @param scale: The scale of the normal distribution
    @type scale: intType

    @author: Sean O'Callaghan
    """

    noise = numpy.random.normal(0.0, scale, len(ic))

    i_array_with_noise = ic.get_intensity_array() + noise
    ic.set_intensity_array(i_array_with_noise)

def add_gaussv_noise_ic(ic, scale, cutoff, prop):

    """
    @summary: adds noise to an ic, in place. The noise value is drawn
        from a normal distribution, the scale of this distribution
        depends on the value of the ic at the point where the noise is
        being added

    @param ic: The IonChromatogram
    @type ic: pyms.GCMS.IonChromatogram

    @param scale: The scale of the normal distribution for ic values
        below the cutoff; it is modified for values above the cutoff
    @type scale: intType

    @param cutoff: The level below which the intensity of the ic at that
        point has no effect on the scale of the noise distribution
    @type cutoff: intType

    @param prop: For ic values above the cutoff, the scale is multiplied
        by the ic value multiplied by prop
    @type prop: floatType

    @author: Sean O'Callaghan
    """

    noise = numpy.zeros(len(ic))

    i_array = ic.get_intensity_array()

    for i in range(len(ic)):
        if i_array[i] < cutoff:
            point_scale = scale
        else:
            point_scale = scale*i_array[i]*prop
        # draw a scalar: asking for size 1 returns a one-element array,
        # which cannot be stored reliably in a single array slot
        noise[i] = numpy.random.normal(0.0, point_scale)

    i_array_with_noise = noise + i_array
    ic.set_intensity_array(i_array_with_noise)
# 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/Utils/DP.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dynamic Programming routine 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
# 22 | # # 23 | ############################################################################# 24 | 25 | import numpy 26 | from Error import error 27 | #from mpi4py import MPI 28 | 29 | def dp(S, gap_penalty): 30 | 31 | """ 32 | @summary: Solves optimal path in score matrix based on global sequence 33 | alignment 34 | 35 | @param S: Score matrix 36 | @type S: numpy. 37 | @param gap_penalty: Gap penalty 38 | @type gap_penalty: FloatType 39 | 40 | @return: A dictionary of results 41 | @rtype: DictType 42 | 43 | @author: Tim Erwin 44 | """ 45 | #comm = MPI.COMM_WORLD 46 | #rank = comm.Get_rank() 47 | #print " In DP.py, I am rank", rank 48 | 49 | try: 50 | row_length = len(S[:,0]) 51 | except(IndexError): 52 | error('Zero length alignment found: Samples with no peaks \ 53 | cannot be aligned') 54 | col_length = len(S[0,:]) 55 | 56 | #D contains the score of the optimal alignment 57 | D = numpy.zeros((row_length+1,col_length+1), dtype='d') 58 | for i in range(1, row_length+1): 59 | D[i,0] = gap_penalty*i 60 | for j in range(1, col_length+1): 61 | D[0,j] = gap_penalty*j 62 | D[0,0] = 0.0 63 | D[1:(row_length+1), 1:(col_length+1)] = S.copy(); 64 | 65 | # Directions for trace 66 | # 0 - match (move diagonal) 67 | # 1 - peaks1 has no match (move up) 68 | # 2 - peaks2 has no match (move left) 69 | # 3 - stop 70 | trace_matrix = numpy.zeros((row_length+1,col_length+1)) 71 | trace_matrix[:,0] = 1; 72 | trace_matrix[0,:] = 2; 73 | trace_matrix[0,0] = 3; 74 | 75 | for i in range(1,row_length+1): 76 | for j in range(1,col_length+1): 77 | 78 | # 79 | # Needleman-Wunsch Algorithm assuming a score function S(x,x)=0 80 | # 81 | # | D[i-1,j-1] + S(i,j) 82 | # D[i,j] = min | D(i-1,j] + gap 83 | # | D[i,j-1] + gap 84 | # 85 | 86 | darray = [D[i-1,j-1]+S[i-1,j-1], D[i-1,j]+gap_penalty, D[i,j-1]+gap_penalty] 87 | D[i,j] = min(darray) 88 | #Store direction in trace matrix 89 | trace_matrix[i,j] = darray.index(D[i,j]) 90 | 91 | # Trace back from bottom right 92 | trace = [] 93 | matches = [] 
94 | i = row_length 95 | j = col_length 96 | direction = trace_matrix[i,j] 97 | p = [row_length-1] 98 | q = [col_length-1] 99 | 100 | while direction != 3: 101 | 102 | if direction == 0: #Match 103 | i = i-1 104 | j = j-1 105 | matches.append([i,j]) 106 | elif direction == 1: #peaks1 has no match 107 | i = i-1 108 | elif direction == 2: #peaks2 has no match 109 | j = j-1 110 | p.append(i-1) 111 | q.append(j-1) 112 | trace.append(direction) 113 | direction=trace_matrix[i,j] 114 | 115 | #remove 'stop' entry 116 | p.pop() 117 | q.pop() 118 | # reverse the trace back 119 | p.reverse() 120 | q.reverse() 121 | trace.reverse() 122 | matches.reverse() 123 | 124 | return {'p':p, 'q':q, 'trace':trace, 'matches':matches, 'D':D, 'phi':trace_matrix} 125 | 126 | -------------------------------------------------------------------------------- /pyms/Utils/Error.py: -------------------------------------------------------------------------------- 1 | """ 2 | Defines error() 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
def error(message=None):

    """
    @summary: PyMS wide error function

    Writes the error message (if supplied), framed and prefixed with the
    name of the calling function and its module, to the standard error
    stream and terminates execution with sys.exit(1).

    @param message: The error message to be printed
    @type message: StringType

    @return: none
    @rtype: NoneType

    @author: Lewis Lee
    @author: Vladimir Likic
    """

    if message is None:
        message = "(no message)"
    else:
        # strings are immutable: the stripped copy must be kept
        message = message.rstrip('\n')

    # Retrieve details of the caller function from the caller
    # function's stack frame (i.e. the second most recent stack
    # frame, denoted by "1").
    caller = sys._getframe(1).f_code
    funame = caller.co_name
    fargcount = caller.co_argcount
    fmodule = caller.co_filename.split('/')[-1].split('.')[0]

    # module level code is reported as '<module>' ('?' by very old
    # interpreters)
    if funame in ('?', '<module>'):
        fmessage = "ERROR: 'main' in module %s.\n" % (fmodule)
    else:
        fmessage = "ERROR: %s(%d) in module %s.\n" % \
                (funame, fargcount, fmodule)

    fmessage = fmessage + " %s" % ( message )

    # the frame is as wide as the longest line of the message
    n = max([len(line) for line in fmessage.split("\n")])
    cstr = "=" * n

    # Write to the error stream directly instead of rebinding sys.stdout,
    # which would stay redirected if the SystemExit below is caught
    out = sys.__stderr__
    out.write("\n %s\n" % (cstr))
    out.write(" %s\n" % (fmessage))
    out.write(" %s\n\n" % (cstr))

    sys.exit(1)

def stop(message=None):

    """
    @summary: A simple termination of execution

    Prints the message (if supplied) and raises RuntimeError.

    @param message: The message to be printed
    @type message: StringType

    @return: none
    @rtype: NoneType
    """

    if message is not None:
        print(message)

    raise RuntimeError
-------------------------------------------------------------------------------- /pyms/Utils/IO.py: -------------------------------------------------------------------------------- 1 | """ 2 | General I/O functions 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
def dump_object(object, file_name):

    """
    @summary: Dumps an object to a file through cPickle.dump()

    @param object: Object to be dumped
    @type object: An instance of an arbitrary class

    @param file_name: Name of the file for the object dump
    @type file_name: StringType

    @author: Vladimir Likic
    """

    fp = open_for_writing(file_name)
    try:
        cPickle.dump(object, fp)
    finally:
        # close the file even if the object cannot be pickled
        close_for_writing(fp)

def load_object(file_name):

    """
    @summary: Loads an object previously dumped with dump_object()

    @param file_name: Name of the object dump file
    @type file_name: StringType

    @return: Object contained in the file 'file_name'
    @rtype: An instance of an arbitrary class

    @author: Vladimir Likic
    """

    fp = open_for_reading(file_name)
    try:
        # NOTE: unpickling can execute arbitrary code; only load files
        # that come from a trusted source
        obj = cPickle.load(fp)
    finally:
        close_for_reading(fp)

    return obj

def open_for_reading(file_name):

    """
    @summary: Opens file for reading, returns file pointer

    Calls error() if 'file_name' is not a string or the file cannot be
    opened.

    @param file_name: Name of the file to be opened for reading
    @type file_name: StringType

    @return: Pointer to the opened file
    @rtype: FileType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")
    try:
        fp = open(file_name)
    except IOError:
        # reported as "does not exist" whatever the actual I/O failure
        error("'%s' does not exist" % (file_name))

    return fp

def open_for_writing(file_name):

    """
    @summary: Opens file for writing, returns file pointer

    Calls error() if 'file_name' is not a string or the file cannot be
    opened.

    @param file_name: Name of the file to be opened for writing
    @type file_name: StringType

    @return: Pointer to the opened file
    @rtype: FileType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")
    try:
        fp = open(file_name, "w")
    except IOError:
        error("Cannot open '%s' for writing" % (file_name))

    return fp

def close_for_reading(fp):

    """
    @summary: Closes file pointer open for reading

    @param fp: A file pointer, previously opened for reading
    @type fp: FileType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    fp.close()

def close_for_writing(fp):

    """
    @summary: Closes file pointer open for writing

    @param fp: A file pointer, previously opened for writing
    @type fp: FileType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    fp.close()

def file_lines(file_name, filter=False):

    """
    @summary: Returns lines from a file, as a list

    @param file_name: Name of a file
    @type file_name: StringType
    @param filter: If True, lines are pre-processed. Leading and trailing
        whitespace (including the newline character) is removed, and
        empty lines and lines starting with '#' are discarded
    @type filter: BooleanType

    @return: A list of lines
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    fp = open_for_reading(file_name)
    try:
        lines = fp.readlines()
    finally:
        close_for_reading(fp)

    if filter:
        # strip leading and trailing whitespace
        stripped = [line.strip() for line in lines]
        # discard empty lines and comments in a single pass
        lines = [line for line in stripped
                 if len(line) > 0 and line[0] != "#"]

    return lines
If True, the output will be gzipped 208 | @type: BooleanType 209 | 210 | @return: none 211 | @rtype: NoneType 212 | 213 | @author: Vladimir Likic 214 | """ 215 | 216 | if not is_str(file_name): 217 | error("'file_name' is not a string") 218 | 219 | if not is_list(data): 220 | error("'data' is not a list") 221 | 222 | if not is_str(prepend): 223 | error("'prepend' is not a string") 224 | 225 | if not is_str(sep): 226 | error("'sep' is not a string") 227 | 228 | fp = open_for_writing(file_name) 229 | 230 | # decide whether data is a vector or matrix 231 | if is_number(data[0]): 232 | for item in data: 233 | if not is_number(item): 234 | error("not all elements of the list are numbers") 235 | data_is_matrix = 0 236 | else: 237 | for item in data: 238 | if not is_list(item): 239 | error("not all elements of the list are lists") 240 | data_is_matrix = 1 241 | 242 | if data_is_matrix: 243 | for ii in range(len(data)): 244 | fp.write(prepend) 245 | for jj in range(len(data[ii])): 246 | if is_number(data[ii][jj]): 247 | fp.write(format_str % (data[ii][jj])) 248 | if (jj<(len(data[ii])-1)): fp.write(sep) 249 | else: 250 | error("datum not a number") 251 | fp.write("\n") 252 | else: 253 | for ii in range(len(data)): 254 | fp.write(prepend) 255 | fp.write(format_str % (data[ii])) 256 | fp.write("\n") 257 | 258 | close_for_writing(fp) 259 | 260 | if compressed: 261 | status = os.system('gzip %s' % (file_name)) 262 | if status != 0: 263 | error("gzip compress failed") 264 | 265 | -------------------------------------------------------------------------------- /pyms/Utils/Math.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides mathematical functions 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can 
# redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2 as #
# published by the Free Software Foundation. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #
# #
#############################################################################

import copy, math

from pyms.Utils.Error import error
from pyms.Utils.Utils import is_list, is_number

def median(v):

    """
    @summary: Returns a median of a list or numpy array

    The input is not modified.

    @param v: Input list or array (must not be empty)
    @type v: ListType, TupleType, or numpy.core.ndarray
    @return: The median of the input list
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    N = len(v)
    if N == 0:
        error("cannot compute the median of an empty sequence")

    # sorted() builds a new list: the caller's data stays untouched and
    # tuples (which have no sort() method) are handled as well
    local_data = sorted(v)

    # '//' keeps the index an integer regardless of division semantics
    K = N // 2

    if (N % 2) == 0:
        # even number of points: average the two central values
        return (local_data[K-1] + local_data[K])/2.0

    # odd number of points: the central value
    return local_data[K]

def vector_by_step(vstart,vstop,vstep):

    """
    @summary: generates a list by using start, stop, and step values

    The list holds vstart, vstart+vstep, ... for as long as the value
    is strictly smaller than vstop. It is empty when vstart >= vstop.

    @param vstart: Initial value
    @type vstart: A number
    @param vstop: Max value (exclusive)
    @type vstop: A number
    @param vstep: Step
    @type vstep: A number

    @return: A list generated
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_number(vstart) or not is_number(vstop) or not is_number(vstep):
        error("parameters start, stop, step must be numbers")

    # a zero or negative step would never reach vstop: the loop below
    # would not terminate
    if vstep <= 0 and vstart < vstop:
        error("step must be positive when start < stop")

    v = []

    p = vstart
    while p < vstop:
        v.append(p)
        p = p + vstep

    return v

def MAD(v):

    """
    @summary: median absolute deviation

    Computed as the median of |x - median(v)| divided by 0.6745.

    @param v: A list or array
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: median absolute deviation
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    m = median(v)
    m_list = [math.fabs(xi - m) for xi in v]

    # NOTE(review): 0.6745 is presumably the usual constant that makes
    # the MAD comparable to a standard deviation for normally
    # distributed data -- confirm
    mad = median(m_list)/0.6745

    return mad

def amin(v):

    """
    @summary: Finds the minimum element in a list or array

    When the minimum occurs more than once, the index of its first
    occurrence is returned.

    @param v: A list or array (must not be empty)
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Tuple (mini, minv), where minv is the minimum
        element in the list and mini is its index
    @rtype: TupleType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    if len(v) == 0:
        error("finding minimum failed: empty sequence")

    # start from the first element (not from max(v)), so that a
    # single-element input or an input with all elements equal still
    # yields a valid index
    mini = 0
    minv = v[0]

    for ii in range(1, len(v)):
        if v[ii] < minv:
            minv = v[ii]
            mini = ii

    return mini, minv

def mean(v):

    """
    @summary: Calculates the mean

    @param v: A list or array (must not be empty)
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Mean
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    if len(v) == 0:
        error("cannot compute the mean of an empty sequence")

    s = 0.0
    for e in v:
        s = s + e
    s_mean = s/float(len(v))

    return s_mean

def std(v):

    """
    @summary: Calculates standard deviation

    The sum of squared deviations from the mean is divided by N-1.

    @param v: A list or array with at least two elements
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Standard deviation
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    # the N-1 denominator below needs at least two values
    if len(v) < 2:
        error("standard deviation requires at least two values")

    v_mean = mean(v)

    s = 0.0
    for e in v:
        d = e - v_mean
        s = s + d*d
    s_mean = s/float(len(v)-1)
    v_std = math.sqrt(s_mean)

    return v_std


def rmsd(list1, list2):

    """
    @summary: Calculates RMSD for the 2 lists

    The values are compared pairwise over the length of 'list1'.
    NOTE: elements of 'list2' beyond the length of 'list1' are ignored.

    @param list1: First data set (must not be empty)
    @type list1: ListType, TupleType, or numpy.core.ndarray
    @param list2: Second data set, at least as long as 'list1'
    @type list2: ListType, TupleType, or numpy.core.ndarray
    @return: RMSD value
    @rtype: FloatType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    if not is_list(list1):
        error("argument neither list nor array")

    if not is_list(list2):
        error("argument neither list nor array")

    n = len(list1)

    if n == 0:
        error("cannot compute RMSD of empty data sets")

    # list2[i] is read for every index of list1
    if len(list2) < n:
        error("second data set is shorter than the first")

    sq_sum = 0.0
    for i in range(n):
        sq_sum = sq_sum + (list1[i] - list2[i]) ** 2

    return math.sqrt(sq_sum / n)

# --------------------------------------------------------------------------------
# /pyms/Utils/Time.py:
# --------------------------------------------------------------------------------
"""
Time conversion and related functions
"""

#############################################################################
# #
# PyMS software for processing of metabolomic mass-spectrometry data #
# Copyright (C) 2005-2012 Vladimir Likic #
# #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2 as #
# published by the Free Software Foundation.
#
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #
# #
#############################################################################

import math

from pyms.Utils.Error import error
from pyms.Utils.Utils import is_int, is_str, is_str_num

def time_str_secs(time_str):

    """
    @summary: Resolves time string of the form "<NUMBER>s" or
        "<NUMBER>m", returns time in seconds

    @param time_str: A time string, which must be of the form
        "<NUMBER>s" or "<NUMBER>m" where "<NUMBER>" is a valid number.
        The trailing unit letter is case-insensitive
    @type time_str: StringType

    @return: Time in seconds
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_str(time_str):
        error("time string not a string")

    # the unit is read from time_str[-1], which would raise IndexError
    # on an empty string
    if len(time_str) == 0:
        error("improper time string")

    time_number = time_str[:-1]
    time_spec = time_str[-1].lower()

    if not is_str_num(time_number):
        # single parenthesised argument: valid as a print statement and
        # as a print() call, with the same output
        print(" --> received time string '%s'" % (time_number))
        error("improper time string")

    if time_spec not in ("s", "m"):
        error("time string must end with either 's' or 'm'")

    secs = float(time_number)

    if time_spec == "m":
        secs = secs*60.0

    return secs

def window_sele_points(ic, window_sele, half_window=False):

    """
    @summary: Converts window selection parameter into points based
        on the time step in an ion chromatogram

    @param ic: ion chromatogram object relevant for the conversion.
        Only its get_time_step() method is used, and only when
        'window_sele' is a time string
    @type ic: pyms.GCMS.Class.IonChromatogram

    @param window_sele: The window selection parameter. This can be
        an integer or time string. If integer, taken as the number
        of points. If a string, must of the form "<NUMBER>s" or
        "<NUMBER>m", specifying a time in seconds or minutes,
        respectively
    @type window_sele: IntType or StringType

    @param half_window: Specifies whether to return half-window.
        An integer window must then be an odd number of points
    @type half_window: BooleanType

    @return: The number of points in the window
    @rtype: IntType

    @author: Vladimir Likic
    """

    if not is_int(window_sele) and not is_str(window_sele):
        error("'window' must be an integer or a string")

    if is_int(window_sele):
        points = window_sele
        if half_window:
            if window_sele % 2 == 0:
                error("window must be an odd number of points")
            # floor of half the window, e.g. 7 points -> 3
            points = window_sele // 2
    else:
        time = time_str_secs(window_sele)
        time_step = ic.get_time_step()

        # guard the division below
        if not time_step > 0:
            error("ion chromatogram time step must be positive")

        if half_window:
            time = time*0.5

        points = int(math.floor(time/time_step))

    if half_window:
        if points < 1: error("window too small (half window=%d)" % (points))
    else:
        if points < 2: error("window too small (window=%d)" % (points))

    return points

# --------------------------------------------------------------------------------
# /pyms/Utils/Utils.py:
# --------------------------------------------------------------------------------
"""
General utility functions
"""

#############################################################################
# #
# PyMS software for processing of metabolomic mass-spectrometry data #
# Copyright (C) 2005-2012 Vladimir Likic #
# #
# This program is free software; you can redistribute it and/or modify #
# it under the terms of the GNU General Public License version 2 as #
# published by the Free Software Foundation.
#
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #
# #
#############################################################################

import types, re

import numpy

from pyms.Utils.Error import error

# Pattern used by is_str_num(); compiled once when the module is loaded
_NUM_RE = re.compile(r'^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$')

def is_str(arg):

    """
    @summary: Returns True if the argument is a string, False otherwise

    @param arg: The argument to be evaluated as a string
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    return isinstance(arg,types.StringType)

def is_int(arg):

    """
    @summary: Returns True if the argument is an integer, False
        otherwise

    NOTE: booleans are a subclass of int in Python, so True and False
    are accepted as integers too.

    @param arg: The argument to be evaluated as an integer
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    return isinstance(arg,(types.IntType,types.LongType))

def is_float(arg):

    """
    @summary: Returns True if the argument is a float, False otherwise

    @param arg: The argument to be evaluated as a float
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    return isinstance(arg,types.FloatType)

def is_number(arg):

    """
    @summary: Returns True if the argument is a number (integer or
        float), False otherwise

    @param arg: The argument to be evaluated as a number
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    return is_int(arg) or is_float(arg)

def is_list(arg):

    """
    @summary: Returns True if the argument is a list, tuple, or numpy
        array, False otherwise

    @param arg: The argument to be evaluated as a list
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    # numpy.ndarray is the public name of the class that was previously
    # reached through the numpy.core sub-package
    return isinstance(arg,(types.ListType,types.TupleType,numpy.ndarray))

def is_array(arg):

    """
    @summary: Returns True if the argument is a numpy array, False
        otherwise

    @param arg: The argument to be evaluated as a numpy array
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    return isinstance(arg, numpy.ndarray)


def is_boolean(arg):

    """
    @summary: Returns True if the argument is boolean, False otherwise

    @param arg: The argument to be evaluated as boolean
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    return isinstance(arg,types.BooleanType)

def is_str_num(arg):

    """
    @summary: Determines if the argument is a string in the format of a number

    The number can be an integer, or alternatively floating point in scientific
    or engineering format.

    The argument is converted with str() before matching, so numeric
    objects are accepted as well.

    @param arg: A string to be evaluated as a number
    @type arg: StringType

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Gyro Funch (from Active State Python Cookbook)
    """

    return _NUM_RE.match(str(arg)) is not None

def is_positive_int(arg):

    """
    @summary: Determines if the argument is an integer greater than zero

    @param arg: The argument to be evaluated as a positive integer
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Milica Ng
    """

    return is_int(arg) and arg > 0

def is_list_of_dec_nums(arg):

    """
    @summary: Determines if the argument is a list of decimal numbers

    Only a non-empty list (not a tuple or array) whose elements are
    all floats qualifies.

    @param arg: The argument to be evaluated as a list of decimal numbers
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Milica Ng
    """

    if not isinstance(arg, types.ListType) or len(arg) == 0:
        return False

    for q in arg:
        if not isinstance(q, types.FloatType):
            return False

    return True

# --------------------------------------------------------------------------------
# /pyms/Utils/__init__.py:
# --------------------------------------------------------------------------------
"""
Utility functions for PyMS wide use
"""

#############################################################################
# #
# PyMS software for
processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. # 18 | # # 19 | # You should have received a copy of the GNU General Public License # 20 | # along with this program; if not, write to the Free Software # 21 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # 22 | # # 23 | ############################################################################# 24 | 25 | -------------------------------------------------------------------------------- /pyms/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The root of the package pyms 3 | """ 4 | 5 | ############################################################################# 6 | # # 7 | # PyMS software for processing of metabolomic mass-spectrometry data # 8 | # Copyright (C) 2005-2012 Vladimir Likic # 9 | # # 10 | # This program is free software; you can redistribute it and/or modify # 11 | # it under the terms of the GNU General Public License version 2 as # 12 | # published by the Free Software Foundation. # 13 | # # 14 | # This program is distributed in the hope that it will be useful, # 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 17 | # GNU General Public License for more details. 
#
# #
# You should have received a copy of the GNU General Public License #
# along with this program; if not, write to the Free Software #
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. #
# #
#############################################################################


# --------------------------------------------------------------------------------
# /setup.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
import os

try:
    from setuptools import setup, find_packages
except ImportError:
    from distutils.core import setup

    def find_packages(where='.'):
        """
        Minimal stand-in for setuptools.find_packages(), used only when
        setuptools is not installed: returns the dotted names of all
        directories below 'where' that contain an __init__.py
        """
        found = []
        for root, dirs, files in os.walk(where):
            if root == where:
                continue
            if '__init__.py' not in files:
                # not a package: do not descend into its subtree
                dirs[:] = []
                continue
            found.append(os.path.relpath(root, where).replace(os.sep, '.'))
        return found

setup(
    name='pyms',
    version='2.1',
    author='Sean O\'Callaghan',
    author_email='spoc@unimelb.edu.au',
    packages=find_packages(),
    url='https://github.com/ma-bio21/pyms',
    # NOTE(review): the tarball tag (0.1) does not match version (2.1)
    # -- confirm which release this should point to
    download_url = 'https://github.com/ma-bio21/pyms/tarball/0.1',
    # the license name, not the name of the license file; the source
    # headers state GNU General Public License version 2
    license='GPLv2',
    description='Python Toolkit for Mass Spectrometry',
    # one string built from adjacent literals; a comma between the
    # literals would turn this into a tuple
    long_description=('PyMS is a collection of PyMS libraries '
                      'which can be used to perform peak picking, '
                      'alignment by dynamic programming, and '
                      'gap-filling in Gas Chromatography Mass '
                      'Spectrometry experiments'),
    install_requires=[
        # PyMS also requires both scipy and numpy. Installation
        # of scipy and numpy should be performed before PyMS
        # installation, as these packages do not play
        # well with automated package installers
        # In addition, matplotlib is required if graphical output is
        # desired.
        # A good solution which provides all of these dependencies is
        # Anaconda
        #"numpy >= 1.7.1",
        #"scipy >= 0.12.0",
        "pymzml >= 0.7.5",
        "pycluster >= 1.49",
        ],
)
# --------------------------------------------------------------------------------