├── LICENSE.txt
├── README.md
├── pyms
    ├── AUTHORS
    ├── Baseline
    │   ├── TopHat.py
    │   └── __init__.py
    ├── COPYING
    ├── Deconvolution
    │   ├── BillerBiemann
    │   │   ├── Function.py
    │   │   └── __init__.py
    │   └── __init__.py
    ├── Display
    │   ├── Class.py
    │   ├── Function.py
    │   └── __init__.py
    ├── Experiment
    │   ├── Class.py
    │   ├── IO.py
    │   └── __init__.py
    ├── GCMS
    │   ├── Class.py
    │   ├── Function.py
    │   ├── IO
    │   │   ├── ANDI
    │   │   │   ├── Function.py
    │   │   │   └── __init__.py
    │   │   ├── JCAMP
    │   │   │   ├── Function.py
    │   │   │   └── __init__.py
    │   │   ├── MZML
    │   │   │   ├── Function.py
    │   │   │   └── __init__.py
    │   │   └── __init__.py
    │   └── __init__.py
    ├── Gapfill
    │   ├── Class.py
    │   ├── Function.py
    │   └── __init__.py
    ├── LICENSE
    ├── Noise
    │   ├── Analysis.py
    │   ├── SavitzkyGolay.py
    │   ├── Window.py
    │   └── __init__.py
    ├── Peak
    │   ├── Class.py
    │   ├── Function.py
    │   ├── IO.py
    │   ├── List
    │   │   ├── DPA
    │   │   │   ├── #Class.py#
    │   │   │   ├── Class.py
    │   │   │   ├── Class_old.py
    │   │   │   ├── Function.py
    │   │   │   ├── Function_new.py
    │   │   │   ├── Function_new.py~
    │   │   │   ├── Utils.py
    │   │   │   └── __init__.py
    │   │   ├── Function.py
    │   │   ├── Utils.py
    │   │   └── __init__.py
    │   └── __init__.py
    ├── Simulator
    │   ├── Function.py
    │   └── __init__.py
    ├── Utils
    │   ├── DP.py
    │   ├── Error.py
    │   ├── IO.py
    │   ├── Math.py
    │   ├── Time.py
    │   ├── Utils.py
    │   └── __init__.py
    └── __init__.py
└── setup.py


/LICENSE.txt:
--------------------------------------------------------------------------------
 1 |  #############################################################################
 2 |  #                                                                           #
 3 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 4 |  #    Copyright (C) 2005-2015 Vladimir Likic                                 #
 5 |  #                                                                           #
 6 |  #    This program is free software; you can redistribute it and/or modify   #
 7 |  #    it under the terms of the GNU General Public License version 2 as      #
 8 |  #    published by the Free Software Foundation.                             #
 9 |  #                                                                           #
10 |  #    This program is distributed in the hope that it will be useful,        #
11 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
12 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
13 |  #    GNU General Public License for more details.                           #
14 |  #                                                                           #
15 |  #    You should have received a copy of the GNU General Public License      #
16 |  #    along with this program; if not, write to the Free Software            #
17 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
18 |  #                                                                           #
19 |  #############################################################################
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pyms
2 | Original Publication:
3 | https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-13-115
4 | 
5 | PyMS was originally hosted on Google Code and moved to GitHub when Google Code closed. The same group, Metabolomics Australia at the Bio21 Institute, University of Melbourne, manages the GitHub repository. For any further information on the history of PyMS, please email the maintainer: Sean O'Callaghan, spoc@unimelb.edu.au.
6 | 
7 | The main PyMS documentation is available here: https://github.com/ma-bio21/pyms-docs
8 | Test code is available here: https://github.com/ma-bio21/pyms-test
9 | 


--------------------------------------------------------------------------------
/pyms/AUTHORS:
--------------------------------------------------------------------------------
 1 | Current developers:
 2 |   Vladimir Likic <v.likic@gmail.com> -- project leader
 3 |   Sean O'Callaghan <sean.ocall@gmail.com> -- Jan/10-present
 4 |   Milica Ng <milica.ng14@gmail.com> -- Mar/08-present
 5 |   Luke Hodkinson <lhodkins@vpac.org> -- Jan/09-present
 6 | 
 7 | Former project members:
 8 |   Andrew Isaac <andrew.isaac.au@gmail.com> -- Dec/08-present
 9 |   Qiao Wang <tadaniajoe@gmail.com> -- Dec/08-Jan/09
10 |   Lewis Lee <onefatpanda@gmail.com> -- Nov/05-May/06 
11 |   Woon Wai Keen <doubleukay@doubleukay.com> -- Dec/06-Dec/07
12 |   Tim Erwin <taerwin@gmail.com> -- Dec/07-Jul/08
13 | 
14 | External contributors:
15 |   Uwe Schmitt <uschmitt@mineway.de> -- Savitzky-Golay noise filter
16 | 
17 | 


--------------------------------------------------------------------------------
/pyms/Baseline/TopHat.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Top-hat baseline corrector
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import copy
 26 | import numpy
 27 | 
 28 | from scipy import ndimage
 29 | 
 30 | from pyms.Utils.Error import error
 31 | from pyms.GCMS.Function import is_ionchromatogram, ic_window_points
 32 | 
 33 | # default structural element size for tophat(), as a fraction of the number of points
 34 | _STRUCT_ELM_FRAC = 0.2
 35 | 
 36 | def tophat(ic, struct=None):
 37 | 
 38 |     """
 39 |     @summary: Top-hat baseline correction on Ion Chromatogram
 40 | 
 41 |     @param ic: The input ion chromatogram
 42 |     @type ic: pyms.GCMS.Class.IonChromatogram
 43 |     @param struct: Top-hat structural element as time string
 44 |     @type struct: StringType
 45 | 
 46 |     @return: Top-hat corrected ion chromatogram
 47 |     @rtype: pyms.IO.Class.IonChromatogram
 48 | 
 49 |     @author: Woon Wai Keen
 50 |     @author: Vladimir Likic
 51 |     """
 52 | 
 53 |     if not is_ionchromatogram(ic):
 54 |         error("'ic' not an IonChromatogram object")
 55 |     else:
 56 |         ia = copy.deepcopy(ic.get_intensity_array())
 57 | 
 58 |     if struct == None:
 59 |         struct_pts = int(round(ia.size * _STRUCT_ELM_FRAC))
 60 |     else:
 61 |         struct_pts = ic_window_points(ic,struct)
 62 | 
 63 | #    print " -> Top-hat: structural element is %d point(s)" % ( struct_pts )
 64 | 
 65 |     str_el = numpy.repeat([1], struct_pts)
 66 |     ia = ndimage.white_tophat(ia, None, str_el)
 67 | 
 68 |     ic_bc = copy.deepcopy(ic)
 69 |     ic_bc.set_intensity_array(ia)
 70 | 
 71 |     return ic_bc
 72 | 
 73 | def tophat_im(im, struct=None):
 74 |     
 75 |     """
 76 |     @summary: Top-hat baseline correction on Intensity Matrix
 77 | 
 78 |               Wraps around the TopHat function above
 79 | 
 80 |     @param im: The input Intensity Matrix
 81 |     @type im: pyms.GCMS.Class.IntenstiyMatrix
 82 |     @param struct: Top-hat structural element as time string
 83 |     @type struct: StringType
 84 | 
 85 |     @return: Top-hat corrected Intenstity Matrix
 86 |     @rtype: pyms.IO.Class.IntensityMatrix
 87 |     
 88 |     @author: Sean O'Callaghan
 89 |     """
 90 |     
 91 |     n_scan, n_mz = im.get_size()
 92 |     
 93 |     im_smooth = copy.deepcopy(im)
 94 |     
 95 |     for ii in range(n_mz):
 96 |         ic = im_smooth.get_ic_at_index(ii)
 97 |         ic_smooth = tophat(ic, struct)
 98 |         im_smooth.set_ic_at_index(ii, ic_smooth)
 99 |         
100 |     return im_smooth
101 | 


--------------------------------------------------------------------------------
/pyms/Baseline/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Functions for baseline correction
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/COPYING:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2005-2011, Vladimir Likic <v.likic@unimelb.edu.au>
 2 | All rights reserved.
 3 | 
 4 | This copyright notice must be preserved in all derivative works. If in doubt
 5 | contact Vladimir Likic <v.likic@unimelb.edu.au>
 6 | 
 7 | This code is subject to GNU General Public License version 2.
 8 | 
 9 | Redistribution and use in source and binary forms, with or without modification,
10 | are permitted provided that the following conditions are met:
11 | 
12 |     * Redistributions of source code must retain the above copyright notice, 
13 | this list of conditions and the following disclaimer.
14 |     * Redistributions in binary form must reproduce the above copyright notice, 
15 | this list of conditions and the following disclaimer in the documentation 
16 | and/or other materials provided with the distribution.
17 |     * Neither the name of the <ORGANIZATION> nor the names of its contributors 
18 | may be used to endorse or promote products derived from this software without 
19 | specific prior written permission.
20 | 
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 
25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 
32 | 


--------------------------------------------------------------------------------
/pyms/Deconvolution/BillerBiemann/Function.py:
--------------------------------------------------------------------------------
  1 | """
  2 | BillerBiemann deconvolution algorithm
  3 | Provides a class to perform Biller and Biemann deconvolution
  4 | """
  5 | 
  6 |  #############################################################################
  7 |  #                                                                           #
  8 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  9 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 10 |  #                                                                           #
 11 |  #    This program is free software; you can redistribute it and/or modify   #
 12 |  #    it under the terms of the GNU General Public License version 2 as      #
 13 |  #    published by the Free Software Foundation.                             #
 14 |  #                                                                           #
 15 |  #    This program is distributed in the hope that it will be useful,        #
 16 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 17 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 18 |  #    GNU General Public License for more details.                           #
 19 |  #                                                                           #
 20 |  #    You should have received a copy of the GNU General Public License      #
 21 |  #    along with this program; if not, write to the Free Software            #
 22 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 23 |  #                                                                           #
 24 |  #############################################################################
 25 | 
 26 | import numpy
 27 | import copy
 28 | 
 29 | from pyms.Utils.Error import error
 30 | from pyms.Utils.Utils import is_list, is_number, is_int
 31 | from pyms.GCMS.Class import IonChromatogram, MassSpectrum
 32 | from pyms.Peak.Class import Peak
 33 | 
 34 | # If psyco is installed, use it to speed up running time
 35 | try:
 36 |     import psyco
 37 |     psyco.full()
 38 | except:
 39 |     pass
 40 | 
 41 | #######################
 42 | # structure
 43 | # 1) find local maxima per ion, store intensity and scan index
 44 | # 2) sum across N scans to compensate for scan type
 45 | # 3) sum ions belonging to each maxima scan
 46 | #######################
 47 | 
 48 | def BillerBiemann(im, points=3, scans=1):
 49 | 
 50 |     """
 51 |     @summary: BillerBiemann Deconvolution
 52 | 
 53 |         Deconvolution based on the algorithm of Biller and Biemann (1974)
 54 | 
 55 |     @param im: An IntensityMatrix object
 56 |     @type im: pyms.GCMS.Class.IntensityMatrix
 57 |     @param points: Peak if maxima over 'points' number of scans (Default 3)
 58 |     @type points: IntType
 59 |     @param scans: To compensate for spectra skewing,
 60 |         peaks from 'scans' scans are combined (Default 1).
 61 |     @type scans: IntType
 62 | 
 63 |     @return: List of Peak objects
 64 |     @rtype: ListType
 65 | 
 66 |     @author: Andrew Isaac
 67 |     """
 68 | 
 69 |     rt_list = im.get_time_list()
 70 |     mass_list = im.get_mass_list()
 71 |     peak_list = []
 72 |     maxima_im = get_maxima_matrix(im, points, scans)
 73 |     numrows = len(maxima_im)
 74 |     for row in range(numrows):
 75 |         if sum(maxima_im[row]) > 0:
 76 |             rt = rt_list[row]
 77 |             ms = MassSpectrum(mass_list, maxima_im[row])
 78 |             peak = Peak(rt, ms)
 79 |             peak.set_pt_bounds([0,row,0])  # store IM index for convenience
 80 |             peak_list.append(peak)
 81 | 
 82 |     return peak_list
 83 | 
 84 | def rel_threshold(pl, percent=2):
 85 | 
 86 |     """
 87 |     @summary: Remove ions with relative intensities less than the given
 88 |         relative percentage of the maximum intensity.
 89 | 
 90 |     @param pl: A list of Peak objects
 91 |     @type pl: ListType
 92 |     @param percent: Threshold for relative percentage of intensity (Default 2%)
 93 |     @type percent: FloatType
 94 | 
 95 |     @return: A new list of Peak objects with threshold ions
 96 |     @rtype: ListType
 97 | 
 98 |     @author: Andrew Isaac
 99 |     """
100 | 
101 |     if not is_number(percent) or percent <= 0:
102 |         error("'percent' must be a number > 0")
103 | 
104 |     pl_copy = copy.deepcopy(pl)
105 |     new_pl = []
106 |     for p in pl_copy:
107 |         ms = p.get_mass_spectrum()
108 |         ia = ms.mass_spec
109 |         # assume max(ia) big so /100 1st
110 |         cutoff = (max(ia)/100.0)*float(percent)
111 |         for i in range(len(ia)):
112 |             if ia[i] < cutoff:
113 |                 ia[i] = 0
114 |         ms.mass_spec = ia
115 |         p.set_mass_spectrum(ms)
116 |         new_pl.append(p)
117 |     return new_pl
118 | 
119 | def num_ions_threshold(pl, n, cutoff):
120 | 
121 |     """
122 |     @summary: Remove Peaks where there are less than a given number of ion
123 |         intensities above the given threshold
124 | 
125 |     @param pl: A list of Peak objects
126 |     @type pl: ListType
127 |     @param n: Minimum number of ions that must have intensities above the cutoff
128 |     @type n: IntType
129 |     @param cutoff: The minimum intensity threshold
130 |     @type cutoff: FloatType
131 | 
132 |     @return: A new list of Peak objects
133 |     @rtype: ListType
134 | 
135 |     @author: Andrew Isaac
136 |     """
137 | 
138 |     pl_copy = copy.deepcopy(pl)
139 |     new_pl = []
140 |     for p in pl_copy:
141 |         ms = p.get_mass_spectrum()
142 |         ia = ms.mass_spec
143 |         ions = 0
144 |         for i in range(len(ia)):
145 |             if ia[i] >= cutoff:
146 |                 ions += 1
147 |         if ions >= n:
148 |             new_pl.append(p)
149 |     return new_pl
150 | 
151 | def sum_maxima(im, points=3, scans=1):
152 | 
153 |     """
154 |     @summary: Reconstruct the TIC as sum of maxima
155 | 
156 |     @param im: An IntensityMatrix object
157 |     @type im: pyms.GCMS.Class.IntensityMatrix
158 |     @param points: Peak if maxima over 'points' number of scans
159 |     @type points: IntType
160 |     @param scans: To compensate for spectra scewing,
161 |         peaks from 'scans' scans are combined.
162 |     @type scans: IntType
163 | 
164 |     @return: The reconstructed TIC
165 |     @rtype: pyms.GCMS.Class.IonChromatogram
166 | 
167 |     @author: Andrew Isaac
168 |     """
169 | 
170 |     maxima_im = get_maxima_matrix(im, points)
171 |     sums = []
172 |     numrows = len(maxima_im)
173 |     half = int(scans/2)
174 |     for row in range(numrows):
175 |         val = 0
176 |         for ii in range(scans):
177 |             if row - half + ii >= 0 and row - half + ii < numrows:
178 |                 val += maxima_im[row - half + ii].sum()
179 |         sums.append(val)
180 |     tic = IonChromatogram(numpy.array(sums), im.get_time_list())
181 | 
182 |     return tic
183 | 
184 | def get_maxima_indices(ion_intensities, points=3):
185 | 
186 |     """
187 |     @summary: Find local maxima.
188 | 
189 |     @param ion_intensities: A list of intensities for a single ion
190 |     @type ion_intensities: ListType
191 |     @param points: Peak if maxima over 'points' number of scans
192 |     @type points: IntType
193 | 
194 |     @return: A list of scan indices
195 |     @rtype: ListType
196 | 
197 |     @author: Andrew Isaac
198 |     """
199 | 
200 |     if not is_list(ion_intensities) or not is_number(ion_intensities[0]):
201 |         error("'ion_intensities' must be a List of numbers")
202 | 
203 |     # find peak inflection points
204 |     # use a 'points' point window
205 |     # for a plateau after a rise, need to check if it is the left edge of
206 |     # a peak
207 |     peak_point = []
208 |     edge = -1
209 |     points = int(points)
210 |     half = int(points/2)
211 |     points = 2*half+1  # ensure odd number of points
212 |     for index in range(len(ion_intensities)-points+1):
213 |         left = ion_intensities[index:index+half]
214 |         mid = ion_intensities[index+half]
215 |         right = ion_intensities[index+half+1:index+points]
216 |         # max in middle
217 |         if mid > max(left) and mid > max(right):
218 |             peak_point.append(index+half)
219 |             edge = -1  # ignore previous rising edge
220 |         # flat from rise (left of peak?)
221 |         if mid > max(left) and mid == max(right):
222 |             edge = index+half  # ignore previous rising edge, update latest
223 |         # fall from flat
224 |         if mid == max(left) and mid > max(right):
225 |             if edge > -1:
226 |                 centre = int((edge+index+half)/2)  # mid point
227 |                 peak_point.append(centre)
228 |             edge = -1
229 | 
230 |     return peak_point
231 | 
232 | def get_maxima_list(ic, points=3):
233 | 
234 |     """
235 |     @summary: List of retention time and intensity of local maxima for ion
236 | 
237 |     @param ic: An IonChromatogram object
238 |     @type ic: pyms.GCMS.Class.IonChromatogram
239 |     @param points: Peak if maxima over 'points' number of scans
240 |     @type points: IntType
241 | 
242 |     @return: A list of retention time and intensity of local maxima for ion
243 |     @rtype: ListType
244 | 
245 |     @author: Andrew Isaac
246 |     """
247 | 
248 |     peak_point = get_maxima_indices(ic.get_intensity_array(), points)
249 |     mlist = []
250 |     for index in range(len(peak_point)):
251 |         rt = ic.get_time_at_index(peak_point[index])
252 |         intens = ic.get_intensity_at_index(peak_point[index])
253 |         mlist.append([rt, intens])
254 |     return mlist
255 | 
256 | def get_maxima_list_reduced(ic, mp_rt, points=13, window=3):
257 | 
258 |     """
259 |     @summary: List of retention time and intensity of local maxima for ion
260 |               Only peaks around a specific retention time are recorded
261 |               created for use with gap filling algorithm
262 |     @param ic: An IonChromatogram object
263 |     @type ic: pyms.GCMS.Class.IonChromatogram
264 |     @param mp_rt: The retention time of the missing peak
265 |     @type ic: floatType
266 |     @param points: Peak if maxima over 'points' number of scans
267 |     @type points: IntType
268 |     @param window: The window around the mp_rt where peaks should
269 |                    be recorded
270 |     @type window: intType
271 | 
272 |     @return: A list of retention time and intensity of local maxima for ion
273 |     @rtype: ListType
274 | 
275 |     @author: Andrew Isaac
276 |     """
277 | 
278 |     peak_point = get_maxima_indices(ic.get_intensity_array(), points)
279 |     mlist = []
280 |     for index in range(len(peak_point)):
281 |         rt = ic.get_time_at_index(peak_point[index])
282 |         if (rt > float(mp_rt) - window) and (rt < float(mp_rt) + window):
283 |             intens = ic.get_intensity_at_index(peak_point[index])
284 |             mlist.append([rt, intens])
285 |         else:
286 |             pass
287 |     return mlist
288 | 
289 | 
290 | def get_maxima_matrix(im, points=3, scans=1):
291 | 
292 |     """
293 |     @summary: Get matrix of local maxima for each ion
294 | 
295 |     @param im: An IntensityMatrix object
296 |     @type im: pyms.GCMS.Class.IntensityMatrix
297 |     @param points: Peak if maxima over 'points' number of scans
298 |     @type points: IntType
299 |     @param scans: To compensate for spectra skewing,
300 |         peaks from 'scans' scans are combined (Default 1).
301 |     @type scans: IntType
302 | 
303 |     @return: A matrix of each ion and scan and intensity at ion peaks
304 |     @rtype: numpy.ndarray
305 | 
306 |     @author: Andrew Isaac
307 |     """
308 | 
309 |     numrows, numcols = im.get_size()
310 |     # zeroed matrix, size numrows*numcols; only the maxima get filled in
311 |     maxima_im = numpy.zeros((numrows, numcols))
312 |     raw_im = numpy.array(im.get_matrix_list())
313 | 
314 |     for col in range(numcols):  # assume all rows have same width
315 |         # 1st, find the row indices of the local maxima in this column
316 |         maxima = get_maxima_indices(raw_im[:,col], points)
317 |         # 2nd, copy the raw intensities found at those indices
318 |         for row in maxima:
319 |             maxima_im[row, col] = raw_im[row, col]
320 | 
321 |     # combine spectra within 'scans' scans; maxima_im is updated in place
322 |     half = int(scans/2)
323 |     for row in range(numrows):
324 |         tic = 0
325 |         best = 0
326 |         loc = 0  # NOTE(review): stays 0 when no row sum in the window is > 0 - confirm
327 |         # find the window row (offset 'loc') with the largest row sum
328 |         for ii in range(scans):
329 |             if row - half + ii >= 0 and row - half + ii < numrows:
330 |                 tic = maxima_im[row - half + ii].sum()
331 |                 # keep the first row with the largest sum seen so far
332 |                 if tic > best:
333 |                     best = tic
334 |                     loc = ii
335 |         # add every other in-range row of the window to the best row, then zero it
336 |         for ii in range(scans):
337 |             if row - half + ii >= 0 and row - half + ii < numrows and ii != loc:
338 |                 for col in range(numcols):
339 |                     maxima_im[row - half + loc, col] += \
340 |                         maxima_im[row - half + ii, col]
341 |                     maxima_im[row - half + ii, col] = 0
342 | 
343 |     return maxima_im
344 | 


--------------------------------------------------------------------------------
/pyms/Deconvolution/BillerBiemann/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Biller and Biemann deconvolution
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Deconvolution/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Deconvolution routines
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Display/Class.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Class to Display Ion Chromatograms and TIC
  3 | """
  4 | 
  5 | #############################################################################
  6 | #                                                                           #
  7 | #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 | #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 | #                                                                           #
 10 | #    This program is free software; you can redistribute it and/or modify   #
 11 | #    it under the terms of the GNU General Public License version 2 as      #
 12 | #    published by the Free Software Foundation.                             #
 13 | #                                                                           #
 14 | #    This program is distributed in the hope that it will be useful,        #
 15 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 | #    GNU General Public License for more details.                           #
 18 | #                                                                           #
 19 | #    You should have received a copy of the GNU General Public License      #
 20 | #    along with this program; if not, write to the Free Software            #
 21 | #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 | #                                                                           #
 23 | #############################################################################
 24 | 
 25 | import matplotlib.pyplot as plt
 26 | import numpy
 27 | import sys
 28 | sys.path.append('/x/PyMS/')
 29 | 
 30 | from pyms.GCMS.Class import IonChromatogram 
 31 | from pyms.Utils.Error import error
 32 | 
 33 | 
 34 | class Display(object):
 35 |     """
 36 |     @summary: Class to display Ion Chromatograms and Total
 37 |               Ion Chromatograms from GCMS.Class.IonChromatogram
 38 | 		
 39 |               Uses matplotlib module pyplot to do plotting
 40 | 
 41 |     @author: Sean O'Callaghan
 42 |     @author: Vladimir Likic
 43 |     """
 44 |     	
 45 |     def __init__(self):
 46 |         """	
 47 |         @summary: Initialises an instance of Display class
 48 | 	"""
 49 | 		
 50 | 	# Container to store plots
 51 | 	self.__tic_ic_plots = []
 52 | 			
 53 | 	# color dictionary for plotting of ics; blue reserved
 54 | 	# for TIC
 55 | 	self.__col_ic = {0:'r', 1:'g', 2:'k', 3:'y', 4:'m', 5:'c'}
 56 | 	self.__col_count = 0  # counter to keep track of colors
 57 |         
 58 |         # Peak list container 
 59 |         self.__peak_list = []
 60 | 				
 61 | 	#Plotting Variables
 62 | 	self.__fig = plt.figure()
 63 | 	self.__ax = self.__fig.add_subplot(111)
 64 | 
 65 | 		
 66 |     
 67 |     
 68 |     def plot_ics(self, ics, labels = None):
 69 | 		
 70 | 	"""
 71 |         @summary: Adds an Ion Chromatogram or a 
 72 | 	list of Ion Chromatograms to plot list
 73 | 	
 74 | 	@param ics: List of Ion Chromatograms m/z channels
 75 | 		for plotting
 76 | 	@type ics: list of pyms.GCMS.Class.IonChromatogram 
 77 | 	
 78 | 	@param labels: Labels for plot legend
 79 | 	@type labels: list of StringType
 80 |         """
 81 | 	
 82 |         if not isinstance(ics, list):
 83 | 	    if isinstance(ics, IonChromatogram):
 84 | 		ics = [ics]
 85 | 	    else:
 86 | 		error("ics argument must be an IonChromatogram\
 87 | 		or a list of Ion Chromatograms")
 88 | 	
 89 |         if not isinstance(labels, list) and labels != None:
 90 |             labels = [labels]
 91 |         # TODO: take care of case where one element of ics is
 92 | 	# not an IonChromatogram
 93 | 		
 94 | 	
 95 | 	intensity_list = []
 96 | 	time_list = ics[0].get_time_list()
 97 | 			
 98 | 	
 99 | 	for i in range(len(ics)):
100 | 	    intensity_list.append(ics[i].get_intensity_array())
101 | 		
102 | 	
103 |         # Case for labels not present
104 |         if labels == None:
105 |             for i in range(len(ics)):
106 |                 self.__tic_ic_plots.append(plt.plot(time_list, \
107 | 	            intensity_list[i], self.__col_ic[self.__col_count]))
108 | 	        if self.__col_count == 5:
109 |                     self.__col_count = 0
110 |                 else:
111 |                     self.__col_count += 1
112 |         
113 |         # Case for labels present
114 |         else:
115 |             for i in range(len(ics)):	
116 | 		
117 | 	        self.__tic_ic_plots.append(plt.plot(time_list, \
118 | 	            intensity_list[i], self.__col_ic[self.__col_count]\
119 | 	                , label = labels[i]))
120 | 	        if self.__col_count == 5:
121 |                     self.__col_count = 0
122 |                 else:
123 |                     self.__col_count += 1
124 | 	
125 | 	
126 |     
127 |     
128 |     def plot_tic(self, tic, label=None):
129 | 	
130 |         """
131 |         @summary: Adds Total Ion Chromatogram to plot list
132 | 	
133 | 	@param tic: Total Ion Chromatogram 
134 | 	@type tic: pyms.GCMS.Class.IonChromatogram
135 | 	
136 | 	@param label: label for plot legend
137 | 	@type label: StringType
138 |         """
139 | 		
140 | 	if not isinstance(tic, IonChromatogram):
141 | 	    error("TIC is not an Ion Chromatogram object")
142 | 			
143 | 		
144 | 	intensity_list = tic.get_intensity_array()
145 | 	time_list = tic.get_time_list()
146 | 				
147 | 	self.__tic_ic_plots.append(plt.plot(time_list, intensity_list,\
148 | 	label=label))
149 | 		
150 |     
151 |     
152 |     
153 |     
154 |     def plot_peaks(self, peak_list, label = "Peaks"):
155 | 	
156 |         """
157 |         @summary: Plots the locations of peaks as found
158 | 		  by PyMS.
159 | 		
160 | 	@param peak_list: List of peaks
161 | 	@type peak_list: list of pyms.Peak.Class.Peak
162 | 		
163 | 	@param label: label for plot legend
164 | 	@type label: StringType
165 |         """
166 |         
167 |         if not isinstance(peak_list, list):
168 |             error("peak_list is not a list")
169 | 		
170 | 	time_list = []
171 | 	height_list=[]
172 |         
173 |         # Copy to self.__peak_list for onclick event handling
174 |         self.__peak_list = peak_list
175 | 	
176 | 	for peak in peak_list:
177 | 	    time_list.append(peak.get_rt())
178 | 	    height_list.append(sum(peak.get_mass_spectrum().mass_spec))
179 | 		
180 | 	self.__tic_ic_plots.append(plt.plot(time_list, height_list, 'o',\
181 | 	    label = label))
182 | 	
183 |     
184 |     
185 |     
186 |     
187 |     def get_5_largest(self, intensity_list):
188 |         
189 |         """
190 |         @summary: Computes the indices of the largest 5 ion intensities
191 |                   for writing to console
192 |         
193 |         @param intensity_list: List of Ion intensities
194 |         @type intensity_list: listType
195 |         """
196 |         
197 |         largest = [0,0,0,0,0,0,0,0,0,0]
198 |         
199 |         # Find out largest value
200 |         for i in range(len(intensity_list)):
201 |             if intensity_list[i] > intensity_list[largest[0]]:
202 |                 largest[0] = i
203 |         
204 |         # Now find next four largest values
205 |         for j in [1,2,3,4,5,6,7,8,9]:
206 |             for i in range(len(intensity_list)):
207 |                 if intensity_list[i] > intensity_list[largest[j]] and \
208 |                     intensity_list[i] < intensity_list[largest[j-1]]:
209 |                     largest[j] = i
210 |        
211 |         return largest
212 | 
213 |     
214 |     
215 |     
216 |     
217 |     def plot_mass_spec(self, rt, mass_list, intensity_list):
218 |         
219 |         """ 
220 |         @summary: Plots the mass spec given a list of masses and intensities
221 |         
222 |         @param rt: The retention time for labelling of the plot
223 |         @type rt: floatType
224 |         
225 |         @param mass_list: list of masses of the MassSpectrum object
226 |         @type mass_list: listType
227 |         
228 |         @param intensity_list: List of intensities of the MassSpectrum object
229 |         @type intensity_list: listType
230 |         """
231 |         
232 |         new_fig = plt.figure()
233 |         new_ax = new_fig.add_subplot(111)
234 |         
235 |         # to set x axis range find minimum and maximum m/z channels
236 |         max_mz = mass_list[0]
237 |         min_mz = mass_list[0]
238 |         
239 |         for i in range(len(mass_list)):
240 |             if mass_list[i] > max_mz:
241 |                 max_mz = mass_list[i]
242 |                 
243 |         for i in range(len(mass_list)):
244 |             if mass_list[i] < min_mz:
245 |                 min_mz = mass_list[i]
246 |         
247 |         label = "Mass spec for peak at time " + "%5.2f" % rt
248 |         
249 |         mass_spec_plot = plt.bar(mass_list, intensity_list,\
250 | 	label=label, width=0.01)
251 |         
252 |         x_axis_range = plt.xlim(min_mz, max_mz)
253 |         
254 |         t = new_ax.set_title(label)
255 |         
256 |         plt.show()
257 |         
258 |         
259 |     
260 |     
261 |     def onclick(self, event):
262 |         
263 |         """
264 |         @summary: Finds the 5 highest intensity m/z channels for the selected peak.
265 |                   The peak is selected by clicking on it. If a button other than
266 |                   the left one is clicked, a new plot of the mass spectrum is displayed
267 |                   
268 |         @param event: a mouse click by the user
269 |         """
270 |                     
271 |         intensity_list = []
272 |         mass_list = []
273 |         
274 |         
275 |         for peak in self.__peak_list:
276 |             if event.xdata > 0.9999*peak.get_rt() and event.xdata < \
277 |                 1.0001*peak.get_rt():
278 |                 intensity_list = peak.get_mass_spectrum().mass_spec
279 |                 mass_list = peak.get_mass_spectrum().mass_list
280 |             
281 |         largest = self.get_5_largest(intensity_list)
282 |         
283 |         if len(intensity_list) != 0:
284 |             print "mass\t intensity"
285 |             for i in range(10):
286 |                 print mass_list[largest[i]], "\t", intensity_list[largest[i]]
287 |         else:    # if the selected point is not close enough to peak
288 |             print "No Peak at this point"
289 |         
290 |         # Check if a button other than left was pressed, if so plot mass spectrum
291 |         # Also check that a peak was selected, not just whitespace
292 |         if event.button != 1 and len(intensity_list) != 0:
293 |             self.plot_mass_spec(event.xdata, mass_list, intensity_list)
294 |         
295 |             
296 |     
297 |     
298 |     def do_plotting(self, plot_label = None):
299 | 	
300 |         """
301 | 	@summary: Plots TIC and IC(s) if they have been created
302 | 		by plot_tic() or plot_ics(). Adds detected peaks
303 | 		if they have been added by plot_peaks()
304 | 		
305 | 	@param plot_label: Optional to supply a label or other
306 | 			definition of data origin
307 | 	@type plot_label: StringType
308 | 	
309 | 	"""
310 | 	
311 | 	# if no plots have been created advise user
312 | 	if len(self.__tic_ic_plots) == 0:
313 | 	    print 'No plots have been created'
314 | 	    print 'Please call a plotting function before'
315 | 	    print 'calling do_plotting()'
316 | 	
317 | 	if plot_label != None :
318 |             t = self.__ax.set_title(plot_label)
319 | 	
320 | 	l = self.__ax.legend()
321 | 			
322 | 	self.__fig.canvas.draw
323 |         
324 |         # If no peak list plot, no mouse click event
325 |         if len(self.__peak_list) != 0:
326 |             cid = self.__fig.canvas.mpl_connect('button_press_event', self.onclick)	
327 | 	plt.show()
328 | 		
329 | 	
330 | 	
331 | 


--------------------------------------------------------------------------------
/pyms/Display/Function.py:
--------------------------------------------------------------------------------
  1 | """Display.Function.py
  2 | """
  3 | 
  4 | #############################################################################
  5 | #                                                                           #
  6 | #    PyMS software for processing of metabolomic mass-spectrometry data     #
  7 | #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  8 | #                                                                           #
  9 | #    This program is free software; you can redistribute it and/or modify   #
 10 | #    it under the terms of the GNU General Public License version 2 as      #
 11 | #    published by the Free Software Foundation.                             #
 12 | #                                                                           #
 13 | #    This program is distributed in the hope that it will be useful,        #
 14 | #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 15 | #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 16 | #    GNU General Public License for more details.                           #
 17 | #                                                                           #
 18 | #    You should have received a copy of the GNU General Public License      #
 19 | #    along with this program; if not, write to the Free Software            #
 20 | #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 21 | #                                                                           #
 22 | #############################################################################
 23 | 
 24 | 
 25 | import matplotlib.pyplot as plt
 26 | 
 27 | import sys
 28 | sys.path.append('/x/PyMS/')
 29 | 
 30 | from pyms.Utils.Error import error
 31 | from pyms.GCMS.Class import IonChromatogram 
 32 | 
 33 | 
 34 | def plot_ic(ic, line_label=" ", plot_title=" "):
 35 |     """
 36 |     @summary: Plots an Ion Chromatogram or List of same
 37 |     
 38 |     @param ic: The ion chromatogram
 39 |     @type ic: pyms.GCMS.Class.IonChromatogram
 40 |     
 41 |     @param line_label: plot legend
 42 |     @type line_label: stringType
 43 |     
 44 |     @param plot_title: A label for the plot
 45 |     @type plot_title: String Type
 46 |     
 47 |     @author: Sean O'Callaghan
 48 |     """
 49 |     			
 50 |     #Plotting Variables
 51 |     fig = plt.figure()
 52 |     ax = fig.add_subplot(111)
 53 |     
 54 |    
 55 |     if not isinstance(ic, IonChromatogram):
 56 | 	error("ics argument must be an IonChromatogram\
 57 |             or a list of Ion Chromatograms")
 58 | 
 59 |     time_list = ic.get_time_list()
 60 | 			
 61 | 	
 62 |     
 63 |     intensity_list = ic.get_intensity_array()
 64 |     	
 65 |     ic_plot = plt.plot(time_list, intensity_list, label=line_label)
 66 |         
 67 |     t = ax.set_title(plot_title)
 68 |     l = ax.legend()
 69 | 			
 70 |     fig.canvas.draw
 71 |     plt.show()
 72 |     
 73 |     
 74 |     
 75 | def plot_ms(mass_spec, plot_title=" "):
 76 |         
 77 |     """ 
 78 |     @summary: Plots the mass spec given a list of masses and intensities
 79 |         
 80 |     @param mass_spec: The mass spectrum at a given time/index
 81 |     @type mass_spec: GCMS.Class.MassSpectrum
 82 |         
 83 |     @param plot_title: A label for the plot
 84 |     @type plot_title: String Type
 85 |     
 86 |     @author: Sean O'Callaghan
 87 |     """
 88 |         
 89 |     fig = plt.figure()
 90 |     ax = fig.add_subplot(111)
 91 |         
 92 |     mass_list = mass_spec.mass_list
 93 |     intensity_list = mass_spec.mass_spec
 94 |         
 95 |     # to set x axis range find minimum and maximum m/z channels
 96 |     max_mz = mass_list[0]
 97 |     min_mz = mass_list[0]
 98 |         
 99 |     for i in range(len(mass_list)):
100 |         if mass_list[i] > max_mz:
101 |             max_mz = mass_list[i]
102 |                 
103 |     for i in range(len(mass_list)):
104 |         if mass_list[i] < min_mz:
105 |             min_mz = mass_list[i]
106 |         
107 |     mass_spec_plot = plt.bar(mass_list, intensity_list,\
108 |         width=0.01)
109 |         
110 |     x_axis_range = plt.xlim(min_mz, max_mz)
111 |     t = ax.set_title(plot_title)
112 |     plt.show()
113 | 


--------------------------------------------------------------------------------
/pyms/Display/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Functions for Displaying ICs and TIC
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                    #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Experiment/Class.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Models a GC-MS experiment represented by a list of signal peaks
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | from pyms.Utils.Error import error
26 | from pyms.Utils.Utils import is_str
27 | from pyms.Peak.Class import Peak
28 | from pyms.Peak.List.Utils import is_peak_list, sele_peaks_by_rt
29 | 
class Experiment:

    """
    @summary: An experiment: an identifying code together with the
        list of peaks that belong to it

    @author: Vladimir Likic
    @author: Andrew Isaac
    """

    def __init__(self, expr_code, peak_list):

        """
        @summary: Validates and stores the experiment code and peaks

        @param expr_code: Unique identifier for the experiment
        @type expr_code: StringType
        @param peak_list: A list of peak objects
        @type peak_list: ListType
        """

        code_ok = is_str(expr_code)
        if not code_ok:
            error("'expr_code' must be a string")

        peaks_ok = is_peak_list(peak_list)
        if not peaks_ok:
            error("'peak_list' must be a list of Peak objects")

        self.__expr_code, self.__peak_list = expr_code, peak_list

    def get_expr_code(self):

        """
        @summary: Accessor for the experiment code

        @return: The expr_code of the experiment
        @rtype:  StringType
        """

        code = self.__expr_code
        return code

    def get_peak_list(self):

        """
        @summary: Accessor for the stored peaks

        @return: A list of peak objects
        @rtype: ListType
        """

        peaks = self.__peak_list
        return peaks

    def sele_rt_range(self, rt_range):

        """
        @summary: Replaces the stored peak list with the result of
        sele_peaks_by_rt() for the given retention time range

        @param rt_range: Min, max retention time given as a list [rt_min,rt_max]
        @type rt_range: ListType

        @return: none
        @rtype: NoneType
        """

        self.__peak_list = sele_peaks_by_rt(self.__peak_list, rt_range)
95 | 


--------------------------------------------------------------------------------
/pyms/Experiment/IO.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions related to experiment input/output
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import string, cPickle
 26 | 
 27 | from pyms.Utils.Error import error
 28 | from pyms.Experiment.Class import Experiment
 29 | from pyms.Utils.Utils import is_str
 30 | from pyms.Utils.IO import file_lines
 31 | from pyms.GCMS.Class import IntensityMatrix
 32 | 
 33 | def load_expr(file_name):
 34 | 
 35 |     """
 36 |     @summary: Loads an experiment saved with 'store_expr'
 37 | 
 38 |     @param file_name: Experiment file name
 39 |     @type file_name: StringType
 40 | 
 41 |     @return: The experiment intensity matrix and peak list
 42 |     @rtype: pyms.Experiment.Class.Experiment
 43 | 
 44 |     @author: Vladimir Likic
 45 |     @author: Andrew Isaac
 46 |     """
 47 | 
 48 |     if not is_str(file_name):
 49 |         error("'file_name' not a string")
 50 | 
 51 |     fp = open(file_name,'rb')
 52 |     expr = cPickle.load(fp)
 53 |     fp.close()
 54 | 
 55 |     if not isinstance(expr, Experiment):
 56 |         error("'file_name' is not an Experiment object")
 57 | 
 58 |     return expr
 59 | 
 60 | def store_expr(file_name, expr):
 61 | 
 62 |     """
 63 |     @summary: stores an expriment to a file
 64 | 
 65 |     @param file_name: The name of the file
 66 |     @type file_name: StringType
 67 |     @param expr: An experiment object
 68 |     @type expr: pyms.Experiment.Class.Experiment
 69 | 
 70 |     @return: none
 71 |     @rtype: NoneType
 72 | 
 73 |     @author: Vladimir Likic
 74 |     @author: Andrew Isaac
 75 |     """
 76 | 
 77 |     if not isinstance(expr, Experiment):
 78 |         error("argument not an instance of the class 'Experiment'")
 79 | 
 80 |     if not is_str(file_name):
 81 |         error("'file_name' not a string")
 82 | 
 83 |     fp = open(file_name,'wb')
 84 |     cPickle.dump(expr, fp, 1)
 85 |     fp.close()
 86 | 
 87 | def read_expr_list(file_name):
 88 | 
 89 |     """
 90 |     @summary: Reads the set of experiment files and returns a list of
 91 |     Experiment objects
 92 | 
 93 |     @param file_name: The name of the file which lists experiment
 94 |         dump file names, one file per line
 95 |     @type file_name: StringType
 96 | 
 97 |     @return: A list of Experiment instances
 98 |     @rtype: ListType
 99 | 
100 |     @author: Vladimir Likic
101 |     """
102 | 
103 |     if not is_str(file_name):
104 |         error("file_name argument must be a string")
105 |     try:
106 |         fp = open(file_name, 'r')
107 |     except IOError:
108 |         error("error opening file '%s' for reading" % file_name)
109 | 
110 |     exprfiles = fp.readlines()
111 |     fp.close()
112 | 
113 |     exprl = []
114 | 
115 |     for exprfile in exprfiles:
116 | 
117 |         exprfile = string.strip(exprfile)
118 |         expr = load_expr(exprfile)
119 | 
120 |         exprl.append(expr)
121 | 
122 |     return exprl
123 | 


--------------------------------------------------------------------------------
/pyms/Experiment/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Functions for modelling experiments
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/GCMS/IO/ANDI/Function.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for reading manufacturer specific ANDI-MS data files
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import math, copy
 26 | 
 27 | import numpy
 28 | 
 29 | from pyms.GCMS.Class import GCMS_data
 30 | from pyms.GCMS.Class import Scan
 31 | from pyms.Utils.Error import error, stop
 32 | from pyms.Utils.Utils import is_str, is_int, is_float, is_number, is_list
 33 | from pyms.Utils.Time import time_str_secs
 34 | from pycdf import *
 35 | 
 36 | try:
 37 |     from mpi4py import MPI
 38 | except:
 39 |     pass
 40 | 
 41 | def ANDI_reader(file_name):
 42 | 
 43 |     """
 44 |     @summary: A reader for ANDI-MS NetCDF files, returns
 45 |         a GC-MS data object
 46 | 
 47 |     @param file_name: The name of the ANDI-MS file
 48 |     @type file_name: StringType
 49 | 
 50 |     @author: Qiao Wang
 51 |     @author: Andrew Isaac
 52 |     @author: Vladimir Likic
 53 |     """
 54 | 
 55 |     ## TODO: use 'point_count' and allow for zero len scans
 56 | 
 57 |     # the keys used to retrieve certain data from the NetCDF file
 58 |     __MASS_STRING = "mass_values"
 59 |     __INTENSITY_STRING = "intensity_values"
 60 |     __TIME_STRING = "scan_acquisition_time"
 61 | 
 62 |     if not is_str(file_name):
 63 |         error("'file_name' must be a string")
 64 |     try:
 65 |         file = CDF(file_name)
 66 |     except CDFError:
 67 |         error("Cannot open file '%s'" % file_name)
 68 | 
 69 |     try:# avoid printing from each rank
 70 |         comm = MPI.COMM_WORLD
 71 |         rank = comm.Get_rank()
 72 |         size = comm.Get_size()
 73 |     
 74 |         if rank ==0:
 75 |             file_names = []
 76 |         
 77 |             for i in range(1,size):
 78 |                 recv_buffer = ""
 79 |                 file_n = comm.recv(recv_buffer, i)
 80 |                 file_names.append(file_n)
 81 | 
 82 |             print " -> Reading netCDF files:"
 83 |             print file_name
 84 |             for file_n in file_names:
 85 |                 print file_n
 86 |         else:
 87 |             comm.send(file_name, dest=0)
 88 |     except:
 89 |         print " -> Reading netCDF file '%s'" % (file_name)
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 |     scan_list = []
 96 |     mass = file.var(__MASS_STRING)
 97 |     intensity = file.var(__INTENSITY_STRING)
 98 |     mass_values = mass.get().tolist()
 99 |     mass_list = []
100 |     mass_previous = mass_values[0]
101 |     mass_list.append(mass_previous)
102 |     intensity_values = intensity.get().tolist()
103 |     intensity_list = []
104 |     intensity_previous = intensity_values[0]
105 |     intensity_list.append(intensity_previous)
106 |     if not len(mass_values) == len(intensity_values):
107 |         error("length of mass_list is not equal to length of intensity_list !")
108 |     for i in range(len(mass_values) - 1):
109 |         # assume masses in ascending order until new scan
110 |         if mass_previous <= mass_values[i + 1]:
111 |             #print mass_values[i+1]
112 |             mass_list.append(mass_values[i + 1])
113 |             mass_previous = mass_values[i + 1]
114 |             intensity_list.append(intensity_values[i + 1])
115 |             intensity_previous = intensity_values[i + 1]
116 |         # new scan
117 |         else:
118 |             scan_list.append(Scan(mass_list, intensity_list))
119 |             #print "Added scan"
120 |             mass_previous = mass_values[i + 1]
121 |             intensity_previous = intensity_values[i + 1]
122 |             mass_list = []
123 |             intensity_list = []
124 |             mass_list.append(mass_previous)
125 |             intensity_list.append(intensity_previous)
126 |     # store final scan
127 |     scan_list.append(Scan(mass_list, intensity_list))
128 |     time = file.var(__TIME_STRING)
129 |     time_list = time.get().tolist()
130 | 
131 |     # sanity check
132 |     if not len(time_list) == len(scan_list):
133 |         error("number of time points (%d) does not equal the number of scans (%d)"%(len(time_list), len(scan_list)))
134 | 
135 |     data = GCMS_data(time_list, scan_list)
136 | 
137 |     return data
138 | 
def ANDI_writer(file_name, im):

    """
    @summary: A writer for ANDI-MS NetCDF files, stores the contents
        of an IntensityMatrix

    Only points with an intensity greater than zero are written.  For
    each scan the number of written points is stored in 'point_count'.
    All data is prepared and checked before the file is created, so an
    invalid 'im' does not truncate an existing file.

    @param file_name: The name of the ANDI-MS file
    @type file_name: StringType
    @param im: The IntensityMatrix
    @type im: pyms.GCMS.Class.IntensityMatrix

    @return: none
    @rtype: NoneType

    @author: Andrew Isaac
    """

    # netCDF header info for compatibility
    # attributes
  #dataset_completeness   0 CHAR     6 C1+C2
  #dataset_origin         4 CHAR    16 Santa Clara, CA
  #experiment_date_time_stamp   7 CHAR    20 20081218044500+1100
  #experiment_title       6 CHAR     7 mix ma
  #experiment_type       10 CHAR    25 Centroided Mass Spectrum
  #external_file_ref_0    9 CHAR     8 MA_5C.M
  #languages              3 CHAR     8 English
  #ms_template_revision   1 CHAR     6 1.0.1
  #netcdf_file_date_time_stamp   5 CHAR    20 20090114001531+1100
  #netcdf_revision        2 CHAR     6 2.3.2
  #number_of_times_calibrated  12 INT      1 0
  #number_of_times_processed  11 INT      1 1
  #operator_name          8 CHAR    12 Dave and Su
  #raw_data_intensity_format  25 CHAR     6 Float
  #raw_data_mass_format  23 CHAR     6 Float
  #raw_data_time_format  24 CHAR     6 Short
  #sample_state          13 CHAR    12 Other State
  #test_detector_type    18 CHAR    20 Electron Multiplier
  #test_ionization_mode  16 CHAR    16 Electron Impact
  #test_ionization_polarity  17 CHAR    18 Positive Polarity
  #test_ms_inlet         15 CHAR    17 Capillary Direct
  #test_resolution_type  19 CHAR    20 Constant Resolution
  #test_scan_direction   21 CHAR     3 Up
  #test_scan_function    20 CHAR    10 Mass Scan
  #test_scan_law         22 CHAR     7 Linear
  #test_separation_type  14 CHAR    18 No Chromatography

    # dimensions
  #_128_byte_string       6    128
  #_16_byte_string        3     16
  #_255_byte_string       7    255
  #_2_byte_string         0      2
  #_32_byte_string        4     32
  #_4_byte_string         1      4
  #_64_byte_string        5     64
  #_8_byte_string         2      8
  #error_number          10      1
  #instrument_number     12      1
  #point_number           9 554826   X
  #range                  8      2
  #scan_number           11   9865

    # variables
  #a_d_coaddition_factor   2 SHORT      0 scan_number(9865)
  #a_d_sampling_rate      1 DOUBLE     0 scan_number(9865)
  #actual_scan_number     7 INT        0 scan_number(9865)
  #error_log              0 CHAR       0 error_number(1), _64_byte_string(64)
  #flag_count            15 INT        0 scan_number(9865)
  #instrument_app_version  27 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_comments   28 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_fw_version  25 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_id         20 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_mfr        21 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_model      22 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_name       19 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_os_version  26 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_serial_no  23 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #instrument_sw_version  24 CHAR       0 instrument_number(1),
#_32_byte_string(32)
  #intensity_values      18 FLOAT      3 point_number(554826)
  #inter_scan_time        5 DOUBLE     0 scan_number(9865)
  #mass_range_max        10 DOUBLE     0 scan_number(9865)
  #mass_range_min         9 DOUBLE     0 scan_number(9865)
  #mass_values           16 FLOAT      2 point_number(554826)
  #point_count           14 INT        0 scan_number(9865)
  #resolution             6 DOUBLE     0 scan_number(9865)
  #scan_acquisition_time   3 DOUBLE     0 scan_number(9865)
  #scan_duration          4 DOUBLE     0 scan_number(9865)
  #scan_index            13 INT        0 scan_number(9865)
  #time_range_max        12 DOUBLE     0 scan_number(9865)
  #time_range_min        11 DOUBLE     0 scan_number(9865)
  #time_values           17 FLOAT      2 point_number(554826)
  #total_intensity        8 DOUBLE     1 scan_number(9865)

    # variable information
#intensity_values attributes

  #name                 idx type   len value
  #-------------------- --- ----   --- -----
  #add_offset             1 DOUBLE   1 0.0
  #scale_factor           2 DOUBLE   1 1.0
  #units                  0 CHAR    26 Arbitrary Intensity Units

#mass_values attributes

  #name                 idx type   len value
  #-------------------- --- ----   --- -----
  #scale_factor           1 DOUBLE   1 1.0
  #units                  0 CHAR     4 M/Z

#time_values attributes

  #name                 idx type   len value
  #-------------------- --- ----   --- -----
  #scale_factor           1 DOUBLE   1 1.0
  #units                  0 CHAR     8 Seconds

#total_intensity attributes

  #name                 idx type   len value
  #-------------------- --- ----   --- -----
  #units                  0 CHAR    26 Arbitrary Intensity Units

    # netCDF dimension names
    __POINT_NUMBER = "point_number"
    __SCAN_NUMBER = "scan_number"

    # the keys used to create certain data from the NetCDF file
    __MASS_STRING = "mass_values"
    __INTENSITY_STRING = "intensity_values"
    __TIME_STRING = "scan_acquisition_time"
    __POINT_COUNT = "point_count"

    if not is_str(file_name):
        error("'file_name' must be a string")

    mass_list = im.get_mass_list()
    time_list = im.get_time_list()

    # direct access, don't modify
    intensity_matrix = im.intensity_matrix

    # compress by ignoring zero intensities
    # included for consistency with imported netCDF format
    mass_values = []
    intensity_values = []
    point_count_values = []
    for scan_row in intensity_matrix:
        pc = 0  # point count
        for col, intensity in enumerate(scan_row):
            if intensity > 0:
                mass_values.append(mass_list[col])
                intensity_values.append(intensity)
                pc += 1
        point_count_values.append(pc)

    # sanity checks
    if not len(time_list) == len(point_count_values):
        error("number of time points does not equal the number of scans")

    try:
        # Open netCDF file in overwrite mode, creating it if inexistent.
        nc = CDF(file_name, NC.WRITE|NC.TRUNC|NC.CREATE)
        # Automatically set define and data modes.
        nc.automode()
    except CDFError:
        error("Cannot create file '%s'" % file_name)

    # make sure the handle is released even if defining or populating
    # the variables fails
    try:
        # create dimensions
        # total number of data points
        dim_point_number = nc.def_dim(__POINT_NUMBER, len(mass_values))
        # number of scans
        dim_scan_number = nc.def_dim(__SCAN_NUMBER, len(point_count_values))

        # create variables
        # points
        var_mass_values = nc.def_var(__MASS_STRING, NC.FLOAT,
            dim_point_number)
        var_intensity_values = nc.def_var(__INTENSITY_STRING, NC.FLOAT,
            dim_point_number)
        # scans
        var_time_list = nc.def_var(__TIME_STRING, NC.DOUBLE, dim_scan_number)
        var_point_count_values = nc.def_var(__POINT_COUNT, NC.INT,
            dim_scan_number)

        # populate variables
        # points
        var_mass_values[:] = mass_values
        var_intensity_values[:] = intensity_values
        # scans
        var_time_list[:] = time_list
        var_point_count_values[:] = point_count_values
    finally:
        # close file
        nc.close()
337 | 
338 | 


--------------------------------------------------------------------------------
/pyms/GCMS/IO/ANDI/__init__.py:
--------------------------------------------------------------------------------
 1 | """
Functions for I/O of ANDI-MS NetCDF files
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/GCMS/IO/JCAMP/Function.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for I/O of data in JCAMP-DX format
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | from pyms.GCMS.Class import GCMS_data
 26 | from pyms.GCMS.Class import Scan
 27 | from pyms.Utils.IO import file_lines
 28 | from pyms.Utils.Utils import is_str
 29 | from pyms.Utils.Error import error
 30 | 
 31 | import numpy
 32 | 
 33 | def JCAMP_reader(file_name):
 34 | 
 35 |     """
 36 |     @summary: Generic reader for JCAMP DX files, produces GC-MS data
 37 |        object
 38 | 
 39 |     @author: Qiao Wang
 40 |     @author: Andrew Isaac
 41 |     @author: Vladimir Likic
 42 |     """
 43 | 
 44 |     if not is_str(file_name):
 45 |         error("'file_name' not a string")
 46 | 
 47 |     print " -> Reading JCAMP file '%s'" % (file_name)
 48 |     lines_list = open(file_name,'r')
 49 |     data = []
 50 |     page_idx = 0
 51 |     xydata_idx = 0
 52 |     time_list = []
 53 |     scan_list = []
 54 | 
 55 |     for line in lines_list:
 56 |         if not len(line.strip()) == 0:
 57 |             prefix = line.find('#')
 58 |             # key word or information
 59 |             if prefix == 0:
 60 |                 fields = line.split('=')
 61 |                 if fields[0].find("##PAGE") >= 0:
 62 |                     time = float(fields[2].strip()) #rt for the scan to be submitted
 63 |                     time_list.append(time)
 64 |                     page_idx = page_idx + 1
 65 |                 elif fields[0].find("##DATA TABLE") >= 0:
 66 |                     xydata_idx = xydata_idx + 1
 67 |             # data
 68 |             elif prefix == -1:
 69 |                 if page_idx > 1 or xydata_idx > 1:
 70 |                     if len(data) % 2 == 1:
 71 |                         error("data not in pair !")
 72 |                     mass = []
 73 |                     intensity = []
 74 |                     for i in range(len(data) / 2):
 75 |                         mass.append(data[i * 2])
 76 |                         intensity.append(data[i * 2 + 1])
 77 |                     if not len(mass) == len(intensity):
 78 |                         error("len(mass) is not equal to len(intensity)")
 79 |                     scan_list.append(Scan(mass, intensity))
 80 |                     data = []
 81 |                     data_sub = line.strip().split(',')
 82 |                     for item in data_sub:
 83 |                         if not len(item.strip()) == 0:
 84 |                             data.append(float(item.strip()))
 85 |                     if page_idx > 1:
 86 |                         page_idx = 1
 87 |                     if xydata_idx > 1:
 88 |                         xydata_idx = 1
 89 |                 else:
 90 |                     data_sub = line.strip().split(',')
 91 |                     for item in data_sub:
 92 |                         if not len(item.strip()) == 0:
 93 |                             data.append(float(item.strip()))
 94 | 
 95 |     if len(data) % 2 == 1:
 96 |         error("data not in pair !")
 97 |     # get last scan
 98 |     mass = []
 99 |     intensity = []
100 |     for i in range(len(data) / 2):
101 |         mass.append(data[i * 2])
102 |         intensity.append(data[i * 2 + 1])
103 | 
104 |     if not len(mass) == len(intensity):
105 |         error("len(mass) is not equal to len(intensity)")
106 |     scan_list.append(Scan(mass, intensity))
107 | 
108 |     # sanity check
109 |     if not len(time_list) == len(scan_list):
110 |         error("number of time points does not equal the number of scans")
111 | 
112 |     data = GCMS_data(time_list, scan_list)
113 | 
114 |     return data
115 | 


--------------------------------------------------------------------------------
/pyms/GCMS/IO/JCAMP/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Functions for I/O of JCAMP DX files
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/GCMS/IO/MZML/Function.py:
--------------------------------------------------------------------------------
  1 | """
Functions for reading mzML data files
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import pymzml
 26 | 
 27 | import numpy
 28 | 
 29 | from pyms.GCMS.Class import GCMS_data
 30 | from pyms.GCMS.Class import Scan
 31 | from pyms.Utils.Utils import is_str
 32 | from pyms.Utils.Error import error, stop
 33 | 
 34 | try:
 35 |     from mpi4py import MPI
 36 | except:
 37 |     pass
 38 | 
 39 | def mzML_reader(file_name):
 40 | 
 41 |     """
 42 |     @summary: A reader for mzML files, returns
 43 |         a GC-MS data object
 44 | 
 45 |     @param file_name: The name of the mzML file
 46 |     @type file_name: StringType
 47 | 
 48 |     @author: Sean O'Callaghan
 49 |     """
 50 | 
 51 |     if not is_str(file_name):
 52 |         error("'file_name' must be a string")
 53 |     try:
 54 |         mzml_file = pymzml.run.Reader(file_name)
 55 |     except:
 56 |         error("Cannot open file '%s'" % file_name)
 57 | 
 58 |     try:# avoid printing from each rank
 59 |         comm = MPI.COMM_WORLD
 60 |         rank = comm.Get_rank()
 61 |         size = comm.Get_size()
 62 |     
 63 |         if rank ==0:
 64 |             file_names = []
 65 |         
 66 |             for i in range(1,size):
 67 |                 recv_buffer = ""
 68 |                 file_n = comm.recv(recv_buffer, i)
 69 |                 file_names.append(file_n)
 70 | 
 71 |             print " -> Reading mzML files:"
 72 |             print file_name
 73 |             for file_n in file_names:
 74 |                 print file_n
 75 |         else:
 76 |             comm.send(file_name, dest=0)
 77 |     except:
 78 |         print " -> Reading mzML file '%s'" % (file_name)
 79 | 
 80 |     scan_list = []
 81 |     time_list = []
 82 | 
 83 |     for spectrum in mzml_file:
 84 |         mass_list = []
 85 |         intensity_list = []
 86 | 
 87 |         for mz,i in spectrum.peaks:
 88 |             mass_list.append(mz)
 89 |             intensity_list.append(i)
 90 | 
 91 |         #scan_list.append(Scan(mass_list, intensity_list))
 92 |         for element in spectrum.xmlTree:
 93 |             # For some reason there are spectra with no time value, 
 94 |             # Ignore these????????????
 95 |             if element.get('accession') == "MS:1000016": #time value
 96 |                 # We need time in seconds not minutes
 97 |                 time_list.append(60*float(element.get('value')))
 98 |                 scan_list.append(Scan(mass_list, intensity_list))
 99 | 
100 |     #print "time:", len(time_list)
101 |     #print "scan:", len(scan_list)
102 | 
103 |     data = GCMS_data(time_list, scan_list)
104 | 
105 |     return data
106 | 
107 | 


--------------------------------------------------------------------------------
/pyms/GCMS/IO/MZML/__init__.py:
--------------------------------------------------------------------------------
 1 | """
Functions for I/O of mzML files
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/GCMS/IO/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Input/output functions for GC-MS data files
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/GCMS/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Sub-package to handle raw data
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Gapfill/Class.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Provides a class for handling Missing Peaks in an output file (i.e. area.csv)
  3 | '''
  4 | import string
  5 | 
  6 | class MissingPeak(object):
  7 |     '''
  8 |     @summary:    Class to encapsulate a peak object identified as missing in 
        the output area matrix from PyMS.
 10 |     
 11 |     @author: Jairus Bowne
 12 |     @author: Sean O'Callaghan
 13 |     '''
 14 |     
 15 |     def __init__(self, ci, qual_ion_1, qual_ion_2, rt=0.0):
 16 |         '''
 17 |         @summary:
 18 | 
 19 |         
 20 |         @param ci:    Common ion for the peak across samples in an experiment
 21 |         @type ci:    IntType
 22 |         
 23 |         @param UID:    Unique IDentifier for peak, listing top ions, ratio and
 24 |             retention time for the peak across an experiment (row in area.csv)
 25 |         @type UID:    StringType
 26 |         
 27 |         # TODO: Determine which of these will be used.
 28 |         @param SAMPLE_NAME:    The 
 29 |                                    sample name (filename) 
 30 |                                    experiment code (group-number-filename) 
 31 |                                                                            of
 32 |             the originating data file (column in area.csv)
 33 |         @type SAMPLE_NAME:    StringType
 34 |         
 35 |         @param rt:    Retention time of the peak. May or may not be set
 36 |         @type rt:    FloatType
 37 |         '''
 38 |         
 39 |         self.__ci = ci
 40 |         self.__qual_1 = qual_ion_1
 41 |         self.__qual_2 = qual_ion_2
 42 |         self.__rt = rt
 43 |         self.__exact_rt = 'na'
 44 |         self.__ci_area = 'na'
 45 |         
 46 |     
 47 |     def get_ci(self):
 48 |         '''
 49 |         @summary:    Returns the common ion for the peak object across an 
 50 |             experiment
 51 |         
 52 |         @return:    Common ion for the peak
 53 |         @rtype:    IntType
 54 |         
 55 |         @author:    Jairus Bowne
 56 |         '''
 57 |         
 58 |         return self.__ci
 59 |     
 60 |     def get_qual_ion1(self):
 61 |         '''
 62 |         @summary:    Returns the top (most abundant) ion for the peak object
 63 |         
 64 |         @return:    Most abundant ion
 65 |         @rtype:    IntType
 66 |         
 67 |         @author:    Jairus Bowne
 68 |         '''
 69 |         
 70 |         '''
 71 |         # TODO: Consider the abundance of ions when some (i.e. 73, 147) have 
 72 |             been im.null_mass()'d. Is there a way to determine whether that
 73 |             has been done to generate the original peak list?
 74 |         '''
 75 |         
 76 |         return self.__qual_1
 77 |         #return int(string.split(self.__UID, '-')[0])
 78 |     
 79 |     def get_qual_ion2(self):
 80 |         '''
 81 |         @summary:    Returns the second most abundant ion for the peak object
 82 |         
 83 |         @return:    Second most abundant ion
 84 |         @rtype:    IntType
 85 |         
 86 |         @author:    Jairus Bowne
 87 |         '''
 88 |         
 89 |         return self.__qual_2
 90 |         #return int(string.split(self.__UID, '-')[1])
 91 | 
 92 |     def set_ci_area(self, ci_area):
 93 |         """
 94 |         @summary: sets the common ion area calculated by the gap fill
 95 |                   algorithm
 96 |         @param ci_area: The area of the common ion
 97 |         @type ci_area: intType
 98 | 
 99 |         """
100 |         self.__ci_area = ci_area
101 | 
102 |     def get_ci_area(self):
103 |         """
104 |         @summary: returns the common ion area
105 | 
106 |         @return ci_area: The area of the common ion
107 |         @rtype: intType
108 |         """
109 |         return self.__ci_area
110 |         
111 | 
112 |     def get_rt(self):
113 |         """
114 |         @summary: returns the retention time of the peak
115 | 
116 |         @return: the retention time of the peak
117 |         @rtype: floatType
118 |         """
119 |         return self.__rt
120 | 
121 |     def set_exact_rt(self, rt):
122 |         """
123 |         @summary: sets the retention time of a peak
124 | 
125 |         @param rt: The retention time of the apex of the peak
126 |         @type rt: floatType
127 |         """
128 |         self.__exact_rt = rt
129 | 
130 |     def get_exact_rt(self):
131 |          """
132 |         @summary: returns the retention time of the peak
133 | 
134 |         @return: the retention time of the peak
135 |         @rtype: floatType
136 |         """
137 |          return self.__exact_rt
138 |     
139 | 
140 | 
141 | 
142 | 
class Sample(object):
    """
    @summary: A named collection of MissingPeak objects belonging to one
              sample

    @author: Sean O'Callaghan
    """

    def __init__(self, sample_name, matrix_position):
        """
        @summary: Creates a sample with an empty list of missing peaks

        @param sample_name: the experiment code/name
        @type sample_name: stringType

        @param matrix_position: position along x-axis
                                where sample is located
        @type matrix_position: intType
        """
        self.__missing_peak_list = []
        self.__sample_name, self.__matrix_position = \
            sample_name, matrix_position

    def get_name(self):
        """
        @summary: Returns the sample name

        @return: The name of the sample
        @rtype: stringType
        """
        return self.__sample_name

    def add_missing_peak(self, missing_peak):
        """
        @summary: Appends a MissingPeak object to the Sample. The object
                  is stored as given; no validation is performed

        @param missing_peak: The missing peak object to be added
        @type missing_peak: pyms.Gapfill.Class.MissingPeak
        """
        self.__missing_peak_list.append(missing_peak)

    def get_missing_peaks(self):
        """
        @summary: Returns the list of MissingPeak objects held by the
                  Sample (the internal list itself, not a copy)

        @return: list of pyms.Gapfill.Class.MissingPeak
        @rtype: listType
        """
        return self.__missing_peak_list

    def get_mp_rt_area_dict(self):
        """
        @summary: Builds a dictionary of rt:area pairs, one per missing
                  peak. When several peaks share an rt, the peak added
                  last supplies the value

        @return: a dict containing rt:area pairs
        @rtype: dictType
        """
        return dict((mp.get_rt(), mp.get_ci_area())
                    for mp in self.__missing_peak_list)

    def get_mp_rt_exact_rt_dict(self):
        """
        @summary: Builds a dictionary of average_rt:exact_rt pairs, one
                  per missing peak. When several peaks share an rt, the
                  peak added last supplies the value

        @return: a dict of average_rt:exact_rt pairs
        @rtype: dictType
        """
        return dict((mp.get_rt(), mp.get_exact_rt())
                    for mp in self.__missing_peak_list)
230 |         
231 |         
232 | 


--------------------------------------------------------------------------------
/pyms/Gapfill/Function.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | @summary:    # Functions to fill missing peak objects
  3 | 
  4 | @author:    Jairus Bowne
  5 | @author:    Sean O'Callaghan
  6 | '''
  7 | 
  8 | import csv
  9 | import string
 10 | import sys, os, errno, string, numpy
 11 | sys.path.append("/x/PyMS")
 12 | 
 13 | from pyms.GCMS.IO.ANDI.Function import ANDI_reader
 14 | from pyms.GCMS.IO.MZML.Function import mzML_reader
 15 | from pyms.GCMS.Function import build_intensity_matrix_i
 16 | from pyms.Noise.SavitzkyGolay import savitzky_golay
 17 | from pyms.Baseline.TopHat import tophat
 18 | from pyms.Deconvolution.BillerBiemann.Function import get_maxima_list_reduced
 19 | from pyms.Peak.Function import ion_area
 20 | 
 21 | from Class import MissingPeak, Sample
 22 | 
 23 | 
 24 | # .csv reader (cloned from gcqc project)
 25 | def file2matrix(filename):
 26 |     '''
 27 |     @summary:    Convert a .csv file to a matrix (list of lists)
 28 |     
 29 |     @param filename:    Filename (.csv) to convert (area.csv, area_ci.csv)
 30 |     @type filename:    StringType
 31 |     
 32 |     @return:    Data matrix
 33 |     @rtype:    ListType (List of lists)
 34 |     '''
 35 |     
 36 |     # open(filename, 'rb')? Or unnecessary?
 37 |     with open(filename) as fp:
 38 |         reader = csv.reader(fp, delimiter=",",quotechar="\"")
 39 |         matrix = []
 40 |         for row in reader:
 41 |             newrow = []
 42 |             for each in row:
 43 |                 try:
 44 |                     each = float(each)
 45 |                 except:
 46 |                     pass
 47 |                 newrow.append(each)
 48 |             matrix.append(newrow)
 49 |     
 50 |     return matrix
 51 | 
 52 | 
 53 | def mp_finder(inputmatrix):
 54 |     """
 55 |     @summary: setup sample objects with missing peak objects
 56 |               Finds the 'NA's in the transformed area_ci.csv file and
 57 |               makes Sample objects with them
 58 | 
 59 |     @param inputmatrix: Data matrix derived from the area_ci.csv file
 60 |     @type inputmatrix: listType
 61 | 
 62 |     @return: list of Sample objects
 63 |     @rtype: list of pyms.MissingPeak.Class.Sample
 64 | 
 65 |     """
 66 | 
 67 |     sample_list = []
 68 | 
 69 |     try:
 70 |         ci_pos = inputmatrix[0].index(' "Quant Ion"')
 71 |     except ValueError:
 72 |         ci_pos = inputmatrix[0].index('"Quant Ion"')
 73 |     
 74 |     uid_pos = inputmatrix[0].index('UID')
 75 |    
 76 | 
 77 |     # Set up the sample objects
 78 |     # All entries on line 1 beyond the Qual Ion position are sample names
 79 |     for i, sample_name in enumerate(inputmatrix[0][ci_pos:]):
 80 | 
 81 |         print sample_name
 82 |         sample = Sample(sample_name, i+3) #add 4 to allow for UID, RT,QualIon
 83 |         sample_list.append(sample)
 84 | 
 85 |     for line in inputmatrix[1:]:
 86 |         uid = line[uid_pos]
 87 |         common_ion = line[ci_pos]
 88 | 
 89 |         qual_ion_1 = uid.split("-")[0]
 90 |         qual_ion_2 = uid.split("-")[1]
 91 |         rt = uid.split("-")[-1]
 92 |         #print rt
 93 |         
 94 |         for i, area in enumerate(line[ci_pos:]):
 95 |             if area == 'NA':
 96 |                 missing_peak = MissingPeak(common_ion, qual_ion_1, \
 97 |                                                qual_ion_2, rt)
 98 |                 sample_list[i].add_missing_peak(missing_peak)
 99 | 
100 |     return sample_list
101 | 
102 | 
def missing_peak_finder(sample, filename, points=13, null_ions=[73, 147],\
                            crop_ions=[50,540], threshold=1000, rt_window=1, filetype='mzml'):
    """
    @summary: Integrates raw data around missing peak locations
              to fill in NAs in the data matrix

              For every missing peak of the sample, the common ion
              chromatogram is searched for maxima around the peak's
              retention time. The largest integrated area found is stored
              on the MissingPeak object; if nothing qualifies the area is
              set to 'na'. Nothing is returned.

    @param  sample: The sample object containing missing peaks
    @type sample: pyms.Gapfill.Class.Sample

    @param  filename: Name of the raw data file
    @type filename: stringType

    @param  points: Window passed to the Savitzky-Golay smoothing
                    (Default 13)
    @type points: intType

    @param  null_ions: Ions to be deleted in the matrix. The default
                       list is only read, never modified
    @type null_ions: listType

    @param crop_ions: Range of Ions to be considered, as [low, high].
                      The default list is only read, never modified
    @type crop_ions: listType

    @param threshold: Minimum intensity of IonChromatogram allowable to
                      fill missing peak. Both qualifying ions must exceed
                      half of this value
    @type threshold: intType

    @param  rt_window: Window in seconds around average RT to look for
                       missing peak
    @type rt_window: floatType

    @param filetype: 'mzml', or 'cdf' / 'netcdf' (case insensitive)
    @type filetype: stringType

    @raise ValueError: if filetype is not one of the supported values

    @author: Sean O'Callaghan
    """

    print("Sample: %s File: %s" % (sample.get_name(), filename))

    kind = filetype.lower()
    if kind in ('cdf', 'netcdf'):
        data = ANDI_reader(filename)
    elif kind == 'mzml':
        data = mzML_reader(filename)
    else:
        # Previously this only printed a message and then failed later on
        # the unbound 'data' variable; fail immediately and clearly instead.
        raise ValueError("file type not valid: %r (expected 'mzml' or 'cdf')"
                         % (filetype,))

    # build integer intensity matrix
    im = build_intensity_matrix_i(data)

    for null_ion in null_ions:
        im.null_mass(null_ion)

    im.crop_mass(crop_ions[0], crop_ions[1])

    # only the number of masses is needed from the matrix size
    _, n_mz = im.get_size()

    # smooth each ion chromatogram twice, then remove the baseline
    for ii in range(n_mz):
        ic = im.get_ic_at_index(ii)
        ic1 = savitzky_golay(ic, points)
        ic_smooth = savitzky_golay(ic1, points)
        ic_base = tophat(ic_smooth, struct="1.5m")
        im.set_ic_at_index(ii, ic_base)

    # qualifying ions only need to reach half the common ion threshold
    half_threshold = threshold/2

    for mp in sample.get_missing_peaks():

        mp_rt = mp.get_rt()
        common_ion = mp.get_ci()
        qual_ion_1 = float(mp.get_qual_ion1())
        qual_ion_2 = float(mp.get_qual_ion2())

        ci_ion_chrom = im.get_ic_at_mass(common_ion)
        print("ci =  %s" % (common_ion,))
        qi1_ion_chrom = im.get_ic_at_mass(qual_ion_1)
        print("qi1 =  %s" % (qual_ion_1,))
        qi2_ion_chrom = im.get_ic_at_mass(qual_ion_2)
        print("qi2 =  %s" % (qual_ion_2,))

        # Convert the time window to a number of scans by comparing the
        # scan index at the peak rt with the index rt_window earlier
        points_1 = ci_ion_chrom.get_index_at_time(float(mp_rt))
        points_2 = ci_ion_chrom.get_index_at_time(float(mp_rt)-rt_window)
        rt_window_points = points_1 - points_2
        print("rt_window =  %s" % (rt_window_points,))

        maxima_list = get_maxima_list_reduced(ci_ion_chrom, mp_rt, \
                                                  rt_window_points)

        # keep maxima where the common ion and both qualifying ions are
        # intense enough
        large_peaks = []
        for rt, intens in maxima_list:
            if intens > threshold:
                q1_index = qi1_ion_chrom.get_index_at_time(rt)
                q2_index = qi2_ion_chrom.get_index_at_time(rt)

                q1_intensity = qi1_ion_chrom.get_intensity_at_index(q1_index)
                q2_intensity = qi2_ion_chrom.get_intensity_at_index(q2_index)

                if q1_intensity > half_threshold and \
                        q2_intensity > half_threshold:
                    large_peaks.append([rt, intens])

        print('found %d peaks above threshold'%len(large_peaks))

        # integrate the common ion around each candidate apex
        areas = []
        for peak in large_peaks:
            apex = ci_ion_chrom.get_index_at_time(peak[0])
            ia = ci_ion_chrom.get_intensity_array().tolist()
            # only the area (first item of the result) is used
            areas.append(ion_area(ia, apex, 0)[0])

        if areas:
            biggest_area = max(areas)
            mp.set_ci_area(biggest_area)
            # NOTE(review): the value stored as "exact" rt is the peak's own
            # rt divided by 60, not the rt of the maximum that gave the
            # biggest area - confirm this is intended.
            mp.set_exact_rt("{:.3f}".format(float(mp_rt)/60.0))
            print("found area: %s at rt: %s" % (biggest_area, mp_rt))
        else:
            print("Missing peak at rt =  %s" % (mp_rt,))
            mp.set_ci_area('na')
232 | 
def transposed(lists):
    """
    @summary: transposes a list of lists

        Rows of unequal length are padded with None, so the result has as
        many rows as the longest input row. A real list of lists is always
        returned, independent of whether the builtin map() is lazy.

    @param lists: the list of lists to be transposed
    @type lists: listType

    @return: the transposed matrix ([] for empty input)
    @rtype: listType (list of lists)
    """

    if not lists:
        return []

    # Materialise every row once so any iterable row can be indexed
    rows = [list(row) for row in lists]
    width = max(len(row) for row in rows)

    return [[row[col] if col < len(row) else None for row in rows]
            for col in range(width)]
243 | 
244 | 
def write_filled_csv(sample_list, area_file, filled_area_file):
    """
    @summary: creates a new area_ci.csv file, replacing NAs with
              values from the sample_list objects where possible

              An 'NA' cell for which no filled value is available is
              written back unchanged as 'NA', so rows stay aligned.

    @param sample_list: A list of sample objects
    @type sample_list: list of Class.Sample

    @param area_file: the file 'area_ci.csv' from PyMS output
    @type area_file: stringType

    @param filled_area_file: the new output file which has NA
                             values replaced
    @type filled_area_file: stringType
    """

    old_matrix = file2matrix(area_file)

    # Work column-wise. list() keeps the result indexable even where zip()
    # returns an iterator.
    invert_old_matrix = list(zip(*old_matrix))

    # The retention time is the last '-' separated field of each UID
    uid_list = invert_old_matrix[0][1:]
    rt_list = [uid.split('-')[-1] for uid in uid_list]

    # Start the output with the first two columns, copied unchanged.
    # NOTE(review): column index 2 is neither copied here nor processed
    # below, so it is absent from the output - confirm this is intended.
    invert_new_matrix = list(invert_old_matrix[0:2])

    for line in invert_old_matrix[3:]:
        sample_name = line[0]

        # Reset for every column so values from a previously matched
        # sample are never reused; the last matching sample wins.
        rt_area_dict = {}
        for sample in sample_list:
            if sample_name in sample.get_name():
                rt_area_dict = sample.get_mp_rt_area_dict()

        new_line = [sample_name]
        for i, part in enumerate(line[1:]):
            if part == 'NA':
                # Fall back to the original 'NA' when nothing was filled
                new_line.append(rt_area_dict.get(str(rt_list[i]), part))
            else:
                new_line.append(part)

        invert_new_matrix.append(new_line)

    new_matrix = transposed(invert_new_matrix)

    # Every value is followed by a comma, including the last one on a line
    with open(filled_area_file, 'w') as fp_new:
        for line in new_matrix:
            fp_new.write("".join(str(part) + ',' for part in line))
            fp_new.write("\n")
323 | 
def write_filled_rt_csv(sample_list, rt_file, filled_rt_file):
    """
    @summary: creates a new rt.csv file, replacing NAs with
              values from the sample_list objects where possible

              An 'NA' cell for which no exact retention time is
              available is written back unchanged as 'NA', so rows stay
              aligned.

    @param sample_list: A list of sample objects
    @type sample_list: list of Class.Sample

    @param rt_file: the file 'rt.csv' from PyMS output
    @type rt_file: stringType

    @param filled_rt_file: the new output file which has NA
                           values replaced
    @type filled_rt_file: stringType
    """

    old_matrix = file2matrix(rt_file)

    # Work column-wise. list() keeps the result indexable even where zip()
    # returns an iterator.
    invert_old_matrix = list(zip(*old_matrix))

    # The retention time is the last '-' separated field of each UID
    uid_list = invert_old_matrix[0][1:]
    rt_list = [uid.split('-')[-1] for uid in uid_list]

    # Start the output with the first column, copied unchanged.
    # NOTE(review): column index 1 is neither copied here nor processed
    # below, so it is absent from the output - confirm this is intended.
    invert_new_matrix = list(invert_old_matrix[0:1])

    for line in invert_old_matrix[2:]:
        sample_name = line[0]

        # Reset for every column so values from a previously matched
        # sample are never reused; the last matching sample wins.
        rt_exact_rt_dict = {}
        for sample in sample_list:
            if sample_name in sample.get_name():
                rt_exact_rt_dict = sample.get_mp_rt_exact_rt_dict()

        new_line = [sample_name]
        for i, part in enumerate(line[1:]):
            if part == 'NA':
                # Fall back to the original 'NA' when nothing was filled
                new_line.append(rt_exact_rt_dict.get(str(rt_list[i]), part))
            else:
                new_line.append(part)

        invert_new_matrix.append(new_line)

    new_matrix = transposed(invert_new_matrix)

    # Every value is followed by a comma, including the last one on a line
    with open(filled_rt_file, 'w') as fp_new:
        for line in new_matrix:
            fp_new.write("".join(str(part) + ',' for part in line))
            fp_new.write("\n")
406 |         
407 |                 
408 |                 
409 | 
410 | 
411 |     
412 |     
413 |              
414 | 


--------------------------------------------------------------------------------
/pyms/Gapfill/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Gap Filling Routines
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2014 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Noise/Analysis.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Noise analysis functions
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | import random, math
26 | 
27 | from pyms.Utils.Error import error
28 | from pyms.GCMS.Function import is_ionchromatogram
29 | from pyms.Utils.Time import window_sele_points
30 | from pyms.Utils.Math import MAD
31 | 
32 | _DEFAULT_WINDOW = 256
33 | _DEFAULT_N_WINDOWS = 1024
34 | 
def window_analyzer(ic, window=_DEFAULT_WINDOW, n_windows=_DEFAULT_N_WINDOWS, rand_seed=None ):

    """
    @summary: A simple estimator of the signal noise based on randomly
        placed windows and median absolute deviation

        The noise value is estimated by repeatedly picking random
        windows (of a specified width) and calculating median absolute
        deviation (MAD). The noise estimate is given by the minimum MAD.

        n_windows random positions are drawn in total; a position that
        was already drawn is not analysed again but still counts towards
        n_windows. The starting estimate is the full intensity range
        (max - min), which is returned if no window has a smaller MAD.

    @param ic: An IonChromatogram object
    @type ic: pyms.GCMS.Class.IonChromatogram
    @param window: Window width selection
    @type window: IntType or StringType
    @param n_windows: The number of windows to calculate
    @type n_windows: IntType
    @param rand_seed: Seed for the random generator. If None, the
        generator is seeded by the random module's default mechanism
    @type rand_seed: IntType

    @return: The noise estimate
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_ionchromatogram(ic):
        error("argument must be an IonChromatogram object")

    ia = ic.get_intensity_array() # fetch the intensities

    # create an instance of the Random class
    if rand_seed is not None:
        generator = random.Random(rand_seed)
    else:
        generator = random.Random()

    window_pts = window_sele_points(ic, window)

    # largest valid start index; randrange() below raises ValueError if
    # this is negative, i.e. the window is longer than the chromatogram
    maxi = ia.size - window_pts
    noise_level = math.fabs(ia.max()-ia.min())

    # set gives constant-time "already analysed?" checks
    seen_positions = set()

    cntr = 0

    while cntr < n_windows:
        # generator.randrange(): last point not included in range
        try_pos = generator.randrange(0, maxi+1)
        # only process the window if not analyzed previously
        if try_pos not in seen_positions:
            seen_positions.add(try_pos)
            window_values = ia[try_pos:try_pos + window_pts]
            crnt_mad = MAD(window_values)
            if crnt_mad < noise_level:
                noise_level = crnt_mad
        cntr = cntr + 1

    return noise_level
95 | 
96 | 


--------------------------------------------------------------------------------
/pyms/Noise/SavitzkyGolay.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Savitzky-Golay noise filter
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import numpy
 26 | import copy
 27 | 
 28 | from pyms.GCMS.Function import is_ionchromatogram, ic_window_points
 29 | from pyms.Utils.Utils import is_int
 30 | 
 31 | __DEFAULT_WINDOW = 7
 32 | __DEFAULT_POLYNOMIAL_DEGREE = 2
 33 | 
 34 | def savitzky_golay(ic, window=__DEFAULT_WINDOW, \
 35 |         degree=__DEFAULT_POLYNOMIAL_DEGREE):
 36 | 
 37 |     """
 38 |     @summary: Applies Savitzky-Golay filter on ion chromatogram
 39 | 
 40 |     @param ic: The input ion chromatogram
 41 |     @type ic: pyms.GCMS.Class.IonChromatogram
 42 |     @param window: The window selection parameter. This can be an integer
 43 |         or time string. If integer, taken as the number of points. If a
 44 |         string, must of the form "<NUMBER>s" or "<NUMBER>m", specifying
 45 |         a time in seconds or minutes, respectively
 46 |     @type window: IntType or StringType
 47 |     @param degree: degree of the fitting polynomial for the Savitzky-Golay
 48 |         filter
 49 |     @type degree: IntType
 50 | 
 51 |     @return: Smoothed ion chromatogram
 52 |     @rtype: pyms.GCMS.Class.IonChromatogram
 53 | 
 54 |     @author: Uwe Schmitt
 55 |     @author: Vladimir Likic
 56 |     """
 57 | 
 58 |     if not is_ionchromatogram(ic):
 59 |         error("'ic' not an IonChromatogram object")
 60 | 
 61 |     if not is_int(degree):
 62 |         error("'degree' not an integer")
 63 | 
 64 |     ia = ic.get_intensity_array()
 65 | 
 66 |     wing_length = ic_window_points(ic, window, half_window=True)
 67 | 
 68 |     #print " -> Applying Savitzky-Golay filter"
 69 |     #print "      Window width (points): %d" % ( 2*wing_length+1 )
 70 |     #print "      Polynomial degree: %d" % ( degree )
 71 | 
 72 |     coeff = __calc_coeff(wing_length, degree)
 73 |     ia_denoise = __smooth(ia, coeff)
 74 | 
 75 |     ic_denoise = copy.deepcopy(ic)
 76 |     ic_denoise.set_intensity_array(ia_denoise)
 77 | 
 78 |     return ic_denoise
 79 | 
 80 | def savitzky_golay_im(im, window=__DEFAULT_WINDOW, \
 81 |         degree=__DEFAULT_POLYNOMIAL_DEGREE):
 82 |     """
 83 |     @summary: Applies Savitzky-Golay filter on Intensity
 84 |               Matrix
 85 |               
 86 |               Simply wraps around the Savitzky Golay 
 87 |               function above
 88 | 
 89 |     @param im: The input IntensityMatrix
 90 |     @type im: pyms.GCMS.Class.IntensityMatrix
 91 |     @param window: The window selection parameter. 
 92 |     @type window: IntType or StringType
 93 |     
 94 |     @param degree: degree of the fitting polynomial for the Savitzky-Golay
 95 |         filter
 96 |     @type degree: IntType
 97 | 
 98 |     @return: Smoothed IntensityMatrix
 99 |     @rtype: pyms.GCMS.Class.IntensityMatrix
100 | 
101 |     @author: Sean O'Callaghan
102 |     @author: Vladimir Likic
103 |     """
104 |     
105 |     n_scan, n_mz = im.get_size()
106 |     
107 |     im_smooth = copy.deepcopy(im)
108 |     
109 |     for ii in range(n_mz):
110 |         ic = im_smooth.get_ic_at_index(ii)
111 |         ic_smooth = savitzky_golay(ic, window, degree)
112 |         im_smooth.set_ic_at_index(ii, ic_smooth)
113 |         
114 |     return im_smooth
115 | 
def __calc_coeff(num_points, pol_degree, diff_order=0):

    """
    @summary: Calculates the filter coefficients of a symmetric
        Savitzky-Golay filter

        See: http://www.nrbook.com/a/bookcpdf/c14-8.pdf

    @param num_points: Half-width of the filter: 2*num_points+1 values
        contribute to the smoother
    @type num_points: IntType
    @param pol_degree: The degree of the fitting polynomial
    @type pol_degree: IntType
    @param diff_order: The degree of implicit differentiation. 0 means
        that the filter smooths the function itself, 1 that it smooths
        the first derivative, and so on. Always use 0
    @type diff_order: IntType

    @return: Filter coefficients (array of length 2*num_points+1)
    @rtype: numpy.ndarray

    @author: Uwe Schmitt
    @copyright: Uwe Schmitt
    """

    width = 2*num_points + 1
    n_terms = pol_degree + 1
    offsets = range(-num_points, num_points + 1)

    # design matrix: row k holds the powers 0..pol_degree of the k-th
    # offset from the window centre
    A = numpy.zeros((width, n_terms), float)
    for row, offset in enumerate(offsets):
        for power in range(n_terms):
            A[row, power] = pow(offset, power)

    # solve (A^T A) w = e_diff_order, i.e. take the diff_order-th row of
    # inv(A^T A), via the Cholesky factor and __resub()
    normal = numpy.dot(A.transpose(), A)
    unit = numpy.zeros((n_terms,), float)
    unit[diff_order] = 1
    weights = __resub(numpy.linalg.cholesky(normal), unit)

    # evaluate the weight polynomial at every offset of the window
    coeff = numpy.zeros((width,), float)
    for row, offset in enumerate(offsets):
        acc = 0.0
        for power in range(n_terms):
            acc += weights[power]*pow(offset, power)
        coeff[row] = acc

    return coeff
163 | 
def __resub(D, rhs):

    """
    @summary: Solves (D D^T) x = rhs by forward and back substitution

    D is the lower triangular matrix from a Cholesky decomposition

    @param D: Lower triangular Cholesky factor
    @type D: numpy.ndarray
    @param rhs: Right-hand side vector
    @type rhs: numpy.ndarray

    @return: The solution vector x
    @rtype: numpy.ndarray

    @author: Uwe Schmitt
    @copyright: Uwe Schmitt
    """

    size = D.shape[0]
    forward = numpy.zeros((size,), float)
    solution = numpy.zeros((size,), float)

    # step 1, forward substitution: D y = rhs
    for row in range(size):
        acc = rhs[row]
        for col in range(row):
            acc -= D[row, col]*forward[col]
        forward[row] = acc/D[row, row]

    # step 2, back substitution: D^T x = y; element (row, col) of D^T
    # is D[col, row]
    for row in reversed(range(size)):
        acc = forward[row]
        for col in range(row + 1, size):
            acc -= D[col, row]*solution[col]
        solution[row] = acc/D[row, row]

    return solution
194 | 
def __smooth(signal, coeff):

    """
    @summary: Applies coefficients calculated by __calc_coeff()
        to signal

        The signal is convolved with the coefficients and the result is
        trimmed by the filter half-width at each end, so the output has
        as many points as the input signal.

    @param signal: The signal to be smoothed
    @type signal: numpy.ndarray
    @param coeff: Filter coefficients (odd number of values)
    @type coeff: numpy.ndarray

    @return: The smoothed signal
    @rtype: numpy.ndarray

    @author: Uwe Schmitt
    @copyright: Uwe Schmitt
    """

    # half-width of the filter; floor division keeps this an integer
    # index regardless of the division semantics in effect
    half_width = (numpy.size(coeff) - 1) // 2

    full = numpy.convolve(signal, coeff)

    # an explicit end index is used because a slice of [0:-0] would be
    # empty when the filter has a single coefficient
    return full[half_width:len(full) - half_width]
208 | 
209 | 


--------------------------------------------------------------------------------
/pyms/Noise/Window.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Moving window noise filter
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import copy
 26 | import numpy
 27 | 
 28 | from pyms.GCMS.Function import is_ionchromatogram, ic_window_points
 29 | from pyms.Utils.Math import median
 30 | 
# Default value of the 'window' argument of window_smooth() and
# window_smooth_im()
__DEFAULT_WINDOW = 3
 32 | 
 33 | def window_smooth(ic, window=__DEFAULT_WINDOW, median=False):
 34 | 
 35 |     """
 36 |     @summary: Applies window smoothing on ion chromatogram
 37 | 
 38 |     @param ic: The input ion chromatogram
 39 |     @type ic: pyms.GCMS.Class.IonChromatogram
 40 |     @param window: The window selection parameter. This can be an integer
 41 |         or time string. If integer, taken as the number of points. If a
 42 |         string, must of the form "<NUMBER>s" or "<NUMBER>m", specifying
 43 |         a time in seconds or minutes, respectively
 44 |     @type window: IntType or StringType
 45 |     @param median: An indicator whether the mean or median window smoothing
 46 |         to be used
 47 |     @type median: Booleantype
 48 | 
 49 |     @return: Smoothed ion chromatogram
 50 |     @rtype: pyms.GCMS.Class.IonChromatogram
 51 | 
 52 |     @author: Vladimir Likic
 53 |     """
 54 | 
 55 |     if not is_ionchromatogram(ic):
 56 |         error("'ic' not an IonChromatogram object")
 57 | 
 58 |     ia = ic.get_intensity_array()
 59 | 
 60 |     wing_length = ic_window_points(ic, window, half_window=True)
 61 | 
 62 |     if median:
 63 |         ia_denoise = __median_window(ia, wing_length)
 64 |     else:
 65 |         ia_denoise = __mean_window(ia, wing_length)
 66 | 
 67 |     ic_denoise = copy.deepcopy(ic)
 68 |     ic_denoise.set_intensity_array(ia_denoise)
 69 | 
 70 |     return ic_denoise
 71 | 
 72 | def window_smooth_im(im, window=__DEFAULT_WINDOW, median=False):
 73 |     """
 74 |     @summary: Applies window smoothing on Intensity Matrix
 75 | 
 76 |               Simply wraps around the window smooth function above
 77 | 
 78 |     @param im: The input Intensity Matrix
 79 |     @type im: pyms.GCMS.Class.IntensityMatrix
 80 |     @param window: The window selection parameter. 
 81 |     @type window: IntType or StringType
 82 |     
 83 |     @param median: An indicator whether the mean or median window smoothing
 84 |         to be used
 85 |     @type median: Booleantype
 86 | 
 87 |     @return: Smoothed Intensity Matrix
 88 |     @rtype: pyms.GCMS.Class.IntensityMatrix
 89 | 
 90 |     @author: Sean O'Callaghan
 91 |     @author: Vladimir Likic
 92 |     """
 93 |     
 94 |     n_scan, n_mz = im.get_size()
 95 |     
 96 |     im_smooth = copy.deepcopy(im)
 97 |     
 98 |     for ii in range(n_mz):
 99 |         ic = im_smooth.get_ic_at_index(ii)
100 |         ic_smooth = window_smooth(ic, window, median)
101 |         im_smooth.set_ic_at_index(ii, ic_smooth)
102 |         
103 |     return im_smooth
104 | 
def __mean_window(ia, wing_length):

    """
    @summary: Applies mean-window averaging on the array of intensities.

        Each output point is the mean of the input points lying within
        'wing_length' points on either side of it. Near the ends of the
        array the window is clipped to the available points.

    @param ia: Intensity array
    @type ia: numpy.ndarray
    @param wing_length: An integer value representing the number of
        points on either side of a point in the ion chromatogram
    @type wing_length: IntType

    @return: Smoothed intensity array (floating point)
    @rtype: numpy.ndarray

    @author: Vladimir Likic
    """

    n_points = ia.size

    # the buffer must be floating point: with an integer buffer every
    # window mean would be silently truncated on assignment
    ia_denoise = numpy.zeros(n_points, dtype=float)

    for index in range(n_points):
        left = max(index - wing_length, 0)
        # no clipping needed on the right, slicing stops at the array end
        right = index + wing_length + 1
        ia_denoise[index] = ia[left:right].mean()

    return ia_denoise
138 | 
def __median_window(ia, wing_length):

    """
    @summary: Applies median-window averaging on the array of intensities.

        Each output point is the median of the input points lying within
        'wing_length' points on either side of it. Near the ends of the
        array the window is clipped to the available points.

    @param ia: Intensity array
    @type ia: numpy.ndarray
    @param wing_length: An integer value representing the number of
        points on either side of a point in the ion chromatogram
    @type wing_length: IntType

    @return: Smoothed intensity array (floating point)
    @rtype: numpy.ndarray

    @author: Vladimir Likic
    """

    n_points = ia.size

    # the buffer must be floating point: with an integer buffer any
    # fractional median would be silently truncated on assignment
    ia_denoise = numpy.zeros(n_points, dtype=float)

    for index in range(n_points):
        left = max(index - wing_length, 0)
        # no clipping needed on the right, slicing stops at the array end
        right = index + wing_length + 1
        ia_denoise[index] = median(ia[left:right])

    return ia_denoise
172 | 
173 | 


--------------------------------------------------------------------------------
/pyms/Noise/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Noise processing functions
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Peak/Function.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions related to Peak modification
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import numpy
 26 | import copy
 27 | 
 28 | #from pyms.Utils.Error import error
 29 | from pyms.Peak.Class import Peak
 30 | from pyms.Utils.Utils import is_str, is_list
 31 | from pyms.Utils.Math import median
 32 | from pyms.GCMS.Class import MassSpectrum
 33 | 
 34 | # If psyco is installed, use it to speed up running time
 35 | try:
 36 |     import psyco
 37 |     psyco.full()
 38 | except:
 39 |     pass
 40 | 
 41 | def peak_sum_area(im, peak, single_ion=False, max_bound=0):
 42 | 
 43 |     """
 44 |     @Summary: Calculate the sum of the raw ion areas based on
 45 |         detected boundaries.
 46 | 
 47 |     @param im: The originating IntensityMatrix object
 48 |     @type im: pyms.GCMS.Class.IntensityMatrix
 49 |     @param peak: The Peak object
 50 |     @type peak: pyms.Peak.Class.Peak
 51 |     @param single_ion: whether single ion areas should be returned
 52 |     @type singe_ion: BooleanType
 53 |     @param max_bound: Optional value to limit size of detected bound
 54 |     @type max_bound: IntType
 55 | 
 56 |     @return: Sum of peak apex ions in detected bounds
 57 |     @rtype: FloatType
 58 | 
 59 |     @author: Andrew Isaac
 60 |     @author: Sean O'Callaghan
 61 |     """
 62 | 
 63 |     sum_area = 0
 64 |     # Use internal values (not copy)
 65 |     #mat = im.get_matrix_list()
 66 |     mat = im.intensity_matrix
 67 |     ms = peak.get_mass_spectrum()
 68 |     rt = peak.get_rt()
 69 |     apex = im.get_index_at_time(rt)
 70 | 
 71 |     # get peak masses with non-zero intensity
 72 |     mass_ii = [ ii for ii in xrange(len(ms.mass_list)) \
 73 |         if ms.mass_spec[ii] > 0 ]
 74 | 
 75 |     area_dict = {}
 76 |     # get stats on boundaries
 77 |     for ii in mass_ii:
 78 |         # get ion chromatogram as list
 79 |         ia = [ mat[scan][ii] for scan in xrange(len(mat)) ]
 80 |         area, left, right, l_share, r_share = ion_area(ia, apex, max_bound)
 81 |         # need actual mass for single ion areas
 82 |         actual_mass = ms.mass_list[ii]
 83 |         area_dict[actual_mass] = area
 84 |         sum_area += area
 85 | 
 86 |     if single_ion == True:
 87 |         return sum_area, area_dict
 88 |     else:
 89 |         return sum_area
 90 | 
 91 | def peak_top_ion_areas(im, peak, n_top_ions = 5, max_bound=0):
 92 |     """
 93 |     @summary: Calculate and return the ion areas of the five most
 94 |     abundant ions in the peak
 95 | 
 96 |     @param im: The originating IntensityMatrix object
 97 |     @type im: pyms.GCMS.Class.IntensityMatrix
 98 |     @param peak: The Peak object
 99 |     @type peak: pyms.Peak.Class.Peak
100 | 
101 |     @return: dictionary of ion:ion_area pairs
102 |     @rtype: dictType
103 | 
104 |     @author: Sean O'Callaghan
105 | 
106 |     """
107 |     #ms = peak.get_mass_spectrum()
108 |     rt = peak.get_rt()
109 |     apex = im.get_index_at_time(rt)
110 | 
111 |     ion_areas = {} # Dictionary to store ion:ion_area pairs
112 |     
113 | 
114 |     top_ions = top_ions_v2(peak, n_top_ions)
115 |     #print top_ions
116 | 
117 |     for ion in top_ions:
118 |         ion_chrom = im.get_ic_at_mass(ion)
119 |         # need ia as a list not numpy array so use .tolist()
120 |         ia = ion_chrom.get_intensity_array().tolist()
121 |         area, left, right, l_share, r_share = ion_area(ia, apex, max_bound)
122 |         # need actual mass for single ion areas
123 |         ion_areas[ion] = area
124 |                 
125 | 
126 |     return ion_areas
127 | 
128 | 
129 | 
def top_ions_v1(peak, num_ions=5):

    """
    @summary: Computes the num_ions highest intensity ions

        The ions are returned in the order in which they appear in the
        mass list. If several ions share an intensity that is among the
        num_ions highest values, all of them are returned, so the result
        can hold more than num_ions entries.

    @param peak: the peak to be processed
    @type peak: pyms.Peak.Class.Peak
    @param num_ions: the number of ions to be recorded
    @type num_ions: IntType

    @return: a list of the highest intensity ions; empty if num_ions
        is not positive
    @rtype: ListType

    @author: Sean O'Callaghan
    """

    if num_ions <= 0:
        # a slice of [-0:] would select the whole list rather than
        # nothing, so this case is handled explicitly
        return []

    mass_spectrum = peak.get_mass_spectrum()
    intensity_list = mass_spectrum.mass_spec
    mass_list = mass_spectrum.mass_list

    # sorted() works on a copy, the spectrum itself is left untouched
    top_intensities = sorted(intensity_list)[-num_ions:]

    top_ions = []
    for index, intensity in enumerate(intensity_list):
        if intensity in top_intensities:
            top_ions.append(mass_list[index])

    return top_ions
159 | 
def top_ions_v2(peak, num_ions=5):
    """
    @summary: Computes the highest #num_ions intensity ions

        The (intensity, mass) pairs of the peak's mass spectrum are
        sorted and the masses of the last num_ions pairs are returned,
        i.e. in order of increasing intensity.

    @param peak: the peak to be processed
    @type peak: pyms.Peak.Class.Peak

    @param num_ions: the number of ions to be recorded
    @type num_ions: IntType

    @return: a list of the #num_ions highest intensity ions; empty if
        num_ions is not positive
    @rtype: ListType

    @author: Sean O'Callaghan
    """

    if num_ions <= 0:
        # a slice of [-0:] would select the whole list rather than
        # nothing, so this case is handled explicitly
        return []

    mass_spectrum = peak.get_mass_spectrum()

    # pairs sort by intensity first, ties are broken by mass
    ranked = sorted(zip(mass_spectrum.mass_spec, mass_spectrum.mass_list))

    return [mass for _, mass in ranked[-num_ions:]]
190 | 
191 | 
def ion_area(ia, apex, max_bound=0, tol=0.5):

    """
    @summary: Find bounds of peak by summing intensities until change in
        sum is less than 'tol' percent of the current area.

        The search is done by half_area(), once to the left and once to
        the right of the apex.

    @param ia: List of intensities for a given mass
    @type ia: ListType
    @param apex: Index of the peak apex.
    @type apex: IntType
    @param max_bound: Optional value to limit size of detected bound
    @type max_bound: IntType
    @param tol: Percentage tolerance of added area to current area.
    @type tol: FloatType

    @return: Area, left and right boundary offset, shared left, shared right
    @rtype: TupleType

    @author: Andrew Isaac
    """

    # Left half: from the apex back to the start. It is reversed so that
    # half_area() can always search to the right, which is bounds safe
    left_half = ia[:apex+1][::-1]
    l_area, left, l_share = half_area(left_half, max_bound, tol)

    # Right half: from the apex to the end
    r_area, right, r_share = half_area(ia[apex:], max_bound, tol)

    # The apex is part of both halves (needed for the tolerance test),
    # so it is taken off once before the two areas are combined
    total_area = l_area + (r_area - ia[apex])

    return total_area, left, right, l_share, r_share
225 | 
def half_area(ia, max_bound=0, tol=0.5):

    """
    @summary: Find bound of peak by summing intensities until change in
        sum is less than 'tol' percent of the current area.

        Starting at the apex (index 0), the mean of a 3 point wide
        window at the current edge is compared with the area summed so
        far. The edge keeps moving right until
          i)   the edge mean drops to the tolerance or below,
          ii)  the edge mean stops decreasing, or
          iii) the bound (max_bound or the end of the list) is reached.

    @param ia: List of intensities from Peak apex for a given mass
    @type ia: ListType
    @param max_bound: Optional value to limit size of detected bound;
        values below 1 mean no limit other than the list length
    @type max_bound: IntType
    @param tol: Percentage tolerance of added area to current area.
    @type tol: FloatType

    @return: Half peak area, boundary offset, shared (True if the edge
        mean was not below its previous value when the search stopped)
    @rtype: TupleType

    @author: Andrew Isaac
    """

    # number of points the edge value is averaged over, for smoothing
    window = 3

    # halve and convert from percent
    threshold = tol/200.0

    def edge_mean(start):
        # slicing past the end of the list is bounds safe
        return float(sum(ia[start:start+window]))/window

    n_points = len(ia)
    if max_bound < 1:
        limit = n_points
    else:
        limit = min(max_bound+1, n_points)

    # initialise area and edge values at the apex
    area = ia[0]
    current = edge_mean(0)
    previous = 2 * current  # bigger than expected edge
    pos = 1

    while pos < limit and current > area * threshold and current < previous:
        previous = current
        area += ia[pos]
        current = edge_mean(pos)
        pos += 1

    # an edge that did not fall below its previous value marks the ion
    # as shared
    shared = current >= previous

    return area, pos - 1, shared
278 | 
def median_bounds(im, peak, shared=True):

    """
    @summary: Calculates the median of the left and right bounds found
        for each apexing peak mass

    @param im: The originating IntensityMatrix object
    @type im: pyms.GCMS.Class.IntensityMatrix
    @param peak: The Peak object
    @type peak: pyms.Peak.Class.Peak
    @param shared: Include ions shared with a neighbouring peak
    @type shared: BooleanType

    @return: median left and right boundary offset in points (0 for a
        side on which no bound was collected)
    @rtype: TupleType

    @author: Andrew Isaac
    """

    rows = im.get_matrix_list()
    spectrum = peak.get_mass_spectrum()
    apex = im.get_index_at_time(peak.get_rt())

    # prefer the stored apex index when it is similar to the index
    # derived from the retention time
    stored = peak.get_pt_bounds()
    if is_list(stored) and apex-1 < stored[1] < apex+1:
        apex = stored[1]

    # collect the bounds found for each mass
    left_list = []
    right_list = []
    for col in range(len(spectrum.mass_list)):
        # only masses with a positive intensity in the spectrum count
        if not spectrum.mass_spec[col] > 0:
            continue

        # ion chromatogram of this mass as a plain list
        ia = [row[col] for row in rows]
        _, left, right, l_share, r_share = ion_area(ia, apex)

        if shared or not l_share:
            left_list.append(left)
        if shared or not r_share:
            right_list.append(right)

    # NB a list may be empty (e.g. when shared ions are excluded), in
    # which case the bound defaults to 0
    if left_list:
        l_med = median(left_list)
    else:
        l_med = 0
    if right_list:
        r_med = median(right_list)
    else:
        r_med = 0

    return l_med, r_med
333 | 


--------------------------------------------------------------------------------
/pyms/Peak/IO.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Functions related to storing and loading a list of Peak objects
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | import string, cPickle
26 | 
27 | from pyms.Utils.Error import error
28 | from pyms.Peak.Class import Peak
29 | from pyms.Utils.Utils import is_str, is_list
30 | 
def store_peaks(peak_list, file_name):

    """
    @summary: Store the list of peak objects

        The list is pickled to 'file_name' and can be read back with
        load_peaks().

    @param peak_list: A list of peak objects
    @type peak_list: ListType
    @param file_name: File name to store peak list
    @type file_name: StringType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' must be a string")

    # pickle protocol 1 is a binary format, so the file has to be opened
    # in binary mode; text mode can corrupt the data on some platforms
    fp = open(file_name, 'wb')
    try:
        cPickle.dump(peak_list, fp, 1)
    finally:
        # close the file even if pickling fails
        fp.close()
50 | 
def load_peaks(file_name):

    """
    @summary: Loads the peak_list stored with 'store_peaks'

    @param file_name: File name of peak list
    @type file_name: StringType

    @return: The list of Peak objects
    @rtype: ListType

    @author: Andrew Isaac
    """

    if not is_str(file_name):
        error("'file_name' not a string")

    # NOTE: unpickling can execute arbitrary code, so only files from a
    # trusted source should be loaded.
    # The pickle data is binary, so the file is opened in binary mode.
    fp = open(file_name, 'rb')
    try:
        peak_list = cPickle.load(fp)
    finally:
        # close the file even if unpickling fails
        fp.close()

    if not is_list(peak_list):
        error("'file_name' is not a List")

    # an empty list is accepted; otherwise the first item is checked
    if len(peak_list) > 0 and not isinstance(peak_list[0], Peak):
        error("'peak_list' must be a list of Peak objects")

    return peak_list
78 | 


--------------------------------------------------------------------------------
/pyms/Peak/List/DPA/Function.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions for peak alignment by dynamic programming
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import copy
 26 | import numpy
 27 | #from math import sqrt, log
 28 | import math
 29 | 
 30 | from pyms.Utils.Error import error, stop
 31 | from pyms.Utils.Utils import is_list
 32 | from pyms.Utils.DP import dp
 33 | from pyms.Experiment.Class import Experiment
 34 | 
 35 | import Class
 36 | import Utils
 37 | 
 38 | # If psyco is installed, use it to speed up running time
 39 | try:
 40 |     import psyco
 41 |     psyco.full()
 42 | except:
 43 |     pass
 44 | 
 45 | def align_with_tree(T, min_peaks=1):
 46 | 
 47 |     """
 48 |     @summary: Aligns a list of alignments using the supplied guide tree
 49 | 
 50 |     @param T: The pairwise alignment object
 51 |     @type: pyms.Peak.List.DPA.Class.PairwiseAlignment
 52 |     @return: The final alignment consisting of aligned input alignments
 53 |     @rtype: pyms.Peak.List.DPA.Class.Alignment
 54 |     @author: Woon Wai Keen
 55 |     @author: Vladimir Likic
 56 |     """
 57 | 
 58 |     print " Aligning %d items with guide tree (D=%.2f, gap=%.2f)" % \
 59 |             (len(T.algts), T.D, T.gap)
 60 | 
 61 |     # For everything else, we align according to the guide tree provided by
 62 |     # Pycluster. From Pycluster documentation:
 63 |     #   Each item and subnode is represented by an integer. For hierarchical
 64 |     #   clustering of n items, we number the original items {0, ... , n-1},
 65 |     #   nodes are numbered {-1, ... , -(n-1)}. Note that the number of nodes
 66 |     #   is one less than the number of items.
 67 | 
 68 |     # extend As to length 2n to hold the n items, n-1 nodes, and 1 root
 69 |     As = copy.deepcopy(T.algts) + [ None for _ in range(len(T.algts)) ]
 70 | 
 71 |     # align the alignments into positions -1, ... ,-(n-1)
 72 |     total = len(T.tree)
 73 |     index = 0
 74 | 
 75 |     for node in T.tree:
 76 |         index = index - 1
 77 |         As[index] = align(As[node.left], As[node.right], T.D, T.gap)
 78 |         total = total - 1
 79 |         print " -> %d item(s) remaining" % total
 80 | 
 81 |     # the final alignment is in the root. Filter min peaks and return
 82 |     final_algt =  As[index]
 83 | 
 84 |     # useful for within state alignment only
 85 |     if min_peaks > 1:
 86 |         final_algt.filter_min_peaks(min_peaks)
 87 | 
 88 |     return final_algt
 89 | 
 90 | def exprl2alignment(exprl):
 91 | 
 92 |     """
 93 |     @summary: Converts experiments into alignments
 94 | 
 95 |     @param exprl: The list of experiments to be converted into an alignment
 96 |         objects
 97 |     @type exprl: ListType
 98 | 
 99 |     @author: Vladimir Likic
100 |     """
101 | 
102 |     if not is_list(exprl):
103 |         error("the argument is not a list")
104 | 
105 |     algts = []
106 | 
107 |     for item in exprl:
108 |         if not isinstance(item, Experiment):
109 |             error("list items must be 'Experiment' instances")
110 |         else:
111 |             algt = Class.Alignment(item)
112 |         algts.append(algt)
113 | 
114 |     return algts
115 | 
def align(a1, a2, D, gap):

    """
    @summary: Aligns two alignments by dynamic programming and merges them
        into a single composite alignment

    @param a1: The first alignment
    @type a1: pyms.Peak.List.DPA.Class.Alignment
    @param a2: The second alignment
    @type a2: pyms.Peak.List.DPA.Class.Alignment
    @param D: Retention time tolerance
    @type D: FloatType
    @param gap: Gap penalty
    @type gap: FloatType

    @return: The merged alignment, with its 'similarity' attribute set
    @rtype: pyms.Peak.List.DPA.Class.Alignment

    @author: Woon Wai Keen
    @author: Vladimir Likic
    """

    # pairwise scores between every position of a1 and every position of a2
    scores = score_matrix(a1, a2, D)

    # dynamic programming over the score matrix; only the traceback is used
    trace = dp(scores, gap)['trace']

    # composite alignment built by following the traceback
    merged = merge_alignments(a1, a2, trace)

    # similarity score of the two inputs along the same traceback
    merged.similarity = alignment_similarity(trace, scores, gap)

    return merged
150 | 
def merge_alignments(A1, A2, traces):

    """
    @summary: Merges two alignments with gaps added in from DP traceback

    @param A1: First alignment
    @param A2: Second alignment
    @param traces: DP traceback; a sequence of 0, 1 or 2.  0 means the
        current positions of A1 and A2 are matched, 1 means a gap in A2,
        2 means a gap in A1.  Any other value is ignored.

    @return: A single alignment from A1 and A2.  Its 'expr_code' is the
        concatenation of the input codes, 'peakalgt' holds the merged
        positions sorted with Utils.alignment_compare, and 'peakpos' is
        the transpose of 'peakalgt'.
    @author: Woon Wai Keen
    @author: Vladimir Likic
    @author: Qiao Wang
    """

    # Create object to hold new merged alignment and fill in its expr_codes
    ma = Class.Alignment(None)
    ma.expr_code = A1.expr_code + A2.expr_code

    rows1 = A1.peakpos
    rows2 = A2.peakpos
    n1 = len(rows1)
    n2 = len(rows2)

    # one output row per input row: rows of A1 first, then rows of A2
    merged = [ [] for _ in range(n1 + n2) ]

    idx1 = idx2 = 0

    for trace in traces:

        if trace == 0:
            # match: take the current position from both alignments
            for i in range(n1):
                merged[i].append(rows1[i][idx1])

            # rows of A2 start at offset n1; the offset is explicit so it
            # does not depend on a loop variable left over from the A1 loop
            for j in range(n2):
                merged[n1+j].append(rows2[j][idx2])

            idx1 = idx1 + 1
            idx2 = idx2 + 1

        elif trace == 1:
            # gap in A2: A1 advances, A2 rows are padded with None
            for i in range(n1):
                merged[i].append(rows1[i][idx1])

            for j in range(n2):
                merged[n1+j].append(None)

            idx1 = idx1 + 1

        elif trace == 2:
            # gap in A1: A2 advances, A1 rows are padded with None
            for i in range(n1):
                merged[i].append(None)

            for j in range(n2):
                merged[n1+j].append(rows2[j][idx2])

            idx2 = idx2 + 1

    # switch from one-row-per-input-row to one-row-per-position
    ma.peakalgt = numpy.transpose(merged)
    # sort according to average peak
    ma.peakalgt = list(ma.peakalgt)
    ma.peakalgt.sort(Utils.alignment_compare)
    ma.peakpos = numpy.transpose(ma.peakalgt)

    return ma
219 | 
def alignment_similarity(traces, score_matrix, gap):

    """
    @summary: Calculates similarity score between two alignments (new method)

    @param traces: Traceback from DP algorithm; a sequence of 0, 1 or 2
    @param score_matrix: Score matrix of the two alignments
    @param gap: Gap penalty

    @return: Similarity score (i.e. more similar => higher score)
    @author: Woon Wai Keen
    @author: Vladimir Likic
    """

    # turn the scores into similarities
    sim_matrix = 1. - score_matrix
    total = 0.
    row = col = 0

    # A trace of 0 is a match: the similarity of the two aligned positions
    # is added to the sum and both indices advance.
    #
    # A trace of 1 or 2 is a gap in A2 or A1 respectively: the gap penalty
    # is subtracted and only the index of the alignment without the gap
    # advances.  Any other trace value is skipped.
    for step in traces:
        if step not in (0, 1, 2):
            continue

        if step == 0:
            total = total + sim_matrix[row][col]
        else:
            total = total - gap

        if step != 2:
            row = row + 1
        if step != 1:
            col = col + 1

    return total
257 | 
def score_matrix(a1, a2, D):

    """
    @summary: Calculates the score matrix between two alignments

    @param a1: The first alignment
    @type a1: pyms.Peak.List.DPA.Class.Alignment
    @param a2: The second alignment
    @type a2: pyms.Peak.List.DPA.Class.Alignment
    @param D: Retention time tolerance
    @type D: FloatType

    @return: Matrix with one row per position of 'a1' and one column per
        position of 'a2'; each entry is the position_similarity() score
        of the corresponding pair of positions
    @rtype: numpy.ndarray

    @author: Qiao Wang
    @author: Andrew Isaac
    """

    scores = numpy.zeros((len(a1.peakalgt), len(a2.peakalgt)))

    for row, algt1pos in enumerate(a1.peakalgt):
        for col, algt2pos in enumerate(a2.peakalgt):
            scores[row][col] = position_similarity(algt1pos, algt2pos, D)

    return scores
291 | 
def position_similarity(pos1, pos2, D):

    """
    @summary: Calculates the similarity between the two alignment
        positions.  A score of 0 is best and 1 is worst.  The result is
        the average, over all pairs of non-empty peaks (one from each
        position), of 1 - cos*w, where cos is the cosine between the two
        mass spectra and w is a Gaussian weight of the retention time
        difference with width D.

    @param pos1: The position of the first alignment; a sequence of
        peaks, with None where there is no peak
    @param pos2: The position of the second alignment, same layout
    @param D: Retention time tolerance

    @return: The similarity value for the current position
    @rtype: FloatType

    @author: Qiao Wang
    @author: Vladimir Likic
    @author: Andrew Isaac
    """

    # Speed-up: pairs whose retention time weight would fall below the
    # tolerance of 1/1000 are scored as worst without touching the spectra
    _TOL = 0.001
    cutoff = D*math.sqrt(-2.0*math.log(_TOL))

    total = 0.0
    npairs = 0

    for peak1 in pos1:
        if peak1 is None:
            continue

        spec1 = peak1.mass_spec
        rt1 = peak1.rt
        # array form of spec1, built lazily on the first in-range partner
        vec1 = None

        for peak2 in pos2:
            if peak2 is None:
                continue

            rt2 = peak2.rt

            if abs(rt1-rt2) > cutoff:
                # out of range
                total += 1.0  # NB score of 1 is worst
            else:
                if vec1 is None:
                    vec1 = numpy.array(spec1, dtype='d')
                    vec1_ss = numpy.sum(vec1**2, axis=0)

                vec2 = numpy.array(peak2.mass_spec, dtype='d')
                vec2_ss = numpy.sum(vec2**2, axis=0)

                try:
                    top = numpy.dot(vec1, vec2)
                except ValueError:
                    error("Mass Spectra are of different length\n\n" +
                          " Use IntensityMatrix.crop_mass() to set\n" +
                          " same length for all Mass Spectra")

                bot = numpy.sqrt(vec1_ss*vec2_ss)
                # an all-zero spectrum has no direction; treat as no match
                if bot > 0:
                    cos = top/bot
                else:
                    cos = 0

                rt_weight = numpy.exp(-((rt1-rt2)/float(D))**2 / 2.0)
                total = total + (1.0 - (cos*rt_weight))

            npairs = npairs + 1

    if npairs == 0:
        # no pair of peaks to compare
        return 1.0  # NB score of 1 is worst

    return total/float(npairs)
359 | 


--------------------------------------------------------------------------------
/pyms/Peak/List/DPA/Utils.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utilities for peak alignment by dynamic programming
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | import numpy
26 | 
def alignment_compare(x, y):

    """
    @summary: A helper function for sorting peak positions in an alignment.
        Positions are ordered by the average retention time of their
        peaks.

    @param x: First alignment position; a sequence of peaks, with None
        where there is no peak
    @param y: Second alignment position, same layout as 'x'

    @return: -1 if the average retention time of 'x' is lower than that
        of 'y', 1 if it is higher, 0 otherwise
    @rtype: IntType
    """

    # only None marks a gap; peaks are never dropped for being falsy
    rts_x = [ peak.get_rt() for peak in x if peak is not None ]
    rts_y = [ peak.get_rt() for peak in y if peak is not None ]

    # float() forces true division even for integer retention times
    avg_x = numpy.sum(rts_x)/float(len(rts_x))
    avg_y = numpy.sum(rts_y)/float(len(rts_y))

    if avg_x < avg_y:
        return -1
    if avg_x > avg_y:
        return 1
    # ties compare equal, as a cmp-style function should; list.sort only
    # tests "result < 0", so the sorted order is the same as before
    return 0
43 | 
44 | 


--------------------------------------------------------------------------------
/pyms/Peak/List/DPA/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Alignment of peak lists by dynamic programming
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Peak/List/Function.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Functions related to Peak modification
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import numpy, math
 26 | 
 27 | #from pyms.Utils.Error import error
 28 | from pyms.Peak.Class import Peak
 29 | from pyms.Utils.Utils import is_str, is_list
 30 | from pyms.Utils.Math import median
 31 | from pyms.GCMS.Class import MassSpectrum
 32 | 
# If psyco is installed, use it to speed up running time.
# psyco is optional: the bare 'except' below swallows every exception
# raised by the import or by psyco.full(), so the module loads normally
# without it.
try:
    import psyco
    psyco.full()
except:
    pass
 39 | 
 40 | def composite_peak(peak_list, minutes=False):
 41 | 
 42 |     """
 43 |     @summary: Create a peak that consists of a composite spectrum from all
 44 |         spectra in the list of peaks.
 45 | 
 46 |     @param peak_list: A list of peak objects
 47 |     @type peak_list: ListType
 48 |     @param minutes: Return retention time as minutes
 49 |     @type minutes: BooleanType
 50 | 
 51 |     @return: Peak Object with combined mass spectra of 'peak_list'
 52 |     @type: pyms.Peak.Class.Peak
 53 | 
 54 |     @author: Andrew Isaac
 55 |     """
 56 | 
 57 |     first = True
 58 |     count = 0
 59 |     avg_rt = 0
 60 |     new_ms = None
 61 |     for peak in peak_list:
 62 |         if peak is not None:
 63 |             ms = peak.get_mass_spectrum()
 64 |             spec = numpy.array(ms.mass_spec, dtype='d')
 65 |             if first:
 66 |                 avg_spec = numpy.zeros(len(ms.mass_spec), dtype='d')
 67 |                 mass_list = ms.mass_list
 68 |                 first = False
 69 |             # scale all intensities to [0,100]
 70 |             max_spec = max(spec)/100.0
 71 |             if max_spec > 0:
 72 |                 spec = spec/max_spec
 73 |             else:
 74 |                 spec = spec*0
 75 |             avg_rt += peak.get_rt()
 76 |             avg_spec += spec
 77 |             count += 1
 78 |     if count > 0:
 79 |         avg_rt = avg_rt/count
 80 |         #if minutes == True:
 81 |             #avg_rt = avg_rt/60.0
 82 |         avg_spec = avg_spec/count
 83 |         avg_spec = avg_spec.tolist()  # list more compact than ndarray
 84 |         new_ms = MassSpectrum(mass_list, avg_spec)
 85 |         return Peak(avg_rt, new_ms, minutes)
 86 |     else:
 87 |         return None
 88 | 
 89 | def fill_peaks(data, peak_list, D, minutes=False):
 90 | 
 91 |     """
 92 |     @summary: Gets the best matching Retention Time and spectra from 'data' for
 93 |         each peak in the peak list.
 94 | 
 95 |     @param data: A data IntensityMatrix that has the same mass range as the
 96 |         peaks in the peak list
 97 |     @type data: pyms.GCMS.Class.IntensityMatrix
 98 |     @param peak_list: A list of peak objects
 99 |     @type peak_list: ListType
100 |     @param D: Peak width standard deviation in seconds.  Determines search
101 |         window width.
102 |     @type D: FloatType
103 |     @param minutes: Return retention time as minutes
104 |     @type minutes: BooleanType
105 | 
106 |     @return: List of Peak Objects
107 |     @type: ListType
108 | 
109 |     @author: Andrew Isaac
110 |     """
111 | 
112 |     # Test for best match in range where RT weight is greater than _TOL
113 |     _TOL = 0.001
114 |     cutoff = D*math.sqrt(-2.0*math.log(_TOL))
115 | 
116 |     # Penalise for neighboring peaks
117 |     # reweight so RT weight at nearest peak is _PEN
118 |     _PEN = 0.5
119 | 
120 |     datamat = data.get_matrix_list()
121 |     mass_list = data.get_mass_list()
122 |     datatimes = data.get_time_list()
123 |     minrt = min(datatimes)
124 |     maxrt = max(datatimes)
125 |     rtl = 0
126 |     rtr = 0
127 |     new_peak_list = []
128 |     for ii in xrange(len(peak_list)):
129 |         spec = peak_list[ii].get_mass_spectrum().mass_spec
130 |         spec = numpy.array(spec, dtype='d')
131 |         rt = peak_list[ii].get_rt()
132 |         spec_SS = numpy.sum(spec**2, axis=0)
133 | 
134 |         # get neighbour RT's
135 |         if ii > 0:
136 |             rtl = peak_list[ii-1].rt
137 |         if ii < len(peak_list)-1:
138 |             rtr = peak_list[ii+1].rt
139 |         # adjust weighting for neighbours
140 |         rtclose = min(abs(rt-rtl), abs(rt-rtr))
141 |         Dclose = rtclose/math.sqrt(-2.0*math.log(_PEN))
142 | 
143 |         if Dclose > 0:
144 |             Dclose = min(D, Dclose)
145 |         else:
146 |             Dclose = D
147 | 
148 |         # Get bounds
149 |         rtlow = rt - cutoff
150 |         if rtlow < minrt:
151 |             rtlow = minrt
152 |         lowii = data.get_index_at_time(rtlow)
153 | 
154 |         rtup = rt + cutoff
155 |         if rtup > maxrt:
156 |             rtup = maxrt
157 |         upii = data.get_index_at_time(rtup)
158 | 
159 |         # Get sub matrix of scans in bounds
160 |         submat = datamat[lowii:upii+1]
161 |         submat = numpy.array(submat, dtype='d')
162 |         subrts = datatimes[lowii:upii+1]
163 |         subrts = numpy.array(subrts, dtype='d')
164 | 
165 |         submat_SS = numpy.sum(submat**2, axis=1)
166 | 
167 |         # transpose spec (as matrix) for dot product
168 |         spec = numpy.transpose([spec])
169 |         # dot product on rows
170 | 
171 |         toparr = numpy.dot(submat, spec)
172 |         botarr = numpy.sqrt(spec_SS*submat_SS)
173 | 
174 |         # convert back to 1-D array
175 |         toparr = toparr.ravel()
176 | 
177 |         # scaled dot product of each scan
178 |         cosarr = toparr/botarr
179 | 
180 |         # RT weight of each scan
181 |         rtimearr = numpy.exp(-((subrts-rt)/float(Dclose))**2 / 2.0)
182 | 
183 |         # weighted scores
184 |         scorearr = cosarr*rtimearr
185 | 
186 |         # index of best score
187 |         best_ii = scorearr.argmax()
188 | 
189 |         # Add new peak
190 |         bestrt = subrts[best_ii]
191 |         bestspec = submat[best_ii].tolist()
192 |         ms = MassSpectrum(mass_list, bestspec)
193 |         new_peak_list.append(Peak(bestrt, ms, minutes))
194 | 
195 |     return new_peak_list
196 | 


--------------------------------------------------------------------------------
/pyms/Peak/List/Utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Utilities for manipulation of peak lists
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import math
 26 | 
 27 | from pyms.Peak.Class import Peak
 28 | from pyms.Utils.Error import error
 29 | from pyms.Utils.Utils import is_list, is_str
 30 | from pyms.Utils.Time import time_str_secs
 31 | 
 32 | def is_peak_list(peaks):
 33 | 
 34 |     """
 35 |     @summary: Returns True if 'peaks' is a valid peak list, False
 36 |     otherwise
 37 | 
 38 |     @param peaks: A list of peak objects
 39 |     @type peaks: ListType
 40 | 
 41 |     @return: A boolean indicator
 42 |     @rtype: BooleanType
 43 | 
 44 |     @author: Vladimir Likic
 45 |     """
 46 | 
 47 |     flag = True
 48 | 
 49 |     if not is_list(peaks):
 50 |         flag = False
 51 |     else:
 52 |         for item in peaks:
 53 |            if not isinstance(item, Peak):
 54 |               flag = False
 55 | 
 56 |     return flag
 57 | 
 58 | def sele_peaks_by_rt(peaks, rt_range):
 59 | 
 60 |     """
 61 |     @summary: Selects peaks from a retention time range
 62 | 
 63 |     @param peaks: A list of peak objects
 64 |     @type peaks: ListType
 65 |     @param rt_range: A list of two time strings, specifying lower and
 66 |            upper retention times
 67 |     @type rt_range: ListType
 68 |     @return: A list of peak objects
 69 |     @rtype: ListType
 70 |     """
 71 | 
 72 |     if not is_peak_list(peaks):
 73 |         error("'peaks' not a peak list")
 74 | 
 75 |     if not is_list(rt_range):
 76 |         error("'rt_range' not a list")
 77 |     else:
 78 |         if len(rt_range) != 2:
 79 |             error("'rt_range' must have exactly two elements")
 80 | 
 81 |         if not is_str(rt_range[0]) or not is_str(rt_range[1]):
 82 |             error("lower/upper retention time limits must be strings")
 83 | 
 84 |     rt_lo = time_str_secs(rt_range[0])
 85 |     rt_hi = time_str_secs(rt_range[1])
 86 | 
 87 |     if not rt_lo < rt_hi:
 88 |         error("lower retention time limit must be less than upper")
 89 | 
 90 |     peaks_sele = []
 91 | 
 92 |     for peak in peaks:
 93 |         rt = peak.get_rt()
 94 |         if rt > rt_lo and rt < rt_hi:
 95 |             peaks_sele.append(peak)
 96 | 
 97 |     #print "%d peaks selected" % (len(peaks_sele))
 98 | 
 99 |     return peaks_sele
100 | 


--------------------------------------------------------------------------------
/pyms/Peak/List/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Functions for modelling peak lists
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Peak/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Functions for modelling signal peaks
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Simulator/Function.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Provides functions for simulation of GCMS data
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import numpy
 26 | import math
 27 | import sys
 28 | 
 29 | sys.path.append("/x/PyMS/")
 30 | from pyms.GCMS.Class import IonChromatogram, IntensityMatrix
 31 | 
 32 | 
 33 | def gcms_sim(time_list, mass_list, peak_list):
 34 |     
 35 |     """
 36 |     @summary: Simulator of GCMS data
 37 |     
 38 |     @param time_list: the list of scan times
 39 |     @type time_list: listType
 40 |     
 41 |     @param mass_list: the list of m/z channels
 42 |     @type mass_list: listType
 43 |     
 44 |     @param peak_list: A list of peaks
 45 |     @type peak_list: list of pyms.Peak.Class.Peak
 46 |     
 47 |     @return: A simulated Intensity Matrix object
 48 |     @rtype: pyms.GCMS.Class.IntenstityMatrix
 49 |     
 50 |     @author: Sean O'Callaghan
 51 |     """
 52 |     n_mz = len(mass_list)
 53 |     n_scan = len(time_list)
 54 |         
 55 |     t1 = time_list[0]
 56 |     period = time_list[1] - t1
 57 |     
 58 |     # initialise a 2D numpy array for intensity matrix
 59 |     i_array = numpy.zeros((n_scan, n_mz), 'd') 
 60 |     
 61 |     
 62 |     for peak in peak_list:
 63 |         print "-", 
 64 |         index = int((peak.get_rt() - t1)/period) 
 65 |         height = sum(peak.get_mass_spectrum().mass_spec)
 66 |         # standard deviation = area/(height * sqrt(2/pi)) 
 67 |         sigma = peak.get_area() / (height * (math.sqrt(2*math.pi)))
 68 |         print "width", sigma
 69 |         for i in range(len(peak.get_mass_spectrum().mass_list)):
 70 |             ion_height = peak.get_mass_spectrum().mass_spec[i]
 71 |             ic = chromatogram(n_scan, index, sigma, ion_height)
 72 |             i_array[:, i] += ic
 73 |             
 74 |     im = IntensityMatrix(time_list, mass_list, i_array)
 75 |     
 76 |     return im
 77 |         
 78 | 
 79 | 
 80 | 
 81 | def chromatogram(n_scan, x_zero, sigma, peak_scale):
 82 | 
 83 |     """
 84 |     @summary: Returns a simulated ion chromatogram of a pure component
 85 |               The ion chromatogram contains a single gaussian peak.
 86 |     
 87 |     @param n_scan: the number of scans
 88 |     @type n_scan: intType
 89 |     
 90 |     @param x_zero: The apex of the peak
 91 |     @type x_zero: intType
 92 |     
 93 |     @param sigma: The standard deviation of the distribution
 94 |     @type sigma: floatType
 95 |     
 96 |     @param: peak_scale: the intensity of the peak at the apex
 97 |     @type peak_scale: floatType
 98 |     
 99 |     @return: a list of intensities
100 |     @rtype: listType
101 |     
102 |     @author: Sean O'Callaghan
103 |     """
104 | 
105 |    
106 |     ic = numpy.zeros((n_scan), 'd')                     
107 |     
108 |     for i in range(n_scan):
109 | 	x = float(i)
110 | 	
111 |         ic[i] = gaussian(x,x_zero,sigma,peak_scale)
112 | 
113 |     return ic
114 | 
def gaussian(point, mean, sigma, scale):
    """
    @summary: calculates a point on a gaussian density function

    f = s*exp(-((x-x0)^2)/(2*w^2));

    @param point: The point currently being computed
    @type point: floatType

    @param mean: The apex of the peak
    @type mean: intType

    @param sigma: The standard deviation of the gaussian
    @type sigma: floatType

    @param scale: The height of the apex
    @type scale: floatType

    @return: a single value from a normal distribution
    @rtype: floatType

    @author: Sean O'Callaghan
    """

    # Force floating point arithmetic: with integer arguments Python 2
    # would floor-divide the exponent (e.g. -1/2 == -1) and return a
    # wrong value.
    offset = float(point) - mean

    return scale*math.exp(-(offset**2)/(2.0*(sigma**2)))
140 | 
def add_gaussc_noise(im, scale):
    """
    @summary: adds constant-scale gaussian noise to every ion
              chromatogram of an IntensityMatrix object

    Each ion chromatogram is fetched from the matrix, modified by
    add_gaussc_noise_ic() and written back at the same index.

    @param im: the intensity matrix object
    @type im: pyms.GCMS.Class.IntensityMatrix

    @param scale: the scale of the normal distribution from
                  which the noise is drawn
    @type scale: floatType

    @author: Sean O'Callaghan
    """
    # only the number of m/z channels is needed below
    scan_count, channel_count = im.get_size()

    for channel in range(channel_count):
        chrom = im.get_ic_at_index(channel)
        add_gaussc_noise_ic(chrom, scale)
        im.set_ic_at_index(channel, chrom)
160 |     
161 | 
def add_gaussv_noise(im, scale, cutoff, prop):
    """
    @summary: adds intensity-dependent gaussian noise to every ion
              chromatogram of an IntensityMatrix object

    Each ion chromatogram is fetched from the matrix, modified by
    add_gaussv_noise_ic() and written back at the same index.

    @param im: the intensity matrix object
    @type im: pyms.GCMS.Class.IntensityMatrix

    @param scale: the scale of the normal distribution from
                  which the noise is drawn
    @type scale: floatType

    @param cutoff: The level below which the intensity of the ic at that point
                   has no effect on the scale of the noise distribution
    @type cutoff: intType

    @param prop: For intensity values above the cutoff, the scale is
                 multiplied by the ic value multiplied by prop
    @type prop: floatType

    @author: Sean O'Callaghan
    """
    # only the number of m/z channels is needed below
    scan_count, channel_count = im.get_size()

    for channel in range(channel_count):
        chrom = im.get_ic_at_index(channel)
        add_gaussv_noise_ic(chrom, scale, cutoff, prop)
        im.set_ic_at_index(channel, chrom)
192 |     
193 |     
194 |     
def add_gaussc_noise_ic(ic, scale):
    """
    @summary: perturbs an ion chromatogram with noise drawn from a
              normal distribution of zero mean and constant scale

    One noise value is drawn per point of the chromatogram; the noisy
    intensities replace the intensity array of ic.

    @param ic: The ion Chromatogram
    @type ic: pyms.GCMS.IonChromatogram

    @param scale: The scale of the normal distribution
    @type scale: intType

    @author: Sean O'Callaghan
    """

    point_count = len(ic)
    perturbation = numpy.random.normal(0.0, scale, point_count)

    ic.set_intensity_array(ic.get_intensity_array() + perturbation)
213 |     
214 |     
def add_gaussv_noise_ic(ic, scale, cutoff, prop):
    """
    @summary: adds noise to an ic. The noise value is drawn from a normal
              distribution, the scale of this distribution depends on the
              value of the ic at the point where the noise is being added

    Points whose intensity is below cutoff receive noise of scale
    'scale'; all other points receive noise of scale
    scale*intensity*prop. The noisy intensities replace the intensity
    array of ic.

    @param ic: The IonChromatogram
    @type ic: pyms.GCMS.IonChromatogram

    @param scale: The scale of the normal distribution for ic values below
                  the cutoff; it is modified for values above the cutoff
    @type scale: intType

    @param cutoff: The level below which the intensity of the ic at that point
                   has no effect on the scale of the noise distribution
    @type cutoff: intType

    @param prop: For ic values above the cutoff, the scale is multiplied by the ic
                 value multiplied by prop
    @type prop: floatType

    @author: Sean O'Callaghan
    """

    i_array = numpy.asarray(ic.get_intensity_array())

    # per-point scale of the noise distribution
    sigma = numpy.where(i_array < cutoff, scale, scale*i_array*prop)

    # One vectorised draw per point. The former element-wise loop stored
    # size-1 arrays into scalar slots, a conversion NumPy has deprecated.
    noise = numpy.random.normal(0.0, sigma)

    ic.set_intensity_array(noise + i_array)
252 |     
253 |     
254 | 


--------------------------------------------------------------------------------
/pyms/Simulator/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Sub-package to simulate GCMS data
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/Utils/DP.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Dynamic Programming routine
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import numpy
 26 | from Error import error
 27 | #from mpi4py import MPI
 28 | 
 29 | def dp(S, gap_penalty):
 30 |     
 31 |     """ 
 32 |     @summary: Solves optimal path in score matrix based on global sequence
 33 |     alignment
 34 | 
 35 |     @param S: Score matrix
 36 |     @type S: numpy.
 37 |     @param gap_penalty: Gap penalty
 38 |     @type gap_penalty: FloatType
 39 | 
 40 |     @return: A dictionary of results
 41 |     @rtype: DictType
 42 | 
 43 |     @author: Tim Erwin
 44 |     """
 45 |     #comm = MPI.COMM_WORLD
 46 |     #rank = comm.Get_rank()
 47 |     #print " In DP.py, I am rank", rank
 48 | 
 49 |     try:
 50 |         row_length = len(S[:,0])
 51 |     except(IndexError):
 52 |         error('Zero length alignment found: Samples with no peaks \
 53 |                cannot be aligned')
 54 |     col_length = len(S[0,:])
 55 | 
 56 |     #D contains the score of the optimal alignment
 57 |     D = numpy.zeros((row_length+1,col_length+1), dtype='d')
 58 |     for i in range(1, row_length+1):
 59 |         D[i,0] = gap_penalty*i
 60 |     for j in range(1, col_length+1):
 61 |         D[0,j] = gap_penalty*j
 62 |     D[0,0] = 0.0
 63 |     D[1:(row_length+1), 1:(col_length+1)] = S.copy();
 64 | 
 65 |     # Directions for trace
 66 |     # 0 - match               (move diagonal)
 67 |     # 1 - peaks1 has no match (move up)
 68 |     # 2 - peaks2 has no match (move left)
 69 |     # 3 - stop
 70 |     trace_matrix = numpy.zeros((row_length+1,col_length+1))
 71 |     trace_matrix[:,0] = 1; 
 72 |     trace_matrix[0,:] = 2;
 73 |     trace_matrix[0,0] = 3;
 74 |    
 75 |     for i in range(1,row_length+1):
 76 |         for j in range(1,col_length+1):
 77 |  
 78 |             #
 79 |             # Needleman-Wunsch Algorithm assuming a score function S(x,x)=0
 80 |             #
 81 |             #              | D[i-1,j-1] + S(i,j)
 82 |             # D[i,j] = min | D(i-1,j] + gap
 83 |             #              | D[i,j-1] + gap
 84 |             #
 85 | 
 86 |             darray = [D[i-1,j-1]+S[i-1,j-1], D[i-1,j]+gap_penalty, D[i,j-1]+gap_penalty]
 87 |             D[i,j] = min(darray)
 88 |             #Store direction in trace matrix
 89 |             trace_matrix[i,j] = darray.index(D[i,j])
 90 | 
 91 |     # Trace back from bottom right
 92 |     trace = []
 93 |     matches = []
 94 |     i = row_length
 95 |     j = col_length
 96 |     direction = trace_matrix[i,j]
 97 |     p = [row_length-1]
 98 |     q = [col_length-1]
 99 |     
100 |     while direction != 3:
101 |         
102 |         if direction == 0: #Match
103 |             i = i-1
104 |             j = j-1
105 |             matches.append([i,j])
106 |         elif direction == 1: #peaks1 has no match
107 |             i = i-1
108 |         elif direction == 2: #peaks2 has no match
109 |             j = j-1
110 |         p.append(i-1)
111 |         q.append(j-1)
112 |         trace.append(direction)
113 |         direction=trace_matrix[i,j]
114 | 
115 |     #remove 'stop' entry
116 |     p.pop()
117 |     q.pop()
118 |     # reverse the trace back
119 |     p.reverse()
120 |     q.reverse()
121 |     trace.reverse()
122 |     matches.reverse()
123 | 
124 |     return {'p':p, 'q':q, 'trace':trace, 'matches':matches, 'D':D, 'phi':trace_matrix}
125 | 
126 | 


--------------------------------------------------------------------------------
/pyms/Utils/Error.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Defines error()
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import sys
 26 | 
 27 | def error(message=None):
 28 | 
 29 |     """
 30 |     @summary: PyMS wide error function
 31 | 
 32 |     Prints out the error message (if supplied) and terminates execution
 33 |     with sys.exit(1).
 34 | 
 35 |     @param message: The error message to be printed
 36 |     @type message: StringType
 37 | 
 38 |     @return: none
 39 |     @rtype: NoneType
 40 | 
 41 |     @author: Lewis Lee
 42 |     @author: Vladimir Likic
 43 |     """
 44 | 
 45 |     sys.stdout = sys.__stderr__
 46 | 
 47 |     if message == None:
 48 |         message = "(no message)"
 49 |     else:
 50 |         message.rstrip('\n')
 51 | 
 52 |     # Retrieve details of the caller function from the caller
 53 |     # function's stack frame (i.e. the second most recent stack
 54 |     # frame, denoted by "1").
 55 |     funame = sys._getframe(1).f_code.co_name
 56 |     fargcount = sys._getframe(1).f_code.co_argcount
 57 |     fmodule = sys._getframe(1).f_code.co_filename
 58 |     fmodule = fmodule.split('/')
 59 |     fmodule = fmodule[-1]
 60 |     fmodule = fmodule.split('.')[0]
 61 | 
 62 |     if funame == '?':
 63 |         fmessage = "ERROR: 'main' in module %s.\n" % (fmodule)
 64 |     else:
 65 |         fmessage = "ERROR: %s(%d) in module %s.\n" % \
 66 |                 (funame, fargcount, fmodule)
 67 | 
 68 |     fmessage = fmessage + " %s" % ( message )
 69 | 
 70 |     message_list = fmessage.split("\n")
 71 | 
 72 |     n = 0
 73 |     for line in message_list:
 74 |         if len(line) > n:
 75 |             n = len(line)
 76 | 
 77 |     cstr = ""
 78 |     for ii in range(n):
 79 |         cstr = cstr + "=" 
 80 |     print "\n %s" % (cstr)
 81 |     print " %s" % (fmessage)
 82 |     print " %s\n" % (cstr)
 83 | 
 84 |     sys.exit(1)
 85 | 
 86 | def stop(message=None):
 87 | 
 88 |     """
 89 |     @summary: A simple termination of execution
 90 | 
 91 |     @param message: The message to be printed
 92 |     @type message: StringType
 93 | 
 94 |     @return: none
 95 |     @rtype: NoneType
 96 |     """
 97 | 
 98 |     if message != None:
 99 |         print message 
100 | 
101 |     raise RuntimeError
102 | 
103 | 


--------------------------------------------------------------------------------
/pyms/Utils/IO.py:
--------------------------------------------------------------------------------
  1 | """
  2 | General I/O functions
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import types, os, string, cPickle
 26 | 
 27 | from pyms.Utils.Error import error
 28 | from pyms.Utils.Utils import is_number, is_str, is_list
 29 | 
 30 | def dump_object(object, file_name):
 31 | 
 32 |     """
 33 |     @summary: Dumps an object to a file through cPickle.dump()
 34 | 
 35 |     @param object: Object to be dumpted
 36 |     @type object: An instance of an arbitrary class
 37 | 
 38 |     @param file_name: Name of the file for the object dump
 39 |     @type file_name: StringType
 40 | 
 41 |     @author: Vladimir Likic
 42 |     """
 43 | 
 44 |     fp = open_for_writing(file_name)
 45 |     cPickle.dump(object, fp)
 46 |     close_for_writing(fp)
 47 | 
 48 | def load_object(file_name):
 49 | 
 50 |     """
 51 |     @summary: Loads an object previously dumped with dump_object()
 52 | 
 53 |     @param file_name: Name of the object dump file
 54 |     @type file_name: StringType
 55 | 
 56 |     @return: Object contained in the file 'file_name'
 57 |     @rtype: An instance of an arbitrary class
 58 | 
 59 |     @author: Vladimir Likic
 60 |     """
 61 | 
 62 |     fp = open_for_reading(file_name)
 63 |     object = cPickle.load(fp)
 64 |     close_for_reading(fp)
 65 | 
 66 |     return object
 67 | 
 68 | def open_for_reading(file_name):
 69 | 
 70 |     """
 71 |     @summary: Opens file for reading, returns file pointer
 72 | 
 73 |     @param file_name: Name of the file to be opened for reading
 74 |     @type file_name: StringType
 75 | 
 76 |     @return: Pointer to the opened file
 77 |     @rtype: FileType
 78 | 
 79 |     @author: Vladimir Likic
 80 |     """
 81 | 
 82 |     if not is_str(file_name):
 83 |         error("'file_name' is not a string")
 84 |     try:
 85 |         fp = open(file_name)
 86 |     except IOError:
 87 |         error("'%s' does not exist" % (file_name))
 88 | 
 89 |     return fp
 90 | 
 91 | def open_for_writing(file_name):
 92 | 
 93 |     """
 94 |     @summary: Opens file for writing, returns file pointer
 95 | 
 96 |     @param file_name: Name of the file to be opened for writing
 97 |     @type file_name: StringType
 98 | 
 99 |     @return: Pointer to the opened file
100 |     @rtype: FileType
101 | 
102 |     @author: Vladimir Likic
103 |     """
104 | 
105 |     if not is_str(file_name):
106 |         error("'file_name' is not a string")
107 |     try:
108 |         fp = open(file_name, "w")
109 |     except IOError:
110 |         error("Cannot open '%s' for writing" % (file_name))
111 | 
112 |     return fp
113 | 
def close_for_reading(fp):

    """
    @summary: Closes file pointer open for reading

    Counterpart of open_for_reading(); simply calls fp.close().

    @param fp: A file pointer, previously opened for reading
    @type fp: FileType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    fp.close()
129 | 
def close_for_writing(fp):

    """
    @summary: Closes file pointer open for writing

    Counterpart of open_for_writing(); simply calls fp.close().

    @param fp: A file pointer, previously opened for writing
    @type fp: FileType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    fp.close()
145 | 
def file_lines(file_name, filter=False):

    """
    @summary: Returns lines from a file, as a list

    @param file_name: Name of a file
    @type file_name: StringType
    @param filter: If True, lines are pre-processed. The newline character
        is removed, leading and trailing whitespaces are removed, and
        empty lines and lines starting with '#' are discarded
    @type filter: BooleanType

    @return: A list of lines
    @rtype: ListType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    fp = open_for_reading(file_name)
    try:
        lines = fp.readlines()
    finally:
        # close the file even if reading fails
        close_for_reading(fp)

    if filter:
        # strip leading and trailing whitespaces
        stripped = [line.strip() for line in lines]
        # Drop empty lines and comments in a single pass. The former
        # list.remove() per discarded line was quadratic.
        lines = [line for line in stripped if line and line[0] != "#"]

    return lines
189 | 
def save_data(file_name, data, format_str="%.6f", prepend="", sep=" ",
        compressed=False):

    """
    @summary: Saves a list of numbers or a list of lists of numbers
    to a file with specific formatting

    A list of numbers is written one number per line. A list of lists is
    written one inner list per line. An empty list produces an empty file.

    @param file_name: Name of a file
    @type file_name: StringType
    @param data: A list of numbers, or a list of lists
    @type data: ListType
    @param format_str: A format string for individual entries
    @type format_str: StringType
    @param prepend: A string, printed before each row
    @type prepend: StringType
    @param sep: A string, printed between the numbers of a row
    @type sep: StringType
    @param compressed: A boolean. If True, the output will be gzipped
        with the external 'gzip' program
    @type compressed: BooleanType

    @return: none
    @rtype: NoneType

    @author: Vladimir Likic
    """

    if not is_str(file_name):
        error("'file_name' is not a string")

    if not is_list(data):
        error("'data' is not a list")

    if not is_str(prepend):
        error("'prepend' is not a string")

    if not is_str(sep):
        error("'sep' is not a string")

    # Decide whether data is a vector or matrix and validate it before
    # the file is opened, so invalid data no longer leaves a truncated
    # file behind. Empty data is treated as an empty vector.
    data_is_matrix = len(data) > 0 and not is_number(data[0])

    if data_is_matrix:
        for row in data:
            if not is_list(row):
                error("not all elements of the list are lists")
        for row in data:
            for datum in row:
                if not is_number(datum):
                    error("datum not a number")
    else:
        for item in data:
            if not is_number(item):
                error("not all elements of the list are numbers")

    fp = open_for_writing(file_name)
    try:
        if data_is_matrix:
            for row in data:
                fields = [format_str % (datum) for datum in row]
                fp.write(prepend + sep.join(fields) + "\n")
        else:
            for item in data:
                fp.write(prepend + format_str % (item) + "\n")
    finally:
        close_for_writing(fp)

    if compressed:
        # Run gzip without a shell: file names containing spaces or shell
        # metacharacters are passed through verbatim, and '--' protects
        # names that start with a dash.
        import subprocess
        try:
            status = subprocess.call(['gzip', '--', file_name])
        except OSError:
            # the gzip executable could not be started
            status = -1
        if status != 0:
            error("gzip compress failed")
264 | 
265 | 


--------------------------------------------------------------------------------
/pyms/Utils/Math.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Provides mathematical functions
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import copy, math
 26 | 
 27 | from pyms.Utils.Error import error
 28 | from pyms.Utils.Utils import is_list, is_number
 29 | 
 30 | def median(v):
 31 | 
 32 |     """
 33 |     @summary: Returns a median of a list or numpy array
 34 | 
 35 |     @param v: Input list or array
 36 |     @type v: ListType or numpy.core.ndarray
 37 |     @return: The median of the input list
 38 |     @rtype: FloatType
 39 | 
 40 |     @author: Vladimir Likic
 41 |     """
 42 | 
 43 |     if not is_list(v):
 44 |         error("argument neither list nor array")
 45 | 
 46 |     local_data = copy.deepcopy(v)
 47 |     local_data.sort()
 48 |     N = len(local_data)
 49 | 
 50 |     if (N % 2) == 0:
 51 |         # even number of points
 52 |         K = N/2 - 1 
 53 |         median = (local_data[K] + local_data[K+1])/2.0
 54 |     else:
 55 | 	    # odd number of points
 56 |         K = (N - 1)/2 - 1
 57 |         median = local_data[K+1]
 58 | 
 59 |     return median
 60 | 
 61 | def vector_by_step(vstart,vstop,vstep):
 62 | 
 63 |     """
 64 |     @summary: generates a list by using start, stop, and step values
 65 | 
 66 |     @param vstart: Initial value 
 67 |     @type vstart: A number
 68 |     @param vstop: Max value
 69 |     @type vstop: A number
 70 |     @param vstep: Step
 71 |     @type vstep: A number
 72 |    
 73 |     @return: A list generated
 74 |     @rtype: ListType
 75 | 
 76 |     @author: Vladimir Likic
 77 |     """
 78 | 
 79 |     if not is_number(vstart) or not is_number(vstop) or not is_number(vstep):
 80 |         error("parameters start, stop, step must be numbers")
 81 | 
 82 |     v = []
 83 | 
 84 |     p = vstart 
 85 |     while p < vstop:
 86 |         v.append(p)
 87 |         p = p + vstep
 88 | 
 89 |     return v
 90 | 
 91 | def MAD(v):
 92 | 
 93 |     """
 94 |     @summary: median absolute deviation
 95 | 
 96 |     @param v: A list or array
 97 |     @type v: ListType, TupleType, or numpy.core.ndarray
 98 | 
 99 |     @return: median absolute deviation
100 |     @rtype: FloatType
101 | 
102 |     @author: Vladimir Likic
103 |     """
104 | 
105 |     if not is_list(v):
106 |         error("argument neither list nor array")
107 | 
108 |     m = median(v)
109 |     m_list = []
110 | 
111 |     for xi in v:
112 |         d = math.fabs(xi - m)
113 |         m_list.append(d)
114 | 
115 |     mad = median(m_list)/0.6745
116 | 
117 |     return mad
118 | 
def amin(v):

    """
    @summary: Finds the minimum element in a list or array

    If the minimum occurs more than once, the index of its first
    occurrence is returned.

    @param v: A list or array (must not be empty)
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Tuple (mini, minv), where minv is the minimum
        element in the list and mini is its index
    @rtype: TupleType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    if len(v) == 0:
        error("cannot find the minimum of an empty sequence")

    # Start from the first element. The former search started from
    # max(v) with a strict comparison and therefore failed whenever all
    # elements were equal (including single-element input).
    mini = 0
    minv = v[0]

    for ii in range(1, len(v)):
        if v[ii] < minv:
            minv = v[ii]
            mini = ii

    return mini, minv
149 | 
def mean(v):

    """
    @summary: Calculates the arithmetic mean

    The elements are accumulated starting from 0.0 and the total
    is divided by the number of elements.

    @param v: A list or array
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Mean
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    total = 0.0
    for item in v:
        total = total + item

    count = float(len(v))

    return total/count
173 | 
def std(v):

    """
    @summary: Calculates standard deviation

    The sum of squared deviations from the mean is divided by
    (number of elements - 1) before the square root is taken.

    @param v: A list or array
    @type v: ListType, TupleType, or numpy.core.ndarray

    @return: Standard deviation
    @rtype: FloatType

    @author: Vladimir Likic
    """

    if not is_list(v):
        error("argument neither list nor array")

    centre = mean(v)

    # deviation of every element from the mean
    deviations = [item - centre for item in v]

    squares = 0.0
    for dev in deviations:
        squares = squares + dev*dev

    variance = squares/float(len(v)-1)

    return math.sqrt(variance)
201 | 
202 | 
def rmsd(list1, list2):

    """
    @summary: Calculates the root mean square deviation between
        two data sets

    Elements are paired by index over the length of list1; the
    sum of squared differences is divided by len(list1) before
    the square root is taken.

    @param list1: First data set
    @type list1: ListType, TupleType, or numpy.core.ndarray
    @param list2: Second data set
    @type list2: ListType, TupleType, or numpy.core.ndarray

    @return: RMSD value
    @rtype: FloatType

    @author: Qiao Wang
    @author: Andrew Isaac
    @author: Vladimir Likic
    """

    for candidate in (list1, list2):
        if not is_list(candidate):
            error("argument neither list nor array")

    squared_sum = 0.0
    for idx, first in enumerate(list1):
        # list2 is indexed (not zipped) so a shorter list2 still fails
        squared_sum = squared_sum + (first - list2[idx]) ** 2

    return math.sqrt(squared_sum / len(list1))
231 | 
232 | 


--------------------------------------------------------------------------------
/pyms/Utils/Time.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Time conversion and related functions
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import math
 26 | 
 27 | from pyms.Utils.Error import error
 28 | from pyms.Utils.Utils import is_int, is_str, is_str_num
 29 | 
 30 | def time_str_secs(time_str):
 31 | 
 32 |     """
 33 |     @summary: Resolves time string of the form "<NUMBER>s" or "<NUMBER>m",
 34 |         returns time in seconds
 35 | 
 36 |     @param time_str: A time string, which must be of the form
 37 |         "<NUMBER>s" or "<NUMBER>m" where "<NUMBER>" is a valid number
 38 |     @type time_str: StringType
 39 | 
 40 |     @return: Time in seconds
 41 |     @rtype: FloatType
 42 | 
 43 |     @author: Vladimir Likic
 44 |     """
 45 | 
 46 |     if not is_str(time_str):
 47 |         error("time string not a string")
 48 | 
 49 |     time_number = time_str[:-1]
 50 |     time_spec = time_str[-1].lower()
 51 | 
 52 |     if not is_str_num(time_number):
 53 |        print " --> received time string '%s'" % (time_number)
 54 |        error("improper time string")
 55 | 
 56 |     if not time_spec == "s" and not time_spec == "m":
 57 |         error("time string must end with either 's' or 'm'")
 58 | 
 59 |     time = float(time_number)
 60 | 
 61 |     if time_spec == "m":
 62 |         time = time*60.0
 63 | 
 64 |     return time
 65 | 
 66 | def window_sele_points(ic, window_sele, half_window=False):
 67 | 
 68 |     """
 69 |     @summary: Converts window selection parameter into points based
 70 |         on the time step in an ion chromatogram
 71 | 
 72 |     @param ic: ion chromatogram object relevant for the conversion
 73 |     @type ic: pyms.GCMS.Class.IonChromatogram
 74 | 
 75 |     @param window_sele: The window selection parameter. This can be
 76 |         an integer or time string. If integer, taken as the number
 77 |         of points. If a string, must of the form "<NUMBER>s" or
 78 |         "<NUMBER>m", specifying a time in seconds or minutes,
 79 |         respectively
 80 |     @type window_sele: IntType or StringType
 81 | 
 82 |     @param half_window: Specifies whether to return half-window
 83 |     @type half_window: BooleanType
 84 | 
 85 |     @return: The number of points in the window
 86 |     @rtype: IntType
 87 | 
 88 |     @author: Vladimir Likic
 89 |     """
 90 | 
 91 |     if not is_int(window_sele) and not is_str(window_sele):
 92 |         error("'window' must be an integer or a string")
 93 | 
 94 |     if is_int(window_sele):
 95 |         if half_window:
 96 |             if window_sele % 2 == 0:
 97 |                 error("window must be an odd number of points")
 98 |             else:
 99 |                 points = int(math.floor(window_sele*0.5))
100 |         else:
101 |             points = window_sele
102 |     else:
103 |         time = time_str_secs(window_sele)
104 |         time_step = ic.get_time_step()
105 | 
106 |         if half_window:
107 |             time = time*0.5
108 | 
109 |         points = int(math.floor(time/time_step))
110 | 
111 |     if half_window:
112 |         if points < 1: error("window too small (half window=%d)" % (points))
113 |     else:
114 |         if points < 2: error("window too small (window=%d)" % (points))
115 | 
116 |     return points
117 | 
118 | 


--------------------------------------------------------------------------------
/pyms/Utils/Utils.py:
--------------------------------------------------------------------------------
  1 | """
  2 | General utility functions
  3 | """
  4 | 
  5 |  #############################################################################
  6 |  #                                                                           #
  7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
  8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
  9 |  #                                                                           #
 10 |  #    This program is free software; you can redistribute it and/or modify   #
 11 |  #    it under the terms of the GNU General Public License version 2 as      #
 12 |  #    published by the Free Software Foundation.                             #
 13 |  #                                                                           #
 14 |  #    This program is distributed in the hope that it will be useful,        #
 15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
 16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
 17 |  #    GNU General Public License for more details.                           #
 18 |  #                                                                           #
 19 |  #    You should have received a copy of the GNU General Public License      #
 20 |  #    along with this program; if not, write to the Free Software            #
 21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
 22 |  #                                                                           #
 23 |  #############################################################################
 24 | 
 25 | import types, re
 26 | 
 27 | import numpy
 28 | 
 29 | from pyms.Utils.Error import error
 30 | 
 31 | def is_str(arg):
 32 | 
 33 |     """
 34 |     @summary: Returns True if the argument is a string, False otherwise
 35 | 
 36 |     @param arg: The argument to be evaluated as a string
 37 |     @type arg: arbitrary
 38 | 
 39 |     @return: A boolean indicator True or False
 40 |     @rtype: BooleanType
 41 | 
 42 |     @author: Vladimir Likic
 43 |     """
 44 | 
 45 |     if isinstance(arg,types.StringType):
 46 |         return True 
 47 |     else:
 48 |         return False
 49 | 
 50 | def is_int(arg):
 51 | 
 52 |     """
 53 |     @summary: Returns True if the argument is an integer, False
 54 |         otherwise
 55 | 
 56 |     @param arg: The argument to be evaluated as an integer
 57 |     @type arg: arbitrary
 58 | 
 59 |     @return: A boolean indicator True or False
 60 |     @rtype: BooleanType
 61 | 
 62 |     @author: Vladimir Likic
 63 |     """
 64 | 
 65 |     if isinstance(arg,types.IntType) or isinstance(arg,types.LongType):
 66 |         return True
 67 |     else:
 68 |         return False
 69 | 
 70 | def is_float(arg):
 71 | 
 72 |     """
 73 |     @summary: Returns True if the argument is a float, False otherwise
 74 | 
 75 |     @param arg: The argument to be evaluated as a float
 76 |     @type arg: arbitrary
 77 | 
 78 |     @return: A boolean indicator True or False
 79 |     @rtype: BooleanType
 80 | 
 81 |     @author: Vladimir Likic
 82 |     """
 83 | 
 84 |     if isinstance(arg,types.FloatType):
 85 |         return True
 86 |     else:
 87 |         return False
 88 | 
 89 | def is_number(arg):
 90 | 
 91 |     """
 92 |     @summary: Returns True if the argument is a number (integer or
 93 |         float), False otherwise
 94 |    
 95 |     @param arg: The argument to be evaluated as a number
 96 |     @type arg: arbitrary
 97 | 
 98 |     @return: A boolean indicator True or False
 99 |     @rtype: BooleanType
100 | 
101 |     @author: Vladimir Likic
102 |     """
103 | 
104 |     if is_int(arg) or is_float(arg):
105 |         return True 
106 |     else:
107 |         return False
108 | 
def is_list(arg):

    """
    @summary: Returns True if the argument is a list, tuple, or numpy
        array, False otherwise

    @param arg: The argument to be evaluated as a list
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    # Builtin list/tuple are the objects types.ListType/TupleType alias
    # on Python 2 (the aliases do not exist on Python 3), and
    # numpy.ndarray is the public name of the class that was reached
    # through the private numpy.core namespace.
    return isinstance(arg, (list, tuple, numpy.ndarray))
129 | 
def is_array(arg):

    """
    @summary: Returns True if the argument is a numpy array, False
        otherwise

    @param arg: The argument to be evaluated as a numpy array
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    # numpy.ndarray is the public name of the array class; the
    # numpy.core namespace it was reached through is private.
    return isinstance(arg, numpy.ndarray)
149 | 
150 | 
def is_boolean(arg):

    """
    @summary: Returns True if the argument is boolean, False otherwise

    @param arg: The argument to be evaluated as boolean
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype: BooleanType

    @author: Vladimir Likic
    """

    # The builtin 'bool' is the same object as types.BooleanType on
    # Python 2; the types.BooleanType alias does not exist on Python 3.
    return isinstance(arg, bool)
169 | 
def is_str_num(arg):

    """
    @summary: Determines if the string form of the argument is
        in the format of a number

    The argument is converted with str() and matched against a
    pattern accepting an optional sign, digits with an optional
    decimal point (or a leading decimal point followed by digits),
    and an optional exponent introduced by 'e' or 'E'.

    @param arg: A string to be evaluated as a number
    @type arg: StringType

    @return: A boolean indicator True or False
    @rtype:  BooleanType

    @author: Gyro Funch (from Active State Python Cookbook)
    """

    number_format = re.compile(r'^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$')

    return number_format.match(str(arg)) is not None
193 | 
def is_positive_int(arg):

    """
    @summary: Determines if the argument is an integer greater than zero

    @param arg: The argument to be evaluated as a positive integer
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype:  BooleanType

    @author: Milica Ng
    """

    # the comparison is evaluated only for arguments is_int() accepts
    return bool(is_int(arg) and arg > 0)
214 | 
def is_list_of_dec_nums(arg):

    """
    @summary: Determines if the argument is a non-empty list whose
        elements are all floats

    Tuples, arrays and empty lists are rejected, as is any list
    containing a non-float element (integers included).

    @param arg: The argument to be evaluated as a list of decimal
        numbers
    @type arg: arbitrary

    @return: A boolean indicator True or False
    @rtype:  BooleanType

    @author: Milica Ng
    """

    # Builtin list/float are the objects types.ListType/FloatType alias
    # on Python 2; the aliases do not exist on Python 3.
    if not isinstance(arg, list) or len(arg) == 0:
        return False

    return all(isinstance(item, float) for item in arg)
238 | 
239 | 


--------------------------------------------------------------------------------
/pyms/Utils/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility functions for PyMS wide use
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/pyms/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | The root of the package pyms
 3 | """
 4 | 
 5 |  #############################################################################
 6 |  #                                                                           #
 7 |  #    PyMS software for processing of metabolomic mass-spectrometry data     #
 8 |  #    Copyright (C) 2005-2012 Vladimir Likic                                 #
 9 |  #                                                                           #
10 |  #    This program is free software; you can redistribute it and/or modify   #
11 |  #    it under the terms of the GNU General Public License version 2 as      #
12 |  #    published by the Free Software Foundation.                             #
13 |  #                                                                           #
14 |  #    This program is distributed in the hope that it will be useful,        #
15 |  #    but WITHOUT ANY WARRANTY; without even the implied warranty of         #
16 |  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          #
17 |  #    GNU General Public License for more details.                           #
18 |  #                                                                           #
19 |  #    You should have received a copy of the GNU General Public License      #
20 |  #    along with this program; if not, write to the Free Software            #
21 |  #    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              #
22 |  #                                                                           #
23 |  #############################################################################
24 | 
25 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | try:
 3 |     from setuptools import setup
 4 | except ImportError:
 5 |     from distutils.core import setup
 6 | 
 7 | try:
 8 |     from setuptools import find_packages
 9 | except:
10 |     pass
11 | 
12 | setup(
13 |     name='pyms',
14 |     version='2.1',
15 |     author='Sean O\'Callaghan',
16 |     author_email='spoc@unimelb.edu.au',
17 |     packages=find_packages(),
18 |     url='https://github.com/ma-bio21/pyms',
19 |     download_url = 'https://github.com/ma-bio21/pyms/tarball/0.1',
20 |     license='LICENSE.txt',
21 |     description='Python Toolkit for Mass Spectrometry',
22 |     long_description=('PyMS is a collection of PyMS libraries '
23 |                       'which can be used to perform peak picking ',
24 |                       'alignment by dynamic programming, and '
25 |                       'gap-filling in Gas Chromatography Mass '
26 |                       'Spectrometry experiments'),
27 |     install_requires=[
28 |         # PyMS also requires both scipy and numpy. Installation
29 |         # of scipy and numpy should be performed before PyMS 
30 |         # installation, as these packages do not play
31 |         # well with automated package installers
32 |         # In addition, matplotlib is required if graphical output is 
33 |         # desired. 
34 |         # A good solution which provides all of these dependencies is
35 |         # Anaconda
36 |         #"numpy >= 1.7.1",
37 |         #"scipy >= 0.12.0",
38 |         "pymzml >= 0.7.5",
39 |         "pycluster >= 1.49",
40 |         ],
41 | )
42 | 


--------------------------------------------------------------------------------