├── .travis.yml
├── Dockerfile
├── ExampleDotNet.exe.config
├── ExampleDotNet.vshost.exe.config
├── Mono.Options.dll
├── Newtonsoft.Json.dll
├── README.md
├── README.txt
├── RawFileRdr_License_Agreement_RevA.txt
├── ThermoFisher.CommonCore.BackgroundSubtraction.dll
├── ThermoFisher.CommonCore.Data.dll
├── ThermoFisher.CommonCore.MassPrecisionEstimator.dll
├── ThermoFisher.CommonCore.RawFileReader.dll
├── absence_peak_data
    ├── B002413_Ap_22cm_Yeast_171215184201.txt
    ├── B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.txt
    ├── B002419_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol_inYeast.txt
    ├── B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.txt
    └── configuration_iRT.ini
├── example_parameter_file.ini
├── license
├── moff.py
├── moff_all.py
├── moff_enviroment.yml
├── moff_mbr.py
├── moff_setting.properties
├── ptm_setting_mq.json
├── ptm_setting_ps.json
├── requirements
    └── development.txt
├── sample_data
    ├── 20080311_CPTAC6_07_6A005.txt
    ├── 20080313_CPTAC6_07_6A005.txt
    └── 20080315_CPTAC6_07_6A005.txt
├── test
    ├── B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.txt
    ├── configuration_iRT_test_match.ini
    ├── test_apex.py
    └── test_mbr.py
└── txic_json.exe


/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - "3.6"
 5 | 
 6 | notifications:
 7 |   email: false
 8 | 
 9 | install:
10 |     - wget http://bit.ly/miniconda -O miniconda.sh
11 |     - bash miniconda.sh -b -p $HOME/miniconda
12 |     - export PATH="$HOME/miniconda/bin:$PATH"
13 |     - conda update --yes conda
14 |     - conda config --add channels bioconda
15 |     - travis_retry conda create --yes -n TEST python=3.6 $CONDA --file ./requirements/development.txt
16 |     - source activate TEST
17 |     - conda install  --yes -c conda-forge mono
18 |     - conda update pymzml
19 |     - conda install --yes  -c conda-forge brain-isotopic-distribution
20 |     - conda install --yes pytest
21 |     - wget http://genesis.ugent.be/uvpublicdata/moFF_test/B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.raw
22 | 
23 | 
24 | #before-script:
25 | 
26 | script:
27 |   #- flake8 . --ignore E501 E203
28 |   -  python moff_all.py --tsv test/B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.txt --raw_list B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.raw  --xic_length 2 --rt_peak_win 1  --loc_out test/ --tol  4 --mbr off
29 |   -  python moff_all.py --config_file test/configuration_iRT_test_match.ini
30 |   -  pytest test/
31 |   #- python moff_all.py --help
32 | 
33 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | from continuumio/miniconda3
 2 | 
 3 | COPY moff_enviroment.yml .
 4 | RUN apt-get update
 5 | RUN apt-get -y install git
 6 | 
 7 | RUN conda env create -f moff_enviroment.yml
 8 | RUN echo "source activate moff_env" >>  ~/.bashrc
 9 | ENV PATH /opt/conda/envs/moff_env/bin:$PATH
10 | RUN git clone  -b master  --single-branch https://github.com/compomics/moff /moFF
11 | WORKDIR /moFF
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/ExampleDotNet.exe.config:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <configuration>
 3 |     <startup> 
 4 |         <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.7"/>
 5 |     </startup>
 6 |   <runtime>
 7 |     <loadFromRemoteSources enabled="true"></loadFromRemoteSources>
 8 |   </runtime>
 9 | </configuration>
10 | 


--------------------------------------------------------------------------------
/ExampleDotNet.vshost.exe.config:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="utf-8"?>
2 | <configuration>
3 |     <startup> 
4 |         <supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.6"/>
5 |     </startup>
6 | </configuration>
7 | 


--------------------------------------------------------------------------------
/Mono.Options.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/moFF/279f0101efc031d13c2a1729023df33a796d9eb9/Mono.Options.dll


--------------------------------------------------------------------------------
/Newtonsoft.Json.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/moFF/279f0101efc031d13c2a1729023df33a796d9eb9/Newtonsoft.Json.dll


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # moFF #
  2 | [![Build Status](https://travis-ci.org/compomics/moFF.svg?branch=master)](https://travis-ci.org/compomics/moFF)
  3 | 
  4 | 
  5 |  * [Introduction](#introduction)
  6 |  * [Minimum Requirements](#minimum-requirements)
  7 |  * [Input Data](#input-data)
  8 |  * [Sample Data](#sample-data)
  9 |  * [Absence of Peak Sample Data](#absence-of-peak-sample-data)
 10 |  * [Match between runs](#match-between-runs)
 11 |  * [Apex Intensity](#apex-intensity)
 12 |  * [Entire workflow](#entire-workflow)
 13 |  * [Post Translation Modification file](#post-translation-modification-file)
 14 |  * [Docker](#docker)
 15 |  * [Output Data](#output-data)
 16 | 
 17 | ---
 18 | 
 19 | ## Introduction ##
 20 | 
 21 | moFF is an OS independent tool designed to extract apex MS1 intensity using a set of identified MS2 peptides. It currently uses a Thermo library to directly extract data from Thermo Raw spectrum files, eliminating the need for conversions from other formats. Moreover, moFF also allows to work directly with mzML files.
 22 | 
 23 | moFF is built up from two  modules :
 24 | - *moff_mbr.py* : match between run (mbr)
 25 | - *moff.py*: apex intensity
 26 | 
 27 | NOTE : Please use *moff_all.py* script to run the entire pipeline with both MBR and apex strategies.
 28 | 
 29 | The version presented here is a commandline tool that can easily be adapted to a cluster environment. A graphical user interface can be found [here](https://github.com/compomics/moff-gui). The latter is designed to be able to use [PeptideShaker](https://github.com/compomics/peptide-shaker) results as an input format. Please refer to the [moff-GUI](https://github.com/compomics/moff-gui) manual for more information on how to do this.
 30 | 
 31 | [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat-square)](http://bioconda.github.io/recipes/moff/README.html) 
 32 | 
 33 | moFF is also available on bioconda. To install with conda, use the following command:
 34 | ```
 35 | conda install -c bioconda moff
 36 | ```
 37 | This automatically installs all dependencies. Note that bioconda only supports 64-bit macOS and Linux.
 38 | 
 39 | 
 40 | 
 41 | [Top of page](#moff)
 42 | 
 43 | ----
 44 | 
 45 | ## moFF Publication:
 46 |   * [Argentini et al. Nature Methods. 2016 13(12):964–966](http://www.nature.com/nmeth/journal/v13/n12/full/nmeth.4075.html).
 47 |   * If you use moFF as part of a publication, please include this reference.
 48 | 
 49 | ---
 50 | 
 51 | ## Minimum Requirements ##
 52 | 
 53 | Required python libraries :
 54 | - Python 3.6+
 55 | - pandas  >  0.23
 56 | - numpy > 1.15.0
 57 | - argparse > 1.2.1
 58 | - scipy 1.1.0
 59 | - scikit-learn > 0.19
 60 | - pymzML > 2.0.3
 61 | - brain-isotopic-distribution > 1.3.2
 62 | - pyteomics >  3.5
 63 | 
 64 | 
 65 | Required linux library:
 66 | - Mono version 4.2.1
 67 | 
 68 | Required windows library:
 69 | - .NET Framework 4.6.2
 70 | 
 71 | 
 72 | Optional requirements :
 73 | - When using PeptideShaker results as a source, a PeptideShaker installation (<http://compomics.github.io/projects/peptide-shaker.html>) needs to be available.
 74 |  
 75 | 
 76 | During processing, moFF makes use of a third party algorithm (txic_json.exe) which allows for the parsing of the Thermo RAW data.
 77 | 
 78 | 
 79 | [Top of page](#moff)
 80 | 
 81 | ---
 82 | 
 83 | 
 84 | ## Input Data ##
 85 | 
 86 | moFF requires two types of input for the quantification procedure :
 87 |  - Thermo RAW file or mzML file
 88 |  - MS2 identified peptide information
 89 | 
 90 | The MS2 identified peptides can be presented as a tab-delimited file containing minimal (mandatory) annotation for each peptide (a)
 91 | 
 92 | (a) The tab-delimited file must contain the following information for all the peptides:
 93 |   - 'peptide' : peptide-spectrum-match  sequence
 94 |   - 'prot' : protein ID 
 95 |   - 'mod_peptide' :  peptide-spectrum-match  sequence that also contains possible modifications (e.g. `NH2-M<Mox>LTKFESK-COOH` )
 96 |   - 'rt': peptide-spectrum-match retention time  (i.e. the retention time contained in the mgf file; the retention time must be specified in seconds)
 97 |   - 'mz' : mass over charge
 98 |   - 'mass' : mass of the peptide
 99 |   - 'charge' : charge of the ionized peptide
100 |  
101 | NOTE 1 : In case the tab-delimited file provided by the user contains fields that are not mentioned here (e.g. peptide length, search engine scores) the algorithm will retain these in the final output. The peptide-spectrum-match sequence with its modifications and the protein id are used only in the match-between-run module.
102 | 
103 | NOTE 2 : Users can also provide the default PSM export provided by PeptideShaker as source material for moFF.
104 | 
105 | 
106 | [Top of page](#moff)
107 | 
108 | ---
109 | 
110 | ## Sample data  ##
111 | 
112 | The  *sample_folder* contains a result set for 3 runs of the CPTAC study 6 (Paulovich, MCP Proteomics, 2010).
113 | These MS2 peptides are identified by X!Tandem and MSGF+ using SearchGUI and then processed by PeptideShaker. The [raw files](https://goo.gl/ukbpCI) for this study are required to apply moFF to the sample data.
114 | 
115 | [Top of page](#moff)
116 | 
117 | ---
118 | 
119 | ## Absence of Peak Sample Data ##
120 | 
121 | To evaluate the filtering of the matched peak, we provide a data set composed by 4 runs :
122 | 
123 | | File name  | iRT | yeast |
124 | | ------------- | ------------- | ------------- |
125 | | B002413_Ap_22cm_Yeast_171215184201 |    | x |
126 | | B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol |  x  |  |
127 | | B002419_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol_inYeast   |  x  | x |
128 | | B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol  |  x  |  |
129 | 
130 | Run *B002413* does not contain iRT peptides and it works as a control. No iRT peptides are expected after the matching-between-runs across all four runs.
131 | 
132 | To test the filter of the matched peak, you can follow the steps:
133 | - clone the moFF repository
134 | - download the .zip file that contains all the Thermo raw files from [here](http://genesis.ugent.be/uvpublicdata//moFF_absence_of_peak_dataset/)
135 | - unzip it inside  the folder *absence_peak_data*
136 | - check the input/output paths in the *configuration_iRT.ini*
137 | 
138 | then you can run moFF using:
139 | 
140 | `python moff_all.py  --config_file  absence_peak_data/configuration_iRT.ini `
141 | to run  mbr + apex and the filtering function
142 | 
143 | [Top of page](#moff)
144 | 
145 | ---
146 | 
147 | ## Match between runs ##
148 | 
149 | use :  `python moff_all.py --mbr only `
150 | ```
151 |   --loc_in                      the folder where the input files are located
152 |   --sample                      reg exp to filter the input file names (only with --loc_in input option)
153 |   --ext                         file extension of the input file. Default .txt
154 |   --log_label                   filename for the mbr log file. Default moFF_mbr
155 |   --w_filt                      width value for outlier filtering. Default 3
156 |   --out_flag                    if set, outliers for rt time alignment are filtered. Default value: True
157 |   --w_comb                      if set, RT model combination is weighted using training model errors: Default value: False
158 | ```
159 | 
160 | `python moff_mbr.py --loc_in sample_folder/ --mbr only `
161 | 
162 | This command runs the MBR modules. The output will be stored in a subfolder ('mbr_output') inside the specified input folder.
163 | The MBR module will consider all the .txt files present in the specified input folder as replicates (to select specific files or different extension, please refer to the example below).
164 | The files in *sample_folder/mbr_output* will be identical to the input files, but they will have an additional field ('matched') that specifies which peptides have been matched (1) or not (0). The MBR algorithm also produces a log file in the provided input directory.
165 | 
166 | 
167 | ### Customizing Match between runs ###
168 | 
169 | In case of a different extension (.list, etc), please use :
170 | 
171 | `python moff_mbr.py  --loc_in  sample_folder/ --ext list ` (Provide the extension without the period ('.'))
172 | 
173 | In case of using only specific input files within the provided directory, please use a regular expression:
174 | 
175 | `python moff_mbr.py --loc_in sample_folder/  --sample *_6A` (This can be combined with the aforementioned syntax)
176 | 
177 | You can set all the parameters values in a file and load them using  `--config_file`. For an example see `example_parameter_file.ini`
178 | 
179 | 
180 | 
181 | [Top of page](#moff)
182 | 
183 | ---
184 | 
185 | ## Apex intensity ##
186 | 
187 | use `python moff_all.py --mbr off `
188 | ```
189 |   --loc_in                      the folder containing all input files
190 |   --raw_repo                    the folder containing all the raw files
191 |   --tsv_list                    the input file with the MS2 peptides
192 |   --raw_list                    specify directly the  raw file
193 |   --tol                         mass tolerance (ppm)
194 |   --xic_length                  rt windows for xic (minutes). Default value is 3  min
195 |   --rt_peak_win                 time windows used to get the apex for the ms2 peptide/feature  (minutes). Default value is 1
196 |   --rt_peak_win_match           time windows used to get the apex for matched features (minutes). Default value is 1.2
197 |   --peptide_summary             flag that enables the output of the peptide summary intensity file. Default is disabled (0)
198 |   --tag_pepsum                  tag string that will be part of the  peptide summary intensity file name. Default is moFF_run
199 |   --loc_out                     output folder
200 |   --tag_pepsum                  a tag that is used in the peptide summary file name
201 | 
202 |   --match_filter                If set, filtering on the matched peak is activated. Default value: False
203 |   --ptm_file                    modification json ptm file. Default file ptm_setting.json
204 |   --quantile_thr_filtering      quantile value used to compute the filtering threshold for the matched peak . Default is 0.75
205 |   --sample_size                 percentage of MS2 identified peptides used to estimate the threshold
206 | ```
207 | 
208 | You can run the apex module in two ways:
209 | 
210 | `python moff_all.py  --mbr off --tsv_list sample_folder/20080311_CPTAC6_07_6A005.txt  --raw_list sample_folder/20080311_CPTAC6_07_6A005.RAW --tol 10 --loc_out output_moff --peptide_summary  `
211 | In this case you can specify more than one file, separated by a blank space.
212 | 
213 | In case you want to run the apex module on all the files in a folder (the raw files should also be located in a folder):
214 | 
215 | `python moff_all.py  --mbr on  --loc_in sample_folder/sample_data/  --raw_repo sample_folder/sample_data/your_raw_folder   --tol 10 --loc_out output_moff --peptide_summary  `
216 | 
217 | You can activate the filtering of the matching peptides setting `--match_filter`. In order to do the filtering:
218 | - `--ptm_file` MUST be specified and input files MUST contain a matched field.
219 | 
220 | This option is useful in the case you have run the mbr module alone and later you want to run the apex module separately.
221 | 
222 | WARNING :  in case of  --loc_in  and  --raw_repo  the raw file names MUST be the same as the input file names, otherwise the script gives you an error !
223 | 
224 | WARNING 1  :  you can not mix the two input ways ( --loc_in / --raw_repo and --tsv_list / --raw_list  ), otherwise the script gives you an error !
225 | 
226 | WARNING 2: mzML raw file MUST be only specified using `--tsv_list | --raw_list`. The `--raw_repo` option is not available for mzML files.
227 | 
228 | NOTE: all the parameters related to the time windows (xic_length, rt_peak_win, rt_peak_win_match) are basically half of the entire time window where the apex peak is searched or the XiC is retrieved. For a correct rt window, we suggest to set the **rt_peak_win** value equal to or slightly greater than the __dynamic exclusion duration set in your machine.__
229 | We also suggest to always set rt_peak_win_match slightly bigger than the value used for rt_peak_win.
230 | 
231 | 
232 | [Top of page](#moff)
233 | 
234 | ---
235 | 
236 | 
237 | ## Entire workflow ##
238 | 
239 | use `python moff_all.py --mbr on`
240 | ```
241 |   --config_file                 specify a moFF parameter file
242 |   --loc_in                      the folder containing all input files
243 |   --raw_repo                    the folder containing all the raw files
244 |   --tsv_list                    the input file with the MS2 peptides
245 |   --raw_list                    specify directly the  raw file
246 | 
247 |   --sample                      reg exp to filter the input file names (only with --loc_in input option)
248 |   --ext                         file extension of the input file. Default .txt
249 |   --log_label                   filename for the mbr log file. Default moFF_mbr
250 |   --w_filt                      width value for outlier filtering. Default 3
251 |   --out_flag                    if set, outliers for rt time alignment are filtered. Default value: True
252 |   --w_comb                      if set, RT model combination is weighted using training model errors: Default value: False
253 | 
254 |   --tol                         mass tolerance (ppm)
255 |   --xic_length                  rt windows for xic (minutes). Default value is 3  min
256 |   --rt_peak_win                 time windows used to get the apex for the ms2 peptide/feature  (minutes). Default value is 1
257 |   --rt_peak_win_match           time windows used to get the apex for matched features (minutes). Default value is 1.2
258 |   --peptide_summary             if set, export a peptide intensity summary tab-delimited file. Default value: False
259 |   --tag_pepsum                  tag string that will be part of the  peptide summary intensity file name. Default value is moFF_run
260 |   --loc_out                     output folder (default is the input folder, raw_repo)
261 |   --tag_pepsum                  a tag that is used in the peptide summary file name
262 | 
263 |   --match_filter                If set, filtering on the matched peak is activated. Default value: False
264 |   --ptm_file                    modification json ptm file. Default file ptm_setting.json
265 |   --quantile_thr_filtering      quantile value used to compute the filtering threshold for the matched peak . Default is 0.75
266 |   --sample_size                 percentage of MS2 identified peptides used to estimate the threshold
267 | ```
268 | 
269 | As for the apex module, you can input your data by specifying the folder :
270 | 
271 | `python moff_all.py --mbr all  --loc_in  sample_folder/   --raw_repo sample_folder/ --tol 10  --loc_out output_moff --peptide_summary`
272 | 
273 | OR, by specifying a list of input and raw files using:
274 | 
275 | `python moff_all.py  --mbr all --tsv_list  sample_folder/input_file1.txt sample_folder/input_file2.txt  --raw_list sample_folder/input_file1.raw sample_folder/input_file2.raw --tol 10 --loc_out output_moff --peptide_summary `
276 | 
277 | The options are identical for both apex and MBR modules. The output for the latter (MBR) is stored in the folder sample_folder/mbr_output, while the former (apex) generates files in the specified output_moff folder. Log files for both algorithms are generated in the respective folders.
278 | 
279 | In case you activate the filtering of the matched peptides, you have to specify with `--ptm_file` a valid json file that describes the modifications used in your experiment. See section [Post Translation Modification file](#post-translation-modification-file).
280 | 
281 | You can set all the parameters values in a file and load them using `--config_file`. For an example see `example_parameter_file.ini`
282 | 
283 | WARNING: Using `--tsv_list | --raw_list`  you can not filter the input files using `--sample --ext` as you can with `--loc_in | --raw_repo`
284 | 
285 | WARNING: mzML raw files MUST be specified using `--tsv_list | --raw_list`. The `--raw_repo` option is not available for mzML files.
286 | 
287 | NOTE: The considerations on the retention time window parameters (xic_length, rt_peak_win, rt_peak_win_match) mentioned for the apex module are still valid for the entire workflow
288 | 
289 | 
290 | [Top of page](#moff)
291 | 
292 | 
293 | ---
294 | ## Post Translation Modification file ##
295 | 
296 | The Post Translation Modifications must be indicated in a json file with the following structure :
297 | ```
298 | {
299 | "tagModification": {"deltaChem":[H atom, C atom, N atom ,O atom],"desc":"name unimod : unimod_id"},
300 | }
301 | ```
302 | 
303 | - `"tagModification"` : the tag used in modified sequence for the modification
304 | - `"deltaChem":[H atom, C atom, N atom ,O atom]` : the delta of the chemical composition of the modification. The order of the elements is fixed, so pay attention when you add your modification
305 | - `desc` : name of the modification and its unimod id.
306 | 
307 | For example a ptm file (ptm_setting_ps.json) with Carboxyamidomethylation of Cysteine and Oxidation for PeptideShaker output looks like:
308 | ```
309 | {
310 | "<cmm>": {"deltaChem":[3,2,1,1],"desc":"Carboxyamidomethylation C unimod:4"},
311 | "<ox>": {"deltaChem":[0,0,0,1],"desc":"oxidation oxidation unimod:35" }
312 | }
313 | ```
314 | 
315 | 
316 | [Top of page](#moff)
317 | 
318 | ---
319 | 
320 | ## Docker ##
321 | 
322 | 
323 | Once you have cloned or downloaded the moFF repository, inside the moFF folder you can build the docker image with the command
324 | ```
325 | docker build . -t moff
326 | ```
327 | 
328 | Inside the docker you can run moFF with all the commands shown above. Run example with the apex module:
329 | ```
330 | docker run -v /home/user/data:/data_input -i -t moff python moff_all.py --tsv_list /data_input/input_file.tab --raw_list /data_input/input_file.raw --tol 10 --rt_peak_win 1 --xic_length 3 --loc_out /data_input/output_folder --mbr off
331 | ```
332 | 
333 | [Top of page](#moff)
334 | 
335 | ---
336 | 
337 | 
338 | 
339 | ## Output data ##
340 | 
341 | The output consists of : 
342 | 
343 | - a tab delimited file (with the same name of the input raw file) containing the apex intensity values and additional information (a)
344 | - a log file specific to the apex module (b) or the MBR module (c)
345 | - peptide summary intensity file (when peptide summary option is enabled) (d) 
346 | 
347 | (a) Description of the fields added by moFF in the output file:
348 | 
349 | Parameter | Meaning
350 | --- | -------------- | 
351 | *rt_peak* | retention time (in seconds) for the discovered apex peak
352 | *SNR*     | signal-to-noise ratio of the peak intensity.
353 | *log_L_R* | peak shape. 0 indicates that the peak is centered. Positive or negative values are an indicator for respectively right or left skewness 
354 | *intensity* |  MS1 intensity
355 | *log_int* | log2 transformed MS1 intensity 
356 | *lwhm* | first rt value where the intensity is at least the 50% of the apex peak intensity on the left side
357 | *rwhm* | first rt value where the intensity is at least the 50% of the apex peak intensity on the right side
358 | *5p_noise* | 5th percentile of the intensity values contained in the XiC. This value is used for the *SNR* computation
359 | *10p_noise* |  10th percentile of the intensity values contained in the XiC.
360 | *code_unique* | this field is concatenation of the peptide sequence and mass values. It is used by moFF during the match-between-runs.
361 | *matched* | this value indicates if the feature has been added by the match-between-run (1) or is an MS2 identified feature (0) 
362 | 
363 | (b) A log file is also provided containing the process output. 
364 | 
365 | (c) A log file where all the information about all the trained linear model are displayed.
366 | 
367 | (d) The peptide summary intensity is a tab delimited file where for each peptide sequence MS1 intensities are summed for all the occurrences in each run (aggregated by charge states and modification).
368 | 
369 | In case you run the entire workflow on a setting that contains N runs, the size of the file (rows and columns) will be **M x (N+2)**, where M is the number of peptides (across all the runs) and N are the summed intensity columns, plus the peptide sequence and the protein ids. In case of running only the apex module, the size of the file will be M x 3 (only one replicate is considered).
370 | 
371 | If a peptide is shared across several proteins, the protein column will also contain all the shared protein ids, usually separated by _;_ or _,_.
372 | In case a peptide is not quantified it has 0 as intensities. The peptide summary intensity could be used for downstream statistical analysis such as in MsQRob
373 | 
374 | 
375 | NOTE : The log files and the output files are in the output folder specified by the user.
376 | 
377 | [Go to top of page](#moff)
378 | 


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
  1 | # moFF : A modest Feature Finder (but still robust) to extract apex MS1 intensity directly from Thermo  raw file 
  2 | ================================
  3 | 
  4 | 
  5 | moFF is written in python and it is based on a Go library that is able to read raw file from Thermo machine
  6 | 
  7 | Required library :
  8 | 
  9 | Python 2.7
 10 | pandas  > 0.17.
 11 | numpy > 1.9.0
 12 | argparse > 1.2.1 
 13 | scikit-learn > 0.17
 14 | 
 15 | moFF is composed by two stand alone modules : 
 16 | 	moff_mbr.py :  matching between run 
 17 | 	moff.py :  apex intensity
 18 | 
 19 | To run  the entire workflow (mbr and apex ) you should  use  moff_all.py.
 20 | 
 21 | 
 22 | 
 23 | moFF uses txic to extract the XiC data from the raw files, so the txic executable must be located in the same folder where you have all the moFF scripts.
 24 | 
 25 | The txic program is compatible with  the raw files of all the Orbitrap and triple quadrupole Thermo machines. 
 26 | For the moment it does not work with the Thermo Fusion machine.
 27 | 
 28 | The input files that contain the list of the MS2 identified peptides (you can use any search engine) must contain the information shown in moff_setting.properties for each peptide.
 29 | moff_setting.properties : it specifies the minimum specific requirements of the input files, that are :
 30 | 	-- tab delimited file
 31 | 	-- the header of the infput file should  contains the following the fields  and columnns names  :  
 32 | 		'peptide' : sequence of the peptide
 33 | 		'prot': protein ID 
 34 | 		'rt': retention time of peptide  
 35 | 		'mz' : mass over charge
 36 | 		'mass' : mass of the peptide
 37 | 		'charge' : charge of the ionized peptide
 38 | 
 39 | see the sample input files in the folder f1_folder.
 40 | The retention time must be specified in second
 41 | 
 42 | 
 43 | In the folder f1_folder you have three input files, that contain the MS2  identified  peptides (using MASCOT) of three runs (three technical replicates ) from  the CPTAC study 6. 
 44 | you can download the relative raw files from https://goo.gl/ukbpCI, in order to run the next examples.
 45 | 
 46 | 
 47 | Matching between run module:
 48 | 
 49 | use :  python moff_mbr.py -h
 50 | 
 51 |   --inputF LOC_IN             specify the folder of the input MS2 peptide files [REQUIRED]
 52 |   --sample SAMPLE            specify which replicate files are used fot mbr [regular expr. are valid]
 53 |   --ext EXT                  specify the exstension of the input file (txt as default value)
 54 |   --log_file_name LOG_LABEL  a label name for the log file (moFF_mbr.log as default log file name)
 55 |   --filt_width W_FILT        iwidth value of the filter (k * mean(Dist_Malahobis) , k = 2 as default)
 56 |   --out_filt OUT_FLAG        filter outlier in each rt time allignment (active as default)
 57 |   --weight_comb W_COMB       weights for model combination combination : 0 for no weight (default) 1 weighted devised by model errors.
 58 | 
 59 | 
 60 | python moff_mbr.py --inputF f1_folder/ 
 61 | 
 62 | It runs the mbr modules and save the output files in a subfolder  called 'mbr_output' inside the folder given in input.
 63 | The mbr module will take all the .txt files in your input folder as replicates. (to select specific files or different extension see below))
 64 | In the f1_folder/mbr_output you will find the same number of the input files, but they will have a new field called 'matched' that specifies which peptides are matched  (1) or the not (0)
 65 | The rt field of the matched peptide contains the predicted retention time.
 66 | 
 67 | if your input files inside your working folder  have another extension like (.list, etc) you can use :
 68 | 
 69 | use : python moff_mbr.py --inputF f1_folder/ --ext list ( Do not specify '.list' but only 'list')
 70 | 
 71 | if you need to select specific input files from your working folder  ( choose  ) , you can use an regular expression as:
 72 | 
 73 | use : python moff_mbr.py --inputF f1_folder/  --sample *_6A (you can also use --ext option if you need)
 74 | 
 75 | the mbr will output a log file (moFF_mbr.log as default log file name) with all the details and it is saved inside the  --inputF folder given as input
 76 | 
 77 | 
 78 | 
 79 | Apex module:
 80 | 
 81 | use  python moff.py -h
 82 | 
 83 |   --input NAME                        specify the input file with the of MS2 peptides
 84 |   --tol TOLL                          specify the tollerance parameter in ppm
 85 |   --rt_w RT_WINDOW                    specify rt window for xic (minute). Default value is 3 min
 86 |   --rt_p RT_P_WINDOW                  specify the time windows for the peak ( minute). Default value is 0.1
 87 |   --rt_p_match RT_P_WINDOW_MATCH      specify the time windows for the matched peptide peak ( minute). Default value is 0.4
 88 |   --raw_repo RAW                      specify the raw file repository
 89 |   --output_folder LOC_OUT             specify the folder output
 90 | 
 91 | python moff.py --input f1_folder/20080311_CPTAC6_07_6A005.txt  --raw_repo f1_folder/ --tol 10 
 92 |  
 93 | it runs the apex module on the input file , extracting the apex intensity from the respective raw file in folder --raw_repo.
 94 | In the output files, moFF just add the following fields to  your origin input file:
 95 | 
 96 | 	"intensity" intensity, taking the highest peak in the XIC
 97 | 	"rt_peak" rt of the highest peak
 98 | 	"lwhm" left width half maximun of the signal in seconds
 99 | 	"rwhm" right width half maximun of the signal in seconds
100 | 	"SNR" signal-to-noise
101 | 	"log_L_R" log ratio of lwhm over rwhm (peak shape )
102 | 	"log_int" log 2 of the intesity 
103 | 
104 | It generates a .log file (with same name of input file ) that contains  detailed information for each peak retrieved.
105 | This module determines automatically if the input file contains matched peptides or not.
106 | 
107 | REMARK : the raw file name MUST be the same as the input file name, otherwise the script gives you an error!
108 | 
109 | python moff.py --input f1_folder/20080311_CPTAC6_07_6A005.txt  --raw_repo f1_folder/ --tol 10 --output_folder output_moff
110 | It will put the results in the folder output_moff.
111 | 
112 | 
113 | Run the entire workflow (Mbr + Apex ) :
114 | 
115 | use python moff_all.py
116 | 
117 | 	--inputF LOC_IN       specify the folder of the input MS2 peptide list files
118 |   	--sample SAMPLE       specify which replicates to use for mbr; regular expressions are valid
119 |   	--ext EXT             specify the file extension of the input files
120 |   	--log_file_name LOG_LABEL a label name to use for the log file
121 |   	--filt_width W_FILT   width value of the filter k * mean(Dist_Mahalanobis)
122 |   	--out_filt OUT_FLAG   filter outliers in each rt time alignment
123 |   	--weight_comb W_COMB  weights for model combination : 0 for no weight, 1 for weighting by the training error of the model.
124 |   	--tol TOLL            specify the tolerance parameter in ppm
125 |   	--rt_w RT_WINDOW      specify rt window for xic (minute). Default value is  3  min
126 |   	--rt_p RT_P_WINDOW    specify the time windows for the peak ( minute). Default value is 0.1
127 |   	--rt_p_match RT_P_WINDOW_MATCH	specify the time windows for the matched peptide peak ( minute). Default value is 0.4
128 |   	--raw_repo RAW        	specify the raw file repository
129 |   	--output_folder LOC_OUT		specify the folder output
130 | 
131 | python moff_all.py --inputF  f1_folder/   --raw_repo f1_folder/ --output_folder output_moff
132 | 
133 | The options are the same as those of the two modules. The mbr output files are stored in the folder f1_folder/mbr_output and the results of the apex module are stored in output_moff.
134 | The log files are also stored in the respective folders.
135 | 
136 | 
137 | 


--------------------------------------------------------------------------------
/RawFileRdr_License_Agreement_RevA.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/moFF/279f0101efc031d13c2a1729023df33a796d9eb9/RawFileRdr_License_Agreement_RevA.txt


--------------------------------------------------------------------------------
/ThermoFisher.CommonCore.BackgroundSubtraction.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/moFF/279f0101efc031d13c2a1729023df33a796d9eb9/ThermoFisher.CommonCore.BackgroundSubtraction.dll


--------------------------------------------------------------------------------
/ThermoFisher.CommonCore.Data.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/moFF/279f0101efc031d13c2a1729023df33a796d9eb9/ThermoFisher.CommonCore.Data.dll


--------------------------------------------------------------------------------
/ThermoFisher.CommonCore.MassPrecisionEstimator.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/moFF/279f0101efc031d13c2a1729023df33a796d9eb9/ThermoFisher.CommonCore.MassPrecisionEstimator.dll


--------------------------------------------------------------------------------
/ThermoFisher.CommonCore.RawFileReader.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/moFF/279f0101efc031d13c2a1729023df33a796d9eb9/ThermoFisher.CommonCore.RawFileReader.dll


--------------------------------------------------------------------------------
/absence_peak_data/B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.txt:
--------------------------------------------------------------------------------
  1 | peptide	mod_peptide	prot	Type	Raw file	Experiment	mz	charge	another m/z	mass	rt	PEP	Reverse
  2 | TCVADESHAGCEK	_TC<cmm>VADESHAGC<cmm>EK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	488.534423828125	3	488.534502	1462.58168	1207.74	0.007303100000000002	
  3 | TCVADESHAGCEK	_TC<cmm>VADESHAGC<cmm>EK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	732.2982788085941	2	732.298115	1462.58168	1207.98	0.000528	
  4 | LGGNEQVTR	_LGGNEQVTR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	487.256774902344	2	487.256705	972.498858	1297.26	3.4299e-12	
  5 | YVLEHHPR	_YVLEHHPR_	P00560	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	526.2781982421881	2	525.777608	1049.54066	1299.12	0.0085868	
  6 | QNCDQFEK	_QNC<cmm>DQFEK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	534.7244873046881	2	534.724381	1067.43421	1308.7800000000002	0.003908099999999999	
  7 | SHCIAEVEK	_SHC<cmm>IAEVEK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	536.758117675781	2	536.758223	1071.50189	1331.16	0.001059	
  8 | LQQLEDK	_LQQLEDK_	CON__P34955	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	437.237396240234	2	437.23745	872.4603470000002	1366.44	7.9653e-05	
  9 | ALGGEDVR	_ALGGEDVR_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	408.714263916016	2	408.714142	815.413731	1373.4599999999996	0.0069494	
 10 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	644.8226318359381	2	644.822606	1287.63066	1418.16	0.00070708	
 11 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	430.21749877929705	3	430.217496	1287.63066	1418.52	0.00021801	
 12 | RVLGQLHGGPSSCSATGTNR	_RVLGQLHGGPSSC<cmm>SATGTNR_	CON__P15636	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	686.010803222656	3	685.676466	2054.00757	1436.5199999999998	0.0057482	
 13 | HTLNQIDSVK	_HTLNQIDSVK_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	577.8118896484381	2	577.8118450000002	1153.60914	1461.7199999999998	0.0016408000000000002	
 14 | AGFAGDDAPR	_AGFAGDDAPR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	488.727600097656	2	488.727781	975.441008	1462.86	0.0030172	
 15 | YICDNQDTISSK	_YIC<cmm>DNQDTISSK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	722.3251342773441	2	722.324656	1442.63476	1490.88	0.0037977	
 16 | DLGEEHFK	_DLGEEHFK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	487.73269653320295	2	487.73253200000005	973.45051	1497.4199999999996	0.0090248	
 17 | IGSEVYHNLK	_IGSEVYHNLK_	P00925;P00924	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	580.307861328125	2	580.308938	1158.60332	1523.34	0.0070383	
 18 | VASLRETYGDMADCCEK	_VASLRETYGDM<ox>ADC<cmm>C<cmm>EK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	674.619689941406	3	674.2851469999998	2019.83361	1525.02	0.004357	
 19 | LSSPATLNSR	_LSSPATLNSR_	CON__P00761	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	523.2855224609381	2	523.2854629999998	1044.55637	1532.64	0.00024491	
 20 | STLVGHDTFTK	_STLVGHDTFTK_	CON__Streptavidin	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	603.312072753906	2	603.3116769999998	1204.6088	1555.5	0.00053611	
 21 | VEIIANDQGNR	_VEIIANDQGNR_	P22202	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	614.8179931640631	2	614.817658	1227.62076	1559.0400000000004	8.082199999999999e-05	
 22 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	644.8233642578131	2	644.822606	1287.63066	1566.42	8.4721e-09	
 23 | QQTQHAVEGDCDIHVLK	_QQTQHAVEGDC<cmm>DIHVLK_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	495.492095947266	4	495.241632	1976.93742	1569.6	0.0040366	
 24 | LVTDLTK	_LVTDLTK_	CON__P02769;CON__P02768-1	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	395.239379882813	2	395.239461	788.46437	1605.36	0.0021019000000000003	
 25 | EYEATLEECCAK	_EYEATLEEC<cmm>C<cmm>AK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	751.8107299804691	2	751.810524	1501.6065	1616.4	4.8215e-05	
 26 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.2978515625	2	547.298039	1092.58152	1627.86	0.00025242	
 27 | VGGHAAEYGAEALER	_VGGHAAEYGAEALER_	CON__P01966	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	510.58297729492205	3	510.58295	1528.72702	1640.6999999999996	0.0063424	
 28 | LNNELLAK	_LNNELLAK_	CON__P34955	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	457.76882934570295	2	457.768917	913.523281	1661.2199999999998	0.0067480000000000005	
 29 | AEFVEVTK	_AEFVEVTK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	461.747680664063	2	461.74765	921.480748	1664.46	0.0022896	
 30 | FSVSGEGEGDATYGK	_FSVSGEGEGDATYGK_	CON__Q9U6Y5	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	752.334838867188	2	752.333535	1502.65252	1668.84	0.004831800000000001	
 31 | FEGDTLVNR	_FEGDTLVNR_	CON__Q9U6Y5	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	525.7652587890631	2	525.764363	1049.51417	1686.5400000000004	0.001196	
 32 | VEATFGVDESNAK	_VEATFGVDESNAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.828796386719	2	683.8278889999998	1365.64122	1698.84	8.6027e-12	
 33 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	644.822998046875	2	644.822606	1287.63066	1699.14	0.00088742	
 34 | EACFAVEGPK	_EAC<cmm>FAVEGPK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	554.260986328125	2	554.2605990000002	1106.50665	1699.2	6.717600000000001e-05	
 35 | YQVTVIDAPGHR	_YQVTVIDAPGHR_	P02994	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	452.57376098632795	3	452.573726	1354.69935	1711.9199999999996	0.0046098	
 36 | VATVSLPR	_VATVSLPR_	CON__P00761	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	421.75830078125	2	421.758352	841.5021519999998	1723.26	0.00012912	
 37 | LVLVGDGGTGK	_LVLVGDGGTGK_	P32835;P32836	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	508.292663574219	2	508.292756	1014.57096	1746.12	0.0011582	
 38 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.2984619140631	2	547.298039	1092.58152	1753.26	0.0094572	
 39 | ECCHGDLLECADDRADLAK	_EC<cmm>C<cmm>HGDLLEC<cmm>ADDRADLAK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	750.3193359375	3	749.9857599999998	2246.93545	1758.4800000000002	5.5767e-05	
 40 | DDPHACYSTVFDK	_DDPHAC<cmm>YSTVFDK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	518.8897705078131	3	518.889162	1553.64566	1781.16	0.0010465	
 41 | NPVILADACCSR	_NPVILADAC<cmm>C<cmm>SR_	P06169	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	688.3263549804691	2	688.326479	1374.6384	1795.9800000000002	0.0014334	
 42 | IWHHTFYNELR	_IWHHTFYNELR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	506.256256103516	3	505.921237	1514.74188	1797.36	0.0057702	
 43 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298767089844	2	547.298039	1092.58152	1799.94	6.069600000000002e-13	
 44 | HSTVFDNLPNPEDRK	_HSTVFDNLPNPEDRK_	CON__Q29443;CON__Q0IIK2	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	590.294250488281	3	590.2919469999998	1767.8540100000002	1803.18	0.00025780000000000003	
 45 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	645.32373046875	2	644.822606	1287.63066	1803.78	0.006525399999999999	
 46 | HLVDEPQNLIK	_HLVDEPQNLIK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	435.910339355469	3	435.910227	1304.70885	1806.42	3.6826e-05	
 47 | HLVDEPQNLIK	_HLVDEPQNLIK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	653.3619384765631	2	653.3617019999998	1304.70885	1806.48	3.9546e-11	
 48 | LQAEIEGLK	_LQAEIEGLK_	CON__P05787	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	500.7873229980471	2	500.78730700000006	999.560061	1817.22	0.0095432	
 49 | IGGIGTVPVGR	_IGGIGTVPVGR_	P02994	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	513.308898925781	2	513.308741	1024.60293	1839.78	1.5818e-05	
 50 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.838684082031	2	669.8380589999998	1337.66157	1862.94	7.4054e-16	
 51 | YLYEIAR	_YLYEIAR_	CON__P02769;CON__P02768-1	MULTI-SECPEP	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	464.531829833984	2	464.25036	926.486168	1867.62	0.0070293000000000005	
 52 | ESTLHLVLR	_ESTLHLVLR_	P0CH09;P0CH08;P0CG63;P05759	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	534.314270019531	2	534.314023	1066.61349	1881.3	0.00473	
 53 | LKPDPNTLCDEFK	_LKPDPNTLC<cmm>DEFK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	526.260925292969	3	526.260708	1575.76029	1893.18	7.773999999999999e-09	
 54 | LPLQDVYK	_LPLQDVYK_	P02994	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	488.280029296875	2	488.279118	974.543683	1904.64	0.0076661	
 55 | RHPEYAVSVLLR	_RHPEYAVSVLLR_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	480.608642578125	3	480.60877	1438.80448	1908.48	0.00013782	
 56 | AVFPSIVGRPR	_AVFPSIVGRPR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	400.24020385742205	3	400.240018	1197.69823	1910.52	3.151600000000001e-06	
 57 | AVFPSIVGRPR	_AVFPSIVGRPR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	599.8565673828131	2	599.85639	1197.69823	1910.76	0.0028858	
 58 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.854919433594	2	683.853709	1365.69287	1922.46	2.1038e-16	
 59 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298278808594	2	547.298039	1092.58152	1946.7	8.6306e-15	
 60 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.2982177734381	2	547.298039	1092.58152	1959.3	0.00062374	
 61 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.838806152344	2	669.8380589999998	1337.66157	1970.7	1.6978e-06	
 62 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298034667969	2	547.298039	1092.58152	1971.84	1.8827e-09	
 63 | TPIVGQPSIPGGPVR	_TPIVGQPSIPGGPVR_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	737.92333984375	2	737.922458	1473.83036	1980.18	4.2493999999999996e-21	
 64 | TGPNLHGLFGR	_TGPNLHGLFGR_	CON__P62894	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	390.212066650391	3	390.21224	1167.61489	1982.52	0.0052224	
 65 | ANELLINVK	_ANELLINVK_	P00330	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	507.303070068359	2	507.303124	1012.5917	1983.24	0.0040695	
 66 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298095703125	2	547.298039	1092.58152	1984.44	0.0041457	
 67 | LSISETYDLK	_LSISETYDLK_	CON__P34955	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	584.810974121094	2	584.808436	1167.60232	1986.12	0.0023285	
 68 | LSELEAALQR	_LSELEAALQR_	CON__P05787;CON__REFSEQ:XP_092267;CON__H-INV:HIT000292931;CON__Q9H552	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	565.315246582031	2	565.31422	1128.61389	1990.1400000000006	0.00065691	
 69 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298095703125	2	547.298039	1092.58152	1997.34	8.6306e-15	
 70 | AGLQFPVGR	_AGLQFPVGR_	Q12692	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	472.769378662109	2	472.769252	943.52395	2006.22	0.00060853	
 71 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298034667969	2	547.298039	1092.58152	2009.76	0.00035391	
 72 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298095703125	2	547.298039	1092.58152	2022.66	0.0011562	
 73 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.853759765625	2	683.853709	1365.69287	2029.38	7.5797e-07	
 74 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.853820800781	2	683.853709	1365.69287	2047.14	4.6213e-05	
 75 | VPQVSTPTLVEVSR	_VPQVSTPTLVEVSR_	CON__P02769;CON__P02768-1	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	756.4257202148441	2	756.42503	1510.83551	2054.82	9.242e-10	
 76 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	466.56106567382795	3	466.561375	1396.66229	2059.02	0.00040223	
 77 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.33984375	2	699.338424	1396.66229	2060.52	7.3166e-16	
 78 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298095703125	2	547.298039	1092.58152	2060.64	0.00059209	
 79 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.853820800781	2	683.853709	1365.69287	2074.080000000001	4.6115e-05	
 80 | LAVNMVPFPR	_LAVNM<ox>VPFPR_	CON__ENSEMBL:ENSBTAP00000025008	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	580.318298339844	2	580.318251	1158.62195	2082.72	0.00060611	
 81 | LAVNMVPFPR	_LAVNM<ox>VPFPR_	CON__ENSEMBL:ENSBTAP00000025008	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	580.318542480469	2	580.318251	1158.62195	2095.38	0.00010404	
 82 | PLLVEPEGLEK	_PLLVEPEGLEK_	CON__ENSEMBL:ENSBTAP00000024146	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	612.348327636719	2	612.347729	1222.6809	2095.6800000000007	0.0020652	
 83 | AVFPSIVGR	_AVFPSIVGR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	473.279174804688	2	473.279452	944.544351	2103.24	0.006728100000000001	
 84 | LVNELTEFAK	_LVNELTEFAK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	582.319152832031	2	582.318971	1162.62339	2103.84	3.6337e-06	
 85 | LILPGELAK	_LILPGELAK_	P02294;P02293	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	477.305297851563	2	477.30513600000006	952.595718	2146.08	0.004601600000000001	
 86 | ADVTPADFSEWSK	_ADVTPADFSEWSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	726.8375854492191	2	726.8357139999998	1451.65687	2152.56	2.4022e-07	
 87 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.854064941406	2	683.853709	1365.69287	2152.92	0.00040175	
 88 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.8394775390631	2	669.8380589999998	1337.66157	2153.46	0.00079166	
 89 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298583984375	2	547.298039	1092.58152	2154.48	0.0025257	
 90 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.3391723632809	2	699.338424	1396.66229	2185.2000000000007	4.3384e-06	
 91 | GVVDSEDLPLNLSR	_GVVDSEDLPLNLSR_	P02829;P15108	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	757.397338867188	2	757.39647	1512.77839	2191.8	0.0005147	
 92 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339721679688	2	699.338424	1396.66229	2207.7000000000007	0.00031579	
 93 | VVVLPFPSK	_VVVLPFPSK_	CON__Q58D62	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	493.30810546875	2	493.307678	984.600803	2218.14	0.006728100000000001	
 94 | QDGQFSVLFTK	_QDGQFSVLFTK_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	635.3277587890631	2	635.3273280000002	1268.6401	2236.2000000000007	0.0016334	
 95 | TVMENFVAFVDK	_TVM<ox>ENFVAFVDK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	708.34814453125	2	708.347403	1414.68025	2255.580000000001	4.4343e-05	
 96 | LVDTFLEDVK	_LVDTFLEDVK_	CON__P34955	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	589.819030761719	2	589.818804	1177.62306	2255.88	0.00057709	
 97 | DAGTIAGLNVLR	_DAGTIAGLNVLR_	P10591;P10592;P16474;P22202	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	600.341003417969	2	600.340769	1198.66699	2258.58	0.0013466	
 98 | EALDFFAR	_EALDFFAR_	P00330;P00331;P38113	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	484.745544433594	2	484.745442	967.476331	2259.6	0.0050642	
 99 | IGLDCASSEFFK	_IGLDC<cmm>ASSEFFK_	P00925;P00924	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	687.324462890625	2	687.323928	1372.6333	2265.12	0.00021901	
100 | QDGQFSVLFTK	_QDGQFSVLFTK_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	635.327575683594	2	635.3273280000002	1268.6401	2269.92	0.0011582	
101 | IINEPTAAAIAYGLDK	_IINEPTAAAIAYGLDK_	P10591;P10592;P22202	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	830.9541625976559	2	830.451245	1658.88794	2271.24	1.8319e-07	
102 | SYELPDGQVITIGNER	_SYELPDGQVITIGNER_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	895.951049804688	2	895.949598	1789.8846399999998	2283.84	0.00038165	
103 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339599609375	2	699.338424	1396.66229	2319.66	0.00080568	
104 | LGEYGFQNALIVR	_LGEYGFQNALIVR_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	740.4022827148441	2	740.401358	1478.78816	2326.56	4.0991000000000005e-09	
105 | GTFIIDPGGVIR	_GTFIIDPGGVIR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	622.854675292969	2	622.853512	1243.69247	2355.36	1.9825e-06	
106 | GTFIIDPAAVIR	_GTFIIDPAAVIR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	636.86962890625	2	636.869163	1271.7237699999996	2529.54	5.1317e-05	
107 | YFPTQALNFAFK	_YFPTQALNFAFK_	P18239;P18238;P04710	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	724.3764038085941	2	723.874445	1445.73434	2557.8	0.00023335	
108 | 


--------------------------------------------------------------------------------
/absence_peak_data/B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.txt:
--------------------------------------------------------------------------------
  1 | peptide	mod_peptide	prot	Type	Raw file	Experiment	mz	charge	another m/z	mass	rt	PEP	Reverse
  2 | TCVADESHAGCEK	_TC<cmm>VADESHAGC<cmm>EK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	732.298461914063	2	732.298115	1462.58168	1209.12	0.0003041	
  3 | LGGNEQVTR	_LGGNEQVTR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	487.257171630859	2	487.256705	972.498858	1298.76	1.3230999999999997e-17	
  4 | HIIVDGK	_HIIVDGK_	P00359	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	391.23196411132795	2	391.231971	780.449388	1333.1999999999996	0.00071113	
  5 | SHCIAEVEK	_SHC<cmm>IAEVEK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	536.758605957031	2	536.758223	1071.50189	1333.32	2.4132e-12	
  6 | LQQLEDK	_LQQLEDK_	CON__P34955	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	437.237426757813	2	437.23745	872.4603470000002	1368.54	0.0065259	
  7 | ALGGEDVR	_ALGGEDVR_	CON__P12763	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	408.714202880859	2	408.714142	815.413731	1375.9800000000002	0.0024061	
  8 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	644.8223876953131	2	644.822606	1287.63066	1419.72	0.006525399999999999	
  9 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	430.217529296875	3	430.217496	1287.63066	1419.84	0.0030078	
 10 | IEEELGDK	_IEEELGDK_	P00925	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	466.732360839844	2	466.732197	931.449842	1430.28	0.0034974000000000003	
 11 | LCQLCAGK	_LC<cmm>QLC<cmm>AGK_	CON__Q29443;CON__Q0IIK2;CON__Q2HJF0	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	475.233032226563	2	475.23333	948.452107	1435.38	0.0064153000000000005	
 12 | HTLNQIDSVK	_HTLNQIDSVK_	CON__P12763	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	577.81201171875	2	577.8118450000002	1153.60914	1463.1	0.00042423	
 13 | AGFAGDDAPR	_AGFAGDDAPR_	P60010	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	488.727783203125	2	488.727781	975.441008	1464.84	0.0030092	
 14 | YICDNQDTISSK	_YIC<cmm>DNQDTISSK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	722.325317382813	2	722.324656	1442.63476	1491.96	0.0002945	
 15 | DLGEEHFK	_DLGEEHFK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	487.732788085938	2	487.73253200000005	973.45051	1498.86	0.0090248	
 16 | IIAPPER	_IIAPPER_	P60010	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	398.23974609375	2	398.239796	794.4650379999998	1521.7199999999998	0.003918199999999999	
 17 | IGSEVYHNLK	_IGSEVYHNLK_	P00925;P00924	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	580.3079833984381	2	580.308938	1158.60332	1524.6	0.0014977	
 18 | LSSPATLNSR	_LSSPATLNSR_	CON__P00761	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	523.285705566406	2	523.2854629999998	1044.55637	1534.74	5.2774e-07	
 19 | STLVGHDTFTK	_STLVGHDTFTK_	CON__Streptavidin	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	603.312072753906	2	603.3116769999998	1204.6088	1553.04	6.2115e-05	
 20 | VEIIANDQGNR	_VEIIANDQGNR_	P22202	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	614.817932128906	2	614.817658	1227.62076	1558.92	1.6394000000000002e-07	
 21 | STLVGHDTFTK	_STLVGHDTFTK_	CON__Streptavidin	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	603.3111572265631	2	603.3116769999998	1204.6088	1565.4	0.0057552	
 22 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	644.822998046875	2	644.822606	1287.63066	1566.48	3.3689e-09	
 23 | LGGNEQVTR	_LGGNEQVTR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	487.256591796875	2	487.256705	972.498858	1571.5800000000004	0.0048055	
 24 | LVTDLTK	_LVTDLTK_	CON__P02769;CON__P02768-1	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	395.239318847656	2	395.239461	788.46437	1607.0400000000004	0.0017614	
 25 | TASGNIIPSSTGAAK	_TASGNIIPSSTGAAK_	P00358;P00359;P00360	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	687.864990234375	2	687.8648049999998	1373.71506	1612.44	3.4597e-06	
 26 | EYEATLEECCAK	_EYEATLEEC<cmm>C<cmm>AK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	751.81103515625	2	751.810524	1501.6065	1617.3	9.7629e-06	
 27 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.297912597656	2	547.298039	1092.58152	1629.5400000000004	0.00010418	
 28 | VGGHAAEYGAEALER	_VGGHAAEYGAEALER_	CON__P01966	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	510.583099365234	3	510.58295	1528.72702	1644.96	0.006312300000000001	
 29 | NYELLCGDNTRK	_NYELLC<cmm>GDNTRK_	CON__Q29443;CON__Q0IIK2	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	494.905029296875	3	494.905035	1481.6932800000004	1654.98	0.0082133	
 30 | LNNELLAK	_LNNELLAK_	CON__P34955	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	457.768859863281	2	457.768917	913.523281	1663.62	0.0012598	
 31 | AEFVEVTK	_AEFVEVTK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	461.74752807617205	2	461.74765	921.480748	1666.32	0.0022896	
 32 | HLEGISDADIAK	_HLEGISDADIAK_	P00950	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	423.55416870117205	3	423.55422	1267.64083	1668.3	0.00044373	
 33 | HLEGISDADIAK	_HLEGISDADIAK_	P00950	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	634.8277587890631	2	634.827692	1267.64083	1668.6	3.608400000000001e-07	
 34 | HLQLAIR	_HLQLAIR_	P04912;P04911;Q12692	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	425.766418457031	2	425.766512	849.5184710000002	1679.4	0.0027225	
 35 | DQNYPGAIAIHHPNVAEK	_DQNYPGAIAIHHPNVAEK_	CON__P15636	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	659.00048828125	3	658.665784	1972.97552	1692.1800000000005	0.0078922	
 36 | VVDLVEHVAK	_VVDLVEHVAK_	P00358;P00359	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	554.821594238281	2	554.821681	1107.62881	1695.4199999999996	0.0020657	
 37 | VEATFGVDESNAK	_VEATFGVDESNAK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.8287353515631	2	683.8278889999998	1365.64122	1699.2599999999998	4.4566000000000005e-09	
 38 | EACFAVEGPK	_EAC<cmm>FAVEGPK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	554.261047363281	2	554.2605990000002	1106.50665	1699.32	0.00023988	
 39 | YNGVFQECCQAEDK	_YNGVFQEC<cmm>C<cmm>QAEDK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	874.35693359375	2	874.3561609999998	1746.69777	1701.9	0.00037694	
 40 | YQVTVIDAPGHR	_YQVTVIDAPGHR_	P02994	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	452.573822021484	3	452.573726	1354.69935	1712.4	0.0046098	
 41 | VATVSLPR	_VATVSLPR_	CON__P00761	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	421.75827026367205	2	421.758352	841.5021519999998	1723.26	0.00012912	
 42 | VAINGFGR	_VAINGFGR_	P00358;P00359	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	417.234985351563	2	417.235045	832.455536	1741.44	0.0071314	
 43 | LVLVGDGGTGK	_LVLVGDGGTGK_	P32835;P32836	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	508.293060302734	2	508.292756	1014.57096	1747.38	6.8582000000000015e-06	
 44 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298156738281	2	547.298039	1092.58152	1755.12	0.0070383	
 45 | AHSSMVGFDLPQR	_AHSSM<ox>VGFDLPQR_	P00560	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	487.569885253906	3	487.569876	1459.6878	1756.74	0.00045223	
 46 | ECCHGDLLECADDRADLAK	_EC<cmm>C<cmm>HGDLLEC<cmm>ADDRADLAK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	750.31884765625	3	749.9857599999998	2246.93545	1760.64	0.0024107	
 47 | DDPHACYSTVFDK	_DDPHAC<cmm>YSTVFDK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	518.8896484375	3	518.889162	1553.64566	1783.98	0.0039123000000000005	
 48 | SSAAGNTVIIGGGDTATVAK	_SSAAGNTVIIGGGDTATVAK_	P00560	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	895.4694213867191	2	895.468155	1788.9217600000004	1794.06	3.9324e-05	
 49 | NPVILADACCSR	_NPVILADAC<cmm>C<cmm>SR_	P06169	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	688.3265991210941	2	688.326479	1374.6384	1795.68	0.00093536	
 50 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298645019531	2	547.298039	1092.58152	1802.52	2.325e-09	
 51 | HLVDEPQNLIK	_HLVDEPQNLIK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	435.91012573242205	3	435.910227	1304.70885	1806.6	3.1412e-05	
 52 | HLVDEPQNLIK	_HLVDEPQNLIK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	653.362548828125	2	653.3617019999998	1304.70885	1807.2	7.225600000000001e-09	
 53 | ELISNASDALDK	_ELISNASDALDK_	P02829;P15108	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	638.324523925781	2	638.324982	1274.63541	1826.1	0.00081423	
 54 | YGGVYVGTLSKPEVK	_YGGVYVGTLSKPEVK_	P06169	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	532.959716796875	3	532.959246	1595.85591	1826.88	0.009406200000000002	
 55 | IGGIGTVPVGR	_IGGIGTVPVGR_	P02994	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	513.308898925781	2	513.308741	1024.60293	1842.3	2.7692e-05	
 56 | DFELEETDEEK	_DFELEETDEEK_	P02829;P15108	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	692.293579101563	2	692.293544	1382.57254	1847.94	0.00049445	
 57 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.838684082031	2	669.8380589999998	1337.66157	1865.94	4.6471e-17	
 58 | IVLQIDNAR	_IVLQIDNAR_	CON__P19001;CON__P08727;CON__P05784	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	521.306884765625	2	521.306198	1040.59784	1878.24	0.006728100000000001	
 59 | ESTLHLVLR	_ESTLHLVLR_	P0CH09;P0CH08;P0CG63;P05759	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	534.314392089844	2	534.314023	1066.61349	1882.98	0.0050334	
 60 | YYGYTGAFR	_YYGYTGAFR_	CON__Q29443;CON__Q0IIK2	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	549.256164550781	2	549.256174	1096.49779	1883.88	0.0034734	
 61 | LKPDPNTLCDEFK	_LKPDPNTLC<cmm>DEFK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	788.8878784179691	2	788.887423	1575.76029	1893.96	0.0034483000000000005	
 62 | LKPDPNTLCDEFK	_LKPDPNTLC<cmm>DEFK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	526.260925292969	3	526.260708	1575.76029	1896.0	1.6105e-10	
 63 | LPLQDVYK	_LPLQDVYK_	P02994	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	488.279632568359	2	488.279118	974.543683	1904.94	0.0018989	
 64 | AVFPSIVGRPR	_AVFPSIVGRPR_	P60010	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	400.240081787109	3	400.240018	1197.69823	1910.82	2.9322e-05	
 65 | RHPEYAVSVLLR	_RHPEYAVSVLLR_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	480.608825683594	3	480.60877	1438.80448	1911.96	0.00049224	
 66 | AVFPSIVGRPR	_AVFPSIVGRPR_	P60010	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	599.856506347656	2	599.85639	1197.69823	1912.38	0.00016023	
 67 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.854675292969	2	683.853709	1365.69287	1923.3	7.1514e-22	
 68 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298767089844	2	547.298039	1092.58152	1923.6	4.9323e-15	
 69 | TSIIGTIGPK	_TSIIGTIGPK_	P00549;P52489	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	493.797668457031	2	493.797675	985.580796	1924.32	0.0030092	
 70 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.29833984375	2	547.298039	1092.58152	1936.02	8.6306e-15	
 71 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.2982177734381	2	547.298039	1092.58152	1948.62	5.1593e-15	
 72 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298278808594	2	547.298039	1092.58152	1958.7000000000005	2.7891e-05	
 73 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.29833984375	2	547.298039	1092.58152	1970.28	3.1324000000000007e-06	
 74 | GYLAVAVVK	_GYLAVAVVK_	CON__Q29443;CON__Q0IIK2	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	460.284057617188	2	460.2842030000001	918.553853	1970.76	0.005111699999999999	
 75 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.838195800781	2	669.8380589999998	1337.66157	1977.3	2.4071e-07	
 76 | TPIVGQPSIPGGPVR	_TPIVGQPSIPGGPVR_	CON__P12763	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	737.9236450195309	2	737.922458	1473.83036	1980.96	2.523e-06	
 77 | TGPNLHGLFGR	_TGPNLHGLFGR_	CON__P62894	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	390.212066650391	3	390.21224	1167.61489	1983.48	0.0052224	
 78 | LSISETYDLK	_LSISETYDLK_	CON__P34955	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	584.8109130859381	2	584.808436	1167.60232	1988.04	0.0024649	
 79 | AIIVLSTSGTTPR	_AIIVLSTSGTTPR_	P00549	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	658.383483886719	2	658.3826339999998	1314.75072	1988.52	2.0112e-05	
 80 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.8387451171881	2	669.8380589999998	1337.66157	1991.34	6.447199999999999e-11	
 81 | LSELEAALQR	_LSELEAALQR_	CON__P05787;CON__REFSEQ:XP_092267;CON__H-INV:HIT000292931;CON__Q9H552	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	565.314758300781	2	565.31422	1128.61389	1991.7	0.00069269	
 82 | KQTALVELLK	_KQTALVELLK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	381.57614135742205	3	381.576296	1141.70706	1993.38	0.008533	
 83 | SIVPSGASTGVHEALEMR	_SIVPSGASTGVHEALEMR_	P00925;P00924	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	614.646911621094	3	614.312242	1839.9149	2002.86	0.005024600000000001	
 84 | SISIVGSYVGNR	_SISIVGSYVGNR_	P00330;P00331	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	626.338134765625	2	626.338227	1250.6619	2005.92	0.0032354	
 85 | AGLQFPVGR	_AGLQFPVGR_	Q12692	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	472.769439697266	2	472.769252	943.52395	2006.04	0.0013348	
 86 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.838623046875	2	669.8380589999998	1337.66157	2010.24	3.0445e-05	
 87 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.297912597656	2	547.298039	1092.58152	2011.02	6.0714999999999985e-06	
 88 | SIAPAYGIPVVLHSDHCAK	_SIAPAYGIPVVLHSDHC<cmm>AK_	P14540	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	509.767150878906	4	509.516197	2034.03568	2015.58	0.0022399	
 89 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.2982177734381	2	547.298039	1092.58152	2023.32	0.0041457	
 90 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.853698730469	2	683.853709	1365.69287	2040.96	2.1343e-06	
 91 | VPQVSTPTLVEVSR	_VPQVSTPTLVEVSR_	CON__P02769;CON__P02768-1	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	504.618835449219	3	504.619112	1510.83551	2056.14	0.0041647	
 92 | LTSLNVVAGSDLR	_LTSLNVVAGSDLR_	P00549	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	672.8778076171881	2	672.877716	1343.74088	2056.44	0.0048562	
 93 | VPQVSTPTLVEVSR	_VPQVSTPTLVEVSR_	CON__P02769;CON__P02768-1	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	756.425415039063	2	756.42503	1510.83551	2056.86	3.8651e-09	
 94 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.837707519531	2	669.8380589999998	1337.66157	2059.8	3.0445e-05	
 95 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.854248046875	2	683.853709	1365.69287	2059.92	0.0010462	
 96 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298278808594	2	547.298039	1092.58152	2061.12	0.0006364	
 97 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.3400268554691	2	699.338424	1396.66229	2061.84	3.6227e-16	
 98 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.839050292969	2	669.8380589999998	1337.66157	2069.34	0.00021901	
 99 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.8541259765631	2	683.853709	1365.69287	2075.64	1.842e-05	
100 | LAVNMVPFPR	_LAVNM<ox>VPFPR_	CON__ENSEMBL:ENSBTAP00000025008	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	580.318603515625	2	580.318251	1158.62195	2090.88	0.00535	
101 | NVPLYQHLADLSK	_NVPLYQHLADLSK_	P00925	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	499.94039916992205	3	499.940186	1496.79873	2096.88	0.0044076	
102 | AVFPSIVGR	_AVFPSIVGR_	P60010	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	473.279144287109	2	473.279452	944.544351	2104.38	0.0044089	
103 | LVNELTEFAK	_LVNELTEFAK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	582.319396972656	2	582.318971	1162.62339	2105.82	8.098300000000001e-05	
104 | ADVTPADFSEWSK	_ADVTPADFSEWSK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	726.8369140625	2	726.8357139999998	1451.65687	2154.42	4.701100000000001e-09	
105 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.8394775390631	2	669.8380589999998	1337.66157	2154.6	2.402e-05	
106 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	684.356872558594	2	683.853709	1365.69287	2155.56	0.0005748	
107 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298034667969	2	547.298039	1092.58152	2155.56	0.0017464000000000002	
108 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339111328125	2	699.338424	1396.66229	2196.4800000000005	0.00031579	
109 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339233398438	2	699.338424	1396.66229	2207.28	0.00031579	
110 | VLENTEIGDSIFDK	_VLENTEIGDSIFDK_	P00560	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	790.3973999023441	2	790.396135	1578.7777199999996	2209.5	0.00034864	
111 | VVVLPFPSK	_VVVLPFPSK_	CON__Q58D62	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	493.3078308105471	2	493.307678	984.600803	2218.44	0.0043097000000000005	
112 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339477539063	2	699.338424	1396.66229	2223.0	0.00047751	
113 | QDGQFSVLFTK	_QDGQFSVLFTK_	CON__P12763	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	635.327697753906	2	635.3273280000002	1268.6401	2236.44	0.00011656	
114 | TVMENFVAFVDK	_TVM<ox>ENFVAFVDK_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	708.348266601563	2	708.347403	1414.68025	2255.82	5.5488e-05	
115 | LVDTFLEDVK	_LVDTFLEDVK_	CON__P34955	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	589.8189697265631	2	589.818804	1177.62306	2255.82	0.0030574	
116 | EALDFFAR	_EALDFFAR_	P00330;P00331;P38113	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	484.745422363281	2	484.745442	967.476331	2257.2	0.0035996	
117 | DAGTIAGLNVLR	_DAGTIAGLNVLR_	P10591;P10592;P16474;P22202	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	600.341064453125	2	600.340769	1198.66699	2257.92	0.00069278	
118 | IGLDCASSEFFK	_IGLDC<cmm>ASSEFFK_	P00925;P00924	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	687.32373046875	2	687.323928	1372.6333	2263.9800000000005	0.0027378	
119 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.29833984375	2	547.298039	1092.58152	2278.2	0.0085109	
120 | SYELPDGQVITIGNER	_SYELPDGQVITIGNER_	P60010	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	895.9513549804691	2	895.949598	1789.8846399999998	2286.54	0.00035557	
121 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339599609375	2	699.338424	1396.66229	2291.7	0.0017302	
122 | LGEYGFQNALIVR	_LGEYGFQNALIVR_	CON__P02769	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	740.402221679688	2	740.401358	1478.78816	2327.16	1.5704e-11	
123 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339477539063	2	699.338424	1396.66229	2331.4800000000005	0.0039713000000000005	
124 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.338256835938	2	699.338424	1396.66229	2356.32	0.0025161	
125 | GTFIIDPGGVIR	_GTFIIDPGGVIR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	622.853820800781	2	622.853512	1243.69247	2358.42	5.2725e-05	
126 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.8387451171881	2	669.8380589999998	1337.66157	2474.4600000000005	0.0045031	
127 | GTFIIDPAAVIR	_GTFIIDPAAVIR_	Biognosys	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	636.86962890625	2	636.869163	1271.7237699999996	2530.62	0.00011833	
128 | AVDDFLLSLDGTANK	_AVDDFLLSLDGTANK_	P00925	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	790.406982421875	2	789.904128	1577.7937	2532.12	0.0071984	
129 | YGVQLSQIQSVISGFEAQLSDVR	_YGVQLSQIQSVISGFEAQLSDVR_	CON__P19001	MULTI-MSMS	B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	21_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	842.7744750976559	3	842.106249	2523.29692	3240.78	0.00053653	
130 | 


--------------------------------------------------------------------------------
/absence_peak_data/configuration_iRT.ini:
--------------------------------------------------------------------------------
 1 | [moFF_parameters]
 2 | loc_in= absence_peak_data\
 3 | raw_repo= absence_peak_data\raw_repo\
 4 | xic_length= 4
 5 | rt_peak_win= 1
 6 | rt_peak_win_match= 1.1
 7 | tol= 5
 8 | cpu= 0
 9 | peptide_summary= 1
10 | loc_out= absence_peak_data/output
11 | sample=
12 | ext=txt
13 | log_label = moFF
14 | w_filt = 1.5
15 | out_flag= True
16 | w_comb=
17 | mbr= on
18 | # to set to False, leave empty
19 | match_filter= True
20 | ptm_file = ptm_setting_ps.json
21 | quantile_thr_filtering = 0.85
22 | sample_size = 0.10
23 | 


--------------------------------------------------------------------------------
/example_parameter_file.ini:
--------------------------------------------------------------------------------
 1 | [moFF_parameters]
 2 | loc_in= sample_data/
 3 | # you can comment out each entry using #
 4 | # use a space to separate each item
 5 | #tsv_list= sample_data/20080311_CPTAC6_07_6A005.txt sample_data/20080311_CPTAC6_10_6B019.txt sample_data/20080311_CPTAC6_13_6C012.txt
 6 | #raw_list = your_local_folder_raw_repo//20080311_CPTAC6_07_6A005.RAW your_local_folder_raw_repo//raw_repo/20080311_CPTAC6_10_6B019.RAW your_local_folder_raw_repo//20080311_CPTAC6_13_6C012.RAW
 7 | # folder where all the raw files are located
 8 | raw_repo= your_local_folder_raw_repo/
 9 | # length of XIC
10 | xic_length= 3
11 | # size of the RT window used to find the apex for MS2-identified peptides
12 | rt_peak_win= 1
13 | # size of the RT window used to find the apex for matched peptides
14 | rt_peak_win_match= 1
15 | # tolerance in ppm
16 | tol= 10
17 | # export the peptide summary for further analysis: True/False. To set to False, leave empty
18 | peptide_summary = True
19 | # set output folder
20 | loc_out= output_data/
21 | # specify which replicates to use for mbr; regular expressions are valid, e.g. *_A*.txt
22 | sample=
23 | # specify the file extension of the input files
24 | ext=txt
25 | # a label name to use for the log file
26 | log_label = moFF
27 | # width value of the filter: k * mean(Dist_Mahalanobis). Default value 2
28 | w_filt = 2
29 | # filter outliers in each RT alignment. Default value True. To set to False, leave empty
30 | out_flag = True
31 | # weighting scheme: True/False. To set to False, leave empty
32 | w_comb  =
33 | #  select the moFF workflow: on = mbr + apex , off = apex only , only= only mbr
34 | mbr = on
35 | # activate/deactivate the filtering. To set to False, leave empty
36 | match_filter = True
37 | #choose the ptm json schema
38 | ptm_file = ptm_setting_mq.json
39 | # quantile value used to compute the filtering threshold for the matched peak
40 | quantile_thr_filtering = 0.75
41 | # percentage of MS2 peptides used to estimate the threshold.
42 | sample_size = 0.20
43 | # number of CPUs. Use 0 to automatically detect the number of CPUs on your machine
44 | cpu = 0
45 | 
46 | 


--------------------------------------------------------------------------------
/license:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/moff.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python
   2 | 
   3 | import bisect
   4 | import glob
   5 | import logging
   6 | import multiprocessing
   7 | import os as os
   8 | import shlex
   9 | import subprocess
  10 | import sys
  11 | # import time
  12 | import traceback
  13 | from collections import Counter
  14 | from functools import reduce
  15 | from itertools import chain
  16 | from sys import platform as _platform
  17 | 
  18 | import numpy as np
  19 | import pandas as pd
  20 | import pymzml
  21 | import simplejson as json
  22 | from brainpy import isotopic_variants
  23 | from pyteomics.mass import std_aa_comp
  24 | from scipy.stats import spearmanr
  25 | 
  26 | log = logging.getLogger(__name__)
  27 | log.setLevel(logging.DEBUG)
  28 | 
  29 | """
  30 | moFF: this module contains all core utilities such as apex computation, peak apex computation etc..
  31 | """
  32 | 
  33 | TXIC_PATH = os.environ.get('TXIC_PATH', './')
  34 | 
  35 | 
  36 | def set_logger(name_file):
  37 |     if len(log.handlers) == 0:
  38 |         ch = logging.StreamHandler()
  39 |         ch.setLevel(logging.ERROR)
  40 |         log.addHandler(ch)
  41 |         fh = logging.FileHandler(name_file, mode='a')
  42 |         fh.setLevel(logging.DEBUG)
  43 |         # formatter = logging.Formatter('%(message)s')
  44 |         # fh.setFormatter(formatter)
  45 |         log.addHandler(fh)
  46 | 
  47 | 
  48 | def detach_handler():
  49 |     handlers = log.handlers[:]
  50 |     for handler in handlers:
  51 |         handler.close()
  52 |         log.removeHandler(handler)
  53 | 
  54 | 
  55 | def clean_json_temp_file(loc_output):
  56 |     for f in glob.glob(loc_output + "/*.json"):
  57 |         os.remove(f)
  58 |     return 1
  59 | 
  60 | 
  61 | def compute_peptide_matrix(loc_output, log, tag_filename):
  62 |     """
  63 |     Computation of the export summary intensities peptides
  64 |     :param loc_output:
  65 |     :param log:
  66 |     :param tag_filename:
  67 |     :return:
  68 |     """
  69 |     name_col = []
  70 |     name_col.append('prot')
  71 |     d = []
  72 |     if not glob.glob(loc_output + '/*_moff_result.txt'):
  73 |         return False
  74 |     for name in glob.glob(loc_output + '/*_moff_result.txt'):
  75 | 
  76 |         if 'match_' in os.path.basename(name):
  77 |             name_col.append(
  78 |                 'sumIntensity_' + os.path.basename(name).split('_match_moff_result.txt')[0])
  79 |         else:
  80 |             name_col.append(
  81 |                 'sumIntensity_' + os.path.basename(name).split('_moff_result.txt')[0])
  82 |         data = pd.read_csv(name, sep="\t")
  83 | 
  84 |         '''
  85 |         Other possibile quality controll filter
  86 |         data = data[ data['lwhm'] != -1]
  87 |         data = data[data['rwhm'] != -1 ]
  88 |         '''
  89 | 
  90 |         data = data[data['intensity'] != -1]
  91 |         data.sort_values('rt', ascending=True, inplace=True)
  92 |         log.critical('Collecting moFF result file : %s   --> Retrived peptide peaks after filtering:  %i',
  93 |                      os.path.basename(name), data.shape[0])
  94 |         # cleaning peptide fragmented more than one time. we keep the earliest one
  95 |         data.drop_duplicates(subset=[
  96 |             'prot', 'peptide', 'mod_peptide', 'mass', 'charge'], keep='first', inplace=True)
  97 |         d.append(data[['prot', 'peptide', 'mod_peptide', 'mass',
  98 |                        'charge', 'rt_peak', 'rt', 'intensity']])
  99 | 
 100 |     intersect_share = reduce(np.union1d, ([x['peptide'].unique() for x in d]))
 101 |     index = intersect_share
 102 | 
 103 |     df = pd.DataFrame(index=index, columns=name_col)
 104 |     df = df.fillna(0)
 105 |     for i in range(0, len(d)):
 106 |         grouped = d[i].groupby('peptide', as_index=True)[['prot', 'intensity']]
 107 |         # print grouped.agg({'prot':'max', 'intensity':'sum'}).columns
 108 |         df.iloc[:, i + 1] = grouped.agg({'prot': 'max',
 109 |                                        'intensity': 'sum'})['intensity']
 110 |         df.loc[np.intersect1d(df.index, list(grouped.groups.keys())), 'prot'] = \
 111 |         grouped.agg({'prot': 'max', 'intensity': 'sum'})[
 112 |             'prot']
 113 |     # print df.head(5)
 114 |     df.reset_index(inplace=True)
 115 |     #df.reset_index(level=0, inplace=True)
 116 |     df = df.fillna(0)
 117 |     df.rename(columns={'index': 'peptide'}, inplace=True)
 118 |     log.critical('Writing peptide_summary intensity file')
 119 |     df.to_csv(os.path.join(loc_output, "peptide_summary_intensity_" +
 120 |                            tag_filename + ".tab"), sep='\t', index=False)
 121 |     return True
 122 | 
 123 | 
 124 | def save_moff_apex_result(result):
 125 |     """
 126 |     Collect all CPU results in a data frame
 127 | 
 128 |     :param result:
 129 |     :return:
 130 |     """
 131 |     try:
 132 |         xx = []
 133 |         for df_index in result:
 134 |             if result[df_index].get()[1] == -1:
 135 |                 exit('Raw file not retrieved: wrong path or upper/low case mismatch')
 136 |             else:
 137 |                 xx.append(result[df_index].get()[0])
 138 | 
 139 |         final_res = pd.concat(xx)
 140 |         if 'index' in final_res.columns:
 141 |             final_res.drop('index', axis=1, inplace=True)
 142 | 
 143 |     except Exception as e:
 144 |         traceback.print_exc()
 145 |         raise e
 146 |     return (final_res)
 147 | 
 148 | 
 149 | def map_ps2moff(data, type_mapping):
 150 |     data.drop(data.columns[[0]], axis=1, inplace=True)
 151 |     data.columns = data.columns.str.lower()
 152 |     if type_mapping == 'col_must_have_mbr':
 153 |         data.rename(columns={'sequence': 'peptide', 'modified sequence': 'mod_peptide', 'measured charge': 'charge',
 154 |                              'theoretical mass': 'mass', 'protein(s)': 'prot', 'm/z': 'mz'}, inplace=True)
 155 |     if type_mapping == 'col_must_have_apex':
 156 |         data.rename(columns={'sequence': 'peptide', 'modified sequence': 'mod_peptide', 'measured charge': 'charge',
 157 |                              'theoretical mass': 'mass',
 158 |                              'protein(s)': 'prot', 'm/z': 'mz'}, inplace=True)
 159 |     return data, data.columns.values.tolist()
 160 | 
 161 | 
 162 | def check_ps_input_data(input_column_name, list_col_ps_default):
 163 |     """
 164 |      Control if the input data is complaint with PS input file
 165 | 
 166 |     :param input_column_name:
 167 |     :param list_col_ps_default:
 168 |     :return:
 169 |     """
 170 |     input_column_name.sort()
 171 |     list_col_ps_default.sort()
 172 |     if list_col_ps_default == input_column_name:
 173 |         # detected a default PS input file
 174 |         return 1
 175 |     else:
 176 |         # not detected a default PS input file
 177 |         return 0
 178 | 
 179 | 
 180 | def check_columns_name(col_list, col_must_have, log):
 181 |     """
 182 |     Controls if the  current input  file informations are complaint with the minimun set of informations need by moFF
 183 | 
 184 |     :param col_list:
 185 |     :param col_must_have:
 186 |     :param log:
 187 |     :return:
 188 |     """
 189 |     for c_name in col_must_have:
 190 |         if not (c_name in col_list):
 191 |             # fail
 192 |             log.critical('This information is missing : %s ', c_name)
 193 |             return 1
 194 |         # succes
 195 |     return 0
 196 | 
 197 | 
 198 | def scan_mzml(name):
 199 |     """
 200 |         This function scan all the mzml file , and save all MS1 spectrum scan time and ID to speed the XIC calculation
 201 |     :param name:
 202 |     :return: list of scan rt , list of spectrum id
 203 | 
 204 |     """
 205 | 
 206 |     # when I am using thermo raw and --raw_repo option used
 207 |     if name is None:
 208 |         return (-1, -1)
 209 |     if 'MZML' in name.upper():
 210 |         rt_list = []
 211 |         runid_list = []
 212 |         run_temp = pymzml.run.Reader(name,MS1_Precision=5e-6)
 213 |         run = pymzml.run.Reader(name,MS1_Precision=5e-6)
 214 |         #I use two reader, one as iterator and one to check if spectra has random access.
 215 |         # The fact why some spectra are available  if iterarate them but not
 216 |         # if you access direct them. it is a mistery.
 217 |         for spectrum in run_temp:
 218 |             try:
 219 |                 tt = run[spectrum.ID]
 220 |                 if spectrum.ms_level == 1:
 221 |                     ## reminder:  weird thing: in python 3.6 (virt env) spectrum.scan_time is a float.
 222 |                     # but in native py3.6+ env : spectrum.scan_time is a tuple (float,'unit measure')
 223 |                     # that why i check .
 224 |                     if isinstance(spectrum.scan_time,tuple):
 225 |                         rt_list.append(spectrum.scan_time[0])
 226 |                     else:
 227 |                         rt_list.append(spectrum.scan_time)
 228 |                     runid_list.append(spectrum.ID)
 229 |             except:
 230 |                 pass
 231 | 
 232 |         return (rt_list, runid_list)
 233 |     else:
 234 |         # in case of raw file  I put to -1 -1 thm result
 235 |         return (-1, -1)
 236 | 
 237 | 
 238 | def mzML_get_all(temp, tol,  run, rt_list1, runid_list1):
 239 |     """
 240 |         run pyMZML_xic_out for all the requested peptide in
 241 |     :param temp: dataframe with the input peptide information
 242 |     :param tol:  tollerance
 243 |     :param run:  pyzml reader on the mzmml file
 244 |     :param rt_list1:  list of all the scan time  in the current mzml
 245 |     :param runid_list1: list of all the spectum ID in the current mzml file
 246 |     :return: list of dataframe
 247 |     """
 248 |     app_list = []
 249 |     ppm = float(  tol / (10 ** 6))
 250 |     for index_ms2, row in temp.iterrows():
 251 |         #start_time = time.time()
 252 |         data, status = pyMZML_xic_out(ppm, row['ts'], row['te'], row['mz'], run, runid_list1, rt_list1)
 253 |         # status is evaluated only herenot used anymore
 254 |         if status != -1:
 255 |             app_list.append(data)
 256 |         else:
 257 |             app_list.append(pd.DataFrame(columns=['rt', 'intensity']))
 258 | 
 259 |     return app_list
 260 | 
 261 | 
 262 | def pyMZML_xic_out( ppmPrecision, minRT, maxRT, MZValue, run, runid_list, rt_list):
 263 |     """
 264 |         EXtract XiC using pymzml library
 265 |     :param ppmPrecision:
 266 |     :param minRT:
 267 |     :param maxRT:
 268 |     :param MZValue:
 269 |     :param run:
 270 |     :param runid_list:
 271 |     :param rt_list:
 272 |     :return: pandas  dataframe
 273 |     """
 274 |     timeDependentIntensities = []
 275 |     minpos = bisect.bisect_left(rt_list, minRT)
 276 |     maxpos = bisect.bisect_left(rt_list, maxRT)
 277 | 
 278 |     lmz =(float(MZValue - ppmPrecision * MZValue), None)
 279 |     umz = (float(MZValue + ppmPrecision * MZValue), None)
 280 | 
 281 |     for specpos in range(minpos, maxpos):
 282 |         specid = runid_list[specpos]
 283 |         spectrum = run[specid]
 284 |         if isinstance(spectrum.scan_time, tuple):
 285 |             curr_rt= spectrum.scan_time[0]
 286 |         else:
 287 |             curr_rt = spectrum.scan_time
 288 |         if curr_rt > maxRT:
 289 |             break
 290 |         if curr_rt > minRT and  curr_rt < maxRT:
 291 |             peaks = list(map(tuple, spectrum.peaks("raw")))
 292 |             lower_index = bisect.bisect(
 293 |                 peaks,lmz)
 294 |             upper_index = bisect.bisect(
 295 |                 peaks, umz)
 296 |             maxI = 0.0
 297 |             for sp in peaks[lower_index: upper_index]:
 298 |                 if sp[1] > maxI:
 299 |                     maxI = sp[1]
 300 |             if maxI > 0:
 301 |                 timeDependentIntensities.append(
 302 |                     [curr_rt, maxI])
 303 | 
 304 |     if len(timeDependentIntensities) > 5:
 305 |         return (pd.DataFrame(timeDependentIntensities, columns=['rt', 'intensity']), 1)
 306 |     else:
 307 |         return (pd.DataFrame(timeDependentIntensities, columns=['rt', 'intensity']), -1)
 308 | 
 309 | 
 310 | def check_log_existence(file_to_check):
 311 |     """
 312 |     Controls the presence of a log file
 313 | 
 314 |     :param file_to_check:
 315 |     :return:
 316 |     """
 317 |     if os.path.isfile(file_to_check):
 318 |         os.remove(file_to_check)
 319 |         return True
 320 |     else:
 321 |         return False
 322 | 
 323 | 
 324 | def check_output_folder_existence(loc_output):
 325 |     """
 326 |     Controls the presence of a directory. if not, it makes it
 327 | 
 328 |     :param loc_output:
 329 |     :return:
 330 |     """
 331 | 
 332 |     if not os.path.exists(loc_output):
 333 |         os.mkdir(loc_output)
 334 |         return 1
 335 |     else:
 336 |         return 0
 337 | 
 338 | 
 339 | def compute_log_LR(data_xic, index, v_max, disc):
 340 |     """
 341 |     Computation shape peak metrics log_L_R
 342 | 
 343 |     :param data_xic:
 344 |     :param index:
 345 |     :param v_max:
 346 |     :param disc:
 347 |     :return:
 348 |     """
 349 |     log_time = [-1, -1]
 350 |     c_left = 0
 351 |     find_5 = False
 352 |     stop = False
 353 |     while c_left <= (index - 1) and not stop:
 354 |         if not find_5 and (data_xic.iloc[(index - 1) - c_left, 1] <= (disc * v_max)) :
 355 |             find_5 = True
 356 |             log_time[0] = data_xic.iloc[(index - 1) - c_left, 0] * 60
 357 |             stop = True
 358 |         if data_xic.iloc[(index - 1) -  c_left, 1] > v_max:
 359 |             # avoid local minima
 360 |             # intensity must decrease
 361 |             stop = True
 362 |         c_left += 1
 363 |     find_5 = False
 364 |     stop = False
 365 |     r_left = 0
 366 |     while ((index + 1) + r_left < data_xic.shape[0]) and not stop:
 367 |         if not find_5 and data_xic.iloc[(index + 1) + r_left, 1] <= (disc * v_max) :
 368 |             find_5 = True
 369 |             log_time[1] = data_xic.iloc[(index + 1) + r_left, 0] * 60
 370 |             stop = True
 371 |         if data_xic.iloc[(index + 1) + r_left, 1] > v_max:
 372 |             # avoid local minima
 373 |             # intensity must decrease
 374 |             stop = True
 375 |         r_left += 1
 376 |     return log_time
 377 | 
 378 | 
 379 | def compute_peak_simple(x, xic_array, log, mbr_flag, h_rt_w, s_w, s_w_match, offset_index, moff_pride_flag,
 380 |                         rt_match_peak, count_match, filt_flag):
 381 |     """
 382 | 
 383 |     Apex computation method
 384 | 
 385 |     :param x:
 386 |     :param xic_array:
 387 |     :param log:
 388 |     :param mbr_flag:
 389 |     :param h_rt_w:
 390 |     :param s_w:
 391 |     :param s_w_match:
 392 |     :param offset_index:
 393 |     :param moff_pride_flag:
 394 |     :param rt_match_peak:
 395 |     :param count_match:
 396 |     :param filt_flag:
 397 |     :return:
 398 |     """
 399 |     if count_match != -1:
 400 |         c = x.prog_xic_index
 401 |     else:
 402 |         c = x.name
 403 |     data_xic = xic_array[c]
 404 |     if rt_match_peak > -1:
 405 |         time_w = rt_match_peak
 406 |     else:
 407 |         time_w = x['rt']
 408 |         if not moff_pride_flag:
 409 |             # NOT moff pride data
 410 |             # dealling with rt in minutes
 411 |             # standar cases rt must be in second
 412 |             time_w = time_w / 60
 413 |     # print time_w, x['rt'] , moff_pride_flag, rt_match_peak,time_w, s_w,s_w_match
 414 |     if not mbr_flag:
 415 |         temp_w = s_w
 416 |     else:
 417 |         # row['matched'])
 418 |         if x['matched'] == 1:
 419 |             temp_w = s_w_match
 420 |         else:
 421 |             temp_w = s_w
 422 |     if data_xic[(data_xic['rt'] > (time_w - temp_w)) & (data_xic['rt'] < (time_w + temp_w))].shape[0] >= 1:
 423 |         # data_xic[(data_xic['rt'] > (time_w - temp_w)) & (data_xic['rt'] < (time_w + temp_w))].to_csv('thermo_testXIC_'+str(c)+'.txt',index=False,sep='\t')
 424 |         ind_v = data_xic.index
 425 |         pp = data_xic[data_xic["intensity"] == data_xic[(data_xic['rt'] > (
 426 |                 time_w - temp_w)) & (data_xic['rt'] < (time_w + temp_w))]['intensity'].max()].index
 427 |         pos_p = ind_v[pp]
 428 |         if pos_p.values.shape[0] > 1:
 429 |             log.info('error, no apex found')
 430 |             return pd.Series(
 431 |                 {'intensity': -1, 'rt_peak': -1, 'lwhm': -1, 'rwhm': -1, '5p_noise': -1, '10p_noise': -1, 'SNR': -1,
 432 |                  'log_L_R': -1, 'log_int': -1})
 433 |         val_max = data_xic.iloc[pos_p, 1].values
 434 |     else:
 435 |         if filt_flag == 1:
 436 |             if 'matched' in x.axes[0].tolist():
 437 |                 log.info(
 438 |                     'peptide %r -->  MZ: %4.4f RT: %4.4f matched (yes=1/no=0): %i Peak not detected  Xic shape %r ',
 439 |                     x['mod_peptide'], x['mz'], time_w, x['matched'],
 440 |                     data_xic[(data_xic['rt'] > (time_w - temp_w)) & (data_xic['rt'] < (time_w + temp_w))].shape[0])
 441 |             else:
 442 |                 log.info(
 443 |                     'peptide %r -->  MZ: %4.4f RT: %4.4f matched (yes=1/no=0): %i Peak not detected  Xic shape %r ',
 444 |                     x['mod_peptide'], x['mz'], time_w, 0,
 445 |                     data_xic[(data_xic['rt'] > (time_w - temp_w)) & (data_xic['rt'] < (time_w + temp_w))].shape[0])
 446 |         # log.info('peptide at line %i -->  MZ: %4.4f RT: %4.4f ', (offset_index +c +2), x['mz'], time_w)
 447 |         # log.info("\t LW_BOUND window  %4.4f", time_w - temp_w)
 448 |         # log.info("\t UP_BOUND window %4.4f", time_w + temp_w)
 449 |         # log.info("\t WARNINGS: Peak not detected  Xic shape %r ", data_xic[(data_xic['rt'] > (time_w - temp_w)) & (data_xic['rt'] < (time_w + temp_w))].shape[0])
 450 |         return pd.Series({'intensity': -1, 'rt_peak': -1,
 451 |                           'lwhm': -1,
 452 |                           'rwhm': -1,
 453 |                           '5p_noise': -1,
 454 |                           '10p_noise': -1,
 455 |                           'SNR': -1,
 456 |                           'log_L_R': -1,
 457 |                           'log_int': -1})
 458 |     pnoise_5 = np.percentile(data_xic[(data_xic['rt'] > (
 459 |             time_w - h_rt_w)) & (data_xic['rt'] < (time_w + h_rt_w))]['intensity'], 5)
 460 |     pnoise_10 = np.percentile(data_xic[(data_xic['rt'] > (
 461 |             time_w - h_rt_w)) & (data_xic['rt'] < (time_w + h_rt_w))]['intensity'], 10)
 462 |     # find the lwhm and rwhm
 463 |     time_point = compute_log_LR(data_xic, pos_p[0], val_max, 0.5)
 464 |     if (time_point[0] * time_point[1] == 1) or (time_point[0] * time_point[1] < 0):
 465 |         # Try a second time FWHM  computation with 0.7 * max intensity
 466 |         time_point = compute_log_LR(data_xic, pos_p[0], val_max, 0.7)
 467 | 
 468 |     if time_point[0] == -1 or time_point[1] == -1:
 469 |         # keep the shape measure to -1 in case on txo point are -1
 470 |         log_L_R = -1
 471 |     else:
 472 |         log_L_R = np.log2(
 473 |             abs(time_w - time_point[0]) / abs(time_w - time_point[1]))
 474 | 
 475 |     if pnoise_5 == 0 and pnoise_10 > 0:
 476 |         SNR = 20 * np.log10(data_xic.iloc[pos_p, 1].values / pnoise_10)
 477 |     else:
 478 |         if pnoise_5 != 0:
 479 |             SNR = 20 * np.log10(data_xic.iloc[pos_p, 1].values / pnoise_5)
 480 |         else:
 481 |             log.info('\t 5 percentile is %4.4f (added 0.5)', pnoise_5)
 482 |             SNR = 20 * \
 483 |                   np.log10(data_xic.iloc[pos_p, 1].values / (pnoise_5 + 0.5))
 484 | 
 485 |     return pd.Series({'intensity': val_max[0], 'rt_peak': data_xic.iloc[pos_p, 0].values[0] * 60,
 486 |                       'lwhm': time_point[0],
 487 |                       'rwhm': time_point[1],
 488 |                       '5p_noise': pnoise_5,
 489 |                       '10p_noise': pnoise_10,
 490 |                       'SNR': SNR[0],
 491 |                       'log_L_R': log_L_R,
 492 |                       'log_int': np.log2(val_max)[0]})
 493 | 
 494 | 
 495 | # def estimate_parameter( df , name_file, raw_name, tol, h_rt_w, s_w, s_w_match, loc_raw, loc_output,  rt_list , id_list, moff_pride_flag ,ptm_map,   log,sample_size, quantile_value, match_filter_flag   ):
 496 | 
 497 | def estimate_parameter(df, name_file, raw_name, tol, h_rt_w, s_w, s_w_match, loc_raw, loc_output, rt_list, id_list,
 498 |                        moff_pride_flag, ptm_map, sample_size, quantile_value, match_filter_flag, log_file, num_CPU):
 499 |     """
 500 |     Compute the quality metrics for the filtering during the estimation part
 501 | 
 502 |     :param df:
 503 |     :param name_file:
 504 |     :param raw_name:
 505 |     :param tol:
 506 |     :param h_rt_w:
 507 |     :param s_w:
 508 |     :param s_w_match:
 509 |     :param loc_raw:
 510 |     :param loc_output:
 511 |     :param rt_list:
 512 |     :param id_list:
 513 |     :param moff_pride_flag:
 514 |     :param ptm_map:
 515 |     :param sample_size:
 516 |     :param quantile_value:
 517 |     :param match_filter_flag:
 518 |     :param log_file:
 519 |     :param num_CPU:
 520 |     :return:
 521 |     """
 522 |     set_logger(log_file)
 523 |     myPool = multiprocessing.Pool(num_CPU)
 524 |     sample = df[df['matched'] == 0].sample(frac=sample_size)
 525 |     log.critical(
 526 |         'quality measures estimation  using %r  MS2 ident. peptides randomly sampled' % sample.shape[0])
 527 |     data_split = np.array_split(sample, num_CPU)
 528 |     result = {}
 529 |     offset = 0
 530 |     # run matchinf filtering for
 531 |     # for result in data_split:
 532 |     for df_index in range(0, len(data_split)):
 533 |         result[df_index] = myPool.apply_async(apex_multithr, args=(
 534 |         data_split[df_index], name_file, raw_name, tol, h_rt_w, s_w, s_w_match,
 535 |         loc_raw, loc_output, offset, rt_list, id_list, moff_pride_flag, ptm_map, 1, -1, -1, match_filter_flag,
 536 |         log_file))
 537 |         offset += len(data_split[df_index])
 538 |     myPool.close()
 539 |     myPool.join()
 540 |     ms2_data = save_moff_apex_result(result)
 541 |     # log.critical ('Estimated distribution rank correlation exp. int. vs theor. int. %r %r %r ' %( ms2_data[ ms2_data['rankcorr'] != -1]['rankcorr'].quantile(0.25), ms2_data[ms2_data['rankcorr'] != -1]['rankcorr'].quantile(0.50),   ms2_data[ ms2_data['rankcorr'] != -1]['rankcorr'].quantile(0.75) )  )
 542 |     log.critical('MAD retention time along all isotope %r',
 543 |                  ms2_data[ms2_data['RT_drift'] != -1]['RT_drift'].describe())
 544 |     log.critical('Estimated distribition ratio exp. int. left isotope vs. monoisotopic isotope %r ',
 545 |                  ms2_data[ms2_data['delta_log_int'] != -1]['delta_log_int'].describe())
 546 |     error_relInr = ms2_data[ms2_data['Erro_RelIntensity_TheoExp'] != -1]['Erro_RelIntensity_TheoExp'].quantile(
 547 |         quantile_value)
 548 |     rt_drift = ms2_data[ms2_data['RT_drift'] != -1]['RT_drift'].quantile(quantile_value)
 549 |     ratio_log_int = ms2_data[ms2_data['delta_log_int'] != -1]['delta_log_int'].quantile(quantile_value)
 550 |     return (rt_drift, error_relInr, ratio_log_int)
 551 | 
 552 | 
 553 | def compute_match_peak_quality_measure(input_data, moff_pride_flag, log):
 554 |     """
 555 |     Compute filter quality metrics
 556 | 
 557 |     :param input_data:
 558 |     :param moff_pride_flag:
 559 |     :param log:
 560 |     :return:
 561 |     """
 562 |     sum_intensity = input_data['intensity'].sum()
 563 |     mad_diff_int = np.mean(abs((input_data['intensity'] / sum_intensity) - (
 564 |             input_data['ratio_iso'] / input_data['ratio_iso'].sum())))
 565 |     rank_spearman = spearmanr(
 566 |         (input_data['intensity'] / sum_intensity), input_data['ratio_iso'])[0]
 567 |     mad_rt = np.mean(abs(input_data['rt_peak'] - input_data['rt_peak'].mean()))
 568 |     return (mad_diff_int, rank_spearman, mad_rt)
 569 | 
 570 | 
 571 | def estimate_on_match_peak(x, input_data, estimate_flag, moff_pride_flag, log, thr_q2, err_ratio_int, xic_data,
 572 |                            mbr_flag, h_rt_w, s_w, s_w_match, offset_index):
 573 |     """
 574 |     Estimation of filter quality measures based on sampling of the MS2 identified peptides.
 575 | 
 576 |     :param x:
 577 |     :param input_data:
 578 |     :param estimate_flag:
 579 |     :param moff_pride_flag:
 580 |     :param log:
 581 |     :param thr_q2:
 582 |     :param err_ratio_int:
 583 |     :param xic_data:
 584 |     :param mbr_flag:
 585 |     :param h_rt_w:
 586 |     :param s_w:
 587 |     :param s_w_match:
 588 |     :param offset_index:
 589 |     :return:
 590 |     """
 591 |     test = input_data.loc[input_data['original_ptm'] == x.name, :].copy()
 592 |     test.reset_index(inplace=True)
 593 |     # print 'local df inside estimate ', input_data.columns
 594 |     test.iloc[0:1, 13:22] = test.iloc[0:1, :].apply(lambda x: compute_peak_simple(
 595 |         x, xic_data, log, mbr_flag, h_rt_w, s_w, s_w_match, offset_index, moff_pride_flag, -1, 1, 0), axis=1)
 596 |     # print 'output -->>  ',input_data.iloc[:,12:22]
 597 |     # print  input_data.iloc[0, input_data.columns.get_indexer(['log_L_R'])].all() != -1
 598 |     if (test.iloc[0, test.columns.get_indexer(['log_L_R'])]).any() != -1:
 599 |         new_point = test.iloc[0,
 600 |                               test.columns.get_indexer(['rt_peak'])] / 60
 601 |         test.iloc[1:4, 13:22] = test.iloc[1:4, :].apply(lambda x: compute_peak_simple(
 602 |             x, xic_data, log, mbr_flag, h_rt_w, 0.3, 0.3, offset_index, moff_pride_flag, new_point[0], 1, 0), axis=1)
 603 |         if (test.iloc[0:3, test.columns.get_indexer(['log_L_R'])] != -1).all()[0]:
 604 |             mad_diff_int, rank_spearman, mad_rt = compute_match_peak_quality_measure(
 605 |                 test.iloc[0:3, :], moff_pride_flag, log)
 606 |             if (test.iloc[3, test.columns.get_indexer(['log_L_R'])]).all() == -1:
 607 |                 return pd.Series(
 608 |                     {'Erro_RelIntensity_TheoExp': mad_diff_int, 'rankcorr': rank_spearman, 'RT_drift': mad_rt,
 609 |                      'delta_rt': -1, 'delta_log_int': -1})
 610 |             else:
 611 |                 delta_rt_wrong_iso = abs(
 612 |                     test.at[3, 'rt_peak'] - test.iloc[0:3, test.columns.get_indexer(['rt_peak'])].mean()[0])
 613 |                 delta_log_int = test.at[3, 'log_int'] / test.at[0, 'log_int']
 614 |                 # print pd.Series({'Erro_RelIntensity_TheoExp': mad_diff_int, 'rankcorr': rank_spearman,'RT_drift': mad_rt ,'delta_rt': delta_rt_wrong_iso ,'delta_log_int': delta_log_int})
 615 |                 return pd.Series(
 616 |                     {'Erro_RelIntensity_TheoExp': mad_diff_int, 'rankcorr': rank_spearman, 'RT_drift': mad_rt,
 617 |                      'delta_rt': delta_rt_wrong_iso, 'delta_log_int': delta_log_int})
 618 |         else:
 619 |             return pd.Series(
 620 |                 {'Erro_RelIntensity_TheoExp': -1, 'rankcorr': -1, 'RT_drift': -1, 'delta_rt': -1, 'delta_log_int': -1})
 621 |     else:
 622 |         return pd.Series(
 623 |             {'Erro_RelIntensity_TheoExp': -1, 'rankcorr': -1, 'RT_drift': -1, 'delta_rt': -1, 'delta_log_int': -1})
 624 | 
 625 | 
 626 | def filtering_match_peak(x, input_data, estimate_flag, moff_pride_flag, log, thr_q2, err_ratio_int, xic_data, mbr_flag,
 627 |                          h_rt_w, s_w, s_w_match, offset_index):
 628 |     """
 629 |     Filtering of the matched peptides based on the isotopic envelope and quality measures estimated
 630 |     :param x:
 631 |     :param input_data:
 632 |     :param estimate_flag:
 633 |     :param moff_pride_flag:
 634 |     :param log:
 635 |     :param thr_q2:
 636 |     :param err_ratio_int:
 637 |     :param xic_data:
 638 |     :param mbr_flag:
 639 |     :param h_rt_w:
 640 |     :param s_w:
 641 |     :param s_w_match:
 642 |     :param offset_index:
 643 |     :return:
 644 |     """
 645 |     # print 'inside filtering routine ...'
 646 |     # log.info('matched peptide  --> %r  mZ: %4.4f RT: %4.4f ', x.mod_peptide , x.mz, x.rt)
 647 |     test = input_data.loc[input_data['original_ptm'] == x.name, :].copy()
 648 |     test.reset_index(inplace=True)
 649 |     test.iloc[0:1, 13:22] = test.iloc[0:1, :].apply(lambda x: compute_peak_simple(
 650 |         x, xic_data, log, mbr_flag, h_rt_w, s_w, s_w_match, offset_index, moff_pride_flag, -1, 1, 0), axis=1)
 651 |     if (test.iloc[0, test.columns.get_indexer(['log_L_R'])]).all() != -1:
 652 |         # if not moff_pride_flag :
 653 |         #    new_point = test.iloc[0, test.columns.get_indexer(['rt_peak'])]
 654 |         # else:
 655 |         # to minute - second
 656 |         # moffpride data -> convert again in second
 657 |         # from the ssecond isotope always convert to minute case : if new_point is provided
 658 |         new_point = test.iloc[0, test.columns.get_indexer(['rt_peak'])] / 60
 659 |         test.iloc[1:4, 13:22] = test.iloc[1:4, :].apply(lambda x: compute_peak_simple(
 660 |             x, xic_data, log, mbr_flag, h_rt_w, 0.3, 0.3, offset_index, moff_pride_flag, new_point[0], 1, 0), axis=1)
 661 |         # check isotope 2-3
 662 |         if (test.iloc[1:3, test.columns.get_indexer(['log_L_R'])] != -1).all()[0]:
 663 |             mad_diff_int, rank_spearman, mad_rt = compute_match_peak_quality_measure(
 664 |                 test.iloc[0:3, :], moff_pride_flag, log)
 665 |             if (mad_rt < thr_q2 and rank_spearman > 0.9):
 666 |                 # check isotope -1
 667 |                 if (test.iloc[3, test.columns.get_indexer(['log_L_R'])]).all() != -1:
 668 |                     delta_rt_wrong_iso = abs(
 669 |                         test.at[3, 'rt_peak'] - test.iloc[0:3, test.columns.get_indexer(['rt_peak'])].mean()[0])
 670 |                     delta_log_int = test.at[3,
 671 |                                             'log_int'] / test.at[0, 'log_int']
 672 |                     if (delta_rt_wrong_iso < thr_q2) and (delta_log_int > err_ratio_int):
 673 |                         # filter  overlapping peptide isotope
 674 |                         log.info(
 675 |                             ' %r mz: %4.4f RT: %4.4f --> Not valid isotope envelope  overlapping detected -->  --  MAD RT  %r  -- rankCorr %r ',
 676 |                             x.mod_peptide, x.mz, x.rt, mad_rt, rank_spearman)
 677 |                         return pd.Series(
 678 |                             {'intensity': -1, 'rt_peak': -1, 'lwhm': -1, 'rwhm': -1, '5p_noise': -1, '10p_noise': -1,
 679 |                              'SNR': -1, 'log_L_R': -1, 'log_int': -1})
 680 |                     else:
 681 |                         log.info(
 682 |                             '%r mz: %4.4f RT: %4.4f --> Valid isotope envelope detected after overlapping check -->  --  MAD RT  %r  -- rankCorr %r ',
 683 |                             x.mod_peptide, x.mz, x.rt, mad_rt, rank_spearman)
 684 |                         return test.loc[
 685 |                             test['ratio_iso'].idxmax(axis=1), ['intensity', 'rt_peak', 'lwhm', 'rwhm', '5p_noise',
 686 |                                                                '10p_noise', 'SNR', 'log_L_R', 'log_int']]
 687 |                 else:
 688 |                     log.info(
 689 |                         ' %r mz: %4.4f RT: %4.4f  --> Valid isotope envelope detected and no overlaping detected -->  --  MAD RT  %r  -- rankCorr %r ',
 690 |                         x.mod_peptide, x.mz, x.rt, mad_rt, rank_spearman)
 691 |                     return test.loc[
 692 |                         test['ratio_iso'].idxmax(axis=1), ['intensity', 'rt_peak', 'lwhm', 'rwhm', '5p_noise',
 693 |                                                            '10p_noise', 'SNR', 'log_L_R', 'log_int']]
 694 |             else:
 695 |                 # not pass the thr. control
 696 |                 log.info(
 697 |                     ' %r mz: %4.4f RT: %4.4f  --> Not valid isotope envelope detected  -->  --  MAD RT  %r  -- rankCorr %r ',
 698 |                     x.mod_peptide, x.mz, x.rt, mad_rt, rank_spearman)
 699 |                 return pd.Series(
 700 |                     {'intensity': -1, 'rt_peak': -1, 'lwhm': -1, 'rwhm': -1, '5p_noise': -1, '10p_noise': -1, 'SNR': -1,
 701 |                      'log_L_R': -1, 'log_int': -1})
 702 |         else:
 703 |             # I have only the 1st valid isotope peak  but not the second or third
 704 |             log.info(' %r mz: %4.4f RT: %4.4f --> not enough isotope peaks detected  ',
 705 |                      x.mod_peptide, x.mz, x.rt)
 706 |             return pd.Series(
 707 |                 {'intensity': -1, 'rt_peak': -1, 'lwhm': -1, 'rwhm': -1, '5p_noise': -1, '10p_noise': -1, 'SNR': -1,
 708 |                  'log_L_R': -1, 'log_int': -1})
 709 |     else:
 710 |         log.info(' %r mz: %4.4f RT: %4.4f  --> first isotope peak not detected ',
 711 |                  x.mod_peptide, x.mz, x.rt)
 712 |         return pd.Series(
 713 |             {'intensity': -1, 'rt_peak': -1, 'lwhm': -1, 'rwhm': -1, '5p_noise': -1, '10p_noise': -1, 'SNR': -1,
 714 |              'log_L_R': -1, 'log_int': -1})
 715 | 
 716 | 
 717 | def apex_multithr(data_ms2, name_file, raw_name, tol, h_rt_w, s_w, s_w_match, loc_raw, loc_output, offset_index,
 718 |                   rt_list, id_list, moff_pride_flag, ptm_map, estimate_flag, rt_drift, err_ratio_int, match_filter_flag,
 719 |                   log_file):
 720 |     """
 721 | 
 722 |     General apex method used both for filtering and not filtering usage
 723 | 
 724 |     :param data_ms2:
 725 |     :param name_file:
 726 |     :param raw_name:
 727 |     :param tol:
 728 |     :param h_rt_w:
 729 |     :param s_w:
 730 |     :param s_w_match:
 731 |     :param loc_raw:
 732 |     :param loc_output:
 733 |     :param offset_index:
 734 |     :param rt_list:
 735 |     :param id_list:
 736 |     :param moff_pride_flag:
 737 |     :param ptm_map:
 738 |     :param estimate_flag:
 739 |     :param rt_drift:
 740 |     :param err_ratio_int:
 741 |     :param match_filter_flag:
 742 |     :param log_file:
 743 |     :return:
 744 |     """
 745 |     set_logger(log_file)
 746 |     # setting flag and ptah
 747 |     flag_mzml = False
 748 |     flag_windows = False
 749 |     mbr_flag = False
 750 | 
 751 | 
 752 |     # set platform
 753 |     if _platform in ["linux", "linux2", 'darwin']:
 754 |         flag_windows = False
 755 |     elif _platform == "win32":
 756 |         flag_windows = True
 757 | 
 758 |     if loc_output != '':
 759 |         if not (os.path.isdir(loc_output)):
 760 |             os.makedirs(loc_output)
 761 |             log.info("created output folder: ", loc_output)
 762 | 
 763 |     if '_match' in name_file:
 764 |         # in case of mbr , here i dont have evaluate the flag mbr
 765 |         start = name_file.find('_match')
 766 |         # extract the name of the file
 767 |         name_file = name_file[0:start]
 768 | 
 769 |     if loc_raw is not None:
 770 |         if flag_windows:
 771 |             loc = os.path.join(loc_raw, name_file.upper() + '.RAW')
 772 | 
 773 |         else:
 774 |             # raw file name must have capitals letters :) this shloud be checked
 775 |             # this should be done in moe elegant way
 776 | 
 777 |             loc = os.path.normcase(os.path.join(loc_raw, name_file + '.RAW'))
 778 | 
 779 |             if not (os.path.isfile(loc)):
 780 |                 loc = os.path.join(loc_raw, name_file + '.raw')
 781 | 
 782 |     else:
 783 |         # mzML work only with --raw_list option
 784 |         loc = raw_name
 785 |         if 'MZML' in raw_name.upper():
 786 |             flag_mzml = True
 787 | 
 788 |     if not (os.path.isfile(loc)):
 789 |         log.critical(
 790 |             'ERROR: Wrong path or wrong raw file name included: %s' % loc)
 791 |         return None, -1
 792 | 
 793 |     # index_offset = data_ms2.columns.shape[0] - 1
 794 |     data_ms2["intensity"] = -1
 795 |     data_ms2["rt_peak"] = -1
 796 |     data_ms2["lwhm"] = -1
 797 |     data_ms2["rwhm"] = -1
 798 |     data_ms2["5p_noise"] = -1
 799 |     data_ms2["10p_noise"] = -1
 800 |     data_ms2["SNR"] = -1
 801 |     data_ms2["log_L_R"] = -1
 802 |     data_ms2["log_int"] = -1
 803 |     data_ms2["rt_peak"] = data_ms2["rt_peak"].astype('float64')
 804 |     data_ms2['intensity'] = data_ms2['intensity'].astype('float64')
 805 |     data_ms2['lwhm'] = data_ms2['lwhm'].astype('float64')
 806 |     data_ms2["rwhm"] = data_ms2['rwhm'].astype('float64')
 807 |     data_ms2["5p_noise"] = data_ms2['5p_noise'].astype('float64')
 808 |     data_ms2["10p_noise"] = data_ms2['10p_noise'].astype('float64')
 809 |     data_ms2["SNR"] = data_ms2['SNR'].astype('float64')
 810 |     data_ms2["log_L_R"] = data_ms2['log_L_R'].astype('float64')
 811 |     data_ms2["log_int"] = data_ms2['log_int'].astype('float64')
 812 |     if estimate_flag == 1:
 813 |         # add extra filed if I am in a estimate mode
 814 |         data_ms2["Erro_RelIntensity_TheoExp"] = -1
 815 |         data_ms2["rankcorr"] = -1
 816 |         data_ms2["RT_drift"] = -1
 817 |         data_ms2["delta_rt"] = -1
 818 |         data_ms2["delta_log_int"] = -1
 819 |     # set mbr_flag
 820 |     if 'matched' in data_ms2.columns:
 821 |         if (data_ms2['matched'] == 1).all():
 822 |             # case valid in case of  filtering
 823 |             mbr_flag = True
 824 |         else:
 825 |             if (data_ms2['matched'] == 0).all():
 826 |                 # case valiD for estimation
 827 |                 mbr_flag = False
 828 |             else:
 829 |                 # case valid in case of not filtering
 830 |                 mbr_flag = True
 831 |     # get txic path: assumes txic is in the same directory as moff.py
 832 |     txic_executable_name = "txic_json.exe"
 833 |     txic_path = os.path.join(os.path.dirname(
 834 |         os.path.realpath(sys.argv[0])), txic_executable_name)
 835 |     # for all the input peptide in data_ms2
 836 |     try:
 837 |         if match_filter_flag:
 838 |             all_isotope_df = build_matched_modification(
 839 |                 data_ms2, ptm_map, tol, moff_pride_flag, h_rt_w)
 840 |             xic_data = get_xic_data(flag_mzml, flag_windows, all_isotope_df[[
 841 |                 'mz', 'tol', 'ts', 'te']], loc_output, name_file, txic_path, loc, 1, tol,rt_list,id_list)
 842 |             # new filtering
 843 |             # not needed
 844 |             all_isotope_df['prog_xic_index'] = list(range(0, len(xic_data)))
 845 |             all_isotope_df['original_ptm'] = np.repeat(data_ms2.index, 4)
 846 |             all_isotope_df["intensity"] = -1
 847 |             all_isotope_df["rt_peak"] = -1
 848 |             all_isotope_df["lwhm"] = -1
 849 |             all_isotope_df["rwhm"] = -1
 850 |             all_isotope_df["5p_noise"] = -1
 851 |             all_isotope_df["10p_noise"] = -1
 852 |             all_isotope_df["SNR"] = -1
 853 |             all_isotope_df["log_L_R"] = -1
 854 |             all_isotope_df["log_int"] = -1
 855 |             if estimate_flag == 0:
 856 |                 data_ms2[['intensity', 'rt_peak', 'lwhm', 'rwhm', '5p_noise', '10p_noise', 'SNR', 'log_L_R',
 857 |                           'log_int']] = data_ms2.apply(lambda x: filtering_match_peak(
 858 |                     x, all_isotope_df, estimate_flag, moff_pride_flag, log, rt_drift, err_ratio_int, xic_data, mbr_flag,
 859 |                     h_rt_w, s_w, s_w_match, offset_index), axis=1)
 860 |             else:
 861 |                 data_ms2[['Erro_RelIntensity_TheoExp', 'rankcorr', 'RT_drift', 'delta_rt',
 862 |                           'delta_log_int']] = data_ms2.apply(lambda x: estimate_on_match_peak(
 863 |                     x, all_isotope_df, estimate_flag, moff_pride_flag, log, rt_drift, err_ratio_int, xic_data, mbr_flag,
 864 |                     h_rt_w, s_w, s_w_match, offset_index), axis=1)
 865 |             if estimate_flag != 1:
 866 |                 data_ms2 = data_ms2[(data_ms2[['10p_noise', '5p_noise', 'SNR', 'intensity',
 867 |                                                'log_L_R', 'log_int', 'lwhm', 'rt_peak', 'rwhm']] != -1).all(1)]
 868 |         else:
 869 |             # not match  filter
 870 |             temp = data_ms2[['mz', 'rt']].copy()  # strange cases
 871 |             temp['tol'] = int(tol)
 872 |             if moff_pride_flag == 1:
 873 |                 temp['ts'] = (data_ms2['rt']) - h_rt_w
 874 |                 temp['te'] = (data_ms2['rt']) + h_rt_w
 875 |             else:
 876 |                 temp['ts'] = (data_ms2['rt'] / 60) - h_rt_w
 877 |                 temp['te'] = (data_ms2['rt'] / 60) + h_rt_w
 878 |             temp.drop('rt', 1, inplace=True)
 879 |             xic_data = get_xic_data(
 880 |                 flag_mzml, flag_windows, temp, loc_output, name_file, txic_path, loc, 0, tol,rt_list,id_list)
 881 |             data_ms2.reset_index(inplace=True)
 882 |             data_ms2[['intensity', 'rt_peak', 'lwhm', 'rwhm', '5p_noise', '10p_noise', 'SNR', 'log_L_R',
 883 |                       'log_int']] = data_ms2.apply(
 884 |                 lambda x: compute_peak_simple(x, xic_data, log, mbr_flag, h_rt_w, s_w, s_w_match, offset_index,
 885 |                                               moff_pride_flag, -1, -1, 1), axis=1)
 886 | 
 887 |     except Exception as e:
 888 |         traceback.print_exc()
 889 |         raise e
 890 |     return (data_ms2, 1)
 891 | 
 892 | 
 893 | def build_matched_modification(data, ptm_map, tol, moff_pride_flag, h_rt_w):
 894 |     """
 895 |     Computation of th. isotopic envelope tanking into account PSM modification
 896 |     :param data:
 897 |     :param ptm_map:
 898 |     :param tol:
 899 |     :param moff_pride_flag:
 900 |     :param h_rt_w:
 901 |     :return:
 902 |     """
 903 |     all_isotope_df = pd.DataFrame(
 904 |         columns=['peptide', 'mz', 'ratio_iso', 'tol', 'rt', 'matched', 'ts', 'te'])
 905 |     for row in data.itertuples():
 906 |         # get the sequence
 907 |         # for MQ sequence is (mod_tag )
 908 |         # for PS sequence is  <mod_tag>
 909 |         mq_mod_flag = False
 910 |         if mq_mod_flag:
 911 |             if not ('(' in row.mod_peptide) and mq_mod_flag:
 912 |                 #  only fixed mod
 913 |                 comps = Counter(
 914 |                     list(chain(*[list(std_aa_comp[aa].elements()) for aa in row.peptide])))
 915 |                 comps["H"] += 2
 916 |                 comps["O"] += 1
 917 |                 fix_mod_count = row.peptide.count('C')
 918 |                 if fix_mod_count > 0:
 919 |                     comps["H"] += (ptm_map['cC']['deltaChem']
 920 |                                    [0] * fix_mod_count)
 921 |                     comps["C"] += (ptm_map['cC']['deltaChem']
 922 |                                    [1] * fix_mod_count)
 923 |                     comps["N"] += (ptm_map['cC']['deltaChem']
 924 |                                    [2] * fix_mod_count)
 925 |                     comps["O"] += (ptm_map['cC']['deltaChem']
 926 |                                    [3] * fix_mod_count)
 927 |             else:
 928 |                 comps = Counter(
 929 |                     list(chain(*[list(std_aa_comp[aa].elements()) for aa in row.peptide])))
 930 |                 for ptm in ptm_map.keys():
 931 |                     ptm_c = row.mod_peptide.count(ptm)
 932 |                     if ptm_c >= 1:
 933 |                         comps["H"] += (ptm_map[ptm]['deltaChem'][0] * ptm_c)
 934 |                         comps["C"] += (ptm_map[ptm]['deltaChem'][1] * ptm_c)
 935 |                         comps["N"] += (ptm_map[ptm]['deltaChem'][2] * ptm_c)
 936 |                         comps["O"] += (ptm_map[ptm]['deltaChem'][3] * ptm_c)
 937 |                 # add eventually fixed mod/
 938 |                 fix_mod_count = row.mod_peptide.count('C')
 939 |                 if fix_mod_count > 0:
 940 |                     comps["H"] += (ptm_map['cC']['deltaChem']
 941 |                                    [0] * fix_mod_count)
 942 |                     comps["C"] += (ptm_map['cC']['deltaChem']
 943 |                                    [1] * fix_mod_count)
 944 |                     comps["N"] += (ptm_map['cC']['deltaChem']
 945 |                                    [2] * fix_mod_count)
 946 |                     comps["O"] += (ptm_map['cC']['deltaChem']
 947 |                                    [3] * fix_mod_count)
 948 |                 comps["H"] += 2
 949 |                 comps["O"] += 1
 950 |         else:
 951 |             # fixed and variable mod are both in the sequence
 952 |             comps = Counter(
 953 |                 list(chain(*[list(std_aa_comp[aa].elements()) for aa in row.peptide])))
 954 |             if '<' in row.mod_peptide or '-' in row.mod_peptide:
 955 |                 # check only if modificatio are present.
 956 |                 # for the future use dthe tag_mod_sequence_delimiter use in moFF_setting
 957 |                 for ptm in ptm_map.keys():
 958 |                     ptm_c = row.mod_peptide.count(ptm)
 959 |                     # ptm_c =  sum(ptm in s for s in row.mod_peptide)
 960 |                     if ptm_c >= 1:
 961 |                         comps["H"] += (ptm_map[ptm]['deltaChem'][0] * ptm_c)
 962 |                         comps["C"] += (ptm_map[ptm]['deltaChem'][1] * ptm_c)
 963 |                         comps["N"] += (ptm_map[ptm]['deltaChem'][2] * ptm_c)
 964 |                         comps["O"] += (ptm_map[ptm]['deltaChem'][3] * ptm_c)
 965 |             comps["H"] += 2
 966 |             comps["O"] += 1
 967 | 
 968 |         theoretical_isotopic_cluster = isotopic_variants(
 969 |             comps,   charge= int(round(row.mass / float(row.mz))) , npeaks=3)
 970 |         mz_iso = [peak.mz for peak in theoretical_isotopic_cluster]
 971 |         delta = mz_iso[0] - mz_iso[1]
 972 |         mz_iso.append(mz_iso[0] + delta)
 973 |         ratio_iso = [peak.intensity for peak in theoretical_isotopic_cluster]
 974 |         ratio_iso.append(-1)
 975 |         isotopic_df = pd.DataFrame({'mz': mz_iso, 'ratio_iso': ratio_iso})
 976 | 
 977 |         isotopic_df.loc[:, 'exp_mz'] = row.mz
 978 |         isotopic_df.loc[:, 'peptide'] = row.mod_peptide
 979 |         isotopic_df.loc[:, 'tol'] = int(tol)
 980 |         isotopic_df.loc[:, 'rt'] = row.rt
 981 |         isotopic_df.loc[:, 'matched'] = 1
 982 |         if moff_pride_flag:
 983 |             # moffpridedata  rt is in minutes
 984 |             isotopic_df['ts'] = (row.rt) - h_rt_w
 985 |             isotopic_df['te'] = (row.rt) + h_rt_w
 986 |         else:
 987 |             # not moffpridedata rt in second
 988 |             isotopic_df['ts'] = (row.rt / 60) - h_rt_w
 989 |             isotopic_df['te'] = (row.rt / 60) + h_rt_w
 990 | 
 991 |         all_isotope_df = pd.concat(
 992 |             [all_isotope_df, isotopic_df], join='outer', axis=0, sort=False)
 993 |     all_isotope_df.reset_index(inplace=True)
 994 | 
 995 |     return all_isotope_df
 996 | 
 997 | 
 998 | def get_xic_data(flag_mzml, flag_windows, data, loc_output, name_file, txic_path, loc, flag_filtering, tol,rt_list,id_list):
 999 |     """
1000 | 
1001 |     Run the txic_json.xe library to get all the xic requested by each process for Thermo raw file
1002 | 
1003 |     :param flag_mzml:
1004 |     :param flag_windows:
1005 |     :param data: called temp where the function it's called
1006 |     :param loc_output:
1007 |     :param name_file:
1008 |     :param txic_path:
1009 |     :param loc:
1010 |     :param flag_filtering:
1011 |     :param tol: ms search tolerance in ppm
1012 |     :return:
1013 |     """
1014 |     if not flag_mzml:
1015 |         # txic-28-9-separate-jsonlines.exe
1016 |         if not flag_windows:
1017 |             # Linux  to avoid cmd  string too long  and its error. the thresold is mainly base on  from empirical evaluation.
1018 |             if len(data.to_json(orient='records')) >= 50000:
1019 |                 with open(os.path.join(loc_output, multiprocessing.current_process().name + '_' + name_file + '.json'),
1020 |                           'w') as f:
1021 |                     f.write(data.to_json(orient='records'))
1022 |                 args_txic = shlex.split("mono " + txic_path + " -jf " + os.path.join(
1023 |                     loc_output, multiprocessing.current_process().name + '_' + name_file + '.json') + " -f " + loc,
1024 |                                         posix=False)
1025 |             else:
1026 |                 #  small amount of char. in the request
1027 |                 args_txic = shlex.split(
1028 |                     "mono " + txic_path + " -j " + data.to_json(orient='records') + " -f " + loc, posix=True)
1029 |         else:
1030 |             # Windows to avoid cmd  string too long  and its error. the thresold is mainly base on  from empirical evaluation.
1031 |             if len(data.to_json(orient='records')) >= 10000:
1032 |                 with open(os.path.join(loc_output, multiprocessing.current_process().name + '_' + name_file + '.json'),
1033 |                           'w') as f:
1034 |                     f.write(data.to_json(orient='records'))
1035 |                 args_txic = shlex.split(txic_path + " -jf " + os.path.join(
1036 |                     loc_output, multiprocessing.current_process().name + '_' + name_file + '.json') + " -f " + loc,
1037 |                                         posix=False)
1038 |             else:
1039 |                 #  small amount of char. in the request
1040 |                 args_txic = shlex.split(
1041 |                     txic_path + " -j " + data.to_json(orient='records') + " -f " + loc, posix=False)
1042 |         # start_timelocal = time.time()
1043 |         p = subprocess.Popen(args_txic, stdout=subprocess.PIPE)
1044 |         output, err = p.communicate()
1045 |         xic_data = []
1046 |         for l in range(0, data.shape[0]):
1047 |             temp = json.loads(output.decode("utf-8").split('\n')[l])
1048 |             xic_data.append(pd.DataFrame(
1049 |                 {'rt': temp['results']['times'], 'intensity': temp['results']['intensities']},
1050 |                 columns=['rt', 'intensity']))
1051 |     else:
1052 |         run_temp = pymzml.run.Reader(loc,MS1_Precision=5e-6)
1053 |         #rt_list, id_list = scan_mzml(loc)
1054 |         xic_data = mzML_get_all(data, tol,  run_temp, rt_list, id_list)
1055 |     return xic_data
1056 | 


--------------------------------------------------------------------------------
/moff_all.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import argparse
  4 | import ast
  5 | import configparser
  6 | import gc
  7 | import json
  8 | import logging.config
  9 | import multiprocessing
 10 | import os
 11 | import sys
 12 | import time
 13 | 
 14 | import numpy as np
 15 | import pandas as pd
 16 | 
 17 | import moff
 18 | import moff_mbr
 19 | 
 20 | log = logging.getLogger(__name__)
 21 | log.setLevel(logging.DEBUG)
 22 | 
 23 | """moFF: entry point where to run moFF and all its functions """
 24 | 
 25 | if __name__ == '__main__':
 26 | 
 27 |     multiprocessing.freeze_support()
 28 | 
 29 |     parser_1 = argparse.ArgumentParser(
 30 |         description='moFF match between run and apex module input parameter', add_help=False)
 31 | 
 32 |     parser_1.add_argument('--config_file', dest='config_file', action='store',
 33 |                           help='specify a moFF parameter file ', required=False)
 34 |     args, remaining_argv = parser_1.parse_known_args()
 35 |     if args.config_file:
 36 |         config = configparser.SafeConfigParser(allow_no_value=True)
 37 |         config.read([args.config_file])
 38 |         moFF_parameters = dict(config.items("moFF_parameters"))
 39 |         # check if loc_in  is set in the input file
 40 |         if not ('loc_in' in moFF_parameters.keys() and 'raw_repo' in moFF_parameters.keys()):
 41 |             moFF_parameters['tsv_list'] = moFF_parameters['tsv_list'].split(
 42 |                 ' ')
 43 |         if not ('raw_repo' in moFF_parameters.keys()):
 44 |             moFF_parameters['raw_list'] = moFF_parameters['raw_list'].split(
 45 |                 ' ')
 46 |         if not ('tol' in moFF_parameters.keys()):
 47 |             exit('you must specify the tollerance in the configuration file ')
 48 |         moFF_parameters['tol'] = float(moFF_parameters['tol'])
 49 |         moFF_parameters['xic_length'] = float(moFF_parameters['xic_length'])
 50 |         moFF_parameters['rt_peak_win'] = float(moFF_parameters['rt_peak_win'])
 51 |         moFF_parameters['rt_peak_win_match'] = float(
 52 |             moFF_parameters['rt_peak_win_match'])
 53 |         moFF_parameters['peptide_summary'] = True if moFF_parameters['peptide_summary'] != '' else False
 54 |         moFF_parameters['w_comb'] = True if moFF_parameters['w_comb'] != '' else False
 55 |         moFF_parameters['out_flag'] = True if moFF_parameters['out_flag'] != '' else False
 56 |         moFF_parameters['w_filt'] = float(moFF_parameters['w_filt'])
 57 |         moFF_parameters['quantile_thr_filtering'] = float(moFF_parameters['quantile_thr_filtering'])
 58 |         moFF_parameters['cpu_num'] = int(moFF_parameters['cpu'])
 59 |         moFF_parameters['sample_size'] = float(moFF_parameters['sample_size'])
 60 |         moFF_parameters['match_filter'] = True if moFF_parameters['match_filter'] != '' else False
 61 |     args_1, remaining_argv = parser_1.parse_known_args()
 62 | 
 63 |     parser = argparse.ArgumentParser(parents=[parser_1],
 64 |                                      description=__doc__,
 65 |                                      formatter_class=argparse.RawDescriptionHelpFormatter, )
 66 | 
 67 |     parser = argparse.ArgumentParser(
 68 |         description='moFF match between run and apex module input parameter')
 69 |     parser.add_argument('--loc_in', dest='loc_in', action='store',
 70 |                         help='specify the folder of the input MS2 peptide list files', required=False)
 71 | 
 72 |     parser.add_argument('--tsv_list', dest='tsv_list', action='store', nargs='*',
 73 |                         help='specify the mzid file as a list', required=False)
 74 | 
 75 |     parser.add_argument('--raw_list', dest='raw_list', action='store',
 76 |                         nargs='*', help='specify the raw file as a list', required=False)
 77 | 
 78 |     parser.add_argument('--sample', dest='sample', action='store',
 79 |                         help='specify witch replicated to use for mbr reg_exp are valid', required=False)
 80 | 
 81 |     parser.add_argument('--ext', dest='ext', action='store', default='txt',
 82 |                         help='specify the file extentention of the input like. Default value: txt', required=False)
 83 | 
 84 |     parser.add_argument('--log_label', dest='log_label', action='store', default='moFF',
 85 |                         help='a label name to use for the log file. Default value: moFF', required=False)
 86 | 
 87 |     parser.add_argument('--w_filt', dest='w_filt', action='store', default=2,
 88 |                         help='width value of the filter  k * mean(Dist_Malahobis). Default value: 2', required=False)
 89 | 
 90 |     parser.add_argument('--out_flag', dest='out_flag', action='store_true', default=True,
 91 |                         help='if set, outliers for rt time allignment are filtered. Default value: True',
 92 |                         required=False)
 93 | 
 94 |     parser.add_argument('--w_comb', dest='w_comb', action='store_true', default=False,
 95 |                         help='if set, RT model combination is weighted using traing model errors: Default value: False',
 96 |                         required=False)
 97 |     parser.add_argument('--tol', dest='tol', action='store', default=10, type=float,
 98 |                         help='specify the tollerance  parameter in ppm. Default value: 10', required=False)
 99 | 
100 |     parser.add_argument('--xic_length', dest='xic_length', action='store', type=float, default=3,
101 |                         help='specify rt window for xic (minutes). Default value: 3', required=False)
102 | 
103 |     parser.add_argument('--rt_peak_win', dest='rt_peak_win', action='store', type=float, default=1,
104 |                         help='specify the time windows for the peak (minutes). Default value: 1', required=False)
105 | 
106 |     parser.add_argument('--rt_peak_win_match', dest='rt_peak_win_match', action='store', type=float, default=1.2,
107 |                         help='specify the time windows for the matched peptide peak (minutes). Default value: 1.2 ',
108 |                         required=False)
109 | 
110 |     parser.add_argument('--raw_repo', dest='raw_repo', action='store',
111 |                         help='specify the raw file repository ', required=False)
112 | 
113 |     parser.add_argument('--loc_out', dest='loc_out', action='store', default='', help='specify the folder output',
114 |                         required=False)
115 | 
116 |     parser.add_argument('--rt_feat_file', dest='rt_feat_file', action='store',
117 |                         help='specify the file that contains the features to use in the match-between-run RT prediction ',
118 |                         required=False)
119 | 
120 |     parser.add_argument('--peptide_summary', dest='peptide_summary', action='store_true', default=False,
121 |                         help='if set, export a peptide intesity summary tab-delited file. Default value: False',
122 |                         required=False)
123 | 
124 |     parser.add_argument('--tag_pepsum', dest='tag_pepsum', action='store', type=str, default='moFF_run',
125 |                         help='a tag text used for peptide summary file name (peptide_summary_intensity_ + tag + .tab ). Default value: moFF_run ',
126 |                         required=False)
127 |     parser.add_argument('--match_filter', dest='match_filter', action='store_true', default=False,
128 |                         help='If set, filtering on the matched peak is activated. Default value: False', required=False)
129 |     parser.add_argument('--ptm_file', dest='ptm_file', action='store', default='ptm_setting.json',
130 |                         help='name of json ptm file. default file ptm_setting.json ', required=False)
131 |     parser.add_argument('--quantile_thr_filtering', dest='quantile_thr_filtering', action='store', type=float,
132 |                         default=0.75,
133 |                         help='quantile value used to compute the filtering threshold for the matched peak .Default value: 0.75',
134 |                         required=False)
135 |     parser.add_argument('--sample_size', dest='sample_size', action='store', type=float, default=0.20,
136 |                         help='percentage of MS2 peptide used to estimated the threshold. Default value: 0.20',
137 |                         required=False)
138 | 
139 |     parser.add_argument('--mbr', dest='mbr', action='store', type=str, default='on',
140 |                         help='select the moFF workflow: on to run mbr + apex , off to run only apex, only to run obnly mbr. Default value: on   ',
141 |                         required=False)
142 | 
143 |     parser.add_argument('--cpu', dest='cpu_num', action='store', type=int, default=0,
144 |                         help='number of cpu. as default value it will detect automaticaly the CPU number in your machine.',
145 |                         required=False)
146 | 
147 |     if args.config_file:
148 |         # load from config file and load the remaining parametes
149 |         parser.set_defaults(**moFF_parameters)
150 |         args = parser.parse_args(remaining_argv)
151 |     else:
152 |         # normal case for the input parsing
153 |         args = parser.parse_args()
154 | 
155 |     # init global logger
156 |     ch = logging.StreamHandler()
157 |     ch.setLevel(logging.ERROR)
158 |     log.addHandler(ch)
159 | 
160 |     if args.tol is None:
161 |         exit('you must specify the tollerance in ppm ')
162 |     if (args.tsv_list is None) and (args.loc_in is None) and (args.raw_list is None) and (args.raw_repo is None):
163 |         exit('you must specify the input and raw files ')
164 |     if (args.tsv_list is not None) and (args.loc_in is not None) and (args.raw_list is not None) and (
165 |             args.raw_repo is not None):
166 |         exit('you must specify the input and raw files or using: --tsv_list and --raw_list or --loc_in and --raw_repo ')
167 |     else:
168 |         if ((args.tsv_list is None) and (args.raw_list is not None)) or (
169 |                 (args.tsv_list is not None) and (args.raw_list is None)):
170 |             exit(
171 |                 'Missing information: using --tsv_list you must specify the raw file with --raw_list ')
172 |         if ((args.loc_in is None) and (args.raw_repo is not None)) or (
173 |                 (args.loc_in is not None) and (args.raw_repo is None)):
174 |             exit(
175 |                 'Missing information: using --loc_in you must specify the raw file with --raw_repo ')
176 | 
177 |     if args.loc_out != '':
178 |         if not (os.path.isdir(args.loc_out)):
179 |             os.makedirs(args.loc_out)
180 |             log.critical("created output folder  %r", args.loc_out)
181 | 
182 |     config = configparser.RawConfigParser()
183 |     config.read(os.path.join(os.path.dirname(
184 |         os.path.realpath(sys.argv[0])), 'moff_setting.properties'))
185 | 
186 |     # just for Galaxy input is possible to use one big input file and a list of raw file.
187 |     # the big file must have the result of each raw file and the columns 'Spectrum File' should be availabe
188 |     # This option work only with PS report using only --tsv_list and --raw_list
189 |     if  ( args.tsv_list is not None) and  ( args.raw_list is not None) and (len(args.tsv_list)==1)  :
190 |         data_temp= pd.read_csv(args.tsv_list[0],sep="\t")
191 |         if moff.check_ps_input_data(data_temp.columns.tolist(), ast.literal_eval(config.get('moFF', 'ps_default_export_v1'))) == 1:
192 |             # split the data input file only if inave more than ONE raw file and tha input file contain identification for more the ONE run
193 |             if  len(data_temp['Spectrum File'].unique())> 1 and len(args.raw_list) > 1:
194 | 
195 |                 output_list_loc=[]
196 |                 for file in data_temp['Spectrum File'].unique():
197 |                     data_temp[data_temp['Spectrum File']== file].to_csv(os.path.join(os.path.split(args.tsv_list[0])[0],file.split('.')[0]+ '.txt')
198 |                                                                         , sep='\t' , index=False )
199 |                     output_list_loc.append(os.path.join(os.path.split(args.tsv_list[0])[0],file.split('.')[0]+ '.txt') )
200 | 
201 |                 if len(args.raw_list) != len(output_list_loc):
202 |                     exit('-- Number of raw file is different to the number of input sources detectd in your one input file --')
203 |                 #sort them to be sure about the association between input - raw file
204 |                 args.raw_list= sorted(args.raw_list)
205 |                 args.tsv_list= sorted(output_list_loc)
206 |                 #clean dataset thta I don use anymore
207 |                 del data_temp
208 |                 gc.collect()
209 | 
210 | 
211 | 
212 |     ##---
213 | 
214 |     # fixed variable number of split and also number of CPU presence in the macine
215 |     # change this variable  with repset to the machine setting of the user
216 | 
217 |     if args.cpu_num > 0:
218 |         num_CPU = args.cpu_num
219 |     else:
220 |         num_CPU = multiprocessing.cpu_count()
221 | 
222 |     # only mbr
223 |     if 'only' in args.mbr:
224 |         log.critical('starting matching between run module (mbr)')
225 |         res_state, output_list_loc = moff_mbr.run_mbr(args)
226 |         if res_state == -1:
227 |             exit('An error is occurred during the writing of the mbr file')
228 |         else:
229 |             log.critical('end matching between run module (mbr)')
230 |             exit()
231 | 
232 |     if 'on' in args.mbr:
233 |         log.critical('Matching between run module (mbr)')
234 |         res_state, output_list_loc = moff_mbr.run_mbr(args)
235 |         # --- debug version-- just to run skip the mbr in for special cases
236 |         # res_state= 1
237 |         # output_list_loc =[]
238 |         # for item in os.listdir(args.loc_in):
239 |         #    #log.critical(item)
240 |         #    if os.path.isfile(os.path.join(args.loc_in, item)):
241 |         #        if os.path.join(args.loc_in, item).endswith('.' + args.ext):
242 |         #            mbr_list_loc.append(os.path.join(args.loc_in, item))
243 |         if res_state == -1:
244 |             exit('An error is occurred during the writing of the mbr file')
245 |         if args.tsv_list is not None:
246 |             # input list of raw and tsv file
247 |             if len(args.tsv_list) != len(args.raw_list):
248 |                 exit(
249 |                     'Error:  number of the input files is different from the number of raw files')
250 |             # in case list of file as input , mbr_output is written in local folder
251 |             folder = os.path.join('mbr_output')
252 |         else:
253 |             folder = os.path.join(args.loc_in, 'mbr_output')
254 | 
255 |         log.critical('Apex module... ')
256 | 
257 |     if 'off' in args.mbr:
258 |         # put everython in mbr_loc
259 |         output_list_loc = []
260 |         if not (args.loc_in is None):
261 |             for item in os.listdir(args.loc_in):
262 |                 # log.critical(item)
263 |                 if os.path.isfile(os.path.join(args.loc_in, item)):
264 |                     if os.path.join(args.loc_in, item).endswith('.' + args.ext):
265 |                         output_list_loc.append(os.path.join(args.loc_in, item))
266 |         else:
267 |             output_list_loc = args.tsv_list
268 | 
269 |     for c, file_name in enumerate(output_list_loc):
270 |         name = os.path.basename(file_name).split('.')[0]
271 |         moff.check_log_existence(os.path.join(
272 |             args.loc_out, name + '__moff.log'))
273 |         fh = logging.FileHandler(os.path.abspath(
274 |             os.path.join(args.loc_out, name + '__moff.log')), mode='a')
275 |         fh.setLevel(logging.DEBUG)
276 | 
277 |         log.addHandler(fh)
278 | 
279 |         log_file = os.path.join(args.loc_out, name + '__moff.log')
280 |         tol = args.tol
281 |         h_rt_w = args.xic_length
282 |         s_w = args.rt_peak_win
283 |         s_w_match = args.rt_peak_win_match
284 | 
285 |         if args.tsv_list is not None:
286 |             # raw_list contains the current raws file provided by args.raw_list option
287 |             raw_list = args.raw_list[c]
288 |         else:
289 |             raw_list = None
290 | 
291 |         loc_raw = args.raw_repo if not None else raw_list
292 |         loc_output = args.loc_out
293 | 
294 | 
295 |         df = pd.read_csv(file_name, sep="\t")
296 |         # add same safety checks len > 1
297 |         # Flag for pride pipeline, or to set from second to minute as input rt time scale
298 |         moff_pride_flag = False
299 |         if moff.check_ps_input_data(df.columns.tolist(), ast.literal_eval(config.get('moFF', 'moffpride_format'))) == 1:
300 |             # if it is a moff_pride data I do not check aany other requirement
301 |             log.critical('moffPride input detected')
302 |             moff_pride_flag = True
303 |         else:
304 |             if not 'matched' in df.columns:
305 |                 # check if it is a PS file ,
306 |                 list_name = df.columns.values.tolist()
307 |                 # get the lists of PS  defaultcolumns from properties file
308 |                 list = ast.literal_eval(config.get(
309 |                     'moFF', 'ps_default_export_v1'))
310 |                 # here it controls if the input file is a PS export; if yes it maps the input in right moFF name
311 |                 if moff.check_ps_input_data(list_name, list) == 1:
312 |                     # map  the columns name according to moFF input requirements
313 |                     if not args.peptide_summary:
314 |                         data_ms2, list_name = moff.map_ps2moff(
315 |                             df, 'col_must_have_apex')
316 |                     else:
317 |                         data_ms2, list_name = moff.map_ps2moff(
318 |                             df, 'col_must_have_mbr')
319 |                 # check if the field names are     good, in case of pep summary we need same req as in  mbr
320 |             if args.peptide_summary:
321 |                 if moff.check_columns_name(df.columns.tolist(),
322 |                                            ast.literal_eval(config.get('moFF', 'col_must_have_mbr')), log) == 1:
323 |                     exit('ERROR minimal field requested are missing or wrong')
324 |             else:
325 |                 if moff.check_columns_name(df.columns.tolist(),
326 |                                            ast.literal_eval(config.get('moFF', 'col_must_have_apex')), log) == 1:
327 |                     exit('ERROR minimal field requested are missing or wrong')
328 | 
329 |         # check if filtering is UP and the input data is not suitable for mbr filtering
330 |         if 'off' in args.mbr and args.match_filter:
331 |             if not 'matched' in df.columns:
332 |                 exit(
333 |                     'mbr peptide not detect in the input file, filtering of mbr peptides is not possible. Please set --match_filter to 0 and run again.')
334 |             if not ('mod_peptide' in df.columns):
335 |                 exit(
336 |                     'mod_peptide sequence is not present your the input file, filtering of mbr peptides is not possible. Please check your infput file or parameter settings')
337 |         log.critical('Starting Apex for %s ...', file_name)
338 |         log.critical('moff Input file: %s  XIC_tol %s XIC_win %4.4f moff_rtWin_peak %4.4f ' % (
339 |             file_name, tol, h_rt_w, s_w))
340 |         if args.raw_list is None:
341 |             log.critical('RAW file from folder :  %s' % loc_raw)
342 |         else:
343 |             log.critical('RAW file  :  %s' % args.raw_list)
344 |         log.critical('Output file in :  %s', loc_output)
345 |         # load the ptm file IF
346 |         # mbr on with filtering  UP
347 |         # mbr off with filtering flag UP (already check if inputdata contains matched field.)
348 |         if 'matched' in df.columns and args.match_filter:
349 |             log.critical('Apex module has detected mbr peptides')
350 |             with open(os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), args.ptm_file)) as data_file:
351 |                 ptm_map = json.load(data_file)
352 | 
353 |         name = os.path.basename(file_name).split('.')[0]
354 |         #  IF raw_list contains mzML file -->  I m going to  read the file,
355 |         #  one time just to save all the scan  Id and their RT.
356 |         rt_list, id_list = moff.scan_mzml(raw_list)
357 | 
358 |         # control id the folder exist
359 |         moff.check_output_folder_existence(loc_output)
360 | 
361 |         # control if exist the same log file : avoid appending output
362 |         # moff.check_log_existence(os.path.join(loc_output, name + '__moff.log'))
363 | 
364 |         if args.match_filter:
365 |             with open(os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])), args.ptm_file)) as data_file:
366 |                 ptm_map = json.load(data_file)
367 |             start_time = time.time()
368 | 
369 |             moff.set_logger(log_file)
370 |             log.critical('starting estimation of quality measures..')
371 |             # run estimation_parameter
372 |             rt_drift, not_used_measure, error_ratio = moff.estimate_parameter(
373 |                 df, name, raw_list, tol, h_rt_w, s_w, s_w_match, loc_raw, loc_output, rt_list, id_list, moff_pride_flag,
374 |                 ptm_map, args.sample_size, args.quantile_thr_filtering, args.match_filter, log_file, num_CPU)
375 |             log.critical(
376 |                 'quality threhsold estimated : MAD_retetion_time %r  Ratio Int. FakeIsotope/1estIsotope: %r' % (
377 |                 rt_drift, error_ratio))
378 |             log.critical('starting apex quantification of MS2 peptides..')
379 |             log.info('log of MS2 identified peptide not retrived :  ..')
380 |             moff.clean_json_temp_file(loc_output)
381 |             myPool = multiprocessing.Pool(num_CPU)
382 |             data_split = np.array_split(
383 |                 df[df['matched'] == 0], num_CPU)
384 |             result = {}
385 |             offset = 0
386 |             for df_index in range(0, len(data_split)):
387 |                 result[df_index] = myPool.apply_async(moff.apex_multithr, args=(
388 |                 data_split[df_index], name, raw_list, tol, h_rt_w, s_w, s_w_match,
389 |                 loc_raw, loc_output, offset, rt_list, id_list, moff_pride_flag, ptm_map, 0, rt_drift, error_ratio, 0,
390 |                 log_file))
391 |                 offset += len(data_split[df_index])
392 |             # save ms2 resulr
393 |             ms2_data = moff.save_moff_apex_result(result)
394 |             log.critical('end  apex quantification of MS2 peptides..')
395 |             log.critical(
396 |                 'starting quantification with matched peaks using the quality filtering...')
397 |             log.critical('initial # matched peaks: %r',
398 |                          df[df['matched'] == 1].shape)
399 |             moff.clean_json_temp_file(loc_output)
400 |             log.info('Log Matched Peptides filtered :')
401 |             data_split = np.array_split(
402 |                 df[df['matched'] == 1], num_CPU)
403 |             result = {}
404 |             offset = 0
405 |             for df_index in range(0, len(data_split)):
406 |                 result[df_index] = myPool.apply_async(moff.apex_multithr, args=(
407 |                 data_split[df_index], name, raw_list, tol, h_rt_w, s_w, s_w_match,
408 |                 loc_raw, loc_output, offset, rt_list, id_list, moff_pride_flag, ptm_map, 0, rt_drift, error_ratio,
409 |                 args.match_filter, log_file))
410 |                 offset += len(data_split[df_index])
411 |             myPool.close()
412 |             myPool.join()
413 |             log.critical('end apex quantification matched peptide ')
414 |             log.critical('Computational time (sec):  %4.4f ' %
415 |                          (time.time() - start_time))
416 |             matched_peak = moff.save_moff_apex_result(result)
417 |             log.critical('after filtering matched peak #%r ',
418 |                          matched_peak.shape[0])
419 |             # concat the ms2 res  + mateched result
420 |             final_res = pd.concat([ms2_data, matched_peak])
421 |             # save result
422 |             final_res.to_csv(os.path.join(loc_output, os.path.basename(
423 |                 name).split('.')[0] + "_moff_result.txt"), sep="\t", index=False)
424 |             moff.clean_json_temp_file(loc_output)
425 |         else:
426 |             moff.set_logger(log_file)
427 |             log.critical(
428 |                 'starting  peptide quantification (ms2 / matched ) ..')
429 |             myPool = multiprocessing.Pool(num_CPU)
430 |             data_split = np.array_split(df, num_CPU)
431 |             result = {}
432 |             offset = 0
433 |             log.info('log of MS2 identified peptide not retrived ')
434 |             start_time = time.time()
435 |             for df_index in range(0, len(data_split)):
436 |                 result[df_index] = myPool.apply_async(moff.apex_multithr,
437 |                                                       args=(data_split[df_index], name, raw_list, tol, h_rt_w,
438 |                                                             s_w, s_w_match, loc_raw, loc_output, offset, rt_list,
439 |                                                             id_list, moff_pride_flag, None, 0, -1, -1, 0, log_file))
440 |                 offset += len(data_split[df_index])
441 |             myPool.close()
442 |             myPool.join()
443 |             log.critical('end apex quantification (ms2 / matched ) peptides')
444 |             log.critical('computational time (sec):  %4.4f ' %
445 |                          (time.time() - start_time))
446 |             start_time_2 = time.time()
447 |             result = moff.save_moff_apex_result(result)
448 |             result.to_csv(os.path.join(loc_output, os.path.basename(name).split(
449 |                 '.')[0] + "_moff_result.txt"), sep="\t", index=False)
450 |             moff.clean_json_temp_file(loc_output)
451 | 
452 |         fh.close()
453 |         log.removeHandler(fh)
454 |         moff.detach_handler()
455 | 
456 |     moff.clean_json_temp_file(loc_output)
457 |     if args.peptide_summary:
458 |         state = moff.compute_peptide_matrix(args.loc_out, log, args.tag_pepsum)
459 |         if not state:
460 |             log.critical(
461 |                 'Error during the computation of the peptide intensity summary file: Check the output folder that contains the moFF results file')
462 | 


--------------------------------------------------------------------------------
/moff_enviroment.yml:
--------------------------------------------------------------------------------
 1 | name: moff_env
 2 | channels:
 3 |   - conda-forge
 4 |   - bioconda
 5 |   - defaults
 6 | dependencies:
 7 | 
 8 |   - brain-isotopic-distribution=1.5.3=py36_0
 9 |   - python=3.6.13
10 |   - mono=6.12.0.90
11 | 
12 |   - numpy=1.19.5
13 | 
14 |   - pandas=1.1.5
15 | 
16 |   - pymzml=2.4.7=py_0
17 |   - pynumpress=0.0.5
18 |   - pyteomics=4.4.2
19 |   - python=3.6.13
20 |   - scikit-learn=0.24.1
21 |   - scipy=1.5.3
22 |   - simplejson=3.17.2
23 | 
24 | prefix: /home/andrea/anaconda3/envs/moff_env
25 | 


--------------------------------------------------------------------------------
/moff_mbr.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import ast
  4 | import configparser
  5 | import copy
  6 | import itertools
  7 | import logging
  8 | import os
  9 | import re
 10 | import sys
 11 | from functools import reduce
 12 | 
 13 | import numpy as np
 14 | import pandas as pd
 15 | from sklearn import linear_model
 16 | from sklearn.metrics import mean_absolute_error
 17 | 
 18 | import moff
 19 | 
 20 | """moFF: matching between runs module """
 21 | 
 22 | # debug
 23 | 
 24 | log = logging.getLogger(__name__)
 25 | log.setLevel(logging.DEBUG)
 26 | 
 27 | 
 28 | # filtering _outlier
 29 | def MahalanobisDist(x, y):
 30 |     """
 31 |     Computee the Mahalanobis distance to filter outlier in the RT allignment
 32 |     :param x:
 33 |     :param y:
 34 |     :return:
 35 |     """
 36 |     covariance_xy = np.cov(x, y, rowvar=0)
 37 |     inv_covariance_xy = np.linalg.inv(covariance_xy)
 38 |     xy_mean = np.mean(x), np.mean(y)
 39 |     x_diff = np.array([x_i - xy_mean[0] for x_i in x])
 40 |     y_diff = np.array([y_i - xy_mean[1] for y_i in y])
 41 |     diff_xy = np.transpose([x_diff, y_diff])
 42 |     md = []
 43 |     for i in range(len(diff_xy)):
 44 |         md.append(np.sqrt(
 45 |             np.dot(np.dot(np.transpose(diff_xy[i]), inv_covariance_xy), diff_xy[i])))
 46 |     return md
 47 | 
 48 | 
 49 | # remove outlier
 50 | def MD_removeOutliers(x, y, width):
 51 |     """
 52 |     Remove outliers point using MahalanobisDist function
 53 |     :param x:
 54 |     :param y:
 55 |     :param width:
 56 |     :return:
 57 |     """
 58 |     MD = MahalanobisDist(x, y)
 59 |     threshold = np.mean(MD) * float(width)  # adjust 1.5 accordingly
 60 |     nx, ny, outliers = [], [], []
 61 |     for i in range(len(MD)):
 62 |         if MD[i] <= threshold:
 63 |             nx.append(x[i])
 64 |             ny.append(y[i])
 65 |         else:
 66 |             outliers.append(i)  # position of removed pair
 67 |     return np.array(nx), np.array(ny), np.array(outliers)
 68 | 
 69 | 
 70 | # combination of rt predicted by each single model
 71 | def combine_model(x, model, err, weight_flag):
 72 |     x = x.values
 73 |     tot_err = np.sum(np.array(err)[np.where(~np.isnan(x))])
 74 | 
 75 |     app_sum = 0
 76 |     app_sum_2 = 0
 77 |     for ii in range(0, len(x)):
 78 |         if ~  np.isnan(x[ii]):
 79 |             if not weight_flag :
 80 |                 app_sum = app_sum + (model[ii].predict(x[ii].reshape(-1,1))[0][0])
 81 |             else:
 82 |                 app_sum_2 = app_sum_2 + \
 83 |                     (model[ii].predict(x[ii].reshape(-1,1))[0][0] *
 84 |                      (float(err[ii]) / float(tot_err)))
 85 | 
 86 |                 # " output weighted mean
 87 |     if not  weight_flag :
 88 |         # not weight outpuy
 89 |         return float(app_sum) / float(np.where(~ np.isnan(x))[0].shape[0])
 90 | 
 91 |     else:
 92 |         # output weight
 93 |         return float(app_sum_2)
 94 | 
 95 | 
 96 | '''
 97 | def train_gp(data_A,data_B,c=None):
 98 |     """
 99 |     Using GP for retention time alligment 
100 |     """
101 |     bins = np.linspace(data_B.min()-2, data_B.max()+1,20)
102 |     digitized = np.digitize(data_B, bins)
103 |     size_bin = [  digitized[digitized == i].shape[0]  for i in range(1, len(bins))]
104 |     tt_x = np.concatenate([random.sample(np.where(digitized == i)[0], 20) for i in range(1, len(bins))])
105 |     test =  np.setdiff1d(range(data_A.shape[0]),tt_x)
106 |     #ff = ym_test_predicted[:, 0] - np.sqrt(np.diag(y_cov))
107 |     #dd = ym_test_predicted[:, 0] + np.sqrt(np.diag(y_cov))
108 |     # plt.fill_between(data_B[test,0], ff  , dd , alpha=0.5,color='k')
109 |     #size_train= int (data_A.shape[0] * 0.10)
110 |     #rows = random.sample(range(data_A.shape[0]),size_train)
111 |     #data_A= data_A[rows,:]
112 |     #data_B= data_B[rows,:]
113 |     # data_B is x
114 |     # data_A if Y
115 |     #kern = GPy.kern.Linear(1)
116 |     #kernel = 3 * DotProduct() + WhiteKernel(noise_level=0.1,noise_level_bounds=(1e-3, 1e-0))
117 | 
118 |     kernel = 0.5 * RBF(length_scale=1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(noise_level=0.3,noise_level_bounds=(1e-3, 2e-0))
119 |     m = GaussianProcessRegressor(kernel=kernel, alpha=0.1, normalize_y=False, n_restarts_optimizer=1).fit(data_B[tt_x], data_A[tt_x])
120 | 
121 |     ym_train_predicted, y_cov_train = m.predict(data_B[tt_x], return_std=False, return_cov=True)
122 |     ym_test_predicted, y_cov_test = m.predict(data_B[test], return_std=False, return_cov=True)
123 | 
124 |     #ff =  np.sqrt(np.diag(y_cov_test))
125 |     #dd = np.sqrt(np.diag(y_cov_test))
126 | 
127 |     ff = ym_test_predicted[:, 0] - (1.96 * np.sqrt(np.diag(y_cov_test)))
128 |     dd = ym_test_predicted[:, 0] + ( 1.96 * np.sqrt(np.diag(y_cov_test)) )
129 | 
130 |     ##printing modell
131 |     plt.figure(figsize=(15, 14), dpi=100)
132 |     #plt.scatter(data_B[test],ym_test_predicted,marker='*',c='red',s=15 )
133 | 
134 |     plt.fill_between(data_B[test, 0], ff, dd, alpha=0.9, color='r')
135 |     plt.scatter(data_B[test],data_A[test],marker='<',c='black',s=25,label='True RT' )
136 |     plt.scatter(data_B[test], ym_test_predicted, marker='*', c='blue', s=25,label='predicted RT')
137 |     plt.legend(loc="best", scatterpoints=1, prop={'size': 18})
138 | 
139 |     plt.title('GP with Rbf_+ whiteNoise on test set: ' + c )
140 |     plt.savefig( 'D:\\workspace\\ionstar_dataset\\mbr_output\\' + c + '__model.png' )
141 | 
142 |     log.critical(' Size error training : %i sec ', tt_x.shape[0])
143 |     log.critical(' Mean absolute error training : %4.4f sec',mean_absolute_error(data_A[tt_x], ym_train_predicted))
144 |     log.critical(' Mean absolute error test_set (not sampled point) : %4.4f sec',mean_absolute_error(data_A[test], ym_test_predicted))
145 | 
146 |     return m, ym_train_predicted,mean_absolute_error(data_A[tt_x], ym_train_predicted)
147 | 
148 | 
149 | 
150 | def combine_model_GP(x, model, err, weight_flag):
151 |     """
152 |     Combination of GP model 
153 |     """
154 |     ra_flag= 0
155 | 
156 | 
157 |     #tot_err =  1- ( (np.array(err)[np.where(~np.isnan(x))]) / np.max(np.array(err)[np.where(~np.isnan(x))]))
158 |     tot_err = np.sum(np.array(err)[np.where(~pd.isnull(x))])
159 |     #print tot_err
160 |     #print x
161 |     app_sum = 0
162 |     app_sum_2 = 0
163 |     app_var =0
164 |     for ii in range(0, len(x)):
165 | 
166 | 
167 |         if ~  np.isnan(x[ii]):
168 |             #sklearn
169 |             pred, y_cov = model[ii].predict(x[ii].reshape(1, 1), return_std=False, return_cov=True)
170 |             var = np.sqrt(np.diag(y_cov))
171 |             #pred, var = model[ii].predict(x[ii].reshape(1, 1), include_likelihood=True)
172 |             #ress = model[ii].predict_quantiles(x[ii].reshape(1, 1))
173 | 
174 |             #print ' %i Input Rt  %4.4f  Predicted: %4.4f Var %4.4f  Interval at 95 %4.4f <--> %4.4f  ' % (
175 |             #ii, x[ii], float(pred), float(var), pred - (1.965 * var) ,  pred + (1.965 * var)  )
176 |             #print 'intervall width length %4.4f' % abs((pred - (1.965 * var)) - (pred + (1.965 * var)))
177 | 
178 | 
179 |             if not weight_flag :
180 |                 app_sum = app_sum + (pred)
181 |                 app_var += var
182 |             else:
183 |                 # print ii,model[ii].predict(x[ii])[0][0]
184 |                 w = (float(err[ii]) / float(tot_err))
185 |                 # w= tot_err[ii]
186 |                 # print ii ,'weighted', (model[ii].predict(x[ii])[0][0] * w ),w
187 |                 app_sum_2 = app_sum_2 + (pred * w)
188 |                 app_var += var
189 |     # " output weighted mean
190 | 
191 |     if  float(np.where(~ np.isnan(x))[0].shape[0]) == 0.0 :
192 |         return pd.Series({'time_pred': -1 , 'uncertainty_win': -1})
193 | 
194 |     if  weight_flag :
195 |         f_p = app_sum_2
196 |         mean_var = float(app_var) / float(np.where(~ np.isnan(x))[0].shape[0])
197 |     else:
198 |         mean_var = float(app_var) / float(np.where(~ np.isnan(x))[0].shape[0])
199 |         f_p = float(app_sum) / float(np.where(~ pd.isnull(x))[0].shape[0])
200 | 
201 |     log.critical( '   -->  Final Aggr Predicted: %4.4f Var %4.4f  Interval at 95 %4.4f <--> %4.4f  ' % (  f_p , float(mean_var), f_p - (1.965 * mean_var), f_p + (1.965 * mean_var)))
202 |     log.critical ('   -->  Final intervall width length  %4.4f  |  #_inputs %i' %  (abs(    (f_p - (1.965 * mean_var)) - (f_p + (1.965 * mean_var)) ),int(np.where(~ np.isnan(x))[0].shape[0])) )
203 | 
204 |     return pd.Series({'time_pred': f_p, 'uncertainty_win': 1.965 * mean_var})
205 | 
206 | '''
207 | 
208 | # run the mbr in moFF : input  ms2 identified peptide   output csv file with the matched peptides added
209 | 
210 | 
def _mbr_output_dir(args):
    """Return the folder in which the MBR results are written.

    With ``--inputtsv`` (``args.loc_in`` is None) the folder is ``mbr_output``
    inside ``args.loc_out`` when that is given, otherwise the relative path
    ``mbr_output``.  With ``--inputF`` it is ``mbr_output`` inside the input
    folder; the process exits when that input folder does not exist.
    """
    if args.loc_in is None:
        if args.loc_out is not None:
            return os.path.join(args.loc_out, 'mbr_output')
        output_dir = 'mbr_output'
        print(os.path.abspath(output_dir))
        return output_dir
    if not os.path.exists(args.loc_in):
        sys.exit(os.path.join(args.loc_in) +
                 ' EXIT input folder path is not well specified --> / missing or wrong path')
    return os.path.join(args.loc_in, 'mbr_output')


def _mbr_input_files(args):
    """Return the list of identification files that take part in the matching.

    Either the explicit ``args.tsv_list`` or every regular file of
    ``args.loc_in`` ending in ``'.' + args.ext``.  The ``args.sample`` regular
    expression is applied only when an input folder is used.
    """
    if args.loc_in is None:
        return list(args.tsv_list)
    found = []
    for item in os.listdir(args.loc_in):
        full_path = os.path.join(args.loc_in, item)
        if os.path.isfile(full_path) and full_path.endswith('.' + args.ext):
            log.critical(item)
            found.append(full_path)
    if args.sample is not None:
        found = [path for path in found
                 if re.search(args.sample, path) is not None]
    return found


def _mbr_mean_rt(frame, codes):
    """Return the mean ``rt`` per ``code_unique`` for the rows whose code is in ``codes``.

    Only ``code_unique`` and ``rt`` are kept before averaging: string columns
    make ``groupby().mean()`` raise on pandas >= 2.0, and older versions
    dropped them from the result anyway.
    """
    selected = frame.loc[frame['code_unique'].isin(codes), ['code_unique', 'rt']]
    return selected.groupby('code_unique', as_index=False).mean()


def _mbr_candidate_frame(donor, missing_codes, rt_column):
    """Build the frame of donor peptides that are absent from the target run.

    Rows are collapsed (column-wise ``max``) per modified peptide / protein /
    charge, and the donor retention time is stored in ``rt_column``.
    """
    wanted = ['peptide', 'mod_peptide', 'mass', 'mz', 'charge', 'prot', 'rt']
    frame = donor.loc[donor['code_unique'].isin(missing_codes), wanted].copy()
    frame['code_unique'] = frame['mod_peptide'] + '_' + \
        frame['prot'] + '_' + '_' + \
        frame['charge'].astype(str)
    frame = frame.groupby('code_unique', as_index=False)[[
        'peptide', 'mod_peptide', 'mass', 'charge', 'mz', 'prot', 'rt']].aggregate(max)
    frame = frame[['code_unique', 'peptide', 'mod_peptide',
                   'mass', 'mz', 'charge', 'prot', 'rt']]
    return frame.rename(columns={'rt': rt_column})


def _mbr_fit_rt_model(common, args):
    """Fit the ridge model that maps partner RT (``rt_y``) onto target RT (``rt_x``).

    Returns the fitted model and its mean absolute error on the training data.
    """
    if args.out_flag:
        filt_x, filt_y, pos_out = MD_removeOutliers(
            common['rt_y'].values, common['rt_x'].values, args.w_filt)
        data_B = np.reshape(filt_x, [filt_x.shape[0], 1])
        data_A = np.reshape(filt_y, [filt_y.shape[0], 1])
        log.info('Outlier founded %i  w.r.t %i',
                 pos_out.shape[0], common['rt_y'].shape[0])
    else:
        data_B = np.reshape(common['rt_y'].values, [common.shape[0], 1])
        data_A = np.reshape(common['rt_x'].values, [common.shape[0], 1])

    log.info(' Size trainig shared peptide , %i %i ',
             data_A.shape[0], data_B.shape[0])
    # cross-validated search of the regularisation strength, then a plain
    # ridge regression refitted with the selected value
    search = linear_model.RidgeCV(alphas=np.power(
        2, np.linspace(-30, 30)), scoring='neg_mean_absolute_error')
    search.fit(data_B, data_A)
    model = linear_model.Ridge(alpha=search.alpha_)
    model.fit(data_B, data_A)
    error = mean_absolute_error(data_A, model.predict(data_B))
    log.info(' Mean absolute error training : %4.4f sec', error)
    return model, error


def run_mbr(args):
    """
    Matching Between Runs module.

    Reads the identification files, and for every run (target) fits one
    retention-time model per other run (partner) on the peptides they share.
    The peptides seen only in the partners are then added to the target with a
    retention time predicted by the combination of those models, and one
    ``<name>_match.txt`` tab-separated file per run is written to the
    ``mbr_output`` folder.

    :param args: parsed command line options; the attributes read here are
        loc_in, loc_out, tsv_list, ext, sample, log_label, out_flag, w_filt,
        w_comb and rt_feat_file.
    :return: tuple ``(out_flag, exp_out_name)``: ``out_flag`` is 1, or -1 when
        at least one written output is empty; ``exp_out_name`` is the list of
        the output file paths.
    :raises SystemExit: on a wrong input folder, fewer than two input files,
        missing mandatory columns or too few usable pairwise models.
    """
    console_handler = logging.StreamHandler()
    console_handler.setLevel(logging.ERROR)
    log.addHandler(console_handler)
    file_handler = None
    try:
        output_dir = _mbr_output_dir(args)
        if not os.path.isdir(output_dir):
            log.critical("Created MBR output folder in : %s ",
                         os.path.abspath(output_dir))
            os.makedirs(output_dir)
        else:
            log.critical("MBR Output folder in : %s ",
                         os.path.abspath(output_dir))

        # set log to file
        file_handler = logging.FileHandler(os.path.join(
            output_dir, args.log_label + '_mbr_.log'), mode='w')
        file_handler.setLevel(logging.INFO)
        log.addHandler(file_handler)

        # the properties file is looked up next to the launched script
        moff_path = os.path.dirname(os.path.realpath(sys.argv[0]))
        config = configparser.RawConfigParser()
        config.read(os.path.join(moff_path, 'moff_setting.properties'))

        exp_set = _mbr_input_files(args)
        if len(exp_set) < 2:
            sys.exit(
                'ERROR input files not found or just one input file selected . check the folder or the extension given in input')

        # list of the input dataframes, one per run
        exp_t = []
        for input_file in exp_set:
            log.critical('Reading file: %s ', input_file)
            data_moff = pd.read_csv(input_file, sep="\t", header=0)
            list_name = data_moff.columns.values.tolist()
            # get the list of the PS default columns from the properties file
            list_ps_def = ast.literal_eval(
                config.get('moFF', 'ps_default_export_v1'))
            # a PeptideShaker export is mapped onto the moFF column names
            if moff.check_ps_input_data(list_name, list_ps_def) == 1:
                log.critical(
                    'Detected input file from PeptideShaker export..: %s ', input_file)
                data_moff, list_name = moff.map_ps2moff(
                    data_moff, 'col_must_have_mbr')
                log.critical(
                    'Mapping columns names into the  the moFF requested column name..: %s ', input_file)
            if moff.check_columns_name(list_name, ast.literal_eval(config.get('moFF', 'col_must_have_mbr')), log) == 1:
                sys.exit('ERROR minimal field requested are missing or wrong')
            data_moff['matched'] = 0
            data_moff['mass'] = data_moff['mass'].map('{:.4f}'.format)
            data_moff['code_unique'] = data_moff['mod_peptide'].astype(str)
            data_moff = data_moff.sort_values(by='rt')
            exp_t.append(data_moff)
        # list of the output dataframes; each entry is replaced once its run is processed
        exp_out = list(exp_t)
        # list of the names of the mbr outputs
        exp_out_name = []

        log.critical('Read input --> done ')
        n_replicates = len(exp_t)
        # final status: -1 if one of the outputs is empty
        out_flag = 1
        # extra input columns: they are set to NaN for the matched peptides
        diff_field = np.setdiff1d(exp_t[0].columns, [
            'matched', 'mod_peptide', 'peptide', 'mass', 'mz', 'charge', 'prot', 'rt'])

        log.info('Outlier Filtering is %s  ', 'active' if
                 args.out_flag else 'not active')
        log.info('Number of replicates %i,', n_replicates)
        log.info('Pairwise model computation ----')

        list_shared_pep = None
        if args.rt_feat_file is not None:
            log.critical(
                'Custom list of peptide used  provided by the user in %s', args.rt_feat_file)
            shared_pep_list = pd.read_csv(args.rt_feat_file, sep='\t')
            shared_pep_list['mass'] = shared_pep_list['mass'].map(
                '{:.4f}'.format)
            # NOTE(review): these codes are '<peptide>_<mass>' whereas
            # 'code_unique' above is the bare modified sequence, so the two
            # formats look incompatible -- confirm the intended key.
            shared_pep_list['code'] = shared_pep_list['peptide'].astype(
                str) + '_' + shared_pep_list['mass'].astype(str)
            list_shared_pep = shared_pep_list['code']
            log.info('Custom list of peptide contains  %i ',
                     list_shared_pep.shape[0])

        merge_keys = ['code_unique', 'peptide',
                      'mod_peptide', 'mass', 'mz', 'charge', 'prot']

        for jj in range(n_replicates):
            # models, their training errors (in the unit of the rt column) and status
            # (-1 means model not available for low points in the training set)
            model_save = []
            model_err = []
            model_status = []
            # one candidate frame per partner, its RT stored as rt_0, rt_1, ...
            pre_pep_save = []
            log.info('matching  in %s', exp_set[jj])
            for kk in range(n_replicates):
                if kk == jj:
                    continue
                if args.rt_feat_file is not None:
                    # use of the custom peptides
                    # NOTE(review): this branch never fills pre_pep_save, which
                    # the combination step below needs -- confirm.
                    comA = _mbr_mean_rt(exp_t[jj], list_shared_pep)
                    comB = _mbr_mean_rt(exp_t[kk], list_shared_pep)
                else:
                    # use of the shared peptides
                    log.info('  Matching  %s peptide in   searching in %s ',
                             exp_set[jj], exp_set[kk])
                    list_pep_repA = exp_t[jj]['code_unique'].unique()
                    list_pep_repB = exp_t[kk]['code_unique'].unique()
                    log.info('Peptide unique (mass + sequence) %i , %i ',
                             list_pep_repA.shape[0],
                             list_pep_repB.shape[0])
                    # testing set: partner peptides missing in the target
                    add_pep_frame = _mbr_candidate_frame(
                        exp_t[kk],
                        np.setdiff1d(list_pep_repB, list_pep_repA),
                        'rt_' + str(len(pre_pep_save)))
                    pre_pep_save.append(add_pep_frame)
                    # training set: peptides present in both runs
                    pep_shared = np.intersect1d(list_pep_repA, list_pep_repB)
                    log.info(
                        '  Peptide (mass + sequence)  added size  %i ', add_pep_frame.shape[0])
                    log.info('  Peptide (mass + sequence) )shared  %i ',
                             pep_shared.shape[0])
                    comA = _mbr_mean_rt(exp_t[jj], pep_shared)
                    comB = _mbr_mean_rt(exp_t[kk], pep_shared)
                common = pd.merge(
                    comA, comB, on=['code_unique'], how='inner')
                # NOTE(review): the size threshold is enforced only for the
                # custom peptide list, as in the original code -- confirm.
                if common.shape[0] <= 10 and args.rt_feat_file is not None:
                    model_status.append(-1)
                    continue
                model, error = _mbr_fit_rt_model(common, args)
                model_save.append(model)
                model_err.append(error)
                model_status.append(1)

            if model_status.count(-1) >= n_replicates / 2:
                log.error(
                    'MBR aborted :  mbr cannnot be run, not enough shared pepetide among the replicates ')
                sys.exit(
                    'ERROR : mbr cannnot be run, not enough shared pepetide among the replicates')

            log.info('Combination of the  model  --------')
            log.info('Weighted combination  %s : ', 'Weighted' if
                     args.w_comb else 'Unweighted')
            if n_replicates == 2:
                test = pre_pep_save[0]
            else:
                test = reduce(
                    lambda left, right: pd.merge(
                        left, right, on=merge_keys, how='outer'),
                    pre_pep_save)
            test = test.groupby('code_unique', as_index=False).aggregate(max)
            test.drop('code_unique', axis=1, inplace=True)
            # after the six descriptive columns come the rt_<k> columns, one per partner
            test['time_pred'] = test.iloc[:, 6: (6 + (n_replicates - 1))].apply(
                lambda x: combine_model(x, model_save, model_err, args.w_comb), axis=1)
            test['matched'] = 1

            # still to check better
            if (test['time_pred'] <= 0).any():
                log.info(
                    ' -- Predicted negative RT: those peptide will be deleted')
                test = test[test['time_pred'] > 0]

            test = test.rename(columns={'time_pred': 'rt'})
            test = test[['peptide', 'mod_peptide', 'mass',
                         'mz', 'charge', 'prot', 'rt', 'matched']].copy()
            # just put nan with the missing values
            for field in diff_field.tolist():
                test[field] = np.nan
            log.info('Before adding %s contains %i ',
                     exp_set[jj], exp_t[jj].shape[0])
            exp_out[jj] = pd.concat(
                [exp_t[jj], test], join='outer', axis=0, sort=False)
            log.info('After MBR %s contains:  %i  peptides',
                     exp_set[jj], exp_out[jj].shape[0])
            log.critical('matched features   %i  MS2 features  %i ',
                         exp_out[jj][exp_out[jj]['matched'] == 1].shape[0],
                         exp_out[jj][exp_out[jj]['matched'] == 0].shape[0])
            # the output name keeps the input file name up to its first dot
            out_path = os.path.join(output_dir, os.path.split(
                exp_set[jj])[1].split('.')[0] + '_match.txt')
            exp_out[jj].to_csv(path_or_buf=out_path, sep='\t', index=False)
            exp_out_name.append(out_path)
            if exp_out[jj].shape[0] == 0:
                # sticky: a second empty output must not flip the flag back to 1
                out_flag = -1

        return out_flag, exp_out_name
    finally:
        # release the handlers on every exit path (normal return, sys.exit or
        # error) so that repeated calls do not pile up handlers on the logger
        if file_handler is not None:
            file_handler.close()
            log.removeHandler(file_handler)
        log.removeHandler(console_handler)
560 | 
561 | 


--------------------------------------------------------------------------------
/moff_setting.properties:
--------------------------------------------------------------------------------
 1 | [moFF]
 2 | 
 3 | indicator_ptm="("
 4 | separator = "/t"
 5 | col_must_have_moffpride= ['rt','mz','charge']
 6 | col_must_have_apex= ['peptide','prot','rt','mz','mass','charge']
 7 | col_must_have_mbr= ['peptide','mod_peptide','prot','rt','mz','mass','charge']
 8 | moffpride_format= ['charge','#spectraindex','rt','mz','scan']
 9 | ps_default_export_v1=['Unnamed: 0', 'Protein(s)','Sequence', 'AAs Before', 'AAs After','Position', u'Modified Sequence', 'Variable Modifications','Fixed Modifications', 'Spectrum File', 'Spectrum Title','Spectrum Scan Number', 'RT', 'm/z', 'Measured Charge','Identification Charge', 'Theoretical Mass', 'Isotope Number', 'Precursor m/z Error [ppm]', 'Localization Confidence','Probabilistic PTM score', 'D-score', 'Confidence [%]','Validation']
10 | ps_default_export= ['Unnamed: 0','Protein(s)','Sequence','Variable Modifications','Fixed Modifications','Spectrum File','Spectrum Title','Spectrum Scan Number','RT','m/z','Measured Charge','Identification Charge','Theoretical Mass','Isotope Number','Precursor m/z Error [ppm]','Localization Confidence','Probabilistic PTM score','D-score','Confidence [%]','Validation']
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/ptm_setting_mq.json:
--------------------------------------------------------------------------------
1 | {
2 | "cC": {"deltaChem":[3,2,1,1],"desc":"Carboxyamidomethylation C unimod 4"},
3 | "(ox)": {"deltaChem":[0,0,0,1],"desc":"oxidation unimod 35" },
4 | "(ac)": {"deltaChem":[2,2,0,1],"desc":"Acetylation unimod:1" },
5 | "(gl)": {"deltaChem":[-3,0,-1,0],"desc":"Pyro-glu from Q unimod:28"}}
6 | 


--------------------------------------------------------------------------------
/ptm_setting_ps.json:
--------------------------------------------------------------------------------
1 | {
2 | "<cmm>": {"deltaChem":[3,2,1,1],"desc":"Carboxyamidomethylation C  unimod:4"},
3 | "<ox>": {"deltaChem":[0,0,0,1],"desc":"oxidation oxidation unimod:35" } ,
4 | "ace-":  {"deltaChem":[2,2,0,1],"desc":"Acetylation N-term unimod:1" },
5 | "pyro-": {"deltaChem":[-3,0,-1,0],"desc":"Pyro-glu from Q unimod:28" }}
6 | 


--------------------------------------------------------------------------------
/requirements/development.txt:
--------------------------------------------------------------------------------
 1 | flake8
 2 | mono=6.12.0.90
 3 | pymzml=2.4.7=py_0
 4 | pynumpress=0.0.5
 5 | pyteomics=4.4.2
 6 | numpy=1.19.5
 7 | simplejson=3.17.2
 8 | pyteomics=4.4.2
 9 | scipy=1.5.3
10 | scikit-learn=0.24.1
11 | brain-isotopic-distribution=1.5.3=py36_0


--------------------------------------------------------------------------------
/test/B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol.txt:
--------------------------------------------------------------------------------
  1 | peptide	mod_peptide	prot	Type	Raw file	Experiment	mz	charge	another m/z	mass	rt	PEP	Reverse
  2 | TCVADESHAGCEK	_TCVADESHAGCEK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	488.534423828125	3	488.534502	1462.58168	1207.74	0.007303100000000001	
  3 | TCVADESHAGCEK	_TCVADESHAGCEK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	732.2982788085941	2	732.298115	1462.58168	1207.98	0.000528	
  4 | LGGNEQVTR	_LGGNEQVTR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	487.256774902344	2	487.25670499999995	972.498858	1297.26	3.4299e-12	
  5 | YVLEHHPR	_YVLEHHPR_	P00560	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	526.2781982421881	2	525.777608	1049.54066	1299.12	0.0085868	
  6 | QNCDQFEK	_QNCDQFEK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	534.7244873046881	2	534.724381	1067.43421	1308.7800000000002	0.003908099999999999	
  7 | SHCIAEVEK	_SHCIAEVEK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	536.758117675781	2	536.758223	1071.50189	1331.16	0.001059	
  8 | LQQLEDK	_LQQLEDK_	CON__P34955	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	437.237396240234	2	437.23745	872.4603470000001	1366.44	7.9653e-05	
  9 | ALGGEDVR	_ALGGEDVR_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	408.714263916016	2	408.714142	815.413731	1373.4599999999998	0.0069494	
 10 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	644.8226318359381	2	644.822606	1287.63066	1418.1599999999999	0.00070708	
 11 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	430.21749877929705	3	430.217496	1287.63066	1418.52	0.00021801	
 12 | RVLGQLHGGPSSCSATGTNR	_RVLGQLHGGPSSCSATGTNR_	CON__P15636	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	686.010803222656	3	685.676466	2054.0075699999998	1436.5199999999998	0.0057482	
 13 | HTLNQIDSVK	_HTLNQIDSVK_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	577.8118896484381	2	577.8118450000001	1153.60914	1461.7199999999998	0.0016408000000000002	
 14 | AGFAGDDAPR	_AGFAGDDAPR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	488.727600097656	2	488.72778099999994	975.441008	1462.8600000000001	0.0030172	
 15 | YICDNQDTISSK	_YICDNQDTISSK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	722.3251342773441	2	722.324656	1442.63476	1490.88	0.0037976999999999998	
 16 | DLGEEHFK	_DLGEEHFK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	487.73269653320295	2	487.73253200000005	973.45051	1497.4199999999998	0.0090248	
 17 | IGSEVYHNLK	_IGSEVYHNLK_	P00925;P00924	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	580.307861328125	2	580.308938	1158.60332	1523.34	0.0070383	
 18 | VASLRETYGDMADCCEK	_VASLRETYGDM(ox)ADCCEK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	674.619689941406	3	674.2851469999999	2019.8336100000001	1525.02	0.004357	
 19 | LSSPATLNSR	_LSSPATLNSR_	CON__P00761	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	523.2855224609381	2	523.2854629999999	1044.55637	1532.64	0.00024491	
 20 | STLVGHDTFTK	_STLVGHDTFTK_	CON__Streptavidin	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	603.312072753906	2	603.3116769999999	1204.6088	1555.5	0.00053611	
 21 | VEIIANDQGNR	_VEIIANDQGNR_	P22202	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	614.8179931640631	2	614.817658	1227.62076	1559.0400000000002	8.082199999999999e-05	
 22 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	644.8233642578131	2	644.822606	1287.63066	1566.42	8.4721e-09	
 23 | QQTQHAVEGDCDIHVLK	_QQTQHAVEGDCDIHVLK_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	495.492095947266	4	495.24163200000004	1976.93742	1569.6	0.0040365999999999996	
 24 | LVTDLTK	_LVTDLTK_	CON__P02769;CON__P02768-1	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	395.239379882813	2	395.239461	788.46437	1605.3600000000001	0.0021019000000000003	
 25 | EYEATLEECCAK	_EYEATLEECCAK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	751.8107299804691	2	751.810524	1501.6065	1616.4	4.8215e-05	
 26 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.2978515625	2	547.298039	1092.58152	1627.8600000000001	0.00025242	
 27 | VGGHAAEYGAEALER	_VGGHAAEYGAEALER_	CON__P01966	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	510.58297729492205	3	510.58295	1528.72702	1640.6999999999998	0.0063424	
 28 | LNNELLAK	_LNNELLAK_	CON__P34955	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	457.76882934570295	2	457.768917	913.523281	1661.2199999999998	0.0067480000000000005	
 29 | AEFVEVTK	_AEFVEVTK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	461.747680664063	2	461.74765	921.4807480000001	1664.46	0.0022896	
 30 | FSVSGEGEGDATYGK	_FSVSGEGEGDATYGK_	CON__Q9U6Y5	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	752.334838867188	2	752.333535	1502.6525199999999	1668.84	0.004831800000000001	
 31 | FEGDTLVNR	_FEGDTLVNR_	CON__Q9U6Y5	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	525.7652587890631	2	525.764363	1049.51417	1686.5400000000002	0.001196	
 32 | VEATFGVDESNAK	_VEATFGVDESNAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.828796386719	2	683.8278889999999	1365.64122	1698.84	8.6027e-12	
 33 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	644.822998046875	2	644.822606	1287.63066	1699.14	0.00088742	
 34 | EACFAVEGPK	_EACFAVEGPK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	554.260986328125	2	554.2605990000001	1106.50665	1699.2	6.717600000000001e-05	
 35 | YQVTVIDAPGHR	_YQVTVIDAPGHR_	P02994	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	452.57376098632795	3	452.57372599999997	1354.69935	1711.9199999999998	0.0046098	
 36 | VATVSLPR	_VATVSLPR_	CON__P00761	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	421.75830078125	2	421.75835199999995	841.5021519999999	1723.26	0.00012912	
 37 | LVLVGDGGTGK	_LVLVGDGGTGK_	P32835;P32836	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	508.292663574219	2	508.29275599999994	1014.57096	1746.12	0.0011582	
 38 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.2984619140631	2	547.298039	1092.58152	1753.26	0.0094572	
 39 | ECCHGDLLECADDRADLAK	_ECCHGDLLECADDRADLAK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	750.3193359375	3	749.9857599999999	2246.93545	1758.4800000000002	5.5766999999999996e-05	
 40 | DDPHACYSTVFDK	_DDPHACYSTVFDK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	518.8897705078131	3	518.889162	1553.64566	1781.16	0.0010465	
 41 | NPVILADACCSR	_NPVILADACCSR_	P06169	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	688.3263549804691	2	688.326479	1374.6384	1795.9800000000002	0.0014334	
 42 | IWHHTFYNELR	_IWHHTFYNELR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	506.256256103516	3	505.921237	1514.74188	1797.36	0.0057702	
 43 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298767089844	2	547.298039	1092.58152	1799.94	6.069600000000001e-13	
 44 | HSTVFDNLPNPEDRK	_HSTVFDNLPNPEDRK_	CON__Q29443;CON__Q0IIK2	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	590.294250488281	3	590.2919469999999	1767.8540100000002	1803.18	0.00025780000000000003	
 45 | GAGSSEPVTGLDAK	_GAGSSEPVTGLDAK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	645.32373046875	2	644.822606	1287.63066	1803.7800000000002	0.006525399999999999	
 46 | HLVDEPQNLIK	_HLVDEPQNLIK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	435.910339355469	3	435.91022699999996	1304.70885	1806.42	3.6825999999999996e-05	
 47 | HLVDEPQNLIK	_HLVDEPQNLIK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	653.3619384765631	2	653.3617019999999	1304.70885	1806.48	3.9546e-11	
 48 | LQAEIEGLK	_LQAEIEGLK_	CON__P05787	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	500.78732299804705	2	500.78730700000006	999.5600609999999	1817.22	0.0095432	
 49 | IGGIGTVPVGR	_IGGIGTVPVGR_	P02994	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	513.308898925781	2	513.308741	1024.60293	1839.78	1.5818e-05	
 50 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.838684082031	2	669.8380589999999	1337.66157	1862.94	7.4054e-16	
 51 | YLYEIAR	_YLYEIAR_	CON__P02769;CON__P02768-1	MULTI-SECPEP	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	464.531829833984	2	464.25035999999994	926.486168	1867.62	0.0070293000000000005	
 52 | ESTLHLVLR	_ESTLHLVLR_	P0CH09;P0CH08;P0CG63;P05759	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	534.314270019531	2	534.314023	1066.61349	1881.3	0.00473	
 53 | LKPDPNTLCDEFK	_LKPDPNTLCDEFK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	526.260925292969	3	526.260708	1575.76029	1893.18	7.773999999999999e-09	
 54 | LPLQDVYK	_LPLQDVYK_	P02994	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	488.280029296875	2	488.279118	974.543683	1904.6399999999999	0.0076661	
 55 | RHPEYAVSVLLR	_RHPEYAVSVLLR_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	480.608642578125	3	480.60877	1438.80448	1908.4800000000002	0.00013782	
 56 | AVFPSIVGRPR	_AVFPSIVGRPR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	400.24020385742205	3	400.240018	1197.69823	1910.52	3.1516000000000005e-06	
 57 | AVFPSIVGRPR	_AVFPSIVGRPR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	599.8565673828131	2	599.85639	1197.69823	1910.76	0.0028858	
 58 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.854919433594	2	683.853709	1365.6928699999999	1922.4599999999998	2.1038e-16	
 59 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298278808594	2	547.298039	1092.58152	1946.7	8.6306e-15	
 60 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.2982177734381	2	547.298039	1092.58152	1959.3000000000002	0.00062374	
 61 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.838806152344	2	669.8380589999999	1337.66157	1970.6999999999998	1.6978e-06	
 62 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298034667969	2	547.298039	1092.58152	1971.8400000000001	1.8827e-09	
 63 | TPIVGQPSIPGGPVR	_TPIVGQPSIPGGPVR_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	737.92333984375	2	737.922458	1473.83036	1980.18	4.2493999999999996e-21	
 64 | TGPNLHGLFGR	_TGPNLHGLFGR_	CON__P62894	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	390.212066650391	3	390.21224	1167.61489	1982.52	0.0052223999999999994	
 65 | ANELLINVK	_ANELLINVK_	P00330	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	507.303070068359	2	507.303124	1012.5917	1983.2400000000002	0.0040695	
 66 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298095703125	2	547.298039	1092.58152	1984.4399999999998	0.0041457	
 67 | LSISETYDLK	_LSISETYDLK_	CON__P34955	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	584.810974121094	2	584.808436	1167.60232	1986.12	0.0023285	
 68 | LSELEAALQR	_LSELEAALQR_	CON__P05787;CON__REFSEQ:XP_092267;CON__H-INV:HIT000292931;CON__Q9H552	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	565.315246582031	2	565.31422	1128.61389	1990.1400000000003	0.00065691	
 69 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298095703125	2	547.298039	1092.58152	1997.3400000000001	8.6306e-15	
 70 | AGLQFPVGR	_AGLQFPVGR_	Q12692	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	472.769378662109	2	472.76925199999994	943.52395	2006.2199999999998	0.00060853	
 71 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298034667969	2	547.298039	1092.58152	2009.7600000000002	0.00035391	
 72 | YILAGVENSK	_YILAGVENSK_	Biognosys	MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298095703125	2	547.298039	1092.58152	2022.6599999999999	0.0011562	
 73 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.853759765625	2	683.853709	1365.6928699999999	2029.38	7.5797e-07	
 74 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.853820800781	2	683.853709	1365.6928699999999	2047.1399999999999	4.6213e-05	
 75 | VPQVSTPTLVEVSR	_VPQVSTPTLVEVSR_	CON__P02769;CON__P02768-1	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	756.4257202148441	2	756.42503	1510.8355099999999	2054.82	9.242e-10	
 76 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	466.56106567382795	3	466.561375	1396.66229	2059.02	0.00040223	
 77 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.33984375	2	699.338424	1396.66229	2060.52	7.3166e-16	
 78 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298095703125	2	547.298039	1092.58152	2060.64	0.00059209	
 79 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.853820800781	2	683.853709	1365.6928699999999	2074.0800000000004	4.6115e-05	
 80 | LAVNMVPFPR	_LAVNM(ox)VPFPR_	CON__ENSEMBL:ENSBTAP00000025008	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	580.318298339844	2	580.318251	1158.62195	2082.72	0.00060611	
 81 | LAVNMVPFPR	_LAVNM(ox)VPFPR_	CON__ENSEMBL:ENSBTAP00000025008	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	580.318542480469	2	580.318251	1158.62195	2095.38	0.00010404	
 82 | PLLVEPEGLEK	_PLLVEPEGLEK_	CON__ENSEMBL:ENSBTAP00000024146	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	612.348327636719	2	612.347729	1222.6809	2095.6800000000003	0.0020651999999999997	
 83 | AVFPSIVGR	_AVFPSIVGR_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	473.279174804688	2	473.27945199999994	944.544351	2103.2400000000002	0.006728100000000001	
 84 | LVNELTEFAK	_LVNELTEFAK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	582.319152832031	2	582.318971	1162.62339	2103.84	3.6337e-06	
 85 | LILPGELAK	_LILPGELAK_	P02294;P02293	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	477.305297851563	2	477.30513600000006	952.5957179999999	2146.08	0.004601600000000001	
 86 | ADVTPADFSEWSK	_ADVTPADFSEWSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	726.8375854492191	2	726.8357139999999	1451.65687	2152.56	2.4021999999999996e-07	
 87 | TPVITGAPYEYR	_TPVITGAPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	683.854064941406	2	683.853709	1365.6928699999999	2152.92	0.00040175	
 88 | TPVISGGPYEYR	_TPVISGGPYEYR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	669.8394775390631	2	669.8380589999999	1337.66157	2153.46	0.00079166	
 89 | YILAGVENSK	_YILAGVENSK_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	547.298583984375	2	547.298039	1092.58152	2154.48	0.0025256999999999996	
 90 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.3391723632809	2	699.338424	1396.66229	2185.2000000000003	4.3384e-06	
 91 | GVVDSEDLPLNLSR	_GVVDSEDLPLNLSR_	P02829;P15108	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	757.397338867188	2	757.39647	1512.77839	2191.8	0.0005147	
 92 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339721679688	2	699.338424	1396.66229	2207.7000000000003	0.00031579	
 93 | VVVLPFPSK	_VVVLPFPSK_	CON__Q58D62	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	493.30810546875	2	493.307678	984.6008029999999	2218.14	0.006728100000000001	
 94 | QDGQFSVLFTK	_QDGQFSVLFTK_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	635.3277587890631	2	635.3273280000001	1268.6401	2236.2000000000003	0.0016334	
 95 | TVMENFVAFVDK	_TVM(ox)ENFVAFVDK_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	708.34814453125	2	708.347403	1414.68025	2255.5800000000004	4.4343e-05	
 96 | LVDTFLEDVK	_LVDTFLEDVK_	CON__P34955	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	589.819030761719	2	589.818804	1177.62306	2255.88	0.00057709	
 97 | DAGTIAGLNVLR	_DAGTIAGLNVLR_	P10591;P10592;P16474;P22202	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	600.341003417969	2	600.340769	1198.66699	2258.58	0.0013465999999999999	
 98 | EALDFFAR	_EALDFFAR_	P00330;P00331;P38113	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	484.745544433594	2	484.74544199999997	967.4763310000001	2259.6	0.0050642	
 99 | IGLDCASSEFFK	_IGLDCASSEFFK_	P00925;P00924	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	687.324462890625	2	687.323928	1372.6333	2265.12	0.00021901	
100 | QDGQFSVLFTK	_QDGQFSVLFTK_	CON__P12763	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	635.327575683594	2	635.3273280000001	1268.6401	2269.92	0.0011582	
101 | IINEPTAAAIAYGLDK	_IINEPTAAAIAYGLDK_	P10591;P10592;P22202	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	830.9541625976559	2	830.451245	1658.88794	2271.24	1.8319000000000001e-07	
102 | SYELPDGQVITIGNER	_SYELPDGQVITIGNER_	P60010	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	895.951049804688	2	895.949598	1789.8846399999998	2283.84	0.00038165	
103 | DGLDAASYYAPVR	_DGLDAASYYAPVR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	699.339599609375	2	699.338424	1396.66229	2319.66	0.00080568	
104 | LGEYGFQNALIVR	_LGEYGFQNALIVR_	CON__P02769	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	740.4022827148441	2	740.401358	1478.78816	2326.56	4.0991000000000005e-09	
105 | GTFIIDPGGVIR	_GTFIIDPGGVIR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	622.854675292969	2	622.853512	1243.69247	2355.36	1.9825e-06	
106 | GTFIIDPAAVIR	_GTFIIDPAAVIR_	Biognosys	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	636.86962890625	2	636.869163	1271.7237699999998	2529.54	5.1317e-05	
107 | YFPTQALNFAFK	_YFPTQALNFAFK_	P18239;P18238;P04710	MULTI-MSMS	B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	17_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol	724.3764038085941	2	723.874445	1445.73434	2557.8	0.00023335	
108 | 


--------------------------------------------------------------------------------
/test/configuration_iRT_test_match.ini:
--------------------------------------------------------------------------------
 1 | [moFF_parameters]
 2 | loc_in= absence_peak_data/
 3 | raw_repo= absence_peak_data/raw_repo/
 4 | xic_length= 4
 5 | rt_peak_win= 1
 6 | rt_peak_win_match= 1.1
 7 | tol= 5
 8 | cpu= 0
 9 | peptide_summary= 1
10 | loc_out= test/output_mbr_test
11 | sample=
12 | ext=txt
13 | log_label = moFF
14 | w_filt = 1.5
15 | out_flag= True
16 | w_comb=
17 | mbr= only
18 | # leave empty to set to False
19 | match_filter=
20 | ptm_file = ptm_setting_mq.json
21 | quantile_thr_filtering = 0.85
22 | sample_size = 0.10
23 | 


--------------------------------------------------------------------------------
/test/test_apex.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | 
 4 | @pytest.fixture
 5 | def data_apex():
 6 |     import pandas
 7 |     return  pandas.read_csv('test/B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol_moff_result.txt',sep="\t")
 8 | 
 9 | 
10 | def test_apex(data_apex):
11 |     assert all([a == b for a, b in zip(data_apex.columns, ['peptide', 'mod_peptide', 'prot', 'Type', 'Raw file', 'Experiment',
12 |        'mz', 'charge', 'another m/z', 'mass', 'rt', 'PEP', 'Reverse',
13 |        'intensity', 'rt_peak', 'lwhm', 'rwhm', '5p_noise', '10p_noise', 'SNR',
14 |        'log_L_R', 'log_int'])]),"wrong  column names"
15 |     assert not(data_apex.iloc[:, 13].isnull().all()), 'missing value on intensity '
16 |     assert not(data_apex.iloc[:, 14].isnull().all()), 'missing value on rt_peak'
17 |     assert not(data_apex.iloc[:, 15].isnull().all()), 'missing value on  lwhm'
18 |     assert not(data_apex.iloc[:, 16].isnull().all()), 'missing value on rwhm'
19 |     assert not(data_apex.iloc[:, 17].isnull().all()), 'missing value on 5p_noise'
20 |     assert not(data_apex.iloc[:, 18].isnull().all()), 'missing value on 10p_noise'
21 |     assert not(data_apex.iloc[:, 19].isnull().all()), 'missing value on SNR'
22 |     assert not(data_apex.iloc[:, 20].isnull().all()), 'missing value on log_L_R'
23 |     assert not(data_apex.iloc[:, 21].isnull().all()), 'missing value on log_int'
24 |     assert data_apex.shape[0]== 106 , "wrong data size"
25 |     assert data_apex[data_apex.log_L_R == -1].shape[0] ==10," worng number of record with log_L_R "
26 |     assert data_apex.log_int.mean() == 19.892951681679005, "wrong log_int mean "
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/test/test_mbr.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | 
 3 | @pytest.fixture
 4 | def data_mbr():
 5 |     import pandas
 6 |     return  pandas.read_csv('absence_peak_data/mbr_output/B002413_Ap_22cm_Yeast_171215184201_match.txt',sep="\t")
 7 | 
 8 | @pytest.fixture
 9 | def data_mbr_2():
10 |     import pandas
11 |     d1=pandas.read_csv('absence_peak_data/mbr_output/B002413_Ap_22cm_Yeast_171215184201_match.txt', sep="\t")
12 |     d2=pandas.read_csv('absence_peak_data/mbr_output/B002417_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol_match.txt', sep="\t")
13 |     d3=pandas.read_csv('absence_peak_data/mbr_output/B002419_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol_inYeast_match.txt',sep="\t")
14 |     d4=pandas.read_csv('absence_peak_data/mbr_output/B002421_Ap_22cm_iRT_PRC-Hans_equimolar_100fmol_match.txt',sep="\t")
15 |     return ( d1,d2,d3,d4)
16 | 
17 | 
18 | 
19 | def test_mbr_iRT(data_mbr):
20 |     iRT_match = data_mbr[data_mbr['prot']=='Biognosys']
21 |     assert (iRT_match['peptide'].unique().shape[0] ==  11 ), 'not right unique iRT peptide matched'
22 |     assert (iRT_match['rt'].mean()/ 60 == 36.38888249800042), 'not right mean iRT rt matched peptide'
23 |     assert (iRT_match.shape[0]== 15) ,'not right size '
24 | 
25 | 
26 | def test_general_mbr(data_mbr_2):
27 |     assert (data_mbr_2[0].shape[0] ==  8356 ), 'not right unique iRT peptide matched'
28 |     assert (data_mbr_2[1].shape[0] ==  8427 ), 'not right unique iRT peptide matched'
29 |     assert (data_mbr_2[2].shape[0] ==  8264 ), 'not right unique iRT peptide matched'
30 |     assert (data_mbr_2[3].shape[0] ==  8424 ), 'not right unique iRT peptide matched'
31 | 


--------------------------------------------------------------------------------
/txic_json.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompOmics/moFF/279f0101efc031d13c2a1729023df33a796d9eb9/txic_json.exe


--------------------------------------------------------------------------------