├── LICENSE
├── Scout_post_processing_params_high_complex_samples.json
├── Scout_post_processing_params_low_complex_samples.json
├── Scout_search_params.json
└── README.md
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Diogo Borges
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Scout_post_processing_params_high_complex_samples.json:
--------------------------------------------------------------------------------
1 | {
2 | "defaultCSMLooplinkParams": [
3 | "XLScore",
4 | "PoissonScore",
5 | "MinDDPScore",
6 | "DiffPPM",
7 | "MinUniqueRPsInInterProteins",
8 | "MinUniqueRPsInIntraProteins"
9 | ],
10 | "defaultCSMInterParams": [
11 | "PoissonScore",
12 | "MinDDPScore",
13 | "DiffPPM",
14 | "MinUniqueRPsInInterProteins",
15 | "MinUniqueRPsInIntraProteins"
16 | ],
17 | "CSM_Features": [
18 | "PoissonScore",
19 | "MinDDPScore",
20 | "DiffPPM",
21 | "MinUniqueRPsInInterProteins",
22 | "MinUniqueRPsInIntraProteins"
23 | ],
24 | "CSM_FDR": 0.01,
25 | "ResPair_Features": [
26 | "TopCSMScore",
27 | "BestPoisson",
28 | "BestMinScore",
29 | "MinUniqueRPsInInterProteins",
30 | "MinUniqueRPsInIntraProteins"
31 | ],
32 | "ResPair_FDR": 0.01,
33 | "PPI_Features": [
34 | "BestClassificationScore",
35 | "UniquePepsA_B",
36 | "UniquePepsB_A"
37 | ],
38 | "PPI_FDR": 0.01,
39 | "UniquePPIsOnly": false,
40 | "UsePythonModels": true,
41 | "UseFinalScoringCSMs": false,
42 | "GroupedByGene": false,
43 | "IsLooplinkFilter": false,
44 | "ApplyBoostFDR": false,
45 | "ApplyPostProcessingFilters": true,
46 | "DiffPPM_Threshold": 3,
47 | "Apply_DiffPPM_Threshold": false,
48 | "PoissonScore_Threshold": 1.5,
49 | "MinDDPScore_Threshold": 0.06,
50 | "MinUniqueRPsInInterProteins": true,
51 | "MinUniqueRPsInIntraProteins": false,
52 | "Independent_FDR_Control": false,
53 | "FDRMode": 1
54 | }
55 |
--------------------------------------------------------------------------------
/Scout_post_processing_params_low_complex_samples.json:
--------------------------------------------------------------------------------
1 | {
2 | "defaultCSMLooplinkParams": [
3 | "XLScore",
4 | "PoissonScore",
5 | "MinDDPScore",
6 | "DiffPPM",
7 | "MinUniqueRPsInInterProteins",
8 | "MinUniqueRPsInIntraProteins"
9 | ],
10 | "defaultCSMInterParams": [
11 | "PoissonScore",
12 | "MinDDPScore",
13 | "DiffPPM",
14 | "MinUniqueRPsInInterProteins",
15 | "MinUniqueRPsInIntraProteins"
16 | ],
17 | "CSM_Features": [
18 | "PoissonScore",
19 | "MinDDPScore",
20 | "DiffPPM",
21 | "MinUniqueRPsInInterProteins",
22 | "MinUniqueRPsInIntraProteins"
23 | ],
24 | "CSM_FDR": 0.01,
25 | "ResPair_Features": [
26 | "TopCSMScore",
27 | "BestPoisson",
28 | "BestMinScore",
29 | "MinUniqueRPsInInterProteins",
30 | "MinUniqueRPsInIntraProteins"
31 | ],
32 | "ResPair_FDR": 0.01,
33 | "PPI_Features": [
34 | "BestClassificationScore",
35 | "UniquePepsA_B",
36 | "UniquePepsB_A"
37 | ],
38 | "PPI_FDR": 0.01,
39 | "UniquePPIsOnly": false,
40 | "UsePythonModels": true,
41 | "UseFinalScoringCSMs": false,
42 | "GroupedByGene": false,
43 | "IsLooplinkFilter": false,
44 | "ApplyBoostFDR": false,
45 | "ApplyPostProcessingFilters": true,
46 | "DiffPPM_Threshold": 8,
47 | "Apply_DiffPPM_Threshold": true,
48 | "PoissonScore_Threshold": 0.08,
49 | "MinDDPScore_Threshold": 0.11,
50 | "MinUniqueRPsInInterProteins": true,
51 | "MinUniqueRPsInIntraProteins": false,
52 | "Independent_FDR_Control": false,
53 | "FDRMode": 1
54 | }
55 |
--------------------------------------------------------------------------------
/Scout_search_params.json:
--------------------------------------------------------------------------------
1 | {
2 | "BDP_Mode": false,
3 | "MSFileExtension": ".raw",
4 | "PPMMS1Tolerance": 20.0,
5 | "PPMMS2Tolerance": 20.0,
6 | "PerformShotgunSearch": false,
7 | "PerformCleaveXLSearch": true,
8 | "SaveSpectraToResults": false,
9 | "MaxQueryResults": 4,
10 | "MinPepLength": 6,
11 | "MaxPepLength": 60,
12 | "MinPepMass": 500.0,
13 | "MaxPepMass": 6000.0,
14 | "FastaFile": "/path/database.fasta",
15 | "RawPath": "/path/raw",
16 | "OutputFolder": "/path/results",
17 | "AddMinusOneIsotope": false,
18 | "CarbonIsotopeShift": 1.0033548,
19 | "IsotopicPossibilitiesPrecursor": 1,
20 | "MiscleavageNum": 3,
21 | "Enzyme": {
22 | "Name": "Trypsin",
23 | "CTerminus": true,
24 | "Sites": "KR",
25 | "BlockedBy": "P"
26 | },
27 | "EnzymeSpecificity": 0,
28 | "MaximumVariableModsPerPeptide": 2,
29 | "StaticModifications": [
30 | {
31 | "MassShift": 57.02146,
32 | "Name": "Carbamidomethyl",
33 | "IsCTerm": false,
34 | "IsNTerm": false,
35 | "IsVariable": false,
36 | "TargetResidues": "C",
37 | "ModIndex": 1
38 | }
39 | ],
40 | "VariableModifications": [
41 | {
42 | "MassShift": 15.9949,
43 | "Name": "Oxidation of Methionine",
44 | "IsCTerm": false,
45 | "IsNTerm": false,
46 | "IsVariable": true,
47 | "TargetResidues": "M",
48 | "ModIndex": 2
49 | }
50 | ],
51 | "ExtraAminoacids": null,
52 | "SearchLoopLinks": true,
53 | "IonPairMaxCharge": 2,
54 | "CXLReagent": {
55 | "Name": "DSSO_KSYT",
56 | "LightTag": "Light",
57 | "LightFragment": 54.01056468,
58 | "HeavyTag": "Heavy",
59 | "HeavyFragment": 85.98263585,
60 | "WholeTag": "Full",
61 | "WholeMass": 158.00376533,
62 | "DeltaShift": 31.972071169999992,
63 | "Targets": "KSYT",
64 | "TargetNTerm": true
65 | },
66 | "FullPairsOnly": false,
67 | "PairFinderPPM": 10.0,
68 | "MinBinMZ": 200.0,
69 | "MaxBinMZ": 1800.0,
70 | "BinSize": 0.02,
71 | "Offset": 0.0,
72 | "ScoreFunction": 1,
73 | "MS2NormalizationTypes": 2,
74 | "NormalizationWindowWidth": 80.0,
75 | "NormalizationWindowPeaksKept": 13,
76 | "AddPrecursorToDotProduct": false,
77 | "AddIonPairsToDotProduct": false,
78 | "ApplyMZRangeWeighting": false,
79 | "ReorderCleaveCandidateScores": true,
80 | "RemoveIonPairsFromExperimentalMS": true,
81 | "RemovePrecursorFromExperimentalMS": true,
82 | "ReplaceNullPoissonToXLScore": true,
83 | "ReorderingMethod": 0,
84 | "ReorderingCombinatorialDepthHeavy": 3,
85 | "ReorderingCombinatorialDepthLight": 2,
86 | "FastaBatchSize": 30000,
87 | "ParallelPSMs": true,
88 | "QueryBatches": null,
89 | "MergeDatabase": true,
90 | "MethionineInitiator": true,
91 | "AddDecoys": true,
92 | "AddContaminants": true,
93 | "DontShowContaminants": true,
94 | "DecoyTag": "Reverse",
95 | "DecoyGenerationMode": 0,
96 | "AddUnlabelledDecoys": false,
97 | "UnlabelledDecoyTag": "Unlabelled",
98 | "OffsetUnlabelledDecoy": true,
99 | "OffsetUnlabelled": 2,
100 | "UnlabelledDecoysGenerationMode": 4,
101 | "AddLocusStringToPeptides": true,
102 | "DeconvolutionForPairSearching": true,
103 | "DeconvolutionForMSScoring": false,
104 | "SilacSearch": false,
105 | "SilacHybridMode": false,
106 | "SilacGroups": [
107 | {
108 | "GroupName": "Light",
109 | "GroupAminoacids": [
110 | {
111 | "TargetResidue": "K",
112 | "MassShift": 0.0
113 | },
114 | {
115 | "TargetResidue": "R",
116 | "MassShift": 0.0
117 | }
118 | ]
119 | },
120 | {
121 | "GroupName": "Heavy",
122 | "GroupAminoacids": [
123 | {
124 | "TargetResidue": "K",
125 | "MassShift": 8.0141988131999
126 | },
127 | {
128 | "TargetResidue": "R",
129 | "MassShift": 10.0082685996
130 | }
131 | ]
132 | }
133 | ],
134 | "SilacSearchNormalPeptides": false,
135 | "IsobaricLabelling_Search": false,
136 | "IsobaricLabelling_Mods": [
137 | {
138 | "MassShift": 144.1,
139 | "Name": "iTRAQ - 4",
140 | "IsCTerm": false,
141 | "IsNTerm": true,
142 | "IsVariable": false,
143 | "TargetResidues": "K",
144 | "ModIndex": 8
145 | },
146 | {
147 | "MassShift": 304.2022,
148 | "Name": "iTRAQ - 8",
149 | "IsCTerm": false,
150 | "IsNTerm": true,
151 | "IsVariable": false,
152 | "TargetResidues": "K",
153 | "ModIndex": 7
154 | },
155 | {
156 | "MassShift": 229.1629321141,
157 | "Name": "TMT - 6plex",
158 | "IsCTerm": false,
159 | "IsNTerm": true,
160 | "IsVariable": false,
161 | "TargetResidues": "K",
162 | "ModIndex": 9
163 | },
164 | {
165 | "MassShift": 229.1629321141,
166 | "Name": "TMT - 10plex",
167 | "IsCTerm": false,
168 | "IsNTerm": true,
169 | "IsVariable": false,
170 | "TargetResidues": "K",
171 | "ModIndex": 10
172 | },
173 | {
174 | "MassShift": 229.1629321141,
175 | "Name": "TMT - 11plex",
176 | "IsCTerm": false,
177 | "IsNTerm": true,
178 | "IsVariable": false,
179 | "TargetResidues": "K",
180 | "ModIndex": 11
181 | },
182 | {
183 | "MassShift": 304.207146,
184 | "Name": "TMT - 16plex (TMTpro)",
185 | "IsCTerm": false,
186 | "IsNTerm": true,
187 | "IsVariable": false,
188 | "TargetResidues": "K",
189 | "ModIndex": 12
190 | },
191 | {
192 | "MassShift": 304.207146,
193 | "Name": "TMT - 18plex (TMTpro)",
194 | "IsCTerm": false,
195 | "IsNTerm": true,
196 | "IsVariable": false,
197 | "TargetResidues": "K",
198 | "ModIndex": 13
199 | }
200 | ],
201 | "SelectedIsobaricLabelling_Mod": {
202 | "MassShift": 304.207146,
203 | "Name": "TMT - 16plex (TMTpro)",
204 | "IsCTerm": false,
205 | "IsNTerm": true,
206 | "IsVariable": false,
207 | "TargetResidues": "K",
208 | "ModIndex": 12
209 | },
210 | "IsobaricLabelling_AllowedFreeResidues": 2,
211 | "CalculatePairIntensities": true,
212 | "AddXLasVariableMod": false,
213 | "IsotopesInTheoreticalMS": 2,
214 | "MaxChargeInTheoreticalMS": 2,
215 | "BonusMode": true,
216 | "BonusScore": 0.001,
217 | "ApplyNeutralLossH2O": false,
218 | "ApplyNeutralLossNH3": false,
219 | "StarIntensity": 0.7,
220 | "FastaDistinctByLocus": true,
221 | "ApplyQuantileIntensityThreshold": false,
222 | "QuantileIntensityThreshold": 0.3
223 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Scout
2 | Interactomics studies play a critical role in elucidating protein structures, functions, and interactions within complex cellular environments. Cross-linking mass spectrometry with cleavable cross-linking reagents (cXL-MS) has emerged as a powerful technique for large-scale interactomics analysis by identifying proximal amino acid pairs in protein samples. However, current computational cXL-MS tools face limitations in proteomic-scale studies, such as being too slow or generating excessive false positives, particularly at the protein-protein interaction (PPI) level.
3 |
4 | Here, we present **Scout**, a computational methodology that enables interactomic analysis by identifying mass spectra of peptides linked with cleavable cross-linking reagents. By leveraging machine learning techniques, Scout ensures a controlled false discovery rate (FDR) at multiple levels, including cross-linked spectrum matches, residue pairs, and PPIs. Our methodology offers an efficient and accurate solution for large-scale interactomics studies, addressing the existing computational challenges.
5 |
6 | _Please cite our paper:_
7 | _Clasen, MA, et al., [“Proteome-scale recombinant standards and a robust high-speed search engine to advance cross-linking MS-based interactomics”](https://doi.org/10.1038/s41592-024-02478-1), Nature Methods, 2024._
8 |
9 | # Equipment
10 | ## Hardware
11 | - A computer with a minimum of 16 GB RAM and 4 computing cores is recommended. However, the software can take advantage of superior configurations.
12 |
13 | ## Software
14 | - Windows 10 (64 bits) or later; macOS 15.7.2 (64 bits) or later; Linux (64 bits)
15 | - [Python 3.12 or later](https://www.python.org/downloads/).
16 | - [.NET Core 9 or later](https://dotnet.microsoft.com/en-us/download).
17 | - [MPFR and GMP (for Linux or macOS)](https://github.com/diogobor/Scout#ref_2_6_2_2)
18 | - _Scout_ software, available for download at https://github.com/diogobor/Scout/releases
19 |
20 | ## Data files
21 | **Input**
22 | - _Scout_ is compatible with data files in the formats mzML, MS2, Mascot Generic Format (MGF), Bruker® .d files (Windows-only), and Thermo® RAW files.
23 |
24 | **Output**
25 | - _Scout_ saves results in its own _*.scout_ format, as well as in the [mzIdentML 1.2](http://www.psidev.info/mzidentml#mzid12) and [mzIdentML 1.3](http://www.psidev.info/mzidentml#mzid13) formats proposed by the [HUPO Proteomics Standards Initiative](http://www.psidev.info/) to support the identification of cross-linked peptides. These mzIdentML files allow complete submissions of XL-MS data to PRIDE[1] and are therefore compatible with the PRIDE Inspector software[2]. Additionally, the software supports exporting all CSMs, Residue Pairs and PPIs as CSV files, as well as all results to [XlinkCyNET](https://apps.cytoscape.org/apps/xlinkcynet)[3] for visualization within [Cytoscape](https://cytoscape.org/)[4].
26 |
27 | # Procedures
28 |
29 | 1. ## Software installation
30 | 1.1 Download Scout by clicking on Scout_setup_64bit.msi in the [latest release](https://github.com/diogobor/Scout/releases/).
31 |
1.2 Install it according to the OS:
32 |
   1.2.1 Windows: by double-clicking the previously downloaded file (_*.msi_).
33 |
   1.2.2 Linux or macOS: by uncompressing the previously downloaded file (_*.zip_)
34 |
35 | 2. ## Workflow
36 | The following workflow demonstrates how to perform a search using _Scout_.
   If you are interested in running the software in _Automation_ mode, go to 2.6.
37 | _PS: Linux and macOS versions only work in Automation mode._
38 |
39 | 2.1 (Windows-only) Launch Scout: Open the Scout application to access its main window, as shown in Figure 1.
40 |

41 | Figure 1: Graphical User Interface of Scout’s main window.
42 | 2.2 Initial Setup
43 | 2.2.1. Searching selected file(s): Check the ‘Raw File(s)’ radio button and then select at least one tandem mass spectra file (e.g., MS2, MGF or Thermo® RAW).
44 | PS: For Bruker® .d files, select the folder that contains the name of the file.
45 | 2.2.2. Batch searching: Check the ‘Raw folder’ radio button and then specify a directory containing the tandem mass spectra files.
46 | 2.2.3.
   Fasta File: Select a file containing the protein sequences. The file must be in FASTA format, typically obtained from
Uniprot.
For instance:
47 | >protein name
48 | PROTEINSEQUENCE
49 |
50 | 2.2.4 Output Folder: Select a folder where the results will be saved.
51 | ⇒ Click on 'Start' button to initiate the search by using the default parameters. Once the search is complete, the results window will be opened (see item 2.3).
52 | ⇒ To stop the search, click on 'Cancel' button and confirm.
53 |       PS: If Scout closes for any reason, the search can be resumed from the point at which it was interrupted. To do this, set the same parameters again and click the 'Start' button.
54 | ⇒ All procedures will be recorded in the Log box. To export it, go to File → Export log (or press ALT + M).
55 | 2.2.5 Search Parameters
56 |    Search parameters can be adjusted to optimize the search process. To modify the parameters, navigate to Parameters → Search (or press ALT + S), as illustrated in Figure 2a. A new window will open (Figure 2b).
57 | 
58 | Figure 2a: Search and Post Processing Parameters can be modified on Parameters menu.
59 | 
60 | Figure 2b: Search Parameters window
61 | 2.2.5.1. MS1 PPM Tolerance: Specify the ppm error tolerance for the precursor mass.
62 | 2.2.5.2. MS2 PPM Tolerance: Specify the ppm error tolerance for fragment ions.
63 | 2.2.5.3. Ion Pair PPM Tolerance: Specify the ppm error tolerance for ion pair mass.
64 | 2.2.5.4. Min. Peptide Length: Specify the minimum number of amino acids in each connected peptide.
65 | 2.2.5.5. Max. Peptide Length: Specify the maximum number of amino acids in each connected peptide.
66 | 2.2.5.6. Min. Peptide Mass: Specify the minimum peptide mass in Daltons.
67 | 2.2.5.7. Max. Peptide Mass: Specify the maximum peptide mass in Daltons.
68 | 2.2.5.8. Missed Cleavages: Specify the maximum missed cleavages allowed in a single peptide.
69 | 2.2.5.9. Max. Variable Mods: Specify the maximum number of variable post-translational modifications in a single peptide.
70 | 2.2.5.10. Enzyme: Select a proteolytic enzyme for in-silico digestion.
71 | 2.2.5.10.1. Add Enzyme: Navigate to the Enzymes tab and click on ‘Add Enzyme’ button (Figure 3a). A new window will be opened (Figure 3b).
72 | 
73 | Figure 3a: Enzymes window – This tab enables the addition or removal of enzymes.
74 | 
75 | Figure 3b: New Enzyme Inclusion – This window allows users to introduce a new enzyme to the existing list of enzymes.
76 |
77 | 2.2.5.10.1.1. Name: Specify a name for the new enzyme.
78 | 2.2.5.10.1.2. Sites: Specify the amino acids at which cleavage should occur. PS: The amino acids should be included without spaces, for instance, the trypsin sites should appear as KR.
79 | 2.2.5.10.1.3. Blocked by: Specify the amino acids that will impede the cleavage. PS: As in ‘Sites’, the amino acids must be typed without spaces.
80 | 2.2.5.10.1.4. C-Terminal: Check this option if the new enzyme cleaves at the C-terminus of the peptide; otherwise, cleavage will occur at the N-terminus.
81 | Click on the ‘Confirm’ button to incorporate the new enzyme into the Enzymes table. Afterwards, return to 2.2.5.10.
82 | 2.2.5.10.2 To remove an Enzyme, press ‘Del’ key. A confirmation message will be displayed. Confirm it to proceed.
83 | 2.2.5.11. Enzyme specificity: Select an enzyme specificity from the list: full specific or semi-specific.
84 | 2.2.5.12. Cleavable Reagent: Select a cleavable cross-linker from the list.
85 | 2.2.5.12.1. Add Reagent: Go to XL Reagents tab and click on ‘Add Reagent’ button (Figure 4a). A new window will be opened (Figure 4b).
86 | 
87 | Figure 4a: Chemical cross-linkers window: on this tab, new reagents can be added or removed.
88 | 
89 | Figure 4b: A new reagent can be added into the list of cross-linkers.
90 |
91 | 2.2.5.12.1.1. Name: Specify a unique identifier for the new cleavable reagent.
92 | 2.2.5.12.1.2. Light Fragment Mass: Specify the light fragment mass in Daltons.
93 | 2.2.5.12.1.3. Heavy Fragment Mass: Specify the heavy fragment mass in Daltons.
94 | 2.2.5.12.1.4. Full Mass: Specify the full mass of the reagent in Daltons.
95 | 2.2.5.12.1.5. Ion Pair Shift: The pair will be automatically calculated according to the light and heavy fragment masses.
96 | 2.2.5.12.1.6. Target Residues: Specify the target residues that the new cleavable cross-linker will react with. PS: List residues without spaces; for example, use KSYT for DSSO.
97 | 2.2.5.12.1.7. N-Terminal: Check this option if the new cleavable cross-linker also reacts at the N-terminus of the protein.
98 |    Click the ‘Confirm’ button to incorporate the new cleavable reagent into the XL Reagents table. Subsequently, return to 2.2.5.12.
99 | 2.2.5.12.2 To remove an XL Reagent, press ‘Del’ key. A confirmation message will be displayed. Confirm it to proceed.
100 | 2.2.5.13. Deconvolute for Ion Pair Searching: Check this option to deconvolute the spectra before searching the ion pairs. If enabled, the deconvolution will be performed by YADA 3.0 [5].
101 |    2.2.5.14. Deconvolute for Scoring: Check this option to deconvolute spectra prior to searching for CSMs. If enabled, the deconvolution will be performed by YADA 3.0 [5].
102 |    Explanation on ‘deconvolution’: As cleavable cross-linking search relies heavily on locating ion pairs, noisy spectra can be very harmful to the overall quality of end results. As such, spectra deconvolution is generally recommended, and set as default for the ion pair searching step of Scout. In mass spectrometry, deconvolution refers to the process of de-charging and/or deisotoping a spectrum. In practical terms, this means iterating over the MS2 spectrum, searching for charge envelopes and isotopic envelopes, and grouping each of them into a single ion at charge +1. This is particularly important for the first step of Scout’s workflow, Ion Pair Doublet Searching, as we found that being too lenient with the search for ion pairs may lead to false positives.
103 | 2.2.5.15. Add Modification: Click on this button to add a new post-translational modification (Figure 5a). A new window will appear (Figure 5b).
104 | 
105 | Figure 5a: Modification Window – This tab displays all variable and static modifications.
106 | 
107 | Figure 5b: New Modification Inclusion – This window enables the addition of a new modification into the modifications list.
108 |
109 | 2.2.5.15.1. Name: Specify a unique name for the new post-translational modification.
110 | 2.2.5.15.2. Mass Shift: Specify the mass shift in Daltons.
111 | 2.2.5.15.3. Target Residues: Specify the target residues for this new post-translational modification. Use capital letters without spaces.
112 | 2.2.5.15.4. C-Terminal: Check this option if the new post-translational modification occurs at the C-terminus of the peptide.
113 | 2.2.5.15.5. N-Terminal: Check this option if the new post-translational modification occurs at the N-terminus of the peptide.
114 | 2.2.5.15.6. Variable: Check this option if the new post-translational modification is dynamic, i.e., if it may or may not occur.
115 | Click on the ‘Confirm’ button to incorporate the new modification into the Modification table.
116 | PS: Upon completing this process, ensure the new post-translational modification is checked in ‘Use’ field for it to be considered in the search.
117 | 2.2.5.15.7 To remove a modification, press the ‘Del’ key. A confirmation message will be displayed. Confirm the action to proceed.
118 | 2.2.5.16. Contaminants: On this tab, the current contaminants can be modified as well as new ones added (Figure 6). PS: All contaminants must be entered in FASTA format (similar to item 2.2.3).
119 |

120 |    Figure 6: Contaminants tab: existing contaminant sequences can be modified and new ones can be added.
121 | 2.2.5.17. Export: Choose this option to save the current parameters to a file.
122 | 2.2.5.18. Load: Select this option to import parameters from a file.
123 | 2.2.5.19. As default: Set the current parameters as the software’s default settings.
124 | 2.2.5.20. Restore: Revert to factory default parameters.
125 | 2.2.5.21. Advanced: Click on this link to customize the advanced parameters (not necessary for most searches). (Figure 7).
126 |

127 | Figure 7: Edit advanced parameters: In this window, all search parameters can be modified.
128 | 2.2.5.22. Advanced Search Parameters
129 | 2.2.5.22.1. Save spectra in results file: Check this option to save the identified experimental spectra in the results file.
130 | 2.2.5.22.2. Add contaminants: Check this option to consider common mass spectrometry contaminants during the search.
131 | 2.2.5.22.3. Add decoys: Check this option to add decoys before initiating the search. Note: for the FDR calculation, this option should be checked.
132 | 2.2.5.22.4. Fasta batch size: Specify the maximum number of protein sequences to be loaded into memory at a given time.
133 | 2.2.5.22.5. Fragment bin tolerance: Specify the bin size for binning mass spectra and for theoretical mass spectra generation.
134 | 2.2.5.22.6. Fragment bin offset: Specify offset in Daltons to be considered to initiate the binning process.
135 | 2.2.5.22.7. Minimum fragment bin m/z: Specify the minimum m/z to be vectorized.
136 | 2.2.5.22.8. Maximum fragment bin m/z: Specify the maximum m/z to be vectorized.
137 |    Explanation on ‘Binning’: We refer to binning mass spectra into vectors as the process of discretization of continuous m/z values by partitioning them into predefined bins. The process consists of establishing an offset (in Da) and a bin width (in Da) to define the initial point and bin size, respectively. Each bin encompasses a specific m/z range, and peaks are allocated to the corresponding bin based on their m/z value. Subsequently, the intensity values of peaks within each bin are aggregated, in our case, by summation. The output entails a vector of intensity values, with each entry representing a distinct bin. This vectorial representation streamlines mass spectral data manipulation and comparison, facilitating bioinformatics analyses. Therefore, the bin size loosely corresponds to the MS/MS tolerance.
138 | 2.2.5.22.9. No. Isotopic Possibilities: The precursor mass stored in raw data files may not correspond to the monoisotopic peak. This option allows the software to find the correct monoisotopic peak, which is required to identify the molecule but at the cost of opening up the search space. If a high number of isotopic possibilities is set, the search space will increase accordingly and impact Scout’s sensitivity negatively.
139 | 2.2.5.22.10. Metabolic labelling search: Check this option to perform SILAC search.
140 | 2.2.5.22.10.1 Add Group: A new window will open to add the groups for labelling peptides, e.g., heavy and light groups as well as their amino acids can be added in this feature.
141 | 2.2.5.22.10.2 Hybrid mode: Check this option to find not only heavy-heavy / light-light peptides, but also heavy-light/light-heavy ones.
142 | 2.2.5.22.11. Isobaric labelling search: Check this option to perform Isobaric labelling search (e.g., TMT, iTRAQ).
143 | 2.2.5.22.11.1 Add Reagent: A new window will open to set the reagent up.
144 | 2.2.5.22.11.1.1 Reagent: Select a reagent. If the desired reagent is not in the list, click on the 'Add' button.
145 |    2.2.5.22.11.1.2 Free residue tolerance: Set the minimum number of residues that TMT will not react with.
146 | 2.2.5.22.12. Target-decoy fusion mode: Check this option to perform a search integrating the target and decoy protein sequences into a single sequence.
147 | 2.2.5.22.13. Export: See 2.2.5.17.
148 | 2.2.5.22.14. Load: See 2.2.5.18.
149 | 2.2.5.22.15. As default: See 2.2.5.19.
150 | 2.2.5.22.16. Restore: See 2.2.5.20.
151 | 2.2.6 Post Processing Parameters
152 |    Adjusting certain post processing parameters may improve the performance of the process. To do this, navigate to Parameters → Post Processing (or use the keyboard shortcut ALT + P), as illustrated in Figure 2a. A new window will appear (Figure 8).
153 | 
154 | Figure 8: Post Processing Parameters window
155 | 2.2.6.1. Use only unique XLs into PPIs: Check this option to remove PPIs that contain shared cross-linked peptides.
156 | 2.2.6.2. Separate protein intra- and inter-crosslinks: Check this option to apply FDR control separately to intra- and inter-crosslinks at the CSM, Residue Pair, and PPI levels.
157 | 2.2.6.3. Group PPIs by gene: Check this option to group all protein-protein interactions by gene name.
158 | 2.2.6.4. FDR on CSM level: Specify the FDR on CSM level.
159 | 2.2.6.5. FDR on Residue Pair level: Specify the FDR on Residue Pair level.
160 | 2.2.6.6. FDR on PPI level: Specify the FDR on PPI level.
161 |
162 | 2.2.6.8. Load: Similar to 2.2.5.18.
163 | 2.2.6.9. As default: Similar to 2.2.5.19.
164 | 2.2.6.10. Restore: Similar to 2.2.5.20.
165 | 2.3. Results
166 | Upon completion of the search processing, the results are automatically saved in the same directory in which the RAW files are (*.scout file) and presented in a new window with separate tabs: CSMs, Residue Pairs and PPIs, as well as the parameters used in the search. (Figure 9)
167 | 
168 | Figure 9: Results window.
169 | Double-clicking on a row containing a CSM result opens the spectrum viewer displaying the spectrum from which it was identified*. The sequence coverage (Figure 10a) and the standard deviation plot (m/z vs ppm) of all identified peaks (Figure 10b) can be visualized through this window as well as all fragment ions (Figure 10c). Double-clicking a cross-link opens a list of CSMs from which it is derived (Figure 10d). Double-clicking a PPI displays all cross-links belonging to the PPI (Figure 10e) and an additional click reveals all CSMs associated with the respective cross-link.
170 | *PS: The spectrum viewer will be opened if the RAW file is in the directory or the mass spectrum was saved in *.scout file (see item 2.3.8).
171 |

172 | Figure 10a: Sequence coverage annotation & spectrum visualization.
173 | 
174 | Figure 10b: Standard deviation plot of all identified peaks.
175 | 
176 | Figure 10c: Theoretical fragment ions.
177 | 
178 | Figure 10d: List of CSMs from a specific Residue Pair.
179 | 
180 | Figure 10e: List of Residue Pairs from a specific PPI.
181 |
182 | 2.3.1 Filter results: Results contain FDR filtered identifications on all levels – in the graphical user interface, personal filters can be applied:
183 | 2.3.1.1. CSM level: In this tab (Figure 9), the CSMs are displayed according to the specified filter parameters.
184 | 2.3.1.1.1 Scan: Specify the scan number to be displayed.
185 | 2.3.1.1.2 Score: Specify the score cutoff. All CSMs with a score greater than ‘Score’ will be displayed.
186 | 2.3.1.1.3 Search: Type the α or/and β peptide (separated by '-') as well as the protein 1 or/and protein 2 (separated by '-'), or even gene 1 or/and gene 2 (separated by '-') to be displayed. PS: Type at least four characters.
187 | 2.3.1.1.4 Files: Select the file(s) that the results to be displayed belong to. If no files or ‘All files’ is selected, all results will be displayed.
188 | 2.3.1.1.5 Show inter-protein links only: Check this option to display only the CSMs that belong to inter-protein interactions.
189 | 2.3.1.1.6 Show decoys: Check this option to display decoy identifications.
190 | 2.3.1.1.7 Click on ‘Filter’ button or press Enter to perform the filter.
191 | 2.3.1.1.8 Click on ‘Reset’ button to restore default result display.
192 | 2.3.1.1.9. Summary: In this box, the number of identified CSMs will be displayed as well as the calculated FDR.
193 | 2.3.1.2. Residue Pair level: On this tab, the residue pairs will be displayed according to the specified filter parameters (Figure 11).
194 |

195 | Figure 11: Residue Pairs tab
196 | 2.3.1.2.1 Score: Specify the score cutoff. All Residue Pairs with a score greater than ‘Score’ will be displayed.
197 | 2.3.1.2.2 Search: Type the α or/and β (separated by '-') peptide as well as the protein 1 or/and protein 2 (separated by '-') or even gene 1 or/and gene 2 (separated by '-') to be displayed. PS: Type at least four characters.
198 | 2.3.1.2.3 Show inter-protein links only: Check this option to display only the Residue Pairs that belong to inter-protein interactions.
199 | 2.3.1.2.4 Show decoys: Check this option to display decoy identifications.
200 | 2.3.1.2.5 Click on ‘Filter’ button or press Enter to perform the filter.
201 | 2.3.1.2.6 Click on ‘Reset’ button to restore the results.
202 | 2.3.1.2.7. Summary: In this box, the number of identified Residue Pairs will be displayed as well as the calculated FDR.
203 | 2.3.1.3. PPI level: On this tab, the PPIs will be displayed according to the specified filter parameters (Figure 12)
204 |

205 | Figure 12: PPIs tab
206 | 2.3.1.3.1 Score: Specify the score cutoff. All PPIs with a score greater than ‘Score’ will be displayed.
207 | 2.3.1.3.2 Search: Type protein 1 or/and protein 2 (separated by '-') as well as gene 1 or/and gene 2 (separated by '-') to be displayed. PS: Type at least four characters.
208 | 2.3.1.3.3 Show inter-protein links only: Check this option to display only the identifications that belong to inter-protein interactions.
209 | 2.3.1.3.4 Show decoys: Check this option to display decoy identifications.
210 | 2.3.1.3.5 Group PPIs by gene: Check this option to group all protein-protein interactions by gene name.
211 | 2.3.1.3.6 Click on ‘Filter’ button or press Enter to filter the results.
212 | 2.3.1.3.7 Click on ‘Reset’ button to restore the results.
213 | 2.3.1.3.8. Summary: in this box, the number of identified PPIs will be displayed as well as the calculated FDR.
214 | 2.3.2 Parameters: Both search and post processing parameters used in the search can be visualized on this tab. (Figure 13a and b)
215 |

216 | Figure 13: Search and post processing parameters can be visualized on this tab (Figure 13a and 13b, respectively).
217 | 2.3.2.1 Post processing parameters: The parameters used to perform FDR on CSM, Residue Pair and PPI levels can be modified to improve the results. To do so, click on 'Edit' button and change the parameters (Figure 13 b) (Similar to 2.2.6). Afterwards, a new filter will be performed.
218 | 2.3.3 Open Results: New Scout results can be opened (*.scout file). To do so, go to File → Open Results (or press CTRL + O), as can be seen in Figure 14a. PS: Multiple files can be opened if all of them used the same parameters in the search.
219 | ⇒ Results can also be opened from the Scout starting page by clicking on File menu → Open Results (or pressing CTRL + O).
220 | 2.3.4 Save Results: The current results can be saved to preserve them. To do so, go to File → Save → Results (or press CTRL + S), as can be seen in Figure 14a.
221 |

222 | Figure 14a: Open and Save results as well as the parameters used in the search.
223 | 2.3.4.1 Save Results as mzIdentML file: The current results can also be saved in mzIdentML 1.2 or mzIdentML 1.3 format. To do so, after going to ‘Save Results’, a new window will open ('Save as'), then change ‘Save as type’ to mzIdentML 1.2 (or 1.3) Result File (.mzid), as can be seen in Figure 14b. Type a file name and click on 'Save' (or press enter).
224 | ⇒ PS: Besides the mzIdentML file, a *-specID.ms2 file will also be saved, which holds all the identified MS/MS spectra. Both files are required to proceed with the 'Complete Submission' in the PRIDE[1] system.
225 |

226 | Figure 14b: Save the results in mzIdentML 1.2 or mzIdentML 1.3 format.
227 | 2.3.5 Save Parameters: the search and post processing parameters used in the search can be exported. To do so, go to File → Save → Parameters (or press ALT + W), as can be seen in Figure 15.
228 | 2.3.6 Report: Scout allows exporting the displayed reports, such as CSMs (filtered results), Residue Pairs, PPIs and unfiltered CSMs, as well as the input file used on XlinkCyNET to visualize the protein-protein interaction network. (Figure 15)
229 |

230 | Figure 15: Export reports as well as the input file used on XlinkCyNET.
231 | 2.3.7 Reprocess FDR: The results can be filtered again by using the current post-processing parameters (that can be modified, see item 2.3.2). To do so, go to Tools → Reprocess FDR (or press ALT + F).(Figure 16)
232 |

233 | Figure 16: Reprocess FDR, Import spectra and Statistical analysis features accessed by Tools menu.
234 | 2.3.8 Import Spectra: If the option ‘Save spectra in results file’ is unchecked (see item 2.2.5.22.1), the identified spectra will not be displayed if the RAW file is not present in the same directory of the results. To import the identified spectra, go to Tools → Import Spectra (or press CTRL + I) and specify where the RAW files are. (Figure 16)
235 | 2.3.9 Statistics: The user can obtain some statistical analysis from the results, such as, the precursor charge distribution (Figure 17a) as well as reaction sites distribution (Figure 17b) based on the identified cross-links. To do so, go to Tools → Statistical analysis (or press CTRL + Y). (Figure 16)
236 | 
237 | Figure 17a: Precursor charge distribution of the identified cross-links.
238 | 
239 | Figure 17b: Reaction sites distribution taking into account all identified cross-links.
240 |
241 | 2.4. Filter from the Scout starting page
242 | The results can be filtered again with a different FDR from the one that was used for the first round by I) switching to the tab ‘Filter’; II) specifying the FASTA file; III) selecting the folder that contains the identification files (*.buf); IV) selecting the folder where the new results will be saved; V) modifying the post-processing parameters (see item 2.2.6); and VI) clicking on the ‘Filter’ button (Figure 18). When the filter is finished, a result window opens (see item 2.3).
243 | ⇒ To stop the filter, click on 'Cancel' button and confirm.
244 | 
245 | Figure 18: Filter tab window
246 | 2.5. Check for updates
247 | Scout checks for updates on software startup. Additionally, on Help → Check for updates, the user can visualize all releases (and their notes) as well as whether Scout is up to date. If the current Scout version is not up to date, users will have the option to update within this window. (Figure 19)
248 | 
249 | Figure 19: Check for updates window.
250 | 2.6. Automation
251 | Scout supports automation from CLI (Command Line Interface).
252 | 2.6.1. Windows platform
253 | 2.6.1.1 To do so, open a Terminal (press Win+R, type cmd and press enter).
254 | 2.6.1.1.1 Navigate to the directory where Scout has been installed, e.g., cd C:\\Program Files\\Scout.
255 | 2.6.1.2 To start a search, the following arguments are required:
256 | 2.6.1.2.1 scout.exe -search search_params_file.json filter_params_file.json.
257 | 2.6.1.2.1.1 In search_params_file.json, the following parameters must be filled:
258 | 2.6.1.2.1.1.1 FastaFile: The database file needs to be defined with its directory, e.g., C:\\my_search\\my_db.fasta (for Windows); /home/my_user/Documents/my_db.fasta (for Linux); /Users/my_user/Documents/my_db.fasta (for macOS).
259 | 2.6.1.2.1.1.2 RawPath: The raw file(s) need(s) to be defined with its directory, e.g., C:\\my_search\\my_raw_file.raw (for Windows); /home/my_user/Documents/my_raw_file.raw (for Linux); /Users/my_user/Documents/my_raw_file.raw (for macOS). Multiple raw files must be separated with ';', e.g., C:\\my_search\\my_raw_file_1.raw;C:\\my_search\\my_raw_file_2.raw;C:\\my_search\\my_raw_file_3.raw. A folder where the raw files are can be defined instead of raw file(s). In this case, define only the directory, e.g., C:\\my_search (for Windows); /home/my_user/Documents/scout_results (for Linux); /Users/my_user/Documents/scout_results (for macOS).
260 | 2.6.1.2.1.1.3 OutputFolder: Define a folder where the results will be saved, e.g., C:\\my_search\\results.
261 | ⇒ The search_params_file.json can be generated according to 2.2.5.17.
262 | 2.6.1.2.1.2 In filter_params_file.json, the following parameters must be filled:
263 | 2.6.1.2.1.2.1 CSM_FDR, ResPair_FDR, PPI_FDR: The FDR values need to be defined with a value between 0 and 1.
264 | ⇒ The filter_params_file.json can be generated according to 2.2.6.7.
265 | ⇒ Once the search starts, a log file is generated in the output folder.
266 | ⇒ In the end, four files will be generated: filtered_csms.csv, filtered_looplinks.csv, filtered_residue_pairs.csv and filtered_ppis.csv.
267 | 2.6.1.3 To filter only the results, the following arguments are required:
268 | 2.6.1.3.1 scout.exe -filter filter_params_file -fasta fasta_file -i path_search_result_files -o path_to_output_files.
269 | 2.6.1.3.1.1 filter_params_file.json: Similar to 2.6.1.2.1.2
270 | 2.6.1.3.1.2 fasta_file: Similar to 2.6.1.2.1.1.1
271 | 2.6.1.3.1.3 path_search_result_files: Define a directory where the search results files (*.buf) are.
272 | 2.6.1.3.1.4 path_to_output_files: Similar to 2.6.1.2.1.1.3
273 | ⇒ Once the filter starts, a log file is generated in the output folder.
274 | ⇒ In the end, four files will be generated: filtered_csms.csv, filtered_looplinks.csv, filtered_residue_pairs.csv and filtered_ppis.csv.
275 | 2.6.1.4 Scout also allows exporting unfiltered CSMs. To do so, the following arguments are required:
276 | 2.6.1.4.1 scout.exe -unfiltered -i scout_file -o path_to_output_file.
277 | 2.6.1.4.1.1 scout_file: *.scout generated at the end of the process.
278 | 2.6.1.4.1.2 path_to_output_file: Similar to 2.6.1.2.1.1.3
279 | 2.6.2. Linux and macOS platform
280 | 2.6.2.1 Open a Terminal and navigate to the directory where Scout has been uncompressed, e.g., cd /home/my_user/Documents/Scout (for Linux); cd /Users/my_user/Documents/Scout (for macOS).
281 | 2.6.2.2 Install MPFR and GMP libraries
282 | 2.6.2.2.1 Linux: sudo apt install libmpfr-dev libgmp-dev
283 | 2.6.2.2.1.1 Check where the libraries have been installed: whereis libmpfr.so
284 | 2.6.2.2.1.2 If the path is not one of these:
285 | "/usr/lib"
286 | "/usr/local/lib"
287 | "/lib"
288 | "/lib64"
289 | "/usr/lib64"
290 | "/usr/lib/x86_64-linux-gnu"
291 | "/opt/homebrew/lib"
292 | edit line 79 of run_scout.sh to add the path of libmpfr.so.
293 | 2.6.2.2.2 macOS: brew install gmp mpfr
294 | 2.6.2.3 Give permission to run_scout.sh
295 | 2.6.2.3.1 To do so, type chmod +x run_scout.sh and press enter.
296 | 2.6.2.4 To start a search, the following arguments are required:
297 | 2.6.2.4.1 ./run_scout.sh -search search_params_file.json filter_params_file.json.
298 | 2.6.2.4.2 Instructions similar to 2.6.1.2.1.1.
299 | ⇒ Once the search starts, a log file is generated in the output folder.
300 | ⇒ In the end, four files will be generated: filtered_csms.csv, filtered_looplinks.csv, filtered_residue_pairs.csv and filtered_ppis.csv.
301 | 2.6.2.5 To filter only the results, the following arguments are required:
302 | 2.6.2.5.1 ./run_scout.sh -filter filter_params_file -fasta fasta_file -i path_search_result_files -o path_to_output_files.
303 | 2.6.2.5.2 Instructions similar to 2.6.1.3.1.1.
304 | ⇒ Once the filter starts, a log file is generated in the output folder.
305 | ⇒ In the end, four files will be generated: filtered_csms.csv, filtered_looplinks.csv, filtered_residue_pairs.csv and filtered_ppis.csv.
306 | 2.6.2.6 Scout also allows exporting unfiltered CSMs. To do so, the following arguments are required:
307 | 2.6.2.6.1 ./run_scout.sh -unfiltered -i scout_file -o path_to_output_file.
308 | 2.6.2.6.2 Instructions similar to 2.6.1.4.1.1.
309 | 2.6.4 Troubleshooting
310 | 2.6.4.1 Search/Filter does not start: Check that the above-mentioned parameters are not duplicated in the json files.
311 | 2.6.4.2 Terminal opens and closes quickly: Make sure Python is installed on your machine. Check 2.6.4.1.
312 | 2.6.4.3 Log file is saved in the root path of the partition: OutputFolder parameter in search_params_file.json is null or duplicated in the file.
313 |
314 | # Closing remarks
315 | In conclusion, Scout is a powerful tool for identifying protein-protein interactions using cleavable cross-linkers in proteomic datasets. Its user-friendly interface, customizable search and post-processing parameters, and multiple filtering options make it a versatile tool for protein interaction analysis. Scout can be particularly useful for studying complex biological systems when identifying protein-protein interactions is crucial for understanding their function. Overall, Scout provides a valuable resource for researchers interested in studying protein-protein interactions at a large scale.
316 |
317 | # References
318 | [1] J. A. Vizcaíno et al., “The PRoteomics IDEntifications (PRIDE) database and associated tools: status in 2013,” Nucleic Acids Res., vol. 41, no. Database issue, pp. D1063-1069, Jan. 2013, doi: 10.1093/nar/gks1262.
319 | [2] Y. Perez-Riverol et al., “PRIDE Inspector Toolsuite: Moving Toward a Universal Visualization Tool for Proteomics Data Standard Formats and Quality Assessment of ProteomeXchange Datasets,” Mol. Cell Proteomics, vol. 15, no. 1, pp. 305–317, Jan. 2016, doi: 10.1074/mcp.O115.050229.
320 | [3] D. B. Lima, Y. Zhu, and F. Liu, “XlinkCyNET: A Cytoscape Application for Visualization of Protein Interaction Networks Based on Cross-Linking Mass Spectrometry Identifications,” J. Proteome Res., vol. 20, no. 4, pp. 1943–1950, Apr. 2021, doi: 10.1021/acs.jproteome.0c00957.
321 | [4] P. Shannon et al., “Cytoscape: A Software Environment for Integrated Models of Biomolecular Interaction Networks,” Genome Res., vol. 13, no. 11, pp. 2498–2504, Nov. 2003, doi: 10.1101/gr.1239303.
322 | [5] M. A. Clasen et al., “Increasing confidence in proteomic spectral deconvolution through mass defect,” Bioinformatics, vol. 38, no. 22, pp. 5119–5120, Nov. 2022, doi: 10.1093/bioinformatics/btac638.
323 |
--------------------------------------------------------------------------------