├── RawFileReaderLicense.doc ├── .gitignore ├── ThermoRawFileParserTest ├── Data │ ├── small.RAW │ ├── small2.RAW │ ├── xic_in.json │ ├── ExtensionTest.tsv │ └── small.json ├── OntologyMappingTests.cs ├── Xic │ └── print_json_xics.py ├── Query │ └── print_spectrum_query.py ├── ThermoRawFileParserTest.csproj ├── UtilTests.cs ├── QueryTests.cs └── XicReaderTests.cs ├── ThermoRawFileParser ├── LogFormat.cs ├── OutputFormat.cs ├── RawFileParserException.cs ├── XIC ├── XicOutputMeta.cs ├── XicData.cs ├── XicMeta.cs ├── XicParameters.cs ├── JSONInputUnit.cs ├── XicUnit.cs ├── XicExecutor.cs ├── JSONParser.cs └── XicReader.cs ├── Writer ├── ISpectrumWriter.cs ├── PrecursorInfo.cs ├── WriterUtil.cs ├── S3Loader.cs ├── MzML │ └── mzML1.1.1_idx.xsd ├── ScanTrailer.cs ├── Metadata.cs ├── MgfSpectrumWriter.cs ├── ParquetSpectrumWriter.cs └── SpectrumWriter.cs ├── Util ├── CVHelpers.cs ├── Peptide.cs ├── NativeMethods.cs ├── LimitedSizeDictionary.cs └── GeneralHelpers.cs ├── Query ├── ProxiSpectrum.cs ├── QueryExecutor.cs ├── QueryParameters.cs └── ProxiSpectrumReader.cs ├── .github └── workflows │ ├── buildandtest.yml │ └── release.yml ├── App.config ├── ThermoRawFileParser.sln ├── ThermoRawFileParser.csproj ├── ParseInput.cs ├── THERMO_LICENSE ├── RawFileParser.cs ├── LICENSE └── README.md /RawFileReaderLicense.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ThermoRawFileParser/HEAD/RawFileReaderLicense.doc -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | bin/ 3 | obj/ 4 | *.log 5 | *.sln.DotSettings.user 6 | .vs/ 7 | *.csproj.user 8 | .vscode/ 9 | Properties/ -------------------------------------------------------------------------------- /ThermoRawFileParserTest/Data/small.RAW: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ThermoRawFileParser/HEAD/ThermoRawFileParserTest/Data/small.RAW -------------------------------------------------------------------------------- /ThermoRawFileParserTest/Data/small2.RAW: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CompOmics/ThermoRawFileParser/HEAD/ThermoRawFileParserTest/Data/small2.RAW -------------------------------------------------------------------------------- /ThermoRawFileParser: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | echo "Analysing Thermo RW file input" 3 | mono /home/biodocker/bin/bin/x64/Debug/ThermoRawFileParser.exe "$@" 4 | -------------------------------------------------------------------------------- /LogFormat.cs: -------------------------------------------------------------------------------- 1 | namespace ThermoRawFileParser 2 | { 3 | public enum LogFormat 4 | { 5 | SILENT, 6 | VERBOSE, 7 | DEFAULT, 8 | WARNING, 9 | ERROR 10 | } 11 | 12 | } -------------------------------------------------------------------------------- /OutputFormat.cs: -------------------------------------------------------------------------------- 1 | namespace ThermoRawFileParser 2 | { 3 | public enum OutputFormat 4 | { 5 | MGF, 6 | MzML, 7 | IndexMzML, 8 | Parquet, 9 | None 10 | } 11 | 12 | public enum MetadataFormat 13 | { 14 | JSON, 15 | TXT, 16 | None 17 | } 18 | } -------------------------------------------------------------------------------- /RawFileParserException.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Runtime.Serialization; 3 | 4 | namespace ThermoRawFileParser 5 | { 6 | public class RawFileParserException : Exception 7 | { 8 | public RawFileParserException() 9 | { 10 | } 11 | 12 | public 
namespace ThermoRawFileParser.XIC
{
    /// <summary>
    /// Small settings holder with a base64 flag and a time-unit string.
    /// A fresh instance has <c>base64 == false</c> and an empty <c>timeunit</c>.
    /// </summary>
    public class XicOutputMeta
    {
        /// <summary>Base64 flag; false unless set by the caller.</summary>
        public bool base64 { get; set; } = false;

        /// <summary>Time-unit label; empty string unless set by the caller.</summary>
        public string timeunit { get; set; } = string.Empty;

        /// <summary>Creates an instance holding the default values.</summary>
        public XicOutputMeta()
        {
        }

        /// <summary>Creates an instance carrying the same values as <paramref name="copy"/>.</summary>
        /// <param name="copy">Instance to duplicate; must not be null.</param>
        public XicOutputMeta(XicOutputMeta copy)
        {
            (base64, timeunit) = (copy.base64, copy.timeunit);
        }
    }
}
namespace ThermoRawFileParser.XIC
{
    /// <summary>
    /// Pairs one <see cref="XicOutputMeta"/> with a list of <see cref="XicUnit"/> entries.
    /// </summary>
    public class XicData
    {
        /// <summary>Output settings that accompany the content list.</summary>
        public XicOutputMeta OutputMeta { get; set; }

        /// <summary>The units held by this instance.</summary>
        public List<XicUnit> Content { get; set; }

        /// <summary>Creates an instance with default output settings and no content.</summary>
        public XicData()
        {
            OutputMeta = new XicOutputMeta();
            Content = new List<XicUnit>();
        }

        /// <summary>
        /// Creates a copy of <paramref name="copy"/>: the output settings and every
        /// unit are duplicated through their own copy constructors.
        /// </summary>
        /// <param name="copy">Instance to duplicate; must not be null.</param>
        public XicData(XicData copy)
        {
            OutputMeta = new XicOutputMeta(copy.OutputMeta);

            var duplicatedUnits = new List<XicUnit>();
            for (int index = 0; index < copy.Content.Count; index++)
            {
                duplicatedUnits.Add(new XicUnit(copy.Content[index]));
            }

            Content = duplicatedUnits;
        }
    }
}
namespace ThermoRawFileParserTest
{
    /// <summary>
    /// Checks which accession <c>OntologyMapping.GetInstrumentModel</c> returns for
    /// exact, partial and unknown instrument model names.
    /// </summary>
    [TestFixture]
    public class OntologyMappingTests
    {
        [Test]
        public void TestGetInstrumentModel()
        {
            // Each entry pairs a model name with the accession expected for it.
            var expectations = new[]
            {
                // exact match
                ("LTQ Orbitrap", "MS:1000449"),
                // partial match, should return the longest partial match
                ("LTQ Orbitrap XXL", "MS:1000449"),
                // no match, should return the generic thermo instrument
                ("non existing model", "MS:1000483")
            };

            foreach (var (modelName, expectedAccession) in expectations)
            {
                var resolved = OntologyMapping.GetInstrumentModel(modelName);
                Assert.That(resolved.accession, Is.EqualTo(expectedAccession));
            }
        }
    }
}
namespace ThermoRawFileParser.Util
{
    /// <summary>
    /// Extension helpers for <see cref="CVParamType"/>.
    /// </summary>
    public static class CVHelpers
    {
        /// <summary>
        /// Creates a new <see cref="CVParamType"/> whose accession, name, cvRef, unit fields
        /// and value are taken from <paramref name="old"/>.
        /// </summary>
        /// <param name="old">Term to duplicate; must not be null.</param>
        /// <returns>A new instance carrying the same field values.</returns>
        public static CVParamType Copy(this CVParamType old)
        {
            return new CVParamType
            {
                accession = old.accession,
                name = old.name,
                cvRef = old.cvRef,
                unitAccession = old.unitAccession,
                unitCvRef = old.unitCvRef,
                unitName = old.unitName,
                value = old.value
            };
        }
    }

    /// <summary>
    /// Equality comparer that treats two <see cref="CVParamType"/> instances as equal
    /// when their accessions are equal; all other fields are ignored.
    /// </summary>
    public class CVComparer : IEqualityComparer<CVParamType>
    {
        /// <summary>
        /// Compares two terms by accession. Two null references are equal; a null
        /// reference never equals a non-null term.
        /// </summary>
        public bool Equals(CVParamType cv1, CVParamType cv2)
        {
            if (ReferenceEquals(cv1, cv2))
            {
                return true;
            }

            // Only one side can be null here; previously this dereferenced it and threw.
            if (cv1 is null || cv2 is null)
            {
                return false;
            }

            return cv1.accession == cv2.accession;
        }

        /// <summary>
        /// Hash code derived from the accession only, so it stays consistent with
        /// <see cref="Equals(CVParamType, CVParamType)"/>. A null term or a null
        /// accession hashes to 0 instead of throwing.
        /// </summary>
        public int GetHashCode(CVParamType cv)
        {
            return cv?.accession?.GetHashCode() ?? 0;
        }
    }
}
"rt_start":10.5, 46 | "rt_end":11.0 47 | }, 48 | "x":[ 49 | 1, 50 | 2, 51 | 3 52 | ], 53 | "y":[ 54 | 2, 55 | 3, 56 | 4 57 | ] 58 | } 59 | ] 60 | } -------------------------------------------------------------------------------- /Query/ProxiSpectrum.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | using System.Linq; 3 | using ThermoRawFileParser.Writer; 4 | 5 | namespace ThermoRawFileParser.Query 6 | { 7 | public class ProxiSpectrum 8 | { 9 | public List mzs { get; set; } 10 | public List intensities { get; set; } 11 | public List attributes { get; set; } 12 | 13 | public ProxiSpectrum() 14 | { 15 | mzs = new List(); 16 | intensities = new List(); 17 | attributes = new List(); 18 | } 19 | 20 | public void AddAttribute(string accession=null, string cvGroup=null, string name=null, string value=null, string valueAccession=null) 21 | { 22 | attributes.Add(new ProxiCvTerm(accession, cvGroup, name, value, valueAccession)); 23 | } 24 | 25 | public void AddMz(IList mzList) 26 | { 27 | mzs = mzList.ToList(); 28 | } 29 | 30 | public void AddIntensities(IList intList) 31 | { 32 | intensities = intList.ToList(); 33 | } 34 | 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /.github/workflows/buildandtest.yml: -------------------------------------------------------------------------------- 1 | name: buildtest 2 | 3 | on: 4 | push: 5 | paths: 6 | - '**.cs' 7 | - '.github/workflows/*.yml' 8 | pull_request: 9 | paths: 10 | - '**.cs' 11 | - '.github/workflows/*.yml' 12 | 13 | jobs: 14 | buildandtest: 15 | 16 | runs-on: 'ubuntu-latest' 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | path: 'main' 22 | 23 | - name: Setup .NET 24 | uses: actions/setup-dotnet@v4 25 | with: 26 | dotnet-version: 8.0.x 27 | 28 | - name: Get Thermo packages 29 | uses: actions/checkout@v4 30 | with: 31 | repository: 'thermofisherlsms/RawFileReader' 32 | path: 
namespace ThermoRawFileParser.Writer
{
    /// <summary>
    /// Helpers shared by the spectrum writers.
    /// </summary>
    public static class WriterUtil
    {
        /// <summary>
        /// Counts the scans of each <see cref="MSOrderType"/> in the inclusive scan range
        /// <paramref name="firstScan"/>..<paramref name="lastScan"/>.
        /// </summary>
        /// <param name="rawFile">Raw file whose scan filters are inspected.</param>
        /// <param name="firstScan">First scan number to inspect.</param>
        /// <param name="lastScan">Last scan number to inspect.</param>
        /// <returns>
        /// A dictionary with one entry per <see cref="MSOrderType"/> value; the
        /// <see cref="MSOrderType.Any"/> entry is overwritten with the sum of all entries.
        /// </returns>
        public static Dictionary<MSOrderType, int> CountScanOrder(IRawDataPlus rawFile, int firstScan, int lastScan)
        {
            var tally = new Dictionary<MSOrderType, int>();

            // Seed every enum value with zero before counting.
            foreach (MSOrderType order in Enum.GetValuesAsUnderlyingType(typeof(MSOrderType)))
            {
                tally[order] = 0;
            }

            int scanNumber = firstScan;
            while (scanNumber <= lastScan)
            {
                MSOrderType scanOrder = rawFile.GetFilterForScanNumber(scanNumber).MSOrder;
                tally[scanOrder] = tally[scanOrder] + 1;
                scanNumber++;
            }

            // The "Any" bucket holds the grand total over all buckets.
            int total = tally.Values.Sum();
            tally[MSOrderType.Any] = total;

            return tally;
        }

        /// <summary>
        /// Counts the scans of each <see cref="MSOrderType"/> over the whole file, from
        /// <c>RunHeaderEx.FirstSpectrum</c> to <c>RunHeaderEx.LastSpectrum</c>.
        /// </summary>
        /// <param name="rawFile">Raw file whose scan filters are inspected.</param>
        public static Dictionary<MSOrderType, int> CountScanOrder(IRawDataPlus rawFile) =>
            CountScanOrder(rawFile, rawFile.RunHeaderEx.FirstSpectrum, rawFile.RunHeaderEx.LastSpectrum);
    }
}
"""Plot the spectra stored in a spectrum-query JSON file.

Usage: print_spectrum_query.py input-json

The input is a JSON list; each element provides "mzs", "intensities" and
"attributes". "mzs"/"intensities" are either plain lists or base64 strings
holding packed 8-byte floats.
"""
import base64
import json
import struct
import sys
import numpy as np
import matplotlib.pyplot as plt


def _decode_doubles(encoded):
    """Decode a base64 string into a tuple of 8-byte floats (struct format 'd')."""
    raw = base64.standard_b64decode(encoded)
    # len(raw) >> 3 == number of complete 8-byte values in the buffer.
    return struct.unpack('d' * (len(raw) >> 3), raw)


if len(sys.argv) < 2:
    print("usage:", sys.argv[0], "input-json")
    # sys.exit instead of the site-provided exit(); non-zero status marks the usage error.
    sys.exit(1)

with open(sys.argv[1], 'r') as f:
    loaded_json = json.load(f)


fig = plt.figure()


for content in loaded_json:
    if isinstance(content["mzs"], str):
        # A string payload means the arrays were written base64-encoded.
        masses = _decode_doubles(content["mzs"])
        intens = _decode_doubles(content["intensities"])
    else:
        masses = content["mzs"]
        intens = content["intensities"]

    label = []
    for attribute in content["attributes"]:
        # NOTE(review): "MS:10003057" has one more digit than "MS:1000511" below -
        # confirm it matches the accession the producer actually writes.
        if attribute["accession"] == "MS:10003057":
            label.append("scan #%s" % attribute["value"])
        elif attribute["accession"] == "MS:1000511":
            label.append("MS Level %s" % attribute["value"])

    plt.plot(masses, intens, label=", ".join(label))

fig.tight_layout()
plt.legend(loc='upper right')
plt.grid(True)
plt.show()
"{A1AB1BD8-180D-49FD-AFBB-9515EE1E862B}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ThermoRawFileParserTest", "ThermoRawFileParserTest\ThermoRawFileParserTest.csproj", "{5D42807F-E856-42D7-9D3E-0A17584A9D55}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|x64 = Debug|x64 13 | Release|x64 = Release|x64 14 | EndGlobalSection 15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 16 | {A1AB1BD8-180D-49FD-AFBB-9515EE1E862B}.Debug|x64.ActiveCfg = Debug|x64 17 | {A1AB1BD8-180D-49FD-AFBB-9515EE1E862B}.Debug|x64.Build.0 = Debug|x64 18 | {A1AB1BD8-180D-49FD-AFBB-9515EE1E862B}.Release|x64.ActiveCfg = Release|x64 19 | {A1AB1BD8-180D-49FD-AFBB-9515EE1E862B}.Release|x64.Build.0 = Release|x64 20 | {5D42807F-E856-42D7-9D3E-0A17584A9D55}.Debug|x64.ActiveCfg = Debug|x64 21 | {5D42807F-E856-42D7-9D3E-0A17584A9D55}.Debug|x64.Build.0 = Debug|x64 22 | {5D42807F-E856-42D7-9D3E-0A17584A9D55}.Release|x64.ActiveCfg = Release|x64 23 | {5D42807F-E856-42D7-9D3E-0A17584A9D55}.Release|x64.Build.0 = Release|x64 24 | EndGlobalSection 25 | GlobalSection(SolutionProperties) = preSolution 26 | HideSolutionNode = FALSE 27 | EndGlobalSection 28 | GlobalSection(ExtensibilityGlobals) = postSolution 29 | SolutionGuid = {6549F3E3-F5CF-4970-8527-E8E0594A0294} 30 | EndGlobalSection 31 | EndGlobal 32 | -------------------------------------------------------------------------------- /XIC/XicMeta.cs: -------------------------------------------------------------------------------- 1 | using System.ComponentModel; 2 | using Newtonsoft.Json; 3 | 4 | namespace ThermoRawFileParser.XIC 5 | { 6 | public class XicMeta 7 | { 8 | [JsonProperty(DefaultValueHandling = DefaultValueHandling.IgnoreAndPopulate)] 9 | [DefaultValue(null)] 10 | public double? MzStart { get; set; } 11 | 12 | [JsonProperty(DefaultValueHandling = DefaultValueHandling.IgnoreAndPopulate)] 13 | [DefaultValue(null)] 14 | public double? 
MzEnd { get; set; } 15 | 16 | [JsonProperty(DefaultValueHandling = DefaultValueHandling.IgnoreAndPopulate)] 17 | [DefaultValue(null)] 18 | public double? RtStart { get; set; } 19 | 20 | [JsonProperty(DefaultValueHandling = DefaultValueHandling.IgnoreAndPopulate)] 21 | [DefaultValue(null)] 22 | public double? RtEnd { get; set; } 23 | 24 | [JsonProperty(DefaultValueHandling = DefaultValueHandling.IgnoreAndPopulate)] 25 | [DefaultValue(null)] 26 | public string Filter { get; set; } 27 | 28 | [JsonProperty(DefaultValueHandling = DefaultValueHandling.IgnoreAndPopulate)] 29 | [DefaultValue(null)] 30 | public string Comment { get; set; } 31 | 32 | 33 | public XicMeta() 34 | { 35 | MzStart = null; 36 | MzEnd = null; 37 | RtStart = null; 38 | RtEnd = null; 39 | Filter = null; 40 | Comment = null; 41 | } 42 | 43 | public XicMeta(XicMeta copy) 44 | { 45 | MzStart = copy.MzStart; 46 | MzEnd = copy.MzEnd; 47 | RtStart = copy.RtStart; 48 | RtEnd = copy.RtEnd; 49 | Filter = copy.Filter; 50 | Comment = copy.Comment; 51 | } 52 | } 53 | } -------------------------------------------------------------------------------- /Query/QueryExecutor.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Data; 4 | using System.IO; 5 | using Newtonsoft.Json; 6 | 7 | namespace ThermoRawFileParser.Query 8 | { 9 | public class QueryExecutor 10 | { 11 | public static void Run(QueryParameters parameters) 12 | { 13 | 14 | var reader = new ProxiSpectrumReader(parameters); 15 | var results = reader.Retrieve(); 16 | 17 | if (parameters.stdout) 18 | { 19 | StdOutputQueryData(results); 20 | } 21 | else 22 | { 23 | string outputFileName; 24 | 25 | // if outputFile has been defined, put output there. 
namespace ThermoRawFileParser.Util
{
    /// <summary>
    /// Constants used for peptide m/z calculation.
    /// </summary>
    public static class PeptideData
    {
        /// <summary>
        /// Per-residue masses keyed by one-letter amino-acid code
        /// (<see cref="Peptide.GetMz"/> adds a single <see cref="H2O"/> on top of their sum).
        /// </summary>
        public static readonly Dictionary<char, double> AAMasses = new Dictionary<char, double>
        {
            { 'G', 57.02146 },
            { 'A', 71.03711 },
            { 'S', 87.03203 },
            { 'P', 97.05276 },
            { 'V', 99.06841 },
            { 'T', 101.04768 },
            { 'C', 103.00919 },
            { 'L', 113.08406 },
            { 'I', 113.08406 },
            { 'N', 114.04293 },
            { 'D', 115.02694 },
            { 'Q', 128.05858 },
            { 'K', 128.09496 },
            { 'E', 129.04259 },
            { 'M', 131.04049 },
            { 'H', 137.05891 },
            { 'F', 147.06841 },
            { 'U', 150.95364 },
            { 'R', 156.10111 },
            { 'Y', 163.06333 },
            { 'W', 186.07931 },
            { 'O', 237.14773 }
        };

        /// <summary>Mass added per unit of charge in <see cref="Peptide.GetMz"/>.</summary>
        public static readonly double Proton = 1.00727646677;

        /// <summary>Mass added once per peptide in <see cref="Peptide.GetMz"/>.</summary>
        public static readonly double H2O = 18.0105646837;

        /// <summary>
        /// Matches any character that is not a key of <see cref="AAMasses"/>.
        /// Must stay declared after <see cref="AAMasses"/>: static initialisers run in textual order.
        /// </summary>
        public static readonly Regex invalidAA = new Regex(String.Format("[^{0}]", new String(PeptideData.AAMasses.Keys.ToArray())));
    }

    /// <summary>
    /// A peptide sequence restricted to the characters known to <see cref="PeptideData.AAMasses"/>.
    /// </summary>
    public class Peptide
    {
        /// <summary>The validated sequence; null when the parameterless constructor was used.</summary>
        public string Sequence { get; }

        /// <summary>Creates a peptide without a sequence.</summary>
        public Peptide()
        {
        }

        /// <summary>Creates a peptide from <paramref name="sequence"/>.</summary>
        /// <param name="sequence">One-letter amino-acid codes, upper case.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sequence"/> is null.</exception>
        /// <exception cref="Exception">The sequence contains a character without a known mass.</exception>
        public Peptide(string sequence)
        {
            // Previously a null argument only failed incidentally inside Regex.IsMatch;
            // the exception type is unchanged, the parameter name is now the right one.
            if (sequence == null)
            {
                throw new ArgumentNullException(nameof(sequence));
            }

            if (!IsValidSequence(sequence))
            {
                throw new Exception("Sequence have unknow amino acids");
            }

            Sequence = sequence;
        }

        /// <summary>
        /// Computes <c>(sum of residue masses + H2O + z * Proton) / |z|</c>.
        /// </summary>
        /// <param name="z">Charge; may be negative but not zero.</param>
        /// <returns>The m/z value for charge <paramref name="z"/>.</returns>
        /// <exception cref="Exception"><paramref name="z"/> is zero.</exception>
        /// <exception cref="InvalidOperationException">The peptide has no sequence.</exception>
        public double GetMz(int z)
        {
            if (z == 0) throw new Exception("Charge cannot be zero!");

            // A default-constructed peptide used to fail here with a NullReferenceException.
            if (Sequence == null)
            {
                throw new InvalidOperationException("Peptide has no sequence.");
            }

            double mass = Sequence.Sum(residue => PeptideData.AAMasses[residue]) + PeptideData.H2O;
            return (mass + z * PeptideData.Proton) / Math.Abs(z);
        }

        /// <summary>True when every character of <paramref name="sequence"/> has a known mass.</summary>
        private static bool IsValidSequence(string sequence)
        {
            return !PeptideData.invalidAA.IsMatch(sequence);
        }
    }
}
namespace ThermoRawFileParser.Query
{
    /// <summary>
    /// Options and error/warning counters for a spectrum query run.
    /// </summary>
    public class QueryParameters
    {
        private int _errors;

        private int _warnings;

        private string _rawFilePath;

        private string _userFilePath;

        public bool help { get; set; }

        /// <summary>
        /// Path of the raw file. Assigning it also resets <see cref="userFilePath"/> to the
        /// same value; use <see cref="UpdateRealPath"/> to change only this path.
        /// </summary>
        public string rawFilePath
        {
            get => _rawFilePath;
            set
            {
                _rawFilePath = value;
                _userFilePath = value;
            }
        }

        /// <summary>The path last assigned through <see cref="rawFilePath"/>; not changed by <see cref="UpdateRealPath"/>.</summary>
        public string userFilePath { get => _userFilePath; }

        public string scans { get; set; }
        public string outputFile { get; set; }
        public bool noPeakPicking { get; set; }
        public bool stdout { get; set; }
        public bool Vigilant { get; set; }

        /// <summary>Number of times <see cref="NewError"/> has been called.</summary>
        public int Errors { get => _errors; }

        /// <summary>Number of times <see cref="NewWarn"/> has been called.</summary>
        public int Warnings { get => _warnings; }

        public LogFormat LogFormat { get; set; }

        /// <summary>Creates a parameter set with default values and zeroed counters.</summary>
        public QueryParameters()
        {
            help = false;
            rawFilePath = null;
            scans = "";
            outputFile = null;
            noPeakPicking = false;
            stdout = false;
            Vigilant = false;
            LogFormat = LogFormat.DEFAULT;
            _errors = 0;
            _warnings = 0;
        }

        /// <summary>Creates a copy of <paramref name="copy"/>, including its counters.</summary>
        /// <param name="copy">Parameter set to duplicate; must not be null.</param>
        public QueryParameters(QueryParameters copy)
        {
            help = copy.help;

            // Copy both paths field by field. Going through the rawFilePath setter would
            // overwrite _userFilePath with the raw path and lose the user-supplied path
            // of a source on which UpdateRealPath had already been called.
            _rawFilePath = copy._rawFilePath;
            _userFilePath = copy._userFilePath;

            scans = copy.scans;
            outputFile = copy.outputFile;
            noPeakPicking = copy.noPeakPicking;
            stdout = copy.stdout;
            Vigilant = copy.Vigilant;
            LogFormat = copy.LogFormat;
            _errors = copy.Errors;
            _warnings = copy.Warnings;
        }

        /// <summary>Increments the error counter.</summary>
        public void NewError()
        {
            _errors++;
        }

        /// <summary>Increments the warning counter.</summary>
        public void NewWarn()
        {
            _warnings++;
        }

        /// <summary>
        /// Replaces the raw file path while leaving <see cref="userFilePath"/> untouched.
        /// </summary>
        /// <param name="realPath">Path to store as <see cref="rawFilePath"/>.</param>
        public void UpdateRealPath(string realPath)
        {
            _rawFilePath = realPath;
        }
    }
}
ThermoRawFileParserTest 9 | { 10 | [TestFixture] 11 | public class UtilTests 12 | { 13 | [Test] 14 | public void TestRegex() 15 | { 16 | const string filterString = "ITMS + c NSI r d Full ms2 961.8803@cid35.00 [259.0000-1934.0000]"; 17 | const string pattern = @"ms2 (.*?)@"; 18 | 19 | Match result = Regex.Match(filterString, pattern); 20 | if (result.Success) 21 | { 22 | Assert.That(result.Groups[1].Value, Is.EqualTo("961.8803")); 23 | } 24 | else 25 | { 26 | Assert.Fail(); 27 | } 28 | } 29 | 30 | [Test] 31 | public void TestNumberIterator() 32 | { 33 | NumberIterator iterator; 34 | iterator = new NumberIterator(); 35 | Assert.That(new List(iterator.IterateScans()), 36 | Is.EqualTo(new List())); 37 | 38 | iterator = new NumberIterator("1, 2,3- 5, 7, 9 - 10", 1, 100); 39 | Assert.That(new List(iterator.IterateScans()), 40 | Is.EqualTo(new List { 1, 2, 3, 4, 5, 7, 9, 10 })); 41 | 42 | iterator = new NumberIterator(null, 1, 5); 43 | Assert.That(new List(iterator.IterateScans()), 44 | Is.EqualTo(new List { 1, 2, 3, 4, 5 })); 45 | 46 | iterator = new NumberIterator(" - ", 1, 5); 47 | Assert.That(new List(iterator.IterateScans()), 48 | Is.EqualTo(new List { 1, 2, 3, 4, 5 })); 49 | 50 | iterator = new NumberIterator("- 5, 9 - ", 1, 12); 51 | Assert.That(new List(iterator.IterateScans()), 52 | Is.EqualTo(new List { 1, 2, 3, 4, 5, 9, 10, 11, 12 })); 53 | 54 | Assert.Throws(typeof(Exception), () => new NumberIterator("1, 2, 2-5", 1, 10)); 55 | Assert.Throws(typeof(Exception), () => new NumberIterator("3, -5", 1, 10)); 56 | Assert.Throws(typeof(Exception), () => new NumberIterator("3 -,7", 1, 10)); 57 | Assert.Throws(typeof(Exception), () => new NumberIterator("a,-,7", 1, 10)); 58 | 59 | } 60 | 61 | } 62 | } -------------------------------------------------------------------------------- /XIC/JSONInputUnit.cs: -------------------------------------------------------------------------------- 1 | using System.ComponentModel; 2 | using Newtonsoft.Json; 3 | 4 | namespace 
/// <summary>
/// One element of the XIC JSON input. Every property is populated with its
/// declared default when the key is absent from the JSON.
/// </summary>
public class JSONInputUnit
{
    [JsonProperty("mz_start", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(null)]
    public double? MzStart { get; set; }

    [JsonProperty("mz_end", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(null)]
    public double? MzEnd { get; set; }

    [JsonProperty("mz", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(null)]
    public double? Mz { get; set; }

    [JsonProperty("sequence", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue("")]
    public string Sequence { get; set; }

    [JsonProperty("tolerance", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(null)]
    public double? Tolerance { get; set; }

    [JsonProperty("tolerance_unit", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue("ppm")]
    public string ToleranceUnit { get; set; }

    [JsonProperty("charge", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(1)]
    public int Charge { get; set; }

    [JsonProperty("rt_start", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(null)]
    public double? RtStart { get; set; }

    [JsonProperty("rt_end", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(null)]
    public double? RtEnd { get; set; }

    [JsonProperty("scan_filter", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(null)]
    public string Filter { get; set; }

    [JsonProperty("comment", DefaultValueHandling = DefaultValueHandling.Populate)]
    [DefaultValue(null)]
    public string Comment { get; set; }

    /// <summary>
    /// Returns true when both <see cref="MzStart"/> and <see cref="MzEnd"/> are set.
    /// </summary>
    public bool HasMzRange()
    {
        return MzStart.HasValue && MzEnd.HasValue;
    }

    /// <summary>
    /// Returns true when <see cref="Mz"/> is set.
    /// </summary>
    public bool HasMz()
    {
        return Mz.HasValue;
    }

    /// <summary>
    /// Returns true when <see cref="Sequence"/> holds a non-empty string.
    /// </summary>
    public bool HasSequence()
    {
        // A null sequence (explicit JSON null, or an instance built without the
        // serializer) must count as "no sequence", just like the "" default.
        return !string.IsNullOrEmpty(Sequence);
    }
}
/// <summary>
/// Opens <paramref name="path"/> and asks the OS for the final path name of the
/// resulting handle via GetFinalPathNameByHandle.
/// </summary>
/// <param name="path">Path to resolve.</param>
/// <returns>The path string written by GetFinalPathNameByHandle, unmodified.</returns>
/// <exception cref="Win32Exception">
/// The file cannot be opened or the final path cannot be retrieved.
/// </exception>
public static string GetFinalPathName(string path)
{
    const int initialCapacity = 1024;

    var h = CreateFile(path,
        FILE_READ_EA,
        FileShare.ReadWrite | FileShare.Delete,
        IntPtr.Zero,
        FileMode.Open,
        FILE_FLAG_BACKUP_SEMANTICS,
        IntPtr.Zero);
    if (h == INVALID_HANDLE_VALUE)
        throw new Win32Exception();

    try
    {
        var sb = new StringBuilder(initialCapacity);
        var res = GetFinalPathNameByHandle(h, sb, (uint) sb.Capacity, 0);

        // When the buffer is too small the API returns the required size
        // (terminating null included) instead of writing the path; retry
        // once with a buffer of exactly that size.
        if (res > sb.Capacity)
        {
            sb = new StringBuilder((int) res);
            res = GetFinalPathNameByHandle(h, sb, (uint) sb.Capacity, 0);
        }

        if (res == 0 || res > sb.Capacity)
            throw new Win32Exception();

        return sb.ToString();
    }
    finally
    {
        // The handle was only needed for the query; always release it.
        CloseHandle(h);
    }
}
/// <summary>
/// Tests scan selection of <see cref="ProxiSpectrumReader"/> against the bundled small.RAW file.
/// </summary>
[TestFixture]
public class QueryTests
{
    [Test]
    public void TestProxiReaderScans()
    {
        var rawFile = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/small.RAW");

        var query = new QueryParameters
        {
            rawFilePath = rawFile,
        };

        // Runs one query for the given scan selection and returns the scan numbers found.
        List<int> ScansFor(string selection)
        {
            query.scans = selection;
            var spectra = new ProxiSpectrumReader(query).Retrieve();
            return GetScanNumbers(spectra);
        }

        //Interval of scans to retrieve
        Assert.That(ScansFor("1-10"), Is.EqualTo(new List<int> { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }));

        //Open-ended interval
        Assert.That(ScansFor("-5"), Is.EqualTo(new List<int> { 1, 2, 3, 4, 5 }));

        //Open-ended interval
        Assert.That(ScansFor("41-"), Is.EqualTo(new List<int> { 41, 42, 43, 44, 45, 46, 47, 48 }));

        //Interval larger than available scans
        Assert.That(ScansFor("45-50"), Is.EqualTo(new List<int> { 45, 46, 47, 48 }));

        //Sequence of scans to retrieve
        Assert.That(ScansFor("1,5,7"), Is.EqualTo(new List<int> { 1, 5, 7 }));

        //Combination of intervals and individual scans
        Assert.That(ScansFor("-2,5,7-10,15,46-"),
            Is.EqualTo(new List<int> { 1, 2, 5, 7, 8, 9, 10, 15, 46, 47, 48 }));
    }

    /// <summary>
    /// Collects, in result order, the integer value of every "scan number" attribute.
    /// </summary>
    private List<int> GetScanNumbers(List<ProxiSpectrum> results)
    {
        return results
            .SelectMany(spectrum => spectrum.attributes.Where(attribute => attribute.Name == "scan number"))
            .Select(attribute => int.Parse(attribute.Value))
            .ToList();
    }
}
/// <summary>
/// Checks that neither range is reversed: the m/z start must not exceed the
/// m/z end, and, when both retention time bounds are set, the RT start must
/// not exceed the RT end.
/// </summary>
/// <returns>false when a range is reversed, true otherwise.</returns>
public bool HasValidRanges()
{
    if (Meta.MzStart > Meta.MzEnd)
    {
        return false;
    }

    // The RT comparison is only meaningful when BOTH RT bounds are present
    // (the original guard tested MzStart here by mistake).
    if (Meta.RtStart != null && Meta.RtEnd != null)
    {
        if (Meta.RtStart > Meta.RtEnd)
        {
            return false;
        }
    }

    return true;
}
15 | with: 16 | path: 'main' 17 | 18 | - name: Setup .NET 19 | uses: actions/setup-dotnet@v4 20 | with: 21 | dotnet-version: 8.0.x 22 | 23 | - name: Get Thermo packages 24 | uses: actions/checkout@v4 25 | with: 26 | repository: 'thermofisherlsms/RawFileReader' 27 | path: 'ThermoPKG' 28 | token: ${{ secrets.GITHUB_TOKEN }} 29 | sparse-checkout: 'Libs/NetCore/Net8/*.nupkg' 30 | sparse-checkout-cone-mode: 'false' 31 | 32 | - name: Add Thermo packages as NuGet source 33 | run: | 34 | dotnet nuget add source -n ThermoPKG `pwd`/ThermoPKG/Libs/NetCore/Net8 35 | 36 | - name: Publish all and zip artifacts 37 | run: | 38 | dotnet publish --configuration Release --runtime linux-x64 main/ThermoRawFileParser.csproj --sc -o publish/linux-x64 39 | dotnet publish --configuration Release --runtime win-x64 main/ThermoRawFileParser.csproj --sc -o publish/win-x64 40 | dotnet publish --configuration Release --runtime osx-x64 main/ThermoRawFileParser.csproj --sc -o publish/osx-x64 41 | dotnet publish --configuration Release --framework net8.0 main/ThermoRawFileParser.csproj -o publish/net8 42 | cd publish/linux-x64 43 | zip -r -q ThermoRawFileParser-${{ github.ref_name }}-linux.zip * 44 | cd ../win-x64 45 | zip -r -q ThermoRawFileParser-${{ github.ref_name }}-win.zip * 46 | cd ../osx-x64 47 | zip -r -q ThermoRawFileParser-${{ github.ref_name }}-osx.zip * 48 | cd ../net8 49 | zip -r -q ThermoRawFileParser-${{ github.ref_name }}-net8.zip * 50 | 51 | - name: Upload Linux to release 52 | uses: svenstaro/upload-release-action@v2 53 | with: 54 | repo_token: ${{ secrets.GITHUB_TOKEN }} 55 | file: publish/linux-x64/ThermoRawFileParser-${{ github.ref_name }}-linux.zip 56 | tag: ${{ github.ref }} 57 | 58 | - name: Upload OSX to release 59 | uses: svenstaro/upload-release-action@v2 60 | with: 61 | repo_token: ${{ secrets.GITHUB_TOKEN }} 62 | file: publish/osx-x64/ThermoRawFileParser-${{ github.ref_name }}-osx.zip 63 | tag: ${{ github.ref }} 64 | 65 | - name: Upload Windows to release 66 | uses: 
svenstaro/upload-release-action@v2 67 | with: 68 | repo_token: ${{ secrets.GITHUB_TOKEN }} 69 | file: publish/win-x64/ThermoRawFileParser-${{ github.ref_name }}-win.zip 70 | tag: ${{ github.ref }} 71 | 72 | - name: Upload framework-based to release 73 | uses: svenstaro/upload-release-action@v2 74 | with: 75 | repo_token: ${{ secrets.GITHUB_TOKEN }} 76 | file: publish/net8/ThermoRawFileParser-${{ github.ref_name }}-net8.zip 77 | tag: ${{ github.ref }} 78 | -------------------------------------------------------------------------------- /XIC/XicExecutor.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | using System.Text; 4 | using System.Reflection; 5 | using Newtonsoft.Json; 6 | using ThermoFisher.CommonCore.Data; 7 | using log4net; 8 | 9 | namespace ThermoRawFileParser.XIC 10 | { 11 | public static class XicExecutor 12 | { 13 | private static readonly ILog Log = 14 | LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType); 15 | 16 | public static void Run(XicParameters parameters) 17 | { 18 | Log.InfoFormat("Reading and validating JSON input"); 19 | var jsonString = File.ReadAllText(parameters.jsonFilePath, Encoding.UTF8); 20 | var validationErrors = JSONParser.ValidateJson(jsonString); 21 | if (!validationErrors.IsNullOrEmpty()) 22 | { 23 | var validationMessage = new StringBuilder("JSON validation error(s):\n"); 24 | foreach (var validationError in validationErrors) 25 | { 26 | if (validationError.ToString().Contains("ExcludedSchemaValidates")) 27 | { 28 | validationMessage.Append( 29 | "Use M/Z and tolerance, M/Z start and M/Z end or sequence and tolerance, not a combination (with optional RT start and/or end).\n"); 30 | } 31 | 32 | validationMessage.Append( 33 | $"element start line number: {validationError.LineNumber}\n{validationError.ToString()}"); 34 | } 35 | 36 | throw new RawFileParserException(validationMessage.ToString()); 37 | } 38 | 39 | var xicData = 
//https://social.msdn.microsoft.com/Forums/vstudio/en-US/789c37ea-b9bf-4512-a418-f4f9532c59bf/dictionary-with-limited-size?forum=csharpgeneral
/// <summary>
/// Dictionary holding at most a fixed number of entries. When a new key is
/// added to a full dictionary, the key that was inserted first is evicted.
/// Insertion order is tracked in a queue that is kept in sync with the
/// dictionary by every mutating member.
/// </summary>
public class LimitedSizeDictionary<TKey, TValue> : IDictionary<TKey, TValue>
{
    private readonly Dictionary<TKey, TValue> _dict;
    private Queue<TKey> _queue;
    private readonly int _size;

    /// <param name="size">Maximum number of entries; must be at least 1.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is less than 1.</exception>
    public LimitedSizeDictionary(int size)
    {
        // A capacity of zero could never accept an entry (the first Add would
        // fail while evicting from an empty queue), so reject it up front.
        if (size < 1)
        {
            throw new ArgumentOutOfRangeException(nameof(size), size, "Size must be at least 1.");
        }

        _size = size;
        _dict = new Dictionary<TKey, TValue>(size + 1);
        _queue = new Queue<TKey>(size);
    }

    /// <summary>
    /// Adds a new entry, evicting the oldest one when the dictionary is full.
    /// </summary>
    /// <exception cref="ArgumentException">The key is already present.</exception>
    public void Add(TKey key, TValue value)
    {
        // Add first: a duplicate key throws before the queue is touched.
        _dict.Add(key, value);
        if (_queue.Count == _size)
        {
            _dict.Remove(_queue.Dequeue());
        }

        _queue.Enqueue(key);
    }

    public bool ContainsKey(TKey key)
    {
        return _dict.ContainsKey(key);
    }

    /// <summary>Removes the entry with the given key, if present.</summary>
    public bool Remove(TKey key)
    {
        if (!_dict.Remove(key))
        {
            return false;
        }

        ForgetKey(key);
        return true;
    }

    public bool TryGetValue(TKey key, out TValue value)
    {
        return _dict.TryGetValue(key, out value);
    }

    /// <summary>
    /// Gets or sets a value. Setting an existing key replaces its value and
    /// keeps its position in the eviction order; setting a new key behaves
    /// like <see cref="Add(TKey, TValue)"/> and is subject to the size limit.
    /// </summary>
    public TValue this[TKey key]
    {
        get => _dict[key];
        set
        {
            if (_dict.ContainsKey(key))
            {
                _dict[key] = value;
            }
            else
            {
                Add(key, value);
            }
        }
    }

    public ICollection<TKey> Keys => _dict.Keys;

    public ICollection<TValue> Values => _dict.Values;

    public IEnumerator<KeyValuePair<TKey, TValue>> GetEnumerator()
    {
        return _dict.GetEnumerator();
    }

    IEnumerator IEnumerable.GetEnumerator()
    {
        return ((IEnumerable) _dict).GetEnumerator();
    }

    /// <summary>Adds an entry through the same size-limited path as <see cref="Add(TKey, TValue)"/>.</summary>
    public void Add(KeyValuePair<TKey, TValue> item)
    {
        Add(item.Key, item.Value);
    }

    /// <summary>Removes all entries and resets the eviction order.</summary>
    public void Clear()
    {
        _dict.Clear();
        // Stale keys left in the queue would later evict freshly added entries.
        _queue.Clear();
    }

    public bool Contains(KeyValuePair<TKey, TValue> item)
    {
        return ((ICollection<KeyValuePair<TKey, TValue>>) _dict).Contains(item);
    }

    public void CopyTo(KeyValuePair<TKey, TValue>[] array, int arrayIndex)
    {
        ((ICollection<KeyValuePair<TKey, TValue>>) _dict).CopyTo(array, arrayIndex);
    }

    /// <summary>Removes the entry only when both key and value match.</summary>
    public bool Remove(KeyValuePair<TKey, TValue> item)
    {
        if (!((ICollection<KeyValuePair<TKey, TValue>>) _dict).Remove(item))
        {
            return false;
        }

        ForgetKey(item.Key);
        return true;
    }

    public int Count => _dict.Count;

    public bool IsReadOnly => ((ICollection<KeyValuePair<TKey, TValue>>) _dict).IsReadOnly;

    // Rebuilds the eviction queue without the given key, using the dictionary's key comparer.
    private void ForgetKey(TKey key)
    {
        var remaining = new Queue<TKey>(_size);
        foreach (var queued in _queue)
        {
            if (!_dict.Comparer.Equals(queued, key))
            {
                remaining.Enqueue(queued);
            }
        }

        _queue = remaining;
    }
}
/// <summary>
/// Uploads files to a bucket on an S3-compatible endpoint using signature
/// version 2 and path-style addressing.
/// </summary>
public class S3Loader
{
    private readonly string _bucketName;

    private static IAmazonS3 _client;
    private readonly string _s3Url;
    private readonly string _s3AccessKeyId;
    private readonly string _s3SecretAccessKey;

    /// <summary>
    /// Stores the connection settings and issues a list-objects request against the bucket.
    /// </summary>
    public S3Loader(string s3url, string s3AccessKeyId, string s3SecretAccessKey, string bucketName)
    {
        _s3Url = s3url;
        _s3AccessKeyId = s3AccessKeyId;
        _s3SecretAccessKey = s3SecretAccessKey;
        _bucketName = bucketName;
        AWSConfigsS3.UseSignatureVersion4 = false;

        _client = new AmazonS3Client(new BasicAWSCredentials(s3AccessKeyId, s3SecretAccessKey),
            CreateConfig(s3url));

        // NOTE(review): the request is started but never awaited, and the returned
        // task is never null, so this check cannot detect an unreachable endpoint.
        // Left as-is to keep construction non-blocking; confirm the intended behavior.
        var buckets = _client.ListObjectsAsync(bucketName);

        if (buckets == null)
            throw new AmazonS3Exception("Connection to AWS url -- " + this._s3Url);
    }

    /// <summary>
    /// Uploads a local file to the configured bucket.
    /// </summary>
    /// <param name="filePath">Local file to upload; also stored as metadata.</param>
    /// <param name="name">Object key in the bucket.</param>
    /// <param name="contentType">Content type assigned to the object.</param>
    /// <param name="label">Title stored as object metadata.</param>
    /// <returns>true when the upload completed; false when an error was caught and reported.</returns>
    public bool loadObjectToS3(string filePath, string name, string contentType, string label)
    {
        try
        {
            PutObjectRequest putRequest = new PutObjectRequest
            {
                BucketName = _bucketName,
                Key = name,
                ContentType = contentType,
                FilePath = filePath
            };

            putRequest.Metadata.Add("x-amz-meta-title", label);
            putRequest.Metadata.Add("x-amz-meta-original-file-name", filePath);

            // A fresh client per request works around
            // https://github.com/aws/aws-sdk-net/issues/856. It is kept local so that
            // the shared static client is never replaced by a disposed instance.
            using (var client = new AmazonS3Client(_s3AccessKeyId, _s3SecretAccessKey, CreateConfig(_s3Url)))
            {
                // GetAwaiter().GetResult() rethrows the original exception instead of an
                // AggregateException, so the AmazonS3Exception handler below can run.
                client.PutObjectAsync(putRequest).GetAwaiter().GetResult();
            }

            return true;
        }
        catch (AmazonS3Exception e)
        {
            Console.WriteLine(
                "Error encountered ***. Message:'{0}' when writing an object"
                , e.Message);
            return false;
        }
        catch (Exception e)
        {
            Console.WriteLine(
                "Unknown encountered on server. Message:'{0}' when writing an object"
                , e.Message);
            return false;
        }
    }

    // Builds the client configuration shared by the constructor and the upload path.
    private static AmazonS3Config CreateConfig(string serviceUrl)
    {
        return new AmazonS3Config
        {
            RegionEndpoint = RegionEndpoint.EUWest2,
            ForcePathStyle = true,
            SignatureVersion = "2",
            ServiceURL = serviceUrl,
            SignatureMethod = SigningAlgorithm.HmacSHA1
        };
    }
}
// Holder for a mass array and an intensity array.
// NOTE(review): presumably the two arrays are index-aligned — confirm against the code that fills them.
struct MZArray
{
    public double[] Masses { get; set; }
    public double[] Intensities { get; set; }
}

// Plain data holder for one set of spectrum arrays plus base peak values.
// NOTE(review): which arrays may be null, and how their lengths relate,
// is not visible here — verify with the producer before relying on it.
struct MZData
{
    // Base peak values; nullable, so they may be absent.
    public double? basePeakMass;
    public double? basePeakIntensity;
    public double[] masses;
    public double[] intensities;
    public double[] charges;
    public double[] baselineData;
    public double[] noiseData;
    public double[] massData;
    // Flag named for centroided data; its exact meaning is defined by the code that sets it.
    public bool isCentroided;
}
/// <summary>
/// Iterates over numbers based on a provided interval string.
/// The interval string can specify individual scans or ranges (e.g., "1-5,8,10-12").
/// Open-ended intervals are allowed (e.g., "5-" or "-3"). Intervals must be in
/// ascending order and cannot overlap. Interval bounds are clamped to [min, max].
/// If the interval string is null or empty, the iterator covers the full range from min to max.
/// Throws <see cref="Exception"/> for invalid formats.
/// </summary>
public class NumberIterator
{
    // Compiled once and shared: only digits, commas, dashes and whitespace are allowed.
    private static readonly Regex Valid = new Regex(@"^[\d,\-\s]+$");

    // Groups: 1 = start number, 2 = dash (marks an interval), 3 = end number.
    private static readonly Regex Interval = new Regex(@"^\s*(\d+)?\s*(-)?\s*(\d+)?\s*$");

    // Flat list of inclusive bounds: [start0, end0, start1, end1, ...].
    private readonly List<int> edges;

    /// <summary>Creates an iterator that yields nothing.</summary>
    public NumberIterator()
    {
        edges = new List<int>();
    }

    /// <summary>
    /// Parses <paramref name="intervalString"/> into a list of inclusive intervals.
    /// </summary>
    /// <param name="intervalString">Comma-separated numbers and intervals; null or empty selects min..max.</param>
    /// <param name="min">Lower bound used for open-ended intervals and for clamping.</param>
    /// <param name="max">Upper bound used for open-ended intervals and for clamping.</param>
    /// <exception cref="Exception">The string is malformed, reversed, out of order or overlapping.</exception>
    public NumberIterator(string intervalString, int min, int max)
    {
        if (string.IsNullOrEmpty(intervalString))
        {
            edges = new List<int> { min, max };
            return;
        }

        if (!Valid.IsMatch(intervalString))
        {
            throw new Exception("Invalid interval format.");
        }

        edges = new List<int>();

        foreach (var piece in intervalString.Split(',', StringSplitOptions.TrimEntries))
        {
            try
            {
                var (start, end) = ParsePiece(piece, min, max);

                // Each interval must start strictly after the previous one ended.
                if (edges.Count > 0 && start <= edges[edges.Count - 1])
                {
                    throw new Exception("Interval edges should be consecutive");
                }

                edges.Add(start);
                edges.Add(end);
            }
            catch (Exception ex)
            {
                // Keep the cause as inner exception so the stack trace is not lost.
                throw new Exception($"Cannot parse '{piece}' in {intervalString} - {ex.Message}", ex);
            }
        }

        if (edges.Count == 0)
        {
            throw new Exception("No valid scan numbers provided in the interval string.");
        }
    }

    /// <summary>Yields every number of every interval, in ascending order.</summary>
    public IEnumerable<int> IterateScans()
    {
        for (int i = 0; i < edges.Count; i += 2)
        {
            for (int scan = edges[i]; scan <= edges[i + 1]; scan++)
            {
                yield return scan;
            }
        }
    }

    // Converts one comma-separated piece into inclusive bounds.
    private static (int Start, int End) ParsePiece(string piece, int min, int max)
    {
        var match = Interval.Match(piece);
        if (!match.Success)
        {
            throw new Exception("Expected a number or an interval");
        }

        bool hasStart = match.Groups[1].Success;
        bool hasDash = match.Groups[2].Success;
        bool hasEnd = match.Groups[3].Success;

        if (!hasDash)
        {
            // Without a dash exactly one number is allowed ("3 5" is rejected).
            if (!hasStart || hasEnd)
            {
                throw new Exception("Expected a single number or an interval");
            }

            int single = int.Parse(match.Groups[1].Value);
            return (single, single);
        }

        int? rawStart = hasStart ? int.Parse(match.Groups[1].Value) : (int?) null;
        int? rawEnd = hasEnd ? int.Parse(match.Groups[3].Value) : (int?) null;

        // Only an explicitly reversed interval ("5-3") is an error; an interval that
        // merely lies outside [min, max] still clamps to an empty range as before.
        if (rawStart > rawEnd)
        {
            throw new Exception("Interval start is greater than interval end");
        }

        int start = rawStart.HasValue ? Math.Max(min, rawStart.Value) : min;
        int end = rawEnd.HasValue ? Math.Min(max, rawEnd.Value) : max;
        return (start, end);
    }
}
67 | /// 68 | public OutputFormat OutputFormat { get; set; } 69 | 70 | /// 71 | /// The metadata output format. 72 | /// 73 | public MetadataFormat MetadataFormat { get; set; } 74 | 75 | /// 76 | /// The metadata output file. 77 | /// > 78 | public string MetadataOutputFile { get; set; } 79 | 80 | /// 81 | /// Gzip the output file. 82 | /// 83 | public bool Gzip { get; set; } 84 | 85 | public HashSet NoPeakPicking { get; set; } 86 | 87 | public bool NoZlibCompression { get; set; } 88 | 89 | public bool AllDetectors { get; set; } 90 | 91 | public LogFormat LogFormat { get; set; } 92 | 93 | public bool IgnoreInstrumentErrors { get; set; } 94 | 95 | public bool ExData { get; set; } 96 | 97 | public HashSet MsLevel { get; set; } 98 | 99 | public int MaxLevel { get; set; } 100 | 101 | public bool MgfPrecursor { get; set; } 102 | 103 | public bool NoiseData { get; set; } 104 | 105 | public bool ChargeData { get; set; } 106 | 107 | public bool StdOut { get; set; } 108 | 109 | public bool Vigilant { get; set; } 110 | 111 | private S3Loader S3Loader { get; set; } 112 | 113 | public string S3AccessKeyId { get; set; } 114 | 115 | public string S3SecretAccessKey { get; set; } 116 | 117 | public string S3Url { get; set; } 118 | 119 | public string BucketName { get; set; } 120 | 121 | /// 122 | /// The RAW file name without extension. 
/// <summary>
/// Label-to-value lookup built from scan trailer data, with typed accessors.
/// Values are stored trimmed; a label that occurs more than once keeps its last value.
/// </summary>
public class ScanTrailer
{
    private readonly Dictionary<string, string> data;

    /// <summary>Number of stored elements.</summary>
    public int Length
    {
        get => data.Count;
    }

    /// <summary>Snapshot of all element names.</summary>
    public string[] Labels
    {
        get => data.Keys.ToArray();
    }

    /// <summary>Snapshot of all element values.</summary>
    public string[] Values
    {
        get => data.Values.ToArray();
    }

    /// <summary>Copies every label/value pair of <paramref name="trailerData"/>, trimming the values.</summary>
    public ScanTrailer(ILogEntryAccess trailerData)
    {
        data = new Dictionary<string, string>();

        // Read the arrays once instead of on every iteration.
        var labels = trailerData.Labels;
        var values = trailerData.Values;

        for (int i = 0; i < trailerData.Length; i++)
        {
            data[labels[i]] = values[i].Trim();
        }
    }

    /// <summary>Creates an empty trailer.</summary>
    public ScanTrailer()
    {
        data = new Dictionary<string, string>();
    }

    /// <summary>
    /// Returns the selected trailer element as a boolean: "on", "true" and "yes"
    /// (case-insensitive) give true, any other value gives false.
    /// Returns null only when the element does not exist.
    /// </summary>
    /// <param name="key">name of the element</param>
    public bool? AsBool(string key)
    {
        if (!data.TryGetValue(key, out var stringValue))
        {
            return null;
        }

        // Invariant lowercasing keeps the match independent of the current culture.
        switch (stringValue.ToLowerInvariant())
        {
            case "on":
            case "true":
            case "yes":
                return true;
            default:
                return false;
        }
    }

    /// <summary>
    /// Try returning selected trailer element as double value,
    /// if the element does not exist or cannot be converted to double return null
    /// </summary>
    /// <param name="key">name of the element</param>
    public double? AsDouble(string key)
    {
        // NOTE(review): parsing uses the current culture; confirm that the trailer
        // values are formatted with the same culture before changing this.
        if (data.TryGetValue(key, out var stringValue) &&
            double.TryParse(stringValue, NumberStyles.Any, CultureInfo.CurrentCulture, out var result))
        {
            return result;
        }

        return null;
    }

    /// <summary>
    /// Try returning selected trailer element as integer value,
    /// if the element does not exist or cannot be converted to integer return null
    /// </summary>
    /// <param name="key">name of the element</param>
    public int? AsInt(string key)
    {
        if (data.TryGetValue(key, out var stringValue) && int.TryParse(stringValue, out var result))
        {
            return result;
        }

        return null;
    }

    /// <summary>
    /// Try returning selected trailer element as strictly positive (non zero) integer value,
    /// if the element does not exist or cannot be converted to strictly positive integer return null
    /// </summary>
    /// <param name="key">name of the element</param>
    public int? AsPositiveInt(string key)
    {
        int? value = AsInt(key);

        // The comparison is false for null, so a missing value also yields null.
        return value > 0 ? value : null;
    }

    /// <summary>
    /// Try returning selected trailer element as string,
    /// alias to <see cref="Get"/>
    /// </summary>
    /// <param name="key">name of the element</param>
    public string AsString(string key)
    {
        return Get(key);
    }

    /// <summary>
    /// Try getting selected trailer element by name,
    /// if the element does not exist return null
    /// </summary>
    /// <param name="key">name of the element</param>
    public string Get(string key)
    {
        return data.TryGetValue(key, out var value) ? value : null;
    }

    /// <summary>
    /// Check if selected trailer element exists
    /// </summary>
    /// <param name="key">name of the element</param>
    public bool Has(string key)
    {
        return data.ContainsKey(key);
    }

    /// <summary>
    /// Return iterator over trailer element names matching regex
    /// </summary>
    /// <param name="regex">compiled regex object</param>
    public IEnumerable<string> MatchKeys(Regex regex)
    {
        return data.Keys.Where(k => regex.IsMatch(k));
    }

    /// <summary>
    /// Return iterator over trailer element values whose names match the regex
    /// </summary>
    /// <param name="regex">compiled regex object</param>
    public IEnumerable<string> MatchValues(Regex regex)
    {
        return data.Where(item => regex.IsMatch(item.Key)).Select(item => item.Value);
    }
}
/// <summary>
/// Container for the CV terms describing a RAW file, grouped into file properties,
/// instrument properties, MS data, scan settings and sample data.
/// </summary>
public class Metadata
{
    /// <summary>
    /// Default constructor; every group starts as an empty list.
    /// </summary>
    public Metadata()
    {
    }

    /// <summary>
    /// Create metadata that uses the supplied lists (stored by reference, not copied)
    /// for the file, instrument and MS data groups.
    /// </summary>
    public Metadata(List<CVTerm> fileProperties,
        List<CVTerm> instrumentProperties,
        List<CVTerm> msData)
    {
        FileProperties = fileProperties;
        InstrumentProperties = instrumentProperties;
        MsData = msData;
    }

    /// <summary>Terms describing the file.</summary>
    public List<CVTerm> FileProperties { get; } = new List<CVTerm>();

    /// <summary>Terms describing the instrument.</summary>
    public List<CVTerm> InstrumentProperties { get; } = new List<CVTerm>();

    /// <summary>Terms describing the MS data.</summary>
    public List<CVTerm> MsData { get; } = new List<CVTerm>();

    /// <summary>Terms describing the scan settings.</summary>
    public List<CVTerm> ScanSettings { get; } = new List<CVTerm>();

    /// <summary>Terms describing the sample.</summary>
    public List<CVTerm> SampleData { get; } = new List<CVTerm>();

    /// <summary>Append a term to <see cref="FileProperties"/>.</summary>
    public void addFileProperty(CVTerm value) => FileProperties.Add(value);

    /// <summary>Append a term to <see cref="InstrumentProperties"/>.</summary>
    public void addInstrumentProperty(CVTerm value) => InstrumentProperties.Add(value);

    /// <summary>Append a term to <see cref="ScanSettings"/>.</summary>
    public void addScanSetting(CVTerm value) => ScanSettings.Add(value);

    /// <summary>Append all terms of a collection to <see cref="ScanSettings"/>.</summary>
    public void addScanSetting(ICollection<CVTerm> value) => ScanSettings.AddRange(value);

    /// <summary>Append a term to <see cref="MsData"/>.</summary>
    public void addMSData(CVTerm value) => MsData.Add(value);

    /// <summary>Append all terms of a set to <see cref="MsData"/>.</summary>
    public void addMSData(HashSet<CVTerm> value) => MsData.AddRange(value);

    /// <summary>Append a term to <see cref="SampleData"/>.</summary>
    public void addSampleProperty(CVTerm value) => SampleData.Add(value);
}
/// <summary>
/// Immutable controlled-vocabulary term (accession, CV label, name, value) with
/// value-based equality, so that equal terms collapse in hash-based collections.
/// </summary>
public class CVTerm
{
    private readonly string acc = "";
    private readonly string cvLabelID = "";
    private readonly string cvName = "";
    private readonly string cvValue = "";

    /// <summary>
    /// Create a term whose four components are all empty strings.
    /// </summary>
    public CVTerm()
    {
    }

    /// <summary>
    /// Create a term from its four components.
    /// </summary>
    /// <param name="accession">accession of the term</param>
    /// <param name="cvLabel">label of the controlled vocabulary</param>
    /// <param name="name">name of the term</param>
    /// <param name="value">value of the term</param>
    public CVTerm(string accession, string cvLabel, string name, string value)
    {
        acc = accession;
        cvLabelID = cvLabel;
        cvName = name;
        cvValue = value;
    }

    public string accession => acc;
    public string cvLabel => cvLabelID;
    public string name => cvName;
    public string value => cvValue;

    /// <summary>
    /// Comparer implementing the value equality used by <see cref="Equals(object)"/>
    /// and <see cref="GetHashCode()"/>.
    /// </summary>
    public static IEqualityComparer<CVTerm> CvTermComparer { get; } = new CvTermEqualityComparer();

    /// <summary>
    /// Hash code combining all four components; equal terms yield equal hash codes.
    /// </summary>
    public override int GetHashCode()
    {
        return CvTermComparer.GetHashCode(this);
    }

    /// <summary>
    /// Value equality: true when <paramref name="obj"/> is a <see cref="CVTerm"/> of the same
    /// runtime type with identical accession, CV label, name and value.
    /// </summary>
    /// <param name="obj">object to compare with this term</param>
    public override bool Equals(object obj)
    {
        // The comparer must receive BOTH operands. Calling CvTermComparer.Equals(obj)
        // binds to Object.Equals(object) and compares the comparer itself with obj.
        return obj is CVTerm other && CvTermComparer.Equals(this, other);
    }

    private sealed class CvTermEqualityComparer : IEqualityComparer<CVTerm>
    {
        public bool Equals(CVTerm x, CVTerm y)
        {
            if (ReferenceEquals(x, y)) return true;
            if (ReferenceEquals(x, null)) return false;
            if (ReferenceEquals(y, null)) return false;
            if (x.GetType() != y.GetType()) return false;
            return string.Equals(x.acc, y.acc) && string.Equals(x.cvLabelID, y.cvLabelID) &&
                   string.Equals(x.cvName, y.cvName) && string.Equals(x.cvValue, y.cvValue);
        }

        public int GetHashCode(CVTerm obj)
        {
            // unchecked: overflow while mixing the component hashes is intentional
            unchecked
            {
                var hashCode = (obj.acc != null ? obj.acc.GetHashCode() : 0);
                hashCode = (hashCode * 397) ^ (obj.cvLabelID != null ? obj.cvLabelID.GetHashCode() : 0);
                hashCode = (hashCode * 397) ^ (obj.cvName != null ? obj.cvName.GetHashCode() : 0);
                hashCode = (hashCode * 397) ^ (obj.cvValue != null ? obj.cvValue.GetHashCode() : 0);
                return hashCode;
            }
        }
    }
}
/// <summary>
/// Validate a JSON string against the XIC input schema.
/// </summary>
/// <param name="jsonString">the JSON text to validate</param>
/// <returns>the validation errors reported by the schema validator</returns>
public static ICollection<ValidationError> ValidateJson(string jsonString)
{
    // the schema is parsed asynchronously by the library; block until it is available
    var schema = NJsonSchema.JsonSchema.FromJsonAsync(Schema).Result;
    return schema.Validate(jsonString);
}

/// <summary>
/// Convert the JSON XIC input into XIC units.
/// Entries with a sequence get their m/z from the peptide; entries with an m/z become a
/// window of m/z +/- delta, where delta is derived from the tolerance and its unit;
/// entries with an explicit m/z range are used as given.
/// </summary>
/// <param name="jsonString">JSON array of XIC input units</param>
/// <returns>the parsed XIC data</returns>
/// <exception cref="RawFileParserException">
/// an entry yields no unit, or a unit whose ranges are not valid
/// </exception>
public static XicData ParseJSON(string jsonString)
{
    var result = new XicData();
    var inputUnits = JsonConvert.DeserializeObject<List<JSONInputUnit>>(jsonString);

    foreach (JSONInputUnit input in inputUnits)
    {
        // a peptide sequence is translated into its m/z first, then handled like a plain m/z entry
        if (input.HasSequence())
        {
            input.Mz = new Peptide(input.Sequence).GetMz(input.Charge);
        }

        XicUnit unit = null;
        if (input.HasMz())
        {
            double delta;
            switch (input.ToleranceUnit.ToLower())
            {
                case "ppm":
                case "": // an empty unit is treated as ppm
                    delta = input.Mz.Value * input.Tolerance.Value * 1e-6;
                    break;
                case "amu":
                case "da":
                    delta = input.Tolerance.Value;
                    break;
                case "mmu":
                    delta = input.Tolerance.Value * 1e-3;
                    break;
                default:
                    delta = 10;
                    break;
            }

            unit = new XicUnit(input.Mz.Value - delta, input.Mz.Value + delta, input.RtStart,
                input.RtEnd, input.Filter, input.Comment);
        }
        else if (input.HasMzRange())
        {
            unit = new XicUnit(input.MzStart.Value, input.MzEnd.Value, input.RtStart, input.RtEnd,
                input.Filter, input.Comment);
        }

        if (unit == null || !unit.HasValidRanges())
        {
            throw new RawFileParserException(
                $"Invalid M/Z and/or retention time range:\n{JsonConvert.SerializeObject(input, Formatting.Indented)}");
        }

        result.Content.Add(unit);
    }

    return result;
}
If you comply with these License terms, you have the rights set forth below. 4 | 5 | 1. Rights Granted: 6 | 7 | 1.1. You may install and use this Software on any of your computing devices. 8 | 9 | 1.2. You may distribute this Software to others, but only in combination with other software components and/or programs that you provide and subject to the distribution requirements and restrictions below. 10 | 11 | 2. Use Restrictions: 12 | 13 | 2.1. You may not decompile, disassemble, reverse engineer, use reflection or modify this Software. 14 | 15 | 3. Distribution Requirements: 16 | 17 | If you distribute this Software to others, you agree to: 18 | 19 | 3.1. Indemnify, defend and hold harmless the Licensor from any claims, including attorneys’ fees, related to the distribution or use of this Software; 20 | 21 | 3.2. Display the following text in your software’s “About” box: “RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved.”; 22 | 23 | 3.3. Require your end users to agree to a license agreement that prohibits them from redistributing this Software to others. 24 | 25 | 4. Distribution Restrictions: 26 | 27 | 4.1. You may not use the Licensor’s trademarks in a way that suggests your software components and/or programs are provided by or are endorsed by the Licensor; and 28 | 29 | 4.2. You may not commercially exploit this Software or products that incorporate this Software without the prior written consent of Licensor. Commercial exploitation includes, but is not limited to, charging a purchase price, license fee, maintenance fee, or subscription fee; or licensing, transferring or redistributing the Software in exchange for consideration of any kind. 30 | 31 | 4.3. Your rights to this Software do not include any license, right, power or authority to subject this Software in whole or in part to any of the terms of an Excluded License. 
"Excluded License" means any license that requires as a condition of use, modification and/or distribution of software subject to the Excluded License, that such software or other software combined and/or distributed with such software be (a) disclosed or distributed in source code form; or (b) licensed for the purpose of making derivative works. Without limiting the foregoing obligation, you are specifically prohibited from distributing this Software with any software that is subject to the General Public License (GPL) or similar license in a manner that would create a combined work. 32 | 33 | 5. Additional Terms Applicable to Software: 34 | 35 | 5.1. This Software is licensed, not sold. This License only gives you some rights to use this Software; the Licensor reserves all other rights. Unless applicable law gives you more rights despite this limitation, you may use this Software only as expressly permitted in this License. 36 | 37 | 5.2. Licensor has no obligation to fix, update, supplement or support this Software. 38 | 39 | 5.3. This Software is not designed, manufactured or intended for any use requiring fail-safe performance in which the failure of this Software could lead to death, serious personal injury or severe physical and environmental damage (“High Risk Activities”), such as the operation of aircraft, medical or nuclear facilities. You agree not to use, or license the use of, this Software in connection with any High Risk Activities. 40 | 41 | 5.4. Your rights under this License terminate automatically if you breach this License in any way. Termination of this License will not affect any of your obligations or liabilities arising prior to termination. The following sections of this License shall survive termination: 2.1, 3.1, 3.2, 3.3, 4.1, 4.2, 4.3, 5.1, 5.2, 5.3, 5.5, 5.6, 5.7, 5.8, and 5.9. 42 | 43 | 5.5. This Software is subject to United States export laws and regulations. 
You agree to comply with all domestic and international export laws and regulations that apply to this Software. These laws include restrictions on destinations, end users and end use. 44 | 45 | 5.6. This License shall be construed and controlled by the laws of the State of California, U.S.A., without regard to conflicts of law. You consent to the jurisdiction of the state and federal courts situated in the State of California in any action arising under this License. The application of the U.N. Convention on Contracts for the International Sale of Goods to this License is hereby expressly excluded. If any provision of this License shall be deemed unenforceable or contrary to law, the rest of this License shall remain in full effect and interpreted in an enforceable manner that most nearly captures the intent of the original language. 46 | 47 | 5.7. THIS SOFTWARE IS LICENSED "AS IS". YOU BEAR ALL RISKS OF USING IT. LICENSOR GIVES NO AND DISCLAIMS ALL EXPRESS AND IMPLIED WARRANTIES, REPRESENTATIONS OR GUARANTEES. YOU MAY HAVE ADDITIONAL CONSUMER RIGHTS UNDER YOUR LOCAL LAWS WHICH THIS LICENSE CANNOT CHANGE. TO THE EXTENT PERMITTED UNDER YOUR LOCAL LAWS, LICENSOR EXCLUDES THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 48 | 49 | 5.8. LICENSOR’S TOTAL LIABILITY TO YOU FOR DIRECT DAMAGES ARISING UNDER THIS LICENSE IS LIMITED TO U.S. $1.00. YOU CANNOT RECOVER ANY OTHER DAMAGES, INCLUDING CONSEQUENTIAL, LOST PROFITS, SPECIAL, INDIRECT OR INCIDENTAL DAMAGES, EVEN IF LICENSOR IS EXPRESSLY MADE AWARE OF THE POSSIBILITY THEREOF OR IS NEGLIGENT. THIS LIMITATION APPLIES TO ANYTHING RELATED TO THIS SOFTWARE, SERVICES, CONTENT (INCLUDING CODE) ON THIRD PARTY INTERNET SITES, OR THIRD PARTY PROGRAMS, AND CLAIMS FOR BREACH OF CONTRACT, BREACH OF WARRANTY, GUARANTEE OR CONDITION, STRICT LIABILITY, NEGLIGENCE, OR OTHER TORT TO THE EXTENT PERMITTED BY APPLICABLE LAW. 50 | 51 | 5.9. 
Use, duplication or disclosure of this Software by the U.S. Government is subject to the restricted rights applicable to commercial computer software (under FAR 52.227019 and DFARS 252.227-7013 or parallel regulations). The manufacturer for this purpose is Thermo Finnigan LLC, 355 River Oaks Parkway, San Jose, California 95134, U.S.A. 52 | -------------------------------------------------------------------------------- /RawFileParser.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | using System.Linq; 4 | using System.Reflection; 5 | using System.Runtime.InteropServices; 6 | using log4net; 7 | using ThermoFisher.CommonCore.Data; 8 | using ThermoFisher.CommonCore.Data.Business; 9 | using ThermoFisher.CommonCore.Data.Interfaces; 10 | using ThermoRawFileParser.Writer; 11 | using ThermoRawFileParser.Util; 12 | 13 | namespace ThermoRawFileParser 14 | { 15 | public static class RawFileParser 16 | { 17 | private static readonly ILog Log = 18 | LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType); 19 | 20 | /// 21 | /// Process and extract the RAW file(s). 
/// <summary>
/// Process and extract the RAW file(s).
/// In folder mode (<c>RawDirectoryPath</c> set) every file directly inside the folder whose
/// extension is <c>.raw</c> (case-insensitive) is processed in turn; otherwise the single
/// file in <c>RawFilePath</c> is processed.
/// </summary>
/// <param name="parseInput">the parse input object</param>
/// <exception cref="RawFileParserException">folder mode and the folder holds no RAW file</exception>
public static void Parse(ParseInput parseInput)
{
    // Input raw file mode
    if (parseInput.RawDirectoryPath == null)
    {
        Log.Info("Started parsing " + parseInput.RawFilePath);

        TryProcessFile(parseInput);
        return;
    }

    // Input raw folder mode
    Log.Info("Started analyzing folder " + parseInput.RawDirectoryPath);

    // Compare the extension itself: a plain "ends with raw" suffix test would also pick up
    // names such as "sketch.draw" or "notes_raw". Ordinal comparison keeps it culture independent.
    var rawFilesPath = Directory
        .EnumerateFiles(parseInput.RawDirectoryPath, "*", SearchOption.TopDirectoryOnly)
        .Where(path => string.Equals(Path.GetExtension(path), ".raw", StringComparison.OrdinalIgnoreCase))
        .ToArray();
    Log.Info(String.Format("The folder contains {0} RAW files", rawFilesPath.Length));

    if (rawFilesPath.Length == 0)
    {
        Log.Debug("No raw files found in folder");
        throw new RawFileParserException("No raw files found in folder!");
    }

    foreach (var filePath in rawFilesPath)
    {
        parseInput.RawFilePath = filePath;
        Log.Info("Started parsing " + parseInput.RawFilePath);
        TryProcessFile(parseInput);
    }
}

/// <summary>
/// Process and extract the given RAW file. Any exception is logged and counted as an
/// error on the parse input instead of being propagated, so one failing file does not
/// stop the remaining files of a folder.
/// </summary>
/// <param name="parseInput">the parse input object</param>
private static void TryProcessFile(ParseInput parseInput)
{
    try
    {
        ProcessFile(parseInput);
    }
    catch (UnauthorizedAccessException ex)
    {
        Log.Error(!string.IsNullOrEmpty(ex.Message)
            ? ex.Message
            : "Attempting to write to an unauthorized location.");
        parseInput.NewError();
    }
    catch (RawFileParserException ex)
    {
        // expected, already descriptive failures: the message alone is enough
        Log.Error(ex.Message);
        parseInput.NewError();
    }
    catch (Exception ex)
    {
        // anything else is unexpected: log the full exception including the stack trace
        Log.Error("An unexpected error occured (see below)");
        Log.Error(ex.ToString());
        parseInput.NewError();
    }
}
/// <summary>
/// Process and extract the given RAW file: resolve a symlinked path, open the file,
/// verify it is usable and then write the requested metadata and/or spectrum output.
/// </summary>
/// <param name="parseInput">the parse input object</param>
/// <exception cref="RawFileParserException">
/// the file cannot be opened, reports an error, is still being acquired or contains no scans
/// </exception>
private static void ProcessFile(ParseInput parseInput)
{
    // checking for symlinks
    var isSymlink = new FileInfo(parseInput.RawFilePath).Attributes.HasFlag(FileAttributes.ReparsePoint);

    if (isSymlink)
    {
        string realPath;
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            realPath = NativeMethods.GetFinalPathName(parseInput.RawFilePath);
        }
        else
        {
            // Mono should handle all non-windows platforms; the link target is
            // combined with the directory that contains the link
            realPath = Path.Combine(Path.GetDirectoryName(parseInput.RawFilePath),
                Mono.Unix.UnixPath.ReadLink(parseInput.RawFilePath));
        }

        Log.DebugFormat("Detected reparse point, real path: {0}", realPath);
        parseInput.UpdateRealPath(realPath);
    }

    // Create the IRawDataPlus object for accessing the RAW file
    using (IRawDataPlus rawFile = RawFileReaderFactory.ReadFile(parseInput.RawFilePath))
    {
        if (!rawFile.IsOpen)
        {
            throw new RawFileParserException("Unable to access the RAW file using the native Thermo API");
        }

        // Check for any errors in the RAW file
        if (rawFile.IsError)
        {
            throw new RawFileParserException($"Native Thermo API reported the following error - RAW file is likely corrupted\n{rawFile.FileError.ErrorMessage}");
        }

        // Check if the RAW file is being acquired
        if (rawFile.InAcquisition)
        {
            throw new RawFileParserException("RAW file cannot be processed since it is still being acquired");
        }

        // Scan range stays at -1/-1 when the file has no MS instrument
        var firstScanNumber = -1;
        var lastScanNumber = -1;

        var hasMsInstrument = rawFile.GetInstrumentCountOfType(Device.MS) != 0;
        if (hasMsInstrument)
        {
            // select the first MS instrument
            rawFile.SelectInstrument(Device.MS, 1);
            rawFile.IncludeReferenceAndExceptionData = !parseInput.ExData;

            // Get the first and last scan from the RAW file
            firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum;
            lastScanNumber = rawFile.RunHeaderEx.LastSpectrum;

            // Check for empty file
            if (lastScanNumber < 1)
            {
                throw new RawFileParserException("Empty RAW file, no output will be produced");
            }
        }

        if (parseInput.MetadataFormat != MetadataFormat.None)
        {
            new MetadataWriter(parseInput).WriteMetadata(rawFile, firstScanNumber, lastScanNumber);
        }

        if (parseInput.OutputFormat != OutputFormat.None)
        {
            // pick the writer for the requested format; unknown formats produce no spectrum output
            SpectrumWriter spectrumWriter = null;
            switch (parseInput.OutputFormat)
            {
                case OutputFormat.MGF:
                    spectrumWriter = new MgfSpectrumWriter(parseInput);
                    break;
                case OutputFormat.MzML:
                case OutputFormat.IndexMzML:
                    spectrumWriter = new MzMlSpectrumWriter(parseInput);
                    break;
                case OutputFormat.Parquet:
                    spectrumWriter = new ParquetSpectrumWriter(parseInput);
                    break;
            }

            spectrumWriter?.Write(rawFile, firstScanNumber, lastScanNumber);
        }

        Log.Info("Finished parsing " + parseInput.UserProvidedPath);
    }
}
/// <summary>
/// Read a XIC from small.RAW with no m/z and no retention time limits and check the
/// number of points and the ranges reported back in the unit's meta data.
/// </summary>
[Test]
public void testXicReadFullRange()
{
    var rawFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"Data/small.RAW");

    // test the full range: every boundary is left open
    var openRange = new XicMeta()
    {
        MzStart = null,
        MzEnd = null,
        RtStart = null,
        RtEnd = null
    };
    var requestedUnit = new XicUnit() { Meta = openRange };
    var xicData = new XicData { Content = new List<XicUnit> { requestedUnit } };

    var xicparams = new XicParameters();

    XicReader.ReadXic(rawFilePath, false, xicData, ref xicparams);

    var resultUnit = xicData.Content[0];
    var meta = resultUnit.Meta;
    Assert.That(((Array)resultUnit.RetentionTimes).Length, Is.EqualTo(14));
    Assert.That(((Array)resultUnit.Intensities).Length, Is.EqualTo(14));
    Assert.That(Math.Abs(140 - meta.MzStart.Value) < 0.01);
    Assert.That(Math.Abs(2000 - meta.MzEnd.Value) < 0.01);
    Assert.That(Math.Abs(0.004935 - meta.RtStart.Value) < 0.01);
    Assert.That(Math.Abs(0.4872366666 - meta.RtEnd.Value) < 0.01);
}
77 | Assert.That(Math.Abs(749.8093 - xicUnit.Meta.MzEnd.Value) < 0.01); 78 | Assert.That(Math.Abs(10 - xicUnit.Meta.RtStart.Value) < 0.01); 79 | Assert.That(Math.Abs(10.98 - xicUnit.Meta.RtEnd.Value) < 0.01); 80 | 81 | xicData = new XicData 82 | { 83 | // test the nonsensical retention time range 84 | Content = new List 85 | { 86 | new XicUnit() 87 | { 88 | Meta = new XicMeta() 89 | { 90 | MzStart = 749.786, 91 | MzEnd = 749.8093, 92 | RtStart = 300, 93 | RtEnd = 400 94 | } 95 | } 96 | } 97 | }; 98 | XicReader.ReadXic(testRawFile, false, xicData, ref xicparams); 99 | xicUnit = xicData.Content[0]; 100 | Assert.That(((Array)xicUnit.RetentionTimes).Length, Is.EqualTo(1)); 101 | Assert.That(((Array)xicUnit.Intensities).Length, Is.EqualTo(1)); 102 | Assert.That(Math.Abs(749.786 - xicUnit.Meta.MzStart.Value) < 0.01); 103 | Assert.That(Math.Abs(749.8093 - xicUnit.Meta.MzEnd.Value) < 0.01); 104 | Assert.That(Math.Abs(300 - xicUnit.Meta.RtStart.Value) < 0.01); 105 | Assert.That(Math.Abs(400 - xicUnit.Meta.RtEnd.Value) < 0.01); 106 | } 107 | 108 | [Test] 109 | public void testValidateJson() 110 | { 111 | string json = @"[ 112 | { 113 | 'mz':488.5384, 114 | 'tolerance':10, 115 | 'tolerance_unit':'ppm', 116 | 'scan_filter':'ms' 117 | }, 118 | { 119 | 'mz':575.2413, 120 | 'tolerance':10, 121 | 'tolerance_unit':'ppm' 122 | }, 123 | { 124 | 'mz_start':749.7860, 125 | 'mz_end' : 750.4, 126 | 'rt_start':630, 127 | 'rt_end':660 128 | }, 129 | { 130 | 'sequence':'LENNART', 131 | 'tolerance':10, 132 | 'rt_start':630, 133 | 'rt_end':660 134 | }, 135 | { 136 | 'mz':575.2413, 137 | 'tolerance':10, 138 | 'tolerance_unit':'ppm', 139 | 'comment': 'this is comment' 140 | } 141 | ]"; 142 | 143 | // test a valid json 144 | var errors = JSONParser.ValidateJson(json); 145 | Assert.That(errors.IsNullOrEmpty()); 146 | 147 | json = @"[ 148 | { 149 | 'mz':488.5384, 150 | 'tolerance_unit':'ppm' 151 | }, 152 | { 153 | 'mz':575.2413, 154 | 'tolerance':10, 155 | 'tolerance_unit':'ppm' 156 | }, 157 | { 
158 | 'mz_start':749.7860, 159 | 'mz_end' : 750.4, 160 | 'rt_start':630, 161 | 'rt_end':660 162 | }, 163 | { 164 | 'sequence':'LENNART', 165 | 'rt_start':630, 166 | 'rt_end':660 167 | } 168 | ]"; 169 | 170 | // test a json with 2 missing properties 171 | errors = JSONParser.ValidateJson(json); 172 | Assert.That(!errors.IsNullOrEmpty()); 173 | Assert.That(errors.Count, Is.EqualTo(2)); 174 | 175 | json = @"[ 176 | { 177 | 'mz': -488.5384, 178 | 'tolerance':10, 179 | 'tolerance_unit':'ppm' 180 | }, 181 | { 182 | 'mz':575.2413, 183 | 'tolerance':10, 184 | 'tolerance_unit':'ppm' 185 | }, 186 | { 187 | 'mz_start':749.7860, 188 | 'mz_end' : 750.4, 189 | 'rt_start': -630, 190 | 'rt_end': 660 191 | }, 192 | { 193 | 'sequence':'LENNART', 194 | 'tolerance':10, 195 | 'rt_start': 630, 196 | 'rt_end': 660 197 | } 198 | ]"; 199 | 200 | // test a json with 2 negative numbers 201 | errors = JSONParser.ValidateJson(json); 202 | Assert.That(!errors.IsNullOrEmpty()); 203 | Assert.That(errors.Count, Is.EqualTo(2)); 204 | } 205 | 206 | [Test] 207 | public void testParseJson() 208 | { 209 | string json = @"[ 210 | { 211 | 'mz': 488.5384, 212 | 'tolerance':10, 213 | 'tolerance_unit':'ppm', 214 | 'scan_filter': 'ms2' 215 | }, 216 | { 217 | 'mz':575.2413, 218 | 'tolerance':10, 219 | }, 220 | { 221 | 'mz_start':749.7860, 222 | 'mz_end' : 750.4, 223 | 'rt_start': 630, 224 | 'rt_end': 660 225 | }, 226 | { 227 | 'sequence':'LENNART', 228 | 'tolerance':10, 229 | 'rt_start': 630, 230 | 'rt_end': 660 231 | } 232 | ]"; 233 | 234 | var xicData = JSONParser.ParseJSON(json); 235 | Assert.That(xicData is not null); 236 | 237 | json = @"[ 238 | { 239 | 'mz': 488.5384, 240 | 'tolerance':10, 241 | 'tolerance_unit':'ppm' 242 | }, 243 | { 244 | 'mz':575.2413, 245 | 'tolerance':10, 246 | }, 247 | { 248 | 'mz_start':749.7860, 249 | 'mz_end' : 750.4, 250 | 'rt_start': 680, 251 | 'rt_end': 660 252 | }, 253 | { 254 | 'sequence':'LENNART', 255 | 'tolerance':10, 256 | 'rt_start': 630, 257 | 'rt_end': 660 258 | } 
259 | ]"; 260 | 261 | Assert.Throws(() => JSONParser.ParseJSON(json)); 262 | 263 | json = @"[ 264 | { 265 | 'mz': 488.5384, 266 | 'tolerance':10, 267 | 'tolerance_unit':'ppm' 268 | }, 269 | { 270 | 'mz':575.2413, 271 | 'tolerance':10, 272 | }, 273 | { 274 | 'mz_start':849.7860, 275 | 'mz_end' : 750.4, 276 | 'rt_start': 630, 277 | 'rt_end': 660 278 | }, 279 | { 280 | 'sequence':'LENNART', 281 | 'tolerance':10, 282 | 'rt_start': 630, 283 | 'rt_end': 660 284 | } 285 | ]"; 286 | 287 | Assert.Throws(() => JSONParser.ParseJSON(json)); 288 | } 289 | } 290 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Writer/MgfSpectrumWriter.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Reflection; 3 | using System.Text; 4 | using log4net; 5 | using ThermoFisher.CommonCore.Data.Business; 6 | using ThermoFisher.CommonCore.Data.FilterEnums; 7 | using ThermoFisher.CommonCore.Data.Interfaces; 8 | using ThermoRawFileParser.Util; 9 | 10 | namespace ThermoRawFileParser.Writer 11 | { 12 | public class MgfSpectrumWriter : SpectrumWriter 13 | { 14 | private static readonly ILog Log = 15 | LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType); 16 | 17 | private const string PositivePolarity = "+"; 18 | private const string NegativePolarity = "-"; 19 | 20 | public MgfSpectrumWriter(ParseInput parseInput) : base(parseInput) 21 | { 22 | ParseInput.MsLevel.Remove(1); // MS1 spectra are not supposed to be in MGF 23 | } 24 | 25 | /// 26 | public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastScanNumber) 27 | { 28 | if (!rawFile.HasMsData) 29 | { 30 | throw new 
/// <inheritdoc />
/// <remarks>
/// Writes the MGF text of every scan in [<paramref name="firstScanNumber"/>,
/// <paramref name="lastScanNumber"/>] for which <c>CreateMGFScan</c> returns a non-empty string.
/// A scan that throws is logged, counted through <c>ParseInput.NewError()</c> and skipped,
/// so one broken scan does not abort the whole file.
/// </remarks>
/// <exception cref="RawFileParserException">The RAW file contains no MS data.</exception>
public override void Write(IRawDataPlus rawFile, int firstScanNumber, int lastScanNumber)
{
    if (!rawFile.HasMsData)
    {
        throw new RawFileParserException("No MS data in RAW file, no output will be produced");
    }

    ConfigureWriter(".mgf");
    using (Writer)
    {
        var scanCount = lastScanNumber - firstScanNumber + 1;
        Log.Info("Processing " + scanCount + " scans");

        var lastScanProgress = 0;
        for (var scanNumber = firstScanNumber; scanNumber <= lastScanNumber; scanNumber++)
        {
            if (ParseInput.LogFormat == LogFormat.DEFAULT)
            {
                // Progress is the share of the requested range that has been visited.
                // Using the absolute scan number here would report more than 100%
                // whenever the range does not start at scan 1.
                var scanProgress = (int)((double)(scanNumber - firstScanNumber + 1) / scanCount * 100);
                if (scanProgress % ProgressPercentageStep == 0 && scanProgress != lastScanProgress)
                {
                    Console.Write("" + scanProgress + "% ");
                    lastScanProgress = scanProgress;
                }
            }

            try
            {
                var spectrumText = CreateMGFScan(rawFile, scanNumber);
                if (!string.IsNullOrEmpty(spectrumText))
                {
                    Writer.WriteLine(spectrumText);
                    Log.Debug("Spectrum written to file -- SCAN# " + scanNumber);
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the failing scan and carry on with the next one.
                Log.Error($"Scan #{scanNumber} cannot be processed because of the following exception: {ex.Message}");
                Log.Debug($"{ex.StackTrace}\n{ex.InnerException}");
                ParseInput.NewError();
            }
        }

        if (ParseInput.LogFormat == LogFormat.DEFAULT)
        {
            // Terminate the single-line progress output.
            Console.WriteLine();
        }
    }
}
/// <summary>
/// Builds the MGF text block ("BEGIN IONS" ... "END IONS", without a trailing newline) for one scan.
/// </summary>
/// <param name="rawFile">Opened RAW file the scan is read from.</param>
/// <param name="scanNumber">Number of the scan to convert.</param>
/// <returns>
/// The MGF text of the scan, or an empty string when the MS level of the scan is not
/// contained in <c>ParseInput.MsLevel</c>.
/// </returns>
/// <remarks>
/// The precursor bookkeeping (<c>_precursorScanNumbers</c>) is updated for every scan, also for
/// scans that produce no output. Trailer and m/z read failures are logged and counted through
/// <c>ParseInput.NewWarn()</c> / <c>ParseInput.NewError()</c> instead of being propagated.
/// </remarks>
private string? CreateMGFScan(IRawDataPlus rawFile, int scanNumber)
{
    int _precursorScanNumber = 0;
    StringBuilder mgfSpectrumText = new StringBuilder();
    string resultString = String.Empty;

    // Get the retention time
    var retentionTime = rawFile.RetentionTimeFromScanNumber(scanNumber);

    // Get the scan filter for this scan number
    var scanFilter = rawFile.GetFilterForScanNumber(scanNumber);

    // Get the scan event for this scan number
    var scanEvent = rawFile.GetScanEventForScanNumber(scanNumber);

    // Trailer extra data list
    ScanTrailer trailerData;

    try
    {
        trailerData = new ScanTrailer(rawFile.GetTrailerExtraInformation(scanNumber));
    }
    catch (Exception ex)
    {
        // Continue with an empty trailer; the trailer lookups below are all read into nullable values
        Log.WarnFormat("Cannot load trailer infromation for scan {0} due to following exception\n{1}", scanNumber, ex.Message);
        ParseInput.NewWarn();
        trailerData = new ScanTrailer();
    }

    // Get scan ms level
    var msLevel = (int)scanFilter.MSOrder;

    // Construct the precursor reference string for the title
    var precursorReference = "";

    //Tracking precursor scan numbers for MSn scans
    if (msLevel == 1)
    {
        // Keep track of the MS1 scan number for precursor reference
        _precursorScanNumbers[""] = scanNumber;
    }
    else
    {
        // Keep track of scan number and isolation m/z for precursor reference
        var result = _filterStringIsolationMzPattern.Match(scanEvent.ToString());
        if (result.Success)
        {
            // Remove + Add instead of an indexer assignment
            // NOTE(review): presumably this refreshes the entry's position in the dictionary - confirm
            if (_precursorScanNumbers.ContainsKey(result.Groups[1].Value))
            {
                _precursorScanNumbers.Remove(result.Groups[1].Value);
            }

            _precursorScanNumbers.Add(result.Groups[1].Value, scanNumber);
        }

        //update precursor scan if it is provided in trailer data
        var trailerMasterScan = trailerData.AsPositiveInt("Master Scan Number:");
        if (trailerMasterScan.HasValue)
        {
            _precursorScanNumber = trailerMasterScan.Value;
        }
        else //try getting it from the scan filter
        {
            // When the pattern did not match, Groups[1].Value is an empty string
            _precursorScanNumber = GetParentFromScanString(result.Groups[1].Value);
        }

        if (_precursorScanNumber > 0)
        {
            precursorReference = ConstructSpectrumTitle((int)Device.MS, 1, _precursorScanNumber);
        }
        else if (ParseInput.MgfPrecursor)
        {
            // The missing precursor is only an error when the precursor is requested in the title
            Log.Error($"Cannot find precursor scan for scan# {scanNumber}");
            _precursorTree[-2] = new PrecursorInfo(0, msLevel, FindLastReaction(scanEvent, msLevel), null);
            ParseInput.NewError();
        }
    }

    // Only scans of the requested MS levels produce output
    if (ParseInput.MsLevel.Contains(msLevel))
    {
        var reaction = GetReaction(scanEvent, scanNumber);

        mgfSpectrumText.AppendLine("BEGIN IONS");
        if (!ParseInput.MgfPrecursor)
        {
            mgfSpectrumText.AppendLine($"TITLE={ConstructSpectrumTitle((int)Device.MS, 1, scanNumber)}");
        }
        else
        {
            mgfSpectrumText.AppendLine(
                $"TITLE={ConstructSpectrumTitle((int)Device.MS, 1, scanNumber)} [PRECURSOR={precursorReference}]");
        }

        mgfSpectrumText.AppendLine($"SCANS={scanNumber}");
        // The retention time is multiplied by 60 for the RTINSECONDS field
        mgfSpectrumText.AppendLine($"RTINSECONDS={(retentionTime * 60):f5}");

        int? charge = trailerData.AsPositiveInt("Charge State:");
        double? monoisotopicMz = trailerData.AsDouble("Monoisotopic M/Z:");
        double? isolationWidth =
            trailerData.AsDouble("MS" + msLevel + " Isolation Width:");

        if (reaction != null && !(msLevel == (int)MSOrderType.Nl || msLevel == (int)MSOrderType.Ng))
            // Precursor m/z and intensity is not applicable for neutral loss and neutral gain scans
        {
            var selectedIonMz =
                CalculateSelectedIonMz(reaction, monoisotopicMz, isolationWidth);

            // The intensity is looked up only when both the selected ion m/z and the
            // precursor scan are known; otherwise 0 is written
            var selectedIonIntensity = (selectedIonMz > ZeroDelta && _precursorScanNumber > 0) ?
                CalculatePrecursorPeakIntensity(rawFile, _precursorScanNumber, reaction.PrecursorMass, isolationWidth,
                    ParseInput.NoPeakPicking.Contains(msLevel - 1)) : 0;

            mgfSpectrumText.AppendLine($"PEPMASS={selectedIonMz:f5} {selectedIonIntensity:f3}");
        }

        // Charge
        if (charge != null)
        {
            // Scan polarity
            var polarity = PositivePolarity;
            if (scanFilter.Polarity == PolarityType.Negative)
            {
                polarity = NegativePolarity;
            }

            mgfSpectrumText.AppendLine($"CHARGE={charge}{polarity}");
        }

        // Get scan mz data
        MZData mzData;

        try
        {
            mzData = ReadMZData(rawFile, scanEvent, scanNumber,
                !ParseInput.NoPeakPicking.Contains((int)scanFilter.MSOrder), //requestCentroidedData
                ParseInput.ChargeData, //requestChargeData
                false); //requestNoiseData
        }
        catch (Exception ex)
        {
            Log.ErrorFormat("Failed reading mz data for scan #{0} due to following exception: {1}\nMZ data will be empty", scanNumber, ex.Message);
            Log.DebugFormat("{0}\n{1}", ex.StackTrace, ex.InnerException);
            ParseInput.NewError();

            // Fall back to an empty spectrum so the scan header is still written
            // NOTE(review): the element type arguments of Array.Empty are not visible in this
            // copy of the file - confirm them against the repository
            mzData = new MZData
            {
                basePeakMass = null,
                basePeakIntensity = null,
                masses = Array.Empty(),
                intensities = Array.Empty(),
                charges = Array.Empty(),
                baselineData = Array.Empty(),
                noiseData = Array.Empty(),
                massData = Array.Empty(),
                isCentroided = false
            };
        }

        if (!(mzData.masses is null) && mzData.masses.Length > 0)
        {
            // Write the peak list in the order it was read; a third column with the
            // charge is added when charge data is present
            if (!(mzData.charges is null) && mzData.charges.Length > 0)
            {
                for (var i = 0; i < mzData.masses.Length; i++)
                {
                    mgfSpectrumText.AppendLine($"{mzData.masses[i]:f5} {mzData.intensities[i]:f3} {(int)mzData.charges[i]:d}");
                }
            }
            else
            {
                for (var i = 0; i < mzData.masses.Length; i++)
                {
                    mgfSpectrumText.AppendLine($"{mzData.masses[i]:f5} {mzData.intensities[i]:f3}");
                }
            }
        }
        else
        {
            Log.WarnFormat("Spectrum {0} has no m/z data", scanNumber);
            ParseInput.NewWarn();
        }

        // Append (not AppendLine): the returned text carries no trailing newline
        mgfSpectrumText.Append("END IONS");
        resultString = mgfSpectrumText.ToString();
    }

    return resultString;
}
/// <summary>
/// Reads one chromatogram per entry of <paramref name="xicData"/> from a Thermo RAW file and
/// stores the resulting retention times and intensities on that entry.
/// </summary>
/// <param name="rawFilePath">
/// Path of the RAW file. When the path is a reparse point (symlink) the link target is opened,
/// while log messages keep using the path that was passed in.
/// </param>
/// <param name="base64">
/// When <c>true</c> the times and intensities are stored as base64 strings, otherwise as
/// returned by the chromatogram trace.
/// </param>
/// <param name="xicData">
/// The requests to process. Missing m/z or retention time bounds of a request are filled in
/// with the mass range and the start/end time of the RAW file.
/// </param>
/// <param name="parameters">
/// Run parameters: <c>stdout</c> switches the console progress display off; warnings and errors
/// are counted through <c>NewWarn()</c> and <c>NewError()</c>.
/// </param>
/// <exception cref="RawFileParserException">
/// The RAW file cannot be opened, reports an error, is still being acquired, or the filter of a
/// request that has a retention time bound is rejected.
/// </exception>
public static void ReadXic(string rawFilePath, bool base64, XicData xicData, ref XicParameters parameters)
{
    var userProvidedPath = rawFilePath;
    var xicCount = 0;

    // Copied to a local because a ref parameter cannot be captured and the value is needed twice
    var showProgress = !parameters.stdout;

    //checking for symlinks
    var fileInfo = new FileInfo(rawFilePath);
    if (fileInfo.Attributes.HasFlag(FileAttributes.ReparsePoint)) //detected path is a symlink
    {
        string realPath;
        if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        {
            realPath = NativeMethods.GetFinalPathName(rawFilePath);
        }
        else //Mono should handle all non-windows platforms
        {
            realPath = Path.Combine(Path.GetDirectoryName(rawFilePath), Mono.Unix.UnixPath.ReadLink(rawFilePath));
        }

        Log.DebugFormat("Detected reparse point, real path: {0}", realPath);
        rawFilePath = realPath;
    }

    using (var rawFile = RawFileReaderFactory.ReadFile(rawFilePath))
    {
        Log.Info($"Started parsing {userProvidedPath}");

        if (!rawFile.IsOpen)
        {
            throw new RawFileParserException("Unable to access the RAW file using the native Thermo library.");
        }

        // Check for any errors in the RAW file
        if (rawFile.IsError)
        {
            throw new RawFileParserException(
                $"RAW file cannot be processed because of an error - {rawFile.FileError}");
        }

        // Check if the RAW file is being acquired
        if (rawFile.InAcquisition)
        {
            throw new RawFileParserException("RAW file cannot be processed since it is still being acquired");
        }

        // Select the first MS instrument (controller) of the RAW file
        rawFile.SelectInstrument(Device.MS, 1);

        // Scan, time and mass limits of the whole file; used as defaults for missing bounds
        var firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum;
        var lastScanNumber = rawFile.RunHeaderEx.LastSpectrum;
        var startTime = rawFile.RunHeaderEx.StartTime;
        var endTime = rawFile.RunHeaderEx.EndTime;
        var minMass = rawFile.RunHeaderEx.LowMass;
        var maxMass = rawFile.RunHeaderEx.HighMass;

        // Update global metadata
        xicData.OutputMeta.base64 = base64;
        xicData.OutputMeta.timeunit = "minutes";

        // The timer is disposed by the using block, so it also stops firing when an
        // exception leaves the loop below.
        using (var tick = new System.Timers.Timer(2000))
        {
            if (showProgress)
            {
                tick.Elapsed += (sender, e) =>
                    Console.Out.Write("\rCompleted XIC {0} of {1}", xicCount, xicData.Content.Count);
                tick.Start();
            }

            foreach (var xicUnit in xicData.Content)
            {
                var meta = xicUnit.Meta;
                var filter = meta.Filter ?? MsFilter;

                // Without any m/z bound the default trace settings are used; with at least
                // one bound a mass range trace is requested and the other bound is defaulted.
                var hasMzBound = meta.MzStart.HasValue || meta.MzEnd.HasValue;
                meta.MzStart ??= minMass;
                meta.MzEnd ??= maxMass;

                IChromatogramSettings settings;
                if (hasMzBound)
                {
                    settings = new ChromatogramTraceSettings(TraceType.MassRange)
                    {
                        Filter = filter,
                        MassRanges = new[] {new Range(meta.MzStart.Value, meta.MzEnd.Value)}
                    };
                }
                else
                {
                    settings = new ChromatogramTraceSettings
                    {
                        Filter = filter
                    };
                }

                var hasRtBound = meta.RtStart.HasValue || meta.RtEnd.HasValue;
                meta.RtStart ??= startTime;
                meta.RtEnd ??= endTime;

                IChromatogramData data = null;
                if (hasRtBound)
                {
                    // Translate the retention time window into a scan number window
                    var rtFilteredScans = rawFile.GetFilteredScansListByTimeRange(MsFilter,
                        meta.RtStart.Value, meta.RtEnd.Value);

                    if (rtFilteredScans.Count != 0)
                    {
                        data = GetChromatogramData(rawFile, settings, rtFilteredScans[0],
                            rtFilteredScans[rtFilteredScans.Count - 1]);

                        // A single point at the very start or end of the run is treated as
                        // a sign that the requested window misses the run
                        if (data != null && data.PositionsArray.Length == 1 && data.PositionsArray[0].Length == 1 &&
                            (Math.Abs(data.PositionsArray[0][0] - startTime) < 0.001 ||
                             Math.Abs(data.PositionsArray[0][0] - endTime) < 0.001))
                        {
                            Log.Warn(
                                $"Only the minimum or maximum retention time was returned. " +
                                $"Does the provided retention time range [{xicUnit.Meta.RtStart}-{xicUnit.Meta.RtEnd}] lies outside the max. window [{startTime}-{endTime}]?");
                            parameters.NewWarn();
                        }
                    }
                    else
                    {
                        Log.Warn(
                            $"No scans found in retention time range [{xicUnit.Meta.RtStart}-{xicUnit.Meta.RtEnd}]. " +
                            $"Does the provided retention time window lies outside the max. window [{startTime}-{endTime}]");
                        parameters.NewWarn();
                    }
                }
                else
                {
                    // Whole run requested: a failing request is reported and skipped
                    try
                    {
                        data = GetChromatogramData(rawFile, settings, firstScanNumber, lastScanNumber);
                    }
                    catch (Exception ex)
                    {
                        Log.Error($"Cannot produce XIC using {xicUnit.GetMeta()} - {ex.Message}\nDetails:\n{ex.StackTrace}");
                        parameters.NewError();
                    }
                }

                if (data != null)
                {
                    var chromatogramTrace = ChromatogramSignal.FromChromatogramData(data);
                    if (chromatogramTrace[0].Scans.Count != 0)
                    {
                        if (!base64)
                        {
                            xicUnit.RetentionTimes = chromatogramTrace[0].Times;
                            xicUnit.Intensities = chromatogramTrace[0].Intensities;
                        }
                        else
                        {
                            xicUnit.RetentionTimes = GetBase64String(chromatogramTrace[0].Times);
                            xicUnit.Intensities = GetBase64String(chromatogramTrace[0].Intensities);
                        }
                    }
                    else
                    {
                        Log.Warn($"Empty XIC returned by {xicUnit.GetMeta()}");
                        parameters.NewWarn();
                    }
                }

                xicCount++;
            }

            if (showProgress)
            {
                tick.Stop();
                // Return the cursor to the start of the progress line
                Console.Out.Write("\r");
            }
        }

        Log.Info($"Finished parsing {userProvidedPath}");
    }
}

/// <summary>
/// Requests a single chromatogram for a scan number range and reports a rejected filter
/// string as a <see cref="RawFileParserException"/>.
/// </summary>
/// <param name="rawFile">Opened RAW file.</param>
/// <param name="settings">Trace settings, including the filter string.</param>
/// <param name="firstScanNumber">First scan of the range.</param>
/// <param name="lastScanNumber">Last scan of the range.</param>
/// <returns>The chromatogram data returned by the RAW file reader.</returns>
/// <exception cref="RawFileParserException">The filter string was rejected by the reader.</exception>
private static IChromatogramData GetChromatogramData(IRawDataPlus rawFile, IChromatogramSettings settings,
    int firstScanNumber, int lastScanNumber)
{
    try
    {
        return rawFile.GetChromatogramData(new IChromatogramSettings[] {settings}, firstScanNumber,
            lastScanNumber);
    }
    catch (Exception ex) when (ex is InvalidFilterFormatException || ex is InvalidFilterCriteriaException)
    {
        // Both reader exceptions mean the same thing to the caller
        throw new RawFileParserException($"Invalid filter string \"{settings.Filter}\"");
    }
}
/// <summary>
/// Convert the double array into a base64 string
/// </summary>
/// <param name="array">the double collection</param>
/// <returns>
/// the base64 string of the concatenated 8-byte representations produced by
/// <see cref="BitConverter.GetBytes(double)"/>; an empty string for an empty collection
/// </returns>
private static string GetBase64String(IEnumerable<double> array)
{
    using (var memoryStream = new MemoryStream())
    {
        foreach (var doubleValue in array)
        {
            var doubleValueByteArray = BitConverter.GetBytes(doubleValue);
            memoryStream.Write(doubleValueByteArray, 0, doubleValueByteArray.Length);
        }

        // ToArray copies the whole stream content regardless of Position,
        // so no rewind is needed before encoding
        return Convert.ToBase64String(memoryStream.ToArray());
    }
}
this.queryParameters = _queryParameters;
        }

        /// <summary>
        /// Reads the scans selected by the query parameters from the RAW file and converts each of
        /// them into a <see cref="ProxiSpectrum"/> (attributes plus m/z and intensity arrays).
        /// </summary>
        /// <returns>
        /// One spectrum per scan that could be read. Scans whose lookup ends in an
        /// <see cref="IndexOutOfRangeException"/> are logged as warnings and skipped.
        /// </returns>
        /// <exception cref="RawFileParserException">
        /// The RAW file cannot be opened, reports an error, is still being acquired, or the scan
        /// selection string cannot be turned into an iterator.
        /// </exception>
        public List<ProxiSpectrum> Retrieve()
        {
            var resultList = new List<ProxiSpectrum>();
            IRawDataPlus rawFile;

            //checking for symlinks
            var fileInfo = new FileInfo(queryParameters.rawFilePath);
            if (fileInfo.Attributes.HasFlag(FileAttributes.ReparsePoint)) //detected path is a symlink
            {
                // Windows uses the native helper; Mono should handle all non-windows platforms
                var realPath = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
                    ? NativeMethods.GetFinalPathName(queryParameters.rawFilePath)
                    : Path.Combine(Path.GetDirectoryName(queryParameters.rawFilePath),
                        Mono.Unix.UnixPath.ReadLink(queryParameters.rawFilePath));

                Log.DebugFormat("Detected reparse point, real path: {0}", realPath);
                queryParameters.UpdateRealPath(realPath);
            }

            using (rawFile = RawFileReaderFactory.ReadFile(queryParameters.rawFilePath))
            {
                Log.Info($"Started parsing {queryParameters.userFilePath}");

                if (!rawFile.IsOpen)
                {
                    throw new RawFileParserException("Unable to access the RAW file using the native Thermo library.");
                }

                // Check for any errors in the RAW file
                if (rawFile.IsError)
                {
                    throw new RawFileParserException(
                        $"RAW file cannot be processed because of an error - {rawFile.FileError}");
                }

                // Check if the RAW file is being acquired
                if (rawFile.InAcquisition)
                {
                    throw new RawFileParserException("RAW file cannot be processed since it is still being acquired");
                }

                // Select the first MS instrument (controller) present in the RAW file
                rawFile.SelectInstrument(Device.MS, 1);

                // Counter used to tie a value attribute to its unit attribute
                var cvGroup = 1;

                NumberIterator scanNumbers;

                try
                {
                    scanNumbers = new NumberIterator(queryParameters.scans,
                        rawFile.RunHeaderEx.FirstSpectrum, rawFile.RunHeaderEx.LastSpectrum);
                }
                catch (Exception ex)
                {
                    throw new RawFileParserException($"Cannot create scan iterator from {queryParameters.scans} - {ex.Message}");
                }

                foreach (var scanNumber in scanNumbers.IterateScans())
                {
                    var proxiSpectrum = new ProxiSpectrum();

                    try
                    {
                        // Get each scan from the RAW file
                        var scan = Scan.FromFile(rawFile, scanNumber);

                        var time = rawFile.RetentionTimeFromScanNumber(scanNumber);

                        // Get the scan filter for this scan number
                        var scanFilter = rawFile.GetFilterForScanNumber(scanNumber);

                        // Get the scan event for this scan number
                        var scanEvent = rawFile.GetScanEventForScanNumber(scanNumber);

                        IReaction reaction = null;
                        if (scanEvent.MSOrder != MSOrderType.Ms)
                        {
                            reaction = SpectrumWriter.GetReaction(scanEvent, scanNumber);
                        }

                        proxiSpectrum.AddAttribute(accession: "MS:1003057", name: "scan number",
                            value: scanNumber.ToString(CultureInfo.InvariantCulture));
                        proxiSpectrum.AddAttribute(accession: "MS:1000016", name: "scan start time",
                            value: (time * 60).ToString(CultureInfo.InvariantCulture));
                        proxiSpectrum.AddAttribute(accession: "MS:1000511", name: "ms level",
                            value: ((int) scanFilter.MSOrder).ToString(CultureInfo.InvariantCulture));

                        // trailer extra data list
                        ScanTrailer trailerData;
                        try
                        {
                            trailerData = new ScanTrailer(rawFile.GetTrailerExtraInformation(scanNumber));
                        }
                        catch (Exception ex)
                        {
                            Log.WarnFormat("Cannot load trailer infromation for scan {0} due to following exception\n{1}", scanNumber, ex.Message);
                            queryParameters.NewWarn();
                            trailerData = new ScanTrailer();
                        }

                        int? charge = trailerData.AsPositiveInt("Charge State:");
                        double? monoisotopicMz = trailerData.AsDouble("Monoisotopic M/Z:");
                        double? ionInjectionTime = trailerData.AsDouble("Ion Injection Time (ms):");
                        double? isolationWidth = trailerData.AsDouble("MS" + (int)scanFilter.MSOrder + " Isolation Width:");

                        //injection time
                        if (ionInjectionTime != null)
                        {
                            // Invariant culture keeps the decimal separator stable in the machine-readable output
                            var groupId = cvGroup.ToString(CultureInfo.InvariantCulture);
                            proxiSpectrum.AddAttribute(accession: "MS:1000927", name: "ion injection time",
                                value: ionInjectionTime.Value.ToString(CultureInfo.InvariantCulture), cvGroup: groupId);
                            proxiSpectrum.AddAttribute(accession: "UO:0000028", name: "millisecond",
                                cvGroup: groupId);
                            cvGroup++;
                        }

                        if (reaction != null)
                        {
                            // If the trailer carries no isolation width, fall back to the reaction's value;
                            // a negative width is treated as "not available" (same rule as the parquet writer)
                            if (isolationWidth == null) isolationWidth = reaction.IsolationWidth;
                            if (isolationWidth < 0) isolationWidth = null;

                            // Store the precursor information
                            var selectedIonMz =
                                SpectrumWriter.CalculateSelectedIonMz(reaction, monoisotopicMz, isolationWidth);
                            proxiSpectrum.AddAttribute(accession: "MS:1000744", name: "selected ion m/z",
                                value: selectedIonMz.ToString(CultureInfo.InvariantCulture));
                            proxiSpectrum.AddAttribute(accession: "MS:1000827",
                                name: "isolation window target m/z",
                                value: selectedIonMz.ToString(CultureInfo.InvariantCulture));

                            // Store the isolation window information, only when a width is known;
                            // dereferencing a missing width used to abort the whole query
                            if (isolationWidth != null)
                            {
                                var offset = isolationWidth.Value / 2 + reaction.IsolationWidthOffset;
                                proxiSpectrum.AddAttribute(accession: "MS:1000828",
                                    name: "isolation window lower offset",
                                    value: (isolationWidth.Value - offset).ToString(CultureInfo.InvariantCulture));
                                proxiSpectrum.AddAttribute(accession: "MS:1000829",
                                    name: "isolation window upper offset",
                                    value: offset.ToString(CultureInfo.InvariantCulture));
                            }
                        }

                        // scan polarity (everything that is not positive is reported as negative)
                        if (scanFilter.Polarity == PolarityType.Positive)
                        {
                            proxiSpectrum.AddAttribute(accession: "MS:1000465", name: "scan polarity",
                                value: "positive scan", valueAccession: "MS:1000130");
                        }
                        else
                        {
                            proxiSpectrum.AddAttribute(accession: "MS:1000465", name: "scan polarity",
                                value: "negative scan", valueAccession: "MS:1000129");
                        }

                        // charge state
                        if (charge != null)
                        {
                            proxiSpectrum.AddAttribute(accession: "MS:1000041", name: "charge state",
                                value: charge.Value.ToString(CultureInfo.InvariantCulture));
                        }

                        // write the filter string
                        proxiSpectrum.AddAttribute(accession: "MS:1000512", name: "filter string",
                            value: scanEvent.ToString());

                        double[] masses = null;
                        double[] intensities = null;

                        if (!queryParameters.noPeakPicking) // centroiding requested
                        {
                            // check if the scan has a centroid stream
                            if (scan.HasCentroidStream)
                            {
                                if (scan.CentroidScan.Length > 0)
                                {
                                    proxiSpectrum.AddAttribute(accession: "MS:1000525", name: "spectrum representation",
                                        value: "centroid spectrum", valueAccession: "MS:1000127");

                                    masses = scan.CentroidScan.Masses;
                                    intensities = scan.CentroidScan.Intensities;
                                }
                            }
                            else // otherwise take the low res segmented data
                            {
                                // if the spectrum is profile perform centroiding
                                var segmentedScan = scanEvent.ScanData == ScanDataType.Profile
                                    ? Scan.ToCentroid(scan).SegmentedScan
                                    : scan.SegmentedScan;

                                if (segmentedScan.PositionCount > 0)
                                {
                                    proxiSpectrum.AddAttribute(accession: "MS:1000525", name: "spectrum representation",
                                        value: "centroid spectrum", valueAccession: "MS:1000127");

                                    masses = segmentedScan.Positions;
                                    intensities = segmentedScan.Intensities;
                                }
                            }
                        }
                        else // use the segmented data as is
                        {
                            if (scan.SegmentedScan.Positions.Length > 0)
                            {
                                switch (scanEvent.ScanData) //check if the data is centroided already
                                {
                                    case ScanDataType.Centroid:
                                        proxiSpectrum.AddAttribute(accession: "MS:1000525",
                                            name: "spectrum representation",
                                            value: "centroid spectrum", valueAccession: "MS:1000127");
                                        break;

                                    case ScanDataType.Profile:
                                        proxiSpectrum.AddAttribute(accession: "MS:1000525",
                                            name: "spectrum representation",
                                            value: "profile spectrum", valueAccession: "MS:1000128");
                                        break;
                                }

                                masses = scan.SegmentedScan.Positions;
                                intensities = scan.SegmentedScan.Intensities;
                            }
                        }

                        if (masses != null && intensities != null)
                        {
                            // Sort by m/z, keeping each intensity paired with its mass
                            Array.Sort(masses, intensities);

                            proxiSpectrum.AddMz(masses);
                            proxiSpectrum.AddIntensities(intensities);
                        }

                        resultList.Add(proxiSpectrum);
                    }
                    catch (Exception ex) when (ex.GetBaseException() is IndexOutOfRangeException)
                    {
                        // Any other exception is not handled here and propagates to the caller
                        Log.WarnFormat("Spectrum #{0} is outside of file boundries", scanNumber);
                        queryParameters.NewWarn();
                    }
                }
            }

            Log.Info($"Finished processing {queryParameters.userFilePath}");

            return resultList;
        }
    }
}
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #
ThermoRawFileParser 2 | 3 | A tool for reading Thermo RAW mass spectrometer files and converting them to common open formats on all platforms supporting .NET Core. 4 | 5 | Supported formats: 6 | * MGF 7 | * mzML and indexed mzML 8 | * Apache Parquet 9 | 10 | Versions before 2.0.0 require Mono to run on Linux and Mac. 11 | 12 | As of version 1.2.0, 2 subcommands are available (shoutout to the [eubic 2020 developers meeting](https://eubic-ms.org/events/2020-developers-meeting/), see [usage](#usage) for examples): 13 | * query: returns one or more spectra in JSON PROXI by scan number(s) 14 | * xic: returns chromatogram data based on JSON filter input 15 | 16 | RawFileReader reading tool. Copyright © 2016 by Thermo Fisher Scientific, Inc. All rights reserved 17 | 18 | ## ThermoRawFileParser Publication: 19 | * Hulstaert N, Shofstahl J, Sachsenberg T, Walzer M, Barsnes H, Martens L, Perez-Riverol Y: _ThermoRawFileParser: Modular, Scalable, and Cross-Platform RAW File Conversion_ [[PMID 31755270](https://www.ncbi.nlm.nih.gov/pubmed/31755270)]. 20 | * If you use ThermoRawFileParser as part of a publication, please include this reference. 21 | 22 | ## Requirements 23 | 24 | ### Current version 25 | The release page provides self-contained releases for OSX, Linux, and Windows, as well as a framework-based release. The framework-based release requires the [.NET 8 runtime](https://dotnet.microsoft.com/en-us/download/dotnet/8.0/runtime). 26 | 27 | For developers: [.NET 8 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/8.0) is required to build the tool. 28 | 29 | ### Prior to 1.5.0 30 | [Mono](https://www.mono-project.com/download/stable/#download-lin) (install mono-complete if you encounter "assembly not found" errors). 31 | 32 | ## Download 33 | 34 | Click [here](https://github.com/compomics/ThermoRawFileParser/releases) to go to the release page (with [release notes](https://github.com/compomics/ThermoRawFileParser/wiki/ReleaseNotes) starting from v1.1.7). 
35 | 36 | You can find the ThermoRawFileParserGUI [here](https://github.com/compomics/ThermoRawFileParserGUI). 37 | 38 | ## Release Notes 39 | 40 | You can read release notes (starting from version 1.1.7) in the [wiki page](https://github.com/compomics/ThermoRawFileParser/wiki/ReleaseNotes) 41 | 42 | ## Usage 43 | 44 | ``` 45 | ThermoRawFileParser -i=/home/user/data_input/raw_file.raw -o=/home/user/data_input/output/ -f=0 -g -m=0 46 | ``` 47 | 48 | with only the minimal required argument `-i` or `-d` this becomes 49 | 50 | ``` 51 | ThermoRawFileParser -i=/home/user/data_input/raw_file.raw 52 | ``` 53 | or 54 | 55 | ``` 56 | ThermoRawFileParser -d=/home/user/data_input/ 57 | ``` 58 | 59 | When running the framework-based version, use `dotnet ThermoRawFileParser.dll` instead. 60 | 61 | The optional parameters only work in the -option=value format. The tool can output some RAW file metadata `-m=0|1` (0 for JSON, 1 for TXT) and the spectra file `-f=0|1|2|3|4` (0 for MGF, 1 for mzML, 2 for indexed mzML, 3 for Parquet, 4 for no output) or both. Use the `-p` flag to disable the thermo native peak picking. 62 | 63 | ``` 64 | Usage is ThermoRawFileParser.exe [subcommand] [options] 65 | optional subcommands are xic|query (use [subcommand] -h for more info]): 66 | -h, --help Prints out the options. 67 | -v, --version Prints out the version of the executable. 68 | -i, --input=VALUE The raw file input (Required). Specify this or an 69 | input directory -d. 70 | -d, --input_directory=VALUE 71 | The directory containing the raw files (Required). 72 | Specify this or an input raw file -i. 73 | -b, --output=VALUE The output file. Specify this or an output 74 | directory -o. Specifying neither writes to the 75 | input directory. 76 | -o, --output_directory=VALUE 77 | The output directory. Specify this or an output 78 | file -b. Specifying neither writes to the input 79 | directory. 80 | -s, --stdout Write to standard output. Cannot be combined with 81 | file or directory output. 
Implies silent logging, 82 | i.e. logging level 0 83 | -f, --format=VALUE The spectra output format: 0 for MGF, 1 for mzML, 84 | 2 for indexed mzML, 3 for Parquet, 4 for None ( 85 | no output); both numeric and text (case 86 | insensitive) value recognized. Defaults to 87 | indexed mzML if no format is specified. 88 | -m, --metadata=VALUE The metadata output format: 0 for JSON, 1 for TXT, 89 | 2 for None (no output); both numeric and text ( 90 | case insensitive) value recognized. Defaults to 91 | None 92 | -c, --metadata_output_file=VALUE 93 | The metadata output file. By default the metadata 94 | file is written to the output directory. 95 | -g, --gzip GZip the output file. 96 | -p, --noPeakPicking[=VALUE] 97 | Don't use the peak picking provided by the native 98 | Thermo library. By default peak picking is 99 | enabled. Optional argument allows disabling peak 100 | peaking only for selected MS levels and should 101 | be a comma-separated list of integers (1,2,3) 102 | and/or intervals (1-3), open-end intervals (1-) 103 | are allowed 104 | -z, --noZlibCompression Don't use zlib compression for the m/z ratios and 105 | intensities. By default zlib compression is 106 | enabled. 107 | -a, --allDetectors Extract additional detector data: UV/PDA etc 108 | -l, --logging=VALUE Optional logging level: 0 for silent, 1 for 109 | verbose, 2 for default, 3 for warning, 4 for 110 | error; both numeric and text (case insensitive) 111 | value recognized. 112 | -e, --ignoreInstrumentErrors 113 | Ignore missing properties by the instrument. 
114 | -x, --excludeExceptionData Exclude reference and exception data 115 | -L, --msLevel=VALUE Select MS levels (MS1, MS2, etc) included in the 116 | output, should be a comma-separated list of 117 | integers (1,2,3) and/or intervals (1-3), open- 118 | end intervals (1-) are allowed 119 | -P, --mgfPrecursor Include precursor scan number in MGF file TITLE 120 | -N, --noiseData Include noise data in mzML output 121 | -C, --chargeData Include instrument detected charge states in mzML 122 | output (only for high resolution centroided data) 123 | -w, --warningsAreErrors Return non-zero exit code for warnings; default 124 | only for errors 125 | -u, --s3_url[=VALUE] Optional property to write directly the data into 126 | S3 Storage. 127 | -k, --s3_accesskeyid[=VALUE] 128 | Optional key for the S3 bucket to write the file 129 | output. 130 | -t, --s3_secretaccesskey[=VALUE] 131 | Optional key for the S3 bucket to write the file 132 | output. 133 | -n, --s3_bucketName[=VALUE] 134 | S3 bucket name 135 | ``` 136 | 137 | Output file extension is determined by the used output format and (optional) gzip compression, for example, if format is MGF without gzip compression, the output file will receive `.mgf` extension, if format is mzML with gzip compression the output file will have `.mzML.gz` extension. All user input will be standardized to fulfill the above-mentioned requirements. 138 | 139 | A (java) graphical user interface is also available [here](https://github.com/compomics/ThermoRawFileParserGUI) that enables the selection of an input RAW directory or one or more RAW files. 140 | 141 | ### query subcommand 142 | Enables the retrieval of spectra by (a) scan number(s) in [PROXI format](https://github.com/HUPO-PSI/proxi-schemas). 
143 | 144 | ``` 145 | mono ThermoRawFileParser.exe query -i=/home/user/data_input/raw_file.raw -b=/home/user/output.json -n="1-5, 20, 25-30" 146 | ``` 147 | 148 | ``` 149 | ThermoRawFileParser.exe query --help 150 | usage is: 151 | -h, --help Prints out the options. 152 | -i, --input=VALUE The raw file input (Required). 153 | -n, --scans=VALUE The scan numbers. e.g. "1-5, 20, 25-30" 154 | -b, --output=VALUE The output file. Specifying none writes the output 155 | file to the input file parent directory. 156 | -p, --noPeakPicking Don't use the peak picking provided by the native 157 | Thermo library. By default peak picking is 158 | enabled. 159 | -s, --stdout Pipes the output into standard output. Logging is 160 | being turned off 161 | -w, --warningsAreErrors Return non-zero exit code for warnings; default 162 | only for errors 163 | -l, --logging=VALUE Optional logging level: 0 for silent, 1 for 164 | verbose, 2 for default, 3 for warning, 4 for 165 | error; both numeric and text (case insensitive) 166 | value recognized. 167 | ``` 168 | 169 | ### xic subcommand 170 | Returns one or more chromatograms based on query JSON input. 171 | 172 | ``` 173 | mono ThermoRawFileParser.exe xic -i=/home/user/data_input/raw_file.raw -j=/home/user/xic_input.json 174 | ``` 175 | 176 | ``` 177 | ThermoRawFileParser.exe xic --help 178 | -h, --help Prints out the options. 179 | -i, --input=VALUE The raw file input (Required). Specify this or an 180 | input directory -d 181 | -d, --input_directory=VALUE 182 | The directory containing the raw files (Required). 183 | Specify this or an input file -i. 184 | -j, --json=VALUE The json input file (Required). 185 | -p, --print_example Show a json input file example. 186 | -b, --output=VALUE The output file. Specify this or an output 187 | directory. Specifying neither writes to the 188 | input directory. 189 | -o, --output_directory=VALUE 190 | The output directory. Specify this or an output 191 | file. 
Specifying neither writes to the input 192 | directory. 193 | -6, --base64 Encodes the content of the xic vectors as base 64 194 | encoded string. 195 | -s, --stdout Pipes the output into standard output. Logging is 196 | being turned off. 197 | -w, --warningsAreErrors Return non-zero exit code for warnings; default 198 | only for errors 199 | -l, --logging=VALUE Optional logging level: 0 for silent, 1 for 200 | verbose, 2 for default, 3 for warning, 4 for 201 | error; both numeric and text (case insensitive) 202 | value recognized. 203 | ``` 204 | 205 | Provide one of the following filters: 206 | * M/Z and tolerance (tolerance unit optional, defaults to `ppm`) 207 | * M/Z start and end 208 | * sequence and tolerance (tolerance unit optional, defaults to `ppm`) 209 | 210 | optionally one can define starting and ending retention times, provide filter string (defaults to `ms`, i.e. only MS1 scans), and a comment (free text) field; any valid filter string is supported, 211 | however only basic validation is performed, see [issue #158](https://github.com/compomics/ThermoRawFileParser/issues/158) for details. Comment can contain any text and will be preserved in the output. 
212 | 213 | An example input JSON file: 214 | 215 | ``` 216 | [ 217 | { 218 | "mz":488.5384, 219 | "tolerance":10, 220 | "tolerance_unit":"ppm" 221 | }, 222 | { 223 | "mz":575.2413, 224 | "tolerance":10, 225 | "rt_start":630, 226 | "rt_end":660, 227 | "scan_filter":"ms2" 228 | }, 229 | { 230 | "mz_start":749.7860, 231 | "mz_end" : 750.4, 232 | "rt_start":630, 233 | "rt_end":660 234 | }, 235 | { 236 | "sequence":"TRANNEL", 237 | "tolerance":10 238 | }, 239 | { 240 | "mz":1014.5099732499732, 241 | "rt_start":14.0600881872, 242 | "rt_end":14.4167198290667, 243 | "tolerance":5, 244 | "tolerance_unit":"ppm", 245 | "comment":"Only ion trap scans", 246 | "scan_filter":"ITMS" 247 | } 248 | ] 249 | 250 | 251 | ``` 252 | 253 | [Go to top of page](#thermorawfileparser) 254 | 255 | ## Galaxy integration 256 | 257 | ThermoRawFileParser is available in the Galaxy [ToolShed](https://toolshed.g2.bx.psu.edu/view/galaxyp/thermo_raw_file_converter/a3edda696e4d) and is deployed at the [European Galaxy Server](https://usegalaxy.eu/root?tool_id=toolshed.g2.bx.psu.edu/repos/galaxyp/thermo_raw_file_converter/thermo_raw_file_converter/). 258 | 259 | ## Logging 260 | 261 | By default the parser only logs to console. To enable logging to file, uncomment the file appender in the `log4net.config` file. 262 | 263 | ``` 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 286 | 287 | ``` 288 | 289 | ## Docker 290 | 291 | First check the latest version tag on [biocontainers/thermorawfileparser/tags](https://quay.io/repository/biocontainers/thermorawfileparser?tab=tags). 
Then pull and run the container with 292 | 293 | ```bash 294 | docker run -i -t -v /home/user/raw:/data_input quay.io/biocontainers/thermorawfileparser: ThermoRawFileParser.sh --help 295 | ``` 296 | 297 | [Go to top of page](#thermorawfileparser) 298 | -------------------------------------------------------------------------------- /Writer/ParquetSpectrumWriter.cs: -------------------------------------------------------------------------------- 1 | using log4net; 2 | using Parquet.Serialization; 3 | using System; 4 | using System.Collections.Generic; 5 | using System.Reflection; 6 | using System.Text.RegularExpressions; 7 | using ThermoFisher.CommonCore.Data.FilterEnums; 8 | using ThermoFisher.CommonCore.Data.Interfaces; 9 | using ThermoRawFileParser.Util; 10 | 11 | namespace ThermoRawFileParser.Writer 12 | { 13 | struct MzParquet 14 | { 15 | public uint scan; 16 | public byte level; 17 | public string scan_type; 18 | public float rt; 19 | public float mz; 20 | public float intensity; 21 | public float? ion_mobility; 22 | public float? isolation_lower; 23 | public float? isolation_upper; 24 | public int? precursor_scan; 25 | public float? precursor_mz; 26 | public uint? precursor_charge; 27 | } 28 | 29 | struct PrecursorData 30 | { 31 | public float? mz; 32 | public float? isolation_lower; 33 | public float? 
isolation_upper;
    }

    /// <summary>
    /// Spectrum writer producing ".mzparquet" output: one row per m/z value of every selected scan,
    /// serialized in row groups so that all ions of a scan end up in the same group.
    /// </summary>
    public class ParquetSpectrumWriter : SpectrumWriter
    {
        private static readonly ILog Log =
            LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType);

        // Number of buffered rows after which a row group is written
        private const int ParquetSliceSize = 1_048_576;

        public ParquetSpectrumWriter(ParseInput parseInput) : base(parseInput)
        {
            //nothing to do here
        }

        /// <summary>
        /// Writes the scans in the given range to the parquet output.
        /// </summary>
        /// <param name="raw">the opened RAW file</param>
        /// <param name="firstScanNumber">first scan number to process (inclusive)</param>
        /// <param name="lastScanNumber">last scan number to process (inclusive)</param>
        /// <exception cref="RawFileParserException">the RAW file contains no MS data</exception>
        public override void Write(IRawDataPlus raw, int firstScanNumber, int lastScanNumber)
        {
            if (!raw.HasMsData)
            {
                throw new RawFileParserException("No MS data in RAW file, no output will be produced");
            }

            ConfigureWriter(".mzparquet");

            try
            {
                var opts = new ParquetSerializerOptions
                {
                    CompressionLevel = System.IO.Compression.CompressionLevel.Fastest,
                    CompressionMethod = Parquet.CompressionMethod.Zstd
                };

                var data = new List<MzParquet>();
                var totalScans = lastScanNumber - firstScanNumber + 1;
                var lastScanProgress = 0;

                Log.Info(String.Format("Processing {0} MS scans", totalScans));

                for (var scanNumber = firstScanNumber; scanNumber <= lastScanNumber; scanNumber++)
                {
                    if (ParseInput.LogFormat == LogFormat.DEFAULT)
                    {
                        // Progress is measured relative to the requested range, so it also ends at
                        // 100% when the range does not start at scan 1
                        var scanProgress = (int)((double)(scanNumber - firstScanNumber + 1) / totalScans * 100);
                        if (scanProgress % ProgressPercentageStep == 0 && scanProgress != lastScanProgress)
                        {
                            Console.Write("" + scanProgress + "% ");
                            lastScanProgress = scanProgress;
                        }
                    }

                    try
                    {
                        int level = (int)raw.GetScanEventForScanNumber(scanNumber).MSOrder; //applying MS level filter
                        if (level <= ParseInput.MaxLevel) // Primary MS level filter
                        {
                            // The scan is read even if it is filtered out below, because ReadScan
                            // also updates the precursor bookkeeping used by later scans
                            var scanData = ReadScan(raw, scanNumber);
                            if (scanData != null && ParseInput.MsLevel.Contains(level)) // Final MS level filter
                            {
                                data.AddRange(scanData);
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        Log.Error($"Scan #{scanNumber} cannot be processed because of the following exception: {ex.Message}");
                        Log.Debug($"{ex.StackTrace}\n{ex.InnerException}");
                        ParseInput.NewError();
                    }

                    // If we have enough ions to write a row group, do so
                    // - some row groups might have more than this number of ions
                    // but this ensures that all ions from a single scan are always
                    // present in the same row group (critical property of mzparquet)
                    if (data.Count >= ParquetSliceSize)
                    {
                        WriteRowGroup(data, opts);
                        opts.Append = true;
                        data.Clear();
                        Log.Debug("Writing next row group");
                    }
                }

                // serialize any remaining ions into the final row group
                if (data.Count > 0)
                {
                    WriteRowGroup(data, opts);
                    Log.Debug("Writing final row group");
                }

                if (ParseInput.LogFormat == LogFormat.DEFAULT) //Add new line after progress bar
                {
                    Console.WriteLine();
                }

                Writer.Flush();
            }
            finally
            {
                // Release the OS file handle, also when serialization failed
                Writer.Close();
            }
        }

        /// <summary>
        /// Serializes the buffered rows to the output stream and blocks until the write completed.
        /// </summary>
        private void WriteRowGroup(List<MzParquet> rows, ParquetSerializerOptions options)
        {
            // GetAwaiter().GetResult() blocks like Wait() but rethrows the original exception
            // instead of wrapping it in an AggregateException
            ParquetSerializer.SerializeAsync(rows, Writer.BaseStream, options).GetAwaiter().GetResult();
        }

        /// <summary>
        /// Reads one scan and converts it into parquet rows, one per m/z value. Also records the scan
        /// in the precursor bookkeeping so that later scans can refer to it.
        /// </summary>
        /// <param name="raw">the opened RAW file</param>
        /// <param name="scanNumber">the scan to read</param>
        /// <returns>the rows of the scan; empty when the scan has no m/z data</returns>
        /// <exception cref="ArgumentOutOfRangeException">the scan has an MS order this writer does not handle</exception>
        private List<MzParquet> ReadScan(IRawDataPlus raw, int scanNumber)
        {
            var scanFilter = raw.GetFilterForScanNumber(scanNumber);

            // Get the scan event for this scan number
            var scanEvent = raw.GetScanEventForScanNumber(scanNumber);

            // Get scan ms level
            var msLevel = (int)scanFilter.MSOrder;

            // Get Scan trailer
            ScanTrailer trailerData;

            //Scan type
            string scan_type;

            try
            {
                trailerData = new ScanTrailer(raw.GetTrailerExtraInformation(scanNumber));
            }
            catch (Exception ex)
            {
                Log.WarnFormat("Cannot load trailer infromation for scan {0} due to following exception\n{1}", scanNumber, ex.Message);
                ParseInput.NewWarn();
                trailerData = new ScanTrailer();
            }

            int? trailer_charge = trailerData.AsPositiveInt("Charge State:");
            double? trailer_mz = trailerData.AsDouble("Monoisotopic M/Z:");
            double? trailer_isolationWidth = trailerData.AsDouble("MS" + msLevel + " Isolation Width:");
            double? FAIMSCV = null;
            if (trailerData.AsBool("FAIMS Voltage On:").GetValueOrDefault(false))
            {
                FAIMSCV = trailerData.AsDouble("FAIMS CV:");
            }

            double rt = raw.RetentionTimeFromScanNumber(scanNumber);
            int precursor_scan = 0;
            PrecursorData precursor_data = new PrecursorData
            {
                isolation_lower = null,
                isolation_upper = null,
                mz = null
            };

            if (msLevel == 1)
            {
                // Keep track of scan number for precursor reference
                _precursorScanNumbers[""] = scanNumber;
                _precursorTree[scanNumber] = new PrecursorInfo();
                scan_type = "MS1 spectrum";
            }
            else if (msLevel == (int)MSOrderType.Nl)
            {
                scan_type = "constant neutral loss spectrum";
            }
            else if (msLevel == (int)MSOrderType.Ng)
            {
                scan_type = "constant neutral gain spectrum";
            }
            else
            {
                Match result = null;

                if (msLevel > 1)
                {
                    // Keep track of scan number and isolation m/z for precursor reference
                    result = _filterStringIsolationMzPattern.Match(scanEvent.ToString());
                    scan_type = "MSn spectrum";
                }
                else if (msLevel == (int)MSOrderType.Par)
                {
                    // Keep track of scan number and isolation m/z for precursor reference
                    result = _filterStringParentMzPattern.Match(scanEvent.ToString());
                    scan_type = "precursor ion spectrum";
                }
                else
                {
                    // The three-argument constructor keeps the text in the message instead of
                    // passing it off as a parameter name
                    throw new ArgumentOutOfRangeException(nameof(msLevel), msLevel, "Unknown msLevel");
                }

                if (result != null && result.Success)
                {
                    // Indexer assignment replaces any earlier entry for the same isolation string
                    _precursorScanNumbers[result.Groups[1].Value] = scanNumber;
                }

                //update precursor scan if it is provided in trailer data
                var trailerMasterScan = trailerData.AsPositiveInt("Master Scan Number:");
                if (trailerMasterScan.HasValue)
                {
                    precursor_scan = trailerMasterScan.Value;
                }
                else //try getting it from the scan filter
                {
                    precursor_scan = GetParentFromScanString(result == null ? "" : result.Groups[1].Value);
                }

                //finding precursor scan failed
                if (precursor_scan == -2 || !_precursorTree.ContainsKey(precursor_scan))
                {
                    Log.Warn($"Cannot find precursor scan for scan# {scanNumber}");
                    _precursorTree[precursor_scan] = new PrecursorInfo(0, msLevel, FindLastReaction(scanEvent, msLevel), null);
                    ParseInput.NewWarn();
                }

                try
                {
                    try //since there is no direct way to get the number of reactions available, it is necessary to try and fail
                    {
                        scanEvent.GetReaction(_precursorTree[precursor_scan].ReactionCount);
                    }
                    catch (ArgumentOutOfRangeException ex)
                    {
                        Log.Debug($"Using Tribrid decision tree fix for scan# {scanNumber}");
                        //Is it a decision tree scheduled scan on tribrid?
                        if (msLevel == _precursorTree[precursor_scan].MSLevel)
                        {
                            precursor_scan = GetParentFromScanString(result.Groups[1].Value);
                        }
                        else
                        {
                            throw new RawFileParserException(
                                $"Tribrid decision tree fix failed - cannot get reaction# {_precursorTree[precursor_scan].ReactionCount} from {scanEvent.ToString()}",
                                ex);
                        }
                    }

                    // Get precursor m/z and isolation window borders
                    precursor_data = GetPrecursorData(precursor_scan, scanEvent, trailer_mz, trailer_isolationWidth, out var reactionCount);

                    //save precursor information for later reference
                    _precursorTree[scanNumber] = new PrecursorInfo(precursor_scan, msLevel, reactionCount, null);
                }
                catch (Exception e)
                {
                    var extra = (e.InnerException is null) ? "" : $"\n{e.InnerException.StackTrace}";

                    Log.Warn($"Failed creating precursor list for scan# {scanNumber} - precursor information for this and dependent scans will be empty\nException details:{e.Message}\n{e.StackTrace}\n{extra}");
                    ParseInput.NewWarn();

                    _precursorTree[scanNumber] = new PrecursorInfo(precursor_scan, 1, 0, null);
                }
            }

            MZData mzData;

            // Get each mz data for scan
            try
            {
                mzData = ReadMZData(raw, scanEvent, scanNumber,
                    !ParseInput.NoPeakPicking.Contains((int)scanFilter.MSOrder), //requestCentroidedData
                    false, //requestChargeData
                    false); //requestNoiseData
            }
            catch (Exception ex)
            {
                Log.ErrorFormat("Failed reading mz data for scan #{0} due to following exception: {1}\nMZ data will be empty", scanNumber, ex.Message);
                Log.DebugFormat("{0}\n{1}", ex.StackTrace, ex.InnerException);
                ParseInput.NewError();

                // NOTE(review): the type arguments of Array.Empty are not visible in this listing;
                // they must match the element types of the corresponding MZData fields
                mzData = new MZData
                {
                    basePeakMass = null,
                    basePeakIntensity = null,
                    masses = Array.Empty(),
                    intensities = Array.Empty(),
                    charges = Array.Empty(),
                    baselineData = Array.Empty(),
                    noiseData = Array.Empty(),
                    massData = Array.Empty(),
                    isCentroided = false
                };
            }

            if (mzData.masses.Length == 0 || mzData.intensities.Length == 0)
            {
                Log.WarnFormat("Spectrum {0} has no m/z data", scanNumber);
            }

            // Everything except m/z and intensity is identical for all rows of the scan
            var rowLevel = msLevel > 0 ? (byte)msLevel : (byte)2;
            var rowPrecursorScan = precursor_scan > 0 ? precursor_scan : 0;

            List<MzParquet> scanData = new List<MzParquet>(mzData.masses.Length);
            // Add a row to parquet file for every m/z value in this scan
            for (int i = 0; i < mzData.masses.Length; i++)
            {
                scanData.Add(new MzParquet
                {
                    rt = (float)rt,
                    scan = (uint)scanNumber,
                    scan_type = scan_type,
                    level = rowLevel,
                    intensity = (float)mzData.intensities[i],
                    mz = (float)mzData.masses[i],
                    isolation_lower = precursor_data.isolation_lower,
                    isolation_upper = precursor_data.isolation_upper,
                    precursor_scan = rowPrecursorScan,
                    precursor_mz = precursor_data.mz,
                    precursor_charge = (uint?)trailer_charge,
                    ion_mobility = (float?)FAIMSCV
                });
            }

            return scanData;
        }

        /// <summary>
        /// Computes the selected ion m/z and the isolation window borders of a scan from the reaction
        /// that follows the reactions already consumed by its precursor scan.
        /// </summary>
        /// <param name="precursorScanNumber">scan number of the precursor; must be present in the precursor tree</param>
        /// <param name="scanEvent">scan event of the current scan</param>
        /// <param name="monoisotopicMz">monoisotopic m/z from the trailer, if any</param>
        /// <param name="isolationWidth">isolation width from the trailer; the reaction's width is used when null</param>
        /// <param name="reactionCount">number of reactions consumed up to and including this scan</param>
        /// <returns>precursor m/z and isolation window; the window borders are null when no width is known</returns>
        private PrecursorData GetPrecursorData(int precursorScanNumber, IScanEventBase scanEvent,
            double? monoisotopicMz, double? isolationWidth, out int reactionCount)
        {
            double? isolation_lower = null;
            double? isolation_upper = null;

            // Get precursors from earlier levels
            var prevPrecursors = _precursorTree[precursorScanNumber];
            reactionCount = prevPrecursors.ReactionCount;

            var reaction = scanEvent.GetReaction(reactionCount);

            //if isolation width was not found in the trailer, try to get one from the reaction
            if (isolationWidth == null) isolationWidth = reaction.IsolationWidth;
            if (isolationWidth < 0) isolationWidth = null;

            // Selected ion MZ
            var selectedIonMz = CalculateSelectedIonMz(reaction, monoisotopicMz, isolationWidth);

            if (isolationWidth != null)
            {
                var offset = isolationWidth.Value / 2 + reaction.IsolationWidthOffset;
                isolation_lower = reaction.PrecursorMass - isolationWidth.Value + offset;
                isolation_upper = reaction.PrecursorMass + offset;
            }

            // Activation only to keep track of the reactions
            //increase reaction count
            reactionCount++;

            //Sometimes the property of supplemental activation is not set (Tune v4 on Tribrid),
            //or is On if *at least* one of the levels had SA (i.e. not necessarily the last one), thus we need to try (and possibly fail)
            try
            {
                reaction = scanEvent.GetReaction(reactionCount);

                if (reaction != null)
                {
                    //increase reaction count after successful parsing
                    reactionCount++;
                }
            }
            catch (Exception ex) when (ex is IndexOutOfRangeException || ex is ArgumentOutOfRangeException)
            {
                // No further reaction available - nothing to do. ReadScan probes the same call and
                // handles ArgumentOutOfRangeException, so both types are tolerated here.
                // NOTE(review): confirm which of the two the Thermo library actually throws.
            }

            return new PrecursorData
            {
                mz = (float?)selectedIonMz,
                isolation_lower = (float?)isolation_lower,
                isolation_upper = (float?)isolation_upper
            };
        }
    }

}
-------------------------------------------------------------------------------- /Writer/SpectrumWriter.cs: -------------------------------------------------------------------------------- 1 | using log4net; 2 | using System; 3 | using System.Collections.Generic; 4 | using System.IO; 5 | using System.IO.Compression; 6 | using System.Linq; 7 | using System.Reflection; 8 | using System.Text.RegularExpressions; 9 | using ThermoFisher.CommonCore.Data; 10 | using ThermoFisher.CommonCore.Data.Business; 11 | using ThermoFisher.CommonCore.Data.FilterEnums; 12 | using ThermoFisher.CommonCore.Data.Interfaces; 13 | using ThermoRawFileParser.Util; 14 | 15 | namespace ThermoRawFileParser.Writer 16 | { 17 | public abstract class SpectrumWriter : ISpectrumWriter 18 | { 19 | private static readonly ILog Log = 20 | LogManager.GetLogger(MethodBase.GetCurrentMethod().DeclaringType); 21 | 22 | protected const double ZeroDelta = 0.0001; 23 | 24 | /// 25 | /// The progress step size in percentage. 
///
        protected const int ProgressPercentageStep = 10;

        private const double PrecursorMzDelta = 0.0001;
        private const double DefaultIsolationWindowLowerOffset = 1.5;
        private const double DefaultIsolationWindowUpperOffset = 2.5;

        /// <summary>
        /// The parse input object
        /// </summary>
        protected readonly ParseInput ParseInput;

        /// <summary>
        /// The output stream writer
        /// </summary>
        protected StreamWriter Writer;

        /// <summary>
        /// Precursor cache
        /// </summary>
        // NOTE(review): the type arguments of LimitedSizeDictionary are not visible in this listing
        private static LimitedSizeDictionary precursorCache;

        // Precursor scan number (value) and isolation m/z (key) for reference in the precursor element of an MSn spectrum
        private protected readonly Dictionary<string, int> _precursorScanNumbers = new Dictionary<string, int>();

        //Precursor information for scans
        private protected Dictionary<int, PrecursorInfo> _precursorTree = new Dictionary<int, PrecursorInfo>();

        // Filter string regex to extract an isolation entry
        private protected readonly Regex _filterStringIsolationMzPattern = new Regex(@"ms\d+ (.+?) \[");
        // Filter string regex to extract a parent entry
        private protected readonly Regex _filterStringParentMzPattern = new Regex(@"pr (.+?) \[");

        /// <summary>
        /// Constructor. Resets the precursor cache and seeds the precursor bookkeeping with a
        /// placeholder entry (scan number -1) for the empty isolation string.
        /// </summary>
        /// <param name="parseInput">the parse input object</param>
        protected SpectrumWriter(ParseInput parseInput)
        {
            ParseInput = parseInput;
            precursorCache = new LimitedSizeDictionary(10);
            _precursorScanNumbers[""] = -1;
            _precursorTree[-1] = new PrecursorInfo();
        }

        /// <inheritdoc />
        public abstract void Write(IRawDataPlus rawFile, int firstScanNumber, int lastScanNumber);

        /// <summary>
        /// Configure the output writer: standard output when requested, otherwise a file whose name
        /// is normalized to the given extension.
        /// </summary>
        /// <param name="extension">The extension of the output file</param>
        protected void ConfigureWriter(string extension)
        {
            if (ParseInput.StdOut)
            {
                Writer = new StreamWriter(Console.OpenStandardOutput());
                Writer.AutoFlush = true;
                return;
            }

            var fileName = NormalizeFileName(ParseInput.OutputFile, extension, ParseInput.Gzip);
            if (ParseInput.OutputFormat == OutputFormat.Parquet)
            {
                // The parquet writer uses the underlying stream, so no gzip wrapper is added here
                Writer = new StreamWriter(File.Create(fileName));
            }
            else if (!ParseInput.Gzip || ParseInput.OutputFormat == OutputFormat.IndexMzML)
            {
                Writer = File.CreateText(fileName);
            }
            else
            {
                var fileStream = File.Create(fileName);
                var compress = new GZipStream(fileStream, CompressionMode.Compress);
                Writer = new StreamWriter(compress);
            }
        }

        /// <summary>
        /// Builds the output file name: strips an already present extension (and ".gz" when gzip is
        /// requested) from the requested name, ignoring case, and appends the canonical one.
        /// </summary>
        /// <param name="outputFile">the requested output file; when null the name is derived from the output directory and the RAW file name</param>
        /// <param name="extension">the format extension, including the leading dot</param>
        /// <param name="gzip">whether ".gz" is appended after the format extension</param>
        /// <returns>the normalized file name</returns>
        private string NormalizeFileName(string outputFile, string extension, bool gzip)
        {
            string result = outputFile ?? Path.Combine(ParseInput.OutputDirectory, ParseInput.RawFileNameWithoutExtension);
            string tail = "";

            // Outermost extension first, so "name.mzML.gz" is peeled from the right
            string[] extensions = gzip
                ? new string[] { ".gz", extension }
                : new string[] { extension };

            result = result.TrimEnd('.');

            foreach (var ext in extensions)
            {
                // Ordinal comparison: file extensions are identifiers, not linguistic text
                if (result.EndsWith(ext, StringComparison.OrdinalIgnoreCase))
                {
                    result = result.Substring(0, result.Length - ext.Length);
                }

                tail = ext + tail;
                result = result.TrimEnd('.');
            }

            return result + tail;
        }

        /// <summary>
        /// Construct the spectrum title.
        /// </summary>
        /// <param name="instrumentType">the controller type</param>
        /// <param name="instrumentNumber">the controller number</param>
        /// <param name="scanNumber">the spectrum scan number</param>
        protected static string ConstructSpectrumTitle(int instrumentType, int instrumentNumber, int scanNumber)
        {
            return $"controllerType={instrumentType} controllerNumber={instrumentNumber} scan={scanNumber}";
        }

139 | /// 140 | /// Calculate the selected ion m/z value. This is necessary because the precursor mass found in the reaction 141 | /// isn't always the monoisotopic mass. 142 | /// https://github.com/ProteoWizard/pwiz/blob/master/pwiz/data/vendor_readers/Thermo/SpectrumList_Thermo.cpp#L564-L574 143 | /// 144 | /// the scan event reaction 145 | /// the monoisotopic m/z value 146 | /// the scan event reaction 147 | public static double CalculateSelectedIonMz(IReaction reaction, double? monoisotopicMz, 148 | double? 
isolationWidth) 149 | { 150 | var selectedIonMz = reaction.PrecursorMass; 151 | 152 | // take the isolation width from the reaction if no value was found in the trailer data 153 | if (isolationWidth == null || isolationWidth < ZeroDelta) 154 | { 155 | isolationWidth = reaction.IsolationWidth; 156 | } 157 | 158 | isolationWidth /= 2; 159 | 160 | if (monoisotopicMz != null && monoisotopicMz > ZeroDelta 161 | && Math.Abs( 162 | reaction.PrecursorMass - monoisotopicMz.Value) > 163 | PrecursorMzDelta) 164 | { 165 | selectedIonMz = monoisotopicMz.Value; 166 | 167 | // check if the monoisotopic mass lies in the precursor mass isolation window 168 | // otherwise take the precursor mass 169 | if (isolationWidth <= 2.0) 170 | { 171 | if ((selectedIonMz < 172 | (reaction.PrecursorMass - DefaultIsolationWindowLowerOffset * 2)) || 173 | (selectedIonMz > 174 | (reaction.PrecursorMass + DefaultIsolationWindowUpperOffset))) 175 | { 176 | selectedIonMz = reaction.PrecursorMass; 177 | } 178 | } 179 | else if ((selectedIonMz < (reaction.PrecursorMass - isolationWidth)) || 180 | (selectedIonMz > (reaction.PrecursorMass + isolationWidth))) 181 | { 182 | selectedIonMz = reaction.PrecursorMass; 183 | } 184 | } 185 | 186 | return selectedIonMz; 187 | } 188 | 189 | public static IReaction GetReaction(IScanEvent scanEvent, int scanNumber) 190 | { 191 | IReaction reaction = null; 192 | try 193 | { 194 | var order = (int)scanEvent.MSOrder; 195 | if (order < 0) 196 | { 197 | reaction = scanEvent.GetReaction(0); 198 | } 199 | else if (order > 1) 200 | { 201 | reaction = scanEvent.GetReaction(order - 2); 202 | } 203 | else 204 | { 205 | Log.Warn($"Attempting to get reaction for MS{order} scan# {scanNumber} failed"); 206 | } 207 | 208 | } 209 | catch (ArgumentOutOfRangeException) 210 | { 211 | Log.Warn("No reaction found for scan " + scanNumber); 212 | } 213 | 214 | return reaction; 215 | } 216 | 217 | /// 218 | /// Calculate the precursor peak intensity (similar to modern MSConvert). 
219 | /// Sum intensities of all peaks in the isolation window. 220 | /// 221 | /// the RAW file object 222 | /// the precursor scan number 223 | /// the precursor mass 224 | /// the isolation width 225 | /// profile/centroid switch 226 | protected static double CalculatePrecursorPeakIntensity(IRawDataPlus rawFile, int precursorScanNumber, 227 | double precursorMass, double? isolationWidth, bool useProfile) 228 | { 229 | double precursorIntensity = 0; 230 | double halfWidth = isolationWidth is null || isolationWidth == 0 ? 0 : DefaultIsolationWindowLowerOffset; // that is how it is made in MSConvert (why?) 231 | 232 | double[] masses; 233 | double[] intensities; 234 | 235 | // Get the mz-array from RAW file or cache 236 | if (precursorCache.ContainsKey(precursorScanNumber)) 237 | { 238 | masses = precursorCache[precursorScanNumber].Masses; 239 | intensities = precursorCache[precursorScanNumber].Intensities; 240 | } 241 | else 242 | { 243 | Scan scan = Scan.FromFile(rawFile, precursorScanNumber); 244 | 245 | if (useProfile) //get the profile data 246 | { 247 | masses = scan.SegmentedScan.Positions; 248 | intensities = scan.SegmentedScan.Intensities; 249 | } 250 | else 251 | { 252 | if (scan.HasCentroidStream) //use centroids if possible 253 | { 254 | masses = scan.CentroidScan.Masses; 255 | intensities = scan.CentroidScan.Intensities; 256 | } 257 | else 258 | { 259 | var scanEvent = rawFile.GetScanEventForScanNumber(precursorScanNumber); 260 | if (scan.SegmentedScan.PositionCount > 0) 261 | { 262 | var centroidedScan = scanEvent.ScanData == ScanDataType.Profile //only centroid profile spectra 263 | ? 
Scan.ToCentroid(scan).SegmentedScan 264 | : scan.SegmentedScan; 265 | 266 | masses = centroidedScan.Positions; 267 | intensities = centroidedScan.Intensities; 268 | } 269 | else 270 | { 271 | masses = Array.Empty(); 272 | intensities = Array.Empty(); 273 | } 274 | } 275 | } 276 | 277 | //save to cache 278 | precursorCache.Add(precursorScanNumber, new MZArray { Masses = masses, Intensities = intensities }); 279 | } 280 | 281 | var index = masses.FastBinarySearch(precursorMass - halfWidth); //set index to the first peak inside isolation window 282 | 283 | while (index > 0 && index < masses.Length && masses[index] < precursorMass + halfWidth) //negative index means value was not found 284 | { 285 | precursorIntensity += intensities[index]; 286 | index++; 287 | } 288 | 289 | return precursorIntensity; 290 | } 291 | 292 | private protected int GetParentFromScanString(string scanString) 293 | { 294 | var parts = Regex.Split(scanString, " "); 295 | 296 | //find the position of the first (from the end) precursor with a different mass 297 | //to account for possible supplementary activations written in the filter 298 | var lastIonMass = parts.Last().Split('@').First(); 299 | int last = parts.Length; 300 | while (last > 0 && 301 | parts[last - 1].Split('@').First() == lastIonMass) 302 | { 303 | last--; 304 | } 305 | 306 | string parentFilter = String.Join(" ", parts.Take(last)); 307 | if (_precursorScanNumbers.ContainsKey(parentFilter)) 308 | { 309 | return _precursorScanNumbers[parentFilter]; 310 | } 311 | 312 | return -2; //unsuccessful parsing 313 | } 314 | 315 | private protected int FindLastReaction(IScanEvent scanEvent, int msLevel) 316 | { 317 | int lastReactionIndex = msLevel - 2; 318 | 319 | //iteratively trying find the last available index for reaction 320 | while (true) 321 | { 322 | try 323 | { 324 | scanEvent.GetReaction(lastReactionIndex + 1); 325 | } 326 | catch (IndexOutOfRangeException) 327 | { 328 | //stop trying 329 | break; 330 | } 331 | 332 | 
lastReactionIndex++; 333 | } 334 | 335 | //supplemental activation flag is on -> one of the levels (not necissirily the last one) used supplemental activation 336 | //check last two activations 337 | if (scanEvent.SupplementalActivation == TriState.On) 338 | { 339 | var lastActivation = scanEvent.GetReaction(lastReactionIndex).ActivationType; 340 | var beforeLastActivation = scanEvent.GetReaction(lastReactionIndex - 1).ActivationType; 341 | 342 | if ((beforeLastActivation == ActivationType.ElectronTransferDissociation || beforeLastActivation == ActivationType.ElectronCaptureDissociation) && 343 | (lastActivation == ActivationType.CollisionInducedDissociation || lastActivation == ActivationType.HigherEnergyCollisionalDissociation)) 344 | return lastReactionIndex - 1; //ETD or ECD followed by HCD or CID -> supplemental activation in the last level (move the last reaction one step back) 345 | else 346 | return lastReactionIndex; 347 | } 348 | else //just use the last one 349 | { 350 | return lastReactionIndex; 351 | } 352 | } 353 | 354 | private protected MZData ReadMZData(IRawData rawFile, IScanEvent scanEvent, int scanNumber, bool centroid, bool charge, bool noiseData) 355 | { 356 | double[] raw_masses;// copy of original (unsorted) masses 357 | 358 | MZData mzData = new MZData(); 359 | 360 | var scan = Scan.FromFile(rawFile, scanNumber); 361 | 362 | //If centroiding is requested 363 | if (centroid) 364 | { 365 | mzData.isCentroided = true; // flag that the data is centroided 366 | // Check if the scan has a centroid stream 367 | if (scan.HasCentroidStream) 368 | { 369 | mzData.basePeakMass = scan.CentroidScan.BasePeakMass; 370 | mzData.basePeakIntensity = scan.CentroidScan.BasePeakIntensity; 371 | 372 | mzData.masses = scan.CentroidScan.Masses; 373 | raw_masses = scan.CentroidScan.Masses; 374 | mzData.intensities = scan.CentroidScan.Intensities; 375 | 376 | if (charge) 377 | { 378 | mzData.charges = scan.CentroidScan.Charges; 379 | } 380 | } 381 | else // otherwise 
take the segmented (low res) scan 382 | { 383 | mzData.basePeakMass = scan.ScanStatistics.BasePeakMass; 384 | mzData.basePeakIntensity = scan.ScanStatistics.BasePeakIntensity; 385 | 386 | //cannot centroid empty segmented scan 387 | if (scan.SegmentedScan.PositionCount > 0) 388 | { 389 | // If the spectrum is profile perform centroiding 390 | var segmentedScan = scanEvent.ScanData == ScanDataType.Profile 391 | ? Scan.ToCentroid(scan).SegmentedScan 392 | : scan.SegmentedScan; 393 | 394 | mzData.masses = segmentedScan.Positions; 395 | raw_masses = segmentedScan.Positions; 396 | mzData.intensities = segmentedScan.Intensities; 397 | } 398 | else 399 | { 400 | mzData.masses = Array.Empty(); 401 | mzData.intensities = Array.Empty(); 402 | raw_masses = Array.Empty(); 403 | } 404 | } 405 | } 406 | else // use the segmented data as is 407 | { 408 | switch (scanEvent.ScanData) //check if the data centroided already 409 | { 410 | case ScanDataType.Centroid: 411 | mzData.isCentroided = true; 412 | break; 413 | case ScanDataType.Profile: 414 | mzData.isCentroided = false; 415 | break; 416 | } 417 | 418 | mzData.basePeakMass = scan.ScanStatistics.BasePeakMass; 419 | mzData.basePeakIntensity = scan.ScanStatistics.BasePeakIntensity; 420 | 421 | mzData.masses = scan.SegmentedScan.Positions; 422 | raw_masses = scan.SegmentedScan.Positions; 423 | mzData.intensities = scan.SegmentedScan.Intensities; 424 | } 425 | 426 | // Sort all arrays by m/z 427 | if (raw_masses != null) 428 | { 429 | if (mzData.masses != null) 430 | { 431 | Array.Sort((double[])raw_masses.Clone(), mzData.masses); 432 | 433 | } 434 | if (mzData.intensities != null) 435 | { 436 | Array.Sort((double[])raw_masses.Clone(), mzData.intensities); 437 | } 438 | if (charge && mzData.charges != null) 439 | { 440 | Array.Sort((double[])raw_masses.Clone(), mzData.charges); 441 | } 442 | 443 | } 444 | // If requested, read the noise data 445 | if (noiseData) 446 | { 447 | mzData.baselineData = scan.PreferredBaselines; 448 | 
mzData.noiseData = scan.PreferredNoises; 449 | mzData.massData = scan.PreferredMasses; 450 | } 451 | 452 | return mzData; 453 | } 454 | } 455 | } --------------------------------------------------------------------------------