├── vignettes ├── .gitignore ├── Lib2NIST_configuration.png ├── Lib2NIST_define_subset.png ├── check_number_of_spectra_nist.png └── Compile_EI_and_tandem_mass_spectral_libraries.Rmd ├── R ├── globals.R ├── data.R ├── organize_libraries_MS2.R ├── write_msp.R ├── organize_libraries_EI_and_MS2.R ├── read_structures.R ├── organize_libraries_EI.R └── read_libraries.R ├── LICENSE ├── data ├── EI.rda ├── MS2_mgf.rda └── MS2_msp.rda ├── tests ├── testthat.R └── testthat │ └── test-read_lib.R ├── .gitignore ├── .Rbuildignore ├── man ├── EI.Rd ├── MS2_mgf.Rd ├── MS2_msp.Rd ├── remove_ri.Rd ├── remove_rt.Rd ├── write_EI_msp.Rd ├── write_MS2_msp.Rd ├── complete_gnps.Rd ├── reorganize_mona.Rd ├── read_multilibs.Rd ├── separate_polarity.Rd ├── clean_ri_dat.Rd ├── clean_user_dbu.Rd ├── extract_structure.Rd ├── assign_ri.Rd ├── extract_ri.Rd ├── change_meta.Rd ├── assign_smiles.Rd ├── combine_mol2sdf.Rd └── read_lib.Rd ├── mspcompiler.Rproj ├── NAMESPACE ├── LICENSE.md ├── DESCRIPTION ├── inst ├── EI.msp ├── MS2.msp └── MS2.mgf ├── README.Rmd └── README.md /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c(".i")) 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2021 2 | COPYRIGHT HOLDER: mspcompiler authors 3 | -------------------------------------------------------------------------------- /data/EI.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QizhiSu/mspcompiler/HEAD/data/EI.rda -------------------------------------------------------------------------------- /data/MS2_mgf.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/QizhiSu/mspcompiler/HEAD/data/MS2_mgf.rda -------------------------------------------------------------------------------- /data/MS2_msp.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QizhiSu/mspcompiler/HEAD/data/MS2_msp.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(mspcompiler) 3 | 4 | test_check("mspcompiler") 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .Rdata 4 | .httr-oauth 5 | .DS_Store 6 | inst/doc 7 | /doc/ 8 | /Meta/ 9 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^mspcompiler\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^doc$ 6 | ^Meta$ 7 | 8 | -------------------------------------------------------------------------------- /vignettes/Lib2NIST_configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QizhiSu/mspcompiler/HEAD/vignettes/Lib2NIST_configuration.png -------------------------------------------------------------------------------- /vignettes/Lib2NIST_define_subset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/QizhiSu/mspcompiler/HEAD/vignettes/Lib2NIST_define_subset.png -------------------------------------------------------------------------------- /vignettes/check_number_of_spectra_nist.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/QizhiSu/mspcompiler/HEAD/vignettes/check_number_of_spectra_nist.png -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' An example \code{list} read from EI mass spectral library 2 | "EI" 3 | 4 | 5 | #' An example \code{list} read from MS2 mass spectral library in msp format 6 | "MS2_msp" 7 | 8 | 9 | #' An example \code{list} read from MS2 mass spectral library in mgf format 10 | "MS2_mgf" 11 | -------------------------------------------------------------------------------- /man/EI.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{EI} 5 | \alias{EI} 6 | \title{An example \code{list} read from EI mass spectral library} 7 | \format{ 8 | An object of class \code{list} of length 4. 9 | } 10 | \usage{ 11 | EI 12 | } 13 | \description{ 14 | An example \code{list} read from EI mass spectral library 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/MS2_mgf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{MS2_mgf} 5 | \alias{MS2_mgf} 6 | \title{An example \code{list} read from MS2 mass spectral library in mgf format} 7 | \format{ 8 | An object of class \code{list} of length 4. 
9 | } 10 | \usage{ 11 | MS2_mgf 12 | } 13 | \description{ 14 | An example \code{list} read from MS2 mass spectral library in mgf format 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /man/MS2_msp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{MS2_msp} 5 | \alias{MS2_msp} 6 | \title{An example \code{list} read from MS2 mass spectral library in msp format} 7 | \format{ 8 | An object of class \code{list} of length 4. 9 | } 10 | \usage{ 11 | MS2_msp 12 | } 13 | \description{ 14 | An example \code{list} read from MS2 mass spectral library in msp format 15 | } 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /mspcompiler.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /man/remove_ri.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_EI.R 3 | \name{remove_ri} 4 | \alias{remove_ri} 5 | \title{Remove retention index for EI libraries} 6 | \usage{ 7 | remove_ri(lib) 8 | } 9 | \arguments{ 10 | \item{lib}{The \code{list} generated 
by \code{read_lib}.} 11 | } 12 | \value{ 13 | A \code{list} without RI 14 | } 15 | \description{ 16 | \code{remove_ri} offers a way to remove all RI for EI libraries. 17 | } 18 | \details{ 19 | This function supports parallel computing. 20 | } 21 | -------------------------------------------------------------------------------- /man/remove_rt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_MS2.R 3 | \name{remove_rt} 4 | \alias{remove_rt} 5 | \title{Remove retention time for MS2 libraries} 6 | \usage{ 7 | remove_rt(lib) 8 | } 9 | \arguments{ 10 | \item{lib}{The \code{list} generated by \code{read_lib}.} 11 | } 12 | \value{ 13 | A \code{list} without retention time 14 | } 15 | \description{ 16 | \code{remove_rt} offers a way to remove all retention time for MS2 library. 17 | } 18 | \details{ 19 | This function supports parallel computing. 20 | } 21 | -------------------------------------------------------------------------------- /man/write_EI_msp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_msp.R 3 | \name{write_EI_msp} 4 | \alias{write_EI_msp} 5 | \title{Write EI library into a msp file} 6 | \usage{ 7 | write_EI_msp(lib, filename) 8 | } 9 | \arguments{ 10 | \item{lib}{The organized EI library} 11 | 12 | \item{filename}{The location and filename of the msp file to be exported, 13 | e.g., "/home/exported.msp".} 14 | } 15 | \value{ 16 | No return but create a msp file 17 | } 18 | \description{ 19 | \code{write_EI_msp} offers a way to write the organized EI library into a 20 | msp file. 
21 | } 22 | -------------------------------------------------------------------------------- /man/write_MS2_msp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write_msp.R 3 | \name{write_MS2_msp} 4 | \alias{write_MS2_msp} 5 | \title{Write MS2 library into a msp file} 6 | \usage{ 7 | write_MS2_msp(lib, filename) 8 | } 9 | \arguments{ 10 | \item{lib}{The organized MS2 library} 11 | 12 | \item{filename}{The location and filename of the msp file to be exported, 13 | e.g., "/home/exported.msp".} 14 | } 15 | \value{ 16 | No return but create a msp file 17 | } 18 | \description{ 19 | \code{write_MS2_msp} offers a way to write the organized MS2 library into a 20 | msp file. 21 | } 22 | -------------------------------------------------------------------------------- /tests/testthat/test-read_lib.R: -------------------------------------------------------------------------------- 1 | # Test EI library 2 | test_that("read_lib() reads EI library correctly", { 3 | EI_file <- system.file("EI.msp", package = "mspcompiler") 4 | 5 | expect_identical(read_lib(EI_file, type = "EI"), EI) 6 | }) 7 | 8 | # Test MS2 msp library 9 | test_that("read_lib() reads MS2 msp library correctly", { 10 | MS2_msp_file <- system.file("MS2.msp", package = "mspcompiler") 11 | 12 | expect_identical(read_lib(MS2_msp_file), MS2_msp) 13 | }) 14 | 15 | # Test MS2 mgf library 16 | test_that("read_lib() reads MS2 mgf library correctly", { 17 | MS2_mgf_file <- system.file("MS2.mgf", package = "mspcompiler") 18 | 19 | expect_identical(read_lib(MS2_mgf_file, format = "mgf"), MS2_mgf) 20 | }) 21 | -------------------------------------------------------------------------------- /man/complete_gnps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_MS2.R 3 | 
\name{complete_gnps} 4 | \alias{complete_gnps} 5 | \title{Add formula to the gnps library and remove wrong SMILES} 6 | \usage{ 7 | complete_gnps(lib) 8 | } 9 | \arguments{ 10 | \item{lib}{The \code{list} generated by \code{read_lib} from mgf library.} 11 | } 12 | \value{ 13 | A \code{list} with molecular formula assigned 14 | } 15 | \description{ 16 | \code{complete_gnps} offers a way to complete the molecular formula field in 17 | the mgf file. 18 | } 19 | \details{ 20 | The mgf file downloaded from GNPS has no molecular formula (MF). Therefore, this 21 | function tries to calculate the MF from the SMILES (if it exists). 22 | } 23 | -------------------------------------------------------------------------------- /man/reorganize_mona.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_EI_and_MS2.R 3 | \name{reorganize_mona} 4 | \alias{reorganize_mona} 5 | \title{Reorganize MoNA library} 6 | \usage{ 7 | reorganize_mona(lib) 8 | } 9 | \arguments{ 10 | \item{lib}{The MoNA library generated by \code{read_lib}.} 11 | } 12 | \value{ 13 | A \code{list} with SMILES retrieved. 14 | } 15 | \description{ 16 | \code{reorganize_mona} offers a way to reorganize MoNA library, 17 | mainly to retrieve SMILES from the "Comments" field. 18 | } 19 | \details{ 20 | The msp file from MoNA has no "SMILES" field but has SMILES information 21 | stored in the "Comments" field. Therefore, this function tries to retrieve 22 | SMILES from the "Comments" field. This function supports parallel computing. 
23 | } 24 | -------------------------------------------------------------------------------- /man/read_multilibs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_libraries.R 3 | \name{read_multilibs} 4 | \alias{read_multilibs} 5 | \title{A wrapper to read multiple msp files at a time} 6 | \usage{ 7 | read_multilibs(folder) 8 | } 9 | \arguments{ 10 | \item{folder}{The folder that contains multiple msp files.} 11 | } 12 | \value{ 13 | A single \code{list} combining all msp files 14 | } 15 | \description{ 16 | \code{read_multilibs} offers a way to read multiple msp files at a time and 17 | combine them into a single file. 18 | } 19 | \details{ 20 | When you are building your in-house libraries, you may probably have multiple 21 | msp files at hand (e.g., one msp for one group of compounds). To avoid 22 | employing \code{read_lib} several times, this function provides a way to read 23 | all these files at once. 
24 | } 25 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(assign_ri) 4 | export(assign_smiles) 5 | export(change_meta) 6 | export(combine_mol2sdf) 7 | export(complete_gnps) 8 | export(extract_ri) 9 | export(extract_structure) 10 | export(read_lib) 11 | export(read_multilibs) 12 | export(remove_ri) 13 | export(remove_rt) 14 | export(reorganize_mona) 15 | export(separate_polarity) 16 | export(write_EI_msp) 17 | export(write_MS2_msp) 18 | import(ChemmineOB) 19 | import(ChemmineR, except = c(groups, view)) 20 | import(dplyr) 21 | import(future.apply) 22 | import(readr) 23 | import(rio) 24 | import(rlist) 25 | import(stats, except = c(filter, lag)) 26 | import(stringr) 27 | import(tibble) 28 | importFrom(ChemmineR,MF) 29 | importFrom(ChemmineR,smiles2sdf) 30 | importFrom(qdapRegex,rm_between) 31 | importFrom(rlang,.data) 32 | importFrom(webchem,is.smiles) 33 | -------------------------------------------------------------------------------- /man/separate_polarity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_MS2.R 3 | \name{separate_polarity} 4 | \alias{separate_polarity} 5 | \title{Separate positive and negative modes in MS2 library} 6 | \usage{ 7 | separate_polarity(lib, polarity) 8 | } 9 | \arguments{ 10 | \item{lib}{A MS2 library mixed with positive and negative modes.} 11 | 12 | \item{polarity}{The polarity, can be either "pos" or "neg"} 13 | } 14 | \value{ 15 | A \code{list}, being positive or negative 16 | } 17 | \description{ 18 | \code{separate_polarity} offers a way to separate a MS2 library based on 19 | polarity. 
20 | } 21 | \details{ 22 | Some libraries, e.g., NIST and GNPS, have both positive and negative MS2 23 | records mixed in a single file. However, in practice, a MS2 library should 24 | be either positive or negative based on the polarity used in the experiment. 25 | Therefore, this function provides a way to separate positive and negative 26 | modes in MS2 library. 27 | } 28 | -------------------------------------------------------------------------------- /man/clean_ri_dat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_EI.R 3 | \name{clean_ri_dat} 4 | \alias{clean_ri_dat} 5 | \title{Extract experimental RI from NIST} 6 | \usage{ 7 | clean_ri_dat(file) 8 | } 9 | \arguments{ 10 | \item{file}{The "ri.dat" file in the installation path 11 | (e.g., "~/Programs/nist17/mssearch").} 12 | } 13 | \value{ 14 | A cleaned data.frame containing experimental RI from NIST 15 | } 16 | \description{ 17 | \code{clean_ri_dat}, an internal function, offers a way to extract RI from 18 | the "ri.dat" file. 19 | } 20 | \details{ 21 | Once you have NIST library installed, there will be a "ri.dat" file in the 22 | installation path (e.g., "~/Programs/nist17/mssearch"). This file 23 | contains all experimental RI in the NIST library but it is not human readable. 24 | This function provides a way to convert the "ri.dat" file into a data.frame, 25 | so that we can better leverage the RI information present in the NIST library 26 | and to incorporate them into the msp file. 
27 | } 28 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2021 mspcompiler authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /man/clean_user_dbu.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_EI.R 3 | \name{clean_user_dbu} 4 | \alias{clean_user_dbu} 5 | \title{Extract InChIKey for compounds that have experimental RI} 6 | \usage{ 7 | clean_user_dbu(file) 8 | } 9 | \arguments{ 10 | \item{file}{The "USER.DBU" file in the installation path 11 | (e.g., "~/Programs/nist17/mssearch")} 12 | } 13 | \value{ 14 | A data.frame containing four variables, Name, InChIKey, ID, 15 | and "Formula" 16 | } 17 | \description{ 18 | \code{clean_user_dbu}, an internal function, offers a way to extract InChIKey 19 | for compounds that have experimental RI from the "USER.DBU" file. 20 | } 21 | \details{ 22 | RI values in the cleaned RI table obtained by \code{\link{clean_ri_dat}} 23 | cannot be linked to compounds in the msp file. Providing that the "USER.DBU" 24 | file in the installation path (e.g., "~/Programs/nist17/mssearch") contains 25 | InChIKey of each compound in RI table. However this file is not human 26 | readable. Therefore, this function provides a way to clean the "USER.DBU" 27 | file. Then, we can link experimental RI values to the compounds in the msp 28 | file. 
29 | } 30 | -------------------------------------------------------------------------------- /man/extract_structure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_structures.R 3 | \name{extract_structure} 4 | \alias{extract_structure} 5 | \title{Extract SMILES from the sdf file generated by \code{\link{combine_mol2sdf}}} 6 | \usage{ 7 | extract_structure(input, output) 8 | } 9 | \arguments{ 10 | \item{input}{The sdf file generated by \code{\link{combine_mol2sdf}}, e.g., 11 | "/home/exported.sdf".} 12 | 13 | \item{output}{The location where the structure information will be stored 14 | and its name, e.g., "/home/exported.txt".} 15 | } 16 | \value{ 17 | A data.frame and creates a *.txt file. 18 | } 19 | \description{ 20 | \code{extract_structure} offers a way to retrieve SMILES from the sdf file. 21 | } 22 | \details{ 23 | The function is a wrapper of the \code{convertFormatFile} function from the 24 | \pkg{ChemmineOB} package. As InChI and InChIKey are not supported in Windows- 25 | based systems, this function will automatically determine which type of 26 | operating system you are working with. Only \strong{name} and \strong{SMILES} 27 | will be retrieved if you work with Windows, while \strong{InChI} and 28 | \strong{InChIKey} will be exported as well in Linux-based or Mac OS systems. 
29 | } 30 | -------------------------------------------------------------------------------- /man/assign_ri.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_EI.R 3 | \name{assign_ri} 4 | \alias{assign_ri} 5 | \title{Assign experimental RI to compounds in the msp file} 6 | \usage{ 7 | assign_ri(lib, ri_table, polarity = "semi-polar") 8 | } 9 | \arguments{ 10 | \item{lib}{The EI library generated by \code{read_lib}.} 11 | 12 | \item{ri_table}{The RI table cleaned up by \code{extract_ri}.} 13 | 14 | \item{polarity}{The polarity of the column. Can be either "semi-polar", 15 | "non-polar", or "polar".} 16 | } 17 | \value{ 18 | A \code{list} with experimental RI assigned. 19 | } 20 | \description{ 21 | \code{assign_ri} offers a way to assign experimental RI to the msp file if 22 | you have NIST library installed. 23 | } 24 | \details{ 25 | Depending on the column polarity, experimental RI can be assigned to 26 | compounds in the msp file. Providing that "capillary" GC columns are 27 | commonly used. This function will only keep RI records from "capillary" 28 | columns and "Lee RI" will be removed. When there are multiple records for 29 | a single compound, the median RI will be used and if the standard deviation 30 | is higher than 30, this value will be discarded. This function supports 31 | parallel computing. 
32 | } 33 | -------------------------------------------------------------------------------- /man/extract_ri.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_EI.R 3 | \name{extract_ri} 4 | \alias{extract_ri} 5 | \title{Extract experimental RI from NIST library} 6 | \usage{ 7 | extract_ri(ri_dat, user_dbu) 8 | } 9 | \arguments{ 10 | \item{ri_dat}{The "ri.dat" file in the installation path 11 | (e.g., "~/Programs/nist17/mssearch/nist_ri").} 12 | 13 | \item{user_dbu}{The "USER.DBU" file in the installation path 14 | (e.g., "~/Programs/nist17/mssearch/nist_ri")} 15 | } 16 | \value{ 17 | A \code{data.frame} containing experimental RI and InChIKey assigned. 18 | } 19 | \description{ 20 | \code{extract_ri} offers a way to extract experimental RI from the NIST 21 | library if you have it installed. 22 | } 23 | \details{ 24 | Once you have NIST library installed, there will be a "ri.dat" file in the 25 | installation path (e.g., "~/Programs/nist17/mssearch"). This file 26 | contains all experimental RI in the NIST library but it is not human readable. 27 | This function first converts the "ri.dat" file into a data.frame. However, 28 | it is tricky to link RI values in the cleaned RI table to compounds in the 29 | msp file. Providing that the "USER.DBU" file in the installation path 30 | (e.g., "~/Programs/nist17/mssearch") contains InChIKey of each compound in 31 | RI table, we can assign corresponding InChIKey to the RI table, but this 32 | file is not human readable. Therefore, this function secondly provides a way 33 | to clean the "USER.DBU" file and then assign corresponding InChIKey to the 34 | RI table. 
35 | } 36 | -------------------------------------------------------------------------------- /man/change_meta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_EI_and_MS2.R 3 | \name{change_meta} 4 | \alias{change_meta} 5 | \title{Change meta data} 6 | \usage{ 7 | change_meta(lib, CE = NA, instrument = NA, comment = NA, add = FALSE) 8 | } 9 | \arguments{ 10 | \item{lib}{The in-house library generated by \code{read_lib}.} 11 | 12 | \item{CE}{User defined collision energy. If no CE is supplied, the 13 | CollisionEnergy field will not be changed.} 14 | 15 | \item{instrument}{User defined instrument type. If no instrument is supplied, 16 | the InstrumentType field will not be changed.} 17 | 18 | \item{comment}{User defined comment, e.g., Principle investigator, data 19 | collector, laboratory, etc. If no comment is supplied, the Comment field 20 | will not be changed. If you want to add new comment, please set "add = TRUE", 21 | then old and new comment will be separated by ";". Otherwise, the old 22 | comment will be covered by the new one.} 23 | 24 | \item{add}{A logical scalar. Whether to keep the old comment and add new 25 | comment behind or just replace the old comment. TRUE or FALSE.} 26 | } 27 | \value{ 28 | A \code{list} with meta data assigned. 29 | } 30 | \description{ 31 | \code{change_meta} offers a way to change meta data (mainly used for in-house 32 | library). 33 | } 34 | \details{ 35 | When you build your own mass spectral library (either EI or MS2 library), 36 | you might want to add or change some meta data, such as collision energy, 37 | instrument, and comment. This function provides an easy way to achieve this. 
38 | } 39 | -------------------------------------------------------------------------------- /man/assign_smiles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/organize_libraries_EI_and_MS2.R 3 | \name{assign_smiles} 4 | \alias{assign_smiles} 5 | \title{Assign SMILES} 6 | \usage{ 7 | assign_smiles(lib, structure_data, match = "name") 8 | } 9 | \arguments{ 10 | \item{lib}{The library generated by \code{read_lib}.} 11 | 12 | \item{structure_data}{the correspondent structure data generated by 13 | \code{extract_structure}.} 14 | 15 | \item{match}{Correspondence can be done by either "name" or "inchikey".} 16 | } 17 | \value{ 18 | A \code{list} with SMILES assigned. 19 | } 20 | \description{ 21 | \code{assign_smiles} offers a way to assign SMILES to the library obtained 22 | from NIST format (exported by Lib2NIST with structure information separately 23 | stored in mol files). 24 | } 25 | \details{ 26 | The msp file obtained from Lib2NIST has no SMILES and the structure 27 | information is stored in multiple mol files. After transforming all mol files 28 | into a single sdf file by \code{combine_mol2sdf} and retrieving structure 29 | information by \code{extract_structure}, SMILES is available. This function 30 | provides a way to assign SMILES to correspondent compound in the msp file. 31 | If you are working with Linux-based or Mac OS system, it is better to use 32 | "inchikey" for matching. However, if you are working with Windows, the only 33 | option is to use "name" for matching, which is a kind of compromise as 34 | some chemicals in the *.mol files do not have full chemical names. Hence they 35 | will not be matched. This function is useful for both EI and MS2 libraries. 36 | This function supports parallel computing. 
37 | } 38 | -------------------------------------------------------------------------------- /man/combine_mol2sdf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_structures.R 3 | \name{combine_mol2sdf} 4 | \alias{combine_mol2sdf} 5 | \title{Combine multiple mol files into a single sdf file and remove duplicates} 6 | \usage{ 7 | combine_mol2sdf(input, output, use_filename = FALSE) 8 | } 9 | \arguments{ 10 | \item{input}{The location of the exported *.MOL folder from Lib2NIST, 11 | e.g., "/home/nist.MOL".} 12 | 13 | \item{output}{The location where the sdf file will be stored and its name, 14 | e.g., "/home/exported.sdf".} 15 | 16 | \item{use_filename}{In case you want to use the file name as the Molecule_Name 17 | in the sdf file, please use \code{use_filename = TRUE}. This is useful when 18 | you draw your own chemicals which might not have Molecule_Name in the .MOL files. 19 | With this option, you can use the name of the .MOL files.} 20 | } 21 | \value{ 22 | It will return no value but only creates a sdf file. 23 | } 24 | \description{ 25 | \code{combine_mol2sdf} offers a way to combine multiple mol files into 26 | a single sdf file removing duplicates. 27 | } 28 | \details{ 29 | The msp file exported from NIST format by the \strong{Lib2NIST} software 30 | has no SMILES which is used for viewing the structure in MS-DIAL and is 31 | crucial as well if you want to predict retention index (RI) for compounds 32 | with no experimental RI (in the case of EI library). Chemical structure of 33 | each compound in the NIST library can be exported as a mol file. Every 34 | library entry will have one corresponding mol file. All these mol files will 35 | be stored in a folder with ".MOL" suffix. This function aims to combine all 36 | these mol files into a single sdf file which can be then used to retrieve 37 | SMILES for each entry. 
This function supports parallel computing. 38 | } 39 | -------------------------------------------------------------------------------- /man/read_lib.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/read_libraries.R 3 | \name{read_lib} 4 | \alias{read_lib} 5 | \title{Read msp/mgf mass spectral libraries} 6 | \usage{ 7 | read_lib( 8 | file, 9 | format = "msp", 10 | type = "MS2", 11 | remove_ri = TRUE, 12 | remove_rt = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{file}{Mass spectral library in \code{msp} or \code{mgf} format.} 17 | 18 | \item{format}{The format of the library, either \code{msp} or \code{mgf}.} 19 | 20 | \item{type}{The type of the library, either \code{EI} or \code{MS2}.} 21 | 22 | \item{remove_ri}{A logical scalar only used in case of EI mass spectral 23 | library. Should retention index (RI) be removed? \code{TRUE} or \code{FALSE}} 24 | 25 | \item{remove_rt}{A logical scalar only used in case of MS2 mass spectral 26 | library. Should retention time (RT) be removed? \code{TRUE} or \code{FALSE}} 27 | } 28 | \value{ 29 | A \code{list} with each spectral entry as a list element for further 30 | processing. 31 | } 32 | \description{ 33 | \code{read_lib} offers a way to read mass spectral libraries into R 34 | for further processing. 35 | } 36 | \details{ 37 | This is a generic function to read either EI or MS2 mass spectral libraries. 38 | The library can be either in \code{msp} or \code{mgf} form. For this reason, 39 | it is required to set the format and the type of the input library. The 40 | default is \code{MS2} in \code{msp} format. In the case of EI mass spectral 41 | library, an additional Boolean parameter \code{remove_ri} can be set to 42 | remove or keep the retention index (RI). 
In the case of MS2 mass spectral 43 | library, an additional Boolean parameter \code{remove_rt} can be set to 44 | remove or keep the retention time (RT). This function supports parallel 45 | computing making use of the \pkg{future.apply}. Please see the vignette 46 | for more details. 47 | } 48 | \examples{ 49 | # The first 2 lines only indicate the location where the example files are 50 | # stored. You might not need them. 51 | EI_file <- system.file("EI.msp", package = "mspcompiler") 52 | MS2_mgf_file <- system.file("MS2.mgf", package = "mspcompiler") 53 | 54 | EI <- read_lib(file = EI_file, format = "msp", type = "EI", remove_ri = FALSE) 55 | MS2_mgf <- read_lib(file = MS2_mgf_file, format = "mgf", type = "MS2") 56 | } 57 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mspcompiler 2 | Title: Compile Mass Spectral Libraries from Various Sources 3 | Version: 0.1.4 4 | Authors@R: 5 | person(given = "Qizhi", 6 | family = "Su", 7 | role = c("aut", "cre"), 8 | email = "sukissqz@gmail.com", 9 | comment = c(ORCID = "https://orcid.org/0000-0002-8124-997X")) 10 | Description: Mass spectral library is of great importance for annotating mass 11 | spectrometry data. There are various libraries scatter around, be it 12 | commercial like NIST library or publicly available such as MoNA and GNPS. 13 | And yet, no tool is viable for compiling and organizing these libraries into 14 | a neat, well-organized, and up-to-date library. MS-DIAL is one of the 15 | excellent open-source tools to process MS data, which uses a library in msp 16 | format for annotation purpose. Since version 4.70, MS-DIAL is capable of 17 | viewing molecular structures making use of the SMILES field in the msp 18 | library file. However, some ready-made msp files do not have the SMILES field. 
19 | In the case of EI library, the msp file converted from the NIST library by 20 | lib2nist does not contain the RI (retention index) field which is an 21 | additional measure to increase the identification confidence in GC-MS data. 22 | Moreover, the GNPS library is in mgf format and therefore has to be first 23 | converted into msp format and then combined with others. For this reason, 24 | the mspcompiler package aims to offer ways to compile and organize the latest 25 | mass spectral libraries into a well-organized msp file that can be better 26 | used by other tools, mainly MS-DIAL. 27 | License: MIT + file LICENSE 28 | Encoding: UTF-8 29 | LazyData: true 30 | Roxygen: list(markdown = TRUE) 31 | RoxygenNote: 7.1.2 32 | Imports: 33 | ChemmineOB (>= 1.26.0), 34 | ChemmineR (>= 3.40.0), 35 | dplyr, 36 | future, 37 | future.apply, 38 | qdapRegex, 39 | readr, 40 | rio, 41 | rlang, 42 | rlist, 43 | stats, 44 | stringr, 45 | tibble, 46 | webchem 47 | Depends: 48 | R (>= 2.10) 49 | Suggests: 50 | rmarkdown, 51 | knitr, 52 | parallel, 53 | testthat (>= 3.0.0) 54 | Config/testthat/edition: 3 55 | URL: https://github.com/QizhiSu/mspcompiler 56 | BugReports: https://github.com/QizhiSu/mspcompiler/issues 57 | Remotes: 58 | girke-lab/ChemmineR, 59 | girke-lab/ChemmineOB 60 | VignetteBuilder: knitr 61 | -------------------------------------------------------------------------------- /R/organize_libraries_MS2.R: -------------------------------------------------------------------------------- 1 | #' Remove retention time for MS2 libraries 2 | #' 3 | #' \code{remove_rt} offers a way to remove all retention time for MS2 library. 4 | #' 5 | #' This function supports parallel computing. 6 | #' 7 | #' @param lib The \code{list} generated by \code{read_lib}. 
#' Remove retention time for MS2 libraries
#'
#' \code{remove_rt} offers a way to remove all retention time for MS2 library.
#'
#' Each entry keeps its \code{RetentionTime} field, but the value is
#' overwritten with \code{NA}. This function supports parallel computing.
#'
#' @param lib The \code{list} generated by \code{read_lib}.
#'
#' @return A \code{list} without retention time
#' @export
#'
#' @import future.apply
remove_rt <- function(lib) {
  # Blank the retention time of a single library entry
  blank_rt <- function(entry) {
    entry$RetentionTime <- NA
    entry
  }

  future.apply::future_lapply(lib, blank_rt)
}


#' Separate positive and negative modes in MS2 library
#'
#' \code{separate_polarity} offers a way to separate a MS2 library based on
#' polarity.
#'
#' Some libraries, e.g., NIST and GNPS have both positive and negative MS2
#' records mixed in a single file. However, in practice, a MS2 library should
#' be either positive or negative based on the polarity used in the experiment.
#' Therefore, this function provides a way to separate positive and negative
#' modes in MS2 library. Entries are selected by a case-insensitive search for
#' the letter "P" (positive) or "N" (negative) in their \code{IonMode} field.
#'
#' @param lib A MS2 library mixed with positive and negative modes.
#' @param polarity The polarity, can be either "pos" or "neg". Any value other
#' than "pos" selects the negative entries.
#'
#' @return A \code{list}, being positive or negative
#' @export
#'
#' @import rlist
#' @importFrom rlang .data
separate_polarity <- function(lib, polarity) {
  # Everything that is not explicitly "pos" is treated as negative mode
  if (polarity != "pos") {
    return(
      rlist::list.filter(
        lib, grepl("N", .data$IonMode, ignore.case = TRUE)
      )
    )
  }

  rlist::list.filter(
    lib, grepl("P", .data$IonMode, ignore.case = TRUE)
  )
}
#' Add formula to the gnps library and remove wrong SMILES
#'
#' \code{complete_gnps} offers a way to complete the molecular formula field in
#' the mgf file.
#'
#' The mgf file downloaded from GNPS has no molecular formula (MF). Therefore,
#' this function tries to calculate the MF from the SMILES (if it exists).
#' SMILES that do not pass \code{webchem::is.smiles} are replaced by \code{NA}.
#' When the formula cannot be computed, the \code{Formula} field is left unset.
#'
#' @param lib The \code{list} generated by \code{read_lib} from mgf library.
#'
#' @return A \code{list} with molecular formula assigned
#' @export
#'
#' @import future.apply
#' @importFrom ChemmineR smiles2sdf MF
#' @importFrom webchem is.smiles
#'
#' @rawNamespace import(ChemmineR, except = c(groups, view))
complete_gnps <- function(lib) {
  # Validate the SMILES of one entry and derive its formula from it
  fill_entry <- function(entry) {
    entry$Smiles <- ifelse(webchem::is.smiles(entry$Smiles), entry$Smiles, NA)

    # Any conversion failure yields NULL, i.e. no Formula is stored
    formula <- tryCatch(
      {
        sdf <- ChemmineR::smiles2sdf(entry$Smiles)
        ChemmineR::MF(sdf, addH = TRUE)[[1]]
      },
      error = function(e) NULL
    )
    entry$Formula <- formula

    entry
  }

  future.apply::future_lapply(lib, fill_entry)
}
#' Write EI library into a msp file
#'
#' \code{write_EI_msp} offers a way to write the organized EI library into a
#' msp file.
#'
#' For every entry the fields Name, InChIKey, SMILES, Formula, MW, RI, Comment
#' and Num peaks are written one per line, followed by the peak list
#' ("mz intensity" pairs, one per line). Missing fields are written with an
#' empty value. The output connection is always closed, even when writing an
#' entry fails.
#'
#' @param lib The organized EI library
#' @param filename The location and filename of the msp file to be exported,
#' e.g., "/home/exported.msp".
#'
#' @return No return but create a msp file
#' @export
write_EI_msp <- function(lib, filename) { # nolint
  zz <- file(description = filename, open = "w")
  # Guarantee the connection is released even if an entry is malformed
  on.exit(close(zz), add = TRUE)

  # Write a single "Label: value" header line
  write_field <- function(label, value) {
    cat(paste(label, value, "\n", sep = ""), file = zz)
  }

  for (entry in lib) {
    write_field("Name: ", entry$Name)
    write_field("InChIKey: ", entry$InChIKey)
    write_field("SMILES: ", entry$Smiles)
    write_field("Formula: ", entry$Formula)
    write_field("MW: ", entry$"Molecular weight")
    write_field("RI: ", entry$RI)
    write_field("Comment: ", entry$Comment)
    write_field("Num peaks: ", entry$"Number of peaks")
    # Peak list: one "mz intensity" pair per line
    cat(paste(entry$Spectra$mz, entry$Spectra$ins, sep = " "),
      file = zz, sep = "\n"
    )
    # Entry terminator (kept exactly as in previous versions)
    cat("\r\n", file = zz)
  }

  invisible(NULL)
}
#' Write MS2 library into a msp file
#'
#' \code{write_MS2_msp} offers a way to write the organized MS2 library into a
#' msp file.
#'
#' For every entry the fields Name, PrecursorMZ, PrecursorType, IonMode,
#' Formula, SMILES, InChIKey, RetentionTime, CCS, CollisionEnergy,
#' InstrumentType, Comment and Num peaks are written one per line, followed by
#' the peak list ("mz intensity" pairs, one per line). Missing fields are
#' written with an empty value. The precursor type is read from the
#' \code{PrecursorType} field and, for backward compatibility, from the
#' historically misspelled \code{PrecusorType} field when the former is absent.
#' The output connection is always closed, even when writing an entry fails.
#'
#' @param lib The organized MS2 library
#' @param filename The location and filename of the msp file to be exported,
#' e.g., "/home/exported.msp".
#'
#' @return No return but create a msp file
#' @export
write_MS2_msp <- function(lib, filename) { # nolint
  zz <- file(description = filename, open = "w")
  # Guarantee the connection is released even if an entry is malformed
  on.exit(close(zz), add = TRUE)

  # Write a single "Label: value" header line
  write_field <- function(label, value) {
    cat(paste(label, value, "\n", sep = ""), file = zz)
  }

  for (entry in lib) {
    # Prefer the correctly spelled field; fall back to the legacy spelling
    precursor_type <- entry[["PrecursorType"]]
    if (is.null(precursor_type)) {
      precursor_type <- entry$PrecusorType
    }

    write_field("Name: ", entry$Name)
    write_field("PrecursorMZ: ", entry$PrecursorMZ)
    write_field("PrecursorType: ", precursor_type)
    write_field("IonMode: ", entry$IonMode)
    write_field("Formula: ", entry$Formula)
    write_field("SMILES: ", entry$Smiles)
    write_field("InChIKey: ", entry$InChIKey)
    write_field("RetentionTime: ", entry$RetentionTime)
    write_field("CCS: ", entry$CCS)
    write_field("CollisionEnergy: ", entry$CollisionEnergy)
    write_field("InstrumentType: ", entry$InstrumentType)
    write_field("Comment: ", entry$Comment)
    write_field("Num peaks: ", entry$"Number of peaks")
    # Peak list: one "mz intensity" pair per line
    cat(paste(entry$Spectra$mz, entry$Spectra$ins, sep = " "),
      file = zz, sep = "\n"
    )
    # NOTE(review): write_EI_msp terminates entries with "\r\n" whereas a lone
    # "\r" is emitted here. Kept unchanged so existing output stays
    # byte-compatible -- confirm whether the difference is intentional.
    cat("\r", file = zz)
  }

  invisible(NULL)
}
80 17 | 57 73 18 | 58 13 19 | 59 13 20 | 60 140 21 | 61 13 22 | 62 33 23 | 63 33 24 | 66 13 25 | 68 87 26 | 70 20 27 | 72 53 28 | 73 73 29 | 74 33 30 | 75 27 31 | 76 20 32 | 78 13 33 | 80 40 34 | 81 20 35 | 82 13 36 | 83 33 37 | 86 127 38 | 87 87 39 | 92 20 40 | 93 100 41 | 94 60 42 | 98 147 43 | 99 833 44 | 100 607 45 | 104 40 46 | 107 13 47 | 108 13 48 | 110 33 49 | 112 13 50 | 113 13 51 | 115 13 52 | 116 13 53 | 120 13 54 | 122 40 55 | 123 27 56 | 124 27 57 | 125 20 58 | 126 13 59 | 134 13 60 | 135 20 61 | 137 13 62 | 147 13 63 | 149 20 64 | 150 47 65 | 151 33 66 | 159 20 67 | 162 20 68 | 163 27 69 | 173 20 70 | 174 87 71 | 175 47 72 | 177 20 73 | 187 53 74 | 188 47 75 | 189 567 76 | 190 120 77 | 191 167 78 | 198 107 79 | 199 93 80 | 200 727 81 | 201 999 82 | 202 160 83 | 203 13 84 | 207 13 85 | 214 13 86 | 217 253 87 | 218 53 88 | 247 527 89 | 248 102 90 | 91 | NAME: 2,4-DINITROPHENOL; EI-B; MS 92 | EXACTMASS: 184.0120212 93 | FORMULA: C6H4N2O5 94 | SMILES: [O-1][N+1](=O)c(c1)cc([N+1]([O-1])=O)c(O)c1 95 | ONTOLOGY: Dinitrophenols 96 | INCHIKEY: UFBJCMHMOXMLKC-UHFFFAOYSA-N 97 | RETENTIONTIME: -1 98 | RETENTIONINDEX: 1547.829 99 | QUANTMASS: 184 100 | IONMODE: Positive 101 | COLLISIONENERGY: 70eV 102 | LICENSE: CC BY-SA 103 | Comment: 104 | Num Peaks: 64 105 | 51 272 106 | 52 199 107 | 53 618 108 | 54 68 109 | 55 140 110 | 56 39 111 | 57 115 112 | 60 64 113 | 61 134 114 | 62 362 115 | 63 614 116 | 64 262 117 | 65 67 118 | 66 51 119 | 67 74 120 | 68 103 121 | 69 292 122 | 70 55 123 | 71 61 124 | 73 41 125 | 74 39 126 | 75 35 127 | 76 43 128 | 77 62 129 | 78 51 130 | 79 351 131 | 80 99 132 | 81 160 133 | 82 54 134 | 83 61 135 | 84 30 136 | 85 30 137 | 90 120 138 | 91 533 139 | 92 283 140 | 93 183 141 | 94 35 142 | 95 64 143 | 96 54 144 | 97 51 145 | 98 24 146 | 105 38 147 | 106 64 148 | 107 390 149 | 108 71 150 | 109 40 151 | 111 26 152 | 120 21 153 | 121 45 154 | 122 40 155 | 123 31 156 | 126 21 157 | 136 28 158 | 137 31 159 | 138 36 160 | 149 41 161 | 153 40 162 
| 154 393 163 | 155 32 164 | 168 33 165 | 183 33 166 | 184 999 167 | 185 82 168 | 186 13 169 | 170 | NAME: 3,4-DICHLOROPHENOL; EI-B; MS 171 | EXACTMASS: 161.9639201 172 | FORMULA: C6H4Cl2O 173 | SMILES: Oc(c1)cc(Cl)c(Cl)c1 174 | ONTOLOGY: Dichlorobenzenes 175 | INCHIKEY: WDNBURPWRNALGP-UHFFFAOYSA-N 176 | RETENTIONTIME: -1 177 | RETENTIONINDEX: 1279.325 178 | QUANTMASS: 162 179 | IONMODE: Positive 180 | COLLISIONENERGY: 70eV 181 | LICENSE: CC BY-SA 182 | Comment: 183 | Num Peaks: 36 184 | 51 23 185 | 53 64 186 | 60 41 187 | 61 98 188 | 62 204 189 | 63 324 190 | 64 56 191 | 71 22 192 | 72 83 193 | 73 136 194 | 74 62 195 | 75 52 196 | 81 83 197 | 82 53 198 | 83 28 199 | 91 21 200 | 97 63 201 | 98 256 202 | 99 337 203 | 100 98 204 | 101 123 205 | 107 23 206 | 109 21 207 | 126 77 208 | 127 37 209 | 128 28 210 | 133 51 211 | 134 74 212 | 135 36 213 | 136 48 214 | 161 36 215 | 162 999 216 | 163 87 217 | 164 623 218 | 165 45 219 | 166 98 220 | 221 | NAME: 2,5-DICHLOROPHENOL; EI-B; MS 222 | EXACTMASS: 161.9639201 223 | FORMULA: C6H4Cl2O 224 | SMILES: Oc(c1)c(Cl)ccc(Cl)1 225 | ONTOLOGY: Dichlorobenzenes 226 | INCHIKEY: RANCECPPZPIPNO-UHFFFAOYSA-N 227 | RETENTIONTIME: -1 228 | RETENTIONINDEX: 1243.95 229 | QUANTMASS: 63 230 | IONMODE: Positive 231 | COLLISIONENERGY: 70eV 232 | LICENSE: CC BY-SA 233 | Comment: 234 | Num Peaks: 44 235 | 51 51 236 | 52 23 237 | 53 229 238 | 59 37 239 | 60 166 240 | 61 333 241 | 62 621 242 | 63 999 243 | 64 116 244 | 65 27 245 | 66 41 246 | 71 30 247 | 72 120 248 | 73 323 249 | 74 127 250 | 75 114 251 | 81 67 252 | 82 46 253 | 83 38 254 | 84 30 255 | 85 28 256 | 87 29 257 | 89 22 258 | 90 21 259 | 91 63 260 | 96 36 261 | 97 156 262 | 98 390 263 | 99 337 264 | 100 138 265 | 101 109 266 | 126 90 267 | 127 31 268 | 128 33 269 | 133 63 270 | 134 43 271 | 135 42 272 | 136 26 273 | 161 117 274 | 162 890 275 | 163 124 276 | 164 529 277 | 165 46 278 | 166 88 279 | -------------------------------------------------------------------------------- 
/R/organize_libraries_EI_and_MS2.R: -------------------------------------------------------------------------------- 1 | #' Assign SMILES 2 | #' 3 | #' \code{assign_smiles} offers a way to assign SMILES to the library obtained 4 | #' from NIST format (exported by Lib2NIST with structure information separately 5 | #' stored in mol files). 6 | #' 7 | #' The msp file obtained from Lib2NIST has no SMILES and the structure 8 | #' information is stored in multiple mol files. After transforming all mol files 9 | #' into a single sdf file by \code{combine_mol2sdf} and retrieving structure 10 | #' information by \code{extract_structure}, SMILES is available. This function 11 | #' provides a way to assign SMILES to correspondent compound in the msp file. 12 | #' If you are working with Linux-based or Mac OS system, it is better to use 13 | #' "inchikey" for matching. However, if you are working with Windows, the only 14 | #' option is to use "name" for matching, which is a kind of compromise as 15 | #' some chemicals in the *.mol files do not have full chemical names. Hence they 16 | #' will not be matched. This function is useful for both EI and MS2 libraries. 17 | #' This function supports parallel computing. 18 | #' 19 | #' @param lib The library generated by \code{read_lib}. 20 | #' @param structure_data the correspondent structure data generated by 21 | #' \code{extract_structure}. 22 | #' @param match Correspondence can be done by either "name" or "inchikey". 23 | #' 24 | #' @return A \code{list} with SMILES assigned. 
#' Assign SMILES
#'
#' \code{assign_smiles} offers a way to assign SMILES to the library obtained
#' from NIST format (exported by Lib2NIST with structure information separately
#' stored in mol files).
#'
#' The msp file obtained from Lib2NIST has no SMILES and the structure
#' information is stored in multiple mol files. After transforming all mol files
#' into a single sdf file by \code{combine_mol2sdf} and retrieving structure
#' information by \code{extract_structure}, SMILES is available. This function
#' provides a way to assign SMILES to the corresponding compound in the msp
#' file. If you are working with Linux-based or Mac OS system, it is better to
#' use "inchikey" for matching. However, if you are working with Windows, the
#' only option is to use "name" for matching, which is a kind of compromise as
#' some chemicals in the *.mol files do not have full chemical names. Hence they
#' will not be matched. Name matching lower-cases the entry name before the
#' lookup. This function is useful for both EI and MS2 libraries.
#' This function supports parallel computing.
#'
#' @param lib The library generated by \code{read_lib}.
#' @param structure_data the corresponding structure data generated by
#' \code{extract_structure}.
#' @param match Correspondence can be done by either "name" or "inchikey".
#' Any value other than "name" matches by InChIKey.
#'
#' @return A \code{list} with SMILES assigned.
#' @export
#'
#' @import future.apply
assign_smiles <- function(lib, structure_data, match = "name") {
  # Look up the SMILES of one entry in the structure table
  lookup_smiles <- function(entry) {
    hit <- if (match == "name") {
      match(tolower(entry$Name), structure_data$Name)
    } else {
      match(entry$InChIKey, structure_data$InChIKey)
    }
    entry$Smiles <- structure_data$Smiles[hit]

    entry
  }

  future.apply::future_lapply(lib, lookup_smiles)
}


#' Reorganize MoNA library
#'
#' \code{reorganize_mona} offers a way to reorganize MoNA library,
#' mainly to retrieve SMILES from the "Comments" field.
#'
#' The msp file from MoNA has no "SMILES" field but has SMILES information
#' stored in the "Comments" field. Therefore, this function extracts the
#' double-quoted pieces of the comment, keeps those starting with "SMILES="
#' (case-insensitive) and stores them, without that prefix, in the
#' \code{Smiles} field. This function supports parallel computing.
#'
#' @param lib The MoNA library generated by \code{read_lib}.
#'
#' @return A \code{List} with SMILES retrieved.
#' @export
#'
#' @import future.apply
#' @importFrom qdapRegex rm_between
reorganize_mona <- function(lib) {
  # Pull the SMILES out of the quoted key=value pairs of one entry's comment
  pull_smiles <- function(entry) {
    quoted <- unlist(
      qdapRegex::rm_between(entry$Comment, '"', '"', extract = TRUE)
    )
    is_smiles <- grepl("^SMILES=", quoted, ignore.case = TRUE)
    entry$Smiles <- gsub("^SMILES=", "", quoted[is_smiles], ignore.case = TRUE)

    entry
  }

  future.apply::future_lapply(lib, pull_smiles)
}
#' Change meta data
#'
#' \code{change_meta} offers a way to change meta data (mainly used for in-house
#' library).
#'
#' When you build your own mass spectral library (either EI or MS2 library),
#' you might want to add or change some meta data, such as collision energy,
#' instrument, and comment. This function provides an easy way to achieve this.
#' Fields for which no new value is supplied are left untouched.
#'
#' @param lib The in-house library generated by \code{read_lib}.
#' @param CE User defined collision energy. If no CE is supplied, the
#' CollisionEnergy field will not be changed.
#' @param instrument User defined instrument type. If no instrument is supplied,
#' the InstrumentType field will not be changed.
#' @param comment User defined comment, e.g., Principal investigator, data
#' collector, laboratory, etc. If no comment is supplied, the Comment field
#' will not be changed. If you want to add new comment, please set "add = TRUE",
#' then old and new comment will be separated by ";". Otherwise, the old
#' comment will be replaced by the new one.
#' @param add A logical scalar. Whether to keep the old comment and add new
#' comment behind or just replace the old comment. TRUE or FALSE.
#'
#' @return A \code{list} with meta data assigned.
#' @export
#'
#' @import future.apply
change_meta <-
  function(lib, CE = NA, instrument = NA, comment = NA, add = FALSE) {
    future.apply::future_lapply(lib, function(x) {
      if (!is.na(CE)) {
        x$CollisionEnergy <- CE
      }

      if (!is.na(instrument)) {
        x$InstrumentType <- instrument
      }

      # Only touch the Comment field when a new comment is supplied. The
      # previous implementation assigned the non-existent lower-case field
      # `x$comment` here, which silently deleted the existing comment.
      if (!is.na(comment)) {
        if (add) {
          x$Comment <- paste(x$Comment, comment, sep = ";")
        } else {
          x$Comment <- comment
        }
      }

      return(x)
    })
  }
6.82 9 | CCS: 220.9656493 10 | IONMODE: Positive 11 | COLLISIONENERGY: 12 | Comment: Annotation level-1; PlaSMA ID-2558; ID title-Withanone; Max plant tissue-Standard only 13 | Num Peaks: 10 14 | 68.06053 24 15 | 153.09654 20 16 | 171.07501 20 17 | 181.09505 21 18 | 220.09355 22 19 | 263.13885 18 20 | 283.15988 31 21 | 417.25351 27 22 | 435.2543 24 23 | 471.27539 57 24 | 25 | NAME: Corosolic acid; PlaSMA ID-2570 26 | PRECURSORMZ: 473.36254 27 | PRECURSORTYPE: [M+H]+ 28 | FORMULA: C30H48O4 29 | Ontology: Triterpenoids 30 | INCHIKEY: HFGSQOYIOKBQOW-UHFFFAOYNA-N 31 | SMILES: CC1CCC2(CCC3(C)C(=CCC4C5(C)CC(O)C(O)C(C)(C)C5CCC34C)C2C1C)C(O)=O 32 | RETENTIONTIME: 9.89 33 | CCS: 223.1106231 34 | IONMODE: Positive 35 | COLLISIONENERGY: 36 | Comment: Annotation level-1; PlaSMA ID-2570; ID title-Corosolic acid; Max plant tissue-Standard only 37 | Num Peaks: 74 38 | 109.08733 30 39 | 121.0966 59 40 | 121.10341 27 41 | 123.11742 37 42 | 135.11803 65 43 | 145.0999 42 44 | 147.11229 44 45 | 149.12842 45 46 | 149.17274 17 47 | 159.11057 24 48 | 159.11604 18 49 | 163.11037 25 50 | 163.121 31 51 | 163.14987 37 52 | 164.11726 21 53 | 173.12769 24 54 | 175.13152 17 55 | 175.14751 19 56 | 175.15778 18 57 | 177.16833 22 58 | 179.15005 20 59 | 187.14374 42 60 | 188.16075 18 61 | 189.14455 24 62 | 189.16357 319 63 | 190.16736 24 64 | 203.1581 20 65 | 203.17538 37 66 | 203.18948 27 67 | 204.1898 23 68 | 205.16061 403 69 | 205.1963 78 70 | 205.24748 22 71 | 206.16309 41 72 | 206.1933 18 73 | 207.17136 77 74 | 210.43964 27 75 | 214.16658 17 76 | 215.17697 36 77 | 221.1918 20 78 | 223.15833 18 79 | 235.16354 48 80 | 241.20367 26 81 | 245.19838 19 82 | 249.18239 21 83 | 249.19588 39 84 | 285.26138 17 85 | 295.24545 55 86 | 313.23444 20 87 | 313.25516 18 88 | 391.3255 18 89 | 408.75885 17 90 | 409.35251 327 91 | 410.34262 62 92 | 410.37625 47 93 | 427.35739 65 94 | 427.37686 17 95 | 428.35693 69 96 | 437.30423 21 97 | 437.32928 46 98 | 437.34772 64 99 | 437.37619 20 100 | 438.35327 91 101 | 
439.34619 22 102 | 439.36374 19 103 | 455.3334 21 104 | 455.3522 112 105 | 455.36343 52 106 | 456.34946 107 107 | 457.35306 17 108 | 472.35297 21 109 | 473.34711 55 110 | 473.36707 200 111 | 473.3905 37 112 | 113 | NAME: Maslinic acid; PlaSMA ID-2571 114 | PRECURSORMZ: 473.36254 115 | PRECURSORTYPE: [M+H]+ 116 | FORMULA: C30H48O4 117 | Ontology: Triterpenoids 118 | INCHIKEY: MDZKJHQSJHYOHJ-UHFFFAOYNA-N 119 | SMILES: CC1(C)CCC2(CCC3(C)C(=CCC4C5(C)CC(O)C(O)C(C)(C)C5CCC34C)C2C1)C(O)=O 120 | RETENTIONTIME: 9.77 121 | CCS: 218.152878 122 | IONMODE: Positive 123 | COLLISIONENERGY: 124 | Comment: Annotation level-1; PlaSMA ID-2571; ID title-Maslinic acid; Max plant tissue-Standard only 125 | Num Peaks: 70 126 | 71.06963 24 127 | 107.08249 20 128 | 121.10465 27 129 | 123.10526 24 130 | 133.10515 47 131 | 135.1165 20 132 | 137.13234 17 133 | 147.11588 23 134 | 147.12146 18 135 | 149.12863 18 136 | 161.13533 61 137 | 163.14793 44 138 | 164.15503 24 139 | 173.12798 19 140 | 175.13443 23 141 | 175.14763 20 142 | 177.16472 45 143 | 178.15854 27 144 | 185.13341 20 145 | 187.14854 40 146 | 189.1628 32 147 | 189.1687 66 148 | 191.18356 37 149 | 192.19316 20 150 | 193.16087 36 151 | 201.16898 18 152 | 203.16878 28 153 | 203.179 82 154 | 203.19046 32 155 | 204.18051 24 156 | 204.1913 22 157 | 205.1539 19 158 | 205.16736 37 159 | 205.19719 36 160 | 206.15448 20 161 | 207.17291 61 162 | 207.18587 37 163 | 208.18057 41 164 | 216.17879 21 165 | 222.19405 23 166 | 231.17532 28 167 | 233.19249 21 168 | 247.16046 17 169 | 267.17273 20 170 | 324.28058 24 171 | 349.28433 25 172 | 375.46722 22 173 | 381.34262 22 174 | 391.33182 21 175 | 392.33908 57 176 | 409.33063 59 177 | 409.34772 311 178 | 409.35947 88 179 | 410.34534 53 180 | 410.36133 93 181 | 410.37891 17 182 | 411.32861 19 183 | 411.36859 49 184 | 412.34894 29 185 | 427.36331 38 186 | 437.32886 117 187 | 437.35199 44 188 | 438.34506 18 189 | 439.32047 18 190 | 439.35138 20 191 | 439.36945 22 192 | 455.33713 36 193 | 455.36435 45 194 | 
473.34592 100 195 | 473.36807 192 196 | 197 | NAME: Soyasapogenol A; PlaSMA ID-2579 198 | PRECURSORMZ: 475.37819 199 | PRECURSORTYPE: [M+H]+ 200 | FORMULA: C30H50O4 201 | Ontology: Triterpenoids 202 | INCHIKEY: CDDWAYFUFNQLRZ-UHFFFAOYNA-N 203 | SMILES: CC1(C)CC2C3=CCC4C5(C)CCC(O)C(C)(CO)C5CCC4(C)C3(C)CCC2(C)C(O)C1O 204 | RETENTIONTIME: 8.94 205 | CCS: 219.2367595 206 | IONMODE: Positive 207 | COLLISIONENERGY: 208 | Comment: Annotation level-1; PlaSMA ID-2579; ID title-Soyasapogenol A; Max plant tissue-Standard only 209 | Num Peaks: 20 210 | 123.08025 22 211 | 151.10565 19 212 | 188.14404 17 213 | 223.16701 22 214 | 228.18935 16 215 | 241.19829 16 216 | 245.18082 24 217 | 245.2159 20 218 | 351.25888 16 219 | 381.30728 16 220 | 398.30753 24 221 | 439.35776 25 222 | 439.37079 17 223 | 440.35336 21 224 | 440.36847 38 225 | 457.35892 50 226 | 457.37723 38 227 | 457.39117 17 228 | 458.36426 18 229 | 459.37164 20 230 | -------------------------------------------------------------------------------- /R/read_structures.R: -------------------------------------------------------------------------------- 1 | #' Combine multiple mol files into a single sdf file and remove duplicates 2 | #' 3 | #' \code{combine_mol2sdf} offers a way to combine multiple mol files in to 4 | #' a single sdf file removing duplicates. 5 | #' 6 | #' The msp file exported from NIST format by the \strong{Lib2NIST} software 7 | #' has no SMILES which is used for viewing the structure in MS-DIAL and is 8 | #' crucial as well if you want to predict retention index (RI) for compounds 9 | #' with no experimental RI (in the case of EI library). Chemical structure of 10 | #' each compound in the NIST library can be exported as a mol file. Every 11 | #' library entry will have one corresponding mol file. All these mol files will 12 | #' be stored in a folder with ".MOL" suffix. 
#' Combine multiple mol files into a single sdf file and remove duplicates
#'
#' \code{combine_mol2sdf} offers a way to combine multiple mol files into
#' a single sdf file removing duplicates.
#'
#' The msp file exported from NIST format by the \strong{Lib2NIST} software
#' has no SMILES which is used for viewing the structure in MS-DIAL and is
#' crucial as well if you want to predict retention index (RI) for compounds
#' with no experimental RI (in the case of EI library). Chemical structure of
#' each compound in the NIST library can be exported as a mol file. Every
#' library entry will have one corresponding mol file. All these mol files will
#' be stored in a folder with ".MOL" suffix. This function aims to combine all
#' these mol files into a single sdf file which can then be used to retrieve
#' SMILES for each entry. Only files whose name ends in ".mol"
#' (case-insensitive) are read. This function supports parallel computing.
#'
#' @param input The location of the exported *.MOL folder from Lib2NIST,
#' e.g., "/home/nist.MOL".
#' @param output The location where the sdf file will be stored and its name,
#' e.g., "/home/exported.sdf".
#' @param use_filename In case you want to use the file name as the Molecule_Name
#' in the sdf file, please use \code{use_filename = TRUE}. This is useful when
#' you draw your own chemicals which might not have Molecule_Name in the .MOL files.
#' With this option, you can use the name of the .MOL files.
#'
#' @return It will return no value but only creates a sdf file.
#' @export
#'
#' @import future.apply
#' @rawNamespace import(ChemmineR, except = c(groups, view))
#' @import rlist
combine_mol2sdf <- function(input, output, use_filename = FALSE) {
  # `pattern` is a regular expression, not a glob: anchor on the extension so
  # that only real *.mol files are picked up.
  mols <- list.files(
    path = input, pattern = "\\.mol$",
    full.names = TRUE, ignore.case = TRUE
  )
  if (length(mols) == 0) {
    stop("No .mol files found in '", input, "'.", call. = FALSE)
  }

  # Read every mol file into its own single-molecule SDFset.
  if (use_filename) {
    # Allows to use file name as the compound name
    # it is useful for home-draw chemicals which might not have Molecule_Name
    sdfset <- future.apply::future_lapply(
      mols, function(mol) {
        tmp <- read.SDFset(mol, skipErrors = TRUE)
        # Drop the directory part (either separator), then the extension
        name <- gsub(".*\\\\|.*/", "", mol)
        name <- sub("\\.mol$", "", name, ignore.case = TRUE)
        tmp@SDF[[1]]@header[["Molecule_Name"]] <- name

        return(tmp)
      }
    )
  } else {
    sdfset <- future.apply::future_lapply(
      mols, function(mol) read.SDFset(mol, skipErrors = TRUE)
    )
  }

  # Remove duplicates based on "name" to save time and export the sdf file
  sdfset <- lapply(sdfset, "[[", 1)
  # First element of the first block of each SDF, i.e. the Molecule_Name
  name <- vapply(sdfset, function(x) x[[1]][[1]], character(1))
  sdfset <- sdfset[!duplicated(name)] # Keep the first occurrence of each name
  sdfset <- SDFset(SDFlist = sdfset) # Turn it back to SDFset class
  # Assign cid for each SDF. It is important! Otherwise no information
  # will be converted
  cid(sdfset) <- paste0("CMP", seq_along(sdfset))
  sdfset <- sdfset[validSDF(sdfset)] # Important to remove invalid sdf

  write.SDF(sdfset, output)
}
#' Extract SMILES from the sdf file generated by \code{\link{combine_mol2sdf}}
#'
#' \code{extract_structure} offers a way to retrieve SMILES from the sdf file.
#'
#' The function is a wrapper of the \code{convertFormatFile} function from the
#' \pkg{ChemmineOB} package. As InChI and InChIKey are not supported in Windows-
#' based systems, this function will automatically determine which type of
#' operating system you are working with. Only \strong{name} and \strong{SMILES}
#' will be retrieved if you work with Windows, while \strong{InChI} and
#' \strong{InChIKey} will be exported as well in Linux-based or Mac OS systems.
#' Compound names are converted to lower case, and the cleaned table is
#' written back to \code{output}.
#'
#' @param input The sdf file generated by \code{\link{combine_mol2sdf}}, e.g.,
#' "/home/exported.sdf".
#' @param output The location where the structure information will be stored
#' and its name, e.g., "/home/exported.txt".
#'
#' @return A data.frame and creates a *.txt file.
#' @export
#'
#' @import ChemmineOB
#' @import rio
extract_structure <- function(input, output) {
  on_windows <- grepl("windows", Sys.info()[1], ignore.case = TRUE)

  if (on_windows) {
    # Only extract smiles in windows system, as it does not support inchi
    # with inchi and inchikey, it takes longer time.
    ob_options <- data.frame(
      names = "e",
      args = ""
    )
    columns <- c("Smiles", "Name")
  } else {
    # Include inchi and inchikey
    # \t (tab-delimited) in the "args"
    ob_options <- data.frame(
      names = "append",
      args = "\tinchi\tinchikey"
    )
    columns <- c("Smiles", "Name", "InChI", "InChIKey")
  }

  ChemmineOB::convertFormatFile("SDF",
    "SMI",
    input,
    output,
    options = ob_options
  )

  # Read back the converted data into R, label it and normalise the names
  structure_data <- rio::import(output, header = FALSE)
  colnames(structure_data) <- columns
  structure_data$Name <- tolower(structure_data$Name)
  rio::export(structure_data, output)

  return(structure_data)
}


#' Remove retention index for EI libraries
#'
#' \code{remove_ri} offers a way to remove all RI for EI libraries.
#'
#' Each entry keeps its \code{RI} field, but the value is overwritten with
#' \code{NA}. This function supports parallel computing.
#'
#' @param lib The \code{list} generated by \code{read_lib}.
#'
#' @return A \code{list} without RI
#' @export
#'
#' @import future.apply
remove_ri <- function(lib) {
  # Blank the retention index of a single library entry
  blank_ri <- function(entry) {
    entry$RI <- NA
    entry
  }

  future.apply::future_lapply(lib, blank_ri)
}
# Define characters to be kept and keep them as raw.
# Bytes outside this set are dropped when cleaning the binary NIST files.
keep_char <- sapply(
  c(
    letters, LETTERS, 0:9, "*", ".", ",", ";", '"', "'", "\\",
    "/", ":", "_", "^", "%", "&", "{", "}", "[", "]", "(", ")",
    "+", "-", "|", "=", "@", "#", "!", "$", "\n", "\t", " "
  ),
  charToRaw
)


#' Extract experimental RI from NIST
#'
#' \code{clean_ri_dat}, an internal function, offers a way to extract RI from
#' the "ri.dat" file.
#'
#' Once you have NIST library installed, there will be a "ri.dat" file in the
#' installation path (e.g., "~/Programs/nist17/mssearch"). This file
#' contains all experimental RI in the NIST library but it is not human readable.
#' This function provides a way to convert the "ri.dat" file into a data.frame,
#' so that we can better leverage the RI information present in the NIST library
#' and to incorporate them into the msp file. Intermediate results are written
#' to a session temporary file which is removed when the function exits, so
#' nothing is created in (or deleted from) the working directory.
#'
#'
#' @param file The "ri.dat" file in the installation path
#' (e.g., "~/Programs/nist17/mssearch").
#'
#' @return A cleaned data.frame containing experimental RI from NIST
#'
#' @import readr
#' @import rio
#' @export
clean_ri_dat <- function(file) {
  # Scratch file in the session temp dir; always cleaned up, even on error
  scratch <- tempfile(fileext = ".txt")
  on.exit(unlink(scratch), add = TRUE)

  # Read the file in binary.
  tmp <- readr::read_file_raw(file)
  # Convert all NUL characters to \n
  tmp[tmp == as.raw(0)] <- charToRaw("\n")
  # Keep only pre-defined characters
  tmp <- tmp[tmp %in% keep_char]

  # Write it into a *.txt to allow being re-read in text form
  readr::write_file(tmp, scratch)
  tmp <- readLines(scratch)
  # Keep only lines that start with upper-case letters followed by digits
  # (the compound ID); everything else is residue from the binary format
  tmp <- tmp[grepl("^[A-Z]+[0-9]+", tmp)]

  # Write it into a *.txt to allow being read in tab delimited form
  writeLines(tmp, scratch)
  tmp <- utils::read.delim(scratch, header = FALSE)
  # Set column names
  names(tmp) <- c(
    "ID", "Name", "Molecular_Formula", "RI", "Column_Type",
    "Column_Polarity", "Column", "Column_Length", "Carrier_Gas",
    "Substrate", "Column_Diameter", "Phase_Thickness", "RI_Type",
    "Ramp_Type", "Temperature1", "Temperature2",
    "Temperature_Increment", "Time1", "Time2", "Ramp_Detail", "Note"
  )

  return(tmp)
}
#'
#' Intermediate text is written to a temporary file created with
#' \code{tempfile()}, which is removed when the function exits (even on
#' error). Nothing is written to the working directory.
#'
#' @param file The "USER.DBU" file in the installation path
#' (e.g., "~/Programs/nist17/mssearch")
#'
#' @return A data.frame containing four variables, Name, InChIKey, ID,
#' and "Formula"
#'
#' @import readr
#' @import stringr
#' @import rio
#' @export
clean_user_dbu <- function(file) {
  # Use a session-private scratch file instead of "tmp.txt" in the working
  # directory, so that a user file of that name is never overwritten or
  # deleted. The ".txt" extension is kept because rio::import() picks its
  # reader from the file extension.
  scratch <- tempfile(fileext = ".txt")
  on.exit(unlink(scratch), add = TRUE)

  bytes <- readr::read_file_raw(file)
  # Convert all SOH characters to \n
  bytes[bytes == as.raw(1)] <- charToRaw("\n")
  # change all NUL to \t
  bytes[bytes == as.raw(0)] <- charToRaw("\t")
  # Keep only pre-defined characters
  bytes <- bytes[bytes %in% keep_char]
  # Write the bytes out so that they can be re-read in text form
  readr::write_file(bytes, scratch)

  entries <- readLines(scratch, warn = FALSE)
  # Drop everything up to, and including, the first tab of the last pair of
  # consecutive tabs on each line (the match is greedy).
  entries <- str_remove(entries, "^.*\t(?=\t)")
  # Blank out lines that consist only of a tab followed by one or two characters
  entries <- str_remove_all(entries, "^\t.{1,2}$")
  entries <- str_trim(entries, side = "both")
  # Discard empty lines
  entries <- entries[str_detect(entries, "^.+")]
  entries <- entries[str_count(entries) > 5] # previously 4, but in nist23, must be 5
  # Turn the first literal " $$ $:28" into a column separator
  # NOTE(review): presumably the marker that precedes the InChIKey - confirm.
  entries <- str_replace(entries, " \\${2} \\$:28", "\t")
  # Remove the first remaining "$$" together with the text up to the next tab
  entries <- str_replace(entries, "\\$\\$\\s*[^\\t]*", "")

  # Write the cleaned entries out so that they can be read in tab delimited form
  writeLines(entries, scratch)

  dbu <- rio::import(
    scratch, fill = TRUE, comment.char = "",
    header = FALSE, quote = "", sep = "\t"
  )
  colnames(dbu) <- c("Name", "InChIKey", "ID", "Formula")

  return(dbu)
}


#' Extract experimental RI from NIST library
#'
#' \code{extract_ri} offers a way to extract experimental RI from the NIST
#' library if you have it installed.
#'
#' Once you have NIST library installed, there will be a "ri.dat" file in the
#' installation path (e.g., "~/Programs/nist17/mssearch").
#' This file
#' contains all experimental RI in the NIST library but it is not human readable.
#' This function firstly converts the "ri.dat" file into a data.frame. However,
#' it is tricky to link RI values in the cleaned RI table to compounds in the
#' msp file. Providing that the "USER.DBU" file in the installation path
#' (e.g., "~/Programs/nist17/mssearch") contains InChIKey of each compound in
#' RI table, we can assign correspondent InChIKey to the RI table, but this
#' file is not human readable. Therefore, this function secondly provides a way
#' to clean the "USER.DBU" file and then assign correspondent InChIKey to the
#' RI table.
#'
#' @param ri_dat The "ri.dat" file in the installation path
#' (e.g., "~/Programs/nist17/mssearch/nist_ri").
#' @param user_dbu The "USER.DBU" file in the installation path
#' (e.g., "~/Programs/nist17/mssearch/nist_ri")
#'
#' @return A \code{data.frame} containing experimental RI and InChIKey assigned.
#' @export
#'
#' @import dplyr
#' @importFrom rlang .data
extract_ri <- function(ri_dat, user_dbu) {
  # First, clean ri.dat and re-order it based on ID, which is important to
  # assign correspond ID.
  nist_ri <- clean_ri_dat(ri_dat) %>% arrange(.data$ID)
  # Number the compounds in the order in which they appear after sorting.
  # Deriving the index from the sorted column itself (rather than from a
  # separate table() call) guarantees that it follows the row order even if
  # table() and arrange() would sort the IDs differently.
  nist_ri <-
    nist_ri %>%
    mutate(correspond_ID = match(.data$ID, unique(.data$ID))) %>%
    relocate("correspond_ID", .before = "ID")

  # Second, clean the USER.DBU file and assign correspond ID.
  # !!! The appearance of a compound in this list is the same as that in
  # the nist_ri after re-ordering, so the row number is the correspond ID.
  nist_ri_inchikey <-
    clean_user_dbu(user_dbu) %>%
    as_tibble() %>%
    mutate(
      ID = as.numeric(stringr::str_remove(.data$ID, "@")),
      correspond_ID = row_number()
    )

  # Assign inchikey to nist_ri
  nist_ri <-
    nist_ri %>%
    mutate(InChIKey = nist_ri_inchikey$InChIKey[
      match(.data$correspond_ID, nist_ri_inchikey$correspond_ID)
    ]) %>%
    relocate("InChIKey", .before = "Molecular_Formula")

  return(nist_ri)
}


#' Assign experimental RI to compounds in the msp file
#'
#' \code{assign_ri} offers a way to assign experimental RI to the msp file if
#' you have NIST library installed.
#'
#' Depending on the column polarity, experimental RI can be assigned to
#' compounds in the msp file. Providing that "capillary" GC columns are
#' commonly used. This function will only keep RI records from "capillary"
#' columns and "Lee RI" will be removed. RI records without an InChIKey are
#' ignored because they cannot be linked to any compound. When there are
#' multiple records for a single compound, the median RI will be used and if
#' the standard deviation is higher than 30, this value will be discarded.
#' Entries that already have an RI keep it; entries without an InChIKey get
#' \code{NA}. This function supports parallel computing.
#'
#' @param lib The EI library generated by \code{read_lib}.
#' @param ri_table The RI table cleaned up by \code{extract_ri}.
#' @param polarity The polarity of the column. Can be either "semi-polar",
#' "non-polar", or "polar". Any other value is treated as "polar" with a
#' warning.
#'
#' @return A \code{list} with experimental RI assigned.
#' @export
#'
#' @import dplyr
#' @import tibble
#' @import future.apply
#' @importFrom rlang .data
#' @rawNamespace import(stats, except = c(filter, lag))
assign_ri <-
  function(lib, ri_table, polarity = "semi-polar") {
    # Translate the user-facing polarity into the label used in the RI table.
    polarity_labels <- c(
      "semi-polar" = "Semi-standard non-polar",
      "non-polar" = "Standard non-polar",
      "polar" = "Standard polar"
    )
    if (polarity %in% names(polarity_labels)) {
      column_polarity <- polarity_labels[[polarity]]
    } else {
      # Historical behaviour: anything unrecognised is treated as "polar".
      # Keep that, but tell the user, since a typo would otherwise silently
      # select the wrong RI values.
      warning(
        "Unrecognised polarity \"", polarity, "\"; using \"polar\". ",
        "Valid values are \"semi-polar\", \"non-polar\", and \"polar\".",
        call. = FALSE
      )
      column_polarity <- "Standard polar"
    }

    exp_ri <- ri_table %>%
      # Subset RI based on polarity provided.
      filter(.data$Column_Polarity == column_polarity) %>%
      # Remove Lee RI and only keep Capillary RI
      filter(.data$RI_Type != "Lee RI" & .data$Column_Type == "Capillary") %>%
      # Records without an InChIKey cannot be linked to a compound. Left in,
      # they would form a group of their own that matches every library entry
      # whose InChIKey is NA (or empty).
      filter(!is.na(.data$InChIKey) & .data$InChIKey != "") %>%
      group_by(.data$InChIKey) %>%
      # Experimental RI will be rounded to integer while predicted RI will have
      # two digit numbers. This tiny distinction can be easily differentiated
      # in MS-DIAL to help people understand how well the match is.
      # SD must be computed before RI is overwritten by the median.
      summarise(
        SD = round(sd(.data$RI)),
        RI = round(median(.data$RI)),
        number = n()
      ) %>%
      # Change SD values of only one replicate to 0
      mutate(SD = coalesce(.data$SD, 0)) %>%
      # In the case of multiple records, SD higher than 30 will be removed.
      filter(.data$SD <= 30)

    # Finally, assign experimental RI to the msp file.
    future.apply::future_lapply(lib, function(x) {
      if (length(x$InChIKey) != 0) {
        # Only fill in RI when the entry has none; a missing RI field (NULL)
        # is treated the same as NA.
        if (is.null(x$RI) || all(is.na(x$RI))) {
          x$RI <- exp_ri$RI[match(x$InChIKey, exp_ri$InChIKey)]
        }
      } else {
        x$RI <- NA
      }

      return(x)
    })
  }
-------------------------------------------------------------------------------- /vignettes/Compile_EI_and_tandem_mass_spectral_libraries.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Compile EI and tandem mass spectral libraries" 3 | description: Learn how to compile various EI or MS2 mass spectral libraries 4 | into a single, up-to-date, and MS-DIAL friendly msp file.
5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Compile EI and tandem mass spectral libraries} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | ```{r, include = FALSE} 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "#>", 15 | eval = FALSE 16 | ) 17 | ``` 18 | 19 | ```{r setup, include = FALSE} 20 | library(mspcompiler) 21 | ``` 22 | 23 | Here, we will show you in detail how to compile various mass spectral libraries 24 | into a single, up-to-date, and MS-DIAL friendly library in msp format. This tutorial 25 | will be divided into two sections explaining the pipeline to process EI and tandem 26 | mass spectral libraries, respectively. To understand each function in detail, 27 | please use "help or ?", for example, 28 | ```{r} 29 | ?read_lib 30 | ``` 31 | 32 | ## EI libraries 33 | ### NIST EI library 34 | NIST is the most commonly used **commercial** EI library. Once you have the 35 | NIST library installed, you can transformed it into a msp file by *Lib2NIST*. 36 | Normally *Lib2NIST* will be installed along with the NIST library installation. 37 | If not, you can download it from https://chemdata.nist.gov/dokuwiki/doku.php?id=chemdata:nist17. 38 | Please use the following settings in *Lib2NIST*: 39 | 40 | 1. *Add Input Libraries/Files*. For Agilent users, the input file can be found 41 | in, for example, "C:/database/NIST14.L". 42 | 43 | 2. Tick *Use Subset* and click *Define Subset* to set detail parameters: 44 | 45 | * Enter which spectra should be exported in *List of Spectra IDs*. 46 | 47 | ![Settings inside *Define Subset* in Lib2NIST Programe](Lib2NIST_define_subset.png){width=500px} 48 | 49 | * The total number of spectra that your NIST library have can be checked in the *MS Search* 50 | program (Options -> Libraries -> Add both *mainlib* and *relib*). Then you can 51 | see the number of spectra your NIST library have. 
52 | 53 | ![Screenshot of checking total number of spectra in the MS Search Program](check_number_of_spectra_nist.png){width=500px} 54 | 55 | 3. Select "Text File(.MSP) + MOLfiles linked by BOTH" in *Output Format*; 56 | 57 | 4. Select the library in *Input Libraries or Text Files* and *Convert*. 58 | ![Configuration of Lib2NIST](Lib2NIST_configuration.png){width=500px} 59 | 60 | Once you have the \*.MSP file and the correspondent \*.MOL folder exported, you 61 | can use the following code to add *SMILES* and *Retention Index (RI)*. 62 | 63 | The whole process is time-consuming (several hours, depending on the capability 64 | of your PC), so we suggests to use parallel computing. 65 | 66 | ```{r} 67 | library(mspcompiler) 68 | library(future) 69 | library(future.apply) 70 | library(parallel) 71 | # Set up parallel computing. Just remember to set it back once you have the 72 | # library compiled by "plan(sequential)". We will include it later. 73 | plan(multisession(workers = detectCores() - 1)) 74 | ``` 75 | 76 | Below are the code to process NIST EI library assuming that we have all files stored 77 | in the *MS_libraries* folder in the D disk. You may need to change the path 78 | accordingly. **Note**: Since the *.MOL folder contains a large number of mol files, 79 | it will be time-consuming to move, copy, or delete this folder. As such, we 80 | recommend pick up a good location to put this file when you are exporting in 81 | Lib2NIST, avoiding moving it. 82 | ```{r} 83 | # Read the msp file into R. 84 | nist_ei <- read_lib("D:/MS_libraries/NIST.MSP", type = "EI") 85 | # Combine all mol files into a single sdf file for subsequent structure retrieval. 86 | combine_mol2sdf("D:/MS_libraries/NIST.MOL", "D:/MS_libraries/nist.sdf") 87 | # Extract structure based on the sdf file exported before. 88 | nist_ei_structure <- extract_structure("D:/MS_libraries/nist.sdf", "D:/MS_libraries/nist_structure.txt") 89 | # Assign SMILES to the library. 
If you are working with Linux-based or Mac OS, 90 | # please use "match = "inchikey". 91 | nist_ei <- assign_smiles(nist_ei, nist_ei_structure, match = "name") 92 | ``` 93 | 94 | ### RIKEN EI library 95 | The MS-DIAL developers have compiled an EI library with Kovat RI included. 96 | This library can be downloaded from http://prime.psc.riken.jp/compms/msdial/main.html#MSP. 97 | Please download "All records with Kovats RI...EI-MS..." As it contains Kovats RI, 98 | we can set *remove_ri* to **FALSE** to keep original RI in this file. This file 99 | already have SMILES and InChIKey well-organized. Hence, no further treatment is 100 | needed. 101 | ```{r} 102 | riken_ei <- read_lib("D:/MS_libraries/GCMS DB-Public-KovatsRI-VS3.msp", 103 | type = "EI", remove_ri = FALSE) 104 | ``` 105 | 106 | ### MoNA EI library 107 | The MassBank of North America (MoNA) has an EI library available for download 108 | as well, https://mona.fiehnlab.ucdavis.edu/downloads. Please download 109 | "GC-MS Spectra" in "MSP" form. This file has SMILES information though, it is in 110 | the *Comment* field. Therefore, the SMILES has to be extracted from the *Comment* 111 | and put into the *SMILES* field by the *reorganize_mona* function. 112 | ```{r} 113 | mona_ei <- read_lib("D:/MS_libraries/MoNA-export-GC-MS_Spectra.msp", type = "EI") 114 | mona_ei <- reorganize_mona(mona_ei) 115 | ``` 116 | 117 | ### SWGDRUG EI library 118 | The Scientific Working Group for the Analysis of Seized Drugs (SWGDRUG) has 119 | complied an EI library with drug or drug-related compounds. The library is 120 | available in https://swgdrug.org/ms.html. The library is available in multiple 121 | formats. To correctly parse this library by mspcompiler, please download both 122 | **NIST Format** and **Agilent Format**. Then use *Lib2NIST* to convert the 123 | **NIST Format** into a *msp* file and to transform the **Agilent Format** into 124 | the *mol* files. 
Finally, we can read it in a way similar to that used for the 125 | NIST library. 126 | ```{r} 127 | swgdrug_ei <- read_lib("D:/MS_libraries/SWGDRUG.MSP", type = "EI") 128 | combine_mol2sdf("D:/MS_libraries/SWGDRUG.MOL", "D:/MS_libraries/swgdrug.sdf") 129 | swgdrug_ei_structure <- extract_structure("D:/MS_libraries/swgdrug.sdf", "D:/MS_libraries/swgdrug_structure.txt") 130 | # As the SWGDRUG file does not contain InChIKey information, even though you 131 | # are working with Linux-based or Mac OS, you should not use match = "inchikey". 132 | # match = "name" is more than enough in this case. 133 | swgdrug_ei <- assign_smiles(swgdrug_ei, swgdrug_ei_structure, match = "name") 134 | ``` 135 | 136 | ### Combine all libraries 137 | After reading in and organizing all these libraries, we can now combine them into a 138 | single file and assign experimental RI retrieved from the "ri.dat" and "USER.DBU" files 139 | (if you have NIST library installed). 140 | ```{r} 141 | # Combine them 142 | combine_ei <- c(nist_ei, riken_ei, mona_ei, swgdrug_ei) 143 | 144 | # Extract experimental RI from the "ri.dat" and "USER.DBU" files. Once you have 145 | # NIST library installed, these files can be found in, for example, 146 | # "~/Programs/nist14/mssearch/nist_ri". Assuming you have copied these two files 147 | # in the D:/MS_libraries folder, then you will have: 148 | nist_ri <- extract_ri("D:/MS_libraries/ri.dat", "D:/MS_libraries/USER.DBU") 149 | # Assign experimental RI to the combined library depending on the column 150 | # polarity. The polarity can be "semi-polar", "non-polar", or "polar". 151 | # Providing that "capillary" GC columns are commonly used. This 152 | # function will only keep RI records from "capillary" columns and "Lee RI" 153 | # will be removed. When there are multiple records for a single compound, 154 | # the median RI will be used and if the standard deviation is higher than 30, 155 | # this value will be discarded.
156 | combine_ei <- assign_ri(combine_ei, nist_ri, polarity = "semi-polar") 157 | # At the end, disable parallel computing 158 | plan(sequential) 159 | # Then you can write it out 160 | write_EI_msp(combine_ei, "D:/MS_libraries/combine_ei.msp") 161 | ``` 162 | 163 | ## MS2 libraries 164 | ```{r} 165 | library(mspcompiler) 166 | library(future) 167 | library(future.apply) 168 | library(parallel) 169 | plan(multisession(workers = detectCores() - 1)) 170 | ``` 171 | MS2 libraries can be processed in a similar way, but positive and negative modes 172 | are normally separated into 2 msp files. 173 | ### NIST MS2 library 174 | The NIST MS2 library can be treated in the same way as the NIST EI library detailed 175 | above with only one exception. That is the input file for *Lib2NIST* can be 176 | found in "C:/Programs/nist14/mssearch/nist_msms". The exported msp file has both 177 | positive and negative modes mixed in a single file, so we have to separate 178 | them by the separate_polarity function. 179 | ```{r} 180 | nist_ms2 <- read_lib("D:/MS_libraries/NIST_msms.MSP", type = "MS2") 181 | combine_mol2sdf("D:/MS_libraries/NIST_msms.MOL", "D:/MS_libraries/nist_msms.sdf") 182 | nist_ms2_structure <- extract_structure("D:/MS_libraries/nist_msms.sdf", "D:/MS_libraries/nist_msms_structure.txt") 183 | nist_ms2 <- assign_smiles(nist_ms2, nist_ms2_structure, match = "name") 184 | nist_ms2_pos <- separate_polarity(nist_ms2, polarity = "pos") 185 | nist_ms2_neg <- separate_polarity(nist_ms2, polarity = "neg") 186 | ``` 187 | 188 | ### RIKEN MS2 libraries 189 | The RIKEN MS2 libraries can be downloaded from the MS-DIAL homepage http://prime.psc.riken.jp/compms/msdial/main.html#MSP. Please download all 190 | public MS/MS positive and negative, separately.
191 | ```{r} 192 | riken_ms2_pos <- read_lib("D:/MS_libraries/MSMS-Public-Pos-VS15.msp") 193 | riken_ms2_neg <- read_lib("D:/MS_libraries/MSMS-Public-Neg-VS15.msp") 194 | ``` 195 | 196 | ### MoNA MS2 libraries 197 | The MoNA MS2 libraries can be downloaded from https://mona.fiehnlab.ucdavis.edu/downloads. 198 | Please download "LC-MS/MS Positive Mode" and "LC-MS/MS Negative Mode", separately. 199 | ```{r} 200 | mona_ms2_pos <- read_lib("D:/MS_libraries/MoNA-export-LC-MS-MS_Positive_Mode.msp") 201 | # Reorganize the SMILES field. 202 | mona_ms2_pos <- reorganize_mona(mona_ms2_pos) 203 | 204 | mona_ms2_neg <- read_lib("D:/MS_libraries/MoNA-export-LC-MS-MS_Negative_Mode.msp") 205 | mona_ms2_neg <- reorganize_mona(mona_ms2_neg) 206 | ``` 207 | 208 | ### GNPS MS2 library 209 | Unlike others, the GNPS library is organized in mgf format, so it has to be 210 | treated differently. Hence, we have to set *format = "mgf"* in the *read_lib* 211 | function. Besides, this library does not have the *Molecular Formula* (MF) field, 212 | so we can calculate the MF from the SMILES (if it exists) by the *complete_gnps* 213 | function. Finally, both positive and negative modes are in a single file as well. 214 | Therefore, we need to separate the polarity by the *separate_polarity* function 215 | as well. The GNPS library can be downloaded from https://gnps.ucsd.edu/ProteoSAFe/libraries.jsp. 216 | Please download "All GNPS Library Spectra". 217 | ```{r} 218 | gnps <- read_lib("D:/MS_libraries/ALL_GNPS.mgf", format = "mgf") 219 | # Compute MF 220 | gnps <- complete_gnps(gnps) 221 | gnps_pos <- separate_polarity(gnps, polarity = "pos") 222 | gnps_nge <- separate_polarity(gnps, polarity = "neg") 223 | ``` 224 | 225 | Now we have all MS2 libraries well-organized, so we can combine them.
226 | ```{r} 227 | combine_ms2_pos <- c(nist_ms2_pos, riken_ms2_pos, mona_ms2_pos, gnps_pos) 228 | combine_ms2_neg <- c(nist_ms2_neg, riken_ms2_neg, mona_ms2_neg, gnps_nge) 229 | # Disable parallel computing 230 | plan(sequential) 231 | # Then you can write them out separately 232 | write_MS2_msp(combine_ms2_pos, "D:/MS_libraries/combine_ms2_pos.msp") 233 | write_MS2_msp(combine_ms2_neg, "D:/MS_libraries/combine_ms2_neg.msp") 234 | ``` 235 | 236 | ### Others 237 | When you have multiple libraries to be read in, for instance if you are building 238 | your in-house library and you have one msp file for each batch of standards, 239 | then you will have many msp files to combine. The read_multilibs function gives 240 | you an easy way to read all of them at once. In this case, what you need to input 241 | is the folder that contains all these msp files, say the **in_house** folder. 242 | ```{r} 243 | in_house <- read_multilibs("D:/MS_libraries/in_house") 244 | ``` 245 | 246 | This package offers ways to remove RI and RT as well. You can use *remove_ri* and 247 | *remove_rt* functions, respectively. For in-house libraries, you might want to 248 | change meta data, e.g., comment (both EI and MS2 libraries, to add principal 249 | investigator and data collector for example), collision energy and instrument type 250 | (for MS2 libraries). You can use the *change_meta* function for this purpose. 251 | 252 | **Note**: 253 | 254 | 1. The RIKEN, MoNA, and GNPS MS2 libraries might contain some 255 | identical spectra as they all compile some well-known libraries. However, it is 256 | tricky to separate them. This might increase the size of the final msp file, but 257 | should not affect its use. 258 | 259 | 2. It takes a long time to process the NIST libraries. For a given lab, it may 260 | have a particular version of NIST and may not change frequently.
For this reason, 261 | we recommend that once you have NIST libraries organized, save them as an .Rda file 262 | and reuse it the next time. For others, you can check if there is any update 263 | since your last compilation. 264 | 3. All the aforementioned tandem mass spectral libraries are relatively big and 265 | would consume a rather large amount of memory once they are read into R. If your 266 | PC does not have enough memory, you can process each library separately, write it 267 | out as a single msp file, and then combine them in a text editor, e.g., Notepad++ 268 | as the msp file is basically a text file. 269 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%", 13 | eval = FALSE 14 | ) 15 | ``` 16 | 17 | # mspcompiler 18 | 19 | 20 | 21 | 22 | The goal of mspcompiler is to offer ways to compile either EI or tandem mass 23 | spectral libraries from various sources, such as NIST (if you have it installed), 24 | MoNA, and GNPS, and organize them into a neat and up-to-date msp file that can 25 | be used in MS-DIAL. 26 | 27 | ## Installation 28 | 29 | Install mspcompiler with the following code.
30 | 31 | ```{r} 32 | install.packages("devtools") 33 | devtools::install_github("QizhiSu/mspcompiler", build_vignettes = TRUE) 34 | ``` 35 | If ChemmineR and ChemmineOB fail to install, please try: 36 | ```{r, eval=FALSE} 37 | if (!requireNamespace("BiocManager", quietly = TRUE)) 38 | install.packages("BiocManager") 39 | 40 | BiocManager::install("ChemmineR") 41 | 42 | if (!requireNamespace("BiocManager", quietly = TRUE)) 43 | install.packages("BiocManager") 44 | 45 | BiocManager::install("ChemmineOB") 46 | ``` 47 | 48 | ## Usage 49 | ## EI libraries 50 | ### NIST EI library 51 | NIST is the most commonly used **commercial** EI library. Once you have the 52 | NIST library installed, you can transform it into a msp file by *Lib2NIST*. 53 | Normally *Lib2NIST* will be installed along with the NIST library installation. 54 | If not, you can download it from https://chemdata.nist.gov/dokuwiki/doku.php?id=chemdata:nist17. 55 | Please use the following settings in *Lib2NIST*: 56 | 57 | 1. *Add Input Libraries/Files*. For Agilent users, the input file can be found 58 | in, for example, "C:/database/NIST14.L". 59 | 60 | 2. Tick *Use Subset* and click *Define Subset* to set detailed parameters: 61 | 62 | * Enter which spectra should be exported in *List of Spectra IDs*. 63 | 64 | ![\nSettings inside *Define Subset* in Lib2NIST Program](vignettes/Lib2NIST_define_subset.png){width=500px} 65 | 66 | * The total number of spectra that your NIST library has can be checked in the *MS Search* 67 | program (Options -> Libraries -> Add both *mainlib* and *replib*). Then you can 68 | see the number of spectra your NIST library has. 69 | 70 | ![\nScreenshot of checking total number of spectra in the MS Search Program](vignettes/check_number_of_spectra_nist.png){width=500px} 71 | 72 | 3. Select "Text File(.MSP) + MOLfiles linked by BOTH" in *Output Format*; 73 | 74 | 4. Select the library in *Input Libraries or Text Files* and *Convert*.
75 | ![\nConfiguration of Lib2NIST](vignettes/Lib2NIST_configuration.png){width=500px} 76 | 77 | Once you have the \*.MSP file (normally hundreds megabytes) and the correspondent 78 | \*.MOL folder (hundreds thousands .MOL files inside the folder) exported, you 79 | can use the following code to add *SMILES* and *Retention Index (RI)*. 80 | 81 | The whole process is time-consuming (several hours, depending on the capability 82 | of your PC), so we suggests to use parallel computing. 83 | 84 | ```{r} 85 | library(mspcompiler) 86 | library(future) 87 | library(future.apply) 88 | library(parallel) 89 | # Set up parallel computing. Just remember to set it back once you have the 90 | # library compiled by "plan(sequential)". We will include it later. 91 | plan(multisession(workers = detectCores() - 1)) 92 | ``` 93 | 94 | Below are the code to process NIST EI library assuming that we have all files stored 95 | in the *MS_libraries* folder in the D disk. You may need to change the path 96 | accordingly. **Note**: Since the *.MOL folder contains a large number of mol files, 97 | it will be time-consuming to move, copy, or delete this folder. As such, we 98 | recommend pick up a good location to put this file when you are exporting in 99 | Lib2NIST, avoiding moving it. 100 | ```{r, eval = FALSE} 101 | # Read the msp file into R. 102 | nist_ei <- read_lib("D:/MS_libraries/NIST.MSP", type = "EI") 103 | # Combine all mol files into a single sdf file for subsequent structure retrieval. 104 | combine_mol2sdf("D:/MS_libraries/NIST.MOL", "D:/MS_libraries/nist.sdf") 105 | # Extract structure based on the sdf file exported before. 106 | nist_ei_structure <- extract_structure("D:/MS_libraries/nist.sdf", "D:/MS_libraries/nist_structure.txt") 107 | # Assign SMILES to the library. If you are working with Linux-based or Mac OS, 108 | # please use "match = "inchikey". 
109 | nist_ei <- assign_smiles(nist_ei, nist_ei_structure, match = "name") 110 | ``` 111 | 112 | In case you want to expand your library by adding publicly available libraries, 113 | continue with the instructions for each library. Otherwise, jump to the "combine 114 | all libraries" section to add RI values accordingly. 115 | 116 | ### RIKEN EI library 117 | The MS-DIAL developers have compiled an EI library with Kovat RI included. 118 | This library can be downloaded from http://prime.psc.riken.jp/compms/msdial/main.html#MSP. 119 | Please download "All records with Kovats RI...EI-MS..." As it contains Kovats RI, 120 | we can set *remove_ri* to **FALSE** to keep original RI in this file. This file 121 | already have SMILES and InChIKey well-organized. Hence, no further treatment is 122 | needed. 123 | ```{r, } 124 | riken_ei <- read_lib("D:/MS_libraries/GCMS DB-Public-KovatsRI-VS3.msp", 125 | type = "EI", remove_ri = FALSE) 126 | ``` 127 | 128 | ### MoNA EI library 129 | The MassBank of North America (MoNA) has an EI library available for download 130 | as well, https://mona.fiehnlab.ucdavis.edu/downloads. Please download 131 | "GC-MS Spectra" in "MSP" form. This file has SMILES information though, it is in 132 | the *Comment* field. Therefore, the SMILES has to be extracted from the *Comment* 133 | and put into the *SMILES* field by the *reorganize_mona* function. 134 | ```{r} 135 | mona_ei <- read_lib("D:/MS_libraries/MoNA-export-GC-MS_Spectra.msp", type = "EI") 136 | mona_ei <- reorganize_mona(mona_ei) 137 | ``` 138 | 139 | ### SWGDRUG EI library 140 | The Scientific Working Group for the Analysis of Seized Drugs (SWGDRUG) has 141 | complied an EI library with drug or drug-related compounds. The library is 142 | available in https://swgdrug.org/ms.html. The library is available in multiple 143 | formats. To correctly parse this library by mspcompiler, please download both 144 | **NIST Format** and **Agilent Format**. 
Then use *Lib2NIST* to convert the 145 | **NIST Format** into a *msp* file and to transform the **Agilent Format** into 146 | the *mol* files. Finally, we can read it in a way similar to that used for the 147 | NIST library. 148 | ```{r} 149 | swgdrug_ei <- read_lib("D:/MS_libraries/SWGDRUG.MSP", type = "EI") 150 | combine_mol2sdf("D:/MS_libraries/SWGDRUG.MOL", "D:/MS_libraries/swgdrug.sdf") 151 | swgdrug_ei_structure <- extract_structure("D:/MS_libraries/swgdrug.sdf", "D:/MS_libraries/swgdrug_structure.txt") 152 | # As the SWGDRUG file does not contain InChIKey information, even though you 153 | # are working with Linux-based or Mac OS, you should not use "match = inchikey". 154 | # "match = "name" is more than enough in this case. 155 | swgdrug_ei <- assign_smiles(swgdrug_ei, swgdrug_ei_structure, match = "name") 156 | ``` 157 | 158 | ### Combine all libraries 159 | After read in and organize all these libraries, we can now combine them into a 160 | single file, assign experimental RI retrieved from the "ri.dat" and "USER.DBU" files 161 | (if you have NIST library installed). 162 | ```{r} 163 | # Combine them 164 | combine_ei <- c(nist_ei, riken_ei, mona_ei, swgdrug_ei) 165 | 166 | # Extract experimental RI from the "ri.dat" and "USER.DBU" files. Once you have 167 | # NIST library installed, these files can be found in, for example, 168 | # "~/Programs/nist14/mssearch/nist_ri". Assuming you have copied these two files 169 | # in the D:/MS_libraries folder, then you will have: 170 | nist_ri <- extract_ri("D:/MS_libraries/ri.dat", "D:/MS_libraries/USER.DBU") 171 | # Assign experimental RI to the combined library depending on the column 172 | # polarity. The polarity can be "semi-polar", "non-polar", or "polar". 173 | # Providing that "capillary" GC columns are commonly used. This 174 | # function will only keep RI records from "capillary" columns and "Lee RI" 175 | # will be removed. 
When there are multiple records for a single compound, 176 | # the median RI will be used and if the standard deviation is higher than 30, 177 | # this value will be discarded. 178 | combine_ei <- assign_ri(combine_ei, nist_ri, polarity = "semi-polar") 179 | # At the end, disable parallel computing 180 | plan(sequential) 181 | # Then you can write it out 182 | write_EI_msp(combine_ei, "D:MS_libraries/combine_ei.msp") 183 | ``` 184 | 185 | ## MS2 libraries 186 | ```{r} 187 | library(mspcompiler) 188 | library(future) 189 | library(future.apply) 190 | library(parallel) 191 | plan(multisession(workers = detectCores() - 1)) 192 | ``` 193 | MS2 libraries can be processed in a similar way, but positive and negative modes 194 | are normally separated into 2 msp files. 195 | #### NIST MS2 library 196 | The NIST MS2 library can be treated as the same as the NIST EI library detailed 197 | above with only one exception. That is the input file for *Lib2NIST* can be 198 | found in "C:/Programs/nist14/mssearch/nist_msms". The exported msp file has both 199 | positive and negative modes mixed in a singled file, so we have to separated 200 | them by the separate_polarity function. 201 | ```{r} 202 | nist_ms2 <- read_lib("D:/MS_libraries/NIST_msms.MSP", type = "MS2") 203 | combine_mol2sdf("D:/MS_libraries/NIST_msms.MOL", "D:/MS_libraries/nist_msms.sdf") 204 | nist_ms2_structure <- extract_structure("D:/MS_libraries/nist_msms.sdf", "D:/MS_libraries/nist_msms_structure.txt") 205 | nist_ms2 <- assign_smiles(nist_ms2, nist_ms2_structure, match = "name") 206 | nist_ms2_pos <- separate_polarity(nist_ms2, polarity = "pos") 207 | nist_ms2_neg <- separate_polarity(nist_ms2, polarity = "neg") 208 | ``` 209 | 210 | ### RIKEN MS2 libraries 211 | The RIKEN MS2 libraries can be download from the MS-DIAL homepage http://prime.psc.riken.jp/compms/msdial/main.html#MSP. Please download all 212 | public MS/MS positive and negative, separately. 
213 | ```{r} 214 | riken_ms2_pos <- read_lib("D:/MS_libraries/MSMS-Public-Pos-VS15.msp") 215 | riken_ms2_neg <- read_lib("D:/MS_libraries/MSMS-Public-Neg-VS15.msp") 216 | ``` 217 | 218 | ### MoNA MS2 libraries 219 | The MoNA MS2 libraries can be downloaded from https://mona.fiehnlab.ucdavis.edu/downloads. 220 | Please download "LC-MS/MS Positive Mode" and "LC-MS/MS Negative Mode", separately. 221 | ```{r} 222 | mona_ms2_pos <- read_lib("D:/MS_libraries/MoNA-export-LC-MS-MS_Positive_Mode.msp") 223 | # Reorganize the SMILES field. 224 | mona_ms2_pos <- reorganize_mona(mona_ms2_pos) 225 | 226 | mona_ms2_neg <- read_lib("D:/MS_libraries/MoNA-export-LC-MS-MS_Negative_Mode.msp") 227 | mona_ms2_neg <- reorganize_mona(mona_ms2_neg) 228 | ``` 229 | 230 | ### GNPS MS2 library 231 | Unlike others, the GNPS library is organized in mgf format, so it has to be 232 | treated differently. Hence, we have to set *format = "mgf"* in the *read_lib* 233 | function. Besides, this library does not have the *Molecular Formula* (MF) field, 234 | so we can calculate the MF from the SMILES (if it exists) by the *complete_gnps* 235 | function. Finally, both positive and negative modes are in a single file as well. 236 | Therefore, we need to separate the polarity by the *separate_polarity* function 237 | as well. The GNPS library can be downloaded from https://gnps.ucsd.edu/ProteoSAFe/libraries.jsp. 238 | Please download "All GNPS Library Spectra". 239 | ```{r} 240 | gnps <- read_lib("D:/MS_libraries/ALL_GNPS.mgf", format = "mgf") 241 | # Compute MF 242 | gnps <- complete_gnps(gnps) 243 | gnps_pos <- separate_polarity(gnps, polarity = "pos") 244 | gnps_nge <- separate_polarity(gnps, polarity = "neg") 245 | ``` 246 | 247 | Now we have all MS2 libraries well-organized, so we can combine them.
248 | ```{r} 249 | combine_ms2_pos <- c(nist_ms2_pos, riken_ms2_pos, mona_ms2_pos, gnps_pos) 250 | combine_ms2_neg <- c(nist_ms2_neg, riken_ms2_neg, mona_ms2_neg, gnps_nge) 251 | # Disable parallel computing 252 | plan(sequential) 253 | # Then you can write them out separately 254 | write_MS2_msp(combine_ms2_pos, "D:/MS_libraries/combine_ms2_pos.msp") 255 | write_MS2_msp(combine_ms2_neg, "D:/MS_libraries/combine_ms2_neg.msp") 256 | ``` 257 | 258 | ### Others 259 | When you have multiple libraries to be read in, for instance if you are building 260 | your in-house library and you have one msp file for each batch of standards, 261 | then you will have many msp files to combine. The read_multilibs function gives 262 | you an easy way to read all of them at once. In this case, what you need to input 263 | is the folder that contains all these msp files, say the **in_house** folder. 264 | ```{r} 265 | in_house <- read_multilibs("D:/MS_libraries/in_house") 266 | ``` 267 | 268 | This package offers ways to remove RI and RT as well. You can use *remove_ri* and 269 | *remove_rt* functions, respectively. For in-house libraries, you might want to 270 | change meta data, e.g., comment (both EI and MS2 libraries, to add principal 271 | investigator and data collector for example), collision energy and instrument type 272 | (for MS2 libraries). You can use the *change_meta* function for this purpose. 273 | 274 | **Note**: 275 | 276 | 1. The RIKEN, MoNA, and GNPS MS2 libraries might contain some 277 | identical spectra as they all compile some well-known libraries. However, it is 278 | tricky to separate them. This might increase the size of the final msp file, but 279 | should not affect its use. 280 | 281 | 2. It takes a long time to process the NIST libraries. For a given lab, it may 282 | have a particular version of NIST and may not change frequently.
For this reason, 283 | we recommend that once you have NIST libraries organized, save them as an .Rda file 284 | and reuse it the next time. For others, you can check if there is any update 285 | since your last compilation. 286 | 3. All the aforementioned tandem mass spectral libraries are relatively big and 287 | would consume a rather high amount of memory once they are read into R. If your 288 | PC does not have enough memory, you can process each library separately, write it 289 | out as a single msp file, and then combine them in a text editor, e.g., Notepad++ 290 | as the msp file is basically a text file. 291 | 292 | 293 | ## Acknowledgement 294 | Many thanks to Miao YU, the author of the enviGCMS R package, for his help with 295 | the read_lib function. I would also like to thank Dmitriy D. Matyushin for his 296 | hints on extracting RI information from the NIST library. 297 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # mspcompiler 5 | 6 | 7 | 8 | 9 | The goal of mspcompiler is to offer ways to compile either EI or tandem 10 | mass spectral libraries from various sources, such as NIST (if you have 11 | it installed), MoNA, and GNPS, and organize them into a neat and 12 | up-to-date msp file that can be used in MS-DIAL. 13 | 14 | ## Installation 15 | 16 | Install by trying the following code.
17 | 18 | ``` r 19 | install.packages("devtools") 20 | devtools::install_github("QizhiSu/mspcompiler", build_vignettes = TRUE) 21 | ``` 22 | 23 | If ChemmineR and ChemmineOB fail to install, please try: 24 | 25 | ``` r 26 | if (!requireNamespace("BiocManager", quietly = TRUE)) 27 | install.packages("BiocManager") 28 | 29 | BiocManager::install("ChemmineR") 30 | 31 | if (!requireNamespace("BiocManager", quietly = TRUE)) 32 | install.packages("BiocManager") 33 | 34 | BiocManager::install("ChemmineOB") 35 | ``` 36 | 37 | ## Usage 38 | 39 | ## EI libraries 40 | 41 | ### NIST EI library 42 | 43 | NIST is the most commonly used **commercial** EI library. Once you have 44 | the NIST library installed, you can transform it into a msp file by 45 | *Lib2NIST*. Normally *Lib2NIST* will be installed along with the NIST 46 | library installation. If not, you can download it from 47 | the NIST website. Please 48 | use the following settings in *Lib2NIST*: 49 | 50 | 1. *Add Input Libraries/Files*. For Agilent users, the input file can 51 | be found in, for example, “C:/database/NIST14.L”. 52 | 53 | 2. Tick *Use Subset* and click *Define Subset* to set detail 54 | parameters: 55 | 56 | - Enter which spectra should be exported in *List of Spectra IDs*. 57 | 58 |
59 | inside Define Subset in Lib2NIST Program 60 |
61 | 62 | - The total number of spectra that your NIST library has can be 63 | checked in the *MS Search* program (Options -> Libraries 64 | -> Add both *mainlib* and *replib*). Then you can see the 65 | number of spectra your NIST library has. 66 | 67 |
68 | of checking total number of spectra in the MS Search Program 69 |
70 | 71 | 3. Select “Text File(.MSP) + MOLfiles linked by BOTH” in *Output 72 | Format*; 73 | 74 | 4. Select the library in *Input Libraries or Text Files* and *Convert*. 75 | of Lib2NIST 76 | 77 | Once you have the \*.MSP file (normally hundreds megabytes) and the 78 | correspondent \*.MOL folder (hundreds thousands .MOL files inside the 79 | folder) exported, you can use the following code to add *SMILES* and 80 | *Retention Index (RI)*. 81 | 82 | The whole process is time-consuming (several hours, depending on the 83 | capability of your PC), so we suggests to use parallel computing. 84 | 85 | ``` r 86 | library(mspcompiler) 87 | library(future) 88 | library(future.apply) 89 | library(parallel) 90 | # Set up parallel computing. Just remember to set it back once you have the 91 | # library compiled by "plan(sequential)". We will include it later. 92 | plan(multisession(workers = detectCores() - 1)) 93 | ``` 94 | 95 | Below are the code to process NIST EI library assuming that we have all 96 | files stored in the *MS\_libraries* folder in the D disk. You may need 97 | to change the path accordingly. **Note**: Since the \*.MOL folder 98 | contains a large number of mol files, it will be time-consuming to move, 99 | copy, or delete this folder. As such, we recommend pick up a good 100 | location to put this file when you are exporting in Lib2NIST, avoiding 101 | moving it. 102 | 103 | ``` r 104 | # Read the msp file into R. 105 | nist_ei <- read_lib("D:/MS_libraries/NIST.MSP", type = "EI") 106 | # Combine all mol files into a single sdf file for subsequent structure retrieval. 107 | combine_mol2sdf("D:/MS_libraries/NIST.MOL", "D:/MS_libraries/nist.sdf") 108 | # Extract structure based on the sdf file exported before. 109 | nist_ei_structure <- extract_structure("D:/MS_libraries/nist.sdf", "D:/MS_libraries/nist_structure.txt") 110 | # Assign SMILES to the library. If you are working with Linux-based or Mac OS, 111 | # please use "match = "inchikey". 
112 | nist_ei <- assign_smiles(nist_ei, nist_ei_structure, match = "name") 113 | ``` 114 | 115 | In case you want to expand your library by adding publicly available 116 | libraries, continue with the instructions for each library. Otherwise, 117 | jump to the “combine all libraries” section to add RI values 118 | accordingly. 119 | 120 | ### RIKEN EI library 121 | 122 | The MS-DIAL developers have compiled an EI library with Kovat RI 123 | included. This library can be downloaded from 124 | . Please download 125 | “All records with Kovats RI…EI-MS…” As it contains Kovats RI, we can set 126 | *remove\_ri* to **FALSE** to keep original RI in this file. This file 127 | already have SMILES and InChIKey well-organized. Hence, no further 128 | treatment is needed. 129 | 130 | ``` r 131 | riken_ei <- read_lib("D:/MS_libraries/GCMS DB-Public-KovatsRI-VS3.msp", 132 | type = "EI", remove_ri = FALSE) 133 | ``` 134 | 135 | ### MoNA EI library 136 | 137 | The MassBank of North America (MoNA) has an EI library available for 138 | download as well, . Please 139 | download “GC-MS Spectra” in “MSP” form. This file has SMILES information 140 | though, it is in the *Comment* field. Therefore, the SMILES has to be 141 | extracted from the *Comment* and put into the *SMILES* field by the 142 | *reorganize\_mona* function. 143 | 144 | ``` r 145 | mona_ei <- read_lib("D:/MS_libraries/MoNA-export-GC-MS_Spectra.msp", type = "EI") 146 | mona_ei <- reorganize_mona(mona_ei) 147 | ``` 148 | 149 | ### SWGDRUG EI library 150 | 151 | The Scientific Working Group for the Analysis of Seized Drugs (SWGDRUG) 152 | has complied an EI library with drug or drug-related compounds. The 153 | library is available in . The library is 154 | available in multiple formats. To correctly parse this library by 155 | mspcompiler, please download both **NIST Format** and **Agilent 156 | Format**. 
Then use *Lib2NIST* to convert the **NIST Format** into a 157 | *msp* file and to transform the **Agilent Format** into the *mol* files. 158 | Finally, we can read it in a way similar to that used for the NIST 159 | library. 160 | 161 | ``` r 162 | swgdrug_ei <- read_lib("D:/MS_libraries/SWGDRUG.MSP", type = "EI") 163 | combine_mol2sdf("D:/MS_libraries/SWGDRUG.MOL", "D:/MS_libraries/swgdrug.sdf") 164 | swgdrug_ei_structure <- extract_structure("D:/MS_libraries/swgdrug.sdf", "D:/MS_libraries/swgdrug_structure.txt") 165 | # As the SWGDRUG file does not contain InChIKey information, even though you 166 | # are working with Linux-based or Mac OS, you should not use "match = inchikey". 167 | # "match = "name" is more than enough in this case. 168 | swgdrug_ei <- assign_smiles(swgdrug_ei, swgdrug_ei_structure, match = "name") 169 | ``` 170 | 171 | ### Combine all libraries 172 | 173 | After read in and organize all these libraries, we can now combine them 174 | into a single file, assign experimental RI retrieved from the “ri.dat” 175 | and “USER.DBU” files (if you have NIST library installed). 176 | 177 | ``` r 178 | # Combine them 179 | combine_ei <- c(nist_ei, riken_ei, mona_ei, swgdrug_ei) 180 | 181 | # Extract experimental RI from the "ri.dat" and "USER.DBU" files. Once you have 182 | # NIST library installed, these files can be found in, for example, 183 | # "~/Programs/nist14/mssearch/nist_ri". Assuming you have copied these two files 184 | # in the D:/MS_libraries folder, then you will have: 185 | nist_ri <- extract_ri("D:/MS_libraries/ri.dat", "D:/MS_libraries/USER.DBU") 186 | # Assign experimental RI to the combined library depending on the column 187 | # polarity. The polarity can be "semi-polar", "non-polar", or "polar". 188 | # Providing that "capillary" GC columns are commonly used. This 189 | # function will only keep RI records from "capillary" columns and "Lee RI" 190 | # will be removed. 
When there are multiple records for a single compound, 191 | # the median RI will be used and if the standard deviation is higher than 30, 192 | # this value will be discarded. 193 | combine_ei <- assign_ri(combine_ei, nist_ri, polarity = "semi-polar") 194 | # At the end, disable parallel computing 195 | plan(sequential) 196 | # Then you can write it out 197 | write_EI_msp(combine_ei, "D:/MS_libraries/combine_ei.msp") 198 | ``` 199 | 200 | ## MS2 libraries 201 | 202 | ``` r 203 | library(mspcompiler) 204 | library(future) 205 | library(future.apply) 206 | library(parallel) 207 | plan(multisession(workers = detectCores() - 1)) 208 | ``` 209 | 210 | MS2 libraries can be processed in a similar way, but positive and 211 | negative modes are normally separated into 2 msp files. **NIST 212 | MS2 library.** The NIST MS2 library can be treated the same as the NIST 213 | EI library detailed above with only one exception. That is the input 214 | file for *Lib2NIST* can be found in 215 | “C:/Programs/nist14/mssearch/nist\_msms”. The exported msp file has both 216 | positive and negative modes mixed in a single file, so we have to 217 | separate them by the separate\_polarity function. 218 | 219 | ``` r 220 | nist_ms2 <- read_lib("D:/MS_libraries/NIST_msms.MSP", type = "MS2") 221 | combine_mol2sdf("D:/MS_libraries/NIST_msms.MOL", "D:/MS_libraries/nist_msms.sdf") 222 | nist_ms2_structure <- extract_structure("D:/MS_libraries/nist_msms.sdf", "D:/MS_libraries/nist_msms_structure.txt") 223 | nist_ms2 <- assign_smiles(nist_ms2, nist_ms2_structure, match = "name") 224 | nist_ms2_pos <- separate_polarity(nist_ms2, polarity = "pos") 225 | nist_ms2_neg <- separate_polarity(nist_ms2, polarity = "neg") 226 | ``` 227 | 228 | ### RIKEN MS2 libraries 229 | 230 | The RIKEN MS2 libraries can be downloaded from the MS-DIAL homepage 231 | <http://prime.psc.riken.jp/compms/msdial/main.html#MSP>. Please download 232 | all public MS/MS positive and negative, separately.
233 | 234 | ``` r 235 | riken_ms2_pos <- read_lib("D:/MS_libraries/MSMS-Public-Pos-VS15.msp") 236 | riken_ms2_neg <- read_lib("D:/MS_libraries/MSMS-Public-Neg-VS15.msp") 237 | ``` 238 | 239 | ### MoNA MS2 libraries 240 | 241 | The MoNA MS2 libraries can be downloaded from 242 | <https://mona.fiehnlab.ucdavis.edu/downloads>. Please download “LC-MS/MS 243 | Positive Mode” and “LC-MS/MS Negative Mode”, separately. 244 | 245 | ``` r 246 | mona_ms2_pos <- read_lib("D:/MS_libraries/MoNA-export-LC-MS-MS_Positive_Mode.msp") 247 | # Reorganize the SMILES field. 248 | mona_ms2_pos <- reorganize_mona(mona_ms2_pos) 249 | 250 | mona_ms2_neg <- read_lib("D:/MS_libraries/MoNA-export-LC-MS-MS_Negative_Mode.msp") 251 | mona_ms2_neg <- reorganize_mona(mona_ms2_neg) 252 | ``` 253 | 254 | ### GNPS MS2 library 255 | 256 | Unlike others, the GNPS library is organized in mgf format, so it has to 257 | be treated differently. Hence, we have to set *format = “mgf”* in the 258 | *read\_lib* function. Besides, this library does not have the *Molecular 259 | Formula* (MF) field, so we can calculate the MF from the SMILES (if it 260 | exists) by the *complete\_gnps* function. Finally, both positive and 261 | negative modes are in a single file as well. Therefore, we need to 262 | separate the polarity by the *separate\_polarity* function as well. The 263 | GNPS library can be downloaded from 264 | <https://gnps.ucsd.edu/ProteoSAFe/libraries.jsp>. Please download “All 265 | GNPS Library Spectra”. 266 | 267 | ``` r 268 | gnps <- read_lib("D:/MS_libraries/ALL_GNPS.mgf", format = "mgf") 269 | # Compute MF 270 | gnps <- complete_gnps(gnps) 271 | gnps_pos <- separate_polarity(gnps, polarity = "pos") 272 | gnps_nge <- separate_polarity(gnps, polarity = "neg") 273 | ``` 274 | 275 | Now we have all MS2 libraries well-organized, so we can combine them.
276 | 277 | ``` r 278 | combine_ms2_pos <- c(nist_ms2_pos, riken_ms2_pos, mona_ms2_pos, gnps_pos) 279 | combine_ms2_neg <- c(nist_ms2_neg, riken_ms2_neg, mona_ms2_neg, gnps_nge) 280 | # Disable parallel computing 281 | plan(sequential) 282 | # Then you can write them out separately 283 | write_MS2_msp(combine_ms2_pos, "D:/MS_libraries/combine_ms2_pos.msp") 284 | write_MS2_msp(combine_ms2_neg, "D:/MS_libraries/combine_ms2_neg.msp") 285 | ``` 286 | 287 | ### Others 288 | 289 | When you have multiple libraries to be read in, for instance if you are 290 | building your in-house library and you have one msp file for each batch 291 | of standards, then you will have many msp files to combine. The 292 | read\_multilibs function gives you an easy way to read all of them at 293 | once. In this case, what you need to input is the folder that contains 294 | all these msp files, say the **in\_house** folder. 295 | 296 | ``` r 297 | in_house <- read_multilibs("D:/MS_libraries/in_house") 298 | ``` 299 | 300 | This package offers ways to remove RI and RT as well. You can use 301 | *remove\_ri* and *remove\_rt* functions, respectively. For in-house 302 | libraries, you might want to change meta data, e.g., comment (both EI 303 | and MS2 libraries, to add principal investigator and data collector for 304 | example), collision energy and instrument type (for MS2 libraries). You 305 | can use the *change\_meta* function for this purpose. 306 | 307 | **Note**: 308 | 309 | 1. The RIKEN, MoNA, and GNPS MS2 libraries might contain some 310 | identical spectra as they all compile some well-known libraries. 311 | However, it is tricky to separate them. This might increase the size 312 | of the final msp file, but should not affect its use. 313 | 314 | 2. It takes a long time to process the NIST libraries. For a given lab, 315 | it may have a particular version of NIST and may not change 316 | frequently.
For this reason, we recommend that once you have NIST 317 | libraries organized, save them as .Rda file and reuse it the next 318 | time. For others, you can check if there is any update since your 319 | last compilation. 320 | 321 | 3. All the aforementioned tandem mass spectral libraries are relatively 322 | big and would consuming rather high amount of memory once they are 323 | read into R. If your PC does not have enough memory, you can process 324 | each library separately, write it out as single msp file, and then 325 | combine them in a text editor, e.g., Notepad++ as the msp file is 326 | basically a text file. 327 | 328 | ## Acknowledgement 329 | 330 | Many thanks to Miao YU, the author of the enviGCMS R package, for his 331 | help to the read\_lib function. I would also like to thank Dmitriy D. 332 | Matyushin for his hints to extract RI information from the NIST library. 333 | -------------------------------------------------------------------------------- /R/read_libraries.R: -------------------------------------------------------------------------------- 1 | #' Read msp/mgf mass spectral libraries 2 | #' 3 | #' \code{read_lib} offers a way to read mass spectral libraries into R 4 | #' for further processing. 5 | #' 6 | #' This is a generic function to read either EI or MS2 mass spectral libraries. 7 | #' The library can be either in \code{msp} or \code{mgf} form. For this reason, 8 | #' it is required to set the format and the type of the input library. The 9 | #' default is \code{MS2} in \code{msp} format. In the case of EI mass spectral 10 | #' library, an additional Boolean parameter \code{remove_ri} can be set to 11 | #' remove or keep the retention index (RI). In the case of MS2 mass spectral 12 | #' library, an additional Boolean parameter \code{remove_rt} can be set to 13 | #' remove or keep the retention time (RT). This function supports parallel 14 | #' computing making use of the \pkg{future.apply}. 
#' Please see the vignette for more details.
#'
#' @param file Mass spectral library in \code{msp} or \code{mgf} format.
#' @param format The format of the library, either \code{msp} or \code{mgf}
#' (matched case-insensitively).
#' @param type The type of the library, either \code{EI} or \code{MS2}
#' (matched case-insensitively).
#' @param remove_ri A logical scalar only used in case of EI mass spectral
#' library. Should retention index (RI) be removed? \code{TRUE} or \code{FALSE}
#' @param remove_rt A logical scalar only used in case of MS2 mass spectral
#' library. Should retention time (RT) be removed? \code{TRUE} or \code{FALSE}
#'
#' @return A \code{list} with each spectral entry as a list element for further
#' processing. Entries with at least one peak carry a \code{Spectra} data frame
#' with the columns \code{mz} and \code{ins}; entries without peaks do not.
#' A file without any record yields an empty list.
#'
#' @import stringr
#' @import future.apply
#'
#' @export
#'
#' @examples
#' # The first 2 lines only indicate the location where the example files are
#' # stored. You might not need them.
#' EI_file <- system.file("EI.msp", package = "mspcompiler")
#' MS2_mgf_file <- system.file("MS2.mgf", package = "mspcompiler")
#'
#' EI <- read_lib(file = EI_file, format = "msp", type = "EI", remove_ri = FALSE)
#' MS2_mgf <- read_lib(file = MS2_mgf_file, format = "mgf", type = "MS2")
read_lib <-
  function(file, format = "msp", type = "MS2",
           remove_ri = TRUE, remove_rt = TRUE) {
    # Accept "MSP", "ei", ... as well. Any other value still falls through to
    # the mgf / MS2 code paths, as it did before.
    format <- tolower(format)
    type <- toupper(type)

    tmp <- readLines(file)
    # NOTE(review): as written the pattern is an empty string, so this call
    # leaves `tmp` unchanged. Presumably a non-printable character was lost
    # from the pattern; confirm against the upstream source.
    tmp <- gsub("", "", tmp) # fix files that have different encoding

    # Individual compounds are recognized differently depending on the format.
    record_start <- if (format == "msp") "^name:" else "^begin ions"
    start_line <- grep(record_start, tmp, ignore.case = TRUE)
    if (length(start_line) == 0) {
      return(list())
    }
    # Number every line with the record it belongs to. Lines in front of the
    # first record get id 0 and are dropped, so a preamble (blank line,
    # comment) can no longer shift lines into the wrong record.
    record_id <- cumsum(seq_along(tmp) %in% start_line)
    in_record <- record_id > 0
    cmp_list <- split(tmp[in_record], record_id[in_record])

    # ---- helpers shared by all parsers -------------------------------------

    # Value(s) of the header line(s) "<key><sep>value" of one record; `key` is
    # a regular expression and is matched case-insensitively at line start.
    field <- function(cmp, key, sep = ": ?") {
      prefix <- paste0("^(", key, ")", sep)
      sub(prefix, "",
        grep(prefix, cmp, ignore.case = TRUE, value = TRUE),
        ignore.case = TRUE
      )
    }

    # First match of `pattern` in every element of `x`, NA where absent.
    first_match <- function(x, pattern) {
      hit <- regexpr(pattern, x)
      found <- !is.na(hit) & hit > 0
      out <- rep(NA_character_, length(x))
      out[found] <- regmatches(x, hit)
      out
    }

    # TRUE only when the peak count is present, numeric and positive.
    has_peaks <- function(peak_number) {
      length(peak_number) > 0 && isTRUE(as.numeric(peak_number[1]) > 0)
    }

    # Turn the peak lines of one record into a data frame (mz, ins).
    parse_peaks <- function(cmp) {
      # Peak lines start with a digit and are not "key: value" header lines.
      peak_lines <- cmp[grepl("^[0-9]", cmp) & !grepl(": ", cmp)]
      peak_lines <- gsub("\n", "", peak_lines, fixed = TRUE)
      # Drop a trailing quoted peak annotation.
      peak_lines <- sub('".*"$', "", peak_lines)
      # Pairs may be separated by blanks, tabs or (NIST style) semicolons.
      tokens <- unlist(strsplit(peak_lines, "[\t ;]"))
      values <- as.numeric(
        grep("^[0-9].*[0-9]$|^[0-9]$", tokens, value = TRUE)
      )
      # Odd positions are m/z values, even positions are intensities.
      is_mz <- seq_along(values) %% 2 == 1
      cbind.data.frame(mz = values[is_mz], ins = values[!is_mz])
    }

    # ---- EI records --------------------------------------------------------
    parse_ei <- function(cmp) {
      peak_number <- field(cmp, "num peaks", sep = ": *")
      # A record without a "Num Peaks" line is treated as having no peaks.
      if (length(peak_number) == 0) peak_number <- "0"
      if (remove_ri) {
        RI <- NA # nolint: object_name_linter.
      } else {
        RI <- round(as.numeric(field(cmp, "retention[ _]index|RI"))) # nolint
      }
      meta <- list(
        Name = field(cmp, "name"),
        InChIKey = field(cmp, "inchikey"),
        Smiles = field(cmp, "smiles"),
        Formula = field(cmp, "formula"),
        "Molecular weight" = field(cmp, "mw"),
        RI = RI,
        Comment = field(cmp, "comments?"),
        "Number of peaks" = peak_number
      )
      if (has_peaks(peak_number)) {
        return(c(meta, list(Spectra = parse_peaks(cmp))))
      }
      # Element order of peak-less entries is kept as it has always been, in
      # case a caller relies on positions.
      meta[c(
        "Name", "InChIKey", "Smiles", "Molecular weight",
        "RI", "Comment", "Formula", "Number of peaks"
      )]
    }

    # ---- MS2 records, msp format -------------------------------------------
    ms2_meta_msp <- function(cmp) {
      # The ion mode pattern is deliberately left unanchored, as before.
      ion_mode <- grep("ion_?mode:", cmp, ignore.case = TRUE, value = TRUE)
      peak_number <- field(cmp, "num peaks", sep = ": *")
      if (length(peak_number) == 0) peak_number <- "0"
      list(
        Name = field(cmp, "name"),
        PrecursorMZ = field(cmp, "precursormz"),
        PrecusorType = field(cmp, "precursor_?type"),
        IonMode = gsub("ion_?mode: ", "", ion_mode, ignore.case = TRUE),
        Formula = field(cmp, "formula"),
        Smiles = field(cmp, "smiles"),
        InChIKey = field(cmp, "inchikey"),
        RetentionTime = if (remove_rt) NA else field(cmp, "retention_?time"),
        CCS = field(cmp, "ccs"),
        CollisionEnergy = field(cmp, "collision_?energy"),
        InstrumentType = field(cmp, "instrument_?type"),
        Comment = field(cmp, "comments?"),
        "Number of peaks" = peak_number
      )
    }

    # ---- MS2 records, mgf format -------------------------------------------
    ms2_meta_mgf <- function(cmp) {
      raw_name <- field(cmp, "name", sep = "=")
      ion_mode <- field(cmp, "ionmode", sep = "=")

      # The adduct (e.g. "M+H") is the tail of the NAME entry. It has to be
      # read from the untouched name, before the tail is cut off below.
      precursor_type <- first_match(raw_name, "\\[?[0-9]?M(-|\\+).*$")
      if (length(precursor_type) == 0 || is.na(precursor_type[1])) {
        precursor_type <- NA_character_
      } else if (!grepl("\\][0-9]*[-+]$", precursor_type)) {
        # No charge sign yet: add brackets if missing and derive the sign
        # from the ion mode (anything not "positive" counts as negative).
        if (!grepl("\\[", precursor_type)) {
          precursor_type <- paste0("[", precursor_type, "]")
        }
        is_positive <- any(grepl("positive", ion_mode, ignore.case = TRUE))
        precursor_type <- paste0(precursor_type, if (is_positive) "+" else "-")
      }

      # The comment concatenates the complete lines of these entries.
      comment <- paste(
        grep("^PI=", cmp, ignore.case = TRUE, value = TRUE),
        grep("^datacollector=", cmp, ignore.case = TRUE, value = TRUE),
        grep("^submituser=", cmp, ignore.case = TRUE, value = TRUE),
        grep("^mslevel", cmp, ignore.case = TRUE, value = TRUE),
        grep("^libraryquality=", cmp, ignore.case = TRUE, value = TRUE),
        sep = "; "
      )

      list(
        Name = gsub(" \\[?[0-9]?M(-|\\+).*$", "", raw_name,
          ignore.case = TRUE
        ),
        PrecursorMZ = field(cmp, "pepmass", sep = "="),
        PrecusorType = precursor_type,
        IonMode = ion_mode,
        Formula = NA,
        Smiles = trimws(field(cmp, "smiles", sep = "=")),
        InChIKey = field(cmp, "inchiaux", sep = "="),
        RetentionTime = NA,
        CCS = NA,
        CollisionEnergy = NA,
        InstrumentType = NA,
        Comment = comment,
        # mgf has no peak count field: count the lines starting with a digit.
        "Number of peaks" = sum(grepl("^[0-9]", cmp))
      )
    }

    # Manipulation of the peak matrix is the same for both msp and mgf.
    parse_ms2 <- function(cmp) {
      meta <- if (format == "msp") ms2_meta_msp(cmp) else ms2_meta_mgf(cmp)
      if (has_peaks(meta[["Number of peaks"]])) {
        meta <- c(meta, list(Spectra = parse_peaks(cmp)))
      }
      meta
    }

    get_msp <- if (type == "EI") parse_ei else parse_ms2

    # Apply get_msp to the cmp_list, using the active future plan (multiple
    # sessions if the user set one up). Fall back to lapply when future.apply
    # is not installed.
    apply_fun <- if (requireNamespace("future.apply", quietly = TRUE)) {
      future.apply::future_lapply
    } else {
      lapply
    }
    cmp_list <- apply_fun(cmp_list, get_msp)

    # Return the organized list
    return(cmp_list)
  }


#' A wrapper to read multiple msp files at a time
#'
#' \code{read_multilibs} offers a way to read multiple msp files at a time and
#' combine them into a single file.
#'
#' When you are building your in-house libraries, you may probably have multiple
#' msp files at hand (e.g., one msp for one group of compounds). To avoid
#' employing \code{read_lib} several times, this function provides a way to read
#' all these files at once.
#'
#' @param folder The folder that contains multiple msp files.
#' @param ... Further arguments passed on to \code{read_lib} for every file,
#' e.g. \code{type = "EI"} or \code{remove_ri = FALSE}. Without them every
#' file is read with the \code{read_lib} defaults.
#'
#' @return A single \code{list} combining all msp files. An empty list is
#' returned when the folder holds no msp file.
#' @export
#'
read_multilibs <- function(folder, ...) {
  # `pattern` is a regular expression, not a glob: anchor on the extension so
  # that only "*.msp" files are picked up, whatever the case of the extension
  # (Lib2NIST exports "*.MSP").
  all_files <- list.files(
    path = folder, pattern = "\\.msp$",
    full.names = TRUE, ignore.case = TRUE
  )
  if (length(all_files) == 0) {
    return(list())
  }
  # Read every file and concatenate the per-file lists of entries.
  do.call(c, lapply(all_files, read_lib, ...))
}
15449.0 62 | 476.143616 23143.0 63 | 476.975159 28430.0 64 | 478.891113 27890.0 65 | 479.975952 33235.0 66 | 483.242432 13564.0 67 | 487.210388 32885.0 68 | 488.160156 20786.0 69 | 491.191956 55073.0 70 | 494.279602 7435.0 71 | 495.653992 32208.0 72 | 498.412964 11684.0 73 | 503.028198 30643.0 74 | 503.699951 2.0 75 | 504.344543 36421.0 76 | 505.154541 9667.0 77 | 510.176514 38891.0 78 | 512.168701 10175.0 79 | 513.265381 16524.0 80 | 514.957397 11384.0 81 | 515.922852 78764.0 82 | 520.973389 28857.0 83 | 521.82373 5810.0 84 | 523.168945 58926.0 85 | 529.036865 20722.0 86 | 530.991211 31845.0 87 | 532.376709 3005.0 88 | 534.575195 12906.0 89 | 538.003174 220949.0 90 | 539.217773 272296.0 91 | 540.672852 43876.0 92 | 548.061401 13655.0 93 | 554.117432 76225.0 94 | 556.030396 214421.0 95 | 557.288818 52970.0 96 | 557.996094 6202.0 97 | 559.942261 18112.0 98 | 561.328735 14656.0 99 | 564.123047 25971.0 100 | 564.94873 34630.0 101 | 566.439941 35564.0 102 | 571.33374 61305.0 103 | 572.047363 17235.0 104 | 575.219238 42127.0 105 | 575.888916 6.0 106 | 577.102905 29550.0 107 | 579.645874 7151.0 108 | 580.942627 17609.0 109 | 582.110107 102075.0 110 | 583.458984 10113.0 111 | 585.237061 36774.0 112 | 598.172485 26085.0 113 | 599.352783 764523.0 114 | 600.382812 114267.0 115 | 601.06665 4.0 116 | 602.267578 27144.0 117 | 609.302002 10247.0 118 | 613.415771 8621.0 119 | 622.208984 23787.0 120 | 623.023193 63940.0 121 | 623.991455 19154.0 122 | 625.216187 23050.0 123 | 638.299561 12481.0 124 | 640.265625 17392.0 125 | 641.235107 65873.0 126 | 646.095947 8409.0 127 | 649.2771 5446.0 128 | 651.526611 17521.0 129 | 657.128906 12911.0 130 | 658.094971 14824.0 131 | 659.420898 41969.0 132 | 663.387695 18284.0 133 | 668.33252 65700.0 134 | 669.357178 5671.0 135 | 680.219727 44374.0 136 | 681.987793 24446.0 137 | 685.957764 19166.0 138 | 691.648682 29177.0 139 | 693.22583 33545.0 140 | 694.307861 22539.0 141 | 696.332397 121211.0 142 | 697.127808 9503.0 143 | 709.464478 20171.0 144 
| 710.79541 22346.0 145 | 711.744873 32675.0 146 | 714.071777 50487.0 147 | 715.578979 54567.0 148 | 716.216553 8.0 149 | 723.267822 14415.0 150 | 724.081909 88510.0 151 | 725.488892 8470.0 152 | 728.352051 21518.0 153 | 735.806396 52022.0 154 | 738.34668 2697.0 155 | 744.365234 16205.0 156 | 747.456055 19268.0 157 | 753.27124 14114.0 158 | 761.609131 12373.0 159 | 764.462158 19876.0 160 | 765.280029 18361.0 161 | 769.275635 41999.0 162 | 770.328613 16548.0 163 | 771.386353 13776.0 164 | 787.432617 33003.0 165 | 796.139526 9637.0 166 | 797.232788 11322.0 167 | 806.556885 18639.0 168 | 808.442383 6355.0 169 | 811.637329 14687.0 170 | 812.300049 9904.0 171 | 813.149292 15959.0 172 | 817.219238 7640.0 173 | 820.274048 6246.0 174 | 821.288208 15591.0 175 | 823.361328 13693.0 176 | 824.618286 6895.0 177 | 828.517456 32132.0 178 | 830.409302 102583.0 179 | 831.306763 65294.0 180 | 832.105469 9727.0 181 | 833.18457 4115.0 182 | 835.212891 7606.0 183 | 836.078247 8740.0 184 | 838.518677 26160.0 185 | 839.455811 72006.0 186 | 845.612915 21577.0 187 | 847.433594 196462.0 188 | 848.125854 39637.0 189 | 851.380859 246170.0 190 | 852.370605 276882.0 191 | 853.270508 44216.0 192 | 865.5979 44697.0 193 | 866.295654 111012.0 194 | 867.19043 4120.0 195 | 868.372192 78023.0 196 | 869.331177 8584.0 197 | 871.557007 5374.0 198 | 877.137451 30131.0 199 | 880.216553 5692.0 200 | 883.442261 49241.0 201 | 884.216553 8.0 202 | 888.167114 41037.0 203 | 889.282959 24795.0 204 | 892.127808 14925.0 205 | 893.467896 23506.0 206 | 895.607422 13123.0 207 | 899.010132 28633.0 208 | 901.351196 13472.0 209 | 902.325562 3774.0 210 | 909.424438 244136.0 211 | 910.515381 43770.0 212 | 911.526367 15208.0 213 | 914.30896 6532.0 214 | 915.217773 28455.0 215 | 918.666138 5610.0 216 | 919.39624 85829.0 217 | 920.06665 3.0 218 | 921.123901 16163.0 219 | 922.205688 37863.0 220 | 925.063721 43395.0 221 | 931.132812 61732.0 222 | 932.351929 136657.0 223 | 933.524048 25202.0 224 | 935.493286 33896.0 225 | 
936.552002 103130.0 226 | 937.588623 67605.0 227 | 938.471069 35379.0 228 | 939.61792 77289.0 229 | 946.257812 38584.0 230 | 949.370239 85420.0 231 | 950.284546 5976.0 232 | 951.551758 29995.0 233 | 953.396606 545281.0 234 | 954.491577 123937.0 235 | 963.686768 261578.0 236 | 964.524658 318164.0 237 | 965.192139 124405.0 238 | 982.221924 27147.0 239 | END IONS 240 | 241 | 242 | BEGIN IONS 243 | PEPMASS=940.25 244 | CHARGE=0 245 | MSLEVEL=2 246 | SOURCE_INSTRUMENT=LC-ESI-qTof 247 | FILENAME=20111105_Anada_Ger_HoiamideB_MH940_qb.1.1..mgf 248 | SEQ=*..* 249 | IONMODE=Positive 250 | ORGANISM=GNPS-LIBRARY 251 | NAME=Hoiamide B M+H 252 | PI=Gerwick 253 | DATACOLLECTOR=Amanda 254 | SMILES=CCC[C@@H](C)[C@@H]([C@H](C)[C@@H]1[C@H]([C@H](Cc2nc(cs2)C3=N[C@](CS3)(C4=N[C@](CS4)(C(=O)N[C@H]([C@H]([C@H](C(=O)O[C@H](C(=O)N[C@H](C(=O)O1)[C@@H](C)O)[C@@H](C)CC)C)O)[C@@H](C)CC)C)C)OC)C)O 255 | INCHI=InChI=1S/C45H73N5O10S3/c1-14-17-24(6)34(52)26(8)37-25(7)30(58-13)18-31-46-29(19-61-31)39-49-45(12,21-62-39)43-50-44(11,20-63-43)42(57)48-32(22(4)15-2)35(53)27(9)40(55)59-36(23(5)16-3)38(54)47-33(28(10)51)41(56)60-37/h19,22-28,30,32-37,51-53H,14-18,20-21H2,1-13H3,(H,47,54)(H,48,57)/t22-,23-,24+,25-,26-,27+,28+,30-,32-,33-,34-,35-,36-,37-,44+,45+/m0/s1 256 | INCHIAUX=N/A 257 | PUBMED=N/A 258 | SUBMITUSER=mwang87 259 | LIBRARYQUALITY=1 260 | SPECTRUMID=CCMSLIB00000001548 261 | SCANS=1 262 | 278.049927 35793.0 263 | 278.957642 47593.0 264 | 281.258667 95495.0 265 | 291.996094 115278.0 266 | 293.827637 91752.0 267 | 299.065979 129040.0 268 | 309.152832 62974.0 269 | 324.236206 31157.0 270 | 334.968811 115627.0 271 | 341.283569 30547.0 272 | 343.978516 94124.0 273 | 353.147217 205611.0 274 | 353.84436 1.0 275 | 359.101166 581387.0 276 | 359.888855 18.0 277 | 368.931641 40369.0 278 | 375.024811 191523.0 279 | 375.664062 39.0 280 | 377.287628 934209.0 281 | 378.192047 295658.0 282 | 380.225586 99848.0 283 | 381.088074 28994.0 284 | 383.194763 126064.0 285 | 384.326294 119935.0 286 | 390.16333 
132876.0 287 | 391.216492 152650.0 288 | 397.480957 49394.0 289 | 405.885986 15096.0 290 | 408.151367 50942.0 291 | 408.941162 1521917.0 292 | 410.069397 235028.0 293 | 412.908936 19523.0 294 | 415.317749 40832.0 295 | 416.189331 116396.0 296 | 421.346222 822444.0 297 | 422.236237 406319.0 298 | 425.736572 33271.0 299 | 430.427429 115251.0 300 | 431.030762 152986.0 301 | 436.663208 30867.0 302 | 453.213257 215667.0 303 | 453.853271 2.0 304 | 456.716797 125621.0 305 | 457.249573 718325.0 306 | 458.258514 121013.0 307 | 459.323029 42422.0 308 | 462.352295 53209.0 309 | 468.064392 56113.0 310 | 471.292297 101158.0 311 | 472.837769 17471.0 312 | 475.22522 1066899.0 313 | 476.323944 931888.0 314 | 477.293152 314415.0 315 | 481.310425 20995.0 316 | 486.095825 35999.0 317 | 488.752777 97793.0 318 | 490.151245 189397.0 319 | 491.134399 38134.0 320 | 492.823486 167781.0 321 | 493.837646 123040.0 322 | 499.340332 68929.0 323 | 500.92627 35192.0 324 | 502.819702 346672.0 325 | 503.328186 1789291.0 326 | 503.844055 115544.0 327 | 504.448425 209601.0 328 | 505.521667 210506.0 329 | 506.153992 23586.0 330 | 507.28186 1756109.0 331 | 508.139343 920658.0 332 | 508.715088 5.0 333 | 509.322998 67825.0 334 | 510.159729 125144.0 335 | 511.108826 44963.0 336 | 513.214233 63849.0 337 | 514.097351 157893.0 338 | 515.079712 76460.0 339 | 518.323853 32014.0 340 | 519.041016 240930.0 341 | 521.166443 2474045.0 342 | 522.086304 485376.0 343 | 522.963379 290176.0 344 | 529.374146 106411.0 345 | 530.899231 722904.0 346 | 532.089783 189952.0 347 | 532.834473 71377.0 348 | 534.093506 79577.0 349 | 535.231201 533187.0 350 | 536.079834 205958.0 351 | 536.935913 19616.0 352 | 537.806885 427771.0 353 | 538.545166 14.0 354 | 539.760132 185155.0 355 | 543.283936 115082.0 356 | 545.483398 53069.0 357 | 553.24353 10619710.0 358 | 554.189453 1265687.0 359 | 555.107849 885736.0 360 | 556.199341 35655.0 361 | 557.188904 144709.0 362 | 558.325073 276628.0 363 | 559.441162 38371.0 364 | 563.392944 369187.0 
365 | 564.342773 53377.0 366 | 565.162598 153259.0 367 | 567.527832 90530.0 368 | 569.09668 13406.0 369 | 573.235718 244664.0 370 | 573.872803 48.0 371 | 575.213562 846336.0 372 | 576.479736 79230.0 373 | 582.099854 85856.0 374 | 583.982666 34163.0 375 | 584.817871 17642.0 376 | 585.449829 126403.0 377 | 587.480713 30454.0 378 | 589.125 202774.0 379 | 589.857056 3.0 380 | 591.335327 324628.0 381 | 593.54541 59873.0 382 | 599.153564 39495.0 383 | 601.312927 773465.0 384 | 602.344727 103997.0 385 | 605.229919 390697.0 386 | 606.044434 1.0 387 | 607.083252 438106.0 388 | 607.802979 32.0 389 | 611.162537 126605.0 390 | 613.219177 1278729.0 391 | 614.208252 134599.0 392 | 614.943481 32793.0 393 | 616.26123 35333.0 394 | 617.239929 1181920.0 395 | 617.832764 3566.0 396 | 619.109741 611095.0 397 | 620.09021 215720.0 398 | 623.606201 32893.0 399 | 625.371094 162473.0 400 | 629.113037 397263.0 401 | 631.285767 837261.0 402 | 632.427246 426749.0 403 | 633.305969 773961.0 404 | 634.005371 139725.0 405 | 635.159546 104212.0 406 | 638.429932 50215.0 407 | 639.142578 211014.0 408 | 644.970947 126358.0 409 | 645.833496 400437.0 410 | 646.540649 13.0 411 | 647.614746 28622.0 412 | 649.493652 92063.0 413 | 651.267822 2952105.0 414 | 652.501709 948097.0 415 | 653.303345 261489.0 416 | 655.203003 33201.0 417 | 656.197632 63099.0 418 | 657.307983 3027254.0 419 | 658.356934 577368.0 420 | 659.635254 85949.0 421 | 661.488525 480492.0 422 | 662.373413 76094.0 423 | 663.271667 5528330.0 424 | 664.294983 1893201.0 425 | 665.272827 48.0 426 | 671.457947 584357.0 427 | 672.555908 35863.0 428 | 675.252808 3062771.0 429 | 676.276123 1149021.0 430 | 677.002197 57.0 431 | 679.320496 347430.0 432 | 688.130615 336462.0 433 | 689.338928 5507539.0 434 | 690.380371 939865.0 435 | 691.865601 127803.0 436 | 693.103333 441499.0 437 | 694.241089 177434.0 438 | 694.866699 3.0 439 | 706.645874 764079.0 440 | 707.298584 11425631.0 441 | 708.41217 3282238.0 442 | 709.281738 37425.0 443 | 710.297302 104548.0 
444 | 711.575684 59794.0 445 | 714.353027 293763.0 446 | 720.310608 463766.0 447 | 723.610229 67559.0 448 | 724.423279 333180.0 449 | 725.338684 2697145.0 450 | 726.246155 477936.0 451 | 727.232544 407357.0 452 | 727.836182 23.0 453 | 731.005493 58569.0 454 | 733.947266 42640.0 455 | 734.489502 322546.0 456 | 735.21875 30.0 457 | 736.155151 161121.0 458 | 738.186523 1302348.0 459 | 738.97168 187994.0 460 | 739.817383 181407.0 461 | 740.513184 61.0 462 | 743.460449 177286.0 463 | 744.158691 26051.0 464 | 746.365784 668916.0 465 | 746.917114 45488.0 466 | 749.86853 33868.0 467 | 752.247192 1273569.0 468 | 753.092773 124216.0 469 | 753.863403 42270.0 470 | 754.550781 53958.0 471 | 756.299866 83498.0 472 | 758.105103 72039.0 473 | 760.394287 285768.0 474 | 761.139648 92506.0 475 | 762.440063 487254.0 476 | 763.821289 118871.0 477 | 764.401733 78746.0 478 | 765.879333 244224.0 479 | 766.875977 103968.0 480 | 768.550537 359217.0 481 | 769.536011 71507.0 482 | 770.24707 6843020.0 483 | 771.345947 1834188.0 484 | 772.112915 291081.0 485 | 772.859253 98762.0 486 | 773.393433 15544.0 487 | 774.196289 89552.0 488 | 775.369934 162814.0 489 | 776.830322 39987.0 490 | 778.437683 568287.0 491 | 779.430786 387544.0 492 | 780.564697 96591.0 493 | 782.339478 146674.0 494 | 786.388916 33301.0 495 | 788.550964 179046.0 496 | 789.33606 752627.0 497 | 790.289795 2323301.0 498 | 791.421265 491574.0 499 | 792.208252 468317.0 500 | 792.876465 10.0 501 | 793.950195 158696.0 502 | 795.489502 38698.0 503 | 796.091187 954908.0 504 | 797.200439 337367.0 505 | 798.289185 46495.0 506 | 800.432495 111840.0 507 | 801.090576 18.0 508 | 802.506714 113841.0 509 | 803.42688 382726.0 510 | 804.150146 305130.0 511 | 805.09314 72883.0 512 | 805.733887 25871.0 513 | 806.318115 161990.0 514 | 807.514771 70266.0 515 | 808.27356 21070504.0 516 | 809.333984 4046391.0 517 | 810.207275 259344.0 518 | 811.469727 27873.0 519 | 812.310425 187082.0 520 | 813.238647 70162.0 521 | 814.210083 462427.0 522 | 814.969604 
5.0 523 | 818.325684 249639.0 524 | 819.179932 5.0 525 | 821.257446 2401369.0 526 | 822.402405 1322524.0 527 | 823.065796 26639.0 528 | 826.38385 1256031.0 529 | 827.219849 5.0 530 | 828.190063 2491409.0 531 | 829.269775 415142.0 532 | 832.262146 456122.0 533 | 833.509155 211714.0 534 | 834.274414 8.0 535 | 836.505859 107931.0 536 | 837.125854 4.0 537 | 837.99231 18698.0 538 | 839.326721 2462282.0 539 | 840.278259 8756806.0 540 | 841.229004 1659474.0 541 | 842.093811 260534.0 542 | 843.241211 234347.0 543 | 844.313904 2187139.0 544 | 845.152466 83303.0 545 | 846.27002 2781280.0 546 | 847.444214 1204369.0 547 | 848.088745 3.0 548 | 850.265015 922613.0 549 | 851.186401 116222.0 550 | 854.273743 1016497.0 551 | 855.291382 102364.0 552 | 858.455566 702571.0 553 | 860.296204 2520372.0 554 | 861.256592 530700.0 555 | 862.471863 1859168.0 556 | 863.435181 549532.0 557 | 864.379272 443001.0 558 | 865.002441 70558.0 559 | 866.333496 12614.0 560 | 868.264954 134340.0 561 | 871.557007 123565.0 562 | 872.289917 18706856.0 563 | 873.345642 2718281.0 564 | 874.043213 315833.0 565 | 876.373779 5434008.0 566 | 876.880249 19.0 567 | 877.403809 1670336.0 568 | 878.322937 10828502.0 569 | 879.327148 2115364.0 570 | 880.158447 114122.0 571 | 886.277527 2620326.0 572 | 887.16687 545701.0 573 | 887.893188 5.0 574 | 890.246277 55866120.0 575 | 891.272888 13506386.0 576 | 892.020142 75.0 577 | 893.624634 44855.0 578 | 894.326416 18938176.0 579 | 895.376404 5114120.0 580 | 896.202576 979881.0 581 | 897.012695 226445.0 582 | 904.195679 46132128.0 583 | 905.224548 12148538.0 584 | 906.084473 13.0 585 | 906.811646 12834.0 586 | 908.183472 17888056.0 587 | 909.235107 4328052.0 588 | 910.25885 56715.0 589 | 912.229492 168909.0 590 | 913.382202 64191.0 591 | 921.45813 892847.0 592 | 922.178101 109387080.0 593 | 923.155762 23455760.0 594 | 924.007812 68.0 595 | 940.099915 1264261.0 596 | 940.88324 2.0 597 | END IONS 598 | 599 | 600 | BEGIN IONS 601 | PEPMASS=456.1 602 | CHARGE=0 603 | MSLEVEL=2 
604 | SOURCE_INSTRUMENT=LC-ESI-qTof 605 | FILENAME=20111105_Jenia_Ger_MalyngamideC_MH_456_qb.1.1..mgf 606 | SEQ=*..* 607 | IONMODE=Positive 608 | ORGANISM=GNPS-LIBRARY 609 | NAME=Malyngamide C M+H 610 | PI=Gerwick 611 | DATACOLLECTOR=Amanda 612 | SMILES=CCCCCCC[C@@H](C/C=C/CCC(=O)NC/C(=C/Cl)/[C@@]12[C@@H](O1)[C@H](CCC2=O)O)OC 613 | INCHI=InChI=1S/C24H38ClNO5/c1-3-4-5-6-8-11-19(30-2)12-9-7-10-13-22(29)26-17-18(16-25)24-21(28)15-14-20(27)23(24)31-24/h7,9,16,19-20,23,27H,3-6,8,10-15,17H2,1-2H3,(H,26,29)/b9-7+,18-16-/t19-,20-,23-,24+/m0/s1 614 | INCHIAUX=N/A 615 | PUBMED=N/A 616 | SUBMITUSER=mwang87 617 | LIBRARYQUALITY=1 618 | SPECTRUMID=CCMSLIB00000001549 619 | SCANS=1 620 | 128.838745 8064.0 621 | 132.075684 8080.0 622 | 132.830322 18139.0 623 | 134.038437 11099.0 624 | 135.861877 4449.0 625 | 136.808807 25689.0 626 | 138.058044 10764.0 627 | 138.973206 4246.0 628 | 139.9487 1724.0 629 | 144.946014 7320.0 630 | 145.902298 49972.0 631 | 146.937012 51801.0 632 | 148.894623 28649.0 633 | 149.97699 1863.0 634 | 153.859802 7103.0 635 | 154.713699 36921.0 636 | 155.846817 2060.0 637 | 156.714844 6284.0 638 | 158.752625 1735.0 639 | 160.830704 16155.0 640 | 163.185608 533.0 641 | 163.910172 252195.0 642 | 164.623047 32682.0 643 | 165.733948 3395.0 644 | 166.981857 13151.0 645 | 171.847717 104840.0 646 | 172.495239 1.0 647 | 173.275482 12953.0 648 | 173.914734 5425.0 649 | 174.843506 1678.0 650 | 179.897583 20765.0 651 | 181.835129 183621.0 652 | 182.753876 156636.0 653 | 183.335068 12.0 654 | 183.913422 5378.0 655 | 185.065643 3780.0 656 | 186.137329 811.0 657 | 189.110214 39074.0 658 | 197.980545 6792.0 659 | 199.184845 10571.0 660 | 199.844528 624963.0 661 | 200.777771 142047.0 662 | 201.483322 1.0 663 | 203.099976 3409.0 664 | 205.09613 13589.0 665 | 205.86319 3181.0 666 | 206.984756 46408.0 667 | 208.040466 3242.0 668 | 208.964996 22175.0 669 | 210.259705 710.0 670 | 212.964813 16141.0 671 | 214.151306 13266.0 672 | 215.761108 11654.0 673 | 216.936066 1618.0 674 | 
217.831894 937927.0 675 | 218.579987 3.0 676 | 221.636169 2399.0 677 | 222.598358 9334.0 678 | 223.988632 80101.0 679 | 224.586639 1.0 680 | 229.886719 10375.0 681 | 234.017761 27179.0 682 | 236.033844 38385.0 683 | 237.818314 31662.0 684 | 238.861313 28934.0 685 | 239.690002 11188.0 686 | 240.222229 1.0 687 | 241.927307 30061.0 688 | 242.991882 2003.0 689 | 243.90564 10750.0 690 | 247.445831 15595.0 691 | 251.955292 27835.0 692 | 254.062531 4240.0 693 | 254.630402 1254.0 694 | 255.87674 2013.0 695 | 258.053741 23367.0 696 | 259.056335 17488.0 697 | 260.177734 7311.0 698 | 260.765381 7718.0 699 | 262.098389 21095.0 700 | 263.673523 10252.0 701 | 264.552368 37683.0 702 | 268.199219 16811.0 703 | 269.090881 5872.0 704 | 271.308411 435.0 705 | 272.420837 1091.0 706 | 272.947662 29580.0 707 | 274.890625 3256.0 708 | 275.762695 12200.0 709 | 277.944397 3182.0 710 | 281.043823 1494.0 711 | 282.318359 5854.0 712 | 283.845001 23247.0 713 | 284.954407 2428.0 714 | 285.89978 2482.0 715 | 287.046417 24986.0 716 | 288.275391 4782.0 717 | 289.689941 17680.0 718 | 290.959991 6664.0 719 | 292.850342 11947.0 720 | 295.933075 17736.0 721 | 297.365479 10209.0 722 | 297.999786 9928.0 723 | 299.985352 2090.0 724 | 307.71936 52460.0 725 | 309.054688 13865.0 726 | 310.058533 3309.0 727 | 310.919434 12360.0 728 | 312.388794 2471.0 729 | 313.061646 5753.0 730 | 313.898224 654448.0 731 | 314.618134 1.0 732 | 316.06897 2603.0 733 | 321.953339 36012.0 734 | 323.073242 30548.0 735 | 323.795776 461.0 736 | 324.946899 24349.0 737 | 325.870728 1667.0 738 | 328.325989 1378.0 739 | 334.002808 33185.0 740 | 335.118286 3659.0 741 | 339.90567 14517.0 742 | 341.105286 572.0 743 | 342.098663 28518.0 744 | 342.997925 1492.0 745 | 345.584991 5009.0 746 | 350.237061 1429.0 747 | 352.251495 70368.0 748 | 353.108459 16312.0 749 | 357.992676 5510.0 750 | 360.06662 6277.0 751 | 361.583008 13914.0 752 | 362.816467 8611.0 753 | 367.725464 1254.0 754 | 370.019318 577662.0 755 | 370.866577 7.0 756 | 372.178589 
6989.0 757 | 378.005493 445431.0 758 | 378.671936 4.0 759 | 380.209106 22266.0 760 | 387.997772 262881.0 761 | 388.711029 3.0 762 | 395.914978 858.0 763 | 401.854828 51090.0 764 | 406.004456 627822.0 765 | 406.774811 11.0 766 | 409.659729 1702.0 767 | 419.272339 10655.0 768 | 420.101593 70422.0 769 | 423.253418 295910.0 770 | 423.916626 32604272.0 771 | 424.812042 25.0 772 | 426.196167 18399.0 773 | 437.281433 18805.0 774 | 438.103607 28144.0 775 | 439.510559 1335.0 776 | 455.885986 4412.0 777 | END IONS 778 | 779 | 780 | BEGIN IONS 781 | PEPMASS=545.0 782 | CHARGE=0 783 | MSLEVEL=2 784 | SOURCE_INSTRUMENT=LC-ESI-Ion Trap 785 | FILENAME=20111105_Jenia_Ger_Scytonemin_MH_545_qb.1.1..mgf 786 | SEQ=*..* 787 | IONMODE=Positive 788 | ORGANISM=GNPS-LIBRARY 789 | NAME=Scytonemin M+H 790 | PI=Gerwick 791 | DATACOLLECTOR=Amanda 792 | SMILES=OC1=CC=C(\C=C2\C(=O)C(C3=C4C5=C(C=CC=C5)N=C4\C(=C/C4=CC=C(O)C=C4)C3=O)=C3C4=C(C=CC=C4)N=C23)C=C1 793 | INCHI=InChI=1S/C36H20N2O4/c39-21-13-9-19(10-14-21)17-25-33-29(23-5-1-3-7-27(23)37-33)31(35(25)41)32-30-24-6-2-4-8-28(24)38-34(30)26(36(32)42)18-20-11-15-22(40)16-12-20/h1-18,39-40H/b25-17+,26-18+ 794 | INCHIAUX=CGZKSPLDUIRCIO-RPCRKUJJSA-N 795 | PUBMED=N/A 796 | SUBMITUSER=mwang87 797 | LIBRARYQUALITY=1 798 | SPECTRUMID=CCMSLIB00000001550 799 | SCANS=1 800 | 343.896484 142503.0 801 | 345.458496 67939.0 802 | 372.684021 175247.0 803 | 386.097473 65864.0 804 | 395.15564 104575.0 805 | 399.223022 60954.0 806 | 406.367249 31184.0 807 | 408.018677 100834.0 808 | 411.038452 32080.0 809 | 412.037964 57989.0 810 | 425.144653 113047.0 811 | 428.946472 18204.0 812 | 439.259216 94641.0 813 | 451.303467 109804.0 814 | 453.173279 83206.0 815 | 471.290649 97971.0 816 | 473.082581 77358.0 817 | 474.340637 39583.0 818 | 484.037598 69735.0 819 | 488.428955 406750.0 820 | 489.287537 7417608.0 821 | 490.31311 4128547.0 822 | 491.288513 3.0 823 | 498.630005 112346.0 824 | 499.849731 163597.0 825 | 500.469604 33473.0 826 | 501.284241 87997.0 827 | 502.14502 
106179.0 828 | 504.172668 27900.0 829 | 516.481567 1003328.0 830 | 517.132935 12979350.0 831 | 518.111328 7024024.0 832 | 518.976562 67.0 833 | 528.132935 9615938.0 834 | 529.155762 4285929.0 835 | 530.238403 96747.0 836 | 544.155518 4675899.0 837 | 545.082642 43057184.0 838 | 546.063965 11037290.0 839 | 546.972168 9.0 840 | END IONS 841 | --------------------------------------------------------------------------------