├── .Rbuildignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R ├── createFigure1.R ├── createFigure1_color.R ├── getPathFiedler2009.R ├── getPathNyakas2013.R ├── getPathSpecies.R └── package.R ├── README.md ├── inst ├── doc │ ├── fiedler2009.R │ ├── fiedler2009.pdf │ ├── nyakas2013.R │ ├── nyakas2013.pdf │ ├── species.R │ └── species.pdf ├── extdata │ ├── fiedler2009 │ │ ├── spectra.tar.gz │ │ └── spectra_info.csv │ ├── nyakas2013 │ │ └── spectra.tar.gz │ └── species │ │ └── spectra.tar.gz └── scripts │ ├── fiedler2009 │ └── createSpectraInfoTable.R │ └── nyakas2013 │ └── createExtDataset.sh ├── man ├── MALDIquantExamples-package.Rd ├── createFigure1.Rd ├── createFigure1Color.Rd ├── getPathFiedler2009.Rd ├── getPathNyakas2013.Rd └── getPathSpecies.Rd └── vignettes ├── bibliography.bib ├── fiedler2009.Rnw ├── foreword.tex ├── nyakas2013.Rnw ├── species.Rnw ├── utils.tex └── vignettes.tex /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^README\.md$ 2 | ^\.git$ 3 | ^\.gitignore$ 4 | ^\.travis\.yml$ 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | sudo: false 3 | cache: packages 4 | warnings_are_errors: true 5 | addons: 6 | apt: 7 | packages: 8 | - libudunits2-dev 9 | - libnetcdf-dev 10 | 11 | r: 12 | # - oldrel 13 | - release 14 | - devel 15 | 16 | notifications: 17 | email: 18 | on_success: change 19 | on_failure: change 20 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: MALDIquantExamples 2 | Version: 0.4 3 | Date: 2015-06-06 4 | Title: Examples for MALDIquant 5 | Authors@R: c(person("Sebastian", "Gibb", role=c("aut", "cre"), 6 | email="mail@sebastiangibb.de"), person("Korbinian", "Strimmer", 7 | role="ths")) 8
| Depends: R (>= 3.0.0), MALDIquant (>= 1.11.14), 9 | MALDIquantForeign (>= 0.7), sda (>= 1.3.3), 10 | crossval (>= 1.0.1), pvclust (>= 1.3-0) 11 | Suggests: knitr, xtable 12 | Description: This package provides example scripts and data to 13 | demonstrate the usage of MALDIquant. 14 | License: GPL (>= 3) 15 | URL: http://strimmerlab.org/software/maldiquant/ 16 | https://github.com/sgibb/MALDIquantExamples/ 17 | BugReports: https://github.com/sgibb/MALDIquantExamples/issues/ 18 | LazyLoad: yes 19 | VignetteBuilder: knitr 20 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2 (4.1.1): do not edit by hand 2 | 3 | export(createFigure1) 4 | export(createFigure1Color) 5 | export(getPathFiedler2009) 6 | export(getPathNyakas2013) 7 | export(getPathSpecies) 8 | import(MALDIquant) 9 | import(MALDIquantForeign) 10 | import(crossval) 11 | import(pvclust) 12 | import(sda) 13 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | RELEASE HISTORY OF THE "MALDIquantExamples" PACKAGE 2 | =================================================== 3 | 4 | Version 0.4 [2015-06-06]: 5 | - Add new msi vignette (nyakas2013.pdf). 6 | - Remove old ims.pdf vignette. 7 | 8 | Version 0.3 [2014-10-13]: 9 | - species vignette: use hclust.method="ward.D2" (see `?hclust` for details); 10 | MALDIquantExamples depends on pvclust 1.3-0 now. 11 | 12 | Version 0.2.3 [2014-08-17]: 13 | - adapt ims example to MALDIquant 1.11: remove plotImsSlice function which is 14 | now part of MALDIquant 15 | 16 | Version 0.2.2 [2014-07-12]: 17 | - adapt ims example to MALDIquantForeign 0.8 (add "centroided=TRUE").
18 | 19 | Version 0.2.1 [2014-07-11]: 20 | - `plotImsSlice`: add "na.rm=TRUE" to scaling function to allow plotting of 21 | incomplete subsets of datasets. 22 | 23 | Version 0.2 [2014-06-15]: 24 | - Add species vignette. 25 | 26 | Version 0.1 [2014-05-20]: 27 | - First public release. 28 | 29 | -------------------------------------------------------------------------------- /R/createFigure1.R: -------------------------------------------------------------------------------- 1 | #' This function creates Figure 1 in Gibb and Strimmer 2012. 2 | #' 3 | #' @title Figure 1 4 | #' 5 | #' @references 6 | #' S. Gibb and K. Strimmer. 2012. MALDIquant: a versatile R package for the 7 | #' analysis of mass spectrometry data. Bioinformatics 28: 2270-2271 8 | #' 9 | #' @seealso \code{\link[MALDIquantExamples]{createFigure1Color}} 10 | #' 11 | #' @export 12 | #' @examples 13 | #' \dontrun{ 14 | #' library("MALDIquantExamples") 15 | #' pdfWidth <- 7 16 | #' pdfHeight <- pdfWidth*0.35 17 | #' 18 | #' pdf(file="figure1.pdf", height=pdfHeight, width=pdfWidth) 19 | #' createFigure1() 20 | #' dev.off() 21 | #' } 22 | 23 | createFigure1 <- function() { 24 | ## load example spectra 25 | data("fiedler2009subset", package="MALDIquant") 26 | 27 | ## some preprocessing 28 | 29 | ## sqrt transform (for variance stabilization) 30 | tSpectra <- transformIntensity(fiedler2009subset, method="sqrt") 31 | 32 | ## simple 5 point moving average for smoothing spectra 33 | tSpectra <- smoothIntensity(tSpectra, method="MovingAverage", 34 | halfWindowSize=2) 35 | 36 | ## remove baseline 37 | rbSpectra <- removeBaseline(tSpectra) 38 | 39 | ## calibrate intensity values by "total ion current" 40 | cbSpectra <- calibrateIntensity(rbSpectra, method="TIC") 41 | 42 | ## run peak detection 43 | peaks <- detectPeaks(cbSpectra, SNR=5) 44 | 45 | ### warping: estimate warping functions that align each peak list to the reference peaks 46 | reference <- referencePeaks(peaks) 47 | warpingFunctions <- determineWarpingFunctions(peaks, reference=reference, 48 | tolerance=0.001) 49 | 50 | ## warp
spectra 51 | warpedSpectra <- warpMassSpectra(cbSpectra, warpingFunctions) 52 | ## warp peaks 53 | warpedPeaks <- warpMassPeaks(peaks, warpingFunctions) 54 | 55 | ## merge technical replicates 56 | mergedSpectra <- averageMassSpectra(warpedSpectra, rep(1:8, each=2)) 57 | 58 | binnedPeaks <- binPeaks(warpedPeaks) 59 | mergedPeaks <- mergeMassPeaks(binnedPeaks, rep(1:8, each=2)) 60 | 61 | ## helper function to label a panel with its letter (A-F) near the top-right corner 62 | labelPlot <- function(char, cex=1.5) { 63 | usr <- par("usr") 64 | text(x=usr[2]-(cex*strwidth(char)), 65 | y=usr[4]-(cex*strheight(char)), 66 | labels=char, cex=cex) 67 | } 68 | 69 | par(mfrow=c(2, 3)) # 2 x 3 panel grid; NOTE(review): the par() settings changed in this function are not restored when it returns 70 | par(cex=0.4) 71 | par(yaxt="n") 72 | par(mar=c(2.5, 1, 1, 1)) # bottom, left, top, right 73 | 74 | xlim <- c(1e3, 1e4) 75 | ## select one spectrum for plots A/B 76 | AB <- 14 77 | 78 | ## first row 79 | ## plot A 80 | plot(fiedler2009subset[[AB]], lwd=0.25, sub="", main="", ylab="", xlab="", 81 | xlim=xlim) 82 | labelPlot("A") 83 | 84 | ## plot B 85 | plot(cbSpectra[[AB]], lwd=0.25, sub="", main="", ylab="", xlab="", xlim=xlim) 86 | points(peaks[[AB]], pch=4, lwd=0.25) 87 | labelPlot("B") 88 | 89 | ## plot C 90 | par(yaxt="s") 91 | par(mar=c(2.5, 2, 1, 1)) # bottom, left, top, right 92 | determineWarpingFunctions(peaks[[10]], reference=reference, 93 | tolerance=0.001, plot=TRUE, plotInteractive=TRUE, 94 | ylim=c(-2, 4), lwd=0.5, 95 | xlab="", ylab="", main="", sub="") 96 | labelPlot("C") 97 | 98 | ## second row 99 | par(mar=c(4, 1, 1, 1)) # bottom, left, top, right 100 | par(yaxt="n") 101 | 102 | ## select 4 spectra for plots D/E 103 | DE <- c(2, 10, 14, 16) 104 | ## limits for plots D/E 105 | xlimDE <- c(4180, 4240) 106 | ylimDE <- c(0, 1.9e-3) 107 | ## line types 108 | lty <- c(1, 4, 2, 6) 109 | 110 | ## plot D 111 | plot(cbSpectra[[1]], xlim=xlimDE, ylim=ylimDE, type="n", 112 | main="", xlab="", ylab="") 113 | labelPlot("D") 114 | 115 | for (i in seq(along=DE)) { 116 | lines(peaks[[DE[i]]], lty=lty[i], lwd=0.5) 117 |
lines(cbSpectra[[DE[i]]], lty=lty[i], lwd=0.5) 118 | } 119 | 120 | ## plot E 121 | plot(cbSpectra[[1]], xlim=xlimDE, ylim=ylimDE, type="n", 122 | main="", xlab="", ylab="") 123 | labelPlot("E") 124 | 125 | for (i in seq(along=DE)) { 126 | lines(warpedPeaks[[DE[i]]], lty=lty[i], lwd=0.5) 127 | lines(warpedSpectra[[DE[i]]], lty=lty[i], lwd=0.5) 128 | } 129 | 130 | ## plot F (merged spectrum 7); NOTE(review): the local name F masks base R's F alias for FALSE inside this function 131 | F <- 7 132 | plot(mergedSpectra[[F]], lwd=0.25, sub="", main="", ylab="", xlab="", 133 | xlim=xlim, ylim=c(0, (max(intensity(mergedSpectra[[F]]))*1.05))) 134 | labelPlot("F") 135 | points(mergedPeaks[[F]], lwd=0.25, pch=4) 136 | ## label peaks whose intensity is among the ten highest values 137 | top <- intensity(mergedPeaks[[F]]) %in% 138 | sort(intensity(mergedPeaks[[F]]), decreasing=TRUE)[1:10] 139 | labelPeaks(mergedPeaks[[F]], index=top, underline=TRUE, cex=0.8, lwd=0.25) 140 | } 141 | 142 | -------------------------------------------------------------------------------- /R/createFigure1_color.R: -------------------------------------------------------------------------------- 1 | #' This function creates Figure 1 in Gibb and Strimmer 2012 in a colorized 2 | #' version. 3 | #' 4 | #' @title Figure 1 (colorized) 5 | #' 6 | #' @references 7 | #' S. Gibb and K. Strimmer. 2012. MALDIquant: a versatile R package for the 8 | #' analysis of mass spectrometry data.
Bioinformatics 28: 2270-2271 9 | #' 10 | #' @seealso \code{\link[MALDIquantExamples]{createFigure1}} 11 | #' 12 | #' @export 13 | #' @examples 14 | #' \dontrun{ 15 | #' library("MALDIquantExamples") 16 | #' pdfWidth <- 7 17 | #' pdfHeight <- pdfWidth*0.35 18 | #' 19 | #' pdf(file="figure1.pdf", height=pdfHeight, width=pdfWidth) 20 | #' createFigure1Color() 21 | #' dev.off() 22 | #' } 23 | 24 | createFigure1Color <- function() { 25 | ## load example spectra 26 | data("fiedler2009subset", package="MALDIquant") 27 | 28 | ## some preprocessing 29 | 30 | ## sqrt transform (for variance stabilization) 31 | tSpectra <- transformIntensity(fiedler2009subset, method="sqrt") 32 | 33 | ## simple 5 point moving average for smoothing spectra 34 | tSpectra <- smoothIntensity(tSpectra, method="MovingAverage", 35 | halfWindowSize=2) 36 | 37 | ## remove baseline 38 | rbSpectra <- removeBaseline(tSpectra) 39 | 40 | ## calibrate intensity values by "total ion current" 41 | cbSpectra <- calibrateIntensity(rbSpectra, method="TIC") 42 | 43 | ## run peak detection 44 | peaks <- detectPeaks(cbSpectra, SNR=5) 45 | 46 | ### warping: estimate warping functions that align each peak list to the reference peaks 47 | reference <- referencePeaks(peaks) 48 | warpingFunctions <- determineWarpingFunctions(peaks, reference=reference, 49 | tolerance=0.001) 50 | 51 | ## warp spectra 52 | warpedSpectra <- warpMassSpectra(cbSpectra, warpingFunctions) 53 | ## warp peaks 54 | warpedPeaks <- warpMassPeaks(peaks, warpingFunctions) 55 | 56 | ## merge technical replicates 57 | mergedSpectra <- averageMassSpectra(warpedSpectra, rep(1:8, each=2)) 58 | 59 | binnedPeaks <- binPeaks(warpedPeaks) 60 | mergedPeaks <- mergeMassPeaks(binnedPeaks, rep(1:8, each=2)) 61 | 62 | ## helper function to label a panel with its letter (A-F) near the top-right corner 63 | labelPlot <- function(char, cex=1.5) { 64 | usr <- par("usr") 65 | text(x=usr[2]-(cex*strwidth(char)), 66 | y=usr[4]-(cex*strheight(char)), 67 | labels=char, cex=cex) 68 | } 69 | 70 | par(mfrow=c(2, 3)) # 2 x 3 panel grid; NOTE(review): the par() settings changed in this function are not restored when it returns 71 | par(cex=0.4) 72 | par(yaxt="n") 73 | par(mar=c(2.5, 1, 1, 1)) #
bottom, left, top, right 74 | 75 | xlim <- c(1e3, 1e4) 76 | ## select one spectrum for plots A/B 77 | AB <- 14 78 | 79 | ## first row 80 | ## plot A 81 | plot(fiedler2009subset[[AB]], lwd=0.25, sub="", main="", ylab="", xlab="", 82 | xlim=xlim) 83 | lines(estimateBaseline(fiedler2009subset[[AB]]), lwd=0.75, col=2) 84 | labelPlot("A") 85 | 86 | ## plot B 87 | plot(cbSpectra[[AB]], lwd=0.25, sub="", main="", ylab="", xlab="", xlim=xlim) 88 | points(peaks[[AB]], pch=4, lwd=0.25, col=4) 89 | labelPlot("B") 90 | 91 | ## plot C 92 | par(yaxt="s") 93 | par(mar=c(2.5, 2, 1, 1)) # bottom, left, top, right 94 | determineWarpingFunctions(peaks[[10]], reference=reference, 95 | tolerance=0.001, plot=TRUE, plotInteractive=TRUE, 96 | ylim=c(-2, 4), lwd=0.5, 97 | xlab="", ylab="", main="", sub="") 98 | labelPlot("C") 99 | 100 | ## second row 101 | par(mar=c(4, 1, 1, 1)) # bottom, left, top, right 102 | par(yaxt="n") 103 | 104 | ## select 4 spectra for plots D/E 105 | DE <- c(2, 10, 14, 16) 106 | ## limits for plots D/E 107 | xlimDE <- c(4180, 4240) 108 | ylimDE <- c(0, 1.9e-3) 109 | ## line types 110 | lty <- c(1, 4, 2, 6) 111 | 112 | ## plot D 113 | plot(cbSpectra[[1]], xlim=xlimDE, ylim=ylimDE, type="n", 114 | main="", xlab="", ylab="") 115 | labelPlot("D") 116 | 117 | for (i in seq(along=DE)) { 118 | lines(peaks[[DE[i]]], lty=lty[i], lwd=0.5, col=i) 119 | lines(cbSpectra[[DE[i]]], lty=lty[i], lwd=0.5, col=i) 120 | } 121 | 122 | ## plot E 123 | plot(cbSpectra[[1]], xlim=xlimDE, ylim=ylimDE, type="n", 124 | main="", xlab="", ylab="") 125 | labelPlot("E") 126 | 127 | for (i in seq(along=DE)) { 128 | lines(warpedPeaks[[DE[i]]], lty=lty[i], lwd=0.5, col=i) 129 | lines(warpedSpectra[[DE[i]]], lty=lty[i], lwd=0.5, col=i) 130 | } 131 | 132 | ## plot F (merged spectrum 7); NOTE(review): the local name F masks base R's F alias for FALSE inside this function 133 | F <- 7 134 | plot(mergedSpectra[[F]], lwd=0.25, sub="", main="", ylab="", xlab="", 135 | xlim=xlim, ylim=c(0, (max(intensity(mergedSpectra[[F]]))*1.05))) 136 | labelPlot("F") 137 | points(mergedPeaks[[F]], lwd=0.25, pch=4, col=4) 138 | ##
label highest peaks 139 | top <- intensity(mergedPeaks[[F]]) %in% 140 | sort(intensity(mergedPeaks[[F]]), decreasing=TRUE)[1:10] 141 | labelPeaks(mergedPeaks[[F]], index=top, underline=TRUE, cex=0.8, lwd=0.25) 142 | } 143 | 144 | -------------------------------------------------------------------------------- /R/getPathFiedler2009.R: -------------------------------------------------------------------------------- 1 | #' This dataset contains 480 MALDI-TOF mass spectra used in 2 | #' \emph{Fiedler et al. 2009}. 3 | #' 4 | #' @section Abstract: 5 | #' 6 | #' \bold{Purpose}: Mass spectrometry-based serum peptidome profiling is a 7 | #' promising tool to identify novel disease-associated biomarkers, but is 8 | #' limited by preanalytic factors and the intricacies of complex data 9 | #' processing. Therefore, we investigated whether standardized sample protocols 10 | #' and new bioinformatic tools combined with external data validation improve 11 | #' the validity of peptidome profiling for the discovery of pancreatic 12 | #' cancer-associated serum markers. 13 | #' 14 | #' \bold{Experimental Design}: For the discovery study, two sets of sera from 15 | #' patients with pancreatic cancer (n = 40) and healthy controls (n = 40) were 16 | #' obtained from two different clinical centers. For external data validation, 17 | #' we collected an independent set of samples from patients (n = 20) and healthy 18 | #' controls (n = 20). Magnetic beads with different surface functionalities were 19 | #' used for peptidome fractionation followed by matrix-assisted laser 20 | #' desorption/ionization time-of-flight (MALDI-TOF) mass spectrometry (MS). 21 | #' Data evaluation was carried out by comparing two different bioinformatic 22 | #' strategies. Following proteome database search, the matching candidate 23 | #' peptide was verified by MALDI-TOF MS after specific antibody-based 24 | #' immunoaffinity chromatography and independently confirmed by an ELISA assay.
25 | #' 26 | #' \bold{Results}: Two significant peaks (m/z 3884; 5959) achieved a 27 | #' sensitivity of 86.3\% and a specificity of 97.6\% for the discrimination of 28 | #' patients and healthy controls in the external validation set. Adding peak 29 | #' m/z 3884 to conventional clinical tumor markers (CA 19-9 and CEA) improved 30 | #' sensitivity and specificity, as shown by receiver operator characteristics 31 | #' curve analysis (AUROCcombined = 1.00). Mass spectrometry-based m/z 3884 32 | #' peak identification and following immunologic quantitation revealed platelet 33 | #' factor 4 as the corresponding peptide. 34 | #' 35 | #' \bold{Conclusions}: MALDI-TOF MS-based serum peptidome profiling allowed the 36 | #' discovery and validation of platelet factor 4 as a new discriminating marker 37 | #' in pancreatic cancer. 38 | #' 39 | #' @title Serum peptidome profiling revealed platelet factor 4 as a potential 40 | #' discriminating Peptide associated with pancreatic cancer 41 | #' 42 | #' @format 43 | #' A list containing 480 \code{\link[MALDIquant]{MassSpectrum-class}} 44 | #' objects. 45 | #' 46 | #' Three sets: 47 | #' \enumerate{ 48 | #' \item 20 patients with pancreatic cancer and 20 healthy patients from 49 | #' University hospital Leipzig (set A, discovery). 50 | #' \item 20 patients with pancreatic cancer and 20 healthy patients from 51 | #' University hospital Heidelberg (set B, discovery). 52 | #' \item 20 patients with pancreatic cancer and 20 healthy patients from 53 | #' University hospital Leipzig (set C, validation, half resolution). 54 | #' } 55 | #' 56 | #' Set A and B were measured on the same target (batch). Set C was measured a 57 | #' few months later. \cr 58 | #' Each sample has four technical replicates. 59 | #' 60 | #' @usage getPathFiedler2009() 61 | #' @return Returns a \code{character} vector of length two.
The first element is 62 | #' the local path to the tar-archive of the spectra and the second is the path 63 | #' to the csv file with additional information about each spectrum. The elements are named \code{spectra} and \code{info}. 64 | #' 65 | #' @references 66 | #' Fiedler, Georg Martin, et al. "Serum peptidome profiling revealed platelet 67 | #' factor 4 as a potential discriminating Peptide associated with pancreatic 68 | #' cancer." Clinical Cancer Research 15.11 (2009): 3812-3819. 69 | #' 70 | #' @examples 71 | #' library("MALDIquantExamples") 72 | #' getPathFiedler2009() 73 | #' 74 | #' @keywords datasets 75 | #' @export 76 | getPathFiedler2009 <- function() { 77 | spectra <- system.file(file.path("extdata", "fiedler2009", "spectra.tar.gz"), 78 | package="MALDIquantExamples", mustWork=TRUE) 79 | info <- system.file(file.path("extdata", "fiedler2009", "spectra_info.csv"), 80 | package="MALDIquantExamples", mustWork=TRUE) 81 | c(spectra=spectra, info=info) 82 | } 83 | 84 | -------------------------------------------------------------------------------- /R/getPathNyakas2013.R: -------------------------------------------------------------------------------- 1 | #' This dataset contains 2222 MALDI-TOF mass spectra of a mouse kidney. 2 | #' 3 | #' This MALDI Imaging dataset contains 2222 MALDI-TOF mass spectra of a mouse 4 | #' kidney. It ranges from (x=29, y=68) to (x=101, y=92). 5 | #' 6 | #' @usage getPathNyakas2013() 7 | #' 8 | #' @return Returns the local file path for the corresponding tar-archive. 9 | #' 10 | #' @references 11 | #' 12 | #' This dataset was kindly provided by 13 | #' Dr. Adrien Nyakas (\email{adrien.nyakas@@dcb.unibe.ch}). 14 | #' 15 | #' See also: \url{http://dx.doi.org/10.6084/m9.figshare.735961}.
16 | #' 17 | #' @examples 18 | #' library("MALDIquantExamples") 19 | #' getPathNyakas2013() 20 | #' 21 | #' @keywords datasets 22 | #' @export 23 | getPathNyakas2013 <- function() { 24 | spectra <- system.file(file.path("extdata", "nyakas2013", "spectra.tar.gz"), 25 | package="MALDIquantExamples", mustWork=TRUE) 26 | setNames(spectra, "spectra") 27 | } 28 | -------------------------------------------------------------------------------- /R/getPathSpecies.R: -------------------------------------------------------------------------------- 1 | #' This dataset contains 96 MALDI-TOF mass spectra of different bacteria 2 | #' species. 3 | #' 4 | #' This dataset contains 96 MALDI-TOF mass spectra of four different bacteria 5 | #' species. Each species is represented by eight individual samples and each 6 | #' sample has three technical replicates. 7 | #' 8 | #' @usage getPathSpecies() 9 | #' 10 | #' @return Returns the local file path for the corresponding tar-archive. 11 | #' 12 | #' @references 13 | #' 14 | #' This dataset was kindly provided by 15 | #' Dr. Bryan R. Thoma (\email{bryanthoma@@yahoo.com}). 16 | #' 17 | #' @examples 18 | #' library("MALDIquantExamples") 19 | #' getPathSpecies() 20 | #' 21 | #' @keywords datasets 22 | #' @export 23 | getPathSpecies <- function() { 24 | spectra <- system.file(file.path("extdata", "species", "spectra.tar.gz"), 25 | package="MALDIquantExamples", mustWork=TRUE) 26 | setNames(spectra, "spectra") 27 | } 28 | -------------------------------------------------------------------------------- /R/package.R: -------------------------------------------------------------------------------- 1 | ## Copyright 2014 Sebastian Gibb 2 | ## 3 | ## 4 | ## This is free software: you can redistribute it and/or modify 5 | ## it under the terms of the GNU General Public License as published by 6 | ## the Free Software Foundation, either version 3 of the License, or 7 | ## (at your option) any later version.
8 | ## 9 | ## It is distributed in the hope that it will be useful, 10 | ## but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | ## GNU General Public License for more details. 13 | ## 14 | ## See <http://www.gnu.org/licenses/> 15 | 16 | #' Examples for MALDIquant 17 | #' 18 | #' This package provides example scripts and data to demonstrate the 19 | #' usage of MALDIquant. 20 | #' 21 | #' \tabular{ll}{ 22 | #' Package: \tab MALDIquantExamples \cr 23 | #' License: \tab GPL (>= 3)\cr 24 | #' URL: \tab http://www.github.com/sgibb/MALDIquantExamples/ \cr 25 | #' } 26 | #' 27 | #' @docType package 28 | #' @name MALDIquantExamples-package 29 | #' @author Sebastian Gibb <\email{mail@@sebastiangibb.de}> 30 | #' @references \url{http://www.github.com/sgibb/MALDIquantExamples/} 31 | #' @keywords package 32 | #' @rdname MALDIquantExamples-package 33 | #' 34 | #' @import MALDIquant MALDIquantForeign sda crossval pvclust 35 | #' 36 | NULL 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MALDIquant Examples 2 | [![Build Status](https://travis-ci.org/sgibb/MALDIquantExamples.svg?branch=master)](https://travis-ci.org/sgibb/MALDIquantExamples) 3 | [![license](https://img.shields.io/badge/license-GPL%20%28%3E=%203%29-brightgreen.svg?style=flat)](http://www.gnu.org/licenses/gpl-3.0.html) 4 | 5 | This R package provides some examples to demonstrate how to analyze 6 | mass spectrometry data using 7 | [MALDIquant](https://strimmerlab.github.io/software/maldiquant/index.html). 8 | 9 | ## Description 10 | 11 | MALDIquant provides a complete analysis pipeline for MALDI-TOF and other mass 12 | spectrometry data.
Distinctive features include baseline subtraction methods 13 | such as TopHat or SNIP, peak alignment using warping functions, 14 | handling of replicated measurements as well as allowing spectra with 15 | different resolutions. 16 | 17 | Please visit: https://strimmerlab.github.io/software/maldiquant/index.html 18 | 19 | ## Details 20 | 21 | ### MALDIquant examples 22 | 23 | - [MALDIquant vignette](https://cran.r-project.org/web/packages/MALDIquant/vignettes/MALDIquant-intro.pdf) 24 | 25 | - [Preprocessing and peak detection example - single spectrum.](https://github.com/sgibb/MALDIquant/blob/master/demo/peaks.R) 26 | 27 | - Analysis of [Fiedler et al. 2009](http://dx.doi.org/10.1158/1078-0432.CCR-08-2701) using MALDIquant 28 | [vignette](https://github.com/sgibb/MALDIquantExamples/blob/master/inst/doc/fiedler2009.pdf?raw=true), 29 | [R code](https://github.com/sgibb/MALDIquantExamples/blob/master/inst/doc/fiedler2009.R) 30 | 31 | - Bacterial Species Determination using MALDIquant 32 | [vignette](https://github.com/sgibb/MALDIquantExamples/blob/master/inst/doc/species.pdf?raw=true), 33 | [R code](https://github.com/sgibb/MALDIquantExamples/blob/master/inst/doc/species.R) 34 | 35 | - [R code to reproduce figure 1 used in Gibb and Strimmer 2012](https://github.com/sgibb/MALDIquantExamples/blob/master/R/createFigure1.R) 36 | ([colorized version](https://github.com/sgibb/MALDIquantExamples/blob/master/R/createFigure1_color.R)).
37 | 38 | ### MALDIquantForeign examples 39 | 40 | - [MALDIquantForeign vignette](https://cran.r-project.org/web/packages/MALDIquantForeign/vignettes/MALDIquantForeign-intro.pdf) 41 | 42 | ### Mass Spectrometry Imaging (MSI) examples 43 | 44 | - [Mass Spectrometry Imaging using MALDIquant.](https://github.com/sgibb/MALDIquantExamples/blob/master/inst/doc/nyakas2013.pdf?raw=true) 45 | 46 | - [MALDIquant IMS + shiny example.](https://github.com/sgibb/ims-shiny) 47 | 48 | 49 | ### Demo files distributed with the MALDIquant R package 50 | 51 | - [Comparison of different baseline corrections.](https://github.com/sgibb/MALDIquant/blob/master/demo/baseline.R) 52 | - [Peak detection and labeling.](https://github.com/sgibb/MALDIquant/blob/master/demo/peaks.R) 53 | - [Illustration of peak alignment by warping.](https://github.com/sgibb/MALDIquant/blob/master/demo/warping.R) 54 | - [Example workflow.](https://github.com/sgibb/MALDIquant/blob/master/demo/workflow.R) 55 | 56 | ## Installation 57 | 58 | [GitHub](https://github.com) is not supported by the basic `install.packages` 59 | command. You could use the 60 | [devtools](http://cran.r-project.org/web/packages/devtools/index.html) package 61 | to install [MALDIquantExamples](https://github.com/sgibb/MALDIquantExamples). 
62 | 63 | ```r 64 | install.packages("devtools") 65 | library("devtools") 66 | install_github("sgibb/MALDIquantExamples") 67 | ``` 68 | 69 | ## Contact 70 | 71 | You are welcome to: 72 | 73 | * submit suggestions and bug-reports at: <https://github.com/sgibb/MALDIquantExamples/issues> 74 | * send a pull request on: <https://github.com/sgibb/MALDIquantExamples/> 75 | * compose an e-mail to: <mail@sebastiangibb.de> 76 | 77 | -------------------------------------------------------------------------------- /inst/doc/fiedler2009.R: -------------------------------------------------------------------------------- 1 | ## ----knitrsetup, include=FALSE, cache=FALSE------------------------------ 2 | library("knitr") 3 | opts_chunk$set(width=40, tidy.opts=list(width.cutoff=45), tidy=FALSE, 4 | fig.path=file.path("figures", "fiedler2009/"), 5 | fig.align="center", fig.height=4.25, comment=NA, prompt=FALSE) 6 | 7 | ## ----setup, echo=TRUE, eval=FALSE---------------------------------------- 8 | # install.packages(c("MALDIquant", "MALDIquantForeign", 9 | # "sda", "crossval", "devtools")) 10 | # library("devtools") 11 | # install_github("sgibb/MALDIquantExamples") 12 | 13 | ## ----loadpackages, echo=FALSE-------------------------------------------- 14 | suppressPackageStartupMessages(library("MALDIquantExamples")) 15 | suppressPackageStartupMessages(library("xtable")) 16 | 17 | ## ----packages------------------------------------------------------------ 18 | ## the main MALDIquant package 19 | library("MALDIquant") 20 | ## the import/export routines for MALDIquant 21 | library("MALDIquantForeign") 22 | 23 | ## example data 24 | library("MALDIquantExamples") 25 | 26 | ## ----import-------------------------------------------------------------- 27 | ## import the spectra 28 | spectra <- import(getPathFiedler2009()["spectra"], 29 | verbose=FALSE) 30 | 31 | ## import metadata 32 | spectra.info <- read.table(getPathFiedler2009()["info"], 33 | sep=",", header=TRUE) 34 | 35 | ## ----reduce-------------------------------------------------------------- 36 | isHeidelberg <- spectra.info$location == "heidelberg" 37
| 38 | spectra <- spectra[isHeidelberg] 39 | spectra.info <- spectra.info[isHeidelberg,] 40 | 41 | ## ----qc------------------------------------------------------------------ 42 | table(sapply(spectra, length)) 43 | any(sapply(spectra, isEmpty)) 44 | all(sapply(spectra, isRegular)) 45 | 46 | ## ----trim---------------------------------------------------------------- 47 | spectra <- trim(spectra) 48 | 49 | ## ----plotseed, echo=FALSE------------------------------------------------ 50 | set.seed(123) 51 | 52 | ## ----plot---------------------------------------------------------------- 53 | idx <- sample(length(spectra), size=2) 54 | plot(spectra[[idx[1]]]) 55 | plot(spectra[[idx[2]]]) 56 | 57 | ## ----vs------------------------------------------------------------------ 58 | spectra <- transformIntensity(spectra, method="sqrt") 59 | 60 | ## ----sm------------------------------------------------------------------ 61 | spectra <- smoothIntensity(spectra, method="SavitzkyGolay", 62 | halfWindowSize=20) 63 | 64 | ## ----be------------------------------------------------------------------ 65 | baseline <- estimateBaseline(spectra[[1]], method="SNIP", 66 | iterations=150) 67 | plot(spectra[[1]]) 68 | lines(baseline, col="red", lwd=2) 69 | 70 | ## ----bc------------------------------------------------------------------ 71 | spectra <- removeBaseline(spectra, method="SNIP", 72 | iterations=150) 73 | plot(spectra[[1]]) 74 | 75 | ## ----cb------------------------------------------------------------------ 76 | spectra <- calibrateIntensity(spectra, method="TIC") 77 | 78 | ## ----pa------------------------------------------------------------------ 79 | spectra <- alignSpectra(spectra) 80 | 81 | ## ----avg----------------------------------------------------------------- 82 | avgSpectra <- 83 | averageMassSpectra(spectra, labels=spectra.info$patientID) 84 | avgSpectra.info <- 85 | spectra.info[!duplicated(spectra.info$patientID), ] 86 | 87 | ## 
## ----noise---------------------------------------------------------------
## Estimate the noise of the first averaged spectrum and draw it on top of a
## zoomed view of that spectrum (x range 4000-5000).
noise <- estimateNoise(avgSpectra[[1]])
plot(avgSpectra[[1]], xlim=c(4000, 5000), ylim=c(0, 0.002))
lines(noise, col="red")                     # SNR == 1
lines(noise[, 1], 2*noise[, 2], col="blue") # SNR == 2

## ----pd------------------------------------------------------------------
## Peak detection on all averaged spectra with an SNR threshold of 2.
peaks <- detectPeaks(avgSpectra, SNR=2, halfWindowSize=20)

## ----pdp-----------------------------------------------------------------
## Same zoomed view as above, now with the detected peaks marked.
plot(avgSpectra[[1]], xlim=c(4000, 5000), ylim=c(0, 0.002))
points(peaks[[1]], col="red", pch=4)

## ----pb------------------------------------------------------------------
peaks <- binPeaks(peaks)

## ----pf------------------------------------------------------------------
## Filter peaks by frequency, using the health status as grouping labels.
peaks <- filterPeaks(peaks, minFrequency=c(0.5, 0.5),
                     labels=avgSpectra.info$health,
                     mergeWhitelists=TRUE)

## ----fm------------------------------------------------------------------
## One row per averaged spectrum, labelled with the patient ID.
featureMatrix <- intensityMatrix(peaks, avgSpectra)
rownames(featureMatrix) <- avgSpectra.info$patientID

## ----dda-----------------------------------------------------------------
library("sda")
Xtrain <- featureMatrix
Ytrain <- avgSpectra.info$health
## Rank the features; Xtrain/Ytrain are the variables defined just above
## (previously Xtrain was assigned but featureMatrix was passed instead).
ddar <- sda.ranking(Xtrain=Xtrain, L=Ytrain, fdr=FALSE,
                    diagonal=TRUE)

## ----ddaresults, echo=FALSE, results="asis"------------------------------
## `booktabs` is an argument of print.xtable(), not of xtable(); passing it
## to xtable() is silently ignored, so print explicitly.
## NOTE(review): booktabs output uses \toprule etc. -- confirm that the LaTeX
## preamble of the vignette loads the booktabs package.
print(xtable(ddar[1:10, ]), booktabs=TRUE)

## ----hclust--------------------------------------------------------------
## Hierarchical clustering on all features.
distanceMatrix <- dist(featureMatrix, method="euclidean")

hClust <- hclust(distanceMatrix, method="complete")

plot(hClust, hang=-1)

## ----hclustfs------------------------------------------------------------
## Hierarchical clustering restricted to the two top-ranked features
## (column "idx" of the ranking holds the feature indices).
top <- ddar[1:2, "idx"]

distanceMatrixTop <- dist(featureMatrix[, top],
                          method="euclidean")

hClustTop <- hclust(distanceMatrixTop, method="complete")

plot(hClustTop, hang=-1)

## ----cv------------------------------------------------------------------
library("crossval")
# create a prediction function for the cross validation:
# fit sda() with diagonal=TRUE on the training fold, predict the classes of
# the test fold and return the confusion matrix, where `negative` names the
# label that counts as the negative class.
predfun.dda <- function(Xtrain, Ytrain, Xtest, Ytest,
                        negative) {
  dda.fit <- sda(Xtrain, Ytrain, diagonal=TRUE, verbose=FALSE)
  ynew <- predict(dda.fit, Xtest, verbose=FALSE)$class
  confusionMatrix(Ytest, ynew, negative=negative)
}

# set seed to get reproducible results
set.seed(1234)

# cross validation on the two top-ranked features only
cv.out <- crossval(predfun.dda,
                   X=featureMatrix[, top],
                   Y=avgSpectra.info$health,
                   K=10, B=20,
                   negative="control",
                   verbose=FALSE)
diagnosticErrors(cv.out$stat)

## ----sessioninfo, echo=FALSE, results="asis"-----------------------------
toLatex(sessionInfo(), locale=FALSE)

--------------------------------------------------------------------------------
/inst/doc/fiedler2009.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sgibb/MALDIquantExamples/b09c99d1a99074a5802d5e640cba3c349aa464f8/inst/doc/fiedler2009.pdf
--------------------------------------------------------------------------------
/inst/doc/nyakas2013.R:
--------------------------------------------------------------------------------
## ----knitrsetup, include=FALSE, cache=FALSE------------------------------
library("knitr")
opts_chunk$set(width=40, tidy.opts=list(width.cutoff=45), tidy=FALSE,
               fig.path=file.path("figures", "nyakas2013/"),
               fig.align="center", fig.height=4.25, comment=NA, prompt=FALSE)

## ----setup, echo=TRUE, eval=FALSE----------------------------------------
# install.packages(c("MALDIquant", "MALDIquantForeign",
#                    "devtools"))
# library("devtools")
# install_github("sgibb/MALDIquantExamples")

## ----loadpackages, echo=FALSE--------------------------------------------
suppressPackageStartupMessages(library("MALDIquantExamples"))

## ----packages------------------------------------------------------------
## the main MALDIquant package
library("MALDIquant")
## the import/export routines for MALDIquant
library("MALDIquantForeign")

## example data
library("MALDIquantExamples")

## ----import--------------------------------------------------------------
## import the spectra
spectra <- import(getPathNyakas2013(), verbose=FALSE)

## ----preprocessing-------------------------------------------------------
## sqrt transformation, smoothing, baseline removal and TIC calibration
spectra <- transformIntensity(spectra, method="sqrt")
spectra <- smoothIntensity(spectra, method="SavitzkyGolay",
                           halfWindowSize=10)
spectra <- removeBaseline(spectra, method="SNIP",
                          iterations=10)
spectra <- calibrateIntensity(spectra, method="TIC")

## ----meanspectrum--------------------------------------------------------
## peaks of the mean spectrum serve as regions of interest (roi)
meanSpectrum <- averageMassSpectra(spectra)

roi <- detectPeaks(meanSpectrum, SNR=4,
                   halfWindowSize=10)

plot(meanSpectrum, main="Mean Spectrum")
points(roi, col="red")

## ----plotmsihigh---------------------------------------------------------
## find order of peak intensities
o <- order(intensity(roi), decreasing=TRUE)

## plot MSI slice for the highest one
plotMsiSlice(spectra, center=mass(roi)[o[1]], tolerance=0.5)

## ----plotmsimultiple-----------------------------------------------------
## slices for the second and third highest peak
plotMsiSlice(spectra, center=mass(roi)[o[2:3]], tolerance=0.5)

## ----plotmsicombine------------------------------------------------------
## combine the two highest peaks in one plot, one colour ramp per peak
plotMsiSlice(spectra, center=mass(roi)[o[1:2]], tolerance=0.5,
             combine=TRUE,
             colRamp=list(colorRamp(c("#000000", "#FF00FF")),
                          colorRamp(c("#000000", "#00FF00"))))

## ----msislices-----------------------------------------------------------
slices <- msiSlices(spectra, center=mass(roi), tolerance=0.5)
attributes(slices)

## ----coordinates---------------------------------------------------------
head(coordinates(spectra))
head(coordinates(spectra, adjust=TRUE))

## ----peakim--------------------------------------------------------------
peaks <- detectPeaks(spectra, SNR=3,
                     halfWindowSize=10)
peaks <- binPeaks(peaks)
intMatrix <- intensityMatrix(peaks, spectra)

## ----kmeans--------------------------------------------------------------
## kmeans() chooses its starting centers at random; seed the RNG so that the
## clustering (and the cluster numbering that drives the colours below) is
## reproducible, as is done before the other stochastic steps in these
## example scripts.
set.seed(1234)
km <- kmeans(intMatrix, centers=2)

## ----clustermatrix-------------------------------------------------------
## build a pixel matrix of cluster numbers; `coord` is used as an index
## matrix, so each spectrum's cluster lands at its (x, y) position and
## positions without a spectrum stay NA
coord <- coordinates(spectra, adjust=TRUE)
maxPixels <- apply(coord, MARGIN=2, FUN=max)
m <- matrix(NA, nrow=maxPixels["x"], ncol=maxPixels["y"])
m[coord] <- km$cluster

## ----plotclusters--------------------------------------------------------
## map a cluster number to a colour triple:
## 1 -> (255, 0, 0), 2 -> (0, 255, 0)
rgbCluster <- function(x) {
  col <- matrix(c(255, 0, 0,
                  0, 255, 0), nrow=2, byrow=TRUE)
  col[x, ]
}
plotMsiSlice(m, colRamp=rgbCluster, scale=FALSE)

## ----sessioninfo, echo=FALSE, results="asis"-----------------------------
toLatex(sessionInfo(), locale=FALSE)

--------------------------------------------------------------------------------
/inst/doc/nyakas2013.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sgibb/MALDIquantExamples/b09c99d1a99074a5802d5e640cba3c349aa464f8/inst/doc/nyakas2013.pdf
--------------------------------------------------------------------------------
/inst/doc/species.R:
--------------------------------------------------------------------------------
## ----knitrsetup, include=FALSE, cache=FALSE------------------------------
library("knitr")
opts_chunk$set(width=40, tidy.opts=list(width.cutoff=45), tidy=FALSE,
               fig.path=file.path("figures", "species/"),
               fig.align="center", fig.height=4.25, comment=NA, prompt=FALSE)

## ----setup, echo=TRUE, eval=FALSE----------------------------------------
# install.packages(c("MALDIquant", "MALDIquantForeign",
#                    "sda", "crossval", "devtools"))
# library("devtools")
# install_github("sgibb/MALDIquantExamples")

## ----loadpackages, echo=FALSE--------------------------------------------
suppressPackageStartupMessages(library("MALDIquantExamples"))
suppressPackageStartupMessages(library("xtable"))

## ----packages------------------------------------------------------------
## the main MALDIquant package
library("MALDIquant")
## the import/export routines for MALDIquant
library("MALDIquantForeign")

## example data
library("MALDIquantExamples")

## ----import--------------------------------------------------------------
spectra <- import(getPathSpecies(), verbose=FALSE)

## ----qc------------------------------------------------------------------
## basic quality control: spectra lengths, empty and irregular spectra
table(sapply(spectra, length))
any(sapply(spectra, isEmpty))
all(sapply(spectra, isRegular))

## ----trim----------------------------------------------------------------
spectra <- trim(spectra)

## ----plotseed, echo=FALSE------------------------------------------------
set.seed(123)

## ----plot----------------------------------------------------------------
## plot two randomly chosen spectra (seeded above)
idx <- sample(length(spectra), size=2)
plot(spectra[[idx[1]]])
plot(spectra[[idx[2]]])

## ----vs------------------------------------------------------------------
spectra <- transformIntensity(spectra, method="sqrt")

## ----fwhm----------------------------------------------------------------
## zoom into two regions of the first spectrum; the dashed horizontal lines
## presumably mark half of the respective peak maximum (chunk label "fwhm")
plot(spectra[[1]], type="b",
     xlim=c(2235.3, 2252.0), ylim=c(45, 100))
abline(h=72, col=4, lty=2)
plot(spectra[[1]], type="b",
     xlim=c(11220, 11250), ylim=c(24, 40))
abline(h=32, col=4, lty=2)

## ----sm------------------------------------------------------------------
spectra <- smoothIntensity(spectra, method="SavitzkyGolay",
                           halfWindowSize=10)

## ----be------------------------------------------------------------------
## define iteration steps: 25, 50, ..., 100
iterations <- seq(from=25, to=100, by=25)
## define different colors for each step
col <- rainbow(length(iterations))

plot(spectra[[1]], xlim=c(2000, 12000))

## draw different baseline estimates
## (seq_along() instead of seq(along=...), which only worked through
## partial matching of the `along.with` argument)
for (i in seq_along(iterations)) {
  baseline <- estimateBaseline(spectra[[1]], method="SNIP",
                               iterations=iterations[i])
  lines(baseline, col=col[i], lwd=2)
}

legend("topright", legend=iterations, col=col, lwd=1)

## ----bc------------------------------------------------------------------
spectra <- removeBaseline(spectra, method="SNIP",
                          iterations=25)
plot(spectra[[1]])

## ----cb------------------------------------------------------------------
spectra <- calibrateIntensity(spectra, method="TIC")

## ----pa------------------------------------------------------------------
spectra <- alignSpectra(spectra)

## ----metadata------------------------------------------------------------
metaData(spectra[[1]])$spot

## ----spots---------------------------------------------------------------
## collect spot and sample name of every spectrum from its metadata
spots <- sapply(spectra, function(x) metaData(x)$spot)
species <- sapply(spectra, function(x) metaData(x)$sampleName)
head(spots)
head(species)

## ----average-------------------------------------------------------------
## average all spectra that share the same species/spot combination
avgSpectra <-
  averageMassSpectra(spectra, labels=paste0(species, spots))

## ----noise---------------------------------------------------------------
## define snrs steps: 1, 1.5, ... 2.5
snrs <- seq(from=1, to=2.5, by=0.5)
## define different colors for each step
col <- rainbow(length(snrs))

## estimate noise
noise <- estimateNoise(avgSpectra[[1]],
                       method="SuperSmoother")

plot(avgSpectra[[1]],
     xlim=c(6000, 16000), ylim=c(0, 0.0016))

## draw the noise estimate scaled by each SNR step
for (i in seq_along(snrs)) {
  lines(noise[, "mass"],
        noise[, "intensity"]*snrs[i],
        col=col[i], lwd=2)
}
legend("topright", legend=snrs, col=col, lwd=1)

## ----pd------------------------------------------------------------------
peaks <- detectPeaks(avgSpectra, SNR=2, halfWindowSize=10)

## ----pdp-----------------------------------------------------------------
plot(avgSpectra[[1]], xlim=c(6000, 16000), ylim=c(0, 0.0016))
points(peaks[[1]], col="red", pch=4)

## ----pb------------------------------------------------------------------
peaks <- binPeaks(peaks)

## ----pf------------------------------------------------------------------
peaks <- filterPeaks(peaks, minFrequency=0.25)

## ----spots2--------------------------------------------------------------
## same metadata as before, now taken from the averaged spectra
spots <- sapply(avgSpectra, function(x) metaData(x)$spot)
species <- sapply(avgSpectra, function(x) metaData(x)$sampleName)
species <- factor(species) # convert to factor
                           # (needed later in crossval)

## ----fm------------------------------------------------------------------
featureMatrix <- intensityMatrix(peaks, avgSpectra)
rownames(featureMatrix) <- paste(species, spots, sep=".")

## ----clust, fig.height=5-------------------------------------------------
## pvclust() is given the transposed matrix, i.e. spectra in columns
library("pvclust")
pv <- pvclust(t(featureMatrix),
              method.hclust="ward.D2",
              method.dist="euclidean")
plot(pv, print.num=FALSE)
## ----dda, fig.height=7.5-------------------------------------------------
## feature ranking with diagonal=TRUE (DDA)
library("sda")
ddar <- sda.ranking(Xtrain=featureMatrix, L=species,
                    fdr=FALSE, diagonal=TRUE)
plot(ddar)

## ----lda, fig.height=7.5-------------------------------------------------
## feature ranking with diagonal=FALSE (LDA)
ldar <- sda.ranking(Xtrain=featureMatrix, L=species,
                    fdr=FALSE, diagonal=FALSE)
plot(ldar)

## ----predictfuncv--------------------------------------------------------
library("crossval")
# Prediction function for crossval():
#   Xtrain, Ytrain  training features / labels of the current fold
#   Xtest, Ytest    test features / labels of the current fold
#   numVars         number of top-ranked variables to use (>= 1)
#   diagonal        passed on to sda.ranking() and sda()
# Returns the accuracy (fraction of correctly predicted test labels).
predfun <- function(Xtrain, Ytrain, Xtest, Ytest,
                    numVars, diagonal=FALSE) {
  stopifnot(is.numeric(numVars), length(numVars) == 1L, numVars >= 1)

  # estimate ranking and determine the best numVars variables
  ra <- sda.ranking(Xtrain, Ytrain,
                    verbose=FALSE, diagonal=diagonal, fdr=FALSE)
  # seq_len()/min() instead of 1:numVars: never index past the ranking
  # (which would yield NA column indices)
  selVars <- ra[, "idx"][seq_len(min(numVars, nrow(ra)))]

  # fit and predict
  sda.out <- sda(Xtrain[, selVars, drop=FALSE], Ytrain,
                 diagonal=diagonal, verbose=FALSE)
  ynew <- predict(sda.out, Xtest[, selVars, drop=FALSE],
                  verbose=FALSE)$class

  # compute accuracy
  mean(Ytest == ynew)
}

## ----cvsetup-------------------------------------------------------------
K <- 5  # number of folds
B <- 20 # number of repetitions

## ----cvtop10-------------------------------------------------------------
## NOTE(review): the result is named cv.dda10 but the call uses
## diagonal=FALSE, which the chunks below label as LDA -- confirm which of
## the two is intended.
set.seed(12345)
cv.dda10 <- crossval(predfun,
                     X=featureMatrix, Y=species,
                     K=K, B=B,
                     numVars=10, diagonal=FALSE,
                     verbose=FALSE)
cv.dda10$stat

## ----cvoptimaldd---------------------------------------------------------
npeaks <- c(1:15, ncol(featureMatrix)) # number of peaks

## ----cvoptimaldda--------------------------------------------------------
# estimate accuracy for DDA
set.seed(12345)
cvsim.dda <- sapply(npeaks, function(i) {
  cv <- crossval(predfun,
                 X=featureMatrix, Y=species,
                 K=K, B=B, numVars=i, diagonal=TRUE,
                 verbose=FALSE)
  return(cv$stat)
})

## ----cvoptimallda--------------------------------------------------------
# estimate accuracy for LDA
set.seed(12345)
cvsim.lda <- sapply(npeaks, function(i) {
  cv <- crossval(predfun,
                 X=featureMatrix, Y=species,
                 K=K, B=B, numVars=i, diagonal=FALSE,
                 verbose=FALSE)
  return(cv$stat)
})

## ----cvoptimaltable------------------------------------------------------
result.sim <- cbind(nPeaks=npeaks,
                    "DDA-ACC"=cvsim.dda,
                    "LDA-ACC"=cvsim.lda)

## ----cvoptimaltablelatex, echo=FALSE, results="asis"---------------------
## `booktabs` is an argument of print.xtable(), not of xtable(); passing it
## to xtable() is silently ignored, so print explicitly.
## NOTE(review): booktabs output uses \toprule etc. -- confirm that the LaTeX
## preamble of the vignette loads the booktabs package.
print(xtable(result.sim, digits=c(0, 0, 3, 3)), booktabs=TRUE)

## ----sessioninfo, echo=FALSE, results="asis"-----------------------------
toLatex(sessionInfo(), locale=FALSE)

--------------------------------------------------------------------------------
/inst/doc/species.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sgibb/MALDIquantExamples/b09c99d1a99074a5802d5e640cba3c349aa464f8/inst/doc/species.pdf
--------------------------------------------------------------------------------
/inst/extdata/fiedler2009/spectra.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sgibb/MALDIquantExamples/b09c99d1a99074a5802d5e640cba3c349aa464f8/inst/extdata/fiedler2009/spectra.tar.gz
--------------------------------------------------------------------------------
/inst/extdata/fiedler2009/spectra_info.csv:
--------------------------------------------------------------------------------
1 | "patientID","patientID.orig","experiment","location","health" 2 | "LC077","LC77","discovery","leipzig","control" 3 | "LC077","LC77","discovery","leipzig","control" 4 | "LC077","LC77","discovery","leipzig","control" 5 | 
"LC077","LC77","discovery","leipzig","control" 6 | "LC046","LC46","discovery","leipzig","control" 7 | "LC046","LC46","discovery","leipzig","control" 8 | "LC046","LC46","discovery","leipzig","control" 9 | "LC046","LC46","discovery","leipzig","control" 10 | "LC050","LC50","discovery","leipzig","control" 11 | "LC050","LC50","discovery","leipzig","control" 12 | "LC050","LC50","discovery","leipzig","control" 13 | "LC050","LC50","discovery","leipzig","control" 14 | "LC053","LC53","discovery","leipzig","control" 15 | "LC053","LC53","discovery","leipzig","control" 16 | "LC053","LC53","discovery","leipzig","control" 17 | "LC053","LC53","discovery","leipzig","control" 18 | "LC057","LC57","discovery","leipzig","control" 19 | "LC057","LC57","discovery","leipzig","control" 20 | "LC057","LC57","discovery","leipzig","control" 21 | "LC057","LC57","discovery","leipzig","control" 22 | "LC060","LC60","discovery","leipzig","control" 23 | "LC060","LC60","discovery","leipzig","control" 24 | "LC060","LC60","discovery","leipzig","control" 25 | "LC060","LC60","discovery","leipzig","control" 26 | "LC067","LC67","discovery","leipzig","control" 27 | "LC067","LC67","discovery","leipzig","control" 28 | "LC067","LC67","discovery","leipzig","control" 29 | "LC067","LC67","discovery","leipzig","control" 30 | "LC068","LC68","discovery","leipzig","control" 31 | "LC068","LC68","discovery","leipzig","control" 32 | "LC068","LC68","discovery","leipzig","control" 33 | "LC068","LC68","discovery","leipzig","control" 34 | "LC069","LC69","discovery","leipzig","control" 35 | "LC069","LC69","discovery","leipzig","control" 36 | "LC069","LC69","discovery","leipzig","control" 37 | "LC069","LC69","discovery","leipzig","control" 38 | "LC071","LC71","discovery","leipzig","control" 39 | "LC071","LC71","discovery","leipzig","control" 40 | "LC071","LC71","discovery","leipzig","control" 41 | "LC071","LC71","discovery","leipzig","control" 42 | "LC217","LC217","discovery","leipzig","control" 43 | 
"LC217","LC217","discovery","leipzig","control" 44 | "LC217","LC217","discovery","leipzig","control" 45 | "LC217","LC217","discovery","leipzig","control" 46 | "LC088","LC88","discovery","leipzig","control" 47 | "LC088","LC88","discovery","leipzig","control" 48 | "LC088","LC88","discovery","leipzig","control" 49 | "LC088","LC88","discovery","leipzig","control" 50 | "LC207","LC207","discovery","leipzig","control" 51 | "LC207","LC207","discovery","leipzig","control" 52 | "LC207","LC207","discovery","leipzig","control" 53 | "LC207","LC207","discovery","leipzig","control" 54 | "LC208","LC208","discovery","leipzig","control" 55 | "LC208","LC208","discovery","leipzig","control" 56 | "LC208","LC208","discovery","leipzig","control" 57 | "LC208","LC208","discovery","leipzig","control" 58 | "LC209","LC209","discovery","leipzig","control" 59 | "LC209","LC209","discovery","leipzig","control" 60 | "LC209","LC209","discovery","leipzig","control" 61 | "LC209","LC209","discovery","leipzig","control" 62 | "LC210","LC210","discovery","leipzig","control" 63 | "LC210","LC210","discovery","leipzig","control" 64 | "LC210","LC210","discovery","leipzig","control" 65 | "LC210","LC210","discovery","leipzig","control" 66 | "LC212","LC212","discovery","leipzig","control" 67 | "LC212","LC212","discovery","leipzig","control" 68 | "LC212","LC212","discovery","leipzig","control" 69 | "LC212","LC212","discovery","leipzig","control" 70 | "LC213","LC213","discovery","leipzig","control" 71 | "LC213","LC213","discovery","leipzig","control" 72 | "LC213","LC213","discovery","leipzig","control" 73 | "LC213","LC213","discovery","leipzig","control" 74 | "LC214","LC214","discovery","leipzig","control" 75 | "LC214","LC214","discovery","leipzig","control" 76 | "LC214","LC214","discovery","leipzig","control" 77 | "LC214","LC214","discovery","leipzig","control" 78 | "LC215","LC215","discovery","leipzig","control" 79 | "LC215","LC215","discovery","leipzig","control" 80 | 
"LC215","LC215","discovery","leipzig","control" 81 | "LC215","LC215","discovery","leipzig","control" 82 | "LP079","LP79","discovery","leipzig","cancer" 83 | "LP079","LP79","discovery","leipzig","cancer" 84 | "LP079","LP79","discovery","leipzig","cancer" 85 | "LP079","LP79","discovery","leipzig","cancer" 86 | "LP037","LP37","discovery","leipzig","cancer" 87 | "LP037","LP37","discovery","leipzig","cancer" 88 | "LP037","LP37","discovery","leipzig","cancer" 89 | "LP037","LP37","discovery","leipzig","cancer" 90 | "LP342","LP342","discovery","leipzig","cancer" 91 | "LP342","LP342","discovery","leipzig","cancer" 92 | "LP342","LP342","discovery","leipzig","cancer" 93 | "LP342","LP342","discovery","leipzig","cancer" 94 | "LP045","LP45","discovery","leipzig","cancer" 95 | "LP045","LP45","discovery","leipzig","cancer" 96 | "LP045","LP45","discovery","leipzig","cancer" 97 | "LP045","LP45","discovery","leipzig","cancer" 98 | "LP048","LP48","discovery","leipzig","cancer" 99 | "LP048","LP48","discovery","leipzig","cancer" 100 | "LP048","LP48","discovery","leipzig","cancer" 101 | "LP048","LP48","discovery","leipzig","cancer" 102 | "LP055","LP55","discovery","leipzig","cancer" 103 | "LP055","LP55","discovery","leipzig","cancer" 104 | "LP055","LP55","discovery","leipzig","cancer" 105 | "LP055","LP55","discovery","leipzig","cancer" 106 | "LP062","LP62","discovery","leipzig","cancer" 107 | "LP062","LP62","discovery","leipzig","cancer" 108 | "LP062","LP62","discovery","leipzig","cancer" 109 | "LP062","LP62","discovery","leipzig","cancer" 110 | "LP066","LP66","discovery","leipzig","cancer" 111 | "LP066","LP66","discovery","leipzig","cancer" 112 | "LP066","LP66","discovery","leipzig","cancer" 113 | "LP066","LP66","discovery","leipzig","cancer" 114 | "LP070","LP70","discovery","leipzig","cancer" 115 | "LP070","LP70","discovery","leipzig","cancer" 116 | "LP070","LP70","discovery","leipzig","cancer" 117 | "LP070","LP70","discovery","leipzig","cancer" 118 | 
"LP075","LP75","discovery","leipzig","cancer" 119 | "LP075","LP75","discovery","leipzig","cancer" 120 | "LP075","LP75","discovery","leipzig","cancer" 121 | "LP075","LP75","discovery","leipzig","cancer" 122 | "LP178","LP178","discovery","leipzig","cancer" 123 | "LP178","LP178","discovery","leipzig","cancer" 124 | "LP178","LP178","discovery","leipzig","cancer" 125 | "LP178","LP178","discovery","leipzig","cancer" 126 | "LP089","LP89","discovery","leipzig","cancer" 127 | "LP089","LP89","discovery","leipzig","cancer" 128 | "LP089","LP89","discovery","leipzig","cancer" 129 | "LP089","LP89","discovery","leipzig","cancer" 130 | "LP091","LP91","discovery","leipzig","cancer" 131 | "LP091","LP91","discovery","leipzig","cancer" 132 | "LP091","LP91","discovery","leipzig","cancer" 133 | "LP091","LP91","discovery","leipzig","cancer" 134 | "LP095","LP95","discovery","leipzig","cancer" 135 | "LP095","LP95","discovery","leipzig","cancer" 136 | "LP095","LP95","discovery","leipzig","cancer" 137 | "LP095","LP95","discovery","leipzig","cancer" 138 | "LP117","LP117","discovery","leipzig","cancer" 139 | "LP117","LP117","discovery","leipzig","cancer" 140 | "LP117","LP117","discovery","leipzig","cancer" 141 | "LP117","LP117","discovery","leipzig","cancer" 142 | "LP125","LP125","discovery","leipzig","cancer" 143 | "LP125","LP125","discovery","leipzig","cancer" 144 | "LP125","LP125","discovery","leipzig","cancer" 145 | "LP125","LP125","discovery","leipzig","cancer" 146 | "LP132","LP132","discovery","leipzig","cancer" 147 | "LP132","LP132","discovery","leipzig","cancer" 148 | "LP132","LP132","discovery","leipzig","cancer" 149 | "LP132","LP132","discovery","leipzig","cancer" 150 | "LP154","LP154","discovery","leipzig","cancer" 151 | "LP154","LP154","discovery","leipzig","cancer" 152 | "LP154","LP154","discovery","leipzig","cancer" 153 | "LP154","LP154","discovery","leipzig","cancer" 154 | "LP191","LP191","discovery","leipzig","cancer" 155 | "LP191","LP191","discovery","leipzig","cancer" 156 | 
"LP191","LP191","discovery","leipzig","cancer" 157 | "LP191","LP191","discovery","leipzig","cancer" 158 | "LP157","LP157","discovery","leipzig","cancer" 159 | "LP157","LP157","discovery","leipzig","cancer" 160 | "LP157","LP157","discovery","leipzig","cancer" 161 | "LP157","LP157","discovery","leipzig","cancer" 162 | "HC056","HC56","discovery","heidelberg","control" 163 | "HC056","HC56","discovery","heidelberg","control" 164 | "HC056","HC56","discovery","heidelberg","control" 165 | "HC056","HC56","discovery","heidelberg","control" 166 | "HC001","HC1","discovery","heidelberg","control" 167 | "HC001","HC1","discovery","heidelberg","control" 168 | "HC001","HC1","discovery","heidelberg","control" 169 | "HC001","HC1","discovery","heidelberg","control" 170 | "HC002","HC2","discovery","heidelberg","control" 171 | "HC002","HC2","discovery","heidelberg","control" 172 | "HC002","HC2","discovery","heidelberg","control" 173 | "HC002","HC2","discovery","heidelberg","control" 174 | "HC008","HC8","discovery","heidelberg","control" 175 | "HC008","HC8","discovery","heidelberg","control" 176 | "HC008","HC8","discovery","heidelberg","control" 177 | "HC008","HC8","discovery","heidelberg","control" 178 | "HC011","HC11","discovery","heidelberg","control" 179 | "HC011","HC11","discovery","heidelberg","control" 180 | "HC011","HC11","discovery","heidelberg","control" 181 | "HC011","HC11","discovery","heidelberg","control" 182 | "HC033","HC33","discovery","heidelberg","control" 183 | "HC033","HC33","discovery","heidelberg","control" 184 | "HC033","HC33","discovery","heidelberg","control" 185 | "HC033","HC33","discovery","heidelberg","control" 186 | "HC049","HC49","discovery","heidelberg","control" 187 | "HC049","HC49","discovery","heidelberg","control" 188 | "HC049","HC49","discovery","heidelberg","control" 189 | "HC049","HC49","discovery","heidelberg","control" 190 | "HC050","HC50","discovery","heidelberg","control" 191 | "HC050","HC50","discovery","heidelberg","control" 192 | 
"HC050","HC50","discovery","heidelberg","control" 193 | "HC050","HC50","discovery","heidelberg","control" 194 | "HC054","HC54","discovery","heidelberg","control" 195 | "HC054","HC54","discovery","heidelberg","control" 196 | "HC054","HC54","discovery","heidelberg","control" 197 | "HC054","HC54","discovery","heidelberg","control" 198 | "HC055","HC55","discovery","heidelberg","control" 199 | "HC055","HC55","discovery","heidelberg","control" 200 | "HC055","HC55","discovery","heidelberg","control" 201 | "HC055","HC55","discovery","heidelberg","control" 202 | "HC122","HC122","discovery","heidelberg","control" 203 | "HC122","HC122","discovery","heidelberg","control" 204 | "HC122","HC122","discovery","heidelberg","control" 205 | "HC122","HC122","discovery","heidelberg","control" 206 | "HC057","HC57","discovery","heidelberg","control" 207 | "HC057","HC57","discovery","heidelberg","control" 208 | "HC057","HC57","discovery","heidelberg","control" 209 | "HC057","HC57","discovery","heidelberg","control" 210 | "HC059","HC59","discovery","heidelberg","control" 211 | "HC059","HC59","discovery","heidelberg","control" 212 | "HC059","HC59","discovery","heidelberg","control" 213 | "HC059","HC59","discovery","heidelberg","control" 214 | "HC062","HC62","discovery","heidelberg","control" 215 | "HC062","HC62","discovery","heidelberg","control" 216 | "HC062","HC62","discovery","heidelberg","control" 217 | "HC062","HC62","discovery","heidelberg","control" 218 | "HC064","HC64","discovery","heidelberg","control" 219 | "HC064","HC64","discovery","heidelberg","control" 220 | "HC064","HC64","discovery","heidelberg","control" 221 | "HC064","HC64","discovery","heidelberg","control" 222 | "HC066","HC66","discovery","heidelberg","control" 223 | "HC066","HC66","discovery","heidelberg","control" 224 | "HC066","HC66","discovery","heidelberg","control" 225 | "HC066","HC66","discovery","heidelberg","control" 226 | "HC067","HC67","discovery","heidelberg","control" 227 | 
"HC067","HC67","discovery","heidelberg","control" 228 | "HC067","HC67","discovery","heidelberg","control" 229 | "HC067","HC67","discovery","heidelberg","control" 230 | "HC118","HC118","discovery","heidelberg","control" 231 | "HC118","HC118","discovery","heidelberg","control" 232 | "HC118","HC118","discovery","heidelberg","control" 233 | "HC118","HC118","discovery","heidelberg","control" 234 | "HC119","HC119","discovery","heidelberg","control" 235 | "HC119","HC119","discovery","heidelberg","control" 236 | "HC119","HC119","discovery","heidelberg","control" 237 | "HC119","HC119","discovery","heidelberg","control" 238 | "HC120","HC120","discovery","heidelberg","control" 239 | "HC120","HC120","discovery","heidelberg","control" 240 | "HC120","HC120","discovery","heidelberg","control" 241 | "HC120","HC120","discovery","heidelberg","control" 242 | "HP393","HP393","discovery","heidelberg","cancer" 243 | "HP393","HP393","discovery","heidelberg","cancer" 244 | "HP393","HP393","discovery","heidelberg","cancer" 245 | "HP393","HP393","discovery","heidelberg","cancer" 246 | "HP120","HP120","discovery","heidelberg","cancer" 247 | "HP120","HP120","discovery","heidelberg","cancer" 248 | "HP120","HP120","discovery","heidelberg","cancer" 249 | "HP120","HP120","discovery","heidelberg","cancer" 250 | "HP121","HP121","discovery","heidelberg","cancer" 251 | "HP121","HP121","discovery","heidelberg","cancer" 252 | "HP121","HP121","discovery","heidelberg","cancer" 253 | "HP121","HP121","discovery","heidelberg","cancer" 254 | "HP150","HP150","discovery","heidelberg","cancer" 255 | "HP150","HP150","discovery","heidelberg","cancer" 256 | "HP150","HP150","discovery","heidelberg","cancer" 257 | "HP150","HP150","discovery","heidelberg","cancer" 258 | "HP151","HP151","discovery","heidelberg","cancer" 259 | "HP151","HP151","discovery","heidelberg","cancer" 260 | "HP151","HP151","discovery","heidelberg","cancer" 261 | "HP151","HP151","discovery","heidelberg","cancer" 262 | 
"HP161","HP161","discovery","heidelberg","cancer" 263 | "HP161","HP161","discovery","heidelberg","cancer" 264 | "HP161","HP161","discovery","heidelberg","cancer" 265 | "HP161","HP161","discovery","heidelberg","cancer" 266 | "HP208","HP208","discovery","heidelberg","cancer" 267 | "HP208","HP208","discovery","heidelberg","cancer" 268 | "HP208","HP208","discovery","heidelberg","cancer" 269 | "HP208","HP208","discovery","heidelberg","cancer" 270 | "HP212","HP212","discovery","heidelberg","cancer" 271 | "HP212","HP212","discovery","heidelberg","cancer" 272 | "HP212","HP212","discovery","heidelberg","cancer" 273 | "HP212","HP212","discovery","heidelberg","cancer" 274 | "HP262","HP262","discovery","heidelberg","cancer" 275 | "HP262","HP262","discovery","heidelberg","cancer" 276 | "HP262","HP262","discovery","heidelberg","cancer" 277 | "HP262","HP262","discovery","heidelberg","cancer" 278 | "HP321","HP321","discovery","heidelberg","cancer" 279 | "HP321","HP321","discovery","heidelberg","cancer" 280 | "HP321","HP321","discovery","heidelberg","cancer" 281 | "HP321","HP321","discovery","heidelberg","cancer" 282 | "HP438","HP438","discovery","heidelberg","cancer" 283 | "HP438","HP438","discovery","heidelberg","cancer" 284 | "HP438","HP438","discovery","heidelberg","cancer" 285 | "HP438","HP438","discovery","heidelberg","cancer" 286 | "HP402","HP402","discovery","heidelberg","cancer" 287 | "HP402","HP402","discovery","heidelberg","cancer" 288 | "HP402","HP402","discovery","heidelberg","cancer" 289 | "HP402","HP402","discovery","heidelberg","cancer" 290 | "HP410","HP410","discovery","heidelberg","cancer" 291 | "HP410","HP410","discovery","heidelberg","cancer" 292 | "HP410","HP410","discovery","heidelberg","cancer" 293 | "HP410","HP410","discovery","heidelberg","cancer" 294 | "HP413","HP413","discovery","heidelberg","cancer" 295 | "HP413","HP413","discovery","heidelberg","cancer" 296 | "HP413","HP413","discovery","heidelberg","cancer" 297 | 
"HP413","HP413","discovery","heidelberg","cancer" 298 | "HP416","HP416","discovery","heidelberg","cancer" 299 | "HP416","HP416","discovery","heidelberg","cancer" 300 | "HP416","HP416","discovery","heidelberg","cancer" 301 | "HP416","HP416","discovery","heidelberg","cancer" 302 | "HP417","HP417","discovery","heidelberg","cancer" 303 | "HP417","HP417","discovery","heidelberg","cancer" 304 | "HP417","HP417","discovery","heidelberg","cancer" 305 | "HP417","HP417","discovery","heidelberg","cancer" 306 | "HP419","HP419","discovery","heidelberg","cancer" 307 | "HP419","HP419","discovery","heidelberg","cancer" 308 | "HP419","HP419","discovery","heidelberg","cancer" 309 | "HP419","HP419","discovery","heidelberg","cancer" 310 | "HP424","HP424","discovery","heidelberg","cancer" 311 | "HP424","HP424","discovery","heidelberg","cancer" 312 | "HP424","HP424","discovery","heidelberg","cancer" 313 | "HP424","HP424","discovery","heidelberg","cancer" 314 | "HP425","HP425","discovery","heidelberg","cancer" 315 | "HP425","HP425","discovery","heidelberg","cancer" 316 | "HP425","HP425","discovery","heidelberg","cancer" 317 | "HP425","HP425","discovery","heidelberg","cancer" 318 | "HP429","HP429","discovery","heidelberg","cancer" 319 | "HP429","HP429","discovery","heidelberg","cancer" 320 | "HP429","HP429","discovery","heidelberg","cancer" 321 | "HP429","HP429","discovery","heidelberg","cancer" 322 | "VC297","297","validation","leipzig","control" 323 | "VC297","297","validation","leipzig","control" 324 | "VC297","297","validation","leipzig","control" 325 | "VC297","297","validation","leipzig","control" 326 | "VC300","300","validation","leipzig","control" 327 | "VC300","300","validation","leipzig","control" 328 | "VC300","300","validation","leipzig","control" 329 | "VC300","300","validation","leipzig","control" 330 | "VC298","298","validation","leipzig","control" 331 | "VC298","298","validation","leipzig","control" 332 | "VC298","298","validation","leipzig","control" 333 | 
"VC298","298","validation","leipzig","control" 334 | "VC301","301","validation","leipzig","control" 335 | "VC301","301","validation","leipzig","control" 336 | "VC301","301","validation","leipzig","control" 337 | "VC301","301","validation","leipzig","control" 338 | "VC306","306","validation","leipzig","control" 339 | "VC306","306","validation","leipzig","control" 340 | "VC306","306","validation","leipzig","control" 341 | "VC306","306","validation","leipzig","control" 342 | "VC303","303","validation","leipzig","control" 343 | "VC303","303","validation","leipzig","control" 344 | "VC303","303","validation","leipzig","control" 345 | "VC303","303","validation","leipzig","control" 346 | "VC307","307","validation","leipzig","control" 347 | "VC307","307","validation","leipzig","control" 348 | "VC307","307","validation","leipzig","control" 349 | "VC307","307","validation","leipzig","control" 350 | "VC305","305","validation","leipzig","control" 351 | "VC305","305","validation","leipzig","control" 352 | "VC305","305","validation","leipzig","control" 353 | "VC305","305","validation","leipzig","control" 354 | "VC308","308","validation","leipzig","control" 355 | "VC308","308","validation","leipzig","control" 356 | "VC308","308","validation","leipzig","control" 357 | "VC308","308","validation","leipzig","control" 358 | "VC315","315","validation","leipzig","control" 359 | "VC315","315","validation","leipzig","control" 360 | "VC315","315","validation","leipzig","control" 361 | "VC315","315","validation","leipzig","control" 362 | "VC310","310","validation","leipzig","control" 363 | "VC310","310","validation","leipzig","control" 364 | "VC310","310","validation","leipzig","control" 365 | "VC310","310","validation","leipzig","control" 366 | "VC320","320","validation","leipzig","control" 367 | "VC320","320","validation","leipzig","control" 368 | "VC320","320","validation","leipzig","control" 369 | "VC320","320","validation","leipzig","control" 370 | 
"VC285","285","validation","leipzig","control" 371 | "VC285","285","validation","leipzig","control" 372 | "VC285","285","validation","leipzig","control" 373 | "VC285","285","validation","leipzig","control" 374 | "VC311","311","validation","leipzig","control" 375 | "VC311","311","validation","leipzig","control" 376 | "VC311","311","validation","leipzig","control" 377 | "VC311","311","validation","leipzig","control" 378 | "VC325","325","validation","leipzig","control" 379 | "VC325","325","validation","leipzig","control" 380 | "VC325","325","validation","leipzig","control" 381 | "VC325","325","validation","leipzig","control" 382 | "VC290","290","validation","leipzig","control" 383 | "VC290","290","validation","leipzig","control" 384 | "VC290","290","validation","leipzig","control" 385 | "VC290","290","validation","leipzig","control" 386 | "VC327","327","validation","leipzig","control" 387 | "VC327","327","validation","leipzig","control" 388 | "VC327","327","validation","leipzig","control" 389 | "VC327","327","validation","leipzig","control" 390 | "VC293","293","validation","leipzig","control" 391 | "VC293","293","validation","leipzig","control" 392 | "VC293","293","validation","leipzig","control" 393 | "VC293","293","validation","leipzig","control" 394 | "VC299","299","validation","leipzig","control" 395 | "VC299","299","validation","leipzig","control" 396 | "VC299","299","validation","leipzig","control" 397 | "VC299","299","validation","leipzig","control" 398 | "VC328","328","validation","leipzig","control" 399 | "VC328","328","validation","leipzig","control" 400 | "VC328","328","validation","leipzig","control" 401 | "VC328","328","validation","leipzig","control" 402 | "VP013","13","validation","leipzig","cancer" 403 | "VP013","13","validation","leipzig","cancer" 404 | "VP013","13","validation","leipzig","cancer" 405 | "VP013","13","validation","leipzig","cancer" 406 | "VP228","228","validation","leipzig","cancer" 407 | "VP228","228","validation","leipzig","cancer" 
408 | "VP228","228","validation","leipzig","cancer" 409 | "VP228","228","validation","leipzig","cancer" 410 | "VP238","238","validation","leipzig","cancer" 411 | "VP238","238","validation","leipzig","cancer" 412 | "VP238","238","validation","leipzig","cancer" 413 | "VP238","238","validation","leipzig","cancer" 414 | "VP258","258","validation","leipzig","cancer" 415 | "VP258","258","validation","leipzig","cancer" 416 | "VP258","258","validation","leipzig","cancer" 417 | "VP258","258","validation","leipzig","cancer" 418 | "VP016","16","validation","leipzig","cancer" 419 | "VP016","16","validation","leipzig","cancer" 420 | "VP016","16","validation","leipzig","cancer" 421 | "VP016","16","validation","leipzig","cancer" 422 | "VP240","240","validation","leipzig","cancer" 423 | "VP240","240","validation","leipzig","cancer" 424 | "VP240","240","validation","leipzig","cancer" 425 | "VP240","240","validation","leipzig","cancer" 426 | "VP023","23","validation","leipzig","cancer" 427 | "VP023","23","validation","leipzig","cancer" 428 | "VP023","23","validation","leipzig","cancer" 429 | "VP023","23","validation","leipzig","cancer" 430 | "VP082","82","validation","leipzig","cancer" 431 | "VP082","82","validation","leipzig","cancer" 432 | "VP082","82","validation","leipzig","cancer" 433 | "VP082","82","validation","leipzig","cancer" 434 | "VP242","242","validation","leipzig","cancer" 435 | "VP242","242","validation","leipzig","cancer" 436 | "VP242","242","validation","leipzig","cancer" 437 | "VP242","242","validation","leipzig","cancer" 438 | "VP029","29","validation","leipzig","cancer" 439 | "VP029","29","validation","leipzig","cancer" 440 | "VP029","29","validation","leipzig","cancer" 441 | "VP029","29","validation","leipzig","cancer" 442 | "VP102","102","validation","leipzig","cancer" 443 | "VP102","102","validation","leipzig","cancer" 444 | "VP102","102","validation","leipzig","cancer" 445 | "VP102","102","validation","leipzig","cancer" 446 | 
"VP065","65","validation","leipzig","cancer" 447 | "VP065","65","validation","leipzig","cancer" 448 | "VP065","65","validation","leipzig","cancer" 449 | "VP065","65","validation","leipzig","cancer" 450 | "VP136","136","validation","leipzig","cancer" 451 | "VP136","136","validation","leipzig","cancer" 452 | "VP136","136","validation","leipzig","cancer" 453 | "VP136","136","validation","leipzig","cancer" 454 | "VP194","194","validation","leipzig","cancer" 455 | "VP194","194","validation","leipzig","cancer" 456 | "VP194","194","validation","leipzig","cancer" 457 | "VP194","194","validation","leipzig","cancer" 458 | "VP155","155","validation","leipzig","cancer" 459 | "VP155","155","validation","leipzig","cancer" 460 | "VP155","155","validation","leipzig","cancer" 461 | "VP155","155","validation","leipzig","cancer" 462 | "VP196","196","validation","leipzig","cancer" 463 | "VP196","196","validation","leipzig","cancer" 464 | "VP196","196","validation","leipzig","cancer" 465 | "VP196","196","validation","leipzig","cancer" 466 | "VP181","181","validation","leipzig","cancer" 467 | "VP181","181","validation","leipzig","cancer" 468 | "VP181","181","validation","leipzig","cancer" 469 | "VP181","181","validation","leipzig","cancer" 470 | "VP198","198","validation","leipzig","cancer" 471 | "VP198","198","validation","leipzig","cancer" 472 | "VP198","198","validation","leipzig","cancer" 473 | "VP198","198","validation","leipzig","cancer" 474 | "VP220","220","validation","leipzig","cancer" 475 | "VP220","220","validation","leipzig","cancer" 476 | "VP220","220","validation","leipzig","cancer" 477 | "VP220","220","validation","leipzig","cancer" 478 | "VP233","233","validation","leipzig","cancer" 479 | "VP233","233","validation","leipzig","cancer" 480 | "VP233","233","validation","leipzig","cancer" 481 | "VP233","233","validation","leipzig","cancer" 482 | -------------------------------------------------------------------------------- /inst/extdata/nyakas2013/spectra.tar.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgibb/MALDIquantExamples/b09c99d1a99074a5802d5e640cba3c349aa464f8/inst/extdata/nyakas2013/spectra.tar.gz -------------------------------------------------------------------------------- /inst/extdata/species/spectra.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sgibb/MALDIquantExamples/b09c99d1a99074a5802d5e640cba3c349aa464f8/inst/extdata/species/spectra.tar.gz -------------------------------------------------------------------------------- /inst/scripts/fiedler2009/createSpectraInfoTable.R: --------------------------------------------------------------------------------
###############################################################################
## this script collects all metadata from the spectrum files and creates
## inst/extdata/fiedler2009/spectra_info.csv
##
## NOTE: all paths are relative; the script has to be run with
## inst/scripts/fiedler2009 as working directory.
###############################################################################

###############################################################################
## load packages
###############################################################################
library("MALDIquant")
library("MALDIquantForeign")

###############################################################################
## load all spectra
###############################################################################

basedir <- file.path("..", "..", "extdata", "fiedler2009")

## read all 480 native spectra
spectra <- import(file.path(basedir, "spectra.tar.gz"))

###############################################################################
## fetch and transform metadata
###############################################################################

## get metadata: stored in the acqu files in CMT section
## (only the first comment entry of each spectrum is used)
comments <- vapply(spectra, function(x) metaData(x)$comment[1L],
                   character(1L))

## entries in comments are semicolon separated
## (`text=` lets read.table open *and close* its own text connection; the
## former explicit textConnection() was never closed)
comments <- read.table(text=comments, sep=";", header=FALSE,
                       stringsAsFactors=FALSE)

## ID: the first/second element in the comments
## (matrix indexing picks one cell per row: column 2 if the first field
## contains "Name", column 1 otherwise; seq_len() is safe for zero rows)
patientID.orig <- comments[cbind(seq_len(nrow(comments)),
                                 as.integer(grepl("Name", comments[, 1L])) + 1L)]

## health status: decoded as "disease" or "P"
health <- ifelse(grepl("^(disease|P)$", comments[, 3L]), "cancer", "control")

## experiment: discovery/validation (validation has not any letter in the IDs)
## ("[HL]" instead of "[H|L]": inside a character class "|" is a literal pipe,
## not an alternation)
experiment <- ifelse(grepl("^[HL]", patientID.orig), "discovery", "validation")

## location: patients from Heidelberg have an H in their ID
location <- ifelse(grepl("^H", patientID.orig), "heidelberg", "leipzig")

## create a better patientID for validation experiments
patientID <- patientID.orig
isValidation <- experiment == "validation"

## ID is now VPxxx and VCxxx for validation/cancer/x and validation/control/x
patientID[isValidation] <- paste0("V", ifelse(health[isValidation] == "cancer",
                                              "P", "C"),
                                  patientID[isValidation])
## use always 3 digit numbers for ID
## (the first two characters are the prefix, the remainder is the number)
patientID <- sprintf("%s%03i", substr(patientID, 1L, 2L),
                     as.integer(substr(patientID, 3L, nchar(patientID))))

###############################################################################
## write spectra_info.csv
###############################################################################

## collect all meta information in a data.frame
spectra.info <- data.frame(patientID=patientID,
                           patientID.orig=patientID.orig,
                           experiment=experiment,
                           location=location,
                           health=health,
                           stringsAsFactors=FALSE)

write.table(spectra.info, file=file.path(basedir, "spectra_info.csv"),
            sep=",", row.names=FALSE)
-------------------------------------------------------------------------------- /inst/scripts/nyakas2013/createExtDataset.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | ############################################################################### 3 | ## this script creates the dataset in 4 | ## inst/extdata/nyakas2013/spectra.tar.gz 5 | ############################################################################### 6 | 7 | wget http://files.figshare.com/1106682/MouseKidney_IMS_testdata.zip 8 | unzip MouseKidney_IMS_testdata.zip 9 | mv "Imaging - Demo Datensatz/130611_MouseKidney" nyakas2013 10 | find nyakas2013 -name "Analysis*" -exec rm -rf {} \; 11 | tar -cvzf spectra.tar.gz nyakas2013/ 12 | mkdir -p ../../extdata/nyakas2013 13 | mv spectra.tar.gz ../../extdata/nyakas2013 14 | rm -rf "Imaging - Demo Datensatz" MouseKidney_IMS_testdata.zip nyakas2013 __MACOSX 15 | -------------------------------------------------------------------------------- /man/MALDIquantExamples-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \docType{package} 4 | \name{MALDIquantExamples-package} 5 | \alias{MALDIquantExamples-package} 6 | \title{Examples for MALDIquant} 7 | \description{ 8 | This package provides example scripts and data to demonstrate the 9 | usage of MALDIquant. 
10 | } 11 | \details{ 12 | \tabular{ll}{ 13 | Package: \tab MALDIquantExamples \cr 14 | License: \tab GPL (>= 3)\cr 15 | URL: \tab http://www.github.com/sgibb/MALDIquantExamples/ \cr 16 | } 17 | } 18 | \author{ 19 | Sebastian Gibb <\email{mail@sebastiangibb.de}> 20 | } 21 | \references{ 22 | \url{http://www.github.com/sgibb/MALDIquantExamples/} 23 | } 24 | \keyword{package} 25 | 26 | -------------------------------------------------------------------------------- /man/createFigure1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/createFigure1.R 3 | \name{createFigure1} 4 | \alias{createFigure1} 5 | \title{Figure 1} 6 | \usage{ 7 | createFigure1() 8 | } 9 | \description{ 10 | This function creates Figure 1 in Gibb and Strimmer 2012. 11 | } 12 | \examples{ 13 | \dontrun{ 14 | library("MALDIquantExamples") 15 | pdfWidth <- 7 16 | pdfHeight <- pdfWidth*0.35 17 | 18 | pdf(file="figure1.pdf", height=pdfHeight, width=pdfWidth) 19 | createFigure1() 20 | dev.off() 21 | } 22 | } 23 | \references{ 24 | S. Gibb and K. Strimmer. 2012. MALDIquant: a versatile R package for the 25 | analysis of mass spectrometry data. Bioinformatics 28: 2270-2271 26 | } 27 | \seealso{ 28 | \code{\link[MALDIquantExamples]{createFigure1Color}} 29 | } 30 | 31 | -------------------------------------------------------------------------------- /man/createFigure1Color.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/createFigure1_color.R 3 | \name{createFigure1Color} 4 | \alias{createFigure1Color} 5 | \title{Figure 1} 6 | \usage{ 7 | createFigure1Color() 8 | } 9 | \description{ 10 | This function creates Figure 1 in Gibb and Strimmer 2012 in a colorized 11 | version. 
12 | } 13 | \examples{ 14 | \dontrun{ 15 | library("MALDIquantExamples") 16 | pdfWidth <- 7 17 | pdfHeight <- pdfWidth*0.35 18 | 19 | pdf(file="figure1.pdf", height=pdfHeight, width=pdfWidth) 20 | createFigure1Color() 21 | dev.off() 22 | } 23 | } 24 | \references{ 25 | S. Gibb and K. Strimmer. 2012. MALDIquant: a versatile R package for the 26 | analysis of mass spectrometry data. Bioinformatics 28: 2270-2271 27 | } 28 | \seealso{ 29 | \code{\link[MALDIquantExamples]{createFigure1}} 30 | } 31 | 32 | -------------------------------------------------------------------------------- /man/getPathFiedler2009.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/getPathFiedler2009.R 3 | \name{getPathFiedler2009} 4 | \alias{getPathFiedler2009} 5 | \title{Serum peptidome profiling revealed platelet factor 4 as a potential 6 | discriminating Peptide associated with pancreatic cancer} 7 | \format{A list containing 480 \code{\link[MALDIquant]{MassSpectrum-class}} 8 | objects. 9 | 10 | Three sets: 11 | \enumerate{ 12 | \item 20 patients with pancreatic cancer and 20 healthy patients from 13 | University hospital Leipzig (set A, discovery). 14 | \item 20 patients with pancreatic cancer and 20 healthy patients from 15 | University hospital Heidelberg (set B, discovery). 16 | \item 20 patients with pancreatic cancer and 20 healthy patients from 17 | University hospital Leipzig (set C, validation, half resolution). 18 | } 19 | 20 | Set A and B were measured on the same target (batch). Set C was measured a 21 | few month later. \cr 22 | Each sample has four technical replicates.} 23 | \usage{ 24 | getPathFiedler2009 25 | } 26 | \value{ 27 | Returns a \code{character} vector of length two. The first element is 28 | the local path to the tar-archive of the spectra and the second is the path 29 | to the csv file with additional information about each spectrum. 
30 | } 31 | \description{ 32 | This dataset contains 480 MALDI-TOF mass spectra used in 33 | \emph{Fiedler et al. 2009}. 34 | } 35 | \section{Abstract}{ 36 | 37 | 38 | \bold{Purpose}: Mass spectrometry-based serum peptidome profiling is a 39 | promising tool to identify novel disease-associated biomarkers, but is 40 | limited by preanalytic factors and the intricacies of complex data 41 | processing. Therefore, we investigated whether standardized sample protocols 42 | and new bioinformatic tools combined with external data validation improve 43 | the validity of peptidome profiling for the discovery of pancreatic 44 | cancer-associated serum markers. 45 | 46 | \bold{Experimental Design}: For the discovery study, two sets of sera from 47 | patients with pancreatic cancer (n = 40) and healthy controls (n = 40) were 48 | obtained from two different clinical centers. For external data validation, 49 | we collected an independent set of samples from patients (n = 20) and healthy 50 | controls (n = 20). Magnetic beads with different surface functionalities were 51 | used for peptidome fractionation followed by matrix-assisted laser 52 | desorption/ionization time-of-flight (MALDI-TOF) mass spectrometry (MS). 53 | Data evaluation was carried out by comparing two different bioinformatic 54 | strategies. Following proteome database search, the matching candidate 55 | peptide was verified by MALDI-TOF MS after specific antibody-based 56 | immunoaffinity chromatography and independently confirmed by an ELISA assay. 57 | 58 | \bold{Results}: Two significant peaks (m/z 3884; 5959) achieved a 59 | sensitivity of 86.3\% and a specificity of 97.6\% for the discrimination of 60 | patients and healthy controls in the external validation set. Adding peak 61 | m/z 3884 to conventional clinical tumor markers (CA 19-9 and CEA) improved 62 | sensitivity and specificity, as shown by receiver operator characteristics 63 | curve analysis (AUROCcombined = 1.00). 
Mass spectrometry-based m/z 3884 64 | peak identification and following immunologic quantitation revealed platelet 65 | factor 4 as the corresponding peptide. 66 | 67 | \bold{Conclusions}: MALDI-TOF MS-based serum peptidome profiling allowed the 68 | discovery and validation of platelet factor 4 as a new discriminating marker 69 | in pancreatic cancer. 70 | } 71 | \examples{ 72 | library("MALDIquantExamples") 73 | getPathFiedler2009() 74 | } 75 | \references{ 76 | Fiedler, Georg Martin, et al. "Serum peptidome profiling revealed platelet 77 | factor 4 as a potential discriminating Peptide associated with pancreatic 78 | cancer." Clinical Cancer Research 15.11 (2009): 3812-3819. 79 | } 80 | \keyword{datasets} 81 | 82 | -------------------------------------------------------------------------------- /man/getPathNyakas2013.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/getPathNyakas2013.R 3 | \name{getPathNyakas2013} 4 | \alias{getPathNyakas2013} 5 | \title{This dataset contains 2222 MALDI-TOF mass spectra of a mouse kidney.} 6 | \usage{ 7 | getPathNyakas2013 8 | } 9 | \value{ 10 | Returns the local file path for the corresponding tar-archive. 11 | } 12 | \description{ 13 | This MALDI Imaging dataset contains 2222 MALDI-TOF mass spectra of a mouse 14 | kidney. It ranges from (x=29, y=68) to (x=101, y=92). 15 | } 16 | \examples{ 17 | library("MALDIquantExamples") 18 | getPathNyakas2013() 19 | } 20 | \references{ 21 | This dataset was kindly provided by 22 | Dr. Adrien Nyakas (\email{adrien.nyakas@dcb.unibe.ch}). 23 | 24 | See also: \url{http://dx.doi.org/10.6084/m9.figshare.735961}. 
25 | } 26 | \keyword{datasets} 27 | 28 | -------------------------------------------------------------------------------- /man/getPathSpecies.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.1): do not edit by hand 2 | % Please edit documentation in R/getPathSpecies.R 3 | \name{getPathSpecies} 4 | \alias{getPathSpecies} 5 | \title{This dataset contains 96 MALDI-TOF mass spectra of different bacteria 6 | species.} 7 | \usage{ 8 | getPathSpecies 9 | } 10 | \value{ 11 | Returns the local file path for the corresponding tar-archive. 12 | } 13 | \description{ 14 | This dataset contains 96 MALDI-TOF mass spectra of four different bacteria 15 | species. Each species is represented by eight individual samples and each 16 | sample has three technical replicates. 17 | } 18 | \examples{ 19 | library("MALDIquantExamples") 20 | getPathSpecies() 21 | } 22 | \references{ 23 | This dataset was kindly provided by 24 | Dr. Bryan R. Thoma \email{bryanthoma@yahoo.com}. 
25 | } 26 | \keyword{datasets} 27 | 28 | -------------------------------------------------------------------------------- /vignettes/bibliography.bib: -------------------------------------------------------------------------------- 1 | @MANUAL{RPROJECT, 2 | title = {{R}: {A} {L}anguage and {E}nvironment for {S}tatistical {C}omputing}, 3 | author = {{R Core Team}}, 4 | organization = {R Foundation for Statistical Computing}, 5 | address = {Vienna, Austria}, 6 | year = {2014}, 7 | url = {http://www.R-project.org/}, 8 | } 9 | @ARTICLE{MALDIquant, 10 | title = {{MALDI}quant: a versatile {R} package for the analysis of mass spectrometry data}, 11 | author = {Sebastian Gibb and Korbinian Strimmer}, 12 | volume = {28}, 13 | number = {17}, 14 | pages = {2270--2271}, 15 | year = {2012}, 16 | doi = {10.1093/bioinformatics/bts447}, 17 | url = {http://bioinformatics.oxfordjournals.org/content/28/17/2270.abstract}, 18 | eprint = {http://bioinformatics.oxfordjournals.org/content/28/17/2270.full.pdf+html}, 19 | journal = {\mbox{Bioinformatics}}, 20 | publisher = {Oxford University Press}, 21 | } 22 | @MANUAL{crossval, 23 | title = {crossval: {G}eneric {F}unctions for {C}ross {V}alidation}, 24 | author = {Korbinian Strimmer}, 25 | year = {2014}, 26 | note = {R package version 1.0.0}, 27 | url = {http://CRAN.R-project.org/package=crossval}, 28 | } 29 | @MANUAL{devtools, 30 | title = {devtools: {T}ools to make developing {R} code easier}, 31 | author = {Hadley Wickham and Winston Chang}, 32 | year = {2014}, 33 | note = {R package version 1.5}, 34 | url = {http://CRAN.R-project.org/package=devtools}, 35 | } 36 | @MANUAL{MALDIquantForeign, 37 | title = {{MALDI}quant{F}oreign: {I}mport/{E}xport routines for {MALDI}quant}, 38 | author = {Sebastian Gibb}, 39 | year = {2014}, 40 | note = {R package version 0.7}, 41 | url = {http://strimmerlab.org/software/maldiquant/ 42 | https://github.com/sgibb/MALDIquantForeign/}, 43 | } 44 | @ARTICLE{sda, 45 | author = {M. Ahdesm\"aki and K. 
Strimmer}, 46 | journal = {The Annals of Applied Statistics}, 47 | month = {Mar}, 48 | number = {1}, 49 | pages = {503--519}, 50 | publisher = {The Institute of Mathematical Statistics}, 51 | title = {Feature selection in omics prediction problems 52 | using cat scores and false nondiscovery rate control}, 53 | url = {http://dx.doi.org/10.1214/09-AOAS277}, 54 | doi = {10.1214/09-AOAS277}, 55 | volume = {4}, 56 | year = {2010} 57 | } 58 | @ARTICLE{Fiedler2009, 59 | author = {G. M. Fiedler and A. B. Leichtle and J. Kase 60 | and S. Baumann and U. Ceglarek and K. Felix and T. Conrad 61 | and H. Witzigmann and A. Weimann and C. Schütte and 62 | J. Hauss and M. B\"uchler and J. Thiery}, 63 | title = {Serum peptidome profiling revealed platelet factor 4 as a potential 64 | discriminating Peptide associated with pancreatic cancer.}, 65 | journal = {Clinical Cancer Research}, 66 | year = {2009}, 67 | volume = {15}, 68 | pages = {3812--3819}, 69 | month = {Jun}, 70 | doi = {10.1158/1078-0432.CCR-08-2701}, 71 | url = {http://dx.doi.org/10.1158/1078-0432.CCR-08-2701} 72 | } 73 | @ARTICLE{Savitzky1964, 74 | author = {A. Savitzky and M. J. E. Golay}, 75 | title = {{S}moothing and {D}ifferentiation of {D}ata by {S}implified {L}east 76 | {S}quares {P}rocedures.}, 77 | journal = {Analytical Chemistry}, 78 | year = {1964}, 79 | volume = {36}, 80 | pages = {1627--1639}, 81 | doi = {10.1021/ac60214a047}, 82 | url = {http://pubs.acs.org/doi/abs/10.1021/ac60214a047} 83 | } 84 | @ARTICLE{Ryan1988, 85 | author = {C. G. Ryan and E. Clayton and W. L. Griffin and S. H. Sie and D. R. 
Cousens}, 86 | title = {{SNIP}, a statistics-sensitive background treatment for the quantitative 87 | analysis of {PIXE} spectra in geoscience applications}, 88 | journal = {Nuclear Instruments and Methods in Physics Research Section B: Beam 89 | Interactions with Materials and Atoms}, 90 | year = {1988}, 91 | volume = {34}, 92 | pages = {396--402}, 93 | doi = {10.1016/0168-583X(88)90063-8}, 94 | url = {http://www.sciencedirect.com/science/article/B6TJN-46YSYTJ-30/2/e0d015ceb8ea8a7bc0702a857a19750b} 95 | } 96 | @ARTICLE{Bromba1981, 97 | author = {Bromba, Manfred U. A. and Ziegler, Horst}, 98 | title = {Application hints for Savitzky-Golay digital smoothing filters}, 99 | journal = {Analytical Chemistry}, 100 | year = {1981}, 101 | volume = {53}, 102 | pages = {1583--1586}, 103 | number = {11}, 104 | doi = {10.1021/ac00234a011}, 105 | eprint = {http://pubs.acs.org/doi/pdf/10.1021/ac00234a011}, 106 | url = {http://pubs.acs.org/doi/abs/10.1021/ac00234a011} 107 | } 108 | @MANUAL{pvclust, 109 | title = {pvclust: {H}ierarchical {C}lustering with {P}-{V}alues via 110 | {M}ultiscale {B}ootstrap {R}esampling}, 111 | author = {Ryota Suzuki and Hidetoshi Shimodaira}, 112 | year = {2011}, 113 | note = {R package version 1.2-2}, 114 | url = {http://CRAN.R-project.org/package=pvclust}, 115 | } 116 | -------------------------------------------------------------------------------- /vignettes/fiedler2009.Rnw: -------------------------------------------------------------------------------- 1 | %\VignetteEngine{knitr} 2 | %\VignetteIndexEntry{Analysis of Fiedler et al. 2009} 3 | %\VignetteKeywords{Bioinformatics, Proteomics, Mass Spectrometry} 4 | %\VignettePackage{fiedler2009} 5 | 6 | \documentclass[12pt]{article} 7 | 8 | \input{utils} 9 | 10 | \title{Analysis of Fiedler et al.
2009 using \Mq{} } 11 | 12 | \author{ 13 | Sebastian Gibb% 14 | \thanks{\email{mail@sebastiangibb.de}} 15 | } 16 | \date{\today} 17 | 18 | \begin{document} 19 | 20 | <>= 21 | library("knitr") 22 | opts_chunk$set(width=40, tidy.opts=list(width.cutoff=45), tidy=FALSE, 23 | fig.path=file.path("figures", "fiedler2009/"), 24 | fig.align="center", fig.height=4.25, comment=NA, prompt=FALSE) 25 | @ 26 | 27 | \maketitle 28 | 29 | \begin{abstract} 30 | This vignette describes the analysis of the MALDI-TOF spectra described in 31 | \citet{Fiedler2009} using \Mq{}\\ 32 | \end{abstract} 33 | 34 | \tableofcontents 35 | 36 | \clearpage 37 | 38 | \input{foreword} 39 | 40 | \input{vignettes} 41 | 42 | \section{Setup} 43 | 44 | Before any analysis we need to install the necessary packages 45 | (you can skip this part if you have already done this). 46 | You can install \Mq{} \citep{MALDIquant}, 47 | \MqF{} \citep{MALDIquantForeign}, \Rpackage{sda} 48 | \citep{sda} and \Rpackage{crossval} \citep{crossval} directly from \CRAN{}. 49 | To install this data package from 50 | \url{http://github.com/sgibb/MALDIquantExamples} 51 | you need the \Rpackage{devtools} \citep{devtools} package. 52 | <>= 53 | install.packages(c("MALDIquant", "MALDIquantForeign", 54 | "sda", "crossval", "devtools")) 55 | library("devtools") 56 | install_github("sgibb/MALDIquantExamples") 57 | @ 58 | 59 | \section{Dataset} 60 | 61 | In this vignette we use the dataset described in \citet{Fiedler2009}. Please 62 | contact the authors directly if you want to use the dataset in your own 63 | analysis. 64 | 65 | This dataset contains 480 MALDI-TOF mass spectra from blood sera of 60 patients 66 | and 60 healthy controls (each sample has four technical replicates). 67 | 68 | It is divided into three sets: 69 | \begin{enumerate} 70 | \item \emph{Discovery Set A}: 20 patients with pancreatic cancer and 20 71 | healthy patients from the University Hospital Leipzig.
72 | \item \emph{Discovery Set B}: 20 patients with pancreatic cancer and 20 73 | healthy patients from the University Hospital Heidelberg. 74 | \item \emph{Validation Set C}: 20 patients with pancreatic cancer and 20 75 | healthy patients from the University Hospital Leipzig (half resolution). 76 | \end{enumerate} 77 | 78 | Both discovery sets \emph{A} and \emph{B} were measured on the same target 79 | (batch). The validation set \emph{C} was measured a few months later. 80 | 81 | Please see \citet{Fiedler2009} for details. 82 | 83 | \section{Analysis} 84 | 85 | First we have to load the packages. 86 | <>= 87 | suppressPackageStartupMessages(library("MALDIquantExamples")) 88 | suppressPackageStartupMessages(library("xtable")) 89 | @ 90 | <>= 91 | ## the main MALDIquant package 92 | library("MALDIquant") 93 | ## the import/export routines for MALDIquant 94 | library("MALDIquantForeign") 95 | 96 | ## example data 97 | library("MALDIquantExamples") 98 | @ 99 | 100 | \subsection{Import Raw Data} 101 | 102 | We use the \Rfunction{getPathFiedler2009} function to get 103 | the correct file path to the spectra and the metadata file respectively. 104 | <>= 105 | ## import the spectra 106 | spectra <- import(getPathFiedler2009()["spectra"], 107 | verbose=FALSE) 108 | 109 | ## import metadata 110 | spectra.info <- read.table(getPathFiedler2009()["info"], 111 | sep=",", header=TRUE) 112 | @ 113 | 114 | Because of heavy batch effects between the two hospitals we consider only the 115 | data collected in the University Hospital Heidelberg. 116 | 117 | <>= 118 | isHeidelberg <- spectra.info$location == "heidelberg" 119 | 120 | spectra <- spectra[isHeidelberg] 121 | spectra.info <- spectra.info[isHeidelberg,] 122 | @ 123 | 124 | We do a basic quality control and test whether all spectra contain the same 125 | number of data points and are not empty.
126 | 127 | \subsection{Quality Control} 128 | 129 | <>= 130 | table(sapply(spectra, length)) 131 | any(sapply(spectra, isEmpty)) 132 | all(sapply(spectra, isRegular)) 133 | @ 134 | 135 | Subsequently we ensure that all spectra have the same mass range. 136 | <>= 137 | spectra <- trim(spectra) 138 | @ 139 | 140 | Finally we draw some plots and inspect the spectra visually. 141 | 142 | <>= 143 | set.seed(123) 144 | @ 145 | <>= 146 | idx <- sample(length(spectra), size=2) 147 | plot(spectra[[idx[1]]]) 148 | plot(spectra[[idx[2]]]) 149 | @ 150 | 151 | \subsection{Transformation and Smoothing} 152 | 153 | We apply the square root transformation to simplify graphical visualization 154 | and to overcome the potential dependency of the variance on the mean. 155 | 156 | <>= 157 | spectra <- transformIntensity(spectra, method="sqrt") 158 | @ 159 | 160 | In the next step we use a 41 point \emph{Savitzky-Golay}-Filter 161 | \citep{Savitzky1964} to smooth the spectra. 162 | <>= 163 | spectra <- smoothIntensity(spectra, method="SavitzkyGolay", 164 | halfWindowSize=20) 165 | @ 166 | 167 | \subsection{Baseline Correction} 168 | 169 | Matrix effects and chemical noise result in some background noise. That's why 170 | we have to apply a baseline correction. In this example we use the 171 | \emph{SNIP} algorithm \citep{Ryan1988} to correct the baseline. 172 | 173 | <>= 174 | baseline <- estimateBaseline(spectra[[1]], method="SNIP", 175 | iterations=150) 176 | plot(spectra[[1]]) 177 | lines(baseline, col="red", lwd=2) 178 | @ 179 | 180 | <>= 181 | spectra <- removeBaseline(spectra, method="SNIP", 182 | iterations=150) 183 | plot(spectra[[1]]) 184 | @ 185 | 186 | \subsection{Intensity Calibration} 187 | 188 | We perform the \emph{Total-Ion-Current}-calibration (TIC; often called 189 | normalization) to equalize the intensities across spectra.
190 | 191 | <>= 192 | spectra <- calibrateIntensity(spectra, method="TIC") 193 | @ 194 | 195 | \subsection{Alignment} 196 | 197 | Next we need to (re)calibrate the mass values. Our alignment procedure is a peak 198 | based warping algorithm. \Mq{} offers \Rfunction{alignSpectra} as a wrapper 199 | around more complicated functions. If you need a finer control or want to 200 | investigate the impact of different parameters please use 201 | \Rfunction{determineWarpingFunctions} instead (see 202 | \Rfunction{?determineWarpingFunctions} for details). 203 | 204 | <>= 205 | spectra <- alignSpectra(spectra) 206 | @ 207 | 208 | We average the technical replicates before we look for peaks and adjust 209 | our metadata table accordingly. 210 | 211 | <>= 212 | avgSpectra <- 213 | averageMassSpectra(spectra, labels=spectra.info$patientID) 214 | avgSpectra.info <- 215 | spectra.info[!duplicated(spectra.info$patientID), ] 216 | @ 217 | 218 | \subsection{Peak Detection} 219 | 220 | The peak detection is the crucial feature reduction step. Before performing the 221 | peak detection we estimate the noise of some spectra to get a feeling for the 222 | \emph{signal-to-noise ratio} (SNR). 223 | <>= 224 | noise <- estimateNoise(avgSpectra[[1]]) 225 | plot(avgSpectra[[1]], xlim=c(4000, 5000), ylim=c(0, 0.002)) 226 | lines(noise, col="red") # SNR == 1 227 | lines(noise[, 1], 2*noise[, 2], col="blue") # SNR == 2 228 | @ 229 | 230 | In this case we decide to set a \emph{SNR} of 2 (blue line). 231 | 232 | <>= 233 | peaks <- detectPeaks(avgSpectra, SNR=2, halfWindowSize=20) 234 | @ 235 | 236 | <>= 237 | plot(avgSpectra[[1]], xlim=c(4000, 5000), ylim=c(0, 0.002)) 238 | points(peaks[[1]], col="red", pch=4) 239 | @ 240 | 241 | \subsection{Post Processing} 242 | 243 | After the alignment the peak positions (mass) are very similar but not 244 | identical. The binning is needed to make similar peak mass values identical. 
245 | 246 | <>= 247 | peaks <- binPeaks(peaks) 248 | @ 249 | 250 | We chose a very low signal-to-noise ratio to keep as many features as possible. 251 | To remove some false positive peaks we remove peaks that appear in fewer than 50 252 | \% of all spectra in each group. 253 | 254 | <>= 255 | peaks <- filterPeaks(peaks, minFrequency=c(0.5, 0.5), 256 | labels=avgSpectra.info$health, 257 | mergeWhitelists=TRUE) 258 | @ 259 | 260 | Finally we create the feature matrix and label the rows with the corresponding 261 | patient ID. 262 | 263 | <>= 264 | featureMatrix <- intensityMatrix(peaks, avgSpectra) 265 | rownames(featureMatrix) <- avgSpectra.info$patientID 266 | @ 267 | 268 | \subsection{Diagonal Discriminant Analysis} 269 | 270 | We finish the \Mq{} preprocessing and use the 271 | \emph{diagonal discriminant analysis} (DDA) 272 | function of \Rpackage{sda} \citep{sda} to find the most important peaks. 273 | 274 | <>= 275 | library("sda") 276 | Xtrain <- featureMatrix 277 | Ytrain <- avgSpectra.info$health 278 | ddar <- sda.ranking(Xtrain=featureMatrix, L=Ytrain, fdr=FALSE, 279 | diagonal=TRUE) 280 | @ 281 | <>= 282 | xtable(ddar[1:10, ], booktabs=TRUE) 283 | @ 284 | 285 | \subsection{Hierarchical Clustering} 286 | 287 | To visualize the results without any feature selection by \emph{DDA} we apply a 288 | hierarchical cluster analysis based on the Euclidean distance. 289 | 290 | <>= 291 | distanceMatrix <- dist(featureMatrix, method="euclidean") 292 | 293 | hClust <- hclust(distanceMatrix, method="complete") 294 | 295 | plot(hClust, hang=-1) 296 | @ 297 | 298 | Next we use only the 2 top peaks selected in the \emph{DDA} and we get a 299 | nearly perfect split between the cancer and control groups. 
300 | 301 | <>= 302 | top <- ddar[1:2, "idx"] 303 | 304 | distanceMatrixTop <- dist(featureMatrix[, top], 305 | method="euclidean") 306 | 307 | hClustTop <- hclust(distanceMatrixTop, method="complete") 308 | 309 | plot(hClustTop, hang=-1) 310 | @ 311 | 312 | \subsection{Cross Validation} 313 | 314 | Subsequently we use the \Rpackage{crossval} \citep{crossval} package to perform 315 | a 10-fold cross validation of these two selected peaks. 316 | 317 | <>= 318 | library("crossval") 319 | # create a prediction function for the cross validation 320 | predfun.dda <- function(Xtrain, Ytrain, Xtest, Ytest, 321 | negative) { 322 | dda.fit <- sda(Xtrain, Ytrain, diagonal=TRUE, verbose=FALSE) 323 | ynew <- predict(dda.fit, Xtest, verbose=FALSE)$class 324 | return(confusionMatrix(Ytest, ynew, negative=negative)) 325 | } 326 | 327 | # set seed to get reproducible results 328 | set.seed(1234) 329 | 330 | cv.out <- crossval(predfun.dda, 331 | X=featureMatrix[, top], 332 | Y=avgSpectra.info$health, 333 | K=10, B=20, 334 | negative="control", 335 | verbose=FALSE) 336 | diagnosticErrors(cv.out$stat) 337 | @ 338 | 339 | \subsection{Summary} 340 | 341 | We found the peaks \textit{m/z} 8937 and 4467 as important features for the 342 | discrimination between the cancer and control group. 343 | 344 | \section{Session Information} 345 | <>= 346 | toLatex(sessionInfo(), locale=FALSE) 347 | @ 348 | 349 | \bibliographystyle{apalike} 350 | \bibliography{bibliography} 351 | 352 | \end{document} 353 | -------------------------------------------------------------------------------- /vignettes/foreword.tex: -------------------------------------------------------------------------------- 1 | \section{Foreword} 2 | 3 | \Mq{} is free and open source software for the \R{} \citep{RPROJECT} 4 | environment and under active development. 
5 | If you use it, please support the project by citing it in publications: 6 | 7 | \begin{quote} 8 | \bibentry{MALDIquant} 9 | \end{quote} 10 | 11 | If you have any questions, bugs, or suggestions do not hesitate to contact 12 | me (\email{mail@sebastiangibb.de}). \\ 13 | Please visit \url{http://strimmerlab.org/software/maldiquant/}. 14 | 15 | -------------------------------------------------------------------------------- /vignettes/nyakas2013.Rnw: -------------------------------------------------------------------------------- 1 | %\VignetteEngine{knitr} 2 | %\VignetteIndexEntry{Mass Spectrometry Imaging using MALDIquant} 3 | %\VignetteKeywords{Bioinformatics, Proteomics, Mass Spectrometry} 4 | %\VignettePackage{nyakas2013} 5 | 6 | \documentclass[12pt]{article} 7 | 8 | \input{utils} 9 | 10 | \title{\MSI{} using \Mq{} } 11 | 12 | \author{ 13 | Sebastian Gibb% 14 | \thanks{\email{mail@sebastiangibb.de}} 15 | } 16 | \date{\today} 17 | 18 | \begin{document} 19 | 20 | <>= 21 | library("knitr") 22 | opts_chunk$set(width=40, tidy.opts=list(width.cutoff=45), tidy=FALSE, 23 | fig.path=file.path("figures", "nyakas2013/"), 24 | fig.align="center", fig.height=4.25, comment=NA, prompt=FALSE) 25 | @ 26 | 27 | \maketitle 28 | 29 | \begin{abstract} 30 | This vignette describes the analysis of \MSI{} data using \Mq{}\\ 31 | \end{abstract} 32 | 33 | \tableofcontents 34 | 35 | \clearpage 36 | 37 | \input{foreword} 38 | 39 | \input{vignettes} 40 | 41 | \section{Setup} 42 | 43 | Before any analysis we need to install the necessary packages 44 | (you can skip this part if you have already done this). 45 | You can install \Mq{} \citep{MALDIquant}, 46 | \MqF{} \citep{MALDIquantForeign} directly from \CRAN{}. 47 | To install this data package from 48 | \url{http://github.com/sgibb/MALDIquantExamples} 49 | you need the \Rpackage{devtools} \citep{devtools} package. 
50 | <>= 51 | install.packages(c("MALDIquant", "MALDIquantForeign", 52 | "devtools")) 53 | library("devtools") 54 | install_github("sgibb/MALDIquantExamples") 55 | @ 56 | 57 | \section{Dataset} 58 | 59 | The dataset we use in this vignette was kindly provided by 60 | Dr. Adrien Nyakas (\email{adrien.nyakas@dcb.unibe.ch}; 61 | \url{http://dx.doi.org/10.6084/m9.figshare.735961}). 62 | It contains 2222 MALDI-TOF spectra (coordinates: (29, 61) to (101, 98)) 63 | of a mouse kidney. 64 | 65 | \section{Analysis} 66 | 67 | First we have to load the packages. 68 | <>= 69 | suppressPackageStartupMessages(library("MALDIquantExamples")) 70 | @ 71 | <>= 72 | ## the main MALDIquant package 73 | library("MALDIquant") 74 | ## the import/export routines for MALDIquant 75 | library("MALDIquantForeign") 76 | 77 | ## example data 78 | library("MALDIquantExamples") 79 | @ 80 | 81 | \subsection{Import Raw Data} 82 | 83 | Next we use the \Rfunction{getPathNyakas2013} function to get 84 | the correct file path of our example data and import them into \R{}. 85 | <>= 86 | ## import the spectra 87 | spectra <- import(getPathNyakas2013(), verbose=FALSE) 88 | @ 89 | 90 | \subsection{Preprocessing} 91 | 92 | The complete preprocessing is very similar to the default workflow for mass 93 | spectrometry data. Please find a more detailed description in the vignette 94 | \href{http://cran.r-project.org/web/packages/MALDIquant/vignettes/MALDIquant-intro.pdf}{MALDIquant 95 | Introduction}. 96 | 97 | <>= 98 | spectra <- transformIntensity(spectra, method="sqrt") 99 | spectra <- smoothIntensity(spectra, method="SavitzkyGolay", 100 | halfWindowSize=10) 101 | spectra <- removeBaseline(spectra, method="SNIP", 102 | iterations=10) 103 | spectra <- calibrateIntensity(spectra, method="TIC") 104 | @ 105 | 106 | \subsection{Average Spectrum} 107 | 108 | After a basic preprocessing of all spectra we produce a mean spectrum and run a 109 | peak detection on it to find regions of interest. 
110 | 111 | <>= 112 | meanSpectrum <- averageMassSpectra(spectra) 113 | 114 | roi <- detectPeaks(meanSpectrum, SNR=4, 115 | halfWindowSize=10) 116 | 117 | plot(meanSpectrum, main="Mean Spectrum") 118 | points(roi, col="red") 119 | @ 120 | 121 | \subsection{Plotting Slices} 122 | 123 | We want to plot a mass spectrometry image slice around the highest peak in our 124 | mean spectrum. 125 | 126 | <>= 127 | ## find order of peak intensities 128 | o <- order(intensity(roi), decreasing=TRUE) 129 | 130 | ## plot MSI slice for the highest one 131 | plotMsiSlice(spectra, center=mass(roi)[o[1]], tolerance=0.5) 132 | @ 133 | 134 | We could plot multiple slices as well. 135 | <>= 136 | plotMsiSlice(spectra, center=mass(roi)[o[2:3]], tolerance=0.5) 137 | @ 138 | 139 | Another possibility would be to combine these regions of interest in one plot. 140 | <>= 141 | plotMsiSlice(spectra, center=mass(roi)[o[1:2]], tolerance=0.5, 142 | combine=TRUE, 143 | colRamp=list(colorRamp(c("#000000", "#FF00FF")), 144 | colorRamp(c("#000000", "#00FF00")))) 145 | @ 146 | 147 | \subsection{Working with slices/coordinates} 148 | 149 | Sometimes the slices should be processed further. For this purpose 150 | \Rfunction{msiSlices} generates an array with the dimensions x coordinates, y 151 | coordinates and center mass. 152 | <>= 153 | slices <- msiSlices(spectra, center=mass(roi), tolerance=0.5) 154 | attributes(slices) 155 | @ 156 | 157 | Via the \Rfunction{coordinates} method we get the pixel coordinates of our 158 | spectra. Use the argument \Rfunction{adjust} to set the minimal values to 1. 159 | <>= 160 | head(coordinates(spectra)) 161 | head(coordinates(spectra, adjust=TRUE)) 162 | @ 163 | 164 | \subsection{Clustering} 165 | 166 | While we could highlight some mass values in our slices we sometimes want to do 167 | some clustering to get information about the spatial order. 168 | 169 | Therefore we build a peak intensity matrix first. 
170 | <>= 171 | peaks <- detectPeaks(spectra, SNR=3, 172 | halfWindowSize=10) 173 | peaks <- binPeaks(peaks) 174 | intMatrix <- intensityMatrix(peaks, spectra) 175 | @ 176 | 177 | Subsequently we run a \Rfunction{kmeans} clustering with 2 centers. We choose 2 178 | centers because the kidney is divided into 2 main anatomical parts, the renal 179 | cortex (the outer part) and the renal medulla (the inner part, containing the 180 | renal pyramids). 181 | <>= 182 | km <- kmeans(intMatrix, centers=2) 183 | @ 184 | 185 | For visualisation we create a new matrix and replace each coordinate by its 186 | cluster number. 187 | <>= 188 | coord <- coordinates(spectra, adjust=TRUE) 189 | maxPixels <- apply(coord, MARGIN=2, FUN=max) 190 | m <- matrix(NA, nrow=maxPixels["x"], ncol=maxPixels["y"]) 191 | m[coord] <- km$cluster 192 | @ 193 | 194 | In the following step we use the \Rfunction{plotMsiSlice} function again to plot 195 | our cluster matrix. Now we use the argument \Rfunction{scale=FALSE} to avoid the 196 | scaling to values between 0 and 1. Also we provide our own \Rfunction{colRamp} 197 | function that returns red or green for the clusters 1 and 2, respectively 198 | (it must generate the same matrix output as \Rfunction{graphics::colorRamp}). 199 | <>= 200 | rgbCluster <- function(x) { 201 | col <- matrix(c(255, 0, 0, 202 | 0, 255, 0), nrow=2, byrow=TRUE) 203 | col[x, ] 204 | } 205 | plotMsiSlice(m, colRamp=rgbCluster, scale=FALSE) 206 | @ 207 | 208 | Please note that the base \Rfunction{kmeans} doesn't respect any spatial 209 | information. 210 | 211 | \subsection{Summary} 212 | 213 | While the default \MSI{} workflow is very similar to the default profile spectra 214 | workflow (and could be found in a detailed discussion in the other \Mq{} 215 | vignettes) we demonstrate typical \MSI{} functions like plotting slices and 216 | clustering data. 
217 | 218 | \section{Session Information} 219 | <>= 220 | toLatex(sessionInfo(), locale=FALSE) 221 | @ 222 | 223 | \bibliographystyle{apalike} 224 | \bibliography{bibliography} 225 | 226 | \end{document} 227 | -------------------------------------------------------------------------------- /vignettes/species.Rnw: -------------------------------------------------------------------------------- 1 | %\VignetteEngine{knitr} 2 | %\VignetteIndexEntry{Species Identification using MALDIquant} 3 | %\VignetteKeywords{Bioinformatics, Proteomics, Mass Spectrometry} 4 | %\VignettePackage{species} 5 | 6 | \documentclass[12pt]{article} 7 | 8 | \input{utils} 9 | 10 | \title{Species Identification using \Mq{} } 11 | 12 | \author{ 13 | Sebastian Gibb\thanks{\email{mail@sebastiangibb.de}} {} and Korbinian Strimmer 14 | \thanks{\email{k.strimmer@imperial.ac.uk}} 15 | % 16 | } 17 | 18 | \date{\today} 19 | 20 | \begin{document} 21 | 22 | <>= 23 | library("knitr") 24 | opts_chunk$set(width=40, tidy.opts=list(width.cutoff=45), tidy=FALSE, 25 | fig.path=file.path("figures", "species/"), 26 | fig.align="center", fig.height=4.25, comment=NA, prompt=FALSE) 27 | @ 28 | 29 | \maketitle 30 | 31 | \begin{abstract} 32 | This vignette describes how to use \Mq{} for species identification. 33 | \end{abstract} 34 | 35 | \tableofcontents 36 | 37 | \clearpage 38 | 39 | \input{foreword} 40 | 41 | \input{vignettes} 42 | 43 | \section{Setup} 44 | 45 | Before any analysis we need to install the necessary packages 46 | (you can skip this part if you have already done this). 47 | You can install \Mq{} \citep{MALDIquant}, 48 | \MqF{} \citep{MALDIquantForeign}, \Rpackage{sda} 49 | \citep{sda} and \Rpackage{crossval} \citep{crossval} directly from \CRAN{}. 50 | To install this data package from 51 | \url{http://github.com/sgibb/MALDIquantExamples} 52 | you need the \Rpackage{devtools} \citep{devtools} package. 
53 | <>= 54 | install.packages(c("MALDIquant", "MALDIquantForeign", 55 | "sda", "crossval", "devtools")) 56 | library("devtools") 57 | install_github("sgibb/MALDIquantExamples") 58 | @ 59 | 60 | \section{Dataset} 61 | 62 | The dataset we use in this vignette was kindly provided by 63 | Dr. Bryan R. Thoma (\email{bryanthoma@yahoo.com}). 64 | It contains spectra of four different bacteria species. 65 | Each species is represented by eight individual samples and each sample has 66 | three technical replicates. 67 | 68 | \section{Analysis} 69 | 70 | First we have to load the packages. 71 | <>= 72 | suppressPackageStartupMessages(library("MALDIquantExamples")) 73 | suppressPackageStartupMessages(library("xtable")) 74 | @ 75 | <>= 76 | ## the main MALDIquant package 77 | library("MALDIquant") 78 | ## the import/export routines for MALDIquant 79 | library("MALDIquantForeign") 80 | 81 | ## example data 82 | library("MALDIquantExamples") 83 | @ 84 | 85 | \subsection{Import Raw Data} 86 | 87 | We use the \Rfunction{getPathSpecies} function to get 88 | the correct local file path to the spectra. 89 | <>= 90 | spectra <- import(getPathSpecies(), verbose=FALSE) 91 | @ 92 | 93 | We do a basic quality control and test whether all spectra contain the same 94 | number of data points and are not empty. 95 | 96 | \subsection{Quality Control} 97 | 98 | <>= 99 | table(sapply(spectra, length)) 100 | any(sapply(spectra, isEmpty)) 101 | all(sapply(spectra, isRegular)) 102 | @ 103 | 104 | Subsequently we ensure that all spectra have the same mass range. 105 | <>= 106 | spectra <- trim(spectra) 107 | @ 108 | 109 | Finally we draw some plots and inspect the spectra visually. 
110 | 111 | <>= 112 | set.seed(123) 113 | @ 114 | <>= 115 | idx <- sample(length(spectra), size=2) 116 | plot(spectra[[idx[1]]]) 117 | plot(spectra[[idx[2]]]) 118 | @ 119 | 120 | \subsection{Transformation and Smoothing} 121 | \label{subsec:sm} 122 | 123 | We apply the square root transformation to simplify graphical visualization 124 | and to overcome the potential dependency of the variance from the mean. 125 | 126 | <>= 127 | spectra <- transformIntensity(spectra, method="sqrt") 128 | @ 129 | 130 | In the next step we want to smooth our spectra with the 131 | \emph{Savitzky-Golay}-Filter \citep{Savitzky1964}. According to 132 | \citet{Bromba1981} the best \Rcode{halfWindowSize} should be smaller than the 133 | \emph{FWHM} (full width at half maximum) of the peaks. 134 | We add the argument \Rcode{type="b"} to the \Rfunction{plot} command to show both 135 | lines and data points in our plots. We count the data points in a few different 136 | regions of some spectra to estimate the average \emph{FWHM} (of course this is 137 | not the most sophisticated method). In the figure below we consider all points 138 | above the dashed blue line and get a \emph{FWHM} around 10-12 data points. We 139 | choose \Rcode{halfWindowSize=10}. 140 | <>= 141 | plot(spectra[[1]], type="b", 142 | xlim=c(2235.3, 2252.0), ylim=c(45, 100)) 143 | abline(h=72, col=4, lty=2) 144 | plot(spectra[[1]], type="b", 145 | xlim=c(11220, 11250), ylim=c(24, 40)) 146 | abline(h=32, col=4, lty=2) 147 | @ 148 | 149 | Afterwards we apply a 21 (\Rcode{2*halfWindowSize+1}) point 150 | \emph{Savitzky-Golay}-Filter \citep{Savitzky1964} to smooth the spectra. 151 | <>= 152 | spectra <- smoothIntensity(spectra, method="SavitzkyGolay", 153 | halfWindowSize=10) 154 | @ 155 | 156 | \subsection{Baseline Correction} 157 | \label{subsec:bc} 158 | 159 | Matrix effects and chemical noise results in some background noise. That's why 160 | we have to apply a baseline correction. 
In this example we use the 161 | \emph{SNIP} algorithm \citep{Ryan1988} to correct the baseline. 162 | 163 | Similar to the problem of the \Rcode{halfWindowSize} in section \ref{subsec:sm} 164 | we need to choose a \Rcode{halfWindowSize}, or rather the number of 165 | \Rcode{iterations}, for the baseline correction algorithm as well. The baseline 166 | should be flexible enough to follow trends but must not reduce the height of the 167 | peaks. We simply try a few different numbers of \Rcode{iterations}. 168 | <>= 169 | ## define iteration steps: 25, 50, ..., 100 170 | iterations <- seq(from=25, to=100, by=25) 171 | ## define different colors for each step 172 | col <- rainbow(length(iterations)) 173 | 174 | plot(spectra[[1]], xlim=c(2000, 12000)) 175 | 176 | ## draw different baseline estimates 177 | for (i in seq(along=iterations)) { 178 | baseline <- estimateBaseline(spectra[[1]], method="SNIP", 179 | iterations=iterations[i]) 180 | lines(baseline, col=col[i], lwd=2) 181 | } 182 | 183 | legend("topright", legend=iterations, col=col, lwd=1) 184 | @ 185 | 186 | 25 \Rcode{iterations} are already very flexible, whereas 50 are not flexible enough, 187 | and with 25 the height of the peaks is not reduced very much. 188 | So we choose \Rcode{iterations=25} for the baseline removal. 189 | 190 | <>= 191 | spectra <- removeBaseline(spectra, method="SNIP", 192 | iterations=25) 193 | plot(spectra[[1]]) 194 | @ 195 | 196 | \subsection{Intensity Calibration} 197 | 198 | We perform the \emph{Total-Ion-Current}-calibration (TIC; often called 199 | normalization) to equalize the intensities across spectra. 200 | 201 | <>= 202 | spectra <- calibrateIntensity(spectra, method="TIC") 203 | @ 204 | 205 | \subsection{Alignment} 206 | \label{subsec:pa} 207 | 208 | Next we need to (re)calibrate the mass values. Our alignment procedure is a peak 209 | based warping algorithm. \Mq{} offers \Rfunction{alignSpectra} as a wrapper 210 | around more complicated functions. 
If you need a finer control or want to 211 | investigate the impact of different parameters please use 212 | \Rfunction{determineWarpingFunctions} instead (see 213 | \Rfunction{?determineWarpingFunctions} for details). 214 | 215 | <>= 216 | spectra <- alignSpectra(spectra) 217 | @ 218 | 219 | We want to average the technical replicates before we are looking for peaks. Our 220 | spectra are recorded thrice for each spot. That's why we average each spot. We 221 | get the spot information using the \Rfunction{metaData} method. 222 | 223 | <>= 224 | metaData(spectra[[1]])$spot 225 | @ 226 | 227 | We collect all spots with a \Rfunction{sapply} call (to loop over all spectra) 228 | and use this information to create our average spectra. Because some species are 229 | measured in different runs on the same spot location we also add the species 230 | name to average only corresponding spectra. 231 | 232 | <>= 233 | spots <- sapply(spectra, function(x)metaData(x)$spot) 234 | species <- sapply(spectra, function(x)metaData(x)$sampleName) 235 | head(spots) 236 | head(species) 237 | @ 238 | 239 | <>= 240 | avgSpectra <- 241 | averageMassSpectra(spectra, labels=paste0(species, spots)) 242 | @ 243 | 244 | \subsection{Peak Detection} 245 | 246 | The peak detection is the crucial feature reduction step. Before performing the 247 | peak detection we need to estimate the noise of some spectra to get a feeling 248 | for the \emph{signal-to-noise ratio} (SNR). We use a similar approach as in 249 | section \ref{subsec:bc}. 250 | <>= 251 | ## define snrs steps: 1, 1.5, ... 
2.5 252 | snrs <- seq(from=1, to=2.5, by=0.5) 253 | ## define different colors for each step 254 | col <- rainbow(length(snrs)) 255 | 256 | ## estimate noise 257 | noise <- estimateNoise(avgSpectra[[1]], 258 | method="SuperSmoother") 259 | 260 | plot(avgSpectra[[1]], 261 | xlim=c(6000, 16000), ylim=c(0, 0.0016)) 262 | 263 | for (i in seq(along=snrs)) { 264 | lines(noise[, "mass"], 265 | noise[, "intensity"]*snrs[i], 266 | col=col[i], lwd=2) 267 | } 268 | legend("topright", legend=snrs, col=col, lwd=1) 269 | @ 270 | 271 | 2 or 2.5 look like a good compromise between sensitivity and specificity. We 272 | prefer a higher sensitivity and choose a \emph{SNR} of 2 (blue line) for the 273 | peak detection. For the \Rcode{halfWindowSize} we use a similar value as 274 | determined in section \ref{subsec:sm}. 275 | 276 | <>= 277 | peaks <- detectPeaks(avgSpectra, SNR=2, halfWindowSize=10) 278 | @ 279 | 280 | <>= 281 | plot(avgSpectra[[1]], xlim=c(6000, 16000), ylim=c(0, 0.0016)) 282 | points(peaks[[1]], col="red", pch=4) 283 | @ 284 | 285 | \subsection{Post Processing} 286 | 287 | After the alignment the peak positions (mass) are very similar but not 288 | identical. The binning is needed to make similar peak mass values identical. 289 | 290 | <>= 291 | peaks <- binPeaks(peaks) 292 | @ 293 | 294 | We chose a very low signal-to-noise ratio to keep as many features as possible. 295 | To remove some false positive peaks we remove peaks that appear in fewer than 296 | 25~\% (because we have four groups) of all spectra. 297 | 298 | <>= 299 | peaks <- filterPeaks(peaks, minFrequency=0.25) 300 | @ 301 | 302 | Finally we create the feature matrix and label the rows with the corresponding 303 | species and spot name. We need to recollect both pieces of information because we reduce 304 | the number of spectra in the average step (see section \ref{subsec:pa}). 
305 | 306 | <>= 307 | spots <- sapply(avgSpectra, function(x)metaData(x)$spot) 308 | species <- sapply(avgSpectra, function(x)metaData(x)$sampleName) 309 | species <- factor(species) # convert to factor 310 | # (needed later in crossval) 311 | @ 312 | 313 | <>= 314 | featureMatrix <- intensityMatrix(peaks, avgSpectra) 315 | rownames(featureMatrix) <- paste(species, spots, sep=".") 316 | @ 317 | 318 | \subsection{Clustering} 319 | 320 | Now we use the \Rpackage{pvclust} package \citep{pvclust} to apply a 321 | hierarchical clustering analysis with bootstrapping. 322 | 323 | <>= 324 | library("pvclust") 325 | pv <- pvclust(t(featureMatrix), 326 | method.hclust="ward.D2", 327 | method.dist="euclidean") 328 | plot(pv, print.num=FALSE) 329 | @ 330 | 331 | \subsection{Diagonal Discriminant Analysis} 332 | 333 | We finish our analysis using the \emph{diagonal discriminant analysis} (DDA) 334 | function of \Rpackage{sda} \citep{sda} to find the peaks that are typical 335 | for a specific species. 336 | 337 | <>= 338 | library("sda") 339 | ddar <- sda.ranking(Xtrain=featureMatrix, L=species, 340 | fdr=FALSE, diagonal=TRUE) 341 | plot(ddar) 342 | @ 343 | 344 | In the plot above we could see that the peak \textit{m/z} 9509 seems to be 345 | typical for \emph{species2}, \textit{m/z} 6343 for \emph{species4} and so on. 346 | 347 | \subsection{Linear Discriminant Analysis} 348 | 349 | We try the \emph{linear discriminant analysis} (LDA), too 350 | (it is part of \Rpackage{sda} \citep{sda} as well). 351 | 352 | <>= 353 | ldar <- sda.ranking(Xtrain=featureMatrix, L=species, 354 | fdr=FALSE, diagonal=FALSE) 355 | plot(ldar) 356 | @ 357 | 358 | \subsection{Variable Selection using Cross-Validation} 359 | 360 | In this section we want to apply cross-validation to find out, how many peaks 361 | and which ones we need to discriminate between the species. 362 | 363 | We use the package \Rpackage{crossval} \citep{crossval}. 
This package provides 364 | the \Rfunction{crossval} function which needs a specific prediction function. 365 | The prediction function combines the model creation, the prediction 366 | and the comparison between the true and the predicted results. 367 | 368 | <>= 369 | library("crossval") 370 | predfun <- function(Xtrain, Ytrain, Xtest, Ytest, 371 | numVars, diagonal=FALSE) { 372 | # estimate ranking and determine the best numVars variables 373 | ra <- sda.ranking(Xtrain, Ytrain, 374 | verbose=FALSE, diagonal=diagonal, fdr=FALSE) 375 | selVars <- ra[,"idx"][1:numVars] 376 | 377 | # fit and predict 378 | sda.out <- sda(Xtrain[, selVars, drop=FALSE], Ytrain, 379 | diagonal=diagonal, verbose=FALSE) 380 | ynew <- predict(sda.out, Xtest[, selVars, drop=FALSE], 381 | verbose=FALSE)$class 382 | 383 | # compute accuracy 384 | acc <- mean(Ytest == ynew) 385 | 386 | return(acc) 387 | } 388 | @ 389 | 390 | We want to repeat the cross-validation 20 times and use 5 folds. 391 | 392 | <>= 393 | K <- 5 # number of folds 394 | B <- 20 # number of repetitions 395 | @ 396 | 397 | To test our cross-validation setup we want to determine the performance of DDA 398 | using the top 10 features (peaks) ranked by $t$ scores. 399 | 400 | <>= 401 | set.seed(12345) 402 | cv.dda10 <- crossval(predfun, 403 | X=featureMatrix, Y=species, 404 | K=K, B=B, 405 | numVars=10, diagonal=FALSE, 406 | verbose=FALSE) 407 | cv.dda10$stat 408 | @ 409 | 410 | In the next step we look for the optimal number of peaks (which is more 411 | interesting than calculating the performance for the top 10 features). 412 | 413 | We calculate the performance of the top 1-15 (and all features) in a similar way 414 | as the top 10 features in the example above. 415 | 416 | <>= 417 | npeaks <- c(1:15, ncol(featureMatrix)) # number of peaks 418 | @ 419 | 420 | First we use DDA. 
421 | 422 | <>= 423 | # estimate accuracy for DDA 424 | set.seed(12345) 425 | cvsim.dda <- sapply(npeaks, function(i) { 426 | cv <- crossval(predfun, 427 | X=featureMatrix, Y=species, 428 | K=K, B=B, numVars=i, diagonal=TRUE, 429 | verbose=FALSE) 430 | return(cv$stat) 431 | }) 432 | @ 433 | 434 | The same using LDA (the only difference is \Rcode{diagonal=FALSE}). 435 | 436 | <>= 437 | # estimate accuracy for LDA 438 | set.seed(12345) 439 | cvsim.lda <- sapply(npeaks, function(i) { 440 | cv <- crossval(predfun, 441 | X=featureMatrix, Y=species, 442 | K=K, B=B, numVars=i, diagonal=FALSE, 443 | verbose=FALSE) 444 | return(cv$stat) 445 | }) 446 | @ 447 | 448 | We combine the results and put them into a table. 449 | 450 | <>= 451 | result.sim <- cbind(nPeaks=npeaks, 452 | "DDA-ACC"=cvsim.dda, 453 | "LDA-ACC"=cvsim.lda) 454 | @ 455 | <>= 456 | xtable(result.sim, booktabs=TRUE, digits=c(0, 0, 3, 3)) 457 | @ 458 | 459 | We find out that LDA and DDA perform very similarly 460 | and we need only 9 and 10 features (peaks), respectively, 461 | for a perfect discrimination of the species. 462 | 463 | \subsection{Summary} 464 | 465 | We have shown how to identify species based on MALDI spectra using \Mq{} 466 | and \Rpackage{pvclust}. Additionally we performed a variable selection using 467 | \Rpackage{sda} and \Rpackage{crossval} to find the minimal number of peaks for a 468 | perfect discrimination. 
469 | 470 | \section{Session Information} 471 | <>= 472 | toLatex(sessionInfo(), locale=FALSE) 473 | @ 474 | 475 | \bibliographystyle{apalike} 476 | \bibliography{bibliography} 477 | 478 | \end{document} 479 | -------------------------------------------------------------------------------- /vignettes/utils.tex: -------------------------------------------------------------------------------- 1 | \usepackage{natbib} 2 | \usepackage{hyperref} 3 | \usepackage{bibentry} % inline bibentries 4 | \nobibliography* % no special bibliography for bibentry 5 | 6 | % Fonts 7 | \usepackage{mathpazo} 8 | \usepackage[scaled]{helvet} 9 | \usepackage{microtype} 10 | 11 | \newcommand{\R}{\texttt{R}} 12 | \newcommand{\CRAN}{\texttt{CRAN}} 13 | \newcommand{\Rfunction}[1]{{\texttt{#1}}} 14 | \newcommand{\Robject}[1]{{\texttt{#1}}} 15 | \newcommand{\Rpackage}[1]{{\texttt{#1}}} 16 | \newcommand{\Rcode}[1]{{\texttt{#1}}} 17 | \newcommand{\Mq}{\Rpackage{MALDIquant}} 18 | \newcommand{\MqF}{\Rpackage{MALDIquantForeign}} 19 | \newcommand{\MqE}{\Rpackage{MALDIquantExamples}} 20 | \newcommand{\email}[1]{\href{mailto:#1}{\normalfont\texttt{#1}}} 21 | \newcommand{\MSI}{Mass Spectrometry Imaging} 22 | -------------------------------------------------------------------------------- /vignettes/vignettes.tex: -------------------------------------------------------------------------------- 1 | \section{Other vignettes} 2 | 3 | Please have a look at our other vignettes on 4 | \url{https://github.com/sgibb/MALDIquantExamples}: 5 | \begin{itemize} 6 | \item 7 | \href{http://cran.r-project.org/web/packages/MALDIquant/vignettes/MALDIquant-intro.pdf}{MALDIquant 8 | Introduction} --- a general introduction how to analyze mass spectrometry 9 | data using \Mq{}. 10 | \item 11 | \href{http://cran.r-project.org/web/packages/MALDIquantForeign/vignettes/MALDIquantForeign-intro.pdf}{MALDIquantForeign 12 | Introduction} --- a general introduction how to import/export data using 13 | \MqF{}. 
14 | \item 15 | \href{https://github.com/sgibb/MALDIquantExamples/blob/master/inst/doc/fiedler2009.pdf?raw=true}{Analysis 16 | of Fiedler et al. 2009} --- a guidance to analyse the serum profile 17 | MALDI-TOF data described in \citet{Fiedler2009}. 18 | \item 19 | \href{https://github.com/sgibb/MALDIquantExamples/blob/master/inst/doc/species.pdf?raw=true}{Bacterial 20 | Species Determination} --- a guidance to determine different species based 21 | on their MALDI-TOF spectra. 22 | \item 23 | \href{https://github.com/sgibb/MALDIquantExamples/blob/master/inst/doc/nyakas2013.pdf?raw=true}{Mass 24 | Spectrometry Imaging} --- a guidance how to analyse mass spectrometry 25 | imaging data using \Mq{}. 26 | \end{itemize} 27 | --------------------------------------------------------------------------------