├── .nojekyll ├── R ├── .Rapp.history ├── .DS_Store ├── MFCCFunction.R ├── gibbonID.R ├── DetectBLED.R └── gibbonR.R ├── .gitignore ├── vignettes ├── .gitignore ├── .DS_Store ├── DetectionsAffinityPlot.png └── gibbonR-tutorial.Rmd ├── .Rbuildignore ├── .DS_Store ├── data └── .DS_Store ├── man ├── .DS_Store ├── figures │ ├── README-unnamed-chunk-12-1.png │ ├── README-unnamed-chunk-13-1.png │ ├── README-unnamed-chunk-14-1.png │ ├── README-unnamed-chunk-14-2.png │ ├── README-unnamed-chunk-15-1.png │ ├── README-unnamed-chunk-16-1.png │ ├── README-unnamed-chunk-2-1.png │ ├── README-unnamed-chunk-3-1.png │ ├── README-unnamed-chunk-8-1.png │ └── README-unnamed-chunk-9-1.png ├── MFCCFunction.Rd ├── gibbonID.Rd ├── DetectBLED.Rd └── gibbonR.Rd ├── _pkgdown.yml ├── NAMESPACE ├── gibbonR.Rproj ├── .Rapp.history ├── DESCRIPTION ├── .github └── workflows │ └── jekyll-gh-pages.yml ├── README.Rmd ├── README.md └── .Rhistory /.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R/.Rapp.history: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | inst/doc 3 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^gibbonR\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/.DS_Store -------------------------------------------------------------------------------- /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/R/.DS_Store -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/data/.DS_Store -------------------------------------------------------------------------------- /man/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/.DS_Store -------------------------------------------------------------------------------- /vignettes/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/vignettes/.DS_Store -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://denajgibbon.github.io/gibbonR/ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /vignettes/DetectionsAffinityPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/vignettes/DetectionsAffinityPlot.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-14-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-14-2.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-2-1.png 
-------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(DetectBLED) 4 | export(MFCCFunction) 5 | export(gibbonID) 6 | export(gibbonR) 7 | import(e1071) 8 | import(randomForest) 9 | import(seewave) 10 | import(stringr) 11 | import(tuneR) 12 | -------------------------------------------------------------------------------- /gibbonR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | 
-------------------------------------------------------------------------------- /.Rapp.history: -------------------------------------------------------------------------------- 1 | library("devtools")# 2 | #devtools::install_github("klutometis/roxygen",force = TRUE)# 3 | library(roxygen2)# 4 | # Add documentation to function# 5 | setwd("/Users/denasmacbook/Desktop") 6 | create("gibbonR.github") 7 | library("devtools")# 8 | #devtools::install_github("klutometis/roxygen",force = TRUE)# 9 | library(roxygen2)# 10 | # Add documentation to function# 11 | #setwd("/Users/denasmacbook/Desktop")# 12 | #create("gibbonR.github")# 13 | # 14 | setwd("/Users/denasmacbook/Desktop/gibbonR.github")# 15 | document()# 16 | install("/Users/denasmacbook/Desktop/gibbonR.github") 17 | document() 18 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: gibbonR 2 | Title: gibbonR: An R package for the detection and classification of acoustic signals using machine learning 3 | Version: 1.0.1 4 | Authors@R: person("Dena", "Clink", email = "dena.clink@cornell.edu", role = c("aut", "cre")) 5 | Description: Detection, classification and visualization of acoustic signals. 6 | Depends: R (>= 3.4.2), 7 | stringr, 8 | e1071, 9 | randomForest, 10 | tuneR, 11 | seewave, 12 | ggpubr, 13 | apcluster, 14 | umap, 15 | matlab, 16 | magick 17 | License: What license is it under? 
18 | Encoding: UTF-8 19 | LazyData: true 20 | RoxygenNote: 7.3.1 21 | Suggests: 22 | knitr, 23 | rmarkdown 24 | VignetteBuilder: knitr 25 | URL: https://denajgibbon.github.io/gibbonR/ 26 | -------------------------------------------------------------------------------- /man/MFCCFunction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MFCCFunction.R 3 | \name{MFCCFunction} 4 | \alias{MFCCFunction} 5 | \title{MFCCFunction} 6 | \usage{ 7 | MFCCFunction( 8 | input.dir, 9 | min.freq = 400, 10 | max.freq = 2000, 11 | n.windows = 9, 12 | num.cep = 12, 13 | win.avg = "standard", 14 | win.hop.time = 0.25 15 | ) 16 | } 17 | \arguments{ 18 | \item{input.dir}{where the .wav files are stored} 19 | 20 | \item{min.freq}{the minimum frequency (Hz) of the signal of interest} 21 | 22 | \item{max.freq}{the maximum frequency (Hz) of the signal of interest} 23 | 24 | \item{n.windows}{the number of time windows to divide the signal by} 25 | 26 | \item{num.cep}{the number of cepstra to calculate for each time window} 27 | 28 | \item{win.avg}{Option of 'no.avg','mean.sd' or 'standard'; whether to return MFCCs for each non-overlapping time window, calculate mean and SD over each MFCC or calculated MFCCs for a set number of time windows.} 29 | 30 | \item{win.hop.time}{If win.avg='standard' the specified window size.} 31 | } 32 | \value{ 33 | a data frame with a row of MFCCs for each .wav file 34 | } 35 | \description{ 36 | Function to calculate Mel-frequency cepstral coefficents over a directory of focal recordings 37 | } 38 | \examples{ 39 | \donttest{MFCCFunction(input.dir = "FocalRecordings",min.freq = 400,max.freq=2500)} 40 | } 41 | -------------------------------------------------------------------------------- /.github/workflows/jekyll-gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building 
and deploying a Jekyll site to GitHub Pages 2 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["master"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Build job 26 | build: 27 | runs-on: ubuntu-latest 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v4 31 | - name: Setup Pages 32 | uses: actions/configure-pages@v5 33 | - name: Build with Jekyll 34 | uses: actions/jekyll-build-pages@v1 35 | with: 36 | source: ./ 37 | destination: ./_site 38 | - name: Upload artifact 39 | uses: actions/upload-pages-artifact@v3 40 | 41 | # Deployment job 42 | deploy: 43 | environment: 44 | name: github-pages 45 | url: ${{ steps.deployment.outputs.page_url }} 46 | runs-on: ubuntu-latest 47 | needs: build 48 | steps: 49 | - name: Deploy to GitHub Pages 50 | id: deployment 51 | uses: actions/deploy-pages@v4 52 | -------------------------------------------------------------------------------- /man/gibbonID.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gibbonID.R 3 | \name{gibbonID} 4 | \alias{gibbonID} 5 | \title{gibbonID} 6 | \usage{ 7 | gibbonID( 8 | input.dir, 9 | output.dir, 10 | min.freq, 11 | max.freq, 12 | pattern = ".wav", 13 | add.spectrograms = FALSE, 14 | class = "fixed", 15 | q.fixed = 0.1, 16 | win.avg = "standard", 
17 | spec.ratio = 40 18 | ) 19 | } 20 | \arguments{ 21 | \item{input.dir}{Directory where the .wav file clips are location} 22 | 23 | \item{output.dir}{Directory to save the spectrogram thumbnails.} 24 | 25 | \item{min.freq}{Minimum frequency (Hz) of signals of interest} 26 | 27 | \item{max.freq}{Maximum frequency (Hz) of signals of interest} 28 | 29 | \item{pattern}{Pattern to search fo rin input.dir; default is '.wav'} 30 | 31 | \item{add.spectrograms}{Logical; overlay spectrogram images} 32 | 33 | \item{class}{Option of 'affinity.adaptive', 'fixed.affinity' or 'no.clustering'; Specifies whether to do adaptive or fixed 'q' affinity propagation clustering, or to color points by class label.} 34 | 35 | \item{q.fixed}{If class=='fixed.affinity' specify value of 'q'. See ??apcluster for more details.} 36 | 37 | \item{win.avg}{Option of 'false','mean.sd' or 'standard'; whether to return MFCCs for each non-overlapping time window, calculate mean and SD over each MFCC or calculated MFCCs for a set number of time windows.} 38 | 39 | \item{spec.ratio}{Value to scale the spectrograms.} 40 | } 41 | \description{ 42 | Function that extracts MFCCs as features from .wav files and plots them using UMAP. Points can be colored using affinity propagation clustering or by class labels. With the option to overlay spectrogram images. 
43 | } 44 | \examples{ 45 | gibbonID(input.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering') 46 | } 47 | -------------------------------------------------------------------------------- /man/DetectBLED.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DetectBLED.R 3 | \name{DetectBLED} 4 | \alias{DetectBLED} 5 | \title{DetectBLED} 6 | \usage{ 7 | DetectBLED( 8 | input, 9 | input.type = "wav", 10 | min.freq = 200, 11 | max.freq = 6000, 12 | noise.quantile.val = 0.75, 13 | spectrogram.window = 1600, 14 | subsample.dur = 300, 15 | training.label = "noise", 16 | pattern.split = ".wav", 17 | min.signal.dur = 1, 18 | max.sound.event.dur = 6, 19 | wav.output = "TRUE", 20 | output.dir = getwd(), 21 | swift.time = TRUE, 22 | time.start = 18, 23 | time.stop = 23, 24 | write.table.output = TRUE, 25 | verbose = TRUE, 26 | random.sample = 100 27 | ) 28 | } 29 | \arguments{ 30 | \item{input}{Either full path to directory containing .wav files or a list with file name as first element and .wav as second element} 31 | 32 | \item{input.type}{Either 'directory', 'list' or 'wav'} 33 | 34 | \item{min.freq}{Minimum frequency (Hz) of signal of interest} 35 | 36 | \item{max.freq}{Maximum frequency (Hz) of signal of interest} 37 | 38 | \item{noise.quantile.val}{A quantile value between 0 to 1 for the band energy summation} 39 | 40 | \item{spectrogram.window}{Window length for spectrogram analysis (input to spectro fuction from 'seewave')} 41 | 42 | \item{subsample.dur}{Duration (s) to divide longer sound file to increase processing efficiency} 43 | 44 | \item{training.label}{Label to append to saved .wav files} 45 | 46 | \item{pattern.split}{Pattern 
to find and remove to create file name; currently set to ".rda"} 47 | 48 | \item{min.signal.dur}{The minimum duration (s) sound events must be to be considered sound events} 49 | 50 | \item{max.sound.event.dur}{The maximum duration (s) sound events must be to be considered sound events} 51 | 52 | \item{wav.output}{Logical; output wave file of sound events?} 53 | 54 | \item{output.dir}{Specified output directory; set to current working directory} 55 | 56 | \item{swift.time}{If file name is in structure recorder_YYYYMMDD_HHMMSS can subset files based on specific times} 57 | 58 | \item{time.start}{Time recordings start (hour)} 59 | 60 | \item{time.stop}{Time recordings stop (hour)} 61 | 62 | \item{write.table.output}{Logical; write Raven selection tables to output directory} 63 | 64 | \item{verbose}{Logical; print out steps} 65 | 66 | \item{random.sample}{If a random subset of files in a directory are desired specify a value, otherwise 'NA'} 67 | 68 | \item{output}{Either 'spectro', 'table' or 'wav'} 69 | } 70 | \description{ 71 | Function to do band-limited energy summation to find sound events. This function only identifies sound events based on frequency and duration so is not expected to have high precision. 
72 | } 73 | -------------------------------------------------------------------------------- /man/gibbonR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gibbonR.R 3 | \name{gibbonR} 4 | \alias{gibbonR} 5 | \title{gibbonR} 6 | \usage{ 7 | {input, input.type='list', feature.df,model.type.list=c("SVM"), tune = FALSE, target.signal = "female.gibbon", 8 | short.wav.duration=300,min.freq = 400, max.freq = 2000, 9 | noise.quantile.val=0.5, minimum.separation =5, n.windows = 9, num.cep = 12, spectrogram.window =1600, 10 | pattern.split = ".wav", min.signal.dur = 4, maximum.separation =1,max.sound.event.dur = 12, 11 | probability.thresh.svm = 0.75, probability.thresh.rf = 0.75, wav.output = "TRUE", output.dir = getwd(), 12 | swift.time=TRUE,time.start=6,time.stop=12, write.table.output=TRUE,verbose=TRUE, random.sample='NA'} 13 | } 14 | \arguments{ 15 | \item{input}{Either full path to directory containing .wav files, a list of .wav files, or a the path to a single .wav file} 16 | 17 | \item{input.type}{Either 'directory', 'list' or 'wav'} 18 | 19 | \item{feature.df}{Data frame of features from labeled sound files; first column must be class labels} 20 | 21 | \item{model.type.list}{Which machine learning model to use; SVM or RF} 22 | 23 | \item{tune}{Logical; if want to use 'tune' function for SVM; NOTE: for large datasets adds significant computing time} 24 | 25 | \item{target.signal}{Labeled signal(s) of interest from training data (feature.df); can include multiple classes.} 26 | 27 | \item{short.wav.duration}{Duration (s) to divide longer sound file to increase processing efficiency} 28 | 29 | \item{min.freq}{Minimum frequency (Hz) of signal of interest} 30 | 31 | \item{max.freq}{Maximum frequency (Hz) of signal of interest} 32 | 33 | \item{noise.quantile.val}{A quantile value between 0 to 1 for the band energy summation} 34 | 35 | 
\item{minimum.separation}{The minimum number of consecutive time windows that signals must be separated by to be considered a separate sound event} 36 | 37 | \item{n.windows}{Number of time windows to calculate for MFCCs} 38 | 39 | \item{num.cep}{Number of cepstra coefficients to calculate for MFCCs} 40 | 41 | \item{spectrogram.window}{Window length for spectrogram analysis (input to spectro fuction from 'seewave')} 42 | 43 | \item{pattern.split}{Pattern to find and remove to create full sound file name; currently set to ".wav"} 44 | 45 | \item{min.signal.dur}{The minimum duration (s) sound events must be to be considered sound events} 46 | 47 | \item{maximum.separation}{The maximum number of consecutive time windows that signals must be separated by to be considered a separate sound event} 48 | 49 | \item{max.sound.event.dur}{The maximum duration (s) sound events must be to be considered sound events; NOTE this only happens when writing text file} 50 | 51 | \item{probability.thresh.svm}{Probability threshold (provided by SVM) to be considered as target signal} 52 | 53 | \item{probability.thresh.rf}{Probability threshold (provided by RF) to be considered as target signal} 54 | 55 | \item{wav.output}{Logical; output .wav files of detections in specified directory} 56 | 57 | \item{output.dir}{Specified output directory; set to current working directory} 58 | 59 | \item{swift.time}{If file name is in structure recorder_YYYYMMDD_HHMMSS can subset files based on specific times} 60 | 61 | \item{time.start}{Time recordings start (hour)} 62 | 63 | \item{time.stop}{Time recordings stop (hour)} 64 | 65 | \item{write.table.output}{Logical; write Raven selection tables to output directory} 66 | 67 | \item{verbose}{Logical; print out steps} 68 | 69 | \item{random.sample}{If a random subset of files in a directory are desired specify a value, otherwise 'NA'} 70 | } 71 | \value{ 72 | If write.table.output=TRUE writes a .txt file for each sound file with detections 73 | 74 | If 
write.table.output=TRUE writes a .txt file for each sound file with detections 75 | } 76 | \description{ 77 | This function identifies sound events using band-limited energy summation and then classifies the sound events using a trained support vector machine or random forest algorithm. 78 | } 79 | \examples{ 80 | \donttest{MFCCFunction(input.dir = "FocalRecordings",min.freq = 400,max.freq=2500)} 81 | } 82 | -------------------------------------------------------------------------------- /R/MFCCFunction.R: -------------------------------------------------------------------------------- 1 | #' MFCCFunction 2 | #' @description Function to calculate Mel-frequency cepstral coefficents over a directory of focal recordings 3 | #' @param input.dir where the .wav files are stored 4 | #' @param min.freq the minimum frequency (Hz) of the signal of interest 5 | #' @param max.freq the maximum frequency (Hz) of the signal of interest 6 | #' @param n.windows the number of time windows to divide the signal by 7 | #' @param win.avg Option of 'no.avg','mean.sd' or 'standard'; whether to return MFCCs for each non-overlapping time window, calculate mean and SD over each MFCC or calculated MFCCs for a set number of time windows. 8 | #' @param win.hop.time If win.avg='standard' the specified window size. 
9 | #' @param num.cep the number of cepstra to calculate for each time window 10 | #' @export 11 | #' @return a data frame with a row of MFCCs for each .wav file 12 | #' @examples 13 | #' \donttest{MFCCFunction(input.dir = "FocalRecordings",min.freq = 400,max.freq=2500)} 14 | 15 | MFCCFunction <- 16 | function(input.dir, 17 | min.freq = 400, 18 | max.freq = 2000, 19 | n.windows = 9, 20 | num.cep = 12, 21 | win.avg = 'standard', 22 | win.hop.time = 0.25) { 23 | 24 | if (is.list(input.dir) == 'TRUE') { 25 | subsamps <- input.dir 26 | class <- 'NA' 27 | 28 | } else{ 29 | call.timing.list <- 30 | list.files(input.dir, full.names = T, pattern = '.wav', recursive = T) 31 | 32 | call.timing.list.short <- 33 | basename(call.timing.list) 34 | 35 | subsamps <- lapply(1:length(call.timing.list), 36 | function(i) 37 | readWave(call.timing.list[[i]])) 38 | 39 | class <- 40 | stringr::str_split_fixed(call.timing.list.short, pattern = '_', n = 2)[, 1] 41 | 42 | } 43 | 44 | if (win.avg == "no.avg") { 45 | mfcc.output.df <- data.frame() 46 | ####Loop to calculate MFCC for each .wav file in the directory 47 | for (j in 1:length(subsamps)) { 48 | #print(paste("processing",j)) 49 | wav.name <- call.timing.list.short[[j]] 50 | wav.file <- subsamps[[j]] 51 | 52 | 53 | # Calculate MFCCs 54 | melfcc.output <- tuneR::melfcc( 55 | wav.file, 56 | minfreq = min.freq, 57 | maxfreq = max.freq, 58 | wintime = win.hop.time, 59 | hoptime = win.hop.time, 60 | numcep = num.cep 61 | ) 62 | 63 | melfcc.output <- as.data.frame(melfcc.output) 64 | 65 | class <- 66 | rep(stringr::str_split_fixed(wav.name, pattern = '_', n = 2)[, 1], 67 | nrow(melfcc.output)) 68 | 69 | melfcc.output <- cbind.data.frame(class, melfcc.output) 70 | 71 | mfcc.output.df <- 72 | rbind.data.frame(mfcc.output.df, melfcc.output) 73 | } 74 | 75 | return(mfcc.output.df) 76 | } 77 | 78 | if (win.avg == "mean.sd") { 79 | mfcc.vector.list <- vector("list", 10000) 80 | 81 | for (x in 1:length(subsamps)) { 82 | print(paste("processing 
sound event", x, 'out of',length(subsamps) )) 83 | 84 | short.wav <- subsamps[[x]] 85 | wav.dur <- seewave::duration(short.wav) 86 | # Calculate MFCCs 87 | melfcc.output <- 88 | tuneR::melfcc( 89 | short.wav, 90 | minfreq = min.freq, 91 | maxfreq = max.freq, 92 | numcep = num.cep 93 | ) 94 | 95 | # Calculate delta cepstral coefficients 96 | deltas.output <- as.data.frame(tuneR::deltas(melfcc.output)) 97 | 98 | 99 | melfcc.output <- as.data.frame(melfcc.output) 100 | 101 | mfcc.mean <- colMeans(melfcc.output) 102 | mfcc.sd <- apply(melfcc.output, 2, sd) 103 | delta.mean <- colMeans(deltas.output) 104 | delta.sd <- apply(deltas.output, 2, sd) 105 | 106 | # Ensure only same number of time windows are used for MFCC and delta coefficients Also append .wav duration 107 | mfcc.vector <- 108 | c(mfcc.mean, mfcc.sd, delta.mean, delta.sd, wav.dur) 109 | mfcc.vector.list[[x]] <- mfcc.vector 110 | } 111 | 112 | mfcc.output <- mfcc.vector.list 113 | 114 | mfcc.output.df <- do.call(rbind.data.frame, mfcc.output) 115 | colnames(mfcc.output.df) <- 116 | seq(from = 1, 117 | to = ncol(mfcc.output.df), 118 | by = 1) 119 | 120 | mfcc.output.df <- cbind.data.frame(class, mfcc.output.df) 121 | return(mfcc.output.df) 122 | } 123 | 124 | if (win.avg == 'standard') { 125 | 126 | mfcc.vector.list <- vector("list", 10000) 127 | 128 | for (x in 1:length(subsamps)) { 129 | print(paste("processing sound event", x, 'out of',length(subsamps) )) 130 | short.wav <- subsamps[[x]] 131 | wav.dur <- duration(short.wav) 132 | win.time <- wav.dur / n.windows 133 | 134 | # Calculate MFCCs 135 | melfcc.output <- 136 | tuneR::melfcc( 137 | short.wav, 138 | minfreq = min.freq, 139 | hoptime = win.time, 140 | maxfreq = max.freq, 141 | numcep = num.cep, 142 | wintime = win.time 143 | ) 144 | 145 | # Calculate delta cepstral coefficients 146 | deltas.output <- tuneR::deltas(melfcc.output) 147 | 148 | # Ensure only same number of time windows are used for MFCC and delta coefficients Also append .wav duration 149 
| mfcc.vector <- 150 | c(as.vector(t(melfcc.output[1:(n.windows - 1), 2:num.cep])), as.vector(t(deltas.output[1:(n.windows - 1), 2:num.cep])), wav.dur) 151 | mfcc.vector.list[[x]] <- mfcc.vector 152 | } 153 | 154 | mfcc.output <- mfcc.vector.list 155 | 156 | mfcc.output.df <- do.call(rbind.data.frame, mfcc.output) 157 | colnames(mfcc.output.df) <- 158 | seq(from = 1, 159 | to = ncol(mfcc.output.df), 160 | by = 1) 161 | 162 | mfcc.output.df <- cbind.data.frame(class, mfcc.output.df) 163 | return(mfcc.output.df) 164 | } 165 | 166 | } 167 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "gibbonR: An R package for the automated detection and classification of female gibbon calls from long-term acoustic recordings" 3 | output: 4 | github_document: 5 | toc: true 6 | toc_depth: 2 7 | --- 8 | 9 | 10 | 11 | ```{r eval=FALSE, include = FALSE} 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "#>", 15 | fig.path = "man/figures/README-", 16 | out.width = "100%" 17 | ) 18 | ``` 19 | 20 | # Authors 21 | Dena J. Clink & Holger Klinck \ 22 | K. Lisa Yang Center for Conservation Bioacoustics, Cornell Lab of Ornithology, Cornell University 23 | 24 | # Package description 25 | 26 | The field of bioacoustics is inherently multidisciplinary and relies on 27 | computer scientists, engineers, and ecologists. This package is directed 28 | towards ecologists who are interested in incorporating bioacoustics into 29 | their research, but may not have the skills or training. The goal for 30 | the creation of this package was to make commonly used signal processing 31 | techniques and various machine learning algorithms readily available in R for 32 | anyone interested in using bioacoustics in their research. 
33 | 34 | 35 | ```{r eval=FALSE, include = FALSE} 36 | knitr::opts_chunk$set( 37 | collapse = TRUE, 38 | comment = "#>" 39 | ) 40 | ``` 41 | 42 | # Tutorial 43 | https://denajgibbon.github.io/gibbonR-tutorial/ 44 | 45 | # Quick start guide 46 | ## You can install the development version from [GitHub](https://github.com/DenaJGibbon) with: 47 | ```{r eval=FALSE} 48 | # install.packages("devtools") 49 | # devtools::install_github("DenaJGibbon/gibbonR") 50 | library(gibbonR) 51 | ``` 52 | 53 | ```{eval=FALSE} 54 | # You need to tell R where to store the zip files on your computer. 55 | destination.file.path.zip <- 56 | "dataBorneoExampleData.zip" 57 | 58 | # You also need to tell R where to save the unzipped files 59 | destination.file.path <- "data/gibbonR/data/" 60 | 61 | # This function will download the data from github 62 | 63 | utils::download.file("https://github.com/DenaJGibbon/BorneoExampleData/archive/master.zip", 64 | destfile = destination.file.path.zip) 65 | 66 | # This function will unzip the file 67 | utils::unzip(zipfile = destination.file.path.zip, 68 | exdir = destination.file.path) 69 | 70 | # Examine the contents 71 | list.of.sound.files <- list.files(paste(destination.file.path, 72 | "BorneoExampleData-master", "data", sep = 73 | "/"), 74 | full.names = T) 75 | list.of.sound.files 76 | 77 | ``` 78 | 79 | Use this function to read in the .RDA file and save it as an R object from https://stackoverflow.com/questions/5577221/how-can-i-load-an-object-into-a-variable-name-that-i-specify-from-an-r-data-file 80 | 81 | ```{r eval=FALSE} 82 | loadRData <- function(fileName) { 83 | #loads an RData file, and returns it 84 | load(fileName) 85 | get(ls()[ls() != "fileName"]) 86 | } 87 | ``` 88 | 89 | This function will load the entire list of r data files 90 | ```{r eval=FALSE} 91 | list.rda.files <- list() 92 | for(x in 1:length(list.of.sound.files)){ 93 | list.rda.files[[x]] <- loadRData(list.of.sound.files[[x]]) 94 | } 95 | ``` 96 | 97 | Assign each rda an 
informative name 98 | ```{r eval=FALSE, warning=FALSE} 99 | multi.class.list <- list.rda.files[[1]] 100 | S11_20180219_060002_1800sto3600s <- list.rda.files[[2]] 101 | ``` 102 | 103 | Now we create a directory with the training .wav files 104 | ```{r eval=FALSE, warning=FALSE} 105 | TrainingDataDirectory <- "data/gibbonR/data/BorneoMultiClass" 106 | 107 | for(a in 1:length(multi.class.list)){ 108 | Temp.element <- multi.class.list[[a]] 109 | writeWave(Temp.element[[2]], paste(TrainingDataDirectory,Temp.element[[1]],sep='/')) 110 | } 111 | 112 | ``` 113 | 114 | 115 | # Part 1. Training Data with Labeled .wav clips 116 | ### Read in clips and calculate MFCCs 117 | ```{r eval=FALSE, echo = T, results = 'hide' } 118 | TrainingWavFilesDir <- 119 | "data/gibbonR/data/BorneoMultiClass/" 120 | 121 | trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg='standard') 122 | 123 | trainingdata$class <- as.factor(trainingdata$class) 124 | ``` 125 | 126 | ### Compare Random Forest and Support Vector Machine for Supervised Classification 127 | ```{r eval=FALSE } 128 | 129 | trainingdata$class <- as.factor(trainingdata$class) 130 | 131 | 132 | ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial", 133 | cross = 25, 134 | probability = TRUE) 135 | 136 | print(paste('SVM accuracy',ml.model.svm$tot.accuracy)) 137 | 138 | 139 | ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class) 140 | 141 | 142 | print(ml.model.rf) 143 | ``` 144 | 145 | # Part 2. Run the detector/classifier 146 | 147 | ## Part 2a. 
Feature extraction 148 | ```{r eval=FALSE } 149 | # Specify the folder where the training data will be saved 150 | TrainingDataFolderLocation <- "data/gibbonR/data/TrainingDataFromRavenSelectionTables/" 151 | 152 | TrainingDataMFCC <- MFCCFunction(input.dir= TrainingDataFolderLocation, min.freq = 400, max.freq = 1600,win.avg="standard") 153 | 154 | TrainingDataMFCC$class <- as.factor(TrainingDataMFCC$class) 155 | 156 | ``` 157 | 158 | ## Part 2b. Run DetectClassify 159 | ```{r eval=FALSE} 160 | 161 | TestFileDirectory <- '/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTestFiles' 162 | 163 | OutputDirectory <- "data/gibbonR/data/DetectAndClassifyOutput" 164 | 165 | gibbonR(input=TestFileDirectory, 166 | feature.df=TrainingDataMFCC, 167 | model.type.list=c('SVM','RF'), 168 | tune = TRUE, 169 | short.wav.duration=300, 170 | target.signal = c("female.gibbon"), 171 | min.freq = 400, max.freq = 1600, 172 | noise.quantile.val=0.15, 173 | minimum.separation =3, 174 | n.windows = 9, num.cep = 12, 175 | spectrogram.window =160, 176 | pattern.split = ".wav", 177 | min.signal.dur = 3, 178 | max.sound.event.dur = 25, 179 | maximum.separation =1, 180 | probability.thresh.svm = 0.15, 181 | probability.thresh.rf = 0.15, 182 | wav.output = "TRUE", 183 | output.dir =OutputDirectory, 184 | swift.time=TRUE,time.start=5,time.stop=10, 185 | write.table.output=FALSE,verbose=TRUE, 186 | random.sample='NA') 187 | 188 | 189 | ``` 190 | 191 | 192 | # Part 3. Data visualization 193 | 194 | ## Part 3a. Create a UMAP plot colored by class 195 | ```{r eval=FALSE } 196 | library(gibbonR) 197 | library(ggpubr) 198 | gibbonID(input.dir="data/gibbonR/data/MultipleSoundClasses/",output.dir="data/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering') 199 | 200 | ``` 201 | 202 | ## Part 3b. 
Create a UMAP plot colored by affinity propagation clustering 203 | ```{r eval=FALSE } 204 | library(gibbonR) 205 | library(ggpubr) 206 | library(apcluster) 207 | gibbonID(input.dir="data/gibbonR/data/MultipleSoundClasses/",output.dir="data/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',class='affinity.fixed', q.fixed=0.1,add.spectrograms=TRUE,min.freq=400,max.freq=1600) 208 | 209 | ``` 210 | 211 | 212 | ### How to cite 213 | 214 | This package is currently in development, with submission to JOSS planned shortly. In the interim, please cite the arXiv preprint: 215 | 216 | Clink, D. J. & H. Klinck. (2019). gibbonR: An R package for the detection and classification of acoustic signals using machine learning. arXiv, 1906.02572. 217 | https://doi.org/10.48550/arXiv.1906.02572 218 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | gibbonR: An R package for the automated detection and classification of 2 | female gibbon calls from long-term acoustic recordings 3 | ================ 4 | 5 | - [Authors](#authors) 6 | - [Package description](#package-description) 7 | - [Tutorial](#tutorial) 8 | - [Quick start guide](#quick-start-guide) 9 | - [You can install the development version from GitHub 10 | with:](#you-can-install-the-development-version-from-github-with) 11 | - [Part 1. Training Data with Labeled .wav 12 | clips](#part-1-training-data-with-labeled-wav-clips) 13 | - [Part 2. Run the 14 | detector/classifier](#part-2-run-the-detectorclassifier) 15 | - [Part 2a. Feature extraction](#part-2a-feature-extraction) 16 | - [Part 2b. Run DetectClassify](#part-2b-run-detectclassify) 17 | - [Part 3. Data visualization](#part-3-data-visualization) 18 | - [Part 3a. Create a UMAP plot colored by 19 | class](#part-3a-create-a-umap-plot-colored-by-class) 20 | - [Part 3b. 
Create a UMAP plot colored by affinity propagation 21 | clustering](#part-3b-create-a-umap-plot-colored-by-affinity-propagation-clustering) 22 | 23 | 24 | 25 | # Authors 26 | 27 | Dena J. Clink & Holger Klinck 28 | K. Lisa Yang Center for Conservation Bioacoustics, Cornell Lab of 29 | Ornithology, Cornell University 30 | 31 | # Package description 32 | 33 | The field of bioacoustics is inherently multidisciplinary and relies on 34 | computer scientists, engineers, and ecologists. This package is directed 35 | towards ecologists who are interested in incorporating bioacoustics into 36 | their research, but may not have the skills or training. The goal for 37 | the creation of this package was to make commonly used signal processing 38 | techniques and various machine learning algorithms readily available in 39 | R for anyone interested in using bioacoustics in their research. 40 | 41 | # Tutorial 42 | 43 | 44 | 45 | # Quick start guide 46 | 47 | ## You can install the development version from [GitHub](https://github.com/DenaJGibbon) with: 48 | 49 | ``` r 50 | # install.packages("devtools") 51 | # devtools::install_github("DenaJGibbon/gibbonR") 52 | library(gibbonR) 53 | ``` 54 | 55 | ``` 56 | # You need to tell R where to store the zip files on your computer. 
57 | destination.file.path.zip <- 58 | "dataBorneoExampleData.zip" 59 | 60 | # You also need to tell R where to save the unzipped files 61 | destination.file.path <- "data/gibbonR/data/" 62 | 63 | # This function will download the data from github 64 | 65 | utils::download.file("https://github.com/DenaJGibbon/BorneoExampleData/archive/master.zip", 66 | destfile = destination.file.path.zip) 67 | 68 | # This function will unzip the file 69 | utils::unzip(zipfile = destination.file.path.zip, 70 | exdir = destination.file.path) 71 | 72 | # Examine the contents 73 | list.of.sound.files <- list.files(paste(destination.file.path, 74 | "BorneoExampleData-master", "data", sep = 75 | "/"), 76 | full.names = T) 77 | list.of.sound.files 78 | ``` 79 | 80 | Use this function to read in the .RDA file and save it as an R object 81 | from 82 | 83 | 84 | ``` r 85 | loadRData <- function(fileName) { 86 | #loads an RData file, and returns it 87 | load(fileName) 88 | get(ls()[ls() != "fileName"]) 89 | } 90 | ``` 91 | 92 | This function will load the entire list of r data files 93 | 94 | ``` r 95 | list.rda.files <- list() 96 | for(x in 1:length(list.of.sound.files)){ 97 | list.rda.files[[x]] <- loadRData(list.of.sound.files[[x]]) 98 | } 99 | ``` 100 | 101 | Assign each rda an informative name 102 | 103 | ``` r 104 | multi.class.list <- list.rda.files[[1]] 105 | S11_20180219_060002_1800sto3600s <- list.rda.files[[2]] 106 | ``` 107 | 108 | Now we create a directory with the training .wav files 109 | 110 | ``` r 111 | TrainingDataDirectory <- "data/gibbonR/data/BorneoMultiClass" 112 | 113 | for(a in 1:length(multi.class.list)){ 114 | Temp.element <- multi.class.list[[a]] 115 | writeWave(Temp.element[[2]], paste(TrainingDataDirectory,Temp.element[[1]],sep='/')) 116 | } 117 | ``` 118 | 119 | # Part 1. 
Training Data with Labeled .wav clips 120 | 121 | ### Read in clips and calculate MFCCs 122 | 123 | ``` r 124 | TrainingWavFilesDir <- 125 | "data/gibbonR/data/BorneoMultiClass/" 126 | 127 | trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg='standard') 128 | 129 | trainingdata$class <- as.factor(trainingdata$class) 130 | ``` 131 | 132 | ### Compare Random Forest and Support Vector Machine for Supervised Classification 133 | 134 | ``` r 135 | trainingdata$class <- as.factor(trainingdata$class) 136 | 137 | 138 | ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial", 139 | cross = 25, 140 | probability = TRUE) 141 | 142 | print(paste('SVM accuracy',ml.model.svm$tot.accuracy)) 143 | 144 | 145 | ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class) 146 | 147 | 148 | print(ml.model.rf) 149 | ``` 150 | 151 | # Part 2. Run the detector/classifier 152 | 153 | ## Part 2a. Feature extraction 154 | 155 | ``` r 156 | # Specify the folder where the training data will be saved 157 | TrainingDataFolderLocation <- "data/gibbonR/data/TrainingDataFromRavenSelectionTables/" 158 | 159 | TrainingDataMFCC <- MFCCFunction(input.dir= TrainingDataFolderLocation, min.freq = 400, max.freq = 1600,win.avg="standard") 160 | 161 | TrainingDataMFCC$class <- as.factor(TrainingDataMFCC$class) 162 | ``` 163 | 164 | ## Part 2b. 
Run DetectClassify 165 | 166 | ``` r 167 | TestFileDirectory <- '/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTestFiles' 168 | 169 | OutputDirectory <- "data/gibbonR/data/DetectAndClassifyOutput" 170 | 171 | gibbonR(input=TestFileDirectory, 172 | feature.df=TrainingDataMFCC, 173 | model.type.list=c('SVM','RF'), 174 | tune = TRUE, 175 | short.wav.duration=300, 176 | target.signal = c("female.gibbon"), 177 | min.freq = 400, max.freq = 1600, 178 | noise.quantile.val=0.15, 179 | minimum.separation =3, 180 | n.windows = 9, num.cep = 12, 181 | spectrogram.window =160, 182 | pattern.split = ".wav", 183 | min.signal.dur = 3, 184 | max.sound.event.dur = 25, 185 | maximum.separation =1, 186 | probability.thresh.svm = 0.15, 187 | probability.thresh.rf = 0.15, 188 | wav.output = "TRUE", 189 | output.dir =OutputDirectory, 190 | swift.time=TRUE,time.start=5,time.stop=10, 191 | write.table.output=FALSE,verbose=TRUE, 192 | random.sample='NA') 193 | ``` 194 | 195 | # Part 3. Data visualization 196 | 197 | ## Part 3a. Create a UMAP plot colored by class 198 | 199 | ``` r 200 | library(gibbonR) 201 | library(ggpubr) 202 | gibbonID(input.dir="data/gibbonR/data/MultipleSoundClasses/",output.dir="data/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering') 203 | ``` 204 | 205 | ## Part 3b. Create a UMAP plot colored by affinity propagation clustering 206 | 207 | ``` r 208 | library(gibbonR) 209 | library(ggpubr) 210 | library(apcluster) 211 | gibbonID(input.dir="data/gibbonR/data/MultipleSoundClasses/",output.dir="data/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',class='affinity.fixed', q.fixed=0.1,add.spectrograms=TRUE,min.freq=400,max.freq=1600) 212 | ``` 213 | 214 | ### How to cite 215 | 216 | This package is currently in development, with submission to JOSS 217 | planned shortly. In the interim, please cite the arXiv preprint: 218 | 219 | Clink, D. J. & H. 
Klinck. (2019). gibbonR: An R package for the 220 | detection and classification of acoustic signals using machine learning. 221 | arXiv, 1906.02572. 222 | -------------------------------------------------------------------------------- /R/gibbonID.R: -------------------------------------------------------------------------------- 1 | #' gibbonID 2 | #' @description Function that extracts MFCCs as features from .wav files and plots them using UMAP. Points can be colored using affinity propagation clustering or by class labels. With the option to overlay spectrogram images. 3 | #' @param input.dir Directory where the .wav file clips are location 4 | #' @param output.dir Directory to save the spectrogram thumbnails. 5 | #' @param min.freq Minimum frequency (Hz) of signals of interest 6 | #' @param max.freq Maximum frequency (Hz) of signals of interest 7 | #' @param pattern Pattern to search fo rin input.dir; default is '.wav' 8 | #' @param add.spectrograms Logical; overlay spectrogram images 9 | #' @param class Option of 'affinity.adaptive', 'fixed.affinity' or 'no.clustering'; Specifies whether to do adaptive or fixed 'q' affinity propagation clustering, or to color points by class label. 10 | #' @param q.fixed If class=='fixed.affinity' specify value of 'q'. See ??apcluster for more details. 11 | #' @param win.avg Option of 'false','mean.sd' or 'standard'; whether to return MFCCs for each non-overlapping time window, calculate mean and SD over each MFCC or calculated MFCCs for a set number of time windows. 12 | #' @param spec.ratio Value to scale the spectrograms. 
13 | #' 14 | #' @return 15 | #' @export 16 | #' 17 | #' @examples gibbonID(input.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering') 18 | 19 | gibbonID <- 20 | function(input.dir, 21 | output.dir, 22 | min.freq, 23 | max.freq, 24 | pattern = '.wav', 25 | add.spectrograms = FALSE, 26 | class = 'fixed', 27 | q.fixed = 0.1, 28 | win.avg = 'standard', 29 | spec.ratio=40) 30 | { 31 | Focal.exemplars <- 32 | list.files(input.dir, full.names = T, pattern = pattern) 33 | 34 | 35 | print('Step 1 Calculating MFCCs') 36 | AcousticSignalsMFCCs <- MFCCFunction( 37 | input.dir = input.dir, 38 | min.freq = min.freq, 39 | max.freq = max.freq, 40 | num.cep = 12, 41 | win.avg = win.avg 42 | ) 43 | 44 | if (class == 'affinity.adaptive') { 45 | print('Step 2 Computing unsupervised clustering') 46 | 47 | q.val.seq <- seq(from = 0.1, to = 0.9, by = 0.1) 48 | 49 | AcousticSignal.sil.df <- data.frame() 50 | for (a in 1:length(q.val.seq)) { 51 | print(a) 52 | AcousticSignalsAP <- 53 | apcluster::apcluster( 54 | negDistMat(r = 2), 55 | q = q.val.seq[a], 56 | AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))], 57 | maxits = 100000, 58 | convits = 10000 59 | ) 60 | 61 | 62 | sil <- 63 | cluster::silhouette(x = AcousticSignalsAP@idx, 64 | dist = dist(AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))])) 65 | 66 | sil.val <- (summary(sil)$avg.width) 67 | temp.sil.df <- cbind.data.frame(sil.val, q.val.seq[a]) 68 | AcousticSignal.sil.df <- 69 | rbind.data.frame(AcousticSignal.sil.df, temp.sil.df) 70 | } 71 | 72 | MaxSil <- which.max(AcousticSignal.sil.df$sil.val) 73 | 74 | 75 | AcousticSignalsAP <- 76 | apcluster::apcluster( 77 | negDistMat(r = 2), 78 | q = q.val.seq[MaxSil], 79 | AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))], 80 | maxits = 100000, 81 | 
convits = 10000 82 | ) 83 | 84 | print(q.val.seq[MaxSil]) 85 | print(paste('N clusters=', length(AcousticSignalsAP@exemplars))) 86 | AcousticSignalsMFCCs$class <- as.factor(AcousticSignalsAP@idx) 87 | } 88 | 89 | if (class == 'affinity.fixed') { 90 | print('Step 2 Computing unsupervised clustering with fixed q') 91 | 92 | AcousticSignalsAP <- 93 | apcluster::apcluster( 94 | negDistMat(r = 2), 95 | q = q.fixed, 96 | AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))], 97 | maxits = 100000, 98 | convits = 10000 99 | ) 100 | 101 | AcousticSignalsMFCCs$class <- as.factor(AcousticSignalsAP@idx) 102 | 103 | } 104 | 105 | if (class == 'no.clustering') { 106 | print('Step 2 Using class labels for clustering') 107 | AcousticSignalsMFCCs$class <- AcousticSignalsMFCCs$class 108 | } 109 | 110 | AcousticSignals.umap <- 111 | umap::umap( 112 | AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))], 113 | n_neighbors = 12, 114 | controlscale = TRUE, 115 | scale = 3 116 | ) 117 | 118 | plot.for.AcousticSignals <- 119 | cbind.data.frame(AcousticSignals.umap$layout[, 1:2], 120 | AcousticSignalsMFCCs$class) 121 | 122 | colnames(plot.for.AcousticSignals) <- 123 | c("Dim.1", "Dim.2", "class") 124 | 125 | plot.for.AcousticSignals$class <- 126 | as.factor(plot.for.AcousticSignals$class) 127 | 128 | my_plot_AcousticSignals <- 129 | ggpubr::ggscatter( 130 | data = plot.for.AcousticSignals, 131 | x = "Dim.1", 132 | y = "Dim.2", 133 | color = "class" 134 | ) + 135 | geom_point(size = 3) + 136 | scale_color_manual(values = matlab::jet.colors (length( 137 | unique(plot.for.AcousticSignals$class) 138 | ))) + 139 | theme_bw() + xlab('UMAP: Dim 1') + ylab('UMAP: Dim 2') + 140 | ggtitle(paste('N Clusters =', length(unique( 141 | AcousticSignalsMFCCs$class 142 | )))) + 143 | theme( 144 | axis.text.x = element_blank(), 145 | #remove x axis labels 146 | axis.ticks.x = element_blank(), 147 | #remove x axis ticks 148 | axis.text.y = element_blank(), 149 | #remove y axis labels 150 | axis.ticks.y = 
element_blank() #remove y axis ticks 151 | )+labs(color="Cluster") 152 | 153 | if (add.spectrograms == TRUE) { 154 | print('Step 3 Creating Spectrograms ') 155 | 156 | if (!dir.exists(output.dir)) { 157 | dir.create(output.dir) 158 | print(paste('Created output dir', output.dir)) 159 | 160 | for (b in 1:length(Focal.exemplars)) { 161 | #print(b) 162 | short.wav <- tuneR::readWave(Focal.exemplars[[b]]) 163 | 164 | png(filename = paste(output.dir, b, 'Focal.png', sep = ''), 165 | width = 1000) 166 | temp.spec <- 167 | signal::specgram( 168 | short.wav@left, 169 | Fs = short.wav@samp.rate, 170 | n = 1024, 171 | overlap = 0 172 | ) 173 | plot( 174 | temp.spec, 175 | xlab = "", 176 | ylab = "", 177 | ylim = c(min.freq, max.freq), 178 | rev(gray(0:512 / 512)), 179 | axes = F, 180 | useRaster = TRUE 181 | ) 182 | 183 | graphics.off() 184 | 185 | } 186 | } else { 187 | print(paste(output.dir, 'already exists')) 188 | } 189 | 190 | 191 | 192 | print('Adding Spectrograms to Plot Step 3 of 3') 193 | 194 | col.index <- unique(plot.for.AcousticSignals$class) 195 | xrange <- 196 | (abs(range(plot.for.AcousticSignals$Dim.1)[1]) + abs(range(plot.for.AcousticSignals$Dim.1)[2])) / 197 | spec.ratio 198 | yrange <- 199 | (abs(range(plot.for.AcousticSignals$Dim.2)[1]) + abs(range(plot.for.AcousticSignals$Dim.2)[2])) / 200 | spec.ratio 201 | color.vals <- 202 | matlab::jet.colors (length(unique(plot.for.AcousticSignals$class))) 203 | 204 | for (y in 1:length(Focal.exemplars)) { 205 | #print(y, 'out of', length(Focal.exemplars)) 206 | figure1.png <- 207 | magick::image_trim(magick::image_read(paste(output.dir, y, 'Focal.png', sep = 208 | ''))) 209 | figure1.png <- 210 | magick::image_modulate(figure1.png, brightness = 300) 211 | 212 | figure1.png <- 213 | magick::image_border(figure1.png, col = color.vals[which(col.index == plot.for.AcousticSignals[y, ]$class)]) 214 | 215 | figure1.png <- as.raster(figure1.png) 216 | #exemplar.index <- Focal.cluster.results@idx[y] 217 | 218 | 
clust.df.subset <- plot.for.AcousticSignals[y, ] 219 | xmin = clust.df.subset$Dim.1 - xrange 220 | xmax = clust.df.subset$Dim.1 + xrange 221 | ymin = clust.df.subset$Dim.2 + yrange 222 | ymax = clust.df.subset$Dim.2 - yrange 223 | my_plot_AcousticSignals <- 224 | my_plot_AcousticSignals + annotation_raster(figure1.png, xmin, xmax, ymin, ymax) 225 | } 226 | } 227 | ggsave( 228 | "DetectionsAffinityPlot.png", 229 | my_plot_AcousticSignals, 230 | width = 4.25, 231 | height = 3.25, 232 | dpi = 1200 233 | ) 234 | 235 | 236 | return(my_plot_AcousticSignals) 237 | } 238 | -------------------------------------------------------------------------------- /R/DetectBLED.R: -------------------------------------------------------------------------------- 1 | #' DetectBLED 2 | #' @description Function to do band-limited energy summation to find sound events. This function only identifies sound events based on frequency and duration so is not expected to have high precision. 3 | #' @param input Either full path to directory containing .wav files or a list with file name as first element and .wav as second element 4 | #' @param input.type Either 'directory', 'list' or 'wav' 5 | #' @param min.freq Minimum frequency (Hz) of signal of interest 6 | #' @param max.freq Maximum frequency (Hz) of signal of interest 7 | #' @param pattern.split Pattern to find and remove to create file name; currently set to ".rda" 8 | #' @param output Either 'spectro', 'table' or 'wav' 9 | #' @param noise.quantile.val A quantile value between 0 to 1 for the band energy summation 10 | #' @param spectrogram.window Window length for spectrogram analysis (input to spectro fuction from 'seewave') 11 | #' @param subsample.dur Duration (s) to divide longer sound file to increase processing efficiency 12 | #' @param training.label Label to append to saved .wav files 13 | #' @param min.signal.dur The minimum duration (s) sound events must be to be considered sound events 14 | #' @param max.sound.event.dur The 
maximum duration (s) sound events must be to be considered sound events 15 | #' @param wav.output Logical; output wave file of sound events? 16 | #' @param swift.time If file name is in structure recorder_YYYYMMDD_HHMMSS can subset files based on specific times 17 | #' @param time.start Time recordings start (hour) 18 | #' @param time.stop Time recordings stop (hour) 19 | #' @param write.table.output Logical; write Raven selection tables to output directory 20 | #' @param verbose Logical; print out steps 21 | #' @param random.sample If a random subset of files in a directory are desired specify a value, otherwise 'NA' 22 | #' @param output.dir Specified output directory; set to current working directory 23 | #' @export 24 | #' @import e1071 25 | #' @import tuneR 26 | #' @import seewave 27 | #' @import tuneR 28 | #' @import stringr 29 | #' @examples 30 | 31 | DetectBLED <- function(input,input.type ='wav', 32 | min.freq = 200, 33 | max.freq = 6000, 34 | noise.quantile.val = 0.75, 35 | spectrogram.window = 1600, 36 | subsample.dur = 300, 37 | training.label = 'noise', 38 | pattern.split = ".wav", 39 | min.signal.dur = 1, 40 | max.sound.event.dur = 6, 41 | wav.output = "TRUE", 42 | output.dir = getwd(), 43 | swift.time = TRUE, 44 | time.start = 18, 45 | time.stop = 23, 46 | write.table.output = TRUE, 47 | verbose = TRUE, 48 | random.sample = 100) { 49 | if (wav.output == "TRUE" & output.dir == "") { 50 | stop("Specify output directory") 51 | } 52 | 53 | if (input.type == 'list') { 54 | list.file.input <- unlist(input) 55 | nslash <- str_count(input, pattern = '/') + 1 56 | list.file.input.short <- 57 | str_split_fixed(input, pattern = '/', nslash)[, nslash] 58 | } 59 | 60 | if (input.type == "directory") { 61 | list.file.input <- 62 | list.files(input, full.names = TRUE, recursive = T) 63 | list.file.input.short <- 64 | list.files(input, full.names = FALSE, recursive = T) 65 | } 66 | 67 | if (input.type == "wav") { 68 | list.file.input <- input 69 | } 70 | 71 | 72 | 
if (swift.time == TRUE) { 73 | number.of.slash <- str_count(list.file.input, pattern = "/")[1] 74 | base.file.name.all <- 75 | str_split_fixed(list.file.input, 76 | pattern = "/", 77 | n = (number.of.slash + 1))[, number.of.slash + 1] 78 | temp.name.all <- 79 | stringr::str_split_fixed(base.file.name.all, pattern = pattern.split, n = 2)[, 1] 80 | times <- str_split_fixed(temp.name.all, pattern = '_', n = 3)[, 3] 81 | times <- as.numeric(substr(times, start = 1, stop = 2)) 82 | list.file.input <- 83 | list.file.input[which(times >= time.start & times <= time.stop)] 84 | } 85 | 86 | if (length(list.file.input) == 0) { 87 | print("No sound files detected") 88 | break 89 | } 90 | 91 | if (is.numeric(random.sample) == TRUE) { 92 | list.file.input <- 93 | list.file.input[sample(1:length(list.file.input), random.sample, replace = 94 | F)] 95 | } 96 | 97 | 98 | for (i in 1:length(list.file.input)) { 99 | timing.df <- data.frame() 100 | 101 | 102 | contains.slash <- str_detect(list.file.input[i], pattern = "/") 103 | 104 | if (contains.slash == 'TRUE') { 105 | number.of.slash <- str_count(list.file.input[i], pattern = "/") 106 | base.file.name <- 107 | str_split_fixed(list.file.input[i], 108 | pattern = "/", 109 | n = (number.of.slash + 1))[, number.of.slash + 1] 110 | temp.name <- 111 | stringr::str_split_fixed(base.file.name, pattern = pattern.split, n = 2)[1] 112 | } else{ 113 | temp.name <- 114 | stringr::str_split_fixed(list.file.input[i], pattern = pattern.split, n = 2)[1] 115 | 116 | } 117 | 118 | # Convert .wav file to spectrogram 119 | if (verbose == TRUE) { 120 | print(paste( 121 | "Computing spectrogram for file", 122 | temp.name, 123 | i, 124 | 'out of', 125 | length(list.file.input) 126 | )) 127 | } 128 | 129 | RavenSelectionTableDF <- data.frame() 130 | temp.wav <- readWave(list.file.input[i]) 131 | 132 | sound_length <- 133 | round(length(temp.wav@left) / temp.wav@samp.rate, 2) 134 | 135 | cutwave.list <- 136 | c(seq( 137 | from = 1, 138 | to = 
(sound_length), 139 | by = subsample.dur 140 | ), sound_length) 141 | 142 | short.sound.files <- lapply(1:(length(cutwave.list) - 1), 143 | function(i) 144 | extractWave( 145 | temp.wav, 146 | from = cutwave.list[i], 147 | to = cutwave.list[i + 148 | 1], 149 | xunit = c("time"), 150 | plot = F, 151 | output = "Wave" 152 | )) 153 | 154 | for (j in 1:length(short.sound.files)) { 155 | swift.spectro <- 156 | spectro( 157 | short.sound.files[[j]], 158 | wl = spectrogram.window, 159 | overlap = 0, 160 | plot = F 161 | ) 162 | 163 | 164 | # Identify the frequency band of interest 165 | min.freq.cols <- 166 | which.min(abs(round(swift.spectro$freq, digits = 2) - (min.freq / 1000))) 167 | max.freq.cols <- 168 | which.min(abs(round(swift.spectro$freq, digits = 2) - (max.freq / 1000))) 169 | 170 | 171 | # Calculate the column sums for each time window 172 | col.sum <- 173 | colSums(swift.spectro$amp[min.freq.cols:max.freq.cols,]) 174 | 175 | 176 | # Calculate noise value 177 | noise.value <- 178 | quantile(unlist(col.sum), c(noise.quantile.val)) 179 | 180 | # Determine which values are above specified cutoff 181 | list.sub <- which(col.sum > noise.value) 182 | call.timing <- 183 | split(list.sub, cumsum(c(1, diff(list.sub)) != 1)) 184 | 185 | # Calculate minimum signal duration to be considered signal 186 | if( length(which(swift.spectro$time > 1))>0){ 187 | number.time.windows.1sec <- min(which(swift.spectro$time > 1)) 188 | signal.dur <- number.time.windows.1sec * min.signal.dur 189 | 190 | # Combine all potential sound events into a list 191 | call.timing.list <- 192 | as.list(call.timing[which(sapply(call.timing, length) > signal.dur)]) 193 | 194 | # If user indicated maximum duration create list of sound events under certain duration 195 | if (max.sound.event.dur != 'NULL') { 196 | sound.event.index.max <- 197 | which.min(abs(swift.spectro$time - max.sound.event.dur)) 198 | call.timing.list <- 199 | call.timing.list[which(sapply(call.timing.list, length) < 
sound.event.index.max)] 200 | } 201 | } else{ 202 | call.timing.list <- list() 203 | } 204 | 205 | if (length(call.timing.list) >= 1) { 206 | subsamps <- lapply(1:length(call.timing.list), 207 | function(i) 208 | extractWave( 209 | short.sound.files[[j]], 210 | from = swift.spectro$time[min(call.timing.list[[i]])], 211 | to = swift.spectro$time[max(call.timing.list[[i]])], 212 | xunit = c("time"), 213 | plot = F, 214 | output = "Wave" 215 | )) 216 | 217 | if (j == 1) { 218 | if (wav.output == "TRUE") 219 | lapply(1:length(subsamps), 220 | function(i) 221 | writeWave( 222 | subsamps[[i]], 223 | filename = paste( 224 | output.dir, 225 | training.label, 226 | '_', 227 | paste( 228 | temp.name, 229 | round(swift.spectro$t[min(call.timing.list[[i]])],2), 230 | round(swift.spectro$t[max(call.timing.list[[i]])],2), 231 | '.wav', 232 | sep = '_' 233 | ), 234 | sep = '' 235 | ), 236 | extensible = FALSE 237 | )) 238 | } 239 | 240 | if (j > 1) { 241 | if (wav.output == "TRUE") 242 | lapply(1:length(subsamps), 243 | function(i) 244 | writeWave( 245 | subsamps[[i]], 246 | filename = paste( 247 | output.dir, 248 | training.label, 249 | '_', 250 | paste( 251 | temp.name, 252 | round( (swift.spectro$t[min(call.timing.list[[i]])] + 253 | (subsample.dur * (j - 1))),2) , 254 | round((swift.spectro$t[max(call.timing.list[[i]])] + 255 | (subsample.dur * (j - 1))),2), 256 | '.wav', 257 | sep = '_' 258 | ), 259 | sep = '' 260 | ), 261 | extensible = FALSE 262 | )) 263 | } 264 | 265 | timing.df <- lapply(1:length(call.timing.list), 266 | function(i) 267 | cbind.data.frame(swift.spectro$t[min(call.timing.list[[i]])], 268 | swift.spectro$t[max(call.timing.list[[i]])])) 269 | 270 | timing.df <- do.call(rbind.data.frame, timing.df) 271 | 272 | colnames(timing.df) <- c('start.time', 'stop.time') 273 | file.name <- rep(temp.name, nrow(timing.df)) 274 | timing.df <- cbind.data.frame(timing.df, file.name) 275 | 276 | if (j > 1) { 277 | timing.df$start.time <- timing.df$start.time + 
(subsample.dur * (j - 1)) 278 | timing.df$stop.time <- 279 | timing.df$stop.time + (subsample.dur * (j - 1)) 280 | } 281 | 282 | timing.df <- rbind.data.frame(timing.df) 283 | Selection <- seq(1, nrow(timing.df)) 284 | View <- rep('Spectrogram 1', nrow(timing.df)) 285 | Channel <- rep(1, nrow(timing.df)) 286 | MinFreq <- rep(min.freq, nrow(timing.df)) 287 | MaxFreq <- rep(max.freq, nrow(timing.df)) 288 | timing.df.temp <- 289 | cbind.data.frame(Selection, View, Channel, MinFreq, MaxFreq, timing.df) 290 | 291 | timing.df.temp <- 292 | timing.df.temp[, c( 293 | "Selection", 294 | "View", 295 | "Channel", 296 | "start.time", 297 | "stop.time", 298 | "MinFreq", 299 | "MaxFreq", 300 | "file.name" 301 | )] 302 | 303 | colnames(timing.df.temp) <- 304 | c( 305 | "Selection", 306 | "View", 307 | "Channel", 308 | "Begin Time (s)", 309 | "End Time (s)", 310 | "Low Freq (Hz)", 311 | "High Freq (Hz)", 312 | "File Name" 313 | ) 314 | 315 | 316 | RavenSelectionTableDF <- 317 | rbind.data.frame(RavenSelectionTableDF, timing.df.temp) 318 | RavenSelectionTableDF$Selection <- 319 | seq(1, nrow(RavenSelectionTableDF), 1) 320 | if (write.table.output == TRUE) { 321 | csv.file.name <- 322 | paste(output.dir, '/', temp.name, 'BLED.txt', sep = '') 323 | write.table( 324 | x = RavenSelectionTableDF, 325 | sep = "\t", 326 | file = csv.file.name, 327 | row.names = FALSE, 328 | quote = FALSE 329 | ) 330 | } 331 | rm(subsamps) 332 | } 333 | } 334 | } 335 | print(RavenSelectionTableDF) 336 | rm(RavenSelectionTableDF) 337 | rm(swift.spectro) 338 | rm(temp.wav) 339 | } 340 | -------------------------------------------------------------------------------- /vignettes/gibbonR-tutorial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "gibbonR: An R package for the automated detection and classification of female gibbon calls from long-term acoustic recordings" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_depth: 2 7 | --- 8 | 9 | ```{r 
eval=FALSE,, include = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>" 13 | ) 14 | ``` 15 | 16 | # Getting started 17 | ## You can install the development version from [GitHub](https://github.com/DenaJGibbon) with: 18 | ```{r eval=FALSE,, echo=T,warning=FALSE, results='hide'} 19 | # install.packages("devtools") 20 | # devtools::install_github("DenaJGibbon/gibbonR") 21 | 22 | library(gibbonR) 23 | ``` 24 | 25 | # Part 1. Prepare Training Data 26 | In 'gibbonR' there are two ways that you can format your training data. The first can be a set of labelled .wav clips with the class indicated in the name of the file (e.g., 'gibbon_01.wav' and 'noise_01.wav'). The second is to have a folder of selection tables created in Raven Pro (K. Lisa Yang Center for Conservation Bioacoustics) and a folder with the associated '.wav' files. For the second approach there must be an annotation column indicating the call type and it is assumed that all signals of interest are annotated, and the rest of the files contain only background noise. 27 | 28 | ## Part 1A. 
Training Data with Labeled .wav clips 29 | ### Read in clips and calculate MFCCs 30 | ```{r eval=FALSE, echo = T, results = 'hide' } 31 | TrainingWavFilesDir <- 32 | "data/MultipleSoundClasses/" 33 | 34 | trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg="TRUE") 35 | 36 | 37 | trainingdata$class <- as.factor(trainingdata$class) 38 | ``` 39 | 40 | ### Compare Random Forest and Support Vector Machine for Supervised Classification 41 | ```{r eval=FALSE, } 42 | 43 | trainingdata$class <- as.factor(trainingdata$class) 44 | 45 | 46 | ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial", 47 | cross = 25, 48 | probability = TRUE) 49 | 50 | print(paste('SVM accuracy',ml.model.svm$tot.accuracy)) 51 | 52 | 53 | ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class) 54 | 55 | 56 | print(ml.model.rf) 57 | ``` 58 | 59 | ## Part 1B. Training Data with Raven Selection Tables 60 | ### Prepare training data from labeled annotations 61 | ```{r eval=FALSE,,eval=F } 62 | # Specify the folder where the training data will be saved 63 | TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables" 64 | 65 | # Directory with annotated selection tables 66 | AnnotatedSelectionTables <- list.files("/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/GibbonTrainingSelectionTables/", 67 | full.names = T) 68 | 69 | # Directory with corresponding .wav files 70 | AnnotatedWaveFiles <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTrainingFiles/",full.names = T) 71 | AnnotatedWaveFilesShort <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTrainingFiles/",full.names = F) 72 | AnnotatedWaveFilesShort <- str_split_fixed(AnnotatedWaveFilesShort,pattern = '.wav', n=2)[,1] 73 | 74 | # Loop to 
cut out the corresponding annotations into short clips 75 | for(i in seq_along(AnnotatedSelectionTables)){ 76 | 77 | # Read in selection table 78 | TempSelectionTable <- read.delim2(AnnotatedSelectionTables[i]) 79 | 80 | # Find the corresponding soundfile 81 | SoundFileIndex <- which(str_detect(AnnotatedSelectionTables[i],AnnotatedWaveFilesShort)) 82 | 83 | TempAnnotateWave <- readWave(AnnotatedWaveFiles[SoundFileIndex]) 84 | 85 | ShortSoundClips <- lapply(seq_len(nrow(TempSelectionTable)), 86 | function(j) extractWave(TempAnnotateWave, 87 | from= as.numeric(TempSelectionTable[j,]$Begin.Time..s.), 88 | to=as.numeric(TempSelectionTable[j,]$End.Time..s.), 89 | xunit = c("time"),plot=FALSE,output="Wave")) 90 | # Write wave files to folder; the table index i is included in the name so clips from different selection tables do not overwrite each other 91 | for(k in seq_along(ShortSoundClips)){ 92 | TempClip <- ShortSoundClips[[k]] 93 | WavFileName <- paste0(TrainingDataFolderLocation,'/female.gibbon_', i, '_', k, '.wav') 94 | writeWave(TempClip,WavFileName,extensible = FALSE) 95 | } 96 | 97 | 98 | } 99 | 100 | ``` 101 | 102 | ### Prepare noise training data from files without target signal 103 | ```{r eval=FALSE} 104 | # Specify the folder where the training data will be saved 105 | TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/" 106 | 107 | # Directory with annotated selection tables 108 | NoiseSelectionTables <- list.files("/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/NoiseSelectionTables/", 109 | full.names = TRUE) 110 | 111 | # Directory with corresponding .wav files 112 | NoiseWaveFiles <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/NoiseFiles/",full.names = TRUE) 113 | NoiseWaveFilesShort <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/NoiseFiles/",full.names = FALSE) 114 | NoiseWaveFilesShort <- str_split_fixed(NoiseWaveFilesShort,pattern = '\\.wav', n=2)[,1] # escape the dot: pattern is a regex, bare '.wav' would also match e.g. 'Xwav' 115 | 116 | for(i in seq_along(NoiseSelectionTables)){ 117 | 118 | # 
Find the corresponding soundfile 119 | SoundFileIndex <- which(str_detect(NoiseSelectionTables[i],NoiseWaveFilesShort)) 120 | 121 | DetectBLED(input=NoiseWaveFiles[SoundFileIndex], 122 | min.freq = 400, 123 | max.freq = 1600, 124 | noise.quantile.val=0.3, 125 | spectrogram.window =512, 126 | pattern.split = ".wav", 127 | min.signal.dur = 3, 128 | max.sound.event.dur = 12, 129 | wav.output = "TRUE", 130 | output.dir = TrainingDataFolderLocation, 131 | swift.time=TRUE, 132 | time.start=06, 133 | time.stop=11, 134 | write.table.output=TRUE, 135 | verbose=TRUE, 136 | random.sample=FALSE) 137 | } 138 | 139 | ``` 140 | 141 | ### Now read in clips based on Raven Selection tables and calculate MFCCs 142 | ```{r eval=FALSE, echo = T, results = 'hide' } 143 | 144 | TrainingWavFilesDir <- 145 | "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/" 146 | 147 | trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg="TRUE") 148 | 149 | 150 | trainingdata$class <- as.factor(trainingdata$class) 151 | ``` 152 | 153 | ### Compare Random Forest and Support Vector Machine for Supervised Classification 154 | ```{r eval=FALSE, } 155 | 156 | trainingdata$class <- as.factor(trainingdata$class) 157 | 158 | 159 | ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial", 160 | cross = 25, 161 | probability = TRUE) 162 | 163 | print(paste('SVM accuracy',ml.model.svm$tot.accuracy)) 164 | 165 | 166 | ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class) 167 | 168 | 169 | print(ml.model.rf) 170 | 171 | 172 | ``` 173 | 174 | # Part 2. Run the detector/classifier 175 | 176 | ## Part 2a. 
Feature extraction 177 | ```{r eval=FALSE, } 178 | # Specify the folder where the training data will be saved 179 | TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/" 180 | 181 | TrainingDataMFCC <- MFCCFunction(input.dir= TrainingDataFolderLocation, min.freq = 400, max.freq = 1600,win.avg="standard") 182 | 183 | TrainingDataMFCC$class <- as.factor(TrainingDataMFCC$class) 184 | ``` 185 | 186 | ## Part 2b. Run DetectClassify 187 | ```{r eval=FALSE, } 188 | 189 | TestFileDirectory <- '/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTestFiles' 190 | 191 | OutputDirectory <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/DetectAndClassifyOutput" 192 | 193 | DetectAndClassify(input=TestFileDirectory, 194 | input.type='directory', 195 | feature.df=TrainingDataMFCC, 196 | model.type.list=c('SVM','RF'), 197 | tune = TRUE, 198 | short.wav.duration=300, 199 | target.signal = c("female.gibbon"), 200 | min.freq = 400, max.freq = 1600, 201 | noise.quantile.val=0.15, 202 | time.window.number =3, 203 | n.windows = 9, num.cep = 12, 204 | spectrogram.window =160, 205 | pattern.split = ".wav", 206 | min.signal.dur = 3, 207 | max.sound.event.dur = 25, 208 | maximum.separation =1, 209 | probability.thresh.svm = 0.15, 210 | probability.thresh.rf = 0.15, 211 | wav.output = "TRUE", 212 | output.dir =OutputDirectory, 213 | swift.time=TRUE,time.start=5,time.stop=10, 214 | write.csv.output=FALSE,verbose=TRUE, 215 | random.sample='NA') 216 | 217 | 218 | ``` 219 | 220 | # Part 3. Calculate performance metrics 221 | 222 | ## Part 3a. 
Prepare data for performance metrics 223 | ```{r eval=FALSE} 224 | # Set location of test file selection tables 225 | input.dir.text.files <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/GibbonTestSelectionTables" 226 | 227 | Annotatedfiles <- list.files(input.dir.text.files,full.names = TRUE) 228 | 229 | ListOfAnnotatedFilesShort <- list.files(input.dir.text.files,full.names = FALSE) 230 | 231 | nslash <- str_count(Annotatedfiles,pattern = '/')[1]+1 232 | snames <- str_split_fixed(Annotatedfiles,pattern = '/',n=nslash)[,nslash] 233 | 234 | all.detections <- data.frame() 235 | for(x in seq_along(Annotatedfiles)){ 236 | temp.table <- read.delim2(Annotatedfiles[x],fill = TRUE,header = TRUE) 237 | file.name <- str_split_fixed(snames[x],pattern = '[.]',n=2)[,1] 238 | recorder <- str_split_fixed(file.name,pattern='_',n=3)[,1] 239 | date <- str_split_fixed(file.name,pattern='_',n=3)[,2] 240 | time <- str_split_fixed(file.name,pattern='_',n=3)[,3] 241 | 242 | if(nrow(temp.table) > 0){ # fixed misplaced parenthesis: was nrow(temp.table >0), which compared the whole data.frame to 0 243 | temp.table.updated <- cbind.data.frame(file.name,recorder,date,time,temp.table) 244 | } else { 245 | temp.row <- as.data.frame(t(rep('NA',ncol(temp.table)))) # note: fills with the string 'NA', not NA — kept as in original 246 | colnames(temp.row) <- colnames(temp.table) 247 | temp.table.updated <- cbind.data.frame(file.name,recorder,date,time,temp.row) 248 | 249 | } 250 | all.detections <- rbind.data.frame(all.detections,temp.table.updated) 251 | } 252 | 253 | ``` 254 | 255 | ## Part 3b. 
Identify true and false positives 256 | ```{r eval=FALSE, } 257 | OutputDirectory <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/DetectAndClassifyOutput" 258 | 259 | all.combinedprecision.recall.randomiter <- data.frame() 260 | range.secs.start <- 6 261 | range.secs.end <- 6 262 | 263 | ### Detections using band-limited energy summation 264 | gibbondetects <- OutputDirectory 265 | list.ml <- list.files(gibbondetects, full.names = T, pattern='.wav') 266 | 267 | 268 | # Need to focus on gibbons for this validation 269 | nslash <- str_count(list.ml[[1]],'/')+1 270 | list.ml.signals <- str_split_fixed(list.ml,pattern = '/',n=nslash)[,nslash] 271 | 272 | list.ml.signals <- str_split_fixed(list.ml.signals,pattern = '_',n=5)[,4] 273 | 274 | 275 | list.ml <- 276 | list.ml[which(list.ml.signals=='female.gibbon')] 277 | 278 | 279 | ml.detection.df <- data.frame() 280 | 281 | for(y in 1:length(list.ml)){ 282 | L.wav <- list.ml[[y]] 283 | n.slash <- str_count(L.wav, pattern = "/")[1] + 1 284 | 285 | det.file.name <- str_split_fixed(L.wav,"/",n=n.slash)[,n.slash] 286 | det.file.name <- str_split_fixed(det.file.name,".wav",n=2)[,1] 287 | 288 | file.name <- paste(str_split_fixed(det.file.name,"_",n=5)[,1],str_split_fixed(det.file.name,"_",n=5)[,2], 289 | str_split_fixed(det.file.name,"_",n=5)[,3], sep='_') 290 | det.date <- str_split_fixed(det.file.name,"_",n=5)[,2] 291 | det.time <- str_split_fixed(det.file.name,"_",n=5)[,3] 292 | det.swift <- str_split_fixed(det.file.name,"_",n=5)[,1] 293 | det.time.start <- as.numeric(str_split_fixed(det.file.name,"_",n=9)[,6]) 294 | det.time.end <- as.numeric(str_split_fixed(det.file.name,"_",n=9)[,7]) 295 | probability <- str_split_fixed(det.file.name,"_",n=8)[,8] 296 | ml.algorithm <- str_split_fixed(det.file.name,"_",n=7)[,5] 297 | 298 | detections.df <- cbind.data.frame(file.name,det.swift, det.date, det.time,det.time.start,det.time.end,probability,ml.algorithm) 299 | 300 | ml.detection.df <- 
rbind.data.frame(ml.detection.df,detections.df) 301 | } 302 | 303 | 304 | recall.snr.all.df <- data.frame() 305 | for(x in 1:nrow(ml.detection.df)){ 306 | all.detections.subset <- ml.detection.df[x,] 307 | validate.detect.subset <-subset(all.detections,file.name==as.character(all.detections.subset$file.name)) 308 | validate.detect.subset$Begin.Time..s. <- as.numeric(validate.detect.subset$Begin.Time..s.) 309 | min.start.time <- as.numeric(all.detections.subset$det.time.start)-range.secs.start 310 | max.start.time <- as.numeric(all.detections.subset$det.time.start)+range.secs.end 311 | 312 | detections.ml <- subset(validate.detect.subset, Begin.Time..s.>min.start.time & Begin.Time..s.< max.start.time) 313 | 314 | if(nrow(detections.ml)>0){ 315 | all.detections.subset$class.label <- '1' 316 | } else{ 317 | all.detections.subset$class.label <- '-1' 318 | } 319 | 320 | recall.snr.all.df <- rbind.data.frame(recall.snr.all.df,all.detections.subset) 321 | } 322 | 323 | 324 | ``` 325 | ## Part 3c. 
Calculate and plot performance metrics using 'ROCR' 326 | 327 | ```{r eval=FALSE, } 328 | library(ROCR) 329 | 330 | auc.df <- data.frame() 331 | performance.df <- data.frame() 332 | 333 | 334 | ml.index <- unique(recall.snr.all.df$ml.algorithm) 335 | for(m in 1:length(ml.index)){ 336 | 337 | temp.subset <- 338 | subset(recall.snr.all.df, 339 | ml.algorithm==ml.index[m]) 340 | 341 | predictions <- as.numeric(temp.subset$probability) 342 | labels <- (temp.subset$class.label) 343 | pred <- prediction(predictions, labels) 344 | perf <- performance(pred, "rec", "prec") 345 | perfauc <- performance(pred, "aucpr") 346 | Precision <- perf@x.values[[1]] 347 | Recall <- perf@y.values[[1]] 348 | Threshold <- perf@alpha.values[[1]] 349 | AUC <- perfauc@y.values[[1]] 350 | perfF1 <- performance(pred, "f") 351 | F1 <- perfF1@y.values[[1]] 352 | print(AUC) 353 | ml.algorithm <- ml.index[m] 354 | tempauc <- cbind.data.frame(AUC,ml.algorithm) 355 | auc.df <- rbind.data.frame(auc.df,tempauc) 356 | 357 | temp.performance <- cbind.data.frame(Precision,Recall,Threshold,F1,ml.algorithm) 358 | performance.df <- rbind.data.frame(performance.df,temp.performance) 359 | 360 | perf <- performance(pred, "prec", "rec") 361 | 362 | plot(perf, 363 | avg= "threshold", 364 | colorize=TRUE, 365 | lwd= 3, 366 | main= paste(ml.index[m],'Precision/Recall')) 367 | 368 | plot(perf, 369 | lty=3, 370 | col="grey78", 371 | add=TRUE) 372 | 373 | } 374 | 375 | ``` 376 | 377 | # Part 4. Unsupervised clustering 378 | ## Part 4a. Create a UMAP plot colored by class 379 | ```{r eval=FALSE, } 380 | library(gibbonR) 381 | library(ggpubr) 382 | UMAPBiplotAddSpectrograms(input.dir.Focal="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir.Focal="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",add.spectrograms=TRUE,min.freq=400,max.freq=1600,main="UMAP Plot") 383 | 384 | ``` 385 | ## Part 4b. 
Create a UMAP plot colored by affinity propagation clustering 386 | ```{r eval=FALSE, } 387 | library(gibbonR) 388 | library(ggpubr) 389 | library(apcluster) 390 | AffinityBiplotAddSpectrograms(input.dir.Focal="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir.Focal="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",class='fixed', q.fixed=0.1,add.spectrograms=TRUE,min.freq=400,max.freq=1600,main="UMAP Plot") 391 | 392 | ``` 393 | 394 | -------------------------------------------------------------------------------- /.Rhistory: -------------------------------------------------------------------------------- 1 | c("Dim.1", "Dim.2", "Class") 2 | plot.for.AcousticSignalsMFCC.F$Class <- as.factor(plot.for.AcousticSignalsMFCC.F$Class) 3 | Plot1Females <- ggpubr::ggscatter(data = plot.for.AcousticSignalsMFCC.F,x = "Dim.1", 4 | y = "Dim.2", 5 | color = "Class", alpha=0.4)+ggtitle('Recording units')+ 6 | theme(axis.text.x=element_blank(), #remove x axis labels 7 | axis.ticks.x=element_blank(), #remove x axis ticks 8 | axis.text.y=element_blank(), #remove y axis labels 9 | axis.ticks.y=element_blank() #remove y axis ticks 10 | ) 11 | Plot1Females 12 | # Unsupervised clustering ------------------------------------------------- 13 | library(apcluster) 14 | aricode::NMI(as.factor(AcousticSignalsAPFemales@idx),trainingdataFemalesUpdate$class) 15 | # Adaptive returns q=0.1 16 | q.val.seq <- seq(from=0.1,to=0.9,by=0.1) 17 | AcousticSignal.sil.df <- data.frame() 18 | for(a in 1:length(q.val.seq)){ 19 | print(a) 20 | AcousticSignalsAP <- 21 | apcluster::apcluster(negDistMat(r=2),q=q.val.seq[a], 22 | trainingdataFemalesUpdate[,-c(1,179)], 23 | maxits=100000,convits=10000) 24 | sil <- 25 | cluster::silhouette(x = AcousticSignalsAP@idx, 26 | dist = dist( trainingdataFemalesUpdate[,-c(1,179)])) 27 | sil.val <- (summary(sil)$avg.width) 28 | temp.sil.df <- cbind.data.frame(sil.val,q.val.seq[a]) 29 | 
AcousticSignal.sil.df <- rbind.data.frame(AcousticSignal.sil.df,temp.sil.df) 30 | } 31 | MaxSil <- which.max(AcousticSignal.sil.df$sil.val) 32 | max(AcousticSignal.sil.df$sil.val) 33 | AcousticSignalsAPFemales <- 34 | apcluster::apcluster(negDistMat(r=2),q= q.val.seq[MaxSil], 35 | trainingdataFemalesUpdate[,-c(1,179)], 36 | maxits=100000,convits=10000) 37 | AcousticSignals.umap.F <- 38 | umap::umap(trainingdataFemales[,-c(1,179)], 39 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 40 | controlscale=TRUE,scale=3) 41 | length(AcousticSignalsAPFemales@exemplars) 42 | plot.for.AcousticSignals.F <- 43 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 44 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 45 | colnames(plot.for.AcousticSignals.F) <- 46 | c("Dim.1", "Dim.2","Cluster") 47 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 48 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 49 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 50 | y = "Dim.2", 51 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 52 | theme(axis.text.x=element_blank(), #remove x axis labels 53 | axis.ticks.x=element_blank(), #remove x axis ticks 54 | axis.text.y=element_blank(), #remove y axis labels 55 | axis.ticks.y=element_blank() #remove y axis ticks 56 | ) 57 | Plot2Females 58 | AcousticSignalsAPFemales <- 59 | apcluster::apcluster(negDistMat(r=2),q=0.1,# q.val.seq[MaxSil], 60 | trainingdataFemalesUpdate[,-c(1,179)], 61 | maxits=100000,convits=10000) 62 | AcousticSignals.umap.F <- 63 | umap::umap(trainingdataFemales[,-c(1,179)], 64 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 65 | controlscale=TRUE,scale=3) 66 | length(AcousticSignalsAPFemales@exemplars) 67 | plot.for.AcousticSignals.F <- 68 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 69 | as.factor( 
as.numeric(AcousticSignalsAPFemales@idx))) 70 | colnames(plot.for.AcousticSignals.F) <- 71 | c("Dim.1", "Dim.2","Cluster") 72 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 73 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 74 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 75 | y = "Dim.2", 76 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 77 | theme(axis.text.x=element_blank(), #remove x axis labels 78 | axis.ticks.x=element_blank(), #remove x axis ticks 79 | axis.text.y=element_blank(), #remove y axis labels 80 | axis.ticks.y=element_blank() #remove y axis ticks 81 | ) 82 | Plot2Females 83 | trainingdataFemales 84 | # Female individuals ------------------------------------------------------ 85 | source('R/MFCCFunctionMeanSD.R') 86 | # Female individuals ------------------------------------------------------ 87 | source('gibbonR/R/MFCCFunctionMeanSD.R') 88 | # Female individuals ------------------------------------------------------ 89 | source('gibbonR/R/MFCCFunction.R') 90 | # Female individuals ------------------------------------------------------ 91 | source('R/MFCCFunction.R') 92 | subset.directory <- '/Users/denaclink/Desktop/RStudio Projects/gibbonID/data/FemaleGibbonsSwiftHQ/' 93 | trainingdataFemales <- MFCCFunction(input.dir=subset.directory , min.freq = 400, max.freq = 1600,win.avg = 'mean.sd') 94 | trainingdataFemalesnames <- list.files(subset.directory, 95 | full.names = F,pattern = '.wav') 96 | trainingdataFemales$Class <- str_split_fixed(trainingdataFemalesnames,pattern = '_',n=2)[,1] 97 | trainingdataFemales$Class <- as.factor(trainingdataFemales$Class) 98 | MetaData <- read.csv("/Users/denaclink/Desktop/RStudio Projects/T0010_SEAsia2018_2019.csv") 99 | # Save as new object 100 | trainingdataFemalesUpdate <- data.frame() 101 | UniqueClass <- unique(trainingdataFemales$Class) 102 | for(b in 1:length(UniqueClass)){ 103 | 
TempClass <- UniqueClass[b] 104 | TempMeta <- subset(MetaData,Deployment.Comments==TempClass) 105 | trainingdataFemalessub <- subset(trainingdataFemales,class==TempClass) 106 | trainingdataFemalessub$lat <- TempMeta$LAT..decimal.degrees. 107 | trainingdataFemalessub$lon <- TempMeta$LON..decimal.degrees. 108 | trainingdataFemalesUpdate <- rbind.data.frame(trainingdataFemalesUpdate, trainingdataFemalessub) 109 | } 110 | head(trainingdataFemalesUpdate) 111 | AcousticSignalsMFCC.umap.F <- 112 | umap::umap(trainingdataFemalesUpdate[,-c(1,50)], 113 | #labels=as.factor(trainingdataFemales$Class), 114 | controlscale=TRUE,scale=3) 115 | colnames(trainingdataFemalesUpdate) 116 | AcousticSignalsMFCC.umap.F <- 117 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 118 | #labels=as.factor(trainingdataFemales$Class), 119 | controlscale=TRUE,scale=3) 120 | plot.for.AcousticSignalsMFCC.F <- 121 | cbind.data.frame(AcousticSignalsMFCC.umap.F$layout[,1:2], 122 | trainingdataFemalesUpdate$class) 123 | colnames(plot.for.AcousticSignalsMFCC.F) <- 124 | c("Dim.1", "Dim.2", "Class") 125 | plot.for.AcousticSignalsMFCC.F$Class <- as.factor(plot.for.AcousticSignalsMFCC.F$Class) 126 | Plot1Females <- ggpubr::ggscatter(data = plot.for.AcousticSignalsMFCC.F,x = "Dim.1", 127 | y = "Dim.2", 128 | color = "Class", alpha=0.4)+ggtitle('Recording units')+ 129 | theme(axis.text.x=element_blank(), #remove x axis labels 130 | axis.ticks.x=element_blank(), #remove x axis ticks 131 | axis.text.y=element_blank(), #remove y axis labels 132 | axis.ticks.y=element_blank() #remove y axis ticks 133 | ) 134 | Plot1Females 135 | # Unsupervised clustering ------------------------------------------------- 136 | library(apcluster) 137 | aricode::NMI(as.factor(AcousticSignalsAPFemales@idx),trainingdataFemalesUpdate$class) 138 | # Adaptive returns q=0.1 139 | q.val.seq <- seq(from=0.1,to=0.9,by=0.1) 140 | AcousticSignal.sil.df <- data.frame() 141 | for(a in 1:length(q.val.seq)){ 142 | print(a) 143 | AcousticSignalsAP 
<- 144 | apcluster::apcluster(negDistMat(r=2),q=q.val.seq[a], 145 | trainingdataFemalesUpdate[,-c(1,51)], 146 | maxits=100000,convits=10000) 147 | sil <- 148 | cluster::silhouette(x = AcousticSignalsAP@idx, 149 | dist = dist( trainingdataFemalesUpdate[,-c(1,51)])) 150 | sil.val <- (summary(sil)$avg.width) 151 | temp.sil.df <- cbind.data.frame(sil.val,q.val.seq[a]) 152 | AcousticSignal.sil.df <- rbind.data.frame(AcousticSignal.sil.df,temp.sil.df) 153 | } 154 | MaxSil <- which.max(AcousticSignal.sil.df$sil.val) 155 | max(AcousticSignal.sil.df$sil.val) 156 | MaxSil <- which.max(AcousticSignal.sil.df$sil.val) 157 | max(AcousticSignal.sil.df$sil.val) 158 | AcousticSignalsAPFemales <- 159 | apcluster::apcluster(negDistMat(r=2),q.val.seq[MaxSil], 160 | trainingdataFemalesUpdate[,-c(1,51)], 161 | maxits=100000,convits=10000) 162 | AcousticSignals.umap.F <- 163 | umap::umap(trainingdataFemales[,-c(1,179)], 164 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 165 | controlscale=TRUE,scale=3) 166 | length(AcousticSignalsAPFemales@exemplars) 167 | plot.for.AcousticSignals.F <- 168 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 169 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 170 | colnames(plot.for.AcousticSignals.F) <- 171 | c("Dim.1", "Dim.2","Cluster") 172 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 173 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 174 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 175 | y = "Dim.2", 176 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 177 | theme(axis.text.x=element_blank(), #remove x axis labels 178 | axis.ticks.x=element_blank(), #remove x axis ticks 179 | axis.text.y=element_blank(), #remove y axis labels 180 | axis.ticks.y=element_blank() #remove y axis ticks 181 | ) 182 | Plot2Females 183 | AcousticSignals.umap.F <- 184 | 
umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 185 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 186 | controlscale=TRUE,scale=3) 187 | length(AcousticSignalsAPFemales@exemplars) 188 | plot.for.AcousticSignals.F <- 189 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 190 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 191 | colnames(plot.for.AcousticSignals.F) <- 192 | c("Dim.1", "Dim.2","Cluster") 193 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 194 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 195 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 196 | y = "Dim.2", 197 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 198 | theme(axis.text.x=element_blank(), #remove x axis labels 199 | axis.ticks.x=element_blank(), #remove x axis ticks 200 | axis.text.y=element_blank(), #remove y axis labels 201 | axis.ticks.y=element_blank() #remove y axis ticks 202 | ) 203 | Plot2Females 204 | AcousticSignals.umap.F <- 205 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 206 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 207 | controlscale=TRUE,scale=3) 208 | length(AcousticSignalsAPFemales@exemplars) 209 | plot.for.AcousticSignals.F <- 210 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 211 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 212 | colnames(plot.for.AcousticSignals.F) <- 213 | c("Dim.1", "Dim.2","Cluster") 214 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 215 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 216 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 217 | y = "Dim.2", 218 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 219 | theme(axis.text.x=element_blank(), #remove x axis labels 220 | 
axis.ticks.x=element_blank(), #remove x axis ticks 221 | axis.text.y=element_blank(), #remove y axis labels 222 | axis.ticks.y=element_blank() #remove y axis ticks 223 | ) 224 | Plot2Females 225 | AcousticSignals.umap.F <- 226 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 227 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 228 | controlscale=TRUE,scale=3) 229 | length(AcousticSignalsAPFemales@exemplars) 230 | plot.for.AcousticSignals.F <- 231 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 232 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 233 | colnames(plot.for.AcousticSignals.F) <- 234 | c("Dim.1", "Dim.2","Cluster") 235 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 236 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 237 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 238 | y = "Dim.2", 239 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 240 | theme(axis.text.x=element_blank(), #remove x axis labels 241 | axis.ticks.x=element_blank(), #remove x axis ticks 242 | axis.text.y=element_blank(), #remove y axis labels 243 | axis.ticks.y=element_blank() #remove y axis ticks 244 | ) 245 | Plot2Females 246 | AcousticSignals.umap.F <- 247 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 248 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 249 | controlscale=TRUE,scale=3) 250 | length(AcousticSignalsAPFemales@exemplars) 251 | plot.for.AcousticSignals.F <- 252 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 253 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 254 | colnames(plot.for.AcousticSignals.F) <- 255 | c("Dim.1", "Dim.2","Cluster") 256 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 257 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 258 | Plot2Females <- 
ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 259 | y = "Dim.2", 260 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 261 | theme(axis.text.x=element_blank(), #remove x axis labels 262 | axis.ticks.x=element_blank(), #remove x axis ticks 263 | axis.text.y=element_blank(), #remove y axis labels 264 | axis.ticks.y=element_blank() #remove y axis ticks 265 | ) 266 | Plot2Females 267 | set.seed(4) 268 | AcousticSignals.umap.F <- 269 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 270 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 271 | controlscale=TRUE,scale=3) 272 | length(AcousticSignalsAPFemales@exemplars) 273 | plot.for.AcousticSignals.F <- 274 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 275 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 276 | colnames(plot.for.AcousticSignals.F) <- 277 | c("Dim.1", "Dim.2","Cluster") 278 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 279 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 280 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 281 | y = "Dim.2", 282 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 283 | theme(axis.text.x=element_blank(), #remove x axis labels 284 | axis.ticks.x=element_blank(), #remove x axis ticks 285 | axis.text.y=element_blank(), #remove y axis labels 286 | axis.ticks.y=element_blank() #remove y axis ticks 287 | ) 288 | Plot2Females 289 | trainingdataFemales <- MFCCFunction(input.dir=subset.directory , min.freq = 600, max.freq = 1400,win.avg = 'mean.sd') 290 | trainingdataFemalesnames <- list.files(subset.directory, 291 | full.names = F,pattern = '.wav') 292 | trainingdataFemales$Class <- str_split_fixed(trainingdataFemalesnames,pattern = '_',n=2)[,1] 293 | trainingdataFemales$Class <- as.factor(trainingdataFemales$Class) 294 | MetaData <- 
read.csv("/Users/denaclink/Desktop/RStudio Projects/T0010_SEAsia2018_2019.csv") 295 | # Save as new object 296 | trainingdataFemalesUpdate <- data.frame() 297 | UniqueClass <- unique(trainingdataFemales$Class) 298 | for(b in 1:length(UniqueClass)){ 299 | TempClass <- UniqueClass[b] 300 | TempMeta <- subset(MetaData,Deployment.Comments==TempClass) 301 | trainingdataFemalessub <- subset(trainingdataFemales,class==TempClass) 302 | trainingdataFemalessub$lat <- TempMeta$LAT..decimal.degrees. 303 | trainingdataFemalessub$lon <- TempMeta$LON..decimal.degrees. 304 | trainingdataFemalesUpdate <- rbind.data.frame(trainingdataFemalesUpdate, trainingdataFemalessub) 305 | } 306 | head(trainingdataFemalesUpdate) 307 | AcousticSignalsMFCC.umap.F <- 308 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 309 | #labels=as.factor(trainingdataFemales$Class), 310 | controlscale=TRUE,scale=3) 311 | plot.for.AcousticSignalsMFCC.F <- 312 | cbind.data.frame(AcousticSignalsMFCC.umap.F$layout[,1:2], 313 | trainingdataFemalesUpdate$class) 314 | colnames(plot.for.AcousticSignalsMFCC.F) <- 315 | c("Dim.1", "Dim.2", "Class") 316 | plot.for.AcousticSignalsMFCC.F$Class <- as.factor(plot.for.AcousticSignalsMFCC.F$Class) 317 | Plot1Females <- ggpubr::ggscatter(data = plot.for.AcousticSignalsMFCC.F,x = "Dim.1", 318 | y = "Dim.2", 319 | color = "Class", alpha=0.4)+ggtitle('Recording units')+ 320 | theme(axis.text.x=element_blank(), #remove x axis labels 321 | axis.ticks.x=element_blank(), #remove x axis ticks 322 | axis.text.y=element_blank(), #remove y axis labels 323 | axis.ticks.y=element_blank() #remove y axis ticks 324 | ) 325 | Plot1Females 326 | # Unsupervised clustering ------------------------------------------------- 327 | library(apcluster) 328 | aricode::NMI(as.factor(AcousticSignalsAPFemales@idx),trainingdataFemalesUpdate$class) 329 | # Adaptive returns q=0.1 330 | q.val.seq <- seq(from=0.1,to=0.9,by=0.1) 331 | AcousticSignal.sil.df <- data.frame() 332 | for(a in 1:length(q.val.seq)){ 
# ---- Tail of affinity-propagation preference (q) silhouette sweep ----
# (the for() header over index 'a' in q.val.seq is opened above this chunk;
#  features here drop column 1 = sound file name and 51 = class label)
  print(a)
  AcousticSignalsAP <-
    apcluster::apcluster(negDistMat(r = 2), q = q.val.seq[a],
                         trainingdataFemalesUpdate[, -c(1, 51)],
                         maxits = 100000, convits = 10000)
  sil <-
    cluster::silhouette(x = AcousticSignalsAP@idx,
                        dist = dist(trainingdataFemalesUpdate[, -c(1, 51)]))
  sil.val <- summary(sil)$avg.width
  temp.sil.df <- cbind.data.frame(sil.val, q.val.seq[a])
  AcousticSignal.sil.df <- rbind.data.frame(AcousticSignal.sil.df, temp.sil.df)
}

# Pick the preference value with the best average silhouette width
MaxSil <- which.max(AcousticSignal.sil.df$sil.val)
max(AcousticSignal.sil.df$sil.val)
AcousticSignalsAPFemales <-
  apcluster::apcluster(negDistMat(r = 2), q.val.seq[MaxSil],
                       trainingdataFemalesUpdate[, -c(1, 51)],
                       maxits = 100000, convits = 10000)

# UMAP embedding for visualization of the AP clusters
AcousticSignals.umap.F <-
  umap::umap(trainingdataFemalesUpdate[, -c(1, 51)],
             controlscale = TRUE, scale = 3)
length(AcousticSignalsAPFemales@exemplars)
plot.for.AcousticSignals.F <-
  cbind.data.frame(AcousticSignals.umap.F$layout[, 1:2],
                   as.factor(as.numeric(AcousticSignalsAPFemales@idx)))
colnames(plot.for.AcousticSignals.F) <- c("Dim.1", "Dim.2", "Cluster")
plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster)
plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class
Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,
                                  x = "Dim.1", y = "Dim.2",
                                  color = 'Cluster', alpha = 0.4) +
  guides(color = 'none') +
  ggtitle('Affinity propagation') +
  theme(axis.text.x = element_blank(),    # remove x axis labels
        axis.ticks.x = element_blank(),   # remove x axis ticks
        axis.text.y = element_blank(),    # remove y axis labels
        axis.ticks.y = element_blank())   # remove y axis ticks
Plot2Females
q.val.seq[MaxSil]

# Re-run AP with a fixed preference (q = 0.1) for comparison
AcousticSignalsAPFemales <-
  apcluster::apcluster(negDistMat(r = 2), q = 0.1,  # q.val.seq[MaxSil]
                       trainingdataFemalesUpdate[, -c(1, 51)],
                       maxits = 100000, convits = 10000)
AcousticSignals.umap.F <-
  umap::umap(trainingdataFemalesUpdate[, -c(1, 51)],
             controlscale = TRUE, scale = 3)
length(AcousticSignalsAPFemales@exemplars)
plot.for.AcousticSignals.F <-
  cbind.data.frame(AcousticSignals.umap.F$layout[, 1:2],
                   as.factor(as.numeric(AcousticSignalsAPFemales@idx)))
colnames(plot.for.AcousticSignals.F) <- c("Dim.1", "Dim.2", "Cluster")
plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster)
plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class
Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,
                                  x = "Dim.1", y = "Dim.2",
                                  color = 'Cluster', alpha = 0.4) +
  guides(color = 'none') +
  ggtitle('Affinity propagation') +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
Plot2Females
length(AcousticSignalsAPFemales@exemplars)

# ---- Interactive scratch work / package reinstall churn from the session ----
file.choose()
subsamps
subsamps <- c(1, 2, 3)
numeric(length(subsamps))
remove.packages('gibbonR')
# Add documentation opt+cmd+shift+r
library(devtools)
library(ggpubr)
library(rmarkdown)
install_github("DenaJGibbon/gibbonR")
remove.packages('gibbonR')
# Load required libraries
library(devtools)
library(ggpubr)
library(rmarkdown)
install_github("DenaJGibbon/gibbonR")
mfcc.vector.list <- vector("list", 10000)
mfcc.vector.list
remove.packages('gibbonR')
library(devtools)
library(ggpubr)
library(rmarkdown)
install_github("DenaJGibbon/gibbonR")
remove.packages('gibbonR')
install_github("DenaJGibbon/gibbonR")
file.choose()
9 * 24

# ---- Female individuals: MFCC features + deployment metadata join ----
library(plyr)
library(stringr)
library(ggpubr)
library(apcluster)
library(tuneR)
library(aricode)
library(clValid)
library(gibbonR)
library(dplyr)
library(tidyr)
library(ggpubr)
set.seed(13)
source('R/MFCCFunction.R')
subset.directory <- '/Users/denaclink/Desktop/RStudio Projects/gibbonID/data/FemaleGibbonsSwiftHQ/'
trainingdataFemales <- gibbonR::MFCCFunction(input.dir = subset.directory,
                                             min.freq = 600, max.freq = 1400,
                                             win.avg = 'standard')
trainingdataFemalesnames <- list.files(subset.directory, full.names = FALSE,
                                       pattern = '.wav')
# Recording unit is encoded before the first underscore of each file name
trainingdataFemales$Class <- str_split_fixed(trainingdataFemalesnames,
                                             pattern = '_', n = 2)[, 1]
trainingdataFemales$Class <- as.factor(trainingdataFemales$Class)
MetaData <- read.csv("/Users/denaclink/Desktop/RStudio Projects/T0010_SEAsia2018_2019.csv")

# Attach lat/lon from deployment metadata for each recording unit.
# FIX: the feature-table column is 'Class' (capital C); the original
# 'subset(..., class == TempClass)' resolved 'class' to base::class and erred.
trainingdataFemalesUpdate <- data.frame()
UniqueClass <- unique(trainingdataFemales$Class)
for (b in seq_along(UniqueClass)) {
  TempClass <- UniqueClass[b]
  TempMeta <- subset(MetaData, Deployment.Comments == TempClass)
  trainingdataFemalessub <- subset(trainingdataFemales, Class == TempClass)
  trainingdataFemalessub$lat <- TempMeta$LAT..decimal.degrees.
  trainingdataFemalessub$lon <- TempMeta$LON..decimal.degrees.
  trainingdataFemalesUpdate <- rbind.data.frame(trainingdataFemalesUpdate,
                                                trainingdataFemalessub)
}
head(trainingdataFemalesUpdate)

# UMAP of MFCC features colored by recording unit
# (features here drop column 1 = sound file name and 179 = class label)
AcousticSignalsMFCC.umap.F <-
  umap::umap(trainingdataFemalesUpdate[, -c(1, 179)],
             controlscale = TRUE, scale = 3)
# FIX: was trainingdataFemalesUpdate$class (NULL — column is 'Class'), which
# silently dropped the third column and broke the colnames() call below.
plot.for.AcousticSignalsMFCC.F <-
  cbind.data.frame(AcousticSignalsMFCC.umap.F$layout[, 1:2],
                   trainingdataFemalesUpdate$Class)
colnames(plot.for.AcousticSignalsMFCC.F) <- c("Dim.1", "Dim.2", "Class")
plot.for.AcousticSignalsMFCC.F$Class <- as.factor(plot.for.AcousticSignalsMFCC.F$Class)
Plot1Females <- ggpubr::ggscatter(data = plot.for.AcousticSignalsMFCC.F,
                                  x = "Dim.1", y = "Dim.2",
                                  color = "Class", alpha = 0.4) +
  ggtitle('Recording units') +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
Plot1Females

# ---- Unsupervised clustering: silhouette sweep over AP preference q ----
library(apcluster)
# Adaptive returns q=0.1
q.val.seq <- seq(from = 0.1, to = 0.9, by = 0.1)
AcousticSignal.sil.df <- data.frame()
for (a in seq_along(q.val.seq)) {
  print(a)
  AcousticSignalsAP <-
    apcluster::apcluster(negDistMat(r = 2), q = q.val.seq[a],
                         trainingdataFemalesUpdate[, -c(1, 179)],
                         maxits = 100000, convits = 10000)
  sil <-
    cluster::silhouette(x = AcousticSignalsAP@idx,
                        dist = dist(trainingdataFemalesUpdate[, -c(1, 179)]))
  sil.val <- summary(sil)$avg.width
  temp.sil.df <- cbind.data.frame(sil.val, q.val.seq[a])
  AcousticSignal.sil.df <- rbind.data.frame(AcousticSignal.sil.df, temp.sil.df)
}
MaxSil <- which.max(AcousticSignal.sil.df$sil.val)
max(AcousticSignal.sil.df$sil.val)
q.val.seq[MaxSil]
# ---- End of console history: affinity propagation at the best-silhouette q ----
AcousticSignalsAPFemales <-
  apcluster::apcluster(negDistMat(r = 2), q = q.val.seq[MaxSil],
                       trainingdataFemalesUpdate[, -c(1, 179)],
                       maxits = 100000, convits = 10000)
length(AcousticSignalsAPFemales@exemplars)
remove.packages('gibbonR')
# Load required libraries
# Add documentation opt+cmd+shift+r
library(devtools)
library(ggpubr)
library(rmarkdown)
install_github("DenaJGibbon/gibbonR")

# ============================== R/gibbonR.R ==============================

#' gibbonR
#'
#' @description This function identifies sound events using band-limited energy
#'   summation and then classifies the sound events using a trained support
#'   vector machine (SVM) and/or random forest (RF) algorithm.
#' @param input Either full path to directory containing .wav files, a list of .wav files, or the path to a single .wav file
#' @param input.type Either 'directory', 'list' or 'wav'
#' @param feature.df Data frame of features from labeled sound files; first column must be class labels
#' @param model.type.list Which machine learning model(s) to use; "SVM" and/or "RF"
#' @param tune Logical; if TRUE use the 'tune' function for the SVM; NOTE: for large datasets this adds significant computing time
#' @param target.signal Labeled signal(s) of interest from training data (feature.df); can include multiple classes
#' @param short.wav.duration Duration (s) to divide longer sound files into, to increase processing efficiency
#' @param min.freq Minimum frequency (Hz) of signal of interest
#' @param max.freq Maximum frequency (Hz) of signal of interest
#' @param noise.quantile.val A quantile value between 0 and 1 used as the noise floor for the band energy summation
#' @param minimum.separation The minimum number of consecutive time windows that signals must be separated by to be considered separate sound events
#' @param n.windows Number of time windows to calculate for MFCCs
#' @param num.cep Number of cepstral coefficients to calculate for MFCCs
#' @param spectrogram.window Window length for spectrogram analysis (input to the spectro function from 'seewave')
#' @param pattern.split Pattern to find and remove to create the full sound file name; currently set to ".wav"
#' @param min.signal.dur The minimum duration (s) sound events must be to be considered sound events
#' @param maximum.separation The maximum separation (s) between consecutive detections for them to be merged into one event when writing the output table
#' @param max.sound.event.dur The maximum duration (s) sound events may be; set to the string 'NULL' to disable; NOTE this is only applied when writing the text file
#' @param probability.thresh.svm Probability threshold (provided by SVM) to be considered as target signal
#' @param probability.thresh.rf Probability threshold (provided by RF) to be considered as target signal
#' @param wav.output "TRUE" to write .wav files of detections to output.dir
#' @param output.dir Specified output directory; defaults to the current working directory
#' @param swift.time If the file name has the structure recorder_YYYYMMDD_HHMMSS, subset files based on recording hour
#' @param time.start Time recordings start (hour)
#' @param time.stop Time recordings stop (hour)
#' @param write.table.output Logical; write Raven selection tables to the output directory
#' @param verbose Logical; print progress messages
#' @param random.sample If a random subset of files in a directory is desired, specify an integer; otherwise 'NA'
#' @return If write.table.output = TRUE, writes a tab-separated Raven selection table (.txt) for each sound file with detections
#' @examples
#' \donttest{gibbonR(input = "FocalRecordings", input.type = "directory",
#'   feature.df = training.MFCC, target.signal = "female.gibbon")}
#' @export
#' @import e1071
#' @import randomForest
#' @import tuneR
#' @import seewave
#' @import stringr


gibbonR <-
  function(input,
           input.type = 'list',
           feature.df,
           model.type.list = c("SVM"),
           tune = FALSE,
           target.signal = "female.gibbon",
           short.wav.duration = 300,
           min.freq = 400,
           max.freq = 2000,
           noise.quantile.val = 0.5,
           minimum.separation = 5,
           n.windows = 9,
           num.cep = 12,
           spectrogram.window = 1600,
           pattern.split = ".wav",
           min.signal.dur = 4,
           maximum.separation = 1,
           max.sound.event.dur = 12,
           probability.thresh.svm = 0.75,
           probability.thresh.rf = 0.75,
           wav.output = "TRUE",
           output.dir = getwd(),
           swift.time = TRUE,
           time.start = 6,
           time.stop = 12,
           write.table.output = TRUE,
           verbose = TRUE,
           random.sample = 'NA') {

    # Warn if any requested target signal is absent from the training labels
    TrainingMatch <- match(target.signal, unique(feature.df$class))
    if (any(is.na(TrainingMatch))) {
      print("Training data does not contain target signal")
    }

    if (wav.output == "TRUE" && output.dir == "")
      stop("Specify output directory")

    # Resolve 'input' into a vector of full file paths
    if (input.type == 'list') {
      list.file.input <- unlist(input)
      nslash <- str_count(input, pattern = '/') + 1
      list.file.input.short <-
        str_split_fixed(input, pattern = '/', nslash)[, nslash]
    }

    if (input.type == "directory") {
      list.file.input <-
        list.files(input, full.names = TRUE, recursive = TRUE)
      list.file.input.short <-
        list.files(input, full.names = FALSE, recursive = TRUE)
    }

    if (input.type == "wav") {
      list.file.input <- input
    }

    # Optionally keep only files recorded between time.start and time.stop,
    # parsing the hour from recorder_YYYYMMDD_HHMMSS file names
    if (swift.time == TRUE) {
      number.of.slash <- str_count(list.file.input, pattern = "/")[1]
      base.file.name.all <-
        str_split_fixed(list.file.input,
                        pattern = "/",
                        n = (number.of.slash + 1))[, number.of.slash + 1]
      temp.name.all <-
        stringr::str_split_fixed(base.file.name.all, pattern = pattern.split, n = 2)[, 1]
      times <- str_split_fixed(temp.name.all, pattern = '_', n = 3)[, 3]
      times <- as.numeric(substr(times, start = 1, stop = 2))
      list.file.input <-
        list.file.input[which(times >= time.start & times <= time.stop)]
    }

    # FIX: original used 'break' here, which is invalid outside a loop and
    # would itself error at runtime; abort explicitly instead.
    if (length(list.file.input) == 0) {
      stop("No sound files detected")
    }

    if (is.numeric(random.sample)) {
      list.file.input <-
        list.file.input[sample(seq_along(list.file.input), random.sample,
                               replace = FALSE)]
    }

    print("Machine learning in progress...")

    # ---- Train SVM on the labeled features ----
    if ("SVM" %in% model.type.list) {
      print("SVM in progress...")
      start_time <- Sys.time()
      if (tune == TRUE) {
        # Grid-search cost/gamma with 5-fold cross-validation
        tune.rad <-
          e1071::tune(
            svm,
            feature.df[, 2:ncol(feature.df)],
            feature.df$class,
            kernel = "radial",
            tunecontrol = tune.control(cross = 5),
            ranges = list(
              cost = c(0.001, 0.01, 0.1, 1, 2, 10, 100, 1000),
              gamma = c(0.01, 0.1, 0.5, 1, 2)
            )
          )
        ml.model.svm <-
          e1071::svm(
            feature.df[, 2:ncol(feature.df)],
            feature.df$class,
            kernel = "radial",
            gamma = tune.rad$best.parameters$gamma,
            cost = tune.rad$best.parameters$cost,
            cross = 20,
            probability = TRUE
          )
      } else {
        ml.model.svm <-
          e1071::svm(
            feature.df[, 2:ncol(feature.df)],
            feature.df$class,
            kernel = "radial",
            gamma = 0.01,
            cost = 2,
            cross = 25,
            probability = TRUE
          )
      }
      print(paste('SVM accuracy', ml.model.svm$tot.accuracy))
      end_time <- Sys.time()
      print(end_time - start_time)
    }

    # ---- Train random forest on the labeled features ----
    if ("RF" %in% model.type.list) {
      print("RF in progress...")
      tryCatch({
        start_time <- Sys.time()
        ml.model.rf <-
          randomForest::randomForest(x = feature.df[, 2:ncol(feature.df)],
                                     y = feature.df$class)
        print(ml.model.rf)
        end_time <- Sys.time()
        print(end_time - start_time)
      }, error = function(e) {
        cat("ERROR :", conditionMessage(e), "\n")
      })
    }

    print(paste("Classifying for target signal", c(target.signal)))

    # ---- Detect and classify, one sound file at a time ----
    for (i in seq_along(list.file.input)) {
      model.results.list <- list()
      RavenSelectionTableDF <- data.frame()
      tryCatch({
        start_time <- Sys.time()

        # Base file name without directories or the pattern.split suffix
        contains.slash <- str_detect(list.file.input[i], pattern = "/")
        if (contains.slash) {
          number.of.slash <- str_count(list.file.input[i], pattern = "/")
          base.file.name <-
            str_split_fixed(list.file.input[i],
                            pattern = "/",
                            n = (number.of.slash + 1))[, number.of.slash + 1]
          temp.name <-
            stringr::str_split_fixed(base.file.name, pattern = pattern.split, n = 2)[1]
        } else {
          temp.name <-
            stringr::str_split_fixed(list.file.input[i], pattern = pattern.split, n = 2)[1]
        }

        if (verbose == TRUE) {
          print(paste(
            "Computing spectrogram for file",
            temp.name,
            i,
            'out of',
            length(list.file.input)
          ))
        }

        # Split the recording into short.wav.duration-second chunks
        temp.wav <- readWave(list.file.input[i])
        sound_length <-
          round(length(temp.wav@left) / temp.wav@samp.rate, 2)
        cutwave.list <-
          c(seq(from = 1,
                to = (sound_length),
                by = short.wav.duration), sound_length)

        short.sound.files <- lapply(1:(length(cutwave.list) - 1),
                                    function(i)
                                      extractWave(
                                        temp.wav,
                                        from = cutwave.list[i],
                                        to = cutwave.list[i + 1],
                                        xunit = c("time"),
                                        plot = FALSE,
                                        output = "Wave"
                                      ))

        print('Running detector over sound files')
        for (j in seq_along(short.sound.files)) {
          swift.spectro <-
            spectro(
              short.sound.files[[j]],
              wl = spectrogram.window,
              overlap = 0,
              plot = FALSE
            )

          # Band-limited energy: sum amplitude in [min.freq, max.freq] per window
          min.freq.cols <-
            which.min(abs(round(swift.spectro$freq, digits = 2) - (min.freq / 1000)))
          max.freq.cols <-
            which.min(abs(round(swift.spectro$freq, digits = 2) - (max.freq / 1000)))
          col.sum <-
            colSums(swift.spectro$amp[min.freq.cols:max.freq.cols, ])

          # Windows whose band energy exceeds the noise quantile are candidates
          noise.value <-
            quantile(unlist(col.sum), c(noise.quantile.val))
          list.sub <- which(col.sum > noise.value)

          # Group candidate windows into putative sound events
          if (minimum.separation != 1) {
            detection.differences <-
              unlist(lapply(1:(length(list.sub) - 1),
                            function(i)
                              c(list.sub[i + 1] - list.sub[i])))
            detection.separation.list <-
              which(detection.differences >= minimum.separation)
            detection.separation.list <-
              c(1, detection.separation.list + 1)
            call.timing <- list()
            for (x in 1:(length(detection.separation.list) - 1)) {
              start.index <- detection.separation.list[x]
              finish.index <- detection.separation.list[x + 1]
              call.timing[[x]] <-
                list.sub[start.index]:list.sub[finish.index]
            }
          } else {
            call.timing <- split(list.sub, cumsum(c(1, diff(list.sub)) != 1))
          }

          # Keep events longer than min.signal.dur (and, if set, shorter than
          # max.sound.event.dur), measured in spectrogram windows
          if (length(which(swift.spectro$time > 1)) > 0) {
            number.time.windows.1sec <- min(which(swift.spectro$time > 1))
            signal.dur <- number.time.windows.1sec * min.signal.dur
            call.timing.list <-
              as.list(call.timing[which(sapply(call.timing, length) > signal.dur)])
            # The string 'NULL' disables the maximum-duration filter
            if (max.sound.event.dur != 'NULL') {
              sound.event.index.max <-
                which.min(abs(swift.spectro$time - max.sound.event.dur))
              call.timing.list <-
                call.timing.list[which(sapply(call.timing.list, length) < sound.event.index.max)]
            }
          } else {
            call.timing.list <- list()
          }

          if (length(call.timing.list) >= 1) {
            # Extract each candidate event and its start/end times
            subsamps <- lapply(seq_along(call.timing.list),
                               function(i)
                                 extractWave(
                                   short.sound.files[[j]],
                                   from = swift.spectro$time[min(call.timing.list[[i]])],
                                   to = swift.spectro$time[max(call.timing.list[[i]])],
                                   xunit = c("time"),
                                   plot = FALSE,
                                   output = "Wave"
                                 ))
            calltimes <- lapply(seq_along(call.timing.list),
                                function(i)
                                  cbind.data.frame(
                                    from = swift.spectro$time[min(call.timing.list[[i]])],
                                    to = swift.spectro$time[max(call.timing.list[[i]])]))

            temp.model.results.list.svm <- list()
            temp.model.results.list.rf <- list()
            for (y in seq_along(target.signal)) {
              for (x in seq_along(subsamps)) {
                calltimes.subset <- calltimes[[x]]
                start.time <- calltimes.subset$from
                end.time <- calltimes.subset$to

                # Convert chunk-relative times to whole-file times
                if (j > 1) {
                  start.time <- short.wav.duration * (j - 1) + start.time
                  end.time <- short.wav.duration * (j - 1) + end.time
                }
                start.time <- round(start.time, 3)
                end.time <- round(end.time, 3)
                short.wav <- subsamps[[x]]

                wav.dur <- duration(short.wav)
                win.time <- wav.dur / n.windows

                # MFCCs over n.windows equal time windows
                melfcc.output <-
                  tuneR::melfcc(
                    short.wav,
                    minfreq = min.freq,
                    hoptime = win.time,
                    maxfreq = max.freq,
                    numcep = num.cep,
                    wintime = win.time
                  )

                # Delta cepstral coefficients
                deltas.output <- deltas(melfcc.output)

                # Use the same number of windows for MFCCs and deltas;
                # append the event duration as a final feature
                mfcc.vector <-
                  c(as.vector(t(melfcc.output[1:(n.windows - 1), 2:num.cep])),
                    as.vector(t(deltas.output[1:(n.windows - 1), 2:num.cep])),
                    wav.dur)
                mfcc.vector <- as.data.frame(t(mfcc.vector))

                if (length(colnames(mfcc.vector)) != length(colnames(feature.df[, 2:ncol(feature.df)]))) {
                  print(
                    'Training dataset columns do not match test dataset; check MFCC settings'
                  )
                  break
                }
                colnames(mfcc.vector) <-
                  colnames(feature.df[, 2:ncol(feature.df)])

                # --- SVM classification of this event ---
                if ("SVM" %in% model.type.list) {
                  svm.prob <- predict(ml.model.svm, mfcc.vector, probability = TRUE)
                  model.output <- attr(svm.prob, "probabilities")
                  signal.loc <-
                    which(attr(model.output, "dimnames")[[2]] == target.signal[y])
                  signal.probability <- model.output[signal.loc]
                  temp.svm.df <-
                    cbind.data.frame(target.signal[y], signal.probability)
                  if (temp.svm.df$signal.probability >= probability.thresh.svm) {
                    if (wav.output == "TRUE") {
                      tuneR::writeWave(
                        subsamps[[x]],
                        filename = paste(
                          output.dir, "/", temp.name, "_",
                          target.signal[y], "_", "SVM", "_",
                          start.time, "_", end.time, "_",
                          round(signal.probability, 3), ".wav",
                          sep = ""
                        ),
                        extensible = FALSE
                      )
                    }
                    temp.df <-
                      cbind.data.frame(
                        temp.name,
                        paste(j, x, sep = '.'),
                        "SVM",
                        target.signal[y],
                        round(signal.probability, 3),
                        start.time,
                        end.time
                      )
                    colnames(temp.df) <-
                      c(
                        "file.name",
                        "detect.num",
                        "model.type",
                        "signal",
                        "probability",
                        "start.time",
                        "end.time"
                      )
                    temp.model.results.list.svm[[length(temp.model.results.list.svm) + 1]] <- temp.df
                  }
                }

                # --- Random forest classification of this event ---
                if ("RF" %in% model.type.list) {
                  RF.prob <- predict(ml.model.rf, mfcc.vector, type = 'prob')
                  model.output <- colnames(RF.prob)
                  signal.loc <- which(model.output == target.signal[y])
                  signal.probability <- RF.prob[, signal.loc]
                  temp.RF.df <-
                    cbind.data.frame(target.signal[y], signal.probability)
                  if (temp.RF.df$signal.probability >= probability.thresh.rf) {
                    if (wav.output == "TRUE") {
                      tuneR::writeWave(
                        subsamps[[x]],
                        filename = paste(
                          output.dir, "/", temp.name, "_",
                          target.signal[y], "_", "RF", "_",
                          start.time, "_", end.time, "_",
                          round(signal.probability, 3), ".wav",
                          sep = ""
                        ),
                        extensible = FALSE
                      )
                    }
                    temp.df <-
                      cbind.data.frame(
                        temp.name,
                        paste(j, x, sep = '.'),
                        "RF",
                        target.signal[y],
                        round(signal.probability, 3),
                        start.time,
                        end.time
                      )
                    colnames(temp.df) <-
                      c(
                        "file.name",
                        "detect.num",
                        "model.type",
                        "signal",
                        "probability",
                        "start.time",
                        "end.time"
                      )
                    temp.model.results.list.rf[[length(temp.model.results.list.rf) + 1]] <- temp.df
                  }
                }
              }  # end x (events)

              # Pool SVM and RF detections for this chunk. Both lists are
              # unconditionally initialized above, so the original exists()
              # guards were always TRUE and have been removed.
              temp.model.results.list.svm <-
                temp.model.results.list.svm[lengths(temp.model.results.list.svm) != 0]
              temp.model.results.list.rf <-
                temp.model.results.list.rf[lengths(temp.model.results.list.rf) != 0]
              temp.model.results.list <-
                append(temp.model.results.list.svm,
                       temp.model.results.list.rf)
              model.results.list[[j]] <-
                do.call(rbind.data.frame, temp.model.results.list)
            }  # end y (target signals)
          }
        }  # end j (chunks)

        model.results.list <-
          model.results.list[lengths(model.results.list) != 0]

        if (length(model.results.list) > 0) {
          print('Creating datasheet')
          timing.df <- do.call(rbind.data.frame, model.results.list)

          # Merge consecutive detections closer than maximum.separation seconds
          for (k in seq_along(model.type.list)) {
            timing.df.subset <-
              subset(timing.df, model.type == model.type.list[[k]])

            detection.time.differences <-
              unlist(lapply(1:(nrow(timing.df.subset) - 1),
                            function(i)
                              c(
                                timing.df.subset$start.time[i + 1] - timing.df.subset$end.time[i]
                              )))
            detection.separation.list <-
              which(detection.time.differences < maximum.separation)
            detection.timing <-
              split(detection.separation.list, cumsum(c(
                1, diff(detection.separation.list)
              ) != 1))

            if (length(detection.timing) > 1) {
              # Each run of close detections also absorbs the following row
              for (j in seq_along(detection.timing)) {
                temp.df <- detection.timing[[j]]
                detection.timing[[j]] <- c(temp.df, max(temp.df) + 1)
              }

              DetectionDFtemp <-
                timing.df.subset[-c(unlist(detection.timing)), ]

              # Collapse each run into one row: median probability, min start,
              # max end
              for (l in seq_along(detection.timing)) {
                temp.subset <- detection.timing[[l]]
                temprow1 <-
                  timing.df.subset[min(temp.subset):max(temp.subset), ]
                probability <- median(temprow1$probability)
                start.time <- round(min(temprow1$start.time), 3)
                end.time <- round(max(temprow1$end.time), 3)
                newselection <-
                  cbind.data.frame(temprow1[1, 1:4], probability, start.time, end.time)
                DetectionDFtemp <-
                  rbind.data.frame(DetectionDFtemp, newselection)
              }
            } else {
              DetectionDFtemp <- timing.df.subset
            }

            RavenSelectionTableDF <-
              rbind.data.frame(RavenSelectionTableDF, DetectionDFtemp)
          }

          RavenSelectionTableDF <-
            RavenSelectionTableDF[order(RavenSelectionTableDF$start.time), ]

          # Raven selection table boilerplate columns
          Selection <- seq(1, nrow(RavenSelectionTableDF))
          View <- rep('Spectrogram 1', nrow(RavenSelectionTableDF))
          Channel <- rep(1, nrow(RavenSelectionTableDF))
          MinFreq <- rep(min.freq, nrow(RavenSelectionTableDF))
          MaxFreq <- rep(max.freq, nrow(RavenSelectionTableDF))

          if (nrow(RavenSelectionTableDF) > 0) {
            RavenSelectionTableDF <-
              cbind.data.frame(Selection,
                               View,
                               Channel,
                               MinFreq,
                               MaxFreq,
                               RavenSelectionTableDF)

            RavenSelectionTableDF <-
              RavenSelectionTableDF[, c(
                "Selection",
                "View",
                "Channel",
                "start.time",
                "end.time",
                "MinFreq",
                "MaxFreq",
                "file.name",
                'model.type',
                'probability',
                'signal'
              )]

            colnames(RavenSelectionTableDF) <-
              c(
                "Selection",
                "View",
                "Channel",
                "Begin Time (s)",
                "End Time (s)",
                "Low Freq (Hz)",
                "High Freq (Hz)",
                "File Name",
                'model.type',
                'probability',
                'signal'
              )

            if (write.table.output == TRUE) {
              csv.file.name <-
                paste(output.dir,
                      '/',
                      temp.name,
                      'gibbonRresults.txt',
                      sep = '')
              write.table(
                x = RavenSelectionTableDF,
                sep = "\t",
                file = csv.file.name,
                row.names = FALSE,
                quote = FALSE
              )
              print(paste(
                "Saving Sound Files",
                temp.name,
                i,
                'out of',
                length(list.file.input)
              ))
            }

            end_time <- Sys.time()
            print(
              paste(
                'System processed',
                round(seewave::duration(temp.wav)),
                'seconds in',
                round(end_time - start_time),
                'seconds',
                'this translates to',
                round(
                  round(seewave::duration(temp.wav)) / 60 / 60 * 3600 / as.numeric(end_time - start_time),
                  1
                ),
                'hours processed in 1 hour'
              )
            )
          }

          # Free per-file objects before the next iteration
          rm(RavenSelectionTableDF)
          rm(swift.spectro)
          rm(temp.wav)
          rm(short.sound.files)
        }
      }, error = function(e) {
        cat("ERROR :", conditionMessage(e), "\n")
      })
    }
  }