├── .nojekyll ├── R ├── .Rapp.history ├── .DS_Store ├── MFCCFunction.R ├── gibbonID.R ├── DetectBLED.R └── gibbonR.R ├── .gitignore ├── vignettes ├── .gitignore ├── .DS_Store ├── DetectionsAffinityPlot.png └── gibbonR-tutorial.Rmd ├── .Rbuildignore ├── .DS_Store ├── data └── .DS_Store ├── man ├── .DS_Store ├── figures │ ├── README-unnamed-chunk-12-1.png │ ├── README-unnamed-chunk-13-1.png │ ├── README-unnamed-chunk-14-1.png │ ├── README-unnamed-chunk-14-2.png │ ├── README-unnamed-chunk-15-1.png │ ├── README-unnamed-chunk-16-1.png │ ├── README-unnamed-chunk-2-1.png │ ├── README-unnamed-chunk-3-1.png │ ├── README-unnamed-chunk-8-1.png │ └── README-unnamed-chunk-9-1.png ├── MFCCFunction.Rd ├── gibbonID.Rd ├── DetectBLED.Rd └── gibbonR.Rd ├── _pkgdown.yml ├── NAMESPACE ├── gibbonR.Rproj ├── .Rapp.history ├── DESCRIPTION ├── .github └── workflows │ └── jekyll-gh-pages.yml ├── README.Rmd ├── README.md └── .Rhistory /.nojekyll: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /R/.Rapp.history: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | inst/doc 3 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^gibbonR\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/.DS_Store -------------------------------------------------------------------------------- /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/R/.DS_Store -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/data/.DS_Store -------------------------------------------------------------------------------- /man/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/.DS_Store -------------------------------------------------------------------------------- /vignettes/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/vignettes/.DS_Store -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://denajgibbon.github.io/gibbonR/ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /vignettes/DetectionsAffinityPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/vignettes/DetectionsAffinityPlot.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-14-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-14-2.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-2-1.png 
-------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DenaJGibbon/gibbonR/HEAD/man/figures/README-unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(DetectBLED) 4 | export(MFCCFunction) 5 | export(gibbonID) 6 | export(gibbonR) 7 | import(e1071) 8 | import(randomForest) 9 | import(seewave) 10 | import(stringr) 11 | import(tuneR) 12 | -------------------------------------------------------------------------------- /gibbonR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | 
-------------------------------------------------------------------------------- /.Rapp.history: -------------------------------------------------------------------------------- 1 | library("devtools")# 2 | #devtools::install_github("klutometis/roxygen",force = TRUE)# 3 | library(roxygen2)# 4 | # Add documentation to function# 5 | setwd("/Users/denasmacbook/Desktop") 6 | create("gibbonR.github") 7 | library("devtools")# 8 | #devtools::install_github("klutometis/roxygen",force = TRUE)# 9 | library(roxygen2)# 10 | # Add documentation to function# 11 | #setwd("/Users/denasmacbook/Desktop")# 12 | #create("gibbonR.github")# 13 | # 14 | setwd("/Users/denasmacbook/Desktop/gibbonR.github")# 15 | document()# 16 | install("/Users/denasmacbook/Desktop/gibbonR.github") 17 | document() 18 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: gibbonR 2 | Title: gibbonR: An R package for the detection and classification of acoustic signals using machine learning 3 | Version: 1.0.1 4 | Authors@R: person("Dena", "Clink", email = "dena.clink@cornell.edu", role = c("aut", "cre")) 5 | Description: Detection, classification and visualization of acoustic signals. 6 | Depends: R (>= 3.4.2), 7 | stringr, 8 | e1071, 9 | randomForest, 10 | tuneR, 11 | seewave, 12 | ggpubr, 13 | apcluster, 14 | umap, 15 | matlab, 16 | magick 17 | License: What license is it under? 
18 | Encoding: UTF-8 19 | LazyData: true 20 | RoxygenNote: 7.3.1 21 | Suggests: 22 | knitr, 23 | rmarkdown 24 | VignetteBuilder: knitr 25 | URL: https://denajgibbon.github.io/gibbonR/ 26 | -------------------------------------------------------------------------------- /man/MFCCFunction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MFCCFunction.R 3 | \name{MFCCFunction} 4 | \alias{MFCCFunction} 5 | \title{MFCCFunction} 6 | \usage{ 7 | MFCCFunction( 8 | input.dir, 9 | min.freq = 400, 10 | max.freq = 2000, 11 | n.windows = 9, 12 | num.cep = 12, 13 | win.avg = "standard", 14 | win.hop.time = 0.25 15 | ) 16 | } 17 | \arguments{ 18 | \item{input.dir}{where the .wav files are stored} 19 | 20 | \item{min.freq}{the minimum frequency (Hz) of the signal of interest} 21 | 22 | \item{max.freq}{the maximum frequency (Hz) of the signal of interest} 23 | 24 | \item{n.windows}{the number of time windows to divide the signal by} 25 | 26 | \item{num.cep}{the number of cepstra to calculate for each time window} 27 | 28 | \item{win.avg}{Option of 'no.avg','mean.sd' or 'standard'; whether to return MFCCs for each non-overlapping time window, calculate mean and SD over each MFCC or calculated MFCCs for a set number of time windows.} 29 | 30 | \item{win.hop.time}{If win.avg='standard' the specified window size.} 31 | } 32 | \value{ 33 | a data frame with a row of MFCCs for each .wav file 34 | } 35 | \description{ 36 | Function to calculate Mel-frequency cepstral coefficents over a directory of focal recordings 37 | } 38 | \examples{ 39 | \donttest{MFCCFunction(input.dir = "FocalRecordings",min.freq = 400,max.freq=2500)} 40 | } 41 | -------------------------------------------------------------------------------- /.github/workflows/jekyll-gh-pages.yml: -------------------------------------------------------------------------------- 1 | # Sample workflow for building 
and deploying a Jekyll site to GitHub Pages 2 | name: Deploy Jekyll with GitHub Pages dependencies preinstalled 3 | 4 | on: 5 | # Runs on pushes targeting the default branch 6 | push: 7 | branches: ["master"] 8 | 9 | # Allows you to run this workflow manually from the Actions tab 10 | workflow_dispatch: 11 | 12 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 13 | permissions: 14 | contents: read 15 | pages: write 16 | id-token: write 17 | 18 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 19 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 20 | concurrency: 21 | group: "pages" 22 | cancel-in-progress: false 23 | 24 | jobs: 25 | # Build job 26 | build: 27 | runs-on: ubuntu-latest 28 | steps: 29 | - name: Checkout 30 | uses: actions/checkout@v4 31 | - name: Setup Pages 32 | uses: actions/configure-pages@v5 33 | - name: Build with Jekyll 34 | uses: actions/jekyll-build-pages@v1 35 | with: 36 | source: ./ 37 | destination: ./_site 38 | - name: Upload artifact 39 | uses: actions/upload-pages-artifact@v3 40 | 41 | # Deployment job 42 | deploy: 43 | environment: 44 | name: github-pages 45 | url: ${{ steps.deployment.outputs.page_url }} 46 | runs-on: ubuntu-latest 47 | needs: build 48 | steps: 49 | - name: Deploy to GitHub Pages 50 | id: deployment 51 | uses: actions/deploy-pages@v4 52 | -------------------------------------------------------------------------------- /man/gibbonID.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gibbonID.R 3 | \name{gibbonID} 4 | \alias{gibbonID} 5 | \title{gibbonID} 6 | \usage{ 7 | gibbonID( 8 | input.dir, 9 | output.dir, 10 | min.freq, 11 | max.freq, 12 | pattern = ".wav", 13 | add.spectrograms = FALSE, 14 | class = "fixed", 15 | q.fixed = 0.1, 16 | win.avg = "standard", 
17 | spec.ratio = 40 18 | ) 19 | } 20 | \arguments{ 21 | \item{input.dir}{Directory where the .wav file clips are location} 22 | 23 | \item{output.dir}{Directory to save the spectrogram thumbnails.} 24 | 25 | \item{min.freq}{Minimum frequency (Hz) of signals of interest} 26 | 27 | \item{max.freq}{Maximum frequency (Hz) of signals of interest} 28 | 29 | \item{pattern}{Pattern to search fo rin input.dir; default is '.wav'} 30 | 31 | \item{add.spectrograms}{Logical; overlay spectrogram images} 32 | 33 | \item{class}{Option of 'affinity.adaptive', 'fixed.affinity' or 'no.clustering'; Specifies whether to do adaptive or fixed 'q' affinity propagation clustering, or to color points by class label.} 34 | 35 | \item{q.fixed}{If class=='fixed.affinity' specify value of 'q'. See ??apcluster for more details.} 36 | 37 | \item{win.avg}{Option of 'false','mean.sd' or 'standard'; whether to return MFCCs for each non-overlapping time window, calculate mean and SD over each MFCC or calculated MFCCs for a set number of time windows.} 38 | 39 | \item{spec.ratio}{Value to scale the spectrograms.} 40 | } 41 | \description{ 42 | Function that extracts MFCCs as features from .wav files and plots them using UMAP. Points can be colored using affinity propagation clustering or by class labels. With the option to overlay spectrogram images. 
43 | } 44 | \examples{ 45 | gibbonID(input.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering') 46 | } 47 | -------------------------------------------------------------------------------- /man/DetectBLED.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DetectBLED.R 3 | \name{DetectBLED} 4 | \alias{DetectBLED} 5 | \title{DetectBLED} 6 | \usage{ 7 | DetectBLED( 8 | input, 9 | input.type = "wav", 10 | min.freq = 200, 11 | max.freq = 6000, 12 | noise.quantile.val = 0.75, 13 | spectrogram.window = 1600, 14 | subsample.dur = 300, 15 | training.label = "noise", 16 | pattern.split = ".wav", 17 | min.signal.dur = 1, 18 | max.sound.event.dur = 6, 19 | wav.output = "TRUE", 20 | output.dir = getwd(), 21 | swift.time = TRUE, 22 | time.start = 18, 23 | time.stop = 23, 24 | write.table.output = TRUE, 25 | verbose = TRUE, 26 | random.sample = 100 27 | ) 28 | } 29 | \arguments{ 30 | \item{input}{Either full path to directory containing .wav files or a list with file name as first element and .wav as second element} 31 | 32 | \item{input.type}{Either 'directory', 'list' or 'wav'} 33 | 34 | \item{min.freq}{Minimum frequency (Hz) of signal of interest} 35 | 36 | \item{max.freq}{Maximum frequency (Hz) of signal of interest} 37 | 38 | \item{noise.quantile.val}{A quantile value between 0 to 1 for the band energy summation} 39 | 40 | \item{spectrogram.window}{Window length for spectrogram analysis (input to spectro fuction from 'seewave')} 41 | 42 | \item{subsample.dur}{Duration (s) to divide longer sound file to increase processing efficiency} 43 | 44 | \item{training.label}{Label to append to saved .wav files} 45 | 46 | \item{pattern.split}{Pattern 
to find and remove to create file name; currently set to ".rda"} 47 | 48 | \item{min.signal.dur}{The minimum duration (s) sound events must be to be considered sound events} 49 | 50 | \item{max.sound.event.dur}{The maximum duration (s) sound events must be to be considered sound events} 51 | 52 | \item{wav.output}{Logical; output wave file of sound events?} 53 | 54 | \item{output.dir}{Specified output directory; set to current working directory} 55 | 56 | \item{swift.time}{If file name is in structure recorder_YYYYMMDD_HHMMSS can subset files based on specific times} 57 | 58 | \item{time.start}{Time recordings start (hour)} 59 | 60 | \item{time.stop}{Time recordings stop (hour)} 61 | 62 | \item{write.table.output}{Logical; write Raven selection tables to output directory} 63 | 64 | \item{verbose}{Logical; print out steps} 65 | 66 | \item{random.sample}{If a random subset of files in a directory are desired specify a value, otherwise 'NA'} 67 | 68 | \item{output}{Either 'spectro', 'table' or 'wav'} 69 | } 70 | \description{ 71 | Function to do band-limited energy summation to find sound events. This function only identifies sound events based on frequency and duration so is not expected to have high precision. 
72 | } 73 | -------------------------------------------------------------------------------- /man/gibbonR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gibbonR.R 3 | \name{gibbonR} 4 | \alias{gibbonR} 5 | \title{gibbonR} 6 | \usage{ 7 | {input, input.type='list', feature.df,model.type.list=c("SVM"), tune = FALSE, target.signal = "female.gibbon", 8 | short.wav.duration=300,min.freq = 400, max.freq = 2000, 9 | noise.quantile.val=0.5, minimum.separation =5, n.windows = 9, num.cep = 12, spectrogram.window =1600, 10 | pattern.split = ".wav", min.signal.dur = 4, maximum.separation =1,max.sound.event.dur = 12, 11 | probability.thresh.svm = 0.75, probability.thresh.rf = 0.75, wav.output = "TRUE", output.dir = getwd(), 12 | swift.time=TRUE,time.start=6,time.stop=12, write.table.output=TRUE,verbose=TRUE, random.sample='NA'} 13 | } 14 | \arguments{ 15 | \item{input}{Either full path to directory containing .wav files, a list of .wav files, or a the path to a single .wav file} 16 | 17 | \item{input.type}{Either 'directory', 'list' or 'wav'} 18 | 19 | \item{feature.df}{Data frame of features from labeled sound files; first column must be class labels} 20 | 21 | \item{model.type.list}{Which machine learning model to use; SVM or RF} 22 | 23 | \item{tune}{Logical; if want to use 'tune' function for SVM; NOTE: for large datasets adds significant computing time} 24 | 25 | \item{target.signal}{Labeled signal(s) of interest from training data (feature.df); can include multiple classes.} 26 | 27 | \item{short.wav.duration}{Duration (s) to divide longer sound file to increase processing efficiency} 28 | 29 | \item{min.freq}{Minimum frequency (Hz) of signal of interest} 30 | 31 | \item{max.freq}{Maximum frequency (Hz) of signal of interest} 32 | 33 | \item{noise.quantile.val}{A quantile value between 0 to 1 for the band energy summation} 34 | 35 | 
\item{minimum.separation}{The minimum number of consecutive time windows that signals must be separated by to be considered a separate sound event} 36 | 37 | \item{n.windows}{Number of time windows to calculate for MFCCs} 38 | 39 | \item{num.cep}{Number of cepstra coefficients to calculate for MFCCs} 40 | 41 | \item{spectrogram.window}{Window length for spectrogram analysis (input to spectro fuction from 'seewave')} 42 | 43 | \item{pattern.split}{Pattern to find and remove to create full sound file name; currently set to ".wav"} 44 | 45 | \item{min.signal.dur}{The minimum duration (s) sound events must be to be considered sound events} 46 | 47 | \item{maximum.separation}{The maximum number of consecutive time windows that signals must be separated by to be considered a separate sound event} 48 | 49 | \item{max.sound.event.dur}{The maximum duration (s) sound events must be to be considered sound events; NOTE this only happens when writing text file} 50 | 51 | \item{probability.thresh.svm}{Probability threshold (provided by SVM) to be considered as target signal} 52 | 53 | \item{probability.thresh.rf}{Probability threshold (provided by RF) to be considered as target signal} 54 | 55 | \item{wav.output}{Logical; output .wav files of detections in specified directory} 56 | 57 | \item{output.dir}{Specified output directory; set to current working directory} 58 | 59 | \item{swift.time}{If file name is in structure recorder_YYYYMMDD_HHMMSS can subset files based on specific times} 60 | 61 | \item{time.start}{Time recordings start (hour)} 62 | 63 | \item{time.stop}{Time recordings stop (hour)} 64 | 65 | \item{write.table.output}{Logical; write Raven selection tables to output directory} 66 | 67 | \item{verbose}{Logical; print out steps} 68 | 69 | \item{random.sample}{If a random subset of files in a directory are desired specify a value, otherwise 'NA'} 70 | } 71 | \value{ 72 | If write.table.output=TRUE writes a .txt file for each sound file with detections 73 | 74 | If 
write.table.output=TRUE writes a .txt file for each sound file with detections 75 | } 76 | \description{ 77 | This function identifies sound events using band-limited energy summation and then classifies the sound events using a trained support vector machine or random forest algorithm. 78 | } 79 | \examples{ 80 | \donttest{MFCCFunction(input.dir = "FocalRecordings",min.freq = 400,max.freq=2500)} 81 | } 82 | -------------------------------------------------------------------------------- /R/MFCCFunction.R: -------------------------------------------------------------------------------- 1 | #' MFCCFunction 2 | #' @description Function to calculate Mel-frequency cepstral coefficents over a directory of focal recordings 3 | #' @param input.dir where the .wav files are stored 4 | #' @param min.freq the minimum frequency (Hz) of the signal of interest 5 | #' @param max.freq the maximum frequency (Hz) of the signal of interest 6 | #' @param n.windows the number of time windows to divide the signal by 7 | #' @param win.avg Option of 'no.avg','mean.sd' or 'standard'; whether to return MFCCs for each non-overlapping time window, calculate mean and SD over each MFCC or calculated MFCCs for a set number of time windows. 8 | #' @param win.hop.time If win.avg='standard' the specified window size. 
9 | #' @param num.cep the number of cepstra to calculate for each time window 10 | #' @export 11 | #' @return a data frame with a row of MFCCs for each .wav file 12 | #' @examples 13 | #' \donttest{MFCCFunction(input.dir = "FocalRecordings",min.freq = 400,max.freq=2500)} 14 | 15 | MFCCFunction <- 16 | function(input.dir, 17 | min.freq = 400, 18 | max.freq = 2000, 19 | n.windows = 9, 20 | num.cep = 12, 21 | win.avg = 'standard', 22 | win.hop.time = 0.25) { 23 | 24 | if (is.list(input.dir) == 'TRUE') { 25 | subsamps <- input.dir 26 | class <- 'NA' 27 | 28 | } else{ 29 | call.timing.list <- 30 | list.files(input.dir, full.names = T, pattern = '.wav', recursive = T) 31 | 32 | call.timing.list.short <- 33 | basename(call.timing.list) 34 | 35 | subsamps <- lapply(1:length(call.timing.list), 36 | function(i) 37 | readWave(call.timing.list[[i]])) 38 | 39 | class <- 40 | stringr::str_split_fixed(call.timing.list.short, pattern = '_', n = 2)[, 1] 41 | 42 | } 43 | 44 | if (win.avg == "no.avg") { 45 | mfcc.output.df <- data.frame() 46 | ####Loop to calculate MFCC for each .wav file in the directory 47 | for (j in 1:length(subsamps)) { 48 | #print(paste("processing",j)) 49 | wav.name <- call.timing.list.short[[j]] 50 | wav.file <- subsamps[[j]] 51 | 52 | 53 | # Calculate MFCCs 54 | melfcc.output <- tuneR::melfcc( 55 | wav.file, 56 | minfreq = min.freq, 57 | maxfreq = max.freq, 58 | wintime = win.hop.time, 59 | hoptime = win.hop.time, 60 | numcep = num.cep 61 | ) 62 | 63 | melfcc.output <- as.data.frame(melfcc.output) 64 | 65 | class <- 66 | rep(stringr::str_split_fixed(wav.name, pattern = '_', n = 2)[, 1], 67 | nrow(melfcc.output)) 68 | 69 | melfcc.output <- cbind.data.frame(class, melfcc.output) 70 | 71 | mfcc.output.df <- 72 | rbind.data.frame(mfcc.output.df, melfcc.output) 73 | } 74 | 75 | return(mfcc.output.df) 76 | } 77 | 78 | if (win.avg == "mean.sd") { 79 | mfcc.vector.list <- vector("list", 10000) 80 | 81 | for (x in 1:length(subsamps)) { 82 | print(paste("processing 
sound event", x, 'out of',length(subsamps) )) 83 | 84 | short.wav <- subsamps[[x]] 85 | wav.dur <- seewave::duration(short.wav) 86 | # Calculate MFCCs 87 | melfcc.output <- 88 | tuneR::melfcc( 89 | short.wav, 90 | minfreq = min.freq, 91 | maxfreq = max.freq, 92 | numcep = num.cep 93 | ) 94 | 95 | # Calculate delta cepstral coefficients 96 | deltas.output <- as.data.frame(tuneR::deltas(melfcc.output)) 97 | 98 | 99 | melfcc.output <- as.data.frame(melfcc.output) 100 | 101 | mfcc.mean <- colMeans(melfcc.output) 102 | mfcc.sd <- apply(melfcc.output, 2, sd) 103 | delta.mean <- colMeans(deltas.output) 104 | delta.sd <- apply(deltas.output, 2, sd) 105 | 106 | # Ensure only same number of time windows are used for MFCC and delta coefficients Also append .wav duration 107 | mfcc.vector <- 108 | c(mfcc.mean, mfcc.sd, delta.mean, delta.sd, wav.dur) 109 | mfcc.vector.list[[x]] <- mfcc.vector 110 | } 111 | 112 | mfcc.output <- mfcc.vector.list 113 | 114 | mfcc.output.df <- do.call(rbind.data.frame, mfcc.output) 115 | colnames(mfcc.output.df) <- 116 | seq(from = 1, 117 | to = ncol(mfcc.output.df), 118 | by = 1) 119 | 120 | mfcc.output.df <- cbind.data.frame(class, mfcc.output.df) 121 | return(mfcc.output.df) 122 | } 123 | 124 | if (win.avg == 'standard') { 125 | 126 | mfcc.vector.list <- vector("list", 10000) 127 | 128 | for (x in 1:length(subsamps)) { 129 | print(paste("processing sound event", x, 'out of',length(subsamps) )) 130 | short.wav <- subsamps[[x]] 131 | wav.dur <- duration(short.wav) 132 | win.time <- wav.dur / n.windows 133 | 134 | # Calculate MFCCs 135 | melfcc.output <- 136 | tuneR::melfcc( 137 | short.wav, 138 | minfreq = min.freq, 139 | hoptime = win.time, 140 | maxfreq = max.freq, 141 | numcep = num.cep, 142 | wintime = win.time 143 | ) 144 | 145 | # Calculate delta cepstral coefficients 146 | deltas.output <- tuneR::deltas(melfcc.output) 147 | 148 | # Ensure only same number of time windows are used for MFCC and delta coefficients Also append .wav duration 149 
| mfcc.vector <- 150 | c(as.vector(t(melfcc.output[1:(n.windows - 1), 2:num.cep])), as.vector(t(deltas.output[1:(n.windows - 1), 2:num.cep])), wav.dur) 151 | mfcc.vector.list[[x]] <- mfcc.vector 152 | } 153 | 154 | mfcc.output <- mfcc.vector.list 155 | 156 | mfcc.output.df <- do.call(rbind.data.frame, mfcc.output) 157 | colnames(mfcc.output.df) <- 158 | seq(from = 1, 159 | to = ncol(mfcc.output.df), 160 | by = 1) 161 | 162 | mfcc.output.df <- cbind.data.frame(class, mfcc.output.df) 163 | return(mfcc.output.df) 164 | } 165 | 166 | } 167 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "gibbonR: An R package for the automated detection and classification of female gibbon calls from long-term acoustic recordings" 3 | output: 4 | github_document: 5 | toc: true 6 | toc_depth: 2 7 | --- 8 | 9 | 10 | 11 | ```{r eval=FALSE, include = FALSE} 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "#>", 15 | fig.path = "man/figures/README-", 16 | out.width = "100%" 17 | ) 18 | ``` 19 | 20 | # Authors 21 | Dena J. Clink & Holger Klinck \ 22 | K. Lisa Yang Center for Conservation Bioacoustics, Cornell Lab of Ornithology, Cornell University 23 | 24 | # Package description 25 | 26 | The field of bioacoustics is inherently multidisciplinary and relies on 27 | computer scientists, engineers, and ecologists. This package is directed 28 | towards ecologists who are interested in incorporating bioacoustics into 29 | their research, but may not have the skills or training. The goal for 30 | the creation of this package was to make commonly used signal processing 31 | techniques and various machine learning algorithms readily available in R for 32 | anyone interested in using bioacoustics in their research. 
33 | 34 | 35 | ```{r eval=FALSE, include = FALSE} 36 | knitr::opts_chunk$set( 37 | collapse = TRUE, 38 | comment = "#>" 39 | ) 40 | ``` 41 | 42 | # Tutorial 43 | https://denajgibbon.github.io/gibbonR-tutorial/ 44 | 45 | # Quick start guide 46 | ## You can install the development version from [GitHub](https://github.com/DenaJGibbon) with: 47 | ```{r eval=FALSE} 48 | # install.packages("devtools") 49 | # devtools::install_github("DenaJGibbon/gibbonR") 50 | library(gibbonR) 51 | ``` 52 | 53 | ```{eval=FALSE} 54 | # You need to tell R where to store the zip files on your computer. 55 | destination.file.path.zip <- 56 | "dataBorneoExampleData.zip" 57 | 58 | # You also need to tell R where to save the unzipped files 59 | destination.file.path <- "data/gibbonR/data/" 60 | 61 | # This function will download the data from github 62 | 63 | utils::download.file("https://github.com/DenaJGibbon/BorneoExampleData/archive/master.zip", 64 | destfile = destination.file.path.zip) 65 | 66 | # This function will unzip the file 67 | utils::unzip(zipfile = destination.file.path.zip, 68 | exdir = destination.file.path) 69 | 70 | # Examine the contents 71 | list.of.sound.files <- list.files(paste(destination.file.path, 72 | "BorneoExampleData-master", "data", sep = 73 | "/"), 74 | full.names = T) 75 | list.of.sound.files 76 | 77 | ``` 78 | 79 | Use this function to read in the .RDA file and save it as an R object from https://stackoverflow.com/questions/5577221/how-can-i-load-an-object-into-a-variable-name-that-i-specify-from-an-r-data-file 80 | 81 | ```{r eval=FALSE} 82 | loadRData <- function(fileName) { 83 | #loads an RData file, and returns it 84 | load(fileName) 85 | get(ls()[ls() != "fileName"]) 86 | } 87 | ``` 88 | 89 | This function will load the entire list of r data files 90 | ```{r eval=FALSE} 91 | list.rda.files <- list() 92 | for(x in 1:length(list.of.sound.files)){ 93 | list.rda.files[[x]] <- loadRData(list.of.sound.files[[x]]) 94 | } 95 | ``` 96 | 97 | Assign each rda an 
informative name 98 | ```{r eval=FALSE, warning=FALSE} 99 | multi.class.list <- list.rda.files[[1]] 100 | S11_20180219_060002_1800sto3600s <- list.rda.files[[2]] 101 | ``` 102 | 103 | Now we create a directory with the training .wav files 104 | ```{r eval=FALSE, warning=FALSE} 105 | TrainingDataDirectory <- "data/gibbonR/data/BorneoMultiClass" 106 | 107 | for(a in 1:length(multi.class.list)){ 108 | Temp.element <- multi.class.list[[a]] 109 | writeWave(Temp.element[[2]], paste(TrainingDataDirectory,Temp.element[[1]],sep='/')) 110 | } 111 | 112 | ``` 113 | 114 | 115 | # Part 1. Training Data with Labeled .wav clips 116 | ### Read in clips and calculate MFCCs 117 | ```{r eval=FALSE, echo = T, results = 'hide' } 118 | TrainingWavFilesDir <- 119 | "data/gibbonR/data/BorneoMultiClass/" 120 | 121 | trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg='standard') 122 | 123 | trainingdata$class <- as.factor(trainingdata$class) 124 | ``` 125 | 126 | ### Compare Random Forest and Support Vector Machine for Supervised Classification 127 | ```{r eval=FALSE } 128 | 129 | trainingdata$class <- as.factor(trainingdata$class) 130 | 131 | 132 | ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial", 133 | cross = 25, 134 | probability = TRUE) 135 | 136 | print(paste('SVM accuracy',ml.model.svm$tot.accuracy)) 137 | 138 | 139 | ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class) 140 | 141 | 142 | print(ml.model.rf) 143 | ``` 144 | 145 | # Part 2. Run the detector/classifier 146 | 147 | ## Part 2a. 
Feature extraction 148 | ```{r eval=FALSE } 149 | # Specify the folder where the training data will be saved 150 | TrainingDataFolderLocation <- "data/gibbonR/data/TrainingDataFromRavenSelectionTables/" 151 | 152 | TrainingDataMFCC <- MFCCFunction(input.dir= TrainingDataFolderLocation, min.freq = 400, max.freq = 1600,win.avg="standard") 153 | 154 | TrainingDataMFCC$class <- as.factor(TrainingDataMFCC$class) 155 | 156 | ``` 157 | 158 | ## Part 2b. Run DetectClassify 159 | ```{r eval=FALSE} 160 | 161 | TestFileDirectory <- '/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTestFiles' 162 | 163 | OutputDirectory <- "data/gibbonR/data/DetectAndClassifyOutput" 164 | 165 | gibbonR(input=TestFileDirectory, 166 | feature.df=TrainingDataMFCC, 167 | model.type.list=c('SVM','RF'), 168 | tune = TRUE, 169 | short.wav.duration=300, 170 | target.signal = c("female.gibbon"), 171 | min.freq = 400, max.freq = 1600, 172 | noise.quantile.val=0.15, 173 | minimum.separation =3, 174 | n.windows = 9, num.cep = 12, 175 | spectrogram.window =160, 176 | pattern.split = ".wav", 177 | min.signal.dur = 3, 178 | max.sound.event.dur = 25, 179 | maximum.separation =1, 180 | probability.thresh.svm = 0.15, 181 | probability.thresh.rf = 0.15, 182 | wav.output = "TRUE", 183 | output.dir =OutputDirectory, 184 | swift.time=TRUE,time.start=5,time.stop=10, 185 | write.table.output=FALSE,verbose=TRUE, 186 | random.sample='NA') 187 | 188 | 189 | ``` 190 | 191 | 192 | # Part 3. Data visualization 193 | 194 | ## Part 3a. Create a UMAP plot colored by class 195 | ```{r eval=FALSE } 196 | library(gibbonR) 197 | library(ggpubr) 198 | gibbonID(input.dir="data/gibbonR/data/MultipleSoundClasses/",output.dir="data/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering') 199 | 200 | ``` 201 | 202 | ## Part 3b. 
Create a UMAP plot colored by affinity propagation clustering 203 | ```{r eval=FALSE } 204 | library(gibbonR) 205 | library(ggpubr) 206 | library(apcluster) 207 | gibbonID(input.dir="data/gibbonR/data/MultipleSoundClasses/",output.dir="data/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',class='affinity.fixed', q.fixed=0.1,add.spectrograms=TRUE,min.freq=400,max.freq=1600) 208 | 209 | ``` 210 | 211 | 212 | ### How to cite 213 | 214 | This package is currently in development, with submission to JOSS planned shortly. In the interim, please cite the arXiv preprint: 215 | 216 | Clink, D. J. & H. Klinck. (2019). gibbonR: An R package for the detection and classification of acoustic signals using machine learning. arXiv, 1906.02572. 217 | https://doi.org/10.48550/arXiv.1906.02572 218 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | gibbonR: An R package for the automated detection and classification of 2 | female gibbon calls from long-term acoustic recordings 3 | ================ 4 | 5 | - [Authors](#authors) 6 | - [Package description](#package-description) 7 | - [Tutorial](#tutorial) 8 | - [Quick start guide](#quick-start-guide) 9 | - [You can install the development version from GitHub 10 | with:](#you-can-install-the-development-version-from-github-with) 11 | - [Part 1. Training Data with Labeled .wav 12 | clips](#part-1-training-data-with-labeled-wav-clips) 13 | - [Part 2. Run the 14 | detector/classifier](#part-2-run-the-detectorclassifier) 15 | - [Part 2a. Feature extraction](#part-2a-feature-extraction) 16 | - [Part 2b. Run DetectClassify](#part-2b-run-detectclassify) 17 | - [Part 3. Data visualization](#part-3-data-visualization) 18 | - [Part 3a. Create a UMAP plot colored by 19 | class](#part-3a-create-a-umap-plot-colored-by-class) 20 | - [Part 3b. 
Create a UMAP plot colored by affinity propagation 21 | clustering](#part-3b-create-a-umap-plot-colored-by-affinity-propagation-clustering) 22 | 23 | 24 | 25 | # Authors 26 | 27 | Dena J. Clink & Holger Klinck 28 | K. Lisa Yang Center for Conservation Bioacoustics, Cornell Lab of 29 | Ornithology, Cornell University 30 | 31 | # Package description 32 | 33 | The field of bioacoustics is inherently multidisciplinary and relies on 34 | computer scientists, engineers, and ecologists. This package is directed 35 | towards ecologists who are interested in incorporating bioacoustics into 36 | their research, but may not have the skills or training. The goal for 37 | the creation of this package was to make commonly used signal processing 38 | techniques and various machine learning algorithms readily available in 39 | R for anyone interested in using bioacoustics in their research. 40 | 41 | # Tutorial 42 | 43 | 44 | 45 | # Quick start guide 46 | 47 | ## You can install the development version from [GitHub](https://github.com/DenaJGibbon) with: 48 | 49 | ``` r 50 | # install.packages("devtools") 51 | # devtools::install_github("DenaJGibbon/gibbonR") 52 | library(gibbonR) 53 | ``` 54 | 55 | ``` 56 | # You need to tell R where to store the zip files on your computer. 
57 | destination.file.path.zip <- 58 | "dataBorneoExampleData.zip" 59 | 60 | # You also need to tell R where to save the unzipped files 61 | destination.file.path <- "data/gibbonR/data/" 62 | 63 | # This function will download the data from github 64 | 65 | utils::download.file("https://github.com/DenaJGibbon/BorneoExampleData/archive/master.zip", 66 | destfile = destination.file.path.zip) 67 | 68 | # This function will unzip the file 69 | utils::unzip(zipfile = destination.file.path.zip, 70 | exdir = destination.file.path) 71 | 72 | # Examine the contents 73 | list.of.sound.files <- list.files(paste(destination.file.path, 74 | "BorneoExampleData-master", "data", sep = 75 | "/"), 76 | full.names = T) 77 | list.of.sound.files 78 | ``` 79 | 80 | Use this function to read in the .RDA file and save it as an R object 81 | from 82 | 83 | 84 | ``` r 85 | loadRData <- function(fileName) { 86 | #loads an RData file, and returns it 87 | load(fileName) 88 | get(ls()[ls() != "fileName"]) 89 | } 90 | ``` 91 | 92 | This function will load the entire list of r data files 93 | 94 | ``` r 95 | list.rda.files <- list() 96 | for(x in 1:length(list.of.sound.files)){ 97 | list.rda.files[[x]] <- loadRData(list.of.sound.files[[x]]) 98 | } 99 | ``` 100 | 101 | Assign each rda an informative name 102 | 103 | ``` r 104 | multi.class.list <- list.rda.files[[1]] 105 | S11_20180219_060002_1800sto3600s <- list.rda.files[[2]] 106 | ``` 107 | 108 | Now we create a directory with the training .wav files 109 | 110 | ``` r 111 | TrainingDataDirectory <- "data/gibbonR/data/BorneoMultiClass" 112 | 113 | for(a in 1:length(multi.class.list)){ 114 | Temp.element <- multi.class.list[[a]] 115 | writeWave(Temp.element[[2]], paste(TrainingDataDirectory,Temp.element[[1]],sep='/')) 116 | } 117 | ``` 118 | 119 | # Part 1. 
Training Data with Labeled .wav clips 120 | 121 | ### Read in clips and calculate MFCCs 122 | 123 | ``` r 124 | TrainingWavFilesDir <- 125 | "data/gibbonR/data/BorneoMultiClass/" 126 | 127 | trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg='standard') 128 | 129 | trainingdata$class <- as.factor(trainingdata$class) 130 | ``` 131 | 132 | ### Compare Random Forest and Support Vector Machine for Supervised Classification 133 | 134 | ``` r 135 | trainingdata$class <- as.factor(trainingdata$class) 136 | 137 | 138 | ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial", 139 | cross = 25, 140 | probability = TRUE) 141 | 142 | print(paste('SVM accuracy',ml.model.svm$tot.accuracy)) 143 | 144 | 145 | ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class) 146 | 147 | 148 | print(ml.model.rf) 149 | ``` 150 | 151 | # Part 2. Run the detector/classifier 152 | 153 | ## Part 2a. Feature extraction 154 | 155 | ``` r 156 | # Specify the folder where the training data will be saved 157 | TrainingDataFolderLocation <- "data/gibbonR/data/TrainingDataFromRavenSelectionTables/" 158 | 159 | TrainingDataMFCC <- MFCCFunction(input.dir= TrainingDataFolderLocation, min.freq = 400, max.freq = 1600,win.avg="standard") 160 | 161 | TrainingDataMFCC$class <- as.factor(TrainingDataMFCC$class) 162 | ``` 163 | 164 | ## Part 2b. 
Run DetectClassify 165 | 166 | ``` r 167 | TestFileDirectory <- '/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTestFiles' 168 | 169 | OutputDirectory <- "data/gibbonR/data/DetectAndClassifyOutput" 170 | 171 | gibbonR(input=TestFileDirectory, 172 | feature.df=TrainingDataMFCC, 173 | model.type.list=c('SVM','RF'), 174 | tune = TRUE, 175 | short.wav.duration=300, 176 | target.signal = c("female.gibbon"), 177 | min.freq = 400, max.freq = 1600, 178 | noise.quantile.val=0.15, 179 | minimum.separation =3, 180 | n.windows = 9, num.cep = 12, 181 | spectrogram.window =160, 182 | pattern.split = ".wav", 183 | min.signal.dur = 3, 184 | max.sound.event.dur = 25, 185 | maximum.separation =1, 186 | probability.thresh.svm = 0.15, 187 | probability.thresh.rf = 0.15, 188 | wav.output = "TRUE", 189 | output.dir =OutputDirectory, 190 | swift.time=TRUE,time.start=5,time.stop=10, 191 | write.table.output=FALSE,verbose=TRUE, 192 | random.sample='NA') 193 | ``` 194 | 195 | # Part 3. Data visualization 196 | 197 | ## Part 3a. Create a UMAP plot colored by class 198 | 199 | ``` r 200 | library(gibbonR) 201 | library(ggpubr) 202 | gibbonID(input.dir="data/gibbonR/data/MultipleSoundClasses/",output.dir="data/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering') 203 | ``` 204 | 205 | ## Part 3b. Create a UMAP plot colored by affinity propagation clustering 206 | 207 | ``` r 208 | library(gibbonR) 209 | library(ggpubr) 210 | library(apcluster) 211 | gibbonID(input.dir="data/gibbonR/data/MultipleSoundClasses/",output.dir="data/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',class='affinity.fixed', q.fixed=0.1,add.spectrograms=TRUE,min.freq=400,max.freq=1600) 212 | ``` 213 | 214 | ### How to cite 215 | 216 | This package is currently in development, with submission to JOSS 217 | planned shortly. In the interim, please cite the arXiv preprint: 218 | 219 | Clink, D. J. & H. 
Klinck. (2019). gibbonR: An R package for the 220 | detection and classification of acoustic signals using machine learning. 221 | arXiv, 1906.02572. 222 | -------------------------------------------------------------------------------- /R/gibbonID.R: -------------------------------------------------------------------------------- 1 | #' gibbonID 2 | #' @description Function that extracts MFCCs as features from .wav files and plots them using UMAP. Points can be colored using affinity propagation clustering or by class labels. With the option to overlay spectrogram images. 3 | #' @param input.dir Directory where the .wav file clips are location 4 | #' @param output.dir Directory to save the spectrogram thumbnails. 5 | #' @param min.freq Minimum frequency (Hz) of signals of interest 6 | #' @param max.freq Maximum frequency (Hz) of signals of interest 7 | #' @param pattern Pattern to search fo rin input.dir; default is '.wav' 8 | #' @param add.spectrograms Logical; overlay spectrogram images 9 | #' @param class Option of 'affinity.adaptive', 'fixed.affinity' or 'no.clustering'; Specifies whether to do adaptive or fixed 'q' affinity propagation clustering, or to color points by class label. 10 | #' @param q.fixed If class=='fixed.affinity' specify value of 'q'. See ??apcluster for more details. 11 | #' @param win.avg Option of 'false','mean.sd' or 'standard'; whether to return MFCCs for each non-overlapping time window, calculate mean and SD over each MFCC or calculated MFCCs for a set number of time windows. 12 | #' @param spec.ratio Value to scale the spectrograms. 
13 | #' 14 | #' @return 15 | #' @export 16 | #' 17 | #' @examples gibbonID(input.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",win.avg='standard',add.spectrograms=TRUE,min.freq=400,max.freq=1600,class='no.clustering') 18 | 19 | gibbonID <- 20 | function(input.dir, 21 | output.dir, 22 | min.freq, 23 | max.freq, 24 | pattern = '.wav', 25 | add.spectrograms = FALSE, 26 | class = 'fixed', 27 | q.fixed = 0.1, 28 | win.avg = 'standard', 29 | spec.ratio=40) 30 | { 31 | Focal.exemplars <- 32 | list.files(input.dir, full.names = T, pattern = pattern) 33 | 34 | 35 | print('Step 1 Calculating MFCCs') 36 | AcousticSignalsMFCCs <- MFCCFunction( 37 | input.dir = input.dir, 38 | min.freq = min.freq, 39 | max.freq = max.freq, 40 | num.cep = 12, 41 | win.avg = win.avg 42 | ) 43 | 44 | if (class == 'affinity.adaptive') { 45 | print('Step 2 Computing unsupervised clustering') 46 | 47 | q.val.seq <- seq(from = 0.1, to = 0.9, by = 0.1) 48 | 49 | AcousticSignal.sil.df <- data.frame() 50 | for (a in 1:length(q.val.seq)) { 51 | print(a) 52 | AcousticSignalsAP <- 53 | apcluster::apcluster( 54 | negDistMat(r = 2), 55 | q = q.val.seq[a], 56 | AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))], 57 | maxits = 100000, 58 | convits = 10000 59 | ) 60 | 61 | 62 | sil <- 63 | cluster::silhouette(x = AcousticSignalsAP@idx, 64 | dist = dist(AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))])) 65 | 66 | sil.val <- (summary(sil)$avg.width) 67 | temp.sil.df <- cbind.data.frame(sil.val, q.val.seq[a]) 68 | AcousticSignal.sil.df <- 69 | rbind.data.frame(AcousticSignal.sil.df, temp.sil.df) 70 | } 71 | 72 | MaxSil <- which.max(AcousticSignal.sil.df$sil.val) 73 | 74 | 75 | AcousticSignalsAP <- 76 | apcluster::apcluster( 77 | negDistMat(r = 2), 78 | q = q.val.seq[MaxSil], 79 | AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))], 80 | maxits = 100000, 81 | 
convits = 10000 82 | ) 83 | 84 | print(q.val.seq[MaxSil]) 85 | print(paste('N clusters=', length(AcousticSignalsAP@exemplars))) 86 | AcousticSignalsMFCCs$class <- as.factor(AcousticSignalsAP@idx) 87 | } 88 | 89 | if (class == 'affinity.fixed') { 90 | print('Step 2 Computing unsupervised clustering with fixed q') 91 | 92 | AcousticSignalsAP <- 93 | apcluster::apcluster( 94 | negDistMat(r = 2), 95 | q = q.fixed, 96 | AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))], 97 | maxits = 100000, 98 | convits = 10000 99 | ) 100 | 101 | AcousticSignalsMFCCs$class <- as.factor(AcousticSignalsAP@idx) 102 | 103 | } 104 | 105 | if (class == 'no.clustering') { 106 | print('Step 2 Using class labels for clustering') 107 | AcousticSignalsMFCCs$class <- AcousticSignalsMFCCs$class 108 | } 109 | 110 | AcousticSignals.umap <- 111 | umap::umap( 112 | AcousticSignalsMFCCs[, c(2:ncol(AcousticSignalsMFCCs))], 113 | n_neighbors = 12, 114 | controlscale = TRUE, 115 | scale = 3 116 | ) 117 | 118 | plot.for.AcousticSignals <- 119 | cbind.data.frame(AcousticSignals.umap$layout[, 1:2], 120 | AcousticSignalsMFCCs$class) 121 | 122 | colnames(plot.for.AcousticSignals) <- 123 | c("Dim.1", "Dim.2", "class") 124 | 125 | plot.for.AcousticSignals$class <- 126 | as.factor(plot.for.AcousticSignals$class) 127 | 128 | my_plot_AcousticSignals <- 129 | ggpubr::ggscatter( 130 | data = plot.for.AcousticSignals, 131 | x = "Dim.1", 132 | y = "Dim.2", 133 | color = "class" 134 | ) + 135 | geom_point(size = 3) + 136 | scale_color_manual(values = matlab::jet.colors (length( 137 | unique(plot.for.AcousticSignals$class) 138 | ))) + 139 | theme_bw() + xlab('UMAP: Dim 1') + ylab('UMAP: Dim 2') + 140 | ggtitle(paste('N Clusters =', length(unique( 141 | AcousticSignalsMFCCs$class 142 | )))) + 143 | theme( 144 | axis.text.x = element_blank(), 145 | #remove x axis labels 146 | axis.ticks.x = element_blank(), 147 | #remove x axis ticks 148 | axis.text.y = element_blank(), 149 | #remove y axis labels 150 | axis.ticks.y = 
element_blank() #remove y axis ticks 151 | )+labs(color="Cluster") 152 | 153 | if (add.spectrograms == TRUE) { 154 | print('Step 3 Creating Spectrograms ') 155 | 156 | if (!dir.exists(output.dir)) { 157 | dir.create(output.dir) 158 | print(paste('Created output dir', output.dir)) 159 | 160 | for (b in 1:length(Focal.exemplars)) { 161 | #print(b) 162 | short.wav <- tuneR::readWave(Focal.exemplars[[b]]) 163 | 164 | png(filename = paste(output.dir, b, 'Focal.png', sep = ''), 165 | width = 1000) 166 | temp.spec <- 167 | signal::specgram( 168 | short.wav@left, 169 | Fs = short.wav@samp.rate, 170 | n = 1024, 171 | overlap = 0 172 | ) 173 | plot( 174 | temp.spec, 175 | xlab = "", 176 | ylab = "", 177 | ylim = c(min.freq, max.freq), 178 | rev(gray(0:512 / 512)), 179 | axes = F, 180 | useRaster = TRUE 181 | ) 182 | 183 | graphics.off() 184 | 185 | } 186 | } else { 187 | print(paste(output.dir, 'already exists')) 188 | } 189 | 190 | 191 | 192 | print('Adding Spectrograms to Plot Step 3 of 3') 193 | 194 | col.index <- unique(plot.for.AcousticSignals$class) 195 | xrange <- 196 | (abs(range(plot.for.AcousticSignals$Dim.1)[1]) + abs(range(plot.for.AcousticSignals$Dim.1)[2])) / 197 | spec.ratio 198 | yrange <- 199 | (abs(range(plot.for.AcousticSignals$Dim.2)[1]) + abs(range(plot.for.AcousticSignals$Dim.2)[2])) / 200 | spec.ratio 201 | color.vals <- 202 | matlab::jet.colors (length(unique(plot.for.AcousticSignals$class))) 203 | 204 | for (y in 1:length(Focal.exemplars)) { 205 | #print(y, 'out of', length(Focal.exemplars)) 206 | figure1.png <- 207 | magick::image_trim(magick::image_read(paste(output.dir, y, 'Focal.png', sep = 208 | ''))) 209 | figure1.png <- 210 | magick::image_modulate(figure1.png, brightness = 300) 211 | 212 | figure1.png <- 213 | magick::image_border(figure1.png, col = color.vals[which(col.index == plot.for.AcousticSignals[y, ]$class)]) 214 | 215 | figure1.png <- as.raster(figure1.png) 216 | #exemplar.index <- Focal.cluster.results@idx[y] 217 | 218 | 
clust.df.subset <- plot.for.AcousticSignals[y, ] 219 | xmin = clust.df.subset$Dim.1 - xrange 220 | xmax = clust.df.subset$Dim.1 + xrange 221 | ymin = clust.df.subset$Dim.2 + yrange 222 | ymax = clust.df.subset$Dim.2 - yrange 223 | my_plot_AcousticSignals <- 224 | my_plot_AcousticSignals + annotation_raster(figure1.png, xmin, xmax, ymin, ymax) 225 | } 226 | } 227 | ggsave( 228 | "DetectionsAffinityPlot.png", 229 | my_plot_AcousticSignals, 230 | width = 4.25, 231 | height = 3.25, 232 | dpi = 1200 233 | ) 234 | 235 | 236 | return(my_plot_AcousticSignals) 237 | } 238 | -------------------------------------------------------------------------------- /R/DetectBLED.R: -------------------------------------------------------------------------------- 1 | #' DetectBLED 2 | #' @description Function to do band-limited energy summation to find sound events. This function only identifies sound events based on frequency and duration so is not expected to have high precision. 3 | #' @param input Either full path to directory containing .wav files or a list with file name as first element and .wav as second element 4 | #' @param input.type Either 'directory', 'list' or 'wav' 5 | #' @param min.freq Minimum frequency (Hz) of signal of interest 6 | #' @param max.freq Maximum frequency (Hz) of signal of interest 7 | #' @param pattern.split Pattern to find and remove to create file name; currently set to ".rda" 8 | #' @param output Either 'spectro', 'table' or 'wav' 9 | #' @param noise.quantile.val A quantile value between 0 to 1 for the band energy summation 10 | #' @param spectrogram.window Window length for spectrogram analysis (input to spectro fuction from 'seewave') 11 | #' @param subsample.dur Duration (s) to divide longer sound file to increase processing efficiency 12 | #' @param training.label Label to append to saved .wav files 13 | #' @param min.signal.dur The minimum duration (s) sound events must be to be considered sound events 14 | #' @param max.sound.event.dur The 
maximum duration (s) sound events must be to be considered sound events 15 | #' @param wav.output Logical; output wave file of sound events? 16 | #' @param swift.time If file name is in structure recorder_YYYYMMDD_HHMMSS can subset files based on specific times 17 | #' @param time.start Time recordings start (hour) 18 | #' @param time.stop Time recordings stop (hour) 19 | #' @param write.table.output Logical; write Raven selection tables to output directory 20 | #' @param verbose Logical; print out steps 21 | #' @param random.sample If a random subset of files in a directory are desired specify a value, otherwise 'NA' 22 | #' @param output.dir Specified output directory; set to current working directory 23 | #' @export 24 | #' @import e1071 25 | #' @import tuneR 26 | #' @import seewave 27 | #' @import tuneR 28 | #' @import stringr 29 | #' @examples 30 | 31 | DetectBLED <- function(input,input.type ='wav', 32 | min.freq = 200, 33 | max.freq = 6000, 34 | noise.quantile.val = 0.75, 35 | spectrogram.window = 1600, 36 | subsample.dur = 300, 37 | training.label = 'noise', 38 | pattern.split = ".wav", 39 | min.signal.dur = 1, 40 | max.sound.event.dur = 6, 41 | wav.output = "TRUE", 42 | output.dir = getwd(), 43 | swift.time = TRUE, 44 | time.start = 18, 45 | time.stop = 23, 46 | write.table.output = TRUE, 47 | verbose = TRUE, 48 | random.sample = 100) { 49 | if (wav.output == "TRUE" & output.dir == "") { 50 | stop("Specify output directory") 51 | } 52 | 53 | if (input.type == 'list') { 54 | list.file.input <- unlist(input) 55 | nslash <- str_count(input, pattern = '/') + 1 56 | list.file.input.short <- 57 | str_split_fixed(input, pattern = '/', nslash)[, nslash] 58 | } 59 | 60 | if (input.type == "directory") { 61 | list.file.input <- 62 | list.files(input, full.names = TRUE, recursive = T) 63 | list.file.input.short <- 64 | list.files(input, full.names = FALSE, recursive = T) 65 | } 66 | 67 | if (input.type == "wav") { 68 | list.file.input <- input 69 | } 70 | 71 | 72 | 
if (swift.time == TRUE) { 73 | number.of.slash <- str_count(list.file.input, pattern = "/")[1] 74 | base.file.name.all <- 75 | str_split_fixed(list.file.input, 76 | pattern = "/", 77 | n = (number.of.slash + 1))[, number.of.slash + 1] 78 | temp.name.all <- 79 | stringr::str_split_fixed(base.file.name.all, pattern = pattern.split, n = 2)[, 1] 80 | times <- str_split_fixed(temp.name.all, pattern = '_', n = 3)[, 3] 81 | times <- as.numeric(substr(times, start = 1, stop = 2)) 82 | list.file.input <- 83 | list.file.input[which(times >= time.start & times <= time.stop)] 84 | } 85 | 86 | if (length(list.file.input) == 0) { 87 | print("No sound files detected") 88 | break 89 | } 90 | 91 | if (is.numeric(random.sample) == TRUE) { 92 | list.file.input <- 93 | list.file.input[sample(1:length(list.file.input), random.sample, replace = 94 | F)] 95 | } 96 | 97 | 98 | for (i in 1:length(list.file.input)) { 99 | timing.df <- data.frame() 100 | 101 | 102 | contains.slash <- str_detect(list.file.input[i], pattern = "/") 103 | 104 | if (contains.slash == 'TRUE') { 105 | number.of.slash <- str_count(list.file.input[i], pattern = "/") 106 | base.file.name <- 107 | str_split_fixed(list.file.input[i], 108 | pattern = "/", 109 | n = (number.of.slash + 1))[, number.of.slash + 1] 110 | temp.name <- 111 | stringr::str_split_fixed(base.file.name, pattern = pattern.split, n = 2)[1] 112 | } else{ 113 | temp.name <- 114 | stringr::str_split_fixed(list.file.input[i], pattern = pattern.split, n = 2)[1] 115 | 116 | } 117 | 118 | # Convert .wav file to spectrogram 119 | if (verbose == TRUE) { 120 | print(paste( 121 | "Computing spectrogram for file", 122 | temp.name, 123 | i, 124 | 'out of', 125 | length(list.file.input) 126 | )) 127 | } 128 | 129 | RavenSelectionTableDF <- data.frame() 130 | temp.wav <- readWave(list.file.input[i]) 131 | 132 | sound_length <- 133 | round(length(temp.wav@left) / temp.wav@samp.rate, 2) 134 | 135 | cutwave.list <- 136 | c(seq( 137 | from = 1, 138 | to = 
(sound_length), 139 | by = subsample.dur 140 | ), sound_length) 141 | 142 | short.sound.files <- lapply(1:(length(cutwave.list) - 1), 143 | function(i) 144 | extractWave( 145 | temp.wav, 146 | from = cutwave.list[i], 147 | to = cutwave.list[i + 148 | 1], 149 | xunit = c("time"), 150 | plot = F, 151 | output = "Wave" 152 | )) 153 | 154 | for (j in 1:length(short.sound.files)) { 155 | swift.spectro <- 156 | spectro( 157 | short.sound.files[[j]], 158 | wl = spectrogram.window, 159 | overlap = 0, 160 | plot = F 161 | ) 162 | 163 | 164 | # Identify the frequency band of interest 165 | min.freq.cols <- 166 | which.min(abs(round(swift.spectro$freq, digits = 2) - (min.freq / 1000))) 167 | max.freq.cols <- 168 | which.min(abs(round(swift.spectro$freq, digits = 2) - (max.freq / 1000))) 169 | 170 | 171 | # Calculate the column sums for each time window 172 | col.sum <- 173 | colSums(swift.spectro$amp[min.freq.cols:max.freq.cols,]) 174 | 175 | 176 | # Calculate noise value 177 | noise.value <- 178 | quantile(unlist(col.sum), c(noise.quantile.val)) 179 | 180 | # Determine which values are above specified cutoff 181 | list.sub <- which(col.sum > noise.value) 182 | call.timing <- 183 | split(list.sub, cumsum(c(1, diff(list.sub)) != 1)) 184 | 185 | # Calculate minimum signal duration to be considered signal 186 | if( length(which(swift.spectro$time > 1))>0){ 187 | number.time.windows.1sec <- min(which(swift.spectro$time > 1)) 188 | signal.dur <- number.time.windows.1sec * min.signal.dur 189 | 190 | # Combine all potential sound events into a list 191 | call.timing.list <- 192 | as.list(call.timing[which(sapply(call.timing, length) > signal.dur)]) 193 | 194 | # If user indicated maximum duration create list of sound events under certain duration 195 | if (max.sound.event.dur != 'NULL') { 196 | sound.event.index.max <- 197 | which.min(abs(swift.spectro$time - max.sound.event.dur)) 198 | call.timing.list <- 199 | call.timing.list[which(sapply(call.timing.list, length) < 
sound.event.index.max)] 200 | } 201 | } else{ 202 | call.timing.list <- list() 203 | } 204 | 205 | if (length(call.timing.list) >= 1) { 206 | subsamps <- lapply(1:length(call.timing.list), 207 | function(i) 208 | extractWave( 209 | short.sound.files[[j]], 210 | from = swift.spectro$time[min(call.timing.list[[i]])], 211 | to = swift.spectro$time[max(call.timing.list[[i]])], 212 | xunit = c("time"), 213 | plot = F, 214 | output = "Wave" 215 | )) 216 | 217 | if (j == 1) { 218 | if (wav.output == "TRUE") 219 | lapply(1:length(subsamps), 220 | function(i) 221 | writeWave( 222 | subsamps[[i]], 223 | filename = paste( 224 | output.dir, 225 | training.label, 226 | '_', 227 | paste( 228 | temp.name, 229 | round(swift.spectro$t[min(call.timing.list[[i]])],2), 230 | round(swift.spectro$t[max(call.timing.list[[i]])],2), 231 | '.wav', 232 | sep = '_' 233 | ), 234 | sep = '' 235 | ), 236 | extensible = FALSE 237 | )) 238 | } 239 | 240 | if (j > 1) { 241 | if (wav.output == "TRUE") 242 | lapply(1:length(subsamps), 243 | function(i) 244 | writeWave( 245 | subsamps[[i]], 246 | filename = paste( 247 | output.dir, 248 | training.label, 249 | '_', 250 | paste( 251 | temp.name, 252 | round( (swift.spectro$t[min(call.timing.list[[i]])] + 253 | (subsample.dur * (j - 1))),2) , 254 | round((swift.spectro$t[max(call.timing.list[[i]])] + 255 | (subsample.dur * (j - 1))),2), 256 | '.wav', 257 | sep = '_' 258 | ), 259 | sep = '' 260 | ), 261 | extensible = FALSE 262 | )) 263 | } 264 | 265 | timing.df <- lapply(1:length(call.timing.list), 266 | function(i) 267 | cbind.data.frame(swift.spectro$t[min(call.timing.list[[i]])], 268 | swift.spectro$t[max(call.timing.list[[i]])])) 269 | 270 | timing.df <- do.call(rbind.data.frame, timing.df) 271 | 272 | colnames(timing.df) <- c('start.time', 'stop.time') 273 | file.name <- rep(temp.name, nrow(timing.df)) 274 | timing.df <- cbind.data.frame(timing.df, file.name) 275 | 276 | if (j > 1) { 277 | timing.df$start.time <- timing.df$start.time + 
(subsample.dur * (j - 1)) 278 | timing.df$stop.time <- 279 | timing.df$stop.time + (subsample.dur * (j - 1)) 280 | } 281 | 282 | timing.df <- rbind.data.frame(timing.df) 283 | Selection <- seq(1, nrow(timing.df)) 284 | View <- rep('Spectrogram 1', nrow(timing.df)) 285 | Channel <- rep(1, nrow(timing.df)) 286 | MinFreq <- rep(min.freq, nrow(timing.df)) 287 | MaxFreq <- rep(max.freq, nrow(timing.df)) 288 | timing.df.temp <- 289 | cbind.data.frame(Selection, View, Channel, MinFreq, MaxFreq, timing.df) 290 | 291 | timing.df.temp <- 292 | timing.df.temp[, c( 293 | "Selection", 294 | "View", 295 | "Channel", 296 | "start.time", 297 | "stop.time", 298 | "MinFreq", 299 | "MaxFreq", 300 | "file.name" 301 | )] 302 | 303 | colnames(timing.df.temp) <- 304 | c( 305 | "Selection", 306 | "View", 307 | "Channel", 308 | "Begin Time (s)", 309 | "End Time (s)", 310 | "Low Freq (Hz)", 311 | "High Freq (Hz)", 312 | "File Name" 313 | ) 314 | 315 | 316 | RavenSelectionTableDF <- 317 | rbind.data.frame(RavenSelectionTableDF, timing.df.temp) 318 | RavenSelectionTableDF$Selection <- 319 | seq(1, nrow(RavenSelectionTableDF), 1) 320 | if (write.table.output == TRUE) { 321 | csv.file.name <- 322 | paste(output.dir, '/', temp.name, 'BLED.txt', sep = '') 323 | write.table( 324 | x = RavenSelectionTableDF, 325 | sep = "\t", 326 | file = csv.file.name, 327 | row.names = FALSE, 328 | quote = FALSE 329 | ) 330 | } 331 | rm(subsamps) 332 | } 333 | } 334 | } 335 | print(RavenSelectionTableDF) 336 | rm(RavenSelectionTableDF) 337 | rm(swift.spectro) 338 | rm(temp.wav) 339 | } 340 | -------------------------------------------------------------------------------- /vignettes/gibbonR-tutorial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "gibbonR: An R package for the automated detection and classification of female gibbon calls from long-term acoustic recordings" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_depth: 2 7 | --- 8 | 9 | ```{r 
eval=FALSE,, include = FALSE} 10 | knitr::opts_chunk$set( 11 | collapse = TRUE, 12 | comment = "#>" 13 | ) 14 | ``` 15 | 16 | # Getting started 17 | ## You can install the development version from [GitHub](https://github.com/DenaJGibbon) with: 18 | ```{r eval=FALSE,, echo=T,warning=FALSE, results='hide'} 19 | # install.packages("devtools") 20 | # devtools::install_github("DenaJGibbon/gibbonR") 21 | 22 | library(gibbonR) 23 | ``` 24 | 25 | # Part 1. Prepare Training Data 26 | In 'gibbonR' there are two ways that you can format your training data. The first can be a set of labelled .wav clips with the class indicated in the name of the file (e.g., 'gibbon_01.wav' and 'noise_01.wav'). The second is to have a folder of selection tables created in Raven Pro (K. Lisa Yang Center for Conservation Bioacoustics) and a folder with the associated '.wav' files. For the second approach there must be an annotation column indicating the call type and it is assumed that all signals of interest are annotated, and the rest of the files contain only background noise. 27 | 28 | ## Part 1A. 
Training Data with Labeled .wav clips 29 | ### Read in clips and calculate MFCCs 30 | ```{r eval=FALSE, echo = T, results = 'hide' } 31 | TrainingWavFilesDir <- 32 | "data/MultipleSoundClasses/" 33 | 34 | trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg="TRUE") 35 | 36 | 37 | trainingdata$class <- as.factor(trainingdata$class) 38 | ``` 39 | 40 | ### Compare Random Forest and Support Vector Machine for Supervised Classification 41 | ```{r eval=FALSE, } 42 | 43 | trainingdata$class <- as.factor(trainingdata$class) 44 | 45 | 46 | ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial", 47 | cross = 25, 48 | probability = TRUE) 49 | 50 | print(paste('SVM accuracy',ml.model.svm$tot.accuracy)) 51 | 52 | 53 | ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class) 54 | 55 | 56 | print(ml.model.rf) 57 | ``` 58 | 59 | ## Part 1B. Training Data with Raven Selection Tables 60 | ### Prepare training data from labeled annotations 61 | ```{r eval=FALSE,,eval=F } 62 | # Specify the folder where the training data will be saved 63 | TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables" 64 | 65 | # Directory with annotated selection tables 66 | AnnotatedSelectionTables <- list.files("/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/GibbonTrainingSelectionTables/", 67 | full.names = T) 68 | 69 | # Directory with corresponding .wav files 70 | AnnotatedWaveFiles <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTrainingFiles/",full.names = T) 71 | AnnotatedWaveFilesShort <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTrainingFiles/",full.names = F) 72 | AnnotatedWaveFilesShort <- str_split_fixed(AnnotatedWaveFilesShort,pattern = '.wav', n=2)[,1] 73 | 74 | # Loop to 
cut out the corresponding annotations into short clips 75 | for(i in seq_along(AnnotatedSelectionTables)){ 76 | 77 | # Read in selection table 78 | TempSelectionTable <- read.delim2(AnnotatedSelectionTables[i]) 79 | 80 | # Find the corresponding soundfile 81 | SoundFileIndex <- which(str_detect(AnnotatedSelectionTables[i],AnnotatedWaveFilesShort)) 82 | 83 | TempAnnotateWave <- readWave(AnnotatedWaveFiles[SoundFileIndex]) 84 | 85 | ShortSoundClips <- lapply(seq_len(nrow(TempSelectionTable)), 86 | function(j) extractWave(TempAnnotateWave, 87 | from= as.numeric(TempSelectionTable[j,]$Begin.Time..s.), 88 | to=as.numeric(TempSelectionTable[j,]$End.Time..s.), 89 | xunit = c("time"),plot=FALSE,output="Wave")) 90 | # Write wave files to folder; the table index i is included in the name so clips from different selection tables do not overwrite each other 91 | for(k in seq_along(ShortSoundClips)){ 92 | TempClip <- ShortSoundClips[[k]] 93 | WavFileName <- paste0(TrainingDataFolderLocation,'/female.gibbon_', i, '_', k, '.wav') 94 | writeWave(TempClip,WavFileName,extensible = FALSE) 95 | } 96 | 97 | 98 | } 99 | 100 | ``` 101 | 102 | ### Prepare noise training data from files without target signal 103 | ```{r eval=FALSE} 104 | # Specify the folder where the training data will be saved 105 | TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/" 106 | 107 | # Directory with annotated selection tables 108 | NoiseSelectionTables <- list.files("/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/NoiseSelectionTables/", 109 | full.names = TRUE) 110 | 111 | # Directory with corresponding .wav files 112 | NoiseWaveFiles <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/NoiseFiles/",full.names = TRUE) 113 | NoiseWaveFilesShort <- list.files("/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/NoiseFiles/",full.names = FALSE) 114 | NoiseWaveFilesShort <- str_split_fixed(NoiseWaveFilesShort,pattern = '\\.wav', n=2)[,1] # escape the dot: pattern is a regex, bare '.wav' would also match e.g. 'Xwav' 115 | 116 | for(i in seq_along(NoiseSelectionTables)){ 117 | 118 | # 
Find the corresponding soundfile 119 | SoundFileIndex <- which(str_detect(NoiseSelectionTables[i],NoiseWaveFilesShort)) 120 | 121 | DetectBLED(input=NoiseWaveFiles[SoundFileIndex], 122 | min.freq = 400, 123 | max.freq = 1600, 124 | noise.quantile.val=0.3, 125 | spectrogram.window =512, 126 | pattern.split = ".wav", 127 | min.signal.dur = 3, 128 | max.sound.event.dur = 12, 129 | wav.output = "TRUE", 130 | output.dir = TrainingDataFolderLocation, 131 | swift.time=TRUE, 132 | time.start=06, 133 | time.stop=11, 134 | write.table.output=TRUE, 135 | verbose=TRUE, 136 | random.sample=FALSE) 137 | } 138 | 139 | ``` 140 | 141 | ### Now read in clips based on Raven Selection tables and calculate MFCCs 142 | ```{r eval=FALSE, echo = T, results = 'hide' } 143 | 144 | TrainingWavFilesDir <- 145 | "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/" 146 | 147 | trainingdata <- gibbonR::MFCCFunction(input.dir=TrainingWavFilesDir, min.freq = 400, max.freq = 1600,win.avg="TRUE") 148 | 149 | 150 | trainingdata$class <- as.factor(trainingdata$class) 151 | ``` 152 | 153 | ### Compare Random Forest and Support Vector Machine for Supervised Classification 154 | ```{r eval=FALSE, } 155 | 156 | trainingdata$class <- as.factor(trainingdata$class) 157 | 158 | 159 | ml.model.svm <- e1071::svm(trainingdata[, 2:ncol(trainingdata)], trainingdata$class, kernel = "radial", 160 | cross = 25, 161 | probability = TRUE) 162 | 163 | print(paste('SVM accuracy',ml.model.svm$tot.accuracy)) 164 | 165 | 166 | ml.model.rf <- randomForest::randomForest(x=trainingdata[, 2:ncol(trainingdata)], y = trainingdata$class) 167 | 168 | 169 | print(ml.model.rf) 170 | 171 | 172 | ``` 173 | 174 | # Part 2. Run the detector/classifier 175 | 176 | ## Part 2a. 
Feature extraction 177 | ```{r eval=FALSE, } 178 | # Specify the folder where the training data will be saved 179 | TrainingDataFolderLocation <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/TrainingDataFromRavenSelectionTables/" 180 | 181 | TrainingDataMFCC <- MFCCFunction(input.dir= TrainingDataFolderLocation, min.freq = 400, max.freq = 1600,win.avg="standard") 182 | 183 | TrainingDataMFCC$class <- as.factor(TrainingDataMFCC$class) 184 | ``` 185 | 186 | ## Part 2b. Run DetectClassify 187 | ```{r eval=FALSE, } 188 | 189 | TestFileDirectory <- '/Users/denaclink/Library/CloudStorage/Box-Box/gibbonRSampleFiles/GibbonTestFiles' 190 | 191 | OutputDirectory <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/DetectAndClassifyOutput" 192 | 193 | DetectAndClassify(input=TestFileDirectory, 194 | input.type='directory', 195 | feature.df=TrainingDataMFCC, 196 | model.type.list=c('SVM','RF'), 197 | tune = TRUE, 198 | short.wav.duration=300, 199 | target.signal = c("female.gibbon"), 200 | min.freq = 400, max.freq = 1600, 201 | noise.quantile.val=0.15, 202 | time.window.number =3, 203 | n.windows = 9, num.cep = 12, 204 | spectrogram.window =160, 205 | pattern.split = ".wav", 206 | min.signal.dur = 3, 207 | max.sound.event.dur = 25, 208 | maximum.separation =1, 209 | probability.thresh.svm = 0.15, 210 | probability.thresh.rf = 0.15, 211 | wav.output = "TRUE", 212 | output.dir =OutputDirectory, 213 | swift.time=TRUE,time.start=5,time.stop=10, 214 | write.csv.output=FALSE,verbose=TRUE, 215 | random.sample='NA') 216 | 217 | 218 | ``` 219 | 220 | # Part 3. Calculate performance metrics 221 | 222 | ## Part 3a. 
Prepare data for performance metrics 223 | ```{r eval=FALSE} 224 | # Set location of test file selection tables 225 | input.dir.text.files <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/SelectionTables/GibbonTestSelectionTables" 226 | 227 | Annotatedfiles <- list.files(input.dir.text.files,full.names = TRUE) 228 | 229 | ListOfAnnotatedFilesShort <- list.files(input.dir.text.files,full.names = FALSE) 230 | 231 | nslash <- str_count(Annotatedfiles,pattern = '/')[1]+1 232 | snames <- str_split_fixed(Annotatedfiles,pattern = '/',n=nslash)[,nslash] 233 | 234 | all.detections <- data.frame() 235 | for(x in seq_along(Annotatedfiles)){ 236 | temp.table <- read.delim2(Annotatedfiles[x],fill = TRUE,header = TRUE) 237 | file.name <- str_split_fixed(snames[x],pattern = '[.]',n=2)[,1] 238 | recorder <- str_split_fixed(file.name,pattern='_',n=3)[,1] 239 | date <- str_split_fixed(file.name,pattern='_',n=3)[,2] 240 | time <- str_split_fixed(file.name,pattern='_',n=3)[,3] 241 | 242 | if(nrow(temp.table) > 0){ # fixed misplaced parenthesis: was nrow(temp.table >0), which compared the whole data.frame to 0 243 | temp.table.updated <- cbind.data.frame(file.name,recorder,date,time,temp.table) 244 | } else { 245 | temp.row <- as.data.frame(t(rep('NA',ncol(temp.table)))) # note: fills with the string 'NA', not NA — kept as in original 246 | colnames(temp.row) <- colnames(temp.table) 247 | temp.table.updated <- cbind.data.frame(file.name,recorder,date,time,temp.row) 248 | 249 | } 250 | all.detections <- rbind.data.frame(all.detections,temp.table.updated) 251 | } 252 | 253 | ``` 254 | 255 | ## Part 3b. 
Identify true and false positives 256 | ```{r eval=FALSE, } 257 | OutputDirectory <- "/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/DetectAndClassifyOutput" 258 | 259 | all.combinedprecision.recall.randomiter <- data.frame() 260 | range.secs.start <- 6 261 | range.secs.end <- 6 262 | 263 | ### Detections using band-limited energy summation 264 | gibbondetects <- OutputDirectory 265 | list.ml <- list.files(gibbondetects, full.names = T, pattern='.wav') 266 | 267 | 268 | # Need to focus on gibbons for this validation 269 | nslash <- str_count(list.ml[[1]],'/')+1 270 | list.ml.signals <- str_split_fixed(list.ml,pattern = '/',n=nslash)[,nslash] 271 | 272 | list.ml.signals <- str_split_fixed(list.ml.signals,pattern = '_',n=5)[,4] 273 | 274 | 275 | list.ml <- 276 | list.ml[which(list.ml.signals=='female.gibbon')] 277 | 278 | 279 | ml.detection.df <- data.frame() 280 | 281 | for(y in 1:length(list.ml)){ 282 | L.wav <- list.ml[[y]] 283 | n.slash <- str_count(L.wav, pattern = "/")[1] + 1 284 | 285 | det.file.name <- str_split_fixed(L.wav,"/",n=n.slash)[,n.slash] 286 | det.file.name <- str_split_fixed(det.file.name,".wav",n=2)[,1] 287 | 288 | file.name <- paste(str_split_fixed(det.file.name,"_",n=5)[,1],str_split_fixed(det.file.name,"_",n=5)[,2], 289 | str_split_fixed(det.file.name,"_",n=5)[,3], sep='_') 290 | det.date <- str_split_fixed(det.file.name,"_",n=5)[,2] 291 | det.time <- str_split_fixed(det.file.name,"_",n=5)[,3] 292 | det.swift <- str_split_fixed(det.file.name,"_",n=5)[,1] 293 | det.time.start <- as.numeric(str_split_fixed(det.file.name,"_",n=9)[,6]) 294 | det.time.end <- as.numeric(str_split_fixed(det.file.name,"_",n=9)[,7]) 295 | probability <- str_split_fixed(det.file.name,"_",n=8)[,8] 296 | ml.algorithm <- str_split_fixed(det.file.name,"_",n=7)[,5] 297 | 298 | detections.df <- cbind.data.frame(file.name,det.swift, det.date, det.time,det.time.start,det.time.end,probability,ml.algorithm) 299 | 300 | ml.detection.df <- 
rbind.data.frame(ml.detection.df,detections.df) 301 | } 302 | 303 | 304 | recall.snr.all.df <- data.frame() 305 | for(x in 1:nrow(ml.detection.df)){ 306 | all.detections.subset <- ml.detection.df[x,] 307 | validate.detect.subset <-subset(all.detections,file.name==as.character(all.detections.subset$file.name)) 308 | validate.detect.subset$Begin.Time..s. <- as.numeric(validate.detect.subset$Begin.Time..s.) 309 | min.start.time <- as.numeric(all.detections.subset$det.time.start)-range.secs.start 310 | max.start.time <- as.numeric(all.detections.subset$det.time.start)+range.secs.end 311 | 312 | detections.ml <- subset(validate.detect.subset, Begin.Time..s.>min.start.time & Begin.Time..s.< max.start.time) 313 | 314 | if(nrow(detections.ml)>0){ 315 | all.detections.subset$class.label <- '1' 316 | } else{ 317 | all.detections.subset$class.label <- '-1' 318 | } 319 | 320 | recall.snr.all.df <- rbind.data.frame(recall.snr.all.df,all.detections.subset) 321 | } 322 | 323 | 324 | ``` 325 | ## Part 3c. 
Calculate and plot performance metrics using 'ROCR' 326 | 327 | ```{r eval=FALSE, } 328 | library(ROCR) 329 | 330 | auc.df <- data.frame() 331 | performance.df <- data.frame() 332 | 333 | 334 | ml.index <- unique(recall.snr.all.df$ml.algorithm) 335 | for(m in 1:length(ml.index)){ 336 | 337 | temp.subset <- 338 | subset(recall.snr.all.df, 339 | ml.algorithm==ml.index[m]) 340 | 341 | predictions <- as.numeric(temp.subset$probability) 342 | labels <- (temp.subset$class.label) 343 | pred <- prediction(predictions, labels) 344 | perf <- performance(pred, "rec", "prec") 345 | perfauc <- performance(pred, "aucpr") 346 | Precision <- perf@x.values[[1]] 347 | Recall <- perf@y.values[[1]] 348 | Threshold <- perf@alpha.values[[1]] 349 | AUC <- perfauc@y.values[[1]] 350 | perfF1 <- performance(pred, "f") 351 | F1 <- perfF1@y.values[[1]] 352 | print(AUC) 353 | ml.algorithm <- ml.index[m] 354 | tempauc <- cbind.data.frame(AUC,ml.algorithm) 355 | auc.df <- rbind.data.frame(auc.df,tempauc) 356 | 357 | temp.performance <- cbind.data.frame(Precision,Recall,Threshold,F1,ml.algorithm) 358 | performance.df <- rbind.data.frame(performance.df,temp.performance) 359 | 360 | perf <- performance(pred, "prec", "rec") 361 | 362 | plot(perf, 363 | avg= "threshold", 364 | colorize=TRUE, 365 | lwd= 3, 366 | main= paste(ml.index[m],'Precision/Recall')) 367 | 368 | plot(perf, 369 | lty=3, 370 | col="grey78", 371 | add=TRUE) 372 | 373 | } 374 | 375 | ``` 376 | 377 | # Part 4. Unsupervised clustering 378 | ## Part 4a. Create a UMAP plot colored by class 379 | ```{r eval=FALSE, } 380 | library(gibbonR) 381 | library(ggpubr) 382 | UMAPBiplotAddSpectrograms(input.dir.Focal="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir.Focal="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",add.spectrograms=TRUE,min.freq=400,max.freq=1600,main="UMAP Plot") 383 | 384 | ``` 385 | ## Part 4b. 
Create a UMAP plot colored by affinity propagation clustering 386 | ```{r eval=FALSE, } 387 | library(gibbonR) 388 | library(ggpubr) 389 | library(apcluster) 390 | AffinityBiplotAddSpectrograms(input.dir.Focal="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/",output.dir.Focal="/Users/denaclink/Desktop/RStudio Projects/gibbonR/data/MultipleSoundClasses/Thumbnails/",class='fixed', q.fixed=0.1,add.spectrograms=TRUE,min.freq=400,max.freq=1600,main="UMAP Plot") 391 | 392 | ``` 393 | 394 | -------------------------------------------------------------------------------- /.Rhistory: -------------------------------------------------------------------------------- 1 | c("Dim.1", "Dim.2", "Class") 2 | plot.for.AcousticSignalsMFCC.F$Class <- as.factor(plot.for.AcousticSignalsMFCC.F$Class) 3 | Plot1Females <- ggpubr::ggscatter(data = plot.for.AcousticSignalsMFCC.F,x = "Dim.1", 4 | y = "Dim.2", 5 | color = "Class", alpha=0.4)+ggtitle('Recording units')+ 6 | theme(axis.text.x=element_blank(), #remove x axis labels 7 | axis.ticks.x=element_blank(), #remove x axis ticks 8 | axis.text.y=element_blank(), #remove y axis labels 9 | axis.ticks.y=element_blank() #remove y axis ticks 10 | ) 11 | Plot1Females 12 | # Unsupervised clustering ------------------------------------------------- 13 | library(apcluster) 14 | aricode::NMI(as.factor(AcousticSignalsAPFemales@idx),trainingdataFemalesUpdate$class) 15 | # Adaptive returns q=0.1 16 | q.val.seq <- seq(from=0.1,to=0.9,by=0.1) 17 | AcousticSignal.sil.df <- data.frame() 18 | for(a in 1:length(q.val.seq)){ 19 | print(a) 20 | AcousticSignalsAP <- 21 | apcluster::apcluster(negDistMat(r=2),q=q.val.seq[a], 22 | trainingdataFemalesUpdate[,-c(1,179)], 23 | maxits=100000,convits=10000) 24 | sil <- 25 | cluster::silhouette(x = AcousticSignalsAP@idx, 26 | dist = dist( trainingdataFemalesUpdate[,-c(1,179)])) 27 | sil.val <- (summary(sil)$avg.width) 28 | temp.sil.df <- cbind.data.frame(sil.val,q.val.seq[a]) 29 | 
AcousticSignal.sil.df <- rbind.data.frame(AcousticSignal.sil.df,temp.sil.df) 30 | } 31 | MaxSil <- which.max(AcousticSignal.sil.df$sil.val) 32 | max(AcousticSignal.sil.df$sil.val) 33 | AcousticSignalsAPFemales <- 34 | apcluster::apcluster(negDistMat(r=2),q= q.val.seq[MaxSil], 35 | trainingdataFemalesUpdate[,-c(1,179)], 36 | maxits=100000,convits=10000) 37 | AcousticSignals.umap.F <- 38 | umap::umap(trainingdataFemales[,-c(1,179)], 39 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 40 | controlscale=TRUE,scale=3) 41 | length(AcousticSignalsAPFemales@exemplars) 42 | plot.for.AcousticSignals.F <- 43 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 44 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 45 | colnames(plot.for.AcousticSignals.F) <- 46 | c("Dim.1", "Dim.2","Cluster") 47 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 48 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 49 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 50 | y = "Dim.2", 51 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 52 | theme(axis.text.x=element_blank(), #remove x axis labels 53 | axis.ticks.x=element_blank(), #remove x axis ticks 54 | axis.text.y=element_blank(), #remove y axis labels 55 | axis.ticks.y=element_blank() #remove y axis ticks 56 | ) 57 | Plot2Females 58 | AcousticSignalsAPFemales <- 59 | apcluster::apcluster(negDistMat(r=2),q=0.1,# q.val.seq[MaxSil], 60 | trainingdataFemalesUpdate[,-c(1,179)], 61 | maxits=100000,convits=10000) 62 | AcousticSignals.umap.F <- 63 | umap::umap(trainingdataFemales[,-c(1,179)], 64 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 65 | controlscale=TRUE,scale=3) 66 | length(AcousticSignalsAPFemales@exemplars) 67 | plot.for.AcousticSignals.F <- 68 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 69 | as.factor( 
as.numeric(AcousticSignalsAPFemales@idx))) 70 | colnames(plot.for.AcousticSignals.F) <- 71 | c("Dim.1", "Dim.2","Cluster") 72 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 73 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 74 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 75 | y = "Dim.2", 76 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 77 | theme(axis.text.x=element_blank(), #remove x axis labels 78 | axis.ticks.x=element_blank(), #remove x axis ticks 79 | axis.text.y=element_blank(), #remove y axis labels 80 | axis.ticks.y=element_blank() #remove y axis ticks 81 | ) 82 | Plot2Females 83 | trainingdataFemales 84 | # Female individuals ------------------------------------------------------ 85 | source('R/MFCCFunctionMeanSD.R') 86 | # Female individuals ------------------------------------------------------ 87 | source('gibbonR/R/MFCCFunctionMeanSD.R') 88 | # Female individuals ------------------------------------------------------ 89 | source('gibbonR/R/MFCCFunction.R') 90 | # Female individuals ------------------------------------------------------ 91 | source('R/MFCCFunction.R') 92 | subset.directory <- '/Users/denaclink/Desktop/RStudio Projects/gibbonID/data/FemaleGibbonsSwiftHQ/' 93 | trainingdataFemales <- MFCCFunction(input.dir=subset.directory , min.freq = 400, max.freq = 1600,win.avg = 'mean.sd') 94 | trainingdataFemalesnames <- list.files(subset.directory, 95 | full.names = F,pattern = '.wav') 96 | trainingdataFemales$Class <- str_split_fixed(trainingdataFemalesnames,pattern = '_',n=2)[,1] 97 | trainingdataFemales$Class <- as.factor(trainingdataFemales$Class) 98 | MetaData <- read.csv("/Users/denaclink/Desktop/RStudio Projects/T0010_SEAsia2018_2019.csv") 99 | # Save as new object 100 | trainingdataFemalesUpdate <- data.frame() 101 | UniqueClass <- unique(trainingdataFemales$Class) 102 | for(b in 1:length(UniqueClass)){ 103 | 
TempClass <- UniqueClass[b] 104 | TempMeta <- subset(MetaData,Deployment.Comments==TempClass) 105 | trainingdataFemalessub <- subset(trainingdataFemales,class==TempClass) 106 | trainingdataFemalessub$lat <- TempMeta$LAT..decimal.degrees. 107 | trainingdataFemalessub$lon <- TempMeta$LON..decimal.degrees. 108 | trainingdataFemalesUpdate <- rbind.data.frame(trainingdataFemalesUpdate, trainingdataFemalessub) 109 | } 110 | head(trainingdataFemalesUpdate) 111 | AcousticSignalsMFCC.umap.F <- 112 | umap::umap(trainingdataFemalesUpdate[,-c(1,50)], 113 | #labels=as.factor(trainingdataFemales$Class), 114 | controlscale=TRUE,scale=3) 115 | colnames(trainingdataFemalesUpdate) 116 | AcousticSignalsMFCC.umap.F <- 117 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 118 | #labels=as.factor(trainingdataFemales$Class), 119 | controlscale=TRUE,scale=3) 120 | plot.for.AcousticSignalsMFCC.F <- 121 | cbind.data.frame(AcousticSignalsMFCC.umap.F$layout[,1:2], 122 | trainingdataFemalesUpdate$class) 123 | colnames(plot.for.AcousticSignalsMFCC.F) <- 124 | c("Dim.1", "Dim.2", "Class") 125 | plot.for.AcousticSignalsMFCC.F$Class <- as.factor(plot.for.AcousticSignalsMFCC.F$Class) 126 | Plot1Females <- ggpubr::ggscatter(data = plot.for.AcousticSignalsMFCC.F,x = "Dim.1", 127 | y = "Dim.2", 128 | color = "Class", alpha=0.4)+ggtitle('Recording units')+ 129 | theme(axis.text.x=element_blank(), #remove x axis labels 130 | axis.ticks.x=element_blank(), #remove x axis ticks 131 | axis.text.y=element_blank(), #remove y axis labels 132 | axis.ticks.y=element_blank() #remove y axis ticks 133 | ) 134 | Plot1Females 135 | # Unsupervised clustering ------------------------------------------------- 136 | library(apcluster) 137 | aricode::NMI(as.factor(AcousticSignalsAPFemales@idx),trainingdataFemalesUpdate$class) 138 | # Adaptive returns q=0.1 139 | q.val.seq <- seq(from=0.1,to=0.9,by=0.1) 140 | AcousticSignal.sil.df <- data.frame() 141 | for(a in 1:length(q.val.seq)){ 142 | print(a) 143 | AcousticSignalsAP 
<- 144 | apcluster::apcluster(negDistMat(r=2),q=q.val.seq[a], 145 | trainingdataFemalesUpdate[,-c(1,51)], 146 | maxits=100000,convits=10000) 147 | sil <- 148 | cluster::silhouette(x = AcousticSignalsAP@idx, 149 | dist = dist( trainingdataFemalesUpdate[,-c(1,51)])) 150 | sil.val <- (summary(sil)$avg.width) 151 | temp.sil.df <- cbind.data.frame(sil.val,q.val.seq[a]) 152 | AcousticSignal.sil.df <- rbind.data.frame(AcousticSignal.sil.df,temp.sil.df) 153 | } 154 | MaxSil <- which.max(AcousticSignal.sil.df$sil.val) 155 | max(AcousticSignal.sil.df$sil.val) 156 | MaxSil <- which.max(AcousticSignal.sil.df$sil.val) 157 | max(AcousticSignal.sil.df$sil.val) 158 | AcousticSignalsAPFemales <- 159 | apcluster::apcluster(negDistMat(r=2),q.val.seq[MaxSil], 160 | trainingdataFemalesUpdate[,-c(1,51)], 161 | maxits=100000,convits=10000) 162 | AcousticSignals.umap.F <- 163 | umap::umap(trainingdataFemales[,-c(1,179)], 164 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 165 | controlscale=TRUE,scale=3) 166 | length(AcousticSignalsAPFemales@exemplars) 167 | plot.for.AcousticSignals.F <- 168 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 169 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 170 | colnames(plot.for.AcousticSignals.F) <- 171 | c("Dim.1", "Dim.2","Cluster") 172 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 173 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 174 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 175 | y = "Dim.2", 176 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 177 | theme(axis.text.x=element_blank(), #remove x axis labels 178 | axis.ticks.x=element_blank(), #remove x axis ticks 179 | axis.text.y=element_blank(), #remove y axis labels 180 | axis.ticks.y=element_blank() #remove y axis ticks 181 | ) 182 | Plot2Females 183 | AcousticSignals.umap.F <- 184 | 
umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 185 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 186 | controlscale=TRUE,scale=3) 187 | length(AcousticSignalsAPFemales@exemplars) 188 | plot.for.AcousticSignals.F <- 189 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 190 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 191 | colnames(plot.for.AcousticSignals.F) <- 192 | c("Dim.1", "Dim.2","Cluster") 193 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 194 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 195 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 196 | y = "Dim.2", 197 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 198 | theme(axis.text.x=element_blank(), #remove x axis labels 199 | axis.ticks.x=element_blank(), #remove x axis ticks 200 | axis.text.y=element_blank(), #remove y axis labels 201 | axis.ticks.y=element_blank() #remove y axis ticks 202 | ) 203 | Plot2Females 204 | AcousticSignals.umap.F <- 205 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 206 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 207 | controlscale=TRUE,scale=3) 208 | length(AcousticSignalsAPFemales@exemplars) 209 | plot.for.AcousticSignals.F <- 210 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 211 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 212 | colnames(plot.for.AcousticSignals.F) <- 213 | c("Dim.1", "Dim.2","Cluster") 214 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 215 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 216 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 217 | y = "Dim.2", 218 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 219 | theme(axis.text.x=element_blank(), #remove x axis labels 220 | 
axis.ticks.x=element_blank(), #remove x axis ticks 221 | axis.text.y=element_blank(), #remove y axis labels 222 | axis.ticks.y=element_blank() #remove y axis ticks 223 | ) 224 | Plot2Females 225 | AcousticSignals.umap.F <- 226 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 227 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 228 | controlscale=TRUE,scale=3) 229 | length(AcousticSignalsAPFemales@exemplars) 230 | plot.for.AcousticSignals.F <- 231 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 232 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 233 | colnames(plot.for.AcousticSignals.F) <- 234 | c("Dim.1", "Dim.2","Cluster") 235 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 236 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 237 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 238 | y = "Dim.2", 239 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 240 | theme(axis.text.x=element_blank(), #remove x axis labels 241 | axis.ticks.x=element_blank(), #remove x axis ticks 242 | axis.text.y=element_blank(), #remove y axis labels 243 | axis.ticks.y=element_blank() #remove y axis ticks 244 | ) 245 | Plot2Females 246 | AcousticSignals.umap.F <- 247 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 248 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 249 | controlscale=TRUE,scale=3) 250 | length(AcousticSignalsAPFemales@exemplars) 251 | plot.for.AcousticSignals.F <- 252 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 253 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 254 | colnames(plot.for.AcousticSignals.F) <- 255 | c("Dim.1", "Dim.2","Cluster") 256 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 257 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 258 | Plot2Females <- 
ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 259 | y = "Dim.2", 260 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 261 | theme(axis.text.x=element_blank(), #remove x axis labels 262 | axis.ticks.x=element_blank(), #remove x axis ticks 263 | axis.text.y=element_blank(), #remove y axis labels 264 | axis.ticks.y=element_blank() #remove y axis ticks 265 | ) 266 | Plot2Females 267 | set.seed(4) 268 | AcousticSignals.umap.F <- 269 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 270 | #labels=as.factor( as.numeric(AcousticSignalsAPFemales@idx)), 271 | controlscale=TRUE,scale=3) 272 | length(AcousticSignalsAPFemales@exemplars) 273 | plot.for.AcousticSignals.F <- 274 | cbind.data.frame(AcousticSignals.umap.F$layout[,1:2],#VGGishDF$PercentClass, 275 | as.factor( as.numeric(AcousticSignalsAPFemales@idx))) 276 | colnames(plot.for.AcousticSignals.F) <- 277 | c("Dim.1", "Dim.2","Cluster") 278 | plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster) 279 | plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class 280 | Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,x = "Dim.1", 281 | y = "Dim.2", 282 | color='Cluster', alpha=0.4) + guides(color='none')+ggtitle('Affinity propagation')+ 283 | theme(axis.text.x=element_blank(), #remove x axis labels 284 | axis.ticks.x=element_blank(), #remove x axis ticks 285 | axis.text.y=element_blank(), #remove y axis labels 286 | axis.ticks.y=element_blank() #remove y axis ticks 287 | ) 288 | Plot2Females 289 | trainingdataFemales <- MFCCFunction(input.dir=subset.directory , min.freq = 600, max.freq = 1400,win.avg = 'mean.sd') 290 | trainingdataFemalesnames <- list.files(subset.directory, 291 | full.names = F,pattern = '.wav') 292 | trainingdataFemales$Class <- str_split_fixed(trainingdataFemalesnames,pattern = '_',n=2)[,1] 293 | trainingdataFemales$Class <- as.factor(trainingdataFemales$Class) 294 | MetaData <- 
read.csv("/Users/denaclink/Desktop/RStudio Projects/T0010_SEAsia2018_2019.csv") 295 | # Save as new object 296 | trainingdataFemalesUpdate <- data.frame() 297 | UniqueClass <- unique(trainingdataFemales$Class) 298 | for(b in 1:length(UniqueClass)){ 299 | TempClass <- UniqueClass[b] 300 | TempMeta <- subset(MetaData,Deployment.Comments==TempClass) 301 | trainingdataFemalessub <- subset(trainingdataFemales,class==TempClass) 302 | trainingdataFemalessub$lat <- TempMeta$LAT..decimal.degrees. 303 | trainingdataFemalessub$lon <- TempMeta$LON..decimal.degrees. 304 | trainingdataFemalesUpdate <- rbind.data.frame(trainingdataFemalesUpdate, trainingdataFemalessub) 305 | } 306 | head(trainingdataFemalesUpdate) 307 | AcousticSignalsMFCC.umap.F <- 308 | umap::umap(trainingdataFemalesUpdate[,-c(1,51)], 309 | #labels=as.factor(trainingdataFemales$Class), 310 | controlscale=TRUE,scale=3) 311 | plot.for.AcousticSignalsMFCC.F <- 312 | cbind.data.frame(AcousticSignalsMFCC.umap.F$layout[,1:2], 313 | trainingdataFemalesUpdate$class) 314 | colnames(plot.for.AcousticSignalsMFCC.F) <- 315 | c("Dim.1", "Dim.2", "Class") 316 | plot.for.AcousticSignalsMFCC.F$Class <- as.factor(plot.for.AcousticSignalsMFCC.F$Class) 317 | Plot1Females <- ggpubr::ggscatter(data = plot.for.AcousticSignalsMFCC.F,x = "Dim.1", 318 | y = "Dim.2", 319 | color = "Class", alpha=0.4)+ggtitle('Recording units')+ 320 | theme(axis.text.x=element_blank(), #remove x axis labels 321 | axis.ticks.x=element_blank(), #remove x axis ticks 322 | axis.text.y=element_blank(), #remove y axis labels 323 | axis.ticks.y=element_blank() #remove y axis ticks 324 | ) 325 | Plot1Females 326 | # Unsupervised clustering ------------------------------------------------- 327 | library(apcluster) 328 | aricode::NMI(as.factor(AcousticSignalsAPFemales@idx),trainingdataFemalesUpdate$class) 329 | # Adaptive returns q=0.1 330 | q.val.seq <- seq(from=0.1,to=0.9,by=0.1) 331 | AcousticSignal.sil.df <- data.frame() 332 | for(a in 1:length(q.val.seq)){ 
# ---- Tail of affinity-propagation preference (q) silhouette sweep ----
# (the for() header over index 'a' in q.val.seq is opened above this chunk;
#  features here drop column 1 = sound file name and 51 = class label)
  print(a)
  AcousticSignalsAP <-
    apcluster::apcluster(negDistMat(r = 2), q = q.val.seq[a],
                         trainingdataFemalesUpdate[, -c(1, 51)],
                         maxits = 100000, convits = 10000)
  sil <-
    cluster::silhouette(x = AcousticSignalsAP@idx,
                        dist = dist(trainingdataFemalesUpdate[, -c(1, 51)]))
  sil.val <- summary(sil)$avg.width
  temp.sil.df <- cbind.data.frame(sil.val, q.val.seq[a])
  AcousticSignal.sil.df <- rbind.data.frame(AcousticSignal.sil.df, temp.sil.df)
}

# Pick the preference value with the best average silhouette width
MaxSil <- which.max(AcousticSignal.sil.df$sil.val)
max(AcousticSignal.sil.df$sil.val)
AcousticSignalsAPFemales <-
  apcluster::apcluster(negDistMat(r = 2), q.val.seq[MaxSil],
                       trainingdataFemalesUpdate[, -c(1, 51)],
                       maxits = 100000, convits = 10000)

# UMAP embedding for visualization of the AP clusters
AcousticSignals.umap.F <-
  umap::umap(trainingdataFemalesUpdate[, -c(1, 51)],
             controlscale = TRUE, scale = 3)
length(AcousticSignalsAPFemales@exemplars)
plot.for.AcousticSignals.F <-
  cbind.data.frame(AcousticSignals.umap.F$layout[, 1:2],
                   as.factor(as.numeric(AcousticSignalsAPFemales@idx)))
colnames(plot.for.AcousticSignals.F) <- c("Dim.1", "Dim.2", "Cluster")
plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster)
plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class
Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,
                                  x = "Dim.1", y = "Dim.2",
                                  color = 'Cluster', alpha = 0.4) +
  guides(color = 'none') +
  ggtitle('Affinity propagation') +
  theme(axis.text.x = element_blank(),    # remove x axis labels
        axis.ticks.x = element_blank(),   # remove x axis ticks
        axis.text.y = element_blank(),    # remove y axis labels
        axis.ticks.y = element_blank())   # remove y axis ticks
Plot2Females
q.val.seq[MaxSil]

# Re-run AP with a fixed preference (q = 0.1) for comparison
AcousticSignalsAPFemales <-
  apcluster::apcluster(negDistMat(r = 2), q = 0.1,  # q.val.seq[MaxSil]
                       trainingdataFemalesUpdate[, -c(1, 51)],
                       maxits = 100000, convits = 10000)
AcousticSignals.umap.F <-
  umap::umap(trainingdataFemalesUpdate[, -c(1, 51)],
             controlscale = TRUE, scale = 3)
length(AcousticSignalsAPFemales@exemplars)
plot.for.AcousticSignals.F <-
  cbind.data.frame(AcousticSignals.umap.F$layout[, 1:2],
                   as.factor(as.numeric(AcousticSignalsAPFemales@idx)))
colnames(plot.for.AcousticSignals.F) <- c("Dim.1", "Dim.2", "Cluster")
plot.for.AcousticSignals.F$Cluster <- as.factor(plot.for.AcousticSignals.F$Cluster)
plot.for.AcousticSignals.F$Class <- trainingdataFemales$Class
Plot2Females <- ggpubr::ggscatter(data = plot.for.AcousticSignals.F,
                                  x = "Dim.1", y = "Dim.2",
                                  color = 'Cluster', alpha = 0.4) +
  guides(color = 'none') +
  ggtitle('Affinity propagation') +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
Plot2Females
length(AcousticSignalsAPFemales@exemplars)

# ---- Interactive scratch work / package reinstall churn from the session ----
file.choose()
subsamps
subsamps <- c(1, 2, 3)
numeric(length(subsamps))
remove.packages('gibbonR')
# Add documentation opt+cmd+shift+r
library(devtools)
library(ggpubr)
library(rmarkdown)
install_github("DenaJGibbon/gibbonR")
remove.packages('gibbonR')
# Load required libraries
library(devtools)
library(ggpubr)
library(rmarkdown)
install_github("DenaJGibbon/gibbonR")
mfcc.vector.list <- vector("list", 10000)
mfcc.vector.list
remove.packages('gibbonR')
library(devtools)
library(ggpubr)
library(rmarkdown)
install_github("DenaJGibbon/gibbonR")
remove.packages('gibbonR')
install_github("DenaJGibbon/gibbonR")
file.choose()
9 * 24

# ---- Female individuals: MFCC features + deployment metadata join ----
library(plyr)
library(stringr)
library(ggpubr)
library(apcluster)
library(tuneR)
library(aricode)
library(clValid)
library(gibbonR)
library(dplyr)
library(tidyr)
library(ggpubr)
set.seed(13)
source('R/MFCCFunction.R')
subset.directory <- '/Users/denaclink/Desktop/RStudio Projects/gibbonID/data/FemaleGibbonsSwiftHQ/'
trainingdataFemales <- gibbonR::MFCCFunction(input.dir = subset.directory,
                                             min.freq = 600, max.freq = 1400,
                                             win.avg = 'standard')
trainingdataFemalesnames <- list.files(subset.directory, full.names = FALSE,
                                       pattern = '.wav')
# Recording unit is encoded before the first underscore of each file name
trainingdataFemales$Class <- str_split_fixed(trainingdataFemalesnames,
                                             pattern = '_', n = 2)[, 1]
trainingdataFemales$Class <- as.factor(trainingdataFemales$Class)
MetaData <- read.csv("/Users/denaclink/Desktop/RStudio Projects/T0010_SEAsia2018_2019.csv")

# Attach lat/lon from deployment metadata for each recording unit.
# FIX: the feature-table column is 'Class' (capital C); the original
# 'subset(..., class == TempClass)' resolved 'class' to base::class and erred.
trainingdataFemalesUpdate <- data.frame()
UniqueClass <- unique(trainingdataFemales$Class)
for (b in seq_along(UniqueClass)) {
  TempClass <- UniqueClass[b]
  TempMeta <- subset(MetaData, Deployment.Comments == TempClass)
  trainingdataFemalessub <- subset(trainingdataFemales, Class == TempClass)
  trainingdataFemalessub$lat <- TempMeta$LAT..decimal.degrees.
  trainingdataFemalessub$lon <- TempMeta$LON..decimal.degrees.
  trainingdataFemalesUpdate <- rbind.data.frame(trainingdataFemalesUpdate,
                                                trainingdataFemalessub)
}
head(trainingdataFemalesUpdate)

# UMAP of MFCC features colored by recording unit
# (features here drop column 1 = sound file name and 179 = class label)
AcousticSignalsMFCC.umap.F <-
  umap::umap(trainingdataFemalesUpdate[, -c(1, 179)],
             controlscale = TRUE, scale = 3)
# FIX: was trainingdataFemalesUpdate$class (NULL — column is 'Class'), which
# silently dropped the third column and broke the colnames() call below.
plot.for.AcousticSignalsMFCC.F <-
  cbind.data.frame(AcousticSignalsMFCC.umap.F$layout[, 1:2],
                   trainingdataFemalesUpdate$Class)
colnames(plot.for.AcousticSignalsMFCC.F) <- c("Dim.1", "Dim.2", "Class")
plot.for.AcousticSignalsMFCC.F$Class <- as.factor(plot.for.AcousticSignalsMFCC.F$Class)
Plot1Females <- ggpubr::ggscatter(data = plot.for.AcousticSignalsMFCC.F,
                                  x = "Dim.1", y = "Dim.2",
                                  color = "Class", alpha = 0.4) +
  ggtitle('Recording units') +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank())
Plot1Females

# ---- Unsupervised clustering: silhouette sweep over AP preference q ----
library(apcluster)
# Adaptive returns q=0.1
q.val.seq <- seq(from = 0.1, to = 0.9, by = 0.1)
AcousticSignal.sil.df <- data.frame()
for (a in seq_along(q.val.seq)) {
  print(a)
  AcousticSignalsAP <-
    apcluster::apcluster(negDistMat(r = 2), q = q.val.seq[a],
                         trainingdataFemalesUpdate[, -c(1, 179)],
                         maxits = 100000, convits = 10000)
  sil <-
    cluster::silhouette(x = AcousticSignalsAP@idx,
                        dist = dist(trainingdataFemalesUpdate[, -c(1, 179)]))
  sil.val <- summary(sil)$avg.width
  temp.sil.df <- cbind.data.frame(sil.val, q.val.seq[a])
  AcousticSignal.sil.df <- rbind.data.frame(AcousticSignal.sil.df, temp.sil.df)
}
MaxSil <- which.max(AcousticSignal.sil.df$sil.val)
max(AcousticSignal.sil.df$sil.val)
q.val.seq[MaxSil]
# ---- End of console history: affinity propagation at the best-silhouette q ----
AcousticSignalsAPFemales <-
  apcluster::apcluster(negDistMat(r = 2), q = q.val.seq[MaxSil],
                       trainingdataFemalesUpdate[, -c(1, 179)],
                       maxits = 100000, convits = 10000)
length(AcousticSignalsAPFemales@exemplars)
remove.packages('gibbonR')
# Load required libraries
# Add documentation opt+cmd+shift+r
library(devtools)
library(ggpubr)
library(rmarkdown)
install_github("DenaJGibbon/gibbonR")

# ============================== R/gibbonR.R ==============================

#' gibbonR
#'
#' @description This function identifies sound events using band-limited energy
#'   summation and then classifies the sound events using a trained support
#'   vector machine (SVM) and/or random forest (RF) algorithm.
#' @param input Either full path to directory containing .wav files, a list of .wav files, or the path to a single .wav file
#' @param input.type Either 'directory', 'list' or 'wav'
#' @param feature.df Data frame of features from labeled sound files; first column must be class labels
#' @param model.type.list Which machine learning model(s) to use; "SVM" and/or "RF"
#' @param tune Logical; if TRUE use the 'tune' function for the SVM; NOTE: for large datasets this adds significant computing time
#' @param target.signal Labeled signal(s) of interest from training data (feature.df); can include multiple classes
#' @param short.wav.duration Duration (s) to divide longer sound files into, to increase processing efficiency
#' @param min.freq Minimum frequency (Hz) of signal of interest
#' @param max.freq Maximum frequency (Hz) of signal of interest
#' @param noise.quantile.val A quantile value between 0 and 1 used as the noise floor for the band energy summation
#' @param minimum.separation The minimum number of consecutive time windows that signals must be separated by to be considered separate sound events
#' @param n.windows Number of time windows to calculate for MFCCs
#' @param num.cep Number of cepstral coefficients to calculate for MFCCs
#' @param spectrogram.window Window length for spectrogram analysis (input to the spectro function from 'seewave')
#' @param pattern.split Pattern to find and remove to create the full sound file name; currently set to ".wav"
#' @param min.signal.dur The minimum duration (s) sound events must be to be considered sound events
#' @param maximum.separation The maximum separation (s) between consecutive detections for them to be merged into one event when writing the output table
#' @param max.sound.event.dur The maximum duration (s) sound events may be; set to the string 'NULL' to disable; NOTE this is only applied when writing the text file
#' @param probability.thresh.svm Probability threshold (provided by SVM) to be considered as target signal
#' @param probability.thresh.rf Probability threshold (provided by RF) to be considered as target signal
#' @param wav.output "TRUE" to write .wav files of detections to output.dir
#' @param output.dir Specified output directory; defaults to the current working directory
#' @param swift.time If the file name has the structure recorder_YYYYMMDD_HHMMSS, subset files based on recording hour
#' @param time.start Time recordings start (hour)
#' @param time.stop Time recordings stop (hour)
#' @param write.table.output Logical; write Raven selection tables to the output directory
#' @param verbose Logical; print progress messages
#' @param random.sample If a random subset of files in a directory is desired, specify an integer; otherwise 'NA'
#' @return If write.table.output = TRUE, writes a tab-separated Raven selection table (.txt) for each sound file with detections
#' @examples
#' \donttest{gibbonR(input = "FocalRecordings", input.type = "directory",
#'   feature.df = training.MFCC, target.signal = "female.gibbon")}
#' @export
#' @import e1071
#' @import randomForest
#' @import tuneR
#' @import seewave
#' @import stringr


gibbonR <-
  function(input,
           input.type = 'list',
           feature.df,
           model.type.list = c("SVM"),
           tune = FALSE,
           target.signal = "female.gibbon",
           short.wav.duration = 300,
           min.freq = 400,
           max.freq = 2000,
           noise.quantile.val = 0.5,
           minimum.separation = 5,
           n.windows = 9,
           num.cep = 12,
           spectrogram.window = 1600,
           pattern.split = ".wav",
           min.signal.dur = 4,
           maximum.separation = 1,
           max.sound.event.dur = 12,
           probability.thresh.svm = 0.75,
           probability.thresh.rf = 0.75,
           wav.output = "TRUE",
           output.dir = getwd(),
           swift.time = TRUE,
           time.start = 6,
           time.stop = 12,
           write.table.output = TRUE,
           verbose = TRUE,
           random.sample = 'NA') {

    # Warn if any requested target signal is absent from the training labels
    TrainingMatch <- match(target.signal, unique(feature.df$class))
    if (any(is.na(TrainingMatch))) {
      print("Training data does not contain target signal")
    }

    if (wav.output == "TRUE" && output.dir == "")
      stop("Specify output directory")

    # Resolve 'input' into a vector of full file paths
    if (input.type == 'list') {
      list.file.input <- unlist(input)
      nslash <- str_count(input, pattern = '/') + 1
      list.file.input.short <-
        str_split_fixed(input, pattern = '/', nslash)[, nslash]
    }

    if (input.type == "directory") {
      list.file.input <-
        list.files(input, full.names = TRUE, recursive = TRUE)
      list.file.input.short <-
        list.files(input, full.names = FALSE, recursive = TRUE)
    }

    if (input.type == "wav") {
      list.file.input <- input
    }

    # Optionally keep only files recorded between time.start and time.stop,
    # parsing the hour from recorder_YYYYMMDD_HHMMSS file names
    if (swift.time == TRUE) {
      number.of.slash <- str_count(list.file.input, pattern = "/")[1]
      base.file.name.all <-
        str_split_fixed(list.file.input,
                        pattern = "/",
                        n = (number.of.slash + 1))[, number.of.slash + 1]
      temp.name.all <-
        stringr::str_split_fixed(base.file.name.all, pattern = pattern.split, n = 2)[, 1]
      times <- str_split_fixed(temp.name.all, pattern = '_', n = 3)[, 3]
      times <- as.numeric(substr(times, start = 1, stop = 2))
      list.file.input <-
        list.file.input[which(times >= time.start & times <= time.stop)]
    }

    # FIX: original used 'break' here, which is invalid outside a loop and
    # would itself error at runtime; abort explicitly instead.
    if (length(list.file.input) == 0) {
      stop("No sound files detected")
    }

    if (is.numeric(random.sample)) {
      list.file.input <-
        list.file.input[sample(seq_along(list.file.input), random.sample,
                               replace = FALSE)]
    }

    print("Machine learning in progress...")

    # ---- Train SVM on the labeled features ----
    if ("SVM" %in% model.type.list) {
      print("SVM in progress...")
      start_time <- Sys.time()
      if (tune == TRUE) {
        # Grid-search cost/gamma with 5-fold cross-validation
        tune.rad <-
          e1071::tune(
            svm,
            feature.df[, 2:ncol(feature.df)],
            feature.df$class,
            kernel = "radial",
            tunecontrol = tune.control(cross = 5),
            ranges = list(
              cost = c(0.001, 0.01, 0.1, 1, 2, 10, 100, 1000),
              gamma = c(0.01, 0.1, 0.5, 1, 2)
            )
          )
        ml.model.svm <-
          e1071::svm(
            feature.df[, 2:ncol(feature.df)],
            feature.df$class,
            kernel = "radial",
            gamma = tune.rad$best.parameters$gamma,
            cost = tune.rad$best.parameters$cost,
            cross = 20,
            probability = TRUE
          )
      } else {
        ml.model.svm <-
          e1071::svm(
            feature.df[, 2:ncol(feature.df)],
            feature.df$class,
            kernel = "radial",
            gamma = 0.01,
            cost = 2,
            cross = 25,
            probability = TRUE
          )
      }
      print(paste('SVM accuracy', ml.model.svm$tot.accuracy))
      end_time <- Sys.time()
      print(end_time - start_time)
    }

    # ---- Train random forest on the labeled features ----
    if ("RF" %in% model.type.list) {
      print("RF in progress...")
      tryCatch({
        start_time <- Sys.time()
        ml.model.rf <-
          randomForest::randomForest(x = feature.df[, 2:ncol(feature.df)],
                                     y = feature.df$class)
        print(ml.model.rf)
        end_time <- Sys.time()
        print(end_time - start_time)
      }, error = function(e) {
        cat("ERROR :", conditionMessage(e), "\n")
      })
    }

    print(paste("Classifying for target signal", c(target.signal)))

    # ---- Detect and classify, one sound file at a time ----
    for (i in seq_along(list.file.input)) {
      model.results.list <- list()
      RavenSelectionTableDF <- data.frame()
      tryCatch({
        start_time <- Sys.time()

        # Base file name without directories or the pattern.split suffix
        contains.slash <- str_detect(list.file.input[i], pattern = "/")
        if (contains.slash) {
          number.of.slash <- str_count(list.file.input[i], pattern = "/")
          base.file.name <-
            str_split_fixed(list.file.input[i],
                            pattern = "/",
                            n = (number.of.slash + 1))[, number.of.slash + 1]
          temp.name <-
            stringr::str_split_fixed(base.file.name, pattern = pattern.split, n = 2)[1]
        } else {
          temp.name <-
            stringr::str_split_fixed(list.file.input[i], pattern = pattern.split, n = 2)[1]
        }

        if (verbose == TRUE) {
          print(paste(
            "Computing spectrogram for file",
            temp.name,
            i,
            'out of',
            length(list.file.input)
          ))
        }

        # Split the recording into short.wav.duration-second chunks
        temp.wav <- readWave(list.file.input[i])
        sound_length <-
          round(length(temp.wav@left) / temp.wav@samp.rate, 2)
        cutwave.list <-
          c(seq(from = 1,
                to = (sound_length),
                by = short.wav.duration), sound_length)

        short.sound.files <- lapply(1:(length(cutwave.list) - 1),
                                    function(i)
                                      extractWave(
                                        temp.wav,
                                        from = cutwave.list[i],
                                        to = cutwave.list[i + 1],
                                        xunit = c("time"),
                                        plot = FALSE,
                                        output = "Wave"
                                      ))

        print('Running detector over sound files')
        for (j in seq_along(short.sound.files)) {
          swift.spectro <-
            spectro(
              short.sound.files[[j]],
              wl = spectrogram.window,
              overlap = 0,
              plot = FALSE
            )

          # Band-limited energy: sum amplitude in [min.freq, max.freq] per window
          min.freq.cols <-
            which.min(abs(round(swift.spectro$freq, digits = 2) - (min.freq / 1000)))
          max.freq.cols <-
            which.min(abs(round(swift.spectro$freq, digits = 2) - (max.freq / 1000)))
          col.sum <-
            colSums(swift.spectro$amp[min.freq.cols:max.freq.cols, ])

          # Windows whose band energy exceeds the noise quantile are candidates
          noise.value <-
            quantile(unlist(col.sum), c(noise.quantile.val))
          list.sub <- which(col.sum > noise.value)

          # Group candidate windows into putative sound events
          if (minimum.separation != 1) {
            detection.differences <-
              unlist(lapply(1:(length(list.sub) - 1),
                            function(i)
                              c(list.sub[i + 1] - list.sub[i])))
            detection.separation.list <-
              which(detection.differences >= minimum.separation)
            detection.separation.list <-
              c(1, detection.separation.list + 1)
            call.timing <- list()
            for (x in 1:(length(detection.separation.list) - 1)) {
              start.index <- detection.separation.list[x]
              finish.index <- detection.separation.list[x + 1]
              call.timing[[x]] <-
                list.sub[start.index]:list.sub[finish.index]
            }
          } else {
            call.timing <- split(list.sub, cumsum(c(1, diff(list.sub)) != 1))
          }

          # Keep events longer than min.signal.dur (and, if set, shorter than
          # max.sound.event.dur), measured in spectrogram windows
          if (length(which(swift.spectro$time > 1)) > 0) {
            number.time.windows.1sec <- min(which(swift.spectro$time > 1))
            signal.dur <- number.time.windows.1sec * min.signal.dur
            call.timing.list <-
              as.list(call.timing[which(sapply(call.timing, length) > signal.dur)])
            # The string 'NULL' disables the maximum-duration filter
            if (max.sound.event.dur != 'NULL') {
              sound.event.index.max <-
                which.min(abs(swift.spectro$time - max.sound.event.dur))
              call.timing.list <-
                call.timing.list[which(sapply(call.timing.list, length) < sound.event.index.max)]
            }
          } else {
            call.timing.list <- list()
          }

          if (length(call.timing.list) >= 1) {
            # Extract each candidate event and its start/end times
            subsamps <- lapply(seq_along(call.timing.list),
                               function(i)
                                 extractWave(
                                   short.sound.files[[j]],
                                   from = swift.spectro$time[min(call.timing.list[[i]])],
                                   to = swift.spectro$time[max(call.timing.list[[i]])],
                                   xunit = c("time"),
                                   plot = FALSE,
                                   output = "Wave"
                                 ))
            calltimes <- lapply(seq_along(call.timing.list),
                                function(i)
                                  cbind.data.frame(
                                    from = swift.spectro$time[min(call.timing.list[[i]])],
                                    to = swift.spectro$time[max(call.timing.list[[i]])]))

            temp.model.results.list.svm <- list()
            temp.model.results.list.rf <- list()
            for (y in seq_along(target.signal)) {
              for (x in seq_along(subsamps)) {
                calltimes.subset <- calltimes[[x]]
                start.time <- calltimes.subset$from
                end.time <- calltimes.subset$to

                # Convert chunk-relative times to whole-file times
                if (j > 1) {
                  start.time <- short.wav.duration * (j - 1) + start.time
                  end.time <- short.wav.duration * (j - 1) + end.time
                }
                start.time <- round(start.time, 3)
                end.time <- round(end.time, 3)
                short.wav <- subsamps[[x]]

                wav.dur <- duration(short.wav)
                win.time <- wav.dur / n.windows

                # MFCCs over n.windows equal time windows
                melfcc.output <-
                  tuneR::melfcc(
                    short.wav,
                    minfreq = min.freq,
                    hoptime = win.time,
                    maxfreq = max.freq,
                    numcep = num.cep,
                    wintime = win.time
                  )

                # Delta cepstral coefficients
                deltas.output <- deltas(melfcc.output)

                # Use the same number of windows for MFCCs and deltas;
                # append the event duration as a final feature
                mfcc.vector <-
                  c(as.vector(t(melfcc.output[1:(n.windows - 1), 2:num.cep])),
                    as.vector(t(deltas.output[1:(n.windows - 1), 2:num.cep])),
                    wav.dur)
                mfcc.vector <- as.data.frame(t(mfcc.vector))

                if (length(colnames(mfcc.vector)) != length(colnames(feature.df[, 2:ncol(feature.df)]))) {
                  print(
                    'Training dataset columns do not match test dataset; check MFCC settings'
                  )
                  break
                }
                colnames(mfcc.vector) <-
                  colnames(feature.df[, 2:ncol(feature.df)])

                # --- SVM classification of this event ---
                if ("SVM" %in% model.type.list) {
                  svm.prob <- predict(ml.model.svm, mfcc.vector, probability = TRUE)
                  model.output <- attr(svm.prob, "probabilities")
                  signal.loc <-
                    which(attr(model.output, "dimnames")[[2]] == target.signal[y])
                  signal.probability <- model.output[signal.loc]
                  temp.svm.df <-
                    cbind.data.frame(target.signal[y], signal.probability)
                  if (temp.svm.df$signal.probability >= probability.thresh.svm) {
                    if (wav.output == "TRUE") {
                      tuneR::writeWave(
                        subsamps[[x]],
                        filename = paste(
                          output.dir, "/", temp.name, "_",
                          target.signal[y], "_", "SVM", "_",
                          start.time, "_", end.time, "_",
                          round(signal.probability, 3), ".wav",
                          sep = ""
                        ),
                        extensible = FALSE
                      )
                    }
                    temp.df <-
                      cbind.data.frame(
                        temp.name,
                        paste(j, x, sep = '.'),
                        "SVM",
                        target.signal[y],
                        round(signal.probability, 3),
                        start.time,
                        end.time
                      )
                    colnames(temp.df) <-
                      c(
                        "file.name",
                        "detect.num",
                        "model.type",
                        "signal",
                        "probability",
                        "start.time",
                        "end.time"
                      )
                    temp.model.results.list.svm[[length(temp.model.results.list.svm) + 1]] <- temp.df
                  }
                }

                # --- Random forest classification of this event ---
                if ("RF" %in% model.type.list) {
                  RF.prob <- predict(ml.model.rf, mfcc.vector, type = 'prob')
                  model.output <- colnames(RF.prob)
                  signal.loc <- which(model.output == target.signal[y])
                  signal.probability <- RF.prob[, signal.loc]
                  temp.RF.df <-
                    cbind.data.frame(target.signal[y], signal.probability)
                  if (temp.RF.df$signal.probability >= probability.thresh.rf) {
                    if (wav.output == "TRUE") {
                      tuneR::writeWave(
                        subsamps[[x]],
                        filename = paste(
                          output.dir, "/", temp.name, "_",
                          target.signal[y], "_", "RF", "_",
                          start.time, "_", end.time, "_",
                          round(signal.probability, 3), ".wav",
                          sep = ""
                        ),
                        extensible = FALSE
                      )
                    }
                    temp.df <-
                      cbind.data.frame(
                        temp.name,
                        paste(j, x, sep = '.'),
                        "RF",
                        target.signal[y],
                        round(signal.probability, 3),
                        start.time,
                        end.time
                      )
                    colnames(temp.df) <-
                      c(
                        "file.name",
                        "detect.num",
                        "model.type",
                        "signal",
                        "probability",
                        "start.time",
                        "end.time"
                      )
                    temp.model.results.list.rf[[length(temp.model.results.list.rf) + 1]] <- temp.df
                  }
                }
              }  # end x (events)

              # Pool SVM and RF detections for this chunk. Both lists are
              # unconditionally initialized above, so the original exists()
              # guards were always TRUE and have been removed.
              temp.model.results.list.svm <-
                temp.model.results.list.svm[lengths(temp.model.results.list.svm) != 0]
              temp.model.results.list.rf <-
                temp.model.results.list.rf[lengths(temp.model.results.list.rf) != 0]
              temp.model.results.list <-
                append(temp.model.results.list.svm,
                       temp.model.results.list.rf)
              model.results.list[[j]] <-
                do.call(rbind.data.frame, temp.model.results.list)
            }  # end y (target signals)
          }
        }  # end j (chunks)

        model.results.list <-
          model.results.list[lengths(model.results.list) != 0]

        if (length(model.results.list) > 0) {
          print('Creating datasheet')
          timing.df <- do.call(rbind.data.frame, model.results.list)

          # Merge consecutive detections closer than maximum.separation seconds
          for (k in seq_along(model.type.list)) {
            timing.df.subset <-
              subset(timing.df, model.type == model.type.list[[k]])

            detection.time.differences <-
              unlist(lapply(1:(nrow(timing.df.subset) - 1),
                            function(i)
                              c(
                                timing.df.subset$start.time[i + 1] - timing.df.subset$end.time[i]
                              )))
            detection.separation.list <-
              which(detection.time.differences < maximum.separation)
            detection.timing <-
              split(detection.separation.list, cumsum(c(
                1, diff(detection.separation.list)
              ) != 1))

            if (length(detection.timing) > 1) {
              # Each run of close detections also absorbs the following row
              for (j in seq_along(detection.timing)) {
                temp.df <- detection.timing[[j]]
                detection.timing[[j]] <- c(temp.df, max(temp.df) + 1)
              }

              DetectionDFtemp <-
                timing.df.subset[-c(unlist(detection.timing)), ]

              # Collapse each run into one row: median probability, min start,
              # max end
              for (l in seq_along(detection.timing)) {
                temp.subset <- detection.timing[[l]]
                temprow1 <-
                  timing.df.subset[min(temp.subset):max(temp.subset), ]
                probability <- median(temprow1$probability)
                start.time <- round(min(temprow1$start.time), 3)
                end.time <- round(max(temprow1$end.time), 3)
                newselection <-
                  cbind.data.frame(temprow1[1, 1:4], probability, start.time, end.time)
                DetectionDFtemp <-
                  rbind.data.frame(DetectionDFtemp, newselection)
              }
            } else {
              DetectionDFtemp <- timing.df.subset
            }

            RavenSelectionTableDF <-
              rbind.data.frame(RavenSelectionTableDF, DetectionDFtemp)
          }

          RavenSelectionTableDF <-
            RavenSelectionTableDF[order(RavenSelectionTableDF$start.time), ]

          # Raven selection table boilerplate columns
          Selection <- seq(1, nrow(RavenSelectionTableDF))
          View <- rep('Spectrogram 1', nrow(RavenSelectionTableDF))
          Channel <- rep(1, nrow(RavenSelectionTableDF))
          MinFreq <- rep(min.freq, nrow(RavenSelectionTableDF))
          MaxFreq <- rep(max.freq, nrow(RavenSelectionTableDF))

          if (nrow(RavenSelectionTableDF) > 0) {
            RavenSelectionTableDF <-
              cbind.data.frame(Selection,
                               View,
                               Channel,
                               MinFreq,
                               MaxFreq,
                               RavenSelectionTableDF)

            RavenSelectionTableDF <-
              RavenSelectionTableDF[, c(
                "Selection",
                "View",
                "Channel",
                "start.time",
                "end.time",
                "MinFreq",
                "MaxFreq",
                "file.name",
                'model.type',
                'probability',
                'signal'
              )]

            colnames(RavenSelectionTableDF) <-
              c(
                "Selection",
                "View",
                "Channel",
                "Begin Time (s)",
                "End Time (s)",
                "Low Freq (Hz)",
                "High Freq (Hz)",
                "File Name",
                'model.type',
                'probability',
                'signal'
              )

            if (write.table.output == TRUE) {
              csv.file.name <-
                paste(output.dir,
                      '/',
                      temp.name,
                      'gibbonRresults.txt',
                      sep = '')
              write.table(
                x = RavenSelectionTableDF,
                sep = "\t",
                file = csv.file.name,
                row.names = FALSE,
                quote = FALSE
              )
              print(paste(
                "Saving Sound Files",
                temp.name,
                i,
                'out of',
                length(list.file.input)
              ))
            }

            end_time <- Sys.time()
            print(
              paste(
                'System processed',
                round(seewave::duration(temp.wav)),
                'seconds in',
                round(end_time - start_time),
                'seconds',
                'this translates to',
                round(
                  round(seewave::duration(temp.wav)) / 60 / 60 * 3600 / as.numeric(end_time - start_time),
                  1
                ),
                'hours processed in 1 hour'
              )
            )
          }

          # Free per-file objects before the next iteration
          rm(RavenSelectionTableDF)
          rm(swift.spectro)
          rm(temp.wav)
          rm(short.sound.files)
        }
      }, error = function(e) {
        cat("ERROR :", conditionMessage(e), "\n")
      })
    }
  }