├── LICENSE
├── Makefile
├── README.md
├── assets
    ├── continuous-element.png
    └── dynamic-thresholding.png
├── docs
    ├── Makefile
    ├── commands.rst
    ├── conf.py
    ├── getting-started.rst
    ├── index.rst
    └── make.bat
├── models
    └── .gitkeep
├── notebooks
    ├── .gitkeep
    ├── continuity_segmentation-bengalese-finch.ipynb
    ├── continuity_segmentation-canary.ipynb
    ├── continuity_segmentation-mouse.ipynb
    ├── continuity_segmentation-starling.ipynb
    ├── dynamic_thresholding_segmentation-European-starling.ipynb
    ├── dynamic_thresholding_segmentation-bengalese-finch.ipynb
    ├── dynamic_thresholding_segmentation-canary.ipynb
    ├── dynamic_thresholding_segmentation-mocking.ipynb
    └── dynamic_thresholding_segmentation-mouse.ipynb
├── references
    └── .gitkeep
├── reports
    ├── .gitkeep
    └── figures
    │   └── .gitkeep
├── requirements.txt
├── setup.py
├── test_environment.py
├── tox.ini
└── vocalseg
    ├── __init__.py
    ├── continuity_filtering.py
    ├── dynamic_thresholding.py
    ├── examples
        ├── __init__.py
        ├── bengalese_finch.wav
        ├── canary.wav
        ├── mocking.wav
        ├── mouse_usv.wav
        └── starling.wav
    ├── utils.py
    └── vocalseg.code-workspace


/LICENSE:
--------------------------------------------------------------------------------
 1 | 
 2 | The MIT License (MIT)
 3 | Copyright (c) 2019, Tim Sainburg
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 6 | 
 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 | 
11 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # Command-style targets: none of these names is a file produced by its recipe.
  2 | .PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3 create_environment test_environment
  3 | #################################################################################
  4 | # GLOBALS                                                                       #
  5 | #################################################################################
  6 | 
  7 | PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
  8 | BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')
  9 | PROFILE = default
 10 | PROJECT_NAME = vocalization_segmentation
 11 | PYTHON_INTERPRETER = python3
 12 | # HAS_CONDA is True when `which conda` prints something, False when it prints nothing.
 13 | ifneq (,$(shell which conda))
 14 | HAS_CONDA=True
 15 | else
 16 | HAS_CONDA=False
 17 | endif
 18 | 
 19 | #################################################################################
 20 | # COMMANDS                                                                      #
 21 | #################################################################################
 22 | 
 23 | ## Install Python Dependencies
 24 | requirements: test_environment
 25 | 	$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
 26 | 	$(PYTHON_INTERPRETER) -m pip install -r requirements.txt
 27 | 
 28 | ## Make Dataset
 29 | data: requirements
 30 | 	$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed
 31 | # __pycache__ is removed recursively so a directory still holding other files cannot make find fail.
 32 | ## Delete all compiled Python files
 33 | clean:
 34 | 	find . -type f -name "*.py[co]" -delete
 35 | 	find . -type d -name "__pycache__" -prune -exec rm -rf {} +
 36 | # The Python package lives in vocalseg/; the repository listing shows no src/ directory.
 37 | ## Lint using flake8
 38 | lint:
 39 | 	flake8 vocalseg
 40 | 
 41 | ## Upload Data to S3
 42 | sync_data_to_s3:
 43 | ifeq (default,$(PROFILE))
 44 | 	aws s3 sync data/ s3://$(BUCKET)/data/
 45 | else
 46 | 	aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE)
 47 | endif
 48 | 
 49 | ## Download Data from S3
 50 | sync_data_from_s3:
 51 | ifeq (default,$(PROFILE))
 52 | 	aws s3 sync s3://$(BUCKET)/data/ data/
 53 | else
 54 | 	aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE)
 55 | endif
 56 | 
 57 | ## Set up python interpreter environment
 58 | create_environment:
 59 | ifeq (True,$(HAS_CONDA))
 60 | 		@echo ">>> Detected conda, creating conda environment."
 61 | ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
 62 | 	conda create --name $(PROJECT_NAME) python=3
 63 | else
 64 | 	conda create --name $(PROJECT_NAME) python=2.7
 65 | endif
 66 | 		@echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)"
 67 | else
 68 | 	$(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
 69 | 	@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
 70 | 	export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
 71 | 	@bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
 72 | 	@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
 73 | endif
 74 | 
 75 | ## Test python environment is setup correctly
 76 | test_environment:
 77 | 	$(PYTHON_INTERPRETER) test_environment.py
 78 | 
 79 | #################################################################################
 80 | # PROJECT RULES                                                                 #
 81 | #################################################################################
 82 | 
 83 | 
 84 | 
 85 | #################################################################################
 86 | # Self Documenting Commands                                                     #
 87 | #################################################################################
 88 | 
 89 | .DEFAULT_GOAL := help
 90 | 
 91 | # Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
 92 | # sed script explained:
 93 | # /^##/:
 94 | # 	* save line in hold space
 95 | # 	* purge line
 96 | # 	* Loop:
 97 | # 		* append newline + line to hold space
 98 | # 		* go to next line
 99 | # 		* if line starts with doc comment, strip comment character off and loop
100 | # 	* remove target prerequisites
101 | # 	* append hold space (+ newline) to line
102 | # 	* replace newline plus comments by `---`
103 | # 	* print line
104 | # Separate expressions are necessary because labels cannot be delimited by
105 | # semicolon; see <http://stackoverflow.com/a/11799865/1968>
106 | .PHONY: help
107 | help:
108 | 	@echo "$$(tput bold)Available rules:$$(tput sgr0)"
109 | 	@echo
110 | 	@sed -n -e "/^## / { \
111 | 		h; \
112 | 		s/.*//; \
113 | 		:doc" \
114 | 		-e "H; \
115 | 		n; \
116 | 		s/^## //; \
117 | 		t doc" \
118 | 		-e "s/:.*//; \
119 | 		G; \
120 | 		s/\\n## /---/; \
121 | 		s/\\n/ /g; \
122 | 		p; \
123 | 	}" ${MAKEFILE_LIST} \
124 | 	| LC_ALL='C' sort --ignore-case \
125 | 	| awk -F '---' \
126 | 		-v ncol=$$(tput cols) \
127 | 		-v indent=19 \
128 | 		-v col_on="$$(tput setaf 6)" \
129 | 		-v col_off="$$(tput sgr0)" \
130 | 	'{ \
131 | 		printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
132 | 		n = split($$2, words, " "); \
133 | 		line_length = ncol - indent; \
134 | 		for (i = 1; i <= n; i++) { \
135 | 			line_length -= length(words[i]) + 1; \
136 | 			if (line_length <= 0) { \
137 | 				line_length = ncol - indent - length(words[i]) - 1; \
138 | 				printf "\n%*s ", -indent, " "; \
139 | 			} \
140 | 			printf "%s ", words[i]; \
141 | 		} \
142 | 		printf "\n"; \
143 | 	}' \
144 | 	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
145 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Vocalization segmentation
 2 | ==============================
 3 | 
 4 | This is a set of simple, unsupervised algorithms for segmenting vocalizations, controlled by (many) parameters. It is meant as an easy, deliberately simple way of tuning segmentation by eye, so that you can then repeat the segmentation on many different vocalizations using the same methods.
 5 | 
 6 | There are a number of Jupyter notebook examples for European starlings, Bengalese finches, canaries, and mouse USVs.
 7 | 
 8 | There are two main segmentation algorithms:
 9 | 
10 | 1) <b>Dynamic thresholding:</b> segments syllables in time by computing a spectral envelope and modifying the threshold for segmentation on that envelope based on a set of parameters.
11 | 
12 | <img src="assets/dynamic-thresholding.png" />
13 | 
14 | 2) <b>Continuous element segmentation:</b> segments elements of song spectro-temporally, so that two elements can be overlapping in time but not frequency. 
15 | 
16 | <img src="assets/continuous-element.png" />
17 | 
18 | 
19 | ### Parameters
20 | 
21 | There are a whole bunch of different parameters for the two algorithms. You need to carefully set them to get the results you want. Here is a quick description of each:
22 | 
23 | ```
24 | Arguments:
25 |         vocalization {[type]} -- waveform of song
26 |         rate {[type]} -- sample rate of the data
27 | 
28 |     Keyword Arguments:
29 |         min_level_db {int} -- default dB minimum of spectrogram (threshold anything below) (default: {-80})
30 |         min_level_db_floor {int} -- highest number min_level_db is allowed to reach dynamically (default: {-40})
31 |         db_delta {int} -- delta in setting min_level_db (default: {5})
32 |         n_fft {int} -- FFT window size (default: {1024})
33 |         hop_length_ms {int} -- time in ms between successive STFT columns (default: {1})
34 |         win_length_ms {int} -- size of fft window (ms) (default: {5})
35 |         ref_level_db {int} -- reference level dB of audio (default: {20})
36 |         pre {float} -- coefficient for preemphasis filter (default: {0.97})
37 |         spectral_range {[type]} -- spectral range to care about for spectrogram (default: {None})
38 |         verbose {bool} -- display output (default: {False})
39 |         mask_thresh_std {int} -- standard deviations above median to threshold out noise (higher = threshold more noise) (default: {1})
40 |         neighborhood_time_ms {int} -- size in time of neighborhood-continuity filter (default: {5})
41 |         neighborhood_freq_hz {int} -- size in Hz of neighborhood-continuity filter (default: {500})
42 |         neighborhood_thresh {float} -- threshold number of neighborhood time-frequency bins above 0 to consider a bin not noise (default: {0.5})
43 |         min_syllable_length_s {float} -- shortest expected length of syllable (default: {0.1})
44 |         min_silence_for_spec {float} -- shortest expected length of silence in a song (used to set dynamic threshold) (default: {0.1})
45 |         silence_threshold {float} -- threshold for spectrogram to consider noise as silence (default: {0.05})
46 |         max_vocal_for_spec {float} -- longest expected vocalization in seconds  (default: {1.0})
47 |         temporal_neighbor_merge_distance_ms {float} -- longest distance at which two elements should be considered one (default: {0.0})
48 |         overlapping_element_merge_thresh {float} -- proportion of temporal overlap to consider two elements one (default: {np.inf})
49 |         min_element_size_ms_hz {list} --  smallest expected element size (in ms and HZ). Everything smaller is removed. (default: {[0, 0]})
50 |         figsize {tuple} -- size of figure for displaying output (default: {(20, 5)})
51 | 
52 | ```
53 | 
54 | --------
55 | 
56 | <p><small>Project based on the <a target="_blank" href="https://drivendata.github.io/cookiecutter-data-science/">cookiecutter data science project template</a>. #cookiecutterdatascience</small></p>
57 | 


--------------------------------------------------------------------------------
/assets/continuous-element.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/assets/continuous-element.png


--------------------------------------------------------------------------------
/assets/dynamic-thresholding.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/assets/dynamic-thresholding.png


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # Internal variables.
 11 | PAPEROPT_a4     = -D latex_paper_size=a4
 12 | PAPEROPT_letter = -D latex_paper_size=letter
 13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 14 | # the i18n builder cannot share the environment and doctrees with the others
 15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 16 | # Every rule below is a command, not a file; texinfo and info are listed as well.
 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
 18 | 
 19 | help:
 20 | 	@echo "Please use \`make <target>' where <target> is one of"
 21 | 	@echo "  html       to make standalone HTML files"
 22 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 23 | 	@echo "  singlehtml to make a single large HTML file"
 24 | 	@echo "  pickle     to make pickle files"
 25 | 	@echo "  json       to make JSON files"
 26 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 27 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 28 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 29 | 	@echo "  epub       to make an epub"
 30 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 31 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 32 | 	@echo "  text       to make text files"
 33 | 	@echo "  man        to make manual pages"
 34 | 	@echo "  texinfo    to make Texinfo files"
 35 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 36 | 	@echo "  gettext    to make PO message catalogs"
 37 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 38 | 	@echo "  linkcheck  to check all external links for integrity"
 39 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 40 | 
 41 | clean:
 42 | 	-rm -rf $(BUILDDIR)/*
 43 | 
 44 | html:
 45 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 46 | 	@echo
 47 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 48 | 
 49 | dirhtml:
 50 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 51 | 	@echo
 52 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 53 | 
 54 | singlehtml:
 55 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 56 | 	@echo
 57 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 58 | 
 59 | pickle:
 60 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 61 | 	@echo
 62 | 	@echo "Build finished; now you can process the pickle files."
 63 | 
 64 | json:
 65 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 66 | 	@echo
 67 | 	@echo "Build finished; now you can process the JSON files."
 68 | 
 69 | htmlhelp:
 70 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 71 | 	@echo
 72 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 73 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 74 | 
 75 | qthelp:
 76 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 77 | 	@echo
 78 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 79 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 80 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/vocalization_segmentation.qhcp"
 81 | 	@echo "To view the help file:"
 82 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/vocalization_segmentation.qhc"
 83 | 
 84 | devhelp:
 85 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 86 | 	@echo
 87 | 	@echo "Build finished."
 88 | 	@echo "To view the help file:"
 89 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/vocalization_segmentation"
 90 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/vocalization_segmentation"
 91 | 	@echo "# devhelp"
 92 | 
 93 | epub:
 94 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 95 | 	@echo
 96 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 97 | 
 98 | latex:
 99 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
100 | 	@echo
101 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
102 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
103 | 	      "(use \`make latexpdf' here to do that automatically)."
104 | 
105 | latexpdf:
106 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
107 | 	@echo "Running LaTeX files through pdflatex..."
108 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
109 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
110 | 
111 | text:
112 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
113 | 	@echo
114 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
115 | 
116 | man:
117 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
118 | 	@echo
119 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
120 | 
121 | texinfo:
122 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
123 | 	@echo
124 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
125 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
126 | 	      "(use \`make info' here to do that automatically)."
127 | # The sub-make is invoked through $(MAKE), as in latexpdf, so flags such as -n and -j carry over.
128 | info:
129 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
130 | 	@echo "Running Texinfo files through makeinfo..."
131 | 	$(MAKE) -C $(BUILDDIR)/texinfo info
132 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
133 | 
134 | gettext:
135 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
136 | 	@echo
137 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
138 | 
139 | changes:
140 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
141 | 	@echo
142 | 	@echo "The overview file is in $(BUILDDIR)/changes."
143 | 
144 | linkcheck:
145 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
146 | 	@echo
147 | 	@echo "Link check complete; look for any errors in the above output " \
148 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
149 | 
150 | doctest:
151 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
152 | 	@echo "Testing of doctests in the sources finished, look at the " \
153 | 	      "results in $(BUILDDIR)/doctest/output.txt."
154 | 


--------------------------------------------------------------------------------
/docs/commands.rst:
--------------------------------------------------------------------------------
 1 | Commands
 2 | ========
 3 | 
 4 | The Makefile contains the central entry points for common tasks related to this project.
 5 | 
 6 | Syncing data to S3
 7 | ^^^^^^^^^^^^^^^^^^
 8 | 
 9 | * `make sync_data_to_s3` will use `aws s3 sync` to recursively sync files in `data/` up to `s3://<your-bucket>/data/`, where `<your-bucket>` is the optional `BUCKET` value set in the Makefile (do not include `s3://` in it).
10 | * `make sync_data_from_s3` will use `aws s3 sync` to recursively sync files from `s3://<your-bucket>/data/` to `data/`, using the same `BUCKET` value.
11 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # vocalization_segmentation documentation build configuration file, created by
  4 | # sphinx-quickstart.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing dir.
  7 | #
  8 | # Note that not all possible configuration values are present in this
  9 | # autogenerated file.
 10 | #
 11 | # All configuration values have a default; values that are commented out
 12 | # serve to show the default.
 13 | 
 14 | import os
 15 | import sys
 16 | 
 17 | # If extensions (or modules to document with autodoc) are in another directory,
 18 | # add these directories to sys.path here. If the directory is relative to the
 19 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 20 | # sys.path.insert(0, os.path.abspath('.'))
 21 | 
 22 | # -- General configuration -----------------------------------------------------
 23 | 
 24 | # If your documentation needs a minimal Sphinx version, state it here.
 25 | # needs_sphinx = '1.0'
 26 | 
 27 | # Add any Sphinx extension module names here, as strings. They can be extensions
 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 29 | extensions = []
 30 | 
 31 | # Add any paths that contain templates here, relative to this directory.
 32 | templates_path = ['_templates']
 33 | 
 34 | # The suffix of source filenames.
 35 | source_suffix = '.rst'
 36 | 
 37 | # The encoding of source files.
 38 | # source_encoding = 'utf-8-sig'
 39 | 
 40 | # The master toctree document.
 41 | master_doc = 'index'
 42 | 
 43 | # General information about the project.
 44 | project = u'vocalization_segmentation'
 45 | 
 46 | # The version info for the project you're documenting, acts as replacement for
 47 | # |version| and |release|, also used in various other places throughout the
 48 | # built documents.
 49 | #
 50 | # The short X.Y version.
 51 | version = '0.1'
 52 | # The full version, including alpha/beta/rc tags.
 53 | release = '0.1'
 54 | 
 55 | # The language for content autogenerated by Sphinx. Refer to documentation
 56 | # for a list of supported languages.
 57 | # language = None
 58 | 
 59 | # There are two options for replacing |today|: either, you set today to some
 60 | # non-false value, then it is used:
 61 | # today = ''
 62 | # Else, today_fmt is used as the format for a strftime call.
 63 | # today_fmt = '%B %d, %Y'
 64 | 
 65 | # List of patterns, relative to source directory, that match files and
 66 | # directories to ignore when looking for source files.
 67 | exclude_patterns = ['_build']
 68 | 
 69 | # The reST default role (used for this markup: `text`) to use for all documents.
 70 | # default_role = None
 71 | 
 72 | # If true, '()' will be appended to :func: etc. cross-reference text.
 73 | # add_function_parentheses = True
 74 | 
 75 | # If true, the current module name will be prepended to all description
 76 | # unit titles (such as .. function::).
 77 | # add_module_names = True
 78 | 
 79 | # If true, sectionauthor and moduleauthor directives will be shown in the
 80 | # output. They are ignored by default.
 81 | # show_authors = False
 82 | 
 83 | # The name of the Pygments (syntax highlighting) style to use.
 84 | pygments_style = 'sphinx'
 85 | 
 86 | # A list of ignored prefixes for module index sorting.
 87 | # modindex_common_prefix = []
 88 | 
 89 | 
 90 | # -- Options for HTML output ---------------------------------------------------
 91 | 
 92 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 93 | # a list of builtin themes.
 94 | html_theme = 'default'
 95 | 
 96 | # Theme options are theme-specific and customize the look and feel of a theme
 97 | # further.  For a list of options available for each theme, see the
 98 | # documentation.
 99 | # html_theme_options = {}
100 | 
101 | # Add any paths that contain custom themes here, relative to this directory.
102 | # html_theme_path = []
103 | 
104 | # The name for this set of Sphinx documents.  If None, it defaults to
105 | # "<project> v<release> documentation".
106 | # html_title = None
107 | 
108 | # A shorter title for the navigation bar.  Default is the same as html_title.
109 | # html_short_title = None
110 | 
111 | # The name of an image file (relative to this directory) to place at the top
112 | # of the sidebar.
113 | # html_logo = None
114 | 
115 | # The name of an image file (within the static path) to use as favicon of the
116 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
117 | # pixels large.
118 | # html_favicon = None
119 | 
120 | # Add any paths that contain custom static files (such as style sheets) here,
121 | # relative to this directory. They are copied after the builtin static files,
122 | # so a file named "default.css" will overwrite the builtin "default.css".
123 | html_static_path = ['_static']
124 | 
125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
126 | # using the given strftime format.
127 | # html_last_updated_fmt = '%b %d, %Y'
128 | 
129 | # If true, SmartyPants will be used to convert quotes and dashes to
130 | # typographically correct entities.
131 | # html_use_smartypants = True
132 | 
133 | # Custom sidebar templates, maps document names to template names.
134 | # html_sidebars = {}
135 | 
136 | # Additional templates that should be rendered to pages, maps page names to
137 | # template names.
138 | # html_additional_pages = {}
139 | 
140 | # If false, no module index is generated.
141 | # html_domain_indices = True
142 | 
143 | # If false, no index is generated.
144 | # html_use_index = True
145 | 
146 | # If true, the index is split into individual pages for each letter.
147 | # html_split_index = False
148 | 
149 | # If true, links to the reST sources are added to the pages.
150 | # html_show_sourcelink = True
151 | 
152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
153 | # html_show_sphinx = True
154 | 
155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
156 | # html_show_copyright = True
157 | 
158 | # If true, an OpenSearch description file will be output, and all pages will
159 | # contain a <link> tag referring to it.  The value of this option must be the
160 | # base URL from which the finished HTML is served.
161 | # html_use_opensearch = ''
162 | 
163 | # This is the file name suffix for HTML files (e.g. ".xhtml").
164 | # html_file_suffix = None
165 | 
166 | # Output file base name for HTML help builder.
167 | htmlhelp_basename = 'vocalization_segmentationdoc'
168 | 
169 | 
170 | # -- Options for LaTeX output --------------------------------------------------
171 | 
172 | latex_elements = {
173 |     # The paper size ('letterpaper' or 'a4paper').
174 |     # 'papersize': 'letterpaper',
175 | 
176 |     # The font size ('10pt', '11pt' or '12pt').
177 |     # 'pointsize': '10pt',
178 | 
179 |     # Additional stuff for the LaTeX preamble.
180 |     # 'preamble': '',
181 | }
182 | 
183 | # Grouping the document tree into LaTeX files. List of tuples
184 | # (source start file, target name, title, author, documentclass [howto/manual]).
185 | latex_documents = [
186 |     ('index',
187 |      'vocalization_segmentation.tex',
188 |      u'vocalization_segmentation Documentation',
189 |      u"Tim Sainburg", 'manual'),
190 | ]
191 | 
192 | # The name of an image file (relative to this directory) to place at the top of
193 | # the title page.
194 | # latex_logo = None
195 | 
196 | # For "manual" documents, if this is true, then toplevel headings are parts,
197 | # not chapters.
198 | # latex_use_parts = False
199 | 
200 | # If true, show page references after internal links.
201 | # latex_show_pagerefs = False
202 | 
203 | # If true, show URL addresses after external links.
204 | # latex_show_urls = False
205 | 
206 | # Documents to append as an appendix to all manuals.
207 | # latex_appendices = []
208 | 
209 | # If false, no module index is generated.
210 | # latex_domain_indices = True
211 | 
212 | 
213 | # -- Options for manual page output --------------------------------------------
214 | 
215 | # One entry per manual page. List of tuples
216 | # (source start file, name, description, authors, manual section).
217 | man_pages = [
218 |     ('index', 'vocalization_segmentation', u'vocalization_segmentation Documentation',
219 |      [u"Tim Sainburg"], 1)
220 | ]
221 | 
222 | # If true, show URL addresses after external links.
223 | # man_show_urls = False
224 | 
225 | 
226 | # -- Options for Texinfo output ------------------------------------------------
227 | 
228 | # Grouping the document tree into Texinfo files. List of tuples
229 | # (source start file, target name, title, author,
230 | #  dir menu entry, description, category)
231 | texinfo_documents = [
232 |     ('index', 'vocalization_segmentation', u'vocalization_segmentation Documentation',
233 |      u"Tim Sainburg", 'vocalization_segmentation',
234 |      'Simple algorithms for segmenting vocalizations without supervision', 'Miscellaneous'),
235 | ]
236 | 
237 | # Documents to append as an appendix to all manuals.
238 | # texinfo_appendices = []
239 | 
240 | # If false, no module index is generated.
241 | # texinfo_domain_indices = True
242 | 
243 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
244 | # texinfo_show_urls = 'footnote'
245 | 


--------------------------------------------------------------------------------
/docs/getting-started.rst:
--------------------------------------------------------------------------------
1 | Getting started
2 | ===============
3 | 
4 | This is where you describe how to get set up on a clean install, including the
5 | commands necessary to get the raw data (using the `sync_data_from_s3` command,
6 | for example), and then how to make the cleaned, final data sets.
7 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. vocalization_segmentation documentation master file, created by
 2 |    sphinx-quickstart.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | vocalization_segmentation documentation!
 7 | ==============================================
 8 | 
 9 | Contents:
10 | 
11 | .. toctree::
12 |    :maxdepth: 2
13 | 
14 |    getting-started
15 |    commands
16 | 
17 | 
18 | 
19 | Indices and tables
20 | ==================
21 | 
22 | * :ref:`genindex`
23 | * :ref:`modindex`
24 | * :ref:`search`
25 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=_build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 31 | 	echo.  text       to make text files
 32 | 	echo.  man        to make manual pages
 33 | 	echo.  texinfo    to make Texinfo files
 34 | 	echo.  gettext    to make PO message catalogs
 35 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 36 | 	echo.  linkcheck  to check all external links for integrity
 37 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 38 | 	goto end
 39 | )
 40 | 
 41 | if "%1" == "clean" (
 42 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 43 | 	del /q /s %BUILDDIR%\*
 44 | 	goto end
 45 | )
 46 | 
 47 | if "%1" == "html" (
 48 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 49 | 	if errorlevel 1 exit /b 1
 50 | 	echo.
 51 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 52 | 	goto end
 53 | )
 54 | 
 55 | if "%1" == "dirhtml" (
 56 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 57 | 	if errorlevel 1 exit /b 1
 58 | 	echo.
 59 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 60 | 	goto end
 61 | )
 62 | 
 63 | if "%1" == "singlehtml" (
 64 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 65 | 	if errorlevel 1 exit /b 1
 66 | 	echo.
 67 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 68 | 	goto end
 69 | )
 70 | 
 71 | if "%1" == "pickle" (
 72 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 73 | 	if errorlevel 1 exit /b 1
 74 | 	echo.
 75 | 	echo.Build finished; now you can process the pickle files.
 76 | 	goto end
 77 | )
 78 | 
 79 | if "%1" == "json" (
 80 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 81 | 	if errorlevel 1 exit /b 1
 82 | 	echo.
 83 | 	echo.Build finished; now you can process the JSON files.
 84 | 	goto end
 85 | )
 86 | 
 87 | if "%1" == "htmlhelp" (
 88 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
 89 | 	if errorlevel 1 exit /b 1
 90 | 	echo.
 91 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
 92 | .hhp project file in %BUILDDIR%/htmlhelp.
 93 | 	goto end
 94 | )
 95 | 
 96 | if "%1" == "qthelp" (
 97 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
 98 | 	if errorlevel 1 exit /b 1
 99 | 	echo.
100 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
101 | .qhcp project file in %BUILDDIR%/qthelp, like this:
102 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\vocalization_segmentation.qhcp
103 | 	echo.To view the help file:
104 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\vocalization_segmentation.ghc
105 | 	goto end
106 | )
107 | 
108 | if "%1" == "devhelp" (
109 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
110 | 	if errorlevel 1 exit /b 1
111 | 	echo.
112 | 	echo.Build finished.
113 | 	goto end
114 | )
115 | 
116 | if "%1" == "epub" (
117 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
118 | 	if errorlevel 1 exit /b 1
119 | 	echo.
120 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
121 | 	goto end
122 | )
123 | 
124 | if "%1" == "latex" (
125 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
126 | 	if errorlevel 1 exit /b 1
127 | 	echo.
128 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
129 | 	goto end
130 | )
131 | 
132 | if "%1" == "text" (
133 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
134 | 	if errorlevel 1 exit /b 1
135 | 	echo.
136 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
137 | 	goto end
138 | )
139 | 
140 | if "%1" == "man" (
141 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
142 | 	if errorlevel 1 exit /b 1
143 | 	echo.
144 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
145 | 	goto end
146 | )
147 | 
148 | if "%1" == "texinfo" (
149 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
150 | 	if errorlevel 1 exit /b 1
151 | 	echo.
152 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
153 | 	goto end
154 | )
155 | 
156 | if "%1" == "gettext" (
157 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
158 | 	if errorlevel 1 exit /b 1
159 | 	echo.
160 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
161 | 	goto end
162 | )
163 | 
164 | if "%1" == "changes" (
165 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
166 | 	if errorlevel 1 exit /b 1
167 | 	echo.
168 | 	echo.The overview file is in %BUILDDIR%/changes.
169 | 	goto end
170 | )
171 | 
172 | if "%1" == "linkcheck" (
173 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
174 | 	if errorlevel 1 exit /b 1
175 | 	echo.
176 | 	echo.Link check complete; look for any errors in the above output ^
177 | or in %BUILDDIR%/linkcheck/output.txt.
178 | 	goto end
179 | )
180 | 
181 | if "%1" == "doctest" (
182 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
183 | 	if errorlevel 1 exit /b 1
184 | 	echo.
185 | 	echo.Testing of doctests in the sources finished, look at the ^
186 | results in %BUILDDIR%/doctest/output.txt.
187 | 	goto end
188 | )
189 | 
190 | :end
191 | 


--------------------------------------------------------------------------------
/models/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/models/.gitkeep


--------------------------------------------------------------------------------
/notebooks/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/notebooks/.gitkeep


--------------------------------------------------------------------------------
/references/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/references/.gitkeep


--------------------------------------------------------------------------------
/reports/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/reports/.gitkeep


--------------------------------------------------------------------------------
/reports/figures/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/reports/figures/.gitkeep


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | # local package
 2 | -e .
 3 | 
 4 | matplotlib
 5 | librosa
 6 | numpy
 7 | scipy
 8 | tqdm
 9 | seaborn
10 | 
11 | 
12 | # for testing
13 | coverage
14 | pytest>=3.6
15 | pytest-ordering
16 | pytest-cov
17 | python-coveralls


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup
 2 | 
 3 | setup(
 4 |     name="vocalseg",
 5 |     packages=find_packages(),
 6 |     version="0.1.0",
 7 |     description="Simple algorithms for segmenting vocalizations without supervision",
 8 |     author="Tim Sainburg",
 9 |     license="MIT",
10 | )
11 | 


--------------------------------------------------------------------------------
/test_environment.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | REQUIRED_PYTHON = "python3"
 4 | 
 5 | 
 6 | def main():
 7 |     system_major = sys.version_info.major
 8 |     if REQUIRED_PYTHON == "python":
 9 |         required_major = 2
10 |     elif REQUIRED_PYTHON == "python3":
11 |         required_major = 3
12 |     else:
13 |         raise ValueError("Unrecognized python interpreter: {}".format(
14 |             REQUIRED_PYTHON))
15 | 
16 |     if system_major != required_major:
17 |         raise TypeError(
18 |             "This project requires Python {}. Found: Python {}".format(
19 |                 required_major, sys.version))
20 |     else:
21 |         print(">>> Development environment passes all tests!")
22 | 
23 | 
24 | if __name__ == '__main__':
25 |     main()
26 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 79
3 | max-complexity = 10
4 | 


--------------------------------------------------------------------------------
/vocalseg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/vocalseg/__init__.py


--------------------------------------------------------------------------------
/vocalseg/continuity_filtering.py:
--------------------------------------------------------------------------------
  1 | from tqdm.autonotebook import tqdm
  2 | from vocalseg.utils import _normalize, spectrogram, norm, plot_spec
  3 | from vocalseg.dynamic_thresholding import dynamic_threshold_segmentation
  4 | import numpy as np
  5 | from scipy import ndimage, signal
  6 | import matplotlib.pyplot as plt
  7 | import matplotlib
  8 | import seaborn as sns
  9 | from matplotlib.colors import LinearSegmentedColormap
 10 | 
 11 | cmap = matplotlib.colors.ListedColormap(np.random.rand(256, 3))
 12 | cmap.set_bad(color=(0, 0, 0, 0))
 13 | 
 14 | 
 15 | def continuity_segmentation(
 16 |     vocalization,
 17 |     rate,
 18 |     min_level_db=-80,
 19 |     min_level_db_floor=-40,
 20 |     db_delta=5,
 21 |     n_fft=1024,
 22 |     hop_length_ms=1,
 23 |     win_length_ms=5,
 24 |     ref_level_db=20,
 25 |     pre=0.97,
 26 |     spectral_range=None,
 27 |     verbose=False,
 28 |     mask_thresh_std=1,
 29 |     neighborhood_time_ms=5,
 30 |     neighborhood_freq_hz=500,
 31 |     neighborhood_thresh=0.5,
 32 |     min_syllable_length_s=0.1,
 33 |     min_silence_for_spec=0.1,
 34 |     silence_threshold=0.05,
 35 |     max_vocal_for_spec=1.0,
 36 |     temporal_neighbor_merge_distance_ms=0.0,
 37 |     overlapping_element_merge_thresh=np.inf,
 38 |     min_element_size_ms_hz=[0, 0],  # ms, hz
 39 |     figsize=(20, 5),
 40 | ):
 41 |     """
 42 |     segments song into continuous elements
 43 | 
 44 |     Arguments:
 45 |         vocalization {[type]} -- waveform of song
 46 |         rate {[type]} -- samplerate of datas
 47 | 
 48 |     Keyword Arguments:
 49 |         min_level_db {int} -- default dB minimum of spectrogram (threshold anything below) (default: {-80})
 50 |         min_level_db_floor {int} -- highest number min_level_db is allowed to reach dynamically (default: {-40})
 51 |         db_delta {int} -- delta in setting min_level_db (default: {5})
 52 |         n_fft {int} -- FFT window size (default: {1024})
 53 |         hop_length_ms {int} -- number audio of frames in ms between STFT columns (default: {1})
 54 |         win_length_ms {int} -- size of fft window (ms) (default: {5})
 55 |         ref_level_db {int} -- reference level dB of audio (default: {20})
 56 |         pre {float} -- coefficient for preemphasis filter (default: {0.97})
 57 |         spectral_range {[type]} -- spectral range to care about for spectrogram (default: {None})
 58 |         verbose {bool} -- display output (default: {False})
 59 |         mask_thresh_std {int} -- standard deviations above median to threshold out noise (higher = threshold more noise) (default: {1})
 60 |         neighborhood_time_ms {int} -- size in time of neighborhood-continuity filter (default: {5})
 61 |         neighborhood_freq_hz {int} -- size in Hz of neighborhood-continuity filter (default: {500})
 62 |         neighborhood_thresh {float} -- threshold number of neighborhood time-frequency bins above 0 to consider a bin not noise (default: {0.5})
 63 |         min_syllable_length_s {float} -- shortest expected length of syllable (default: {0.1})
 64 |         min_silence_for_spec {float} -- shortest expected length of silence in a song (used to set dynamic threshold) (default: {0.1})
 65 |         silence_threshold {float} -- threshold for spectrogram to consider noise as silence (default: {0.05})
 66 |         max_vocal_for_spec {float} -- longest expected vocalization in seconds  (default: {1.0})
 67 |         temporal_neighbor_merge_distance_ms {float} -- longest distance at which two elements should be considered one (default: {0.0})
 68 |         overlapping_element_merge_thresh {float} -- proportion of temporal overlap to consider two elements one (default: {np.inf})
 69 |         min_element_size_ms_hz {list} --  smallest expected element size (in ms and HZ). Everything smaller is removed. (default: {[0, 0]})
 70 |         figsize {tuple} -- size of figure for displaying output (default: {(20, 5)})
 71 | 
 72 |     Returns:
 73 |         results -- a dictionary with results of segmentation
 74 |     """
 75 | 
 76 |     def plot_interim(spec, cmap=plt.cm.afmhot, zero_nan=False):
 77 |         fig, ax = plt.subplots(figsize=figsize)
 78 |         if zero_nan:
 79 |             spec = spec.copy()
 80 |             spec[spec == 0] = np.nan
 81 |         plot_spec(
 82 |             spec,
 83 |             fig=fig,
 84 |             ax=ax,
 85 |             rate=rate,
 86 |             hop_len_ms=hop_length_ms,
 87 |             show_cbar=False,
 88 |             cmap=cmap,
 89 |         )
 90 |         plt.show()
 91 | 
 92 |     results = dynamic_threshold_segmentation(
 93 |         vocalization,
 94 |         rate,
 95 |         n_fft=n_fft,
 96 |         hop_length_ms=hop_length_ms,
 97 |         win_length_ms=win_length_ms,
 98 |         ref_level_db=ref_level_db,
 99 |         pre=pre,
100 |         min_level_db=min_level_db,
101 |         db_delta=db_delta,
102 |         silence_threshold=silence_threshold,
103 |         verbose=verbose,
104 |         spectral_range=spectral_range,
105 |         min_syllable_length_s=min_syllable_length_s,
106 |         min_silence_for_spec=min_silence_for_spec,
107 |         max_vocal_for_spec=max_vocal_for_spec,
108 |     )
109 |     if results is None:
110 |         return None
111 | 
112 |     spec = results["spec"]
113 | 
114 |     # bin width in Hz
115 |     if spectral_range is None:
116 |         spec_bin_hz = (rate / 2) / np.shape(spec)[0]
117 |     else:
118 |         spec_bin_hz = (spectral_range[1] - spectral_range[0]) / np.shape(spec)[0]
119 | 
120 |     if verbose:
121 |         plot_interim(spec, cmap=plt.cm.Greys)
122 | 
123 |     ### create a mask
124 |     mask = mask_spectrogram(spec, mask_thresh_std)
125 | 
126 |     if verbose:
127 |         plot_interim(mask)
128 | 
129 |     # Create a smoothing filter for the mask in time and frequency
130 |     continuity_filter = make_continuity_filter(
131 |         neighborhood_freq_hz, neighborhood_time_ms, spec_bin_hz, hop_length_ms
132 |     )
133 |     print(np.shape(continuity_filter))
134 |     ### remove non-continuous regions of the mask
135 |     # apply filter
136 |     mask = signal.fftconvolve(
137 |         (1 - mask.astype("float32")), continuity_filter, mode="same"
138 |     )
139 |     # threshold filter
140 |     mask = mask < neighborhood_thresh
141 | 
142 |     if verbose:
143 |         plot_interim(mask)
144 | 
145 |     # find continous elements
146 |     elements = segment_mask(mask)
147 | 
148 |     if verbose:
149 |         plot_interim(elements, cmap=cmap, zero_nan=True)
150 | 
151 |     # get element timing
152 |     unique_elements, syllable_start_times, syllable_end_times = get_syllable_timing(
153 |         elements, hop_length_ms
154 |     )
155 |     print("unique elements: {}".format(len(unique_elements)))
156 |     # merge elements that are nearby to each other
157 |     if temporal_neighbor_merge_distance_ms > 0:
158 |         elements = merge_temporal_neighbors(
159 |             elements,
160 |             unique_elements,
161 |             syllable_start_times,
162 |             syllable_end_times,
163 |             temporal_neighbor_merge_distance_ms,
164 |         )
165 | 
166 |         if verbose:
167 |             plot_interim(elements, cmap=cmap, zero_nan=True)
168 |             unique_elements = np.unique(elements[elements != 0].astype(int))
169 |             print("unique elements: {}".format(len(unique_elements)))
170 | 
171 |     # no reason to merge overlapping if already merging neighbords
172 |     elif overlapping_element_merge_thresh <= 1.0:
173 |         # merge elements that are overlapping in time by some amount
174 |         elements = merge_overlapping_elements(
175 |             elements,
176 |             unique_elements,
177 |             syllable_start_times,
178 |             syllable_end_times,
179 |             overlapping_element_merge_thresh,
180 |         )
181 |         if verbose:
182 |             plot_interim(elements, cmap=cmap, zero_nan=True)
183 |             unique_elements = np.unique(elements[elements != 0].astype(int))
184 |             print("unique elements: {}".format(len(unique_elements)))
185 | 
186 |     # remove elements that are
187 |     if np.product(min_element_size_ms_hz) > 0:
188 |         min_element_size = int(
189 |             np.product(
190 |                 (
191 |                     min_element_size_ms_hz[0] / hop_length_ms,
192 |                     min_element_size_ms_hz[1] / spec_bin_hz,
193 |                 )
194 |             )
195 |         )
196 |         if min_element_size > 0:
197 |             elements = remove_small_elements(elements, min_element_size)
198 | 
199 |     # randomize label values since they are temporally/frequency continuous
200 |     # elements = randomize_labels(elements)
201 |     if verbose:
202 |         plot_interim(elements, cmap=cmap, zero_nan=True)
203 |         unique_elements = np.unique(elements[elements != 0].astype(int))
204 |         print("unique elements: {}".format(len(unique_elements)))
205 | 
206 |     results["elements"] = elements
207 | 
208 |     # get time in seconds for each element's start and stop
209 |     fft_rate = rate / int(hop_length_ms / 1000 * rate)
210 |     results["onsets"] = []
211 |     results["offsets"] = []
212 |     for element in np.unique(results["elements"])[1:]:
213 |         element_in_frame = np.sum(results["elements"] == element, axis=0) > 0
214 |         element_start, element_end = np.where(element_in_frame)[0][[0, -1]] / fft_rate
215 |         results["onsets"].append(element_start)
216 |         results["offsets"].append(element_end)
217 | 
218 |     return results
219 | 
220 | 
def remove_small_elements(elements, min_element_size):
    """
    remove elements that are below some threshold size

    Arguments:
        elements {np.ndarray} -- labelled array, 0 is treated as background
        min_element_size {int} -- minimum number of bins an element must
            cover to be kept

    Returns:
        np.ndarray -- the same array object, modified in place, with
            too-small elements set to 0
    """
    # get unique (non-background) labels
    unique_elements = np.unique(elements[elements != 0].astype(int))

    for element in unique_elements:
        member_bins = elements == element
        # if the size of the cluster is smaller than the minimum, remove it
        if np.count_nonzero(member_bins) < min_element_size:
            elements[member_bins] = 0

    return elements
234 | 
235 | 
def merge_temporal_neighbors(
    elements,
    unique_elements,
    syllable_start_times,
    syllable_end_times,
    temporal_neighbor_merge_distance_ms,
):
    """
    merge elements that are within temporal_neighbor_merge_distance_ms
     ms of each other

    Arguments:
        elements {np.ndarray} -- labelled array; relabelled in place
        unique_elements {np.ndarray} -- label of each element
        syllable_start_times {np.ndarray} -- start time of each element,
            aligned with unique_elements
        syllable_end_times {np.ndarray} -- end time of each element,
            aligned with unique_elements
        temporal_neighbor_merge_distance_ms {float} -- an element starting
            less than this long after another element ends is merged into it

    Returns:
        np.ndarray -- the relabelled elements array (same object as the input)
    """
    # the iterator walks the arrays as they were passed in; the working
    # copies below shrink as elements get absorbed
    progress = tqdm(
        zip(unique_elements, syllable_start_times, syllable_end_times),
        total=len(unique_elements),
        desc="merging temporal neighbors",
        leave=False,
    )
    remaining_labels = unique_elements
    remaining_starts = syllable_start_times
    remaining_ends = syllable_end_times

    absorbed_into = {}  # absorbed label -> label it was merged into
    for label, st, et in progress:
        # an element that was already absorbed passes its neighbors on to
        # the element that absorbed it
        label = absorbed_into.get(label, label)

        # remaining elements that start after this one starts and before
        # its end plus temporal_neighbor_merge_distance_ms
        window_end = et + (temporal_neighbor_merge_distance_ms)
        hits = np.where(
            (remaining_starts > st) & (remaining_starts < window_end)
        )[0]
        if len(hits) == 0:
            continue

        for hit in hits:
            neighbor = remaining_labels[hit]
            absorbed_into[neighbor] = label
            elements[elements == neighbor] = label

        # absorbed elements are no longer merge candidates
        remaining_labels = np.delete(remaining_labels, hits)
        remaining_starts = np.delete(remaining_starts, hits)
        remaining_ends = np.delete(remaining_ends, hits)

    return elements
287 | 
288 | 
def merge_overlapping_elements(
    elements,
    unique_elements,
    syllable_start_times,
    syllable_end_times,
    overlapping_element_merge_thresh,
):
    """
    merge elements that are overlapping by at least overlapping_element_merge_thresh

    Arguments:
        elements {np.ndarray} -- labelled array; relabelled in place
        unique_elements {np.ndarray} -- label of each element
        syllable_start_times {np.ndarray} -- start time of each element,
            aligned with unique_elements
        syllable_end_times {np.ndarray} -- end time of each element,
            aligned with unique_elements
        overlapping_element_merge_thresh {float} -- fraction of an element's
            duration that must be overlapped for it to be merged

    Returns:
        np.ndarray -- the relabelled elements array (same object as the input)
    """
    # order everything from shortest to longest element
    by_length = np.argsort(syllable_end_times - syllable_start_times)
    ends = syllable_end_times[by_length]
    starts = syllable_start_times[by_length]
    labels = unique_elements[by_length]

    # the iterator walks the full sorted arrays; the working arrays shrink as
    # elements are merged away
    progress = tqdm(
        zip(labels, starts, ends),
        total=len(labels),
        desc="merging temporally overlapping elements",
        leave=False,
    )
    for label, st, et in progress:
        # elements have to be overlapped at least this length to merge
        min_overlap = (et - st) * overlapping_element_merge_thresh

        # candidate elements either
        #   straddle the end: start before et - min_overlap and end after et
        straddles_end = (starts < (et - min_overlap)) & (ends > et)
        #   straddle the start: start before st and end after st + min_overlap
        straddles_start = (starts < (st)) & (ends > (st + min_overlap))
        #   lie inside: start after st, end before et, longer than min_overlap
        lies_inside = ((starts > (st)) & (ends < et)) & (
            (ends - starts) > min_overlap
        )
        #   or fully cover this element
        covers = (starts < st) & (ends > et)

        candidates = np.where(
            straddles_end | straddles_start | lies_inside | covers
        )[0]
        if len(candidates) == 0:
            continue

        # arrays are sorted by length, so the last candidate is the longest
        target = labels[candidates[-1]]

        # relabel this element as the longest overlapping one
        elements[elements == label] = target

        # drop the merged element from the working lists
        gone = np.where(labels == label)[-1]
        labels = np.delete(labels, gone)
        starts = np.delete(starts, gone)
        ends = np.delete(ends, gone)

    return elements
357 | 
358 | 
def randomize_labels(elements):
    """
    randomly permute the non-zero label values of a labelled array

    Arguments:
        elements {np.ndarray} -- labelled array, 0 is treated as background;
            modified in place

    Returns:
        np.ndarray -- the same array object with its labels permuted
    """
    unique_elements = np.unique(elements[elements != 0].astype(int))
    perm = np.random.permutation(unique_elements)
    # look labels up in an untouched snapshot: relabelling in place while
    # reading from the same array lets an already-relabelled region match a
    # later label and get merged with it
    snapshot = elements.copy()
    for el, val in zip(unique_elements, perm):
        elements[snapshot == el] = val
    return elements
366 | 
367 | 
def mask_spectrogram(spec, mask_thresh_std):
    """
    masks low power noise in a spectrogram

    Each row of spec gets its own cutoff: the row median plus
    mask_thresh_std row standard deviations (plus a small epsilon).

    Arguments:
        spec {np.ndarray} -- 2D spectrogram; statistics are taken along axis 1
        mask_thresh_std {float} -- number of standard deviations above the
            median at which the cutoff is placed

    Returns:
        np.ndarray -- float32 array shaped like spec, 1.0 where the value is
            below its row's cutoff and 0.0 elsewhere
    """
    row_median = np.median(spec, axis=1)
    row_spread = np.std(spec, axis=1)
    cutoff = (row_median + mask_thresh_std * row_spread) + 1e-5
    # transpose so the per-row cutoff broadcasts along the last axis
    below_cutoff = spec.T < cutoff
    return below_cutoff.astype("float32").T
387 | 
388 | 
def make_continuity_filter(
    neighborhood_freq_hz, neighborhood_time_ms, spec_bin_hz, hop_length_ms
):
    """
     Generate a filter for continuous elements

    The filter is a uniform (box) kernel whose entries sum to 1.

    Arguments:
        neighborhood_freq_hz {float} -- extent of the neighborhood in Hz
        neighborhood_time_ms {float} -- extent of the neighborhood in ms
        spec_bin_hz {float} -- width of one spectrogram frequency bin in Hz
        hop_length_ms {float} -- duration of one spectrogram frame in ms

    Returns:
        np.ndarray -- array of shape (n_bin_freq, n_bin_time) with every
            entry equal to 1 / (n_bin_freq * n_bin_time)
    """
    # a neighborhood smaller than one bin would truncate to 0 bins and yield
    # an empty filter, so use at least one bin along each axis
    n_bin_freq = max(int(neighborhood_freq_hz / spec_bin_hz), 1)
    n_bin_time = max(int(neighborhood_time_ms / hop_length_ms), 1)
    # plain multiplication instead of np.product (removed in NumPy 2.0)
    return np.ones((n_bin_freq, n_bin_time)) / (n_bin_freq * n_bin_time)
407 | 
408 | 
def segment_mask(mask):
    """
    segments a binary spectrogram mask into individual elements

    Arguments:
        mask {np.ndarray} -- mask in which False marks the bins to label

    Returns:
        np.ndarray -- float32 array shaped like mask; connected regions of
            False bins are numbered 1, 2, ... and all other bins are 0
    """
    # connected-component labelling of the unmasked (False) bins
    unmasked = mask == False
    labelled, _n_found = ndimage.label(unmasked)
    return labelled.astype("float32")
423 | 
424 | 
def get_syllable_timing(elements, hop_length_ms):
    """
    gets the start and end time of each labelled element

    Arguments:
        elements {np.ndarray} -- 2D labelled array, 0 is treated as
            background; columns (axis 1) are treated as time frames
        hop_length_ms {float} -- duration of one frame in ms

    Returns:
        tuple -- (unique_elements, syllable_start_times, syllable_end_times),
            all sorted by start time; times are frame indices multiplied by
            hop_length_ms, with the end time taken one frame past the last
            frame the element occupies. All three are empty when elements
            contains no labels.
    """

    # get unique points
    unique_elements = np.unique(elements[elements != 0].astype(int))

    # nothing labelled: return empty timings instead of failing on the
    # tuple-unpacking below
    if len(unique_elements) == 0:
        return unique_elements, np.array([], dtype=float), np.array([], dtype=float)

    # get the time coverage of each element (bins occupied per frame)
    total_coverage = [
        np.sum(elements == i, axis=0)
        for i in tqdm(unique_elements, desc="element coverage", leave=False)
    ]

    # get the start and end times of each syllable: first occupied frame and
    # one past the last occupied frame, scaled to ms
    syllable_start_times, syllable_end_times = np.array(
        [
            np.where(i > 0)[0][np.array([0, -1])] + np.array([0.0, 1.0])
            for i in tqdm(total_coverage, desc="element length", leave=False)
        ]
    ).T * float(hop_length_ms)

    # order everything by start time
    sort_mask = np.argsort(syllable_start_times)
    syllable_start_times = syllable_start_times[sort_mask]
    syllable_end_times = syllable_end_times[sort_mask]
    unique_elements = unique_elements[sort_mask]

    return unique_elements, syllable_start_times, syllable_end_times
460 | 
461 | 
def plot_labelled_elements(elements, spec, background="white", figsize=(30, 5)):
    """
    plots a spectrogram with colormap labels

    Arguments:
        elements {np.ndarray} -- labelled array, 0 is treated as background
        spec {np.ndarray} -- spectrogram values, indexed with the same masks
            as elements and passed through each element's colormap

    Keyword Arguments:
        background {str} -- axes face color; "black" selects colormaps that
            fade towards white, anything else fades towards black (default: {"white"})
        figsize {tuple} -- size of the figure (default: {(30, 5)})

    Returns:
        np.ndarray -- RGBA image with the shape of elements plus a trailing
            axis of length 4
    """
    labels = np.unique(elements[elements != 0].astype(int))
    # one rainbow color per element, in shuffled order
    palette = np.random.permutation(
        sns.color_palette("rainbow", n_colors=len(labels))
    )
    on_black = background == "black"

    rgba = np.zeros(list(np.shape(elements)) + [4])
    # fill spectrogram with colored regions
    for label, color in tqdm(zip(labels, palette), total=len(labels), leave=False):
        red, green, blue = color[0], color[1], color[2]
        if on_black:
            # element color fading to white, with an extra alpha step at 0.25
            channels = {
                "red": [(0, red, red), (1, 1, 1)],
                "green": [(0, green, green), (1, 1, 1)],
                "blue": [(0, blue, blue), (1, 1, 1)],
                "alpha": [(0, 0, 0), (0.25, 0.5, 0.5), (1, 1, 1)],
            }
        else:
            # element color fading to black, alpha rising linearly
            channels = {
                "red": [(0, red, red), (1, 0, 0)],
                "green": [(0, green, green), (1, 0, 0)],
                "blue": [(0, blue, blue), (1, 0, 0)],
                "alpha": [(0, 0, 0), (1, 1, 1)],
            }
        element_cmap = LinearSegmentedColormap("CustomMap", channels)

        region = elements == label
        rgba[region] = element_cmap(spec[region])

    fig, ax = plt.subplots(figsize=figsize)
    ax.set_facecolor(background)
    ax.imshow(rgba, interpolation=None, aspect="auto", origin="lower")

    return rgba
500 | 


--------------------------------------------------------------------------------
/vocalseg/dynamic_thresholding.py:
--------------------------------------------------------------------------------
  1 | from tqdm import tqdm
  2 | from vocalseg.utils import _normalize, spectrogram_nn, norm
  3 | import numpy as np
  4 | from scipy import ndimage
  5 | from matplotlib.patches import Rectangle
  6 | from matplotlib.collections import PatchCollection
  7 | from matplotlib import gridspec
  8 | from vocalseg.utils import plot_spec
  9 | 
 10 | 
 11 | def contiguous_regions(condition):
 12 |     """
 13 |     Compute contiguous region of binary value (e.g. silence in waveform) to 
 14 |         ensure noise levels are sufficiently low
 15 |     
 16 |     Arguments:
 17 |         condition {[type]} -- [description]
 18 |     
 19 |     Returns:
 20 |         [type] -- [description]
 21 |     """
 22 |     idx = []
 23 |     i = 0
 24 |     while i < len(condition):
 25 |         x1 = i + condition[i:].argmax()
 26 |         try:
 27 |             x2 = x1 + condition[x1:].argmin()
 28 |         except:
 29 |             x2 = x1 + 1
 30 |         if x1 == x2:
 31 |             if condition[x1] == True:
 32 |                 x2 = len(condition)
 33 |             else:
 34 |                 break
 35 |         idx.append([x1, x2])
 36 |         i = x2
 37 |     return idx
 38 | 
 39 | 
 40 | def dynamic_threshold_segmentation(
 41 |     vocalization,
 42 |     rate,
 43 |     min_level_db=-80,
 44 |     min_level_db_floor=-40,
 45 |     db_delta=5,
 46 |     n_fft=1024,
 47 |     hop_length_ms=1,
 48 |     win_length_ms=5,
 49 |     ref_level_db=20,
 50 |     pre=0.97,
 51 |     silence_threshold=0.05,
 52 |     min_silence_for_spec=0.1,
 53 |     max_vocal_for_spec=1.0,
 54 |     min_syllable_length_s=0.1,
 55 |     spectral_range=None,
 56 |     verbose=False,
 57 | ):
 58 |     """
 59 |     computes a spectrogram from a waveform by iterating through thresholds
 60 |          to ensure a consistent noise level
 61 |     
 62 |     Arguments:
 63 |         vocalization {[type]} -- waveform of song
 64 |         rate {[type]} -- samplerate of datas
 65 |     
 66 |     Keyword Arguments:
 67 |         min_level_db {int} -- default dB minimum of spectrogram (threshold anything below) (default: {-80})
 68 |         min_level_db_floor {int} -- highest number min_level_db is allowed to reach dynamically (default: {-40})
 69 |         db_delta {int} -- delta in setting min_level_db (default: {5})
 70 |         n_fft {int} -- FFT window size (default: {1024})
 71 |         hop_length_ms {int} -- number audio of frames in ms between STFT columns (default: {1})
 72 |         win_length_ms {int} -- size of fft window (ms) (default: {5})
 73 |         ref_level_db {int} -- reference level dB of audio (default: {20})
 74 |         pre {float} -- coefficient for preemphasis filter (default: {0.97})
 75 |         min_syllable_length_s {float} -- shortest expected length of syllable (default: {0.1})
 76 |         min_silence_for_spec {float} -- shortest expected length of silence in a song (used to set dynamic threshold) (default: {0.1})
 77 |         silence_threshold {float} -- threshold for spectrogram to consider noise as silence (default: {0.05})
 78 |         max_vocal_for_spec {float} -- longest expected vocalization in seconds  (default: {1.0})
 79 |         spectral_range {[type]} -- spectral range to care about for spectrogram (default: {None})
 80 |         verbose {bool} -- display output (default: {False})
 81 |     
 82 |     
 83 |     Returns:
 84 |         [results] -- [dictionary of results]
 85 |     """
 86 | 
 87 |     # does the envelope meet the standards necessary to consider this a bout
 88 |     envelope_is_good = False
 89 | 
 90 |     # make a copy of the hyperparameters
 91 | 
 92 |     # make a copy of the original spectrogram
 93 |     spec_orig = spectrogram_nn(
 94 |         vocalization,
 95 |         rate,
 96 |         n_fft=n_fft,
 97 |         hop_length_ms=hop_length_ms,
 98 |         win_length_ms=win_length_ms,
 99 |         ref_level_db=ref_level_db,
100 |         pre=pre,
101 |     )
102 |     # fft_rate = 1000 / hop_length_ms
103 |     fft_rate = rate / int(hop_length_ms / 1000 * rate)
104 | 
105 |     if spectral_range is not None:
106 |         spec_bin_hz = (rate / 2) / np.shape(spec_orig)[0]
107 |         spec_orig = spec_orig[
108 |             int(spectral_range[0] / spec_bin_hz) : int(spectral_range[1] / spec_bin_hz),
109 |             :,
110 |         ]
111 | 
112 |     # loop through possible thresholding configurations starting at the highest
113 |     for _, mldb in enumerate(
114 |         tqdm(
115 |             np.arange(min_level_db, min_level_db_floor, db_delta),
116 |             leave=False,
117 |             disable=(not verbose),
118 |         )
119 |     ):
120 |         # set the minimum dB threshold
121 |         min_level_db = mldb
122 |         # normalize the spectrogram
123 |         spec = norm(_normalize(spec_orig, min_level_db=min_level_db))
124 | 
125 |         # subtract the median
126 |         spec = spec - np.median(spec, axis=1).reshape((len(spec), 1))
127 |         spec[spec < 0] = 0
128 | 
129 |         # get the vocal envelope
130 |         vocal_envelope = np.max(spec, axis=0) * np.sqrt(np.mean(spec, axis=0))
131 |         # normalize envelope
132 |         vocal_envelope = vocal_envelope / np.max(vocal_envelope)
133 | 
134 |         # Look at how much silence exists in the signal
135 |         onsets, offsets = onsets_offsets(vocal_envelope > silence_threshold) / fft_rate
136 |         onsets_sil, offsets_sil = (
137 |             onsets_offsets(vocal_envelope <= silence_threshold) / fft_rate
138 |         )
139 | 
140 |         # if there is a silence of at least min_silence_for_spec length,
141 |         #  and a vocalization of no greater than max_vocal_for_spec length, the env is good
142 |         if len(onsets_sil) > 0:
143 |             # frames per second of spectrogram
144 | 
145 |             # longest silences and periods of vocalization
146 |             max_silence_len = np.max(offsets_sil - onsets_sil)
147 |             max_vocalization_len = np.max(offsets - onsets)
148 |             if verbose:
149 |                 print("longest silence", max_silence_len)
150 |                 print("longest vocalization", max_vocalization_len)
151 | 
152 |             if max_silence_len > min_silence_for_spec:
153 |                 if max_vocalization_len < max_vocal_for_spec:
154 |                     envelope_is_good = True
155 |                     break
156 |         if verbose:
157 |             print("Current min_level_db: {}".format(min_level_db))
158 | 
159 |     if not envelope_is_good:
160 |         return None
161 | 
162 |     onsets, offsets = onsets_offsets(vocal_envelope > silence_threshold) / fft_rate
163 | 
164 |     # threshold out short syllables
165 |     length_mask = (offsets - onsets) >= min_syllable_length_s
166 | 
167 |     return {
168 |         "spec": spec,
169 |         "vocal_envelope": vocal_envelope.astype("float32"),
170 |         "min_level_db": min_level_db,
171 |         "onsets": onsets[length_mask],
172 |         "offsets": offsets[length_mask],
173 |     }
174 | 
175 | 
def onsets_offsets(signal):
    """
    Find where each contiguous non-zero run of a signal starts and ends

    Arguments:
        signal {array} -- binary (or non-zero/zero) array; runs are found
            with ``scipy.ndimage.label``

    Returns:
        [np.ndarray] -- array of shape (2, n_runs): row 0 holds the first index
            of every run, row 1 the index one past its last element. When the
            signal contains no run at all, ``[[0], [0]]`` is returned (a single
            zero-length placeholder) so callers can always unpack and reduce
            the result.
    """
    elements, nelements = ndimage.label(signal)
    if nelements == 0:
        return np.array([[0], [0]])
    # find_objects yields, per label and in label order, the bounding slice
    # along each axis; one pass over the labels replaces a full-array search
    # per run.
    extents = ndimage.find_objects(elements)
    onsets = [extent[0].start for extent in extents]
    offsets = [extent[0].stop for extent in extents]
    return np.array([onsets, offsets])
197 | 
198 | 
199 | import seaborn as sns
200 | from matplotlib.colors import LinearSegmentedColormap
201 | import matplotlib.pyplot as plt
202 | 
203 | 
def plot_segmented_spec(
    spec,
    onsets,
    offsets,
    hop_length_ms,
    background="black",
    figsize=(30, 5),
    rate=None,
):
    """ plot spectrogram with each segment drawn in its own randomly assigned colour

    Arguments:
        spec {array} -- 2-D spectrogram; values are fed straight to a colormap,
            so presumably normalized to [0, 1] -- confirm against caller
        onsets {sequence} -- segment start times in seconds
        offsets {sequence} -- segment end times in seconds
        hop_length_ms {number} -- time between spectrogram columns in ms

    Keyword Arguments:
        background {str} -- axes face colour; "black" fades colours toward
            white, anything else fades toward black (default: {"black"})
        figsize {tuple} -- figure size (default: {(30, 5)})
        rate {number} -- sample rate of the audio. When given, the frame rate
            is computed from the integer hop in samples, matching
            dynamic_threshold_segmentation; when None the nominal
            1000 / hop_length_ms is used (default: {None})
    """
    pal = np.random.permutation(sns.color_palette("hsv", n_colors=len(onsets)))

    if rate is None:
        fft_rate = 1000 / hop_length_ms
    else:
        # the STFT hop is truncated to whole samples, so the true frame rate
        # can differ from 1000 / hop_length_ms
        fft_rate = rate / int(hop_length_ms / 1000 * rate)

    # RGBA image, fully transparent wherever no segment is painted
    new_spec = np.zeros(list(np.shape(spec)) + [4])
    for onset, offset, pi in zip(onsets, offsets, pal):
        if background == "black":
            cdict = {
                "red": [(0, pi[0], pi[0]), (1, 1, 1)],
                "green": [(0, pi[1], pi[1]), (1, 1, 1)],
                "blue": [(0, pi[2], pi[2]), (1, 1, 1)],
                "alpha": [(0, 0, 0), (0.25, 0.5, 0.5), (1, 1, 1)],
            }
        else:
            cdict = {
                "red": [(0, pi[0], pi[0]), (1, 0, 0)],
                "green": [(0, pi[1], pi[1]), (1, 0, 0)],
                "blue": [(0, pi[2], pi[2]), (1, 0, 0)],
                "alpha": [(0, 0, 0), (1, 1, 1)],
            }

        cmap = LinearSegmentedColormap("CustomMap", cdict)

        # round rather than truncate: seconds * frame rate can fall a hair
        # below the intended integer (e.g. 1.001 * 1000 == 1000.9999999999999)
        start_frame = int(round(onset * fft_rate))
        stop_frame = int(round(offset * fft_rate))
        new_spec[:, start_frame:stop_frame, :] = cmap(spec[:, start_frame:stop_frame])

    fig, ax = plt.subplots(figsize=figsize)
    ax.set_facecolor(background)
    ax.imshow(new_spec, interpolation=None, aspect="auto", origin="lower")
237 | 
238 | 
def plot_segmentations(
    spec, vocal_envelope, onsets, offsets, hop_length_ms, rate, figsize=(30, 5)
):
    """ plot the vocal envelope above the spectrogram and mark every segment

    Arguments:
        spec {array} -- spectrogram handed to plot_spec
        vocal_envelope {array} -- envelope drawn in the upper panel, plotted
            against its sample index
        onsets {sequence} -- segment start positions on the spectrogram x axis
        offsets {sequence} -- segment end positions on the spectrogram x axis
        hop_length_ms {number} -- forwarded to plot_spec as hop_len_ms
        rate {number} -- forwarded to plot_spec as rate

    Keyword Arguments:
        figsize {tuple} -- figure size (default: {(30, 5)})

    Returns:
        [matplotlib figure] -- the figure holding both panels
    """
    fig = plt.figure(figsize=figsize)
    # two stacked panels (1:3 height ratio) with no vertical gap
    grid = gridspec.GridSpec(2, 1, height_ratios=[1, 3])
    grid.update(hspace=0.0)
    envelope_ax = plt.subplot(grid[0])
    spec_ax = plt.subplot(grid[1])

    plot_spec(
        spec, fig, spec_ax, rate=rate, hop_len_ms=hop_length_ms, show_cbar=False
    )
    envelope_ax.plot(vocal_envelope, color="k")
    envelope_ax.set_xlim([0, len(vocal_envelope)])
    spec_ax.xaxis.tick_bottom()

    # segment bars occupy the top tenth of the spectrogram's y range
    y_low, y_high = spec_ax.get_ylim()
    bar_height = (y_high - y_low) * 0.1
    bar_bottom = y_high - bar_height

    segment_bars = []
    for start, stop in zip(onsets, offsets):
        for boundary in (start, stop):
            spec_ax.axvline(boundary, color="#FFFFFF", ls="dashed", lw=0.75)
        segment_bars.append(
            Rectangle(xy=(start, bar_bottom), width=stop - start, height=bar_height)
        )

    spec_ax.add_collection(PatchCollection(segment_bars, color="white", alpha=0.5))
    envelope_ax.axis("off")
    return fig
265 | 
266 | 


--------------------------------------------------------------------------------
/vocalseg/examples/__init__.py:
--------------------------------------------------------------------------------
 1 | from scipy.io import wavfile
 2 | import os
 3 | 
 4 | FP = os.path.dirname(os.path.abspath(__file__))
 5 | 
 6 | 
 7 | def starling():
 8 |     rate, data = wavfile.read(os.path.join(FP, "starling.wav"))
 9 |     return rate, data
10 | 
11 | 
def mouse():
    """Read the bundled ``mouse_usv.wav`` example and return ``(rate, data)``."""
    wav_path = os.path.join(FP, "mouse_usv.wav")
    return wavfile.read(wav_path)
15 | 
16 | 
def canary():
    """Read the bundled ``canary.wav`` example and return ``(rate, data)``."""
    wav_path = os.path.join(FP, "canary.wav")
    return wavfile.read(wav_path)
20 | 
21 | 
def bengalese_finch():
    """Read the bundled ``bengalese_finch.wav`` example and return ``(rate, data)``."""
    wav_path = os.path.join(FP, "bengalese_finch.wav")
    return wavfile.read(wav_path)
25 | 
26 | 
def mocking():
    """Read the bundled ``mocking.wav`` example and return ``(rate, data)``."""
    wav_path = os.path.join(FP, "mocking.wav")
    return wavfile.read(wav_path)
30 | 


--------------------------------------------------------------------------------
/vocalseg/examples/bengalese_finch.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/vocalseg/examples/bengalese_finch.wav


--------------------------------------------------------------------------------
/vocalseg/examples/canary.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/vocalseg/examples/canary.wav


--------------------------------------------------------------------------------
/vocalseg/examples/mocking.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/vocalseg/examples/mocking.wav


--------------------------------------------------------------------------------
/vocalseg/examples/mouse_usv.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/vocalseg/examples/mouse_usv.wav


--------------------------------------------------------------------------------
/vocalseg/examples/starling.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timsainb/vocalization-segmentation/8bc85ee9bb644cc5928535959faee5e5b184dd36/vocalseg/examples/starling.wav


--------------------------------------------------------------------------------
/vocalseg/utils.py:
--------------------------------------------------------------------------------
  1 | from scipy.signal import butter, lfilter
  2 | import numpy as np
  3 | import librosa
  4 | from scipy import signal
  5 | import warnings
  6 | import matplotlib.pyplot as plt
  7 | 
  8 | ### General
  9 | 
 10 | 
 11 | def int16tofloat32(data):
 12 |     return np.array(data / 32768).astype("float32")
 13 | 
 14 | 
 15 | def norm(x, _type="zero_one"):
 16 |     return (x - np.min(x)) / (np.max(x) - np.min(x))
 17 | 
 18 | 
 19 | ### Filtering
 20 | 
 21 | 
 22 | def butter_bandpass(lowcut, highcut, fs, order=5):
 23 |     nyq = 0.5 * fs
 24 |     low = lowcut / nyq
 25 |     high = highcut / nyq
 26 |     b, a = butter(order, [low, high], btype="band")
 27 |     return b, a
 28 | 
 29 | 
 30 | def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
 31 |     if highcut > int(fs / 2):
 32 |         warnings.warn("Highcut is too high for bandpass filter. Setting to nyquist")
 33 |         highcut = int(fs / 2)
 34 |     b, a = butter_bandpass(lowcut, highcut, fs, order=order)
 35 |     y = lfilter(b, a, data)
 36 |     return y
 37 | 
 38 | 
 39 | ### Spectrogramming
 40 | 
 41 | 
 42 | def spectrogram(
 43 |     y,
 44 |     fs,
 45 |     n_fft=1024,
 46 |     hop_length_ms=1,
 47 |     win_length_ms=5,
 48 |     ref_level_db=20,
 49 |     pre=0.97,
 50 |     min_level_db=-50,
 51 | ):
 52 |     return _normalize(
 53 |         spectrogram_nn(
 54 |             y,
 55 |             fs,
 56 |             n_fft=n_fft,
 57 |             hop_length_ms=hop_length_ms,
 58 |             win_length_ms=win_length_ms,
 59 |             ref_level_db=ref_level_db,
 60 |             pre=pre,
 61 |         ),
 62 |         min_level_db=min_level_db,
 63 |     )
 64 | 
 65 | 
 66 | def spectrogram_nn(y, fs, n_fft, hop_length_ms, win_length_ms, ref_level_db, pre):
 67 |     D = _stft(preemphasis(y, pre), fs, n_fft, hop_length_ms, win_length_ms)
 68 |     S = _amp_to_db(np.abs(D)) - ref_level_db
 69 |     return S
 70 | 
 71 | 
 72 | def preemphasis(x, pre):
 73 |     return signal.lfilter([1, -pre], [1], x)
 74 | 
 75 | 
 76 | def _stft(y, fs, n_fft, hop_length_ms, win_length_ms):
 77 |     return librosa.stft(
 78 |         y=y,
 79 |         n_fft=n_fft,
 80 |         hop_length=int(hop_length_ms / 1000 * fs),
 81 |         win_length=int(win_length_ms / 1000 * fs),
 82 |     )
 83 | 
 84 | 
 85 | def _amp_to_db(x):
 86 |     return 20 * np.log10(np.maximum(1e-5, x))
 87 | 
 88 | 
 89 | def _normalize(S, min_level_db):
 90 |     return np.clip((S - min_level_db) / -min_level_db, 0, 1)
 91 | 
 92 | 
 93 | ### viz
 94 | 
 95 | import matplotlib.pyplot as plt
 96 | 
 97 | 
 98 | def plot_spec(
 99 |     spec,
100 |     fig=None,
101 |     ax=None,
102 |     rate=None,
103 |     hop_len_ms=None,
104 |     cmap=plt.cm.afmhot,
105 |     show_cbar=True,
106 |     spectral_range=None,
107 |     time_range=None,
108 |     figsize=(20, 6),
109 | ):
110 |     """plot spectrogram
111 |     
112 |     [description]
113 |     
114 |     Arguments:
115 |         spec {[type]} -- [description]
116 |         fig {[type]} -- [description]
117 |         ax {[type]} -- [description]
118 |     
119 |     Keyword Arguments:
120 |         cmap {[type]} -- [description] (default: {plt.cm.afmhot})
121 |     """
122 |     if ax is None:
123 |         fig, ax = plt.subplots(figsize=figsize)
124 | 
125 |     extent = [0, np.shape(spec)[1], 0, np.shape(spec)[0]]
126 |     if rate is not None:
127 |         extent[3] = rate / 2
128 |     if hop_len_ms is not None:
129 |         # adjust for integeger
130 |         hop_len_ms_int_adj = int(hop_len_ms / 1000 * rate) / (rate / 1000)
131 |         extent[1] = (np.shape(spec)[1] * hop_len_ms_int_adj) / 1000
132 |     if spectral_range is not None:
133 |         extent[2] = spectral_range[0]
134 |         extent[3] = spectral_range[1]
135 |     if time_range is not None:
136 |         extent[0] = time_range[0]
137 |         extent[1] = time_range[1]
138 | 
139 |     spec_ax = ax.matshow(
140 |         spec,
141 |         interpolation=None,
142 |         aspect="auto",
143 |         cmap=cmap,
144 |         origin="lower",
145 |         extent=extent,
146 |     )
147 |     if show_cbar:
148 |         cbar = fig.colorbar(spec_ax, ax=ax)
149 |         return spec_ax, cbar
150 |     else:
151 |         return spec_ax
152 | 


--------------------------------------------------------------------------------
/vocalseg/vocalseg.code-workspace:
--------------------------------------------------------------------------------
1 | {
2 | 	"folders": [
3 | 		{
4 | 			"path": "/mnt/cube/tsainbur/Projects/github_repos/vocalization_segmentation"
5 | 		}
6 | 	]
7 | }


--------------------------------------------------------------------------------