├── vggish
    ├── __init__.py
    ├── vggish_params.py
    ├── vggish_input.py
    ├── vggish_postprocess.py
    ├── vggish_slim.py
    └── mel_features.py
├── requirements-docker.txt
├── requirements.txt
├── assets
    ├── uhu.png
    ├── seagull.png
    ├── blackbird.png
    ├── Typical_cnn.png
    ├── anser_anser.png
    ├── steinadler.png
    ├── angry_hissing.png
    ├── steinadler_50_50.png
    └── Typical_cnn_spectrogram.png
├── requirements-gpu.txt
├── input
    └── bird_id_map.pickle
├── Dockerfile.gpu
├── train_docker.sh
├── Dockerfile.cpu
├── audio_splitter.py
├── modelbuilder.py
├── .gitignore
├── README.md
├── code
    ├── train_LSTM.py
    └── vggish_train.py
└── LICENSE


/vggish/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/requirements-docker.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | resampy
4 | six
5 | sklearn
6 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | resampy
4 | tensorflow
5 | six
6 | sklearn
7 | 


--------------------------------------------------------------------------------
/assets/uhu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/uhu.png


--------------------------------------------------------------------------------
/assets/seagull.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/seagull.png


--------------------------------------------------------------------------------
/assets/blackbird.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/blackbird.png


--------------------------------------------------------------------------------
/assets/Typical_cnn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/Typical_cnn.png


--------------------------------------------------------------------------------
/assets/anser_anser.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/anser_anser.png


--------------------------------------------------------------------------------
/assets/steinadler.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/steinadler.png


--------------------------------------------------------------------------------
/requirements-gpu.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | resampy
4 | tensorflow-gpu
5 | six
6 | sklearn
7 | cython
8 | 


--------------------------------------------------------------------------------
/assets/angry_hissing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/angry_hissing.png


--------------------------------------------------------------------------------
/input/bird_id_map.pickle:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/input/bird_id_map.pickle


--------------------------------------------------------------------------------
/assets/steinadler_50_50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/steinadler_50_50.png


--------------------------------------------------------------------------------
/assets/Typical_cnn_spectrogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gojibjib/jibjib-model/HEAD/assets/Typical_cnn_spectrogram.png


--------------------------------------------------------------------------------
/Dockerfile.gpu:
--------------------------------------------------------------------------------
 1 | # FROM nvidia/cuda:9.0-runtime
 2 | FROM tensorflow/tensorflow:latest-gpu
 3 | 
 4 | WORKDIR /model
 5 | 
 6 | COPY vggish/ ./vggish
 7 | COPY requirements-docker.txt requirements.txt
 8 | 
 9 | RUN pip install -r requirements.txt
10 | 
11 | # Put your code at the end so rebuilds are faster
12 | COPY code/ ./code
13 | 
14 | VOLUME /model/input
15 | VOLUME /model/output
16 | 
17 | WORKDIR /model/code
18 | CMD ["python", "vggish_train.py"]
19 | 


--------------------------------------------------------------------------------
/train_docker.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Start training via Docker, run this from the jibjib-model directory!
 3 | 
 4 | docker pull obitech/jibjib-model:latest-gpu
 5 | docker run --rm --name jibjib-model -d \
 6 |     -v $(pwd)/input:/model/input \
 7 |     -v $(pwd)/output:/model/output \
 8 |     --runtime=nvidia \
 9 |     obitech/jibjib-model:latest-gpu \
10 |     python vggish_train.py \
11 |     --model_version=1.1 \
12 |     --num_mini_batches=1000 \
13 |     --num_batches=101 \
14 |     --save_step=20


--------------------------------------------------------------------------------
/Dockerfile.cpu:
--------------------------------------------------------------------------------
 1 | # FROM nvidia/cuda:9.0-runtime
 2 | FROM tensorflow/tensorflow
 3 | 
 4 | WORKDIR /model
 5 | 
 6 | # Install Python
 7 | # RUN apt-get install -y python \
 8 | #         python-dev \
 9 | #         rsync \
10 | #         software-properties-common && \
11 | #         apt-get clean && \
12 | #         rm -rf /var/lib/apt/lists/*
13 | 
14 | # # Install pip
15 | # RUN curl -O https://bootstrap.pypa.io/get-pip.py && \
16 | #     python get-pip.py && \
17 | #     rm get-pip.py
18 | 
19 | COPY requirements-docker.txt ./
20 | COPY *.py ./
21 | COPY *.ckpt ./
22 | 
23 | RUN pip install -r requirements-docker.txt
24 | 
25 | VOLUME /model/input
26 | VOLUME /model/output
27 | 
28 | CMD ["python", "vggish_train.py"]
29 | 


--------------------------------------------------------------------------------
/audio_splitter.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # audio_splitter.py - Split .wav audio files into 9s parts
 3 | 
 4 | from pydub import AudioSegment
 5 | import os 
 6 | 
 7 | def splitter():
 8 | 	for x,subdirList, fileList in os.walk("./wav/"):
 9 | 		print (x)
10 | 		for filename in fileList:
11 | 			if filename.endswith(".wav"):
12 | 				path = str(x+filename)
13 | 				print(filename)
14 | 				sound = AudioSegment.from_wav(path)
15 | 				#over 30 seocnds
16 | 				if(len(sound)>15000):
17 | 					print(filename + " is: "+str(len(sound)))
18 | 					slices = sound[::9000]
19 | 					counter=1
20 | 					for element in slices:
21 | 						name, ext = os.path.splitext(filename)
22 | 						new_name = str(name+"_"+str(counter))
23 | 						element.export(str("./splitted/"+new_name+".wav"), format="wav")
24 | 						counter+=1
25 | 
26 | 
27 | 
28 | 
29 | print("starting")
30 | splitter()
31 | print("program executed")


--------------------------------------------------------------------------------
/vggish/vggish_params.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Global parameters for the VGGish model.
17 | 
18 | See vggish_slim.py for more information.
19 | """
20 | 
21 | # Architectural constants.
22 | NUM_FRAMES = 96  # Frames in input mel-spectrogram patch.
23 | NUM_BANDS = 64  # Frequency bands in input mel-spectrogram patch.
24 | EMBEDDING_SIZE = 128  # Size of embedding layer.
25 | 
26 | # Hyperparameters used in feature and example generation.
27 | SAMPLE_RATE = 16000
28 | STFT_WINDOW_LENGTH_SECONDS = 0.025
29 | STFT_HOP_LENGTH_SECONDS = 0.010
30 | NUM_MEL_BINS = NUM_BANDS
31 | MEL_MIN_HZ = 125
32 | MEL_MAX_HZ = 7500
33 | LOG_OFFSET = 0.01  # Offset used for stabilized log of input mel-spectrogram.
34 | EXAMPLE_WINDOW_SECONDS = 0.96  # Each example contains 96 10ms frames
35 | EXAMPLE_HOP_SECONDS = 0.96     # with zero overlap.
36 | 
37 | # Parameters used for embedding postprocessing.
38 | PCA_EIGEN_VECTORS_NAME = 'pca_eigen_vectors'
39 | PCA_MEANS_NAME = 'pca_means'
40 | QUANTIZE_MIN_VAL = -2.0
41 | QUANTIZE_MAX_VAL = +2.0
42 | 
43 | # Hyperparameters used in training.
44 | INIT_STDDEV = 0.01  # Standard deviation used to initialize weights.
45 | LEARNING_RATE = 1e-4  # Learning rate for the Adam optimizer.
46 | ADAM_EPSILON = 1e-8  # Epsilon for the Adam optimizer.
47 | 
48 | # Names of ops, tensors, and features.
49 | INPUT_OP_NAME = 'vggish/input_features'
50 | INPUT_TENSOR_NAME = INPUT_OP_NAME + ':0'
51 | OUTPUT_OP_NAME = 'vggish/embedding'
52 | OUTPUT_TENSOR_NAME = OUTPUT_OP_NAME + ':0'
53 | AUDIO_EMBEDDING_FEATURE_NAME = 'audio_embedding'
54 | 


--------------------------------------------------------------------------------
/vggish/vggish_input.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Compute input examples for VGGish from audio waveform."""
17 | 
18 | import numpy as np
19 | import resampy
20 | from scipy.io import wavfile
21 | 
22 | import mel_features
23 | import vggish_params
24 | 
25 | 
def waveform_to_examples(data, sample_rate):
  """Turns an audio waveform into a stack of log mel spectrogram patches.

  Args:
    data: np.array with one dimension (mono) or more. Input with more than
      one dimension is averaged over axis 1 to obtain a mono signal. Samples
      are generally expected to lie in [-1.0, +1.0], but this is not enforced.
    sample_rate: Sample rate of data.

  Returns:
    3-D np.array of shape [num_examples, num_frames, num_bands]: a sequence of
    examples, each a patch of log mel spectrogram spanning num_frames frames
    and num_bands mel bands, with a frame step of
    vggish_params.STFT_HOP_LENGTH_SECONDS.
  """
  target_rate = vggish_params.SAMPLE_RATE

  # Mix down to mono, then bring the signal to the rate VGGish assumes.
  if len(data.shape) >= 2:
    data = np.mean(data, axis=1)
  if sample_rate != target_rate:
    data = resampy.resample(data, sample_rate, target_rate)

  # Log mel spectrogram of the whole (mono, resampled) signal.
  spectrogram = mel_features.log_mel_spectrogram(
      data,
      audio_sample_rate=target_rate,
      log_offset=vggish_params.LOG_OFFSET,
      window_length_secs=vggish_params.STFT_WINDOW_LENGTH_SECONDS,
      hop_length_secs=vggish_params.STFT_HOP_LENGTH_SECONDS,
      num_mel_bins=vggish_params.NUM_MEL_BINS,
      lower_edge_hertz=vggish_params.MEL_MIN_HZ,
      upper_edge_hertz=vggish_params.MEL_MAX_HZ)

  # Express the example window and hop in spectrogram frames, then cut the
  # spectrogram into fixed-size examples.
  frames_per_second = 1.0 / vggish_params.STFT_HOP_LENGTH_SECONDS
  window_frames = int(round(
      vggish_params.EXAMPLE_WINDOW_SECONDS * frames_per_second))
  hop_frames = int(round(
      vggish_params.EXAMPLE_HOP_SECONDS * frames_per_second))
  return mel_features.frame(
      spectrogram,
      window_length=window_frames,
      hop_length=hop_frames)
71 | 
72 | 
def wavfile_to_examples(wav_file):
  """Reads a 16-bit PCM WAV file and converts it with waveform_to_examples().

  Args:
    wav_file: String path to a file, or a file-like object, holding WAV audio
      with signed 16-bit PCM samples. Any other sample type fails the assert.

  Returns:
    See waveform_to_examples.
  """
  rate, pcm = wavfile.read(wav_file)
  assert pcm.dtype == np.int16, 'Bad sample type: %r' % pcm.dtype
  # Dividing by 2**15 maps the int16 range onto [-1.0, +1.0).
  return waveform_to_examples(pcm / 32768.0, rate)
87 | 


--------------------------------------------------------------------------------
/modelbuilder.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # modelbuilder.py - Exports a TensorFlow model in protocol buffer format
  3 | 
  4 | import tensorflow as tf
  5 | import os, sys
  6 | from traceback import print_exc
  7 | 
  8 | FEATURE_TENSOR = "vggish/input_features:0"
  9 | LOGITS = "mymodel/prediction:0"
 10 | SAVE_TO = 'serve'
 11 | VERSION = '1'
 12 | MODEL_NAME = 'jibjib_model'
 13 | SAVE_PATH = os.path.abspath(os.path.join(os.getcwd(), SAVE_TO, MODEL_NAME, VERSION))
 14 | 
 15 | loaded_graph = tf.Graph()
 16 | 
 17 | def create_parser():
 18 | 	import argparse
 19 | 
 20 | 	arg_desc = "Serializes a saved TensorFlow model into protocol buffer format."
 21 | 
 22 | 	parser = argparse.ArgumentParser(description=arg_desc)
 23 | 	parser.add_argument('checkpoint',
 24 | 		help='The full path to the checkpoint ckpt file and meta file. Example: output/model/mymodel.ckpt-10 will use the files output/model/mymodel.ckpt-10 and output/mymodel.ckpt-10.meta',
 25 | 		type=str)
 26 | 	parser.add_argument('--features_tensor',
 27 | 		help='The name of the features Tensor. Default: {}'.format(FEATURE_TENSOR),
 28 | 		type=str,
 29 | 		default=FEATURE_TENSOR)
 30 | 	parser.add_argument('--logits',
 31 | 		help='The name of the logits Tensor. Default: {}'.format(LOGITS),
 32 | 		type=str,
 33 | 		default=LOGITS)
 34 | 	parser.add_argument('--save_path',
 35 | 		help='The path to save the serialized model to. Will create on absence. Schema: ./<save_path>/<model_version/<model_name> . Default: {} => {}'.format(SAVE_TO, SAVE_PATH),
 36 | 		type=str)
 37 | 	parser.add_argument('--model_version',
 38 | 		help='The model version. Default: {}'.format(VERSION),
 39 | 		type=str)
 40 | 	parser.add_argument('--model_name',
 41 | 		help='The name of the model. Default: {}'.format(MODEL_NAME),
 42 | 		type=str)
 43 | 
 44 | 	return parser
 45 | 
 46 | args = create_parser().parse_args()
 47 | with tf.Session(graph = loaded_graph) as sess:
 48 | 	try:
 49 | 		saver = tf.train.import_meta_graph("{}.meta".format(args.checkpoint))
 50 | 	except:
 51 | 		print("Unable to restore meta graph: {}.meta".format(args.checkpoint))
 52 | 		print_exc()
 53 | 		sys.exit(1)
 54 | 	
 55 | 	try:
 56 | 		saver.restore(sess, args.checkpoint)
 57 | 	except:
 58 | 		print("Unable to restore model {}".format(args.checkpoint))
 59 | 		print_exc()
 60 | 		sys.exit(1)
 61 | 
 62 | 	features_tensor = loaded_graph.get_tensor_by_name(args.features_tensor)
 63 | 	model_input = tf.saved_model.utils.build_tensor_info(features_tensor)
 64 | 	
 65 | 	logits = loaded_graph.get_tensor_by_name(args.logits)
 66 | 	model_output = tf.saved_model.utils.build_tensor_info(logits)
 67 | 
 68 | 	# build signature definition
 69 | 	signature_definition = tf.saved_model.signature_def_utils.build_signature_def(
 70 | 		inputs={'inputs': model_input},
 71 | 		outputs={'outputs': model_output},
 72 | 		method_name= tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
 73 | 
 74 | 	# Construct save path
 75 | 	out_path = os.getcwd()
 76 | 	if args.save_path or args.model_version or args.model_name:
 77 | 		if args.save_path:
 78 | 			out_path = os.path.join(out_path, args.save_path)
 79 | 		else:
 80 | 			out_path = os.path.join(out_path, SAVE_TO)
 81 | 		
 82 | 		if args.model_name:
 83 | 			out_path = os.path.join(out_path, args.model_name)
 84 | 		else:
 85 | 			out_path = os.path.join(out_path, MODEL_NAME)
 86 | 		
 87 | 		if args.model_version:
 88 | 			out_path =  os.path.join(out_path, args.model_version)
 89 | 		else:
 90 | 			out_path = os.path.join(out_path, VERSION)
 91 | 		
 92 | 	else:
 93 | 		out_path = SAVE_PATH
 94 | 
 95 | 	try:
 96 | 		builder = tf.saved_model.builder.SavedModelBuilder(out_path)
 97 | 	except:
 98 | 		print("Unable to create SavedModelBuilder")
 99 | 		print_exc()
100 | 		sys.exit(1)
101 | 
102 | 	builder.add_meta_graph_and_variables(
103 | 		sess, [tf.saved_model.tag_constants.SERVING],
104 | 		signature_def_map={
105 | 			tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
106 | 				signature_definition
107 | 		})
108 | 
109 | 	builder.save()
110 | 	print("Model saved under {}".format(out_path))


--------------------------------------------------------------------------------
/vggish/vggish_postprocess.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Post-process embeddings from VGGish."""
17 | 
18 | import numpy as np
19 | 
20 | import vggish_params
21 | 
22 | 
class Postprocessor(object):
  """Post-processes VGGish embeddings.

  The initial release of AudioSet included 128-D VGGish embeddings for each
  segment of AudioSet. These released embeddings were produced by applying
  a PCA transformation (technically, a whitening transform is included as well)
  and 8-bit quantization to the raw embedding output from VGGish, in order to
  stay compatible with the YouTube-8M project which provides visual embeddings
  in the same format for a large set of YouTube videos. This class implements
  the same PCA (with whitening) and quantization transformations.
  """

  def __init__(self, pca_params_npz_path):
    """Constructs a postprocessor.

    Args:
      pca_params_npz_path: Path to a NumPy-format .npz file that
        contains the PCA parameters used in postprocessing.

    Raises:
      AssertionError: If the loaded PCA matrix is not
        [EMBEDDING_SIZE, EMBEDDING_SIZE] or the means do not reshape to
        [EMBEDDING_SIZE, 1].
    """
    # The object returned by np.load for an .npz archive holds the file open
    # until it is closed. Both arrays are read while the archive is open and
    # the handle is released on leaving the with-block.
    with np.load(pca_params_npz_path) as params:
      self._pca_matrix = params[vggish_params.PCA_EIGEN_VECTORS_NAME]
      # Load means into a column vector for easier broadcasting later.
      self._pca_means = params[vggish_params.PCA_MEANS_NAME].reshape(-1, 1)
    assert self._pca_matrix.shape == (
        vggish_params.EMBEDDING_SIZE, vggish_params.EMBEDDING_SIZE), (
            'Bad PCA matrix shape: %r' % (self._pca_matrix.shape,))
    assert self._pca_means.shape == (vggish_params.EMBEDDING_SIZE, 1), (
        'Bad PCA means shape: %r' % (self._pca_means.shape,))

  def postprocess(self, embeddings_batch):
    """Applies postprocessing to a batch of embeddings.

    Args:
      embeddings_batch: An nparray of shape [batch_size, embedding_size]
        containing output from the embedding layer of VGGish.

    Returns:
      An nparray of the same shape as the input but of type uint8,
      containing the PCA-transformed and quantized version of the input.

    Raises:
      AssertionError: If the batch is not 2-D or its second dimension is not
        EMBEDDING_SIZE.
    """
    assert len(embeddings_batch.shape) == 2, (
        'Expected 2-d batch, got %r' % (embeddings_batch.shape,))
    assert embeddings_batch.shape[1] == vggish_params.EMBEDDING_SIZE, (
        'Bad batch shape: %r' % (embeddings_batch.shape,))

    # Apply PCA.
    # - Embeddings come in as [batch_size, embedding_size].
    # - Transpose to [embedding_size, batch_size].
    # - Subtract pca_means column vector from each column.
    # - Premultiply by PCA matrix of shape [output_dims, input_dims]
    #   where both are equal to embedding_size in our case.
    # - Transpose result back to [batch_size, embedding_size].
    pca_applied = np.dot(self._pca_matrix,
                         (embeddings_batch.T - self._pca_means)).T

    # Quantize by:
    # - clipping to [min, max] range
    clipped_embeddings = np.clip(
        pca_applied, vggish_params.QUANTIZE_MIN_VAL,
        vggish_params.QUANTIZE_MAX_VAL)
    # - convert to 8-bit in range [0.0, 255.0]
    quantized_embeddings = (
        (clipped_embeddings - vggish_params.QUANTIZE_MIN_VAL) *
        (255.0 /
         (vggish_params.QUANTIZE_MAX_VAL - vggish_params.QUANTIZE_MIN_VAL)))
    # - cast float to uint8 (astype drops the fractional part, it does not
    #   round)
    quantized_embeddings = quantized_embeddings.astype(np.uint8)

    return quantized_embeddings
92 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Project #
  2 | *.wav
  3 | *.local
  4 | *.ckpt*
  5 | py2env
  6 | venv2/
  7 | venv3/
  8 | *.pickle
  9 | results/
 10 | serve/
 11 | save/
 12 | output/*
 13 | 
 14 | # End of Project #
 15 | # Created by https://www.gitignore.io/api/go,macos,python,intellij+all
 16 | 
 17 | ### Go ###
 18 | # Binaries for programs and plugins
 19 | *.exe
 20 | *.exe~
 21 | *.dll
 22 | *.so
 23 | *.dylib
 24 | 
 25 | # Test binary, build with `go test -c`
 26 | *.test
 27 | 
 28 | # Output of the go coverage tool, specifically when used with LiteIDE
 29 | *.out
 30 | 
 31 | ### Intellij+all ###
 32 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
 33 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
 34 | 
 35 | # User-specific stuff:
 36 | .idea/**/workspace.xml
 37 | .idea/**/tasks.xml
 38 | .idea/dictionaries
 39 | 
 40 | # Sensitive or high-churn files:
 41 | .idea/**/dataSources/
 42 | .idea/**/dataSources.ids
 43 | .idea/**/dataSources.xml
 44 | .idea/**/dataSources.local.xml
 45 | .idea/**/sqlDataSources.xml
 46 | .idea/**/dynamic.xml
 47 | .idea/**/uiDesigner.xml
 48 | 
 49 | # Gradle:
 50 | .idea/**/gradle.xml
 51 | .idea/**/libraries
 52 | 
 53 | # CMake
 54 | cmake-build-debug/
 55 | 
 56 | # Mongo Explorer plugin:
 57 | .idea/**/mongoSettings.xml
 58 | 
 59 | ## File-based project format:
 60 | *.iws
 61 | 
 62 | ## Plugin-specific files:
 63 | 
 64 | # IntelliJ
 65 | /out/
 66 | 
 67 | # mpeltonen/sbt-idea plugin
 68 | .idea_modules/
 69 | 
 70 | # JIRA plugin
 71 | atlassian-ide-plugin.xml
 72 | 
 73 | # Cursive Clojure plugin
 74 | .idea/replstate.xml
 75 | 
 76 | # Ruby plugin and RubyMine
 77 | /.rakeTasks
 78 | 
 79 | # Crashlytics plugin (for Android Studio and IntelliJ)
 80 | com_crashlytics_export_strings.xml
 81 | crashlytics.properties
 82 | crashlytics-build.properties
 83 | fabric.properties
 84 | 
 85 | ### Intellij+all Patch ###
 86 | # Ignores the whole .idea folder and all .iml files
 87 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360
 88 | 
 89 | .idea/
 90 | 
 91 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023
 92 | 
 93 | *.iml
 94 | modules.xml
 95 | .idea/misc.xml
 96 | *.ipr
 97 | 
 98 | ### macOS ###
 99 | *.DS_Store
100 | .AppleDouble
101 | .LSOverride
102 | 
103 | # Icon must end with two \r
104 | Icon
105 | 
106 | # Thumbnails
107 | ._*
108 | 
109 | # Files that might appear in the root of a volume
110 | .DocumentRevisions-V100
111 | .fseventsd
112 | .Spotlight-V100
113 | .TemporaryItems
114 | .Trashes
115 | .VolumeIcon.icns
116 | .com.apple.timemachine.donotpresent
117 | 
118 | # Directories potentially created on remote AFP share
119 | .AppleDB
120 | .AppleDesktop
121 | Network Trash Folder
122 | Temporary Items
123 | .apdisk
124 | 
125 | ### Python ###
126 | # Byte-compiled / optimized / DLL files
127 | __pycache__/
128 | *.py[cod]
129 | *$py.class
130 | 
131 | # C extensions
132 | 
133 | # Distribution / packaging
134 | .Python
135 | build/
136 | develop-eggs/
137 | dist/
138 | downloads/
139 | eggs/
140 | .eggs/
141 | lib/
142 | lib64/
143 | parts/
144 | sdist/
145 | var/
146 | wheels/
147 | *.egg-info/
148 | .installed.cfg
149 | *.egg
150 | 
151 | # PyInstaller
152 | #  Usually these files are written by a python script from a template
153 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
154 | *.manifest
155 | *.spec
156 | 
157 | # Installer logs
158 | pip-log.txt
159 | pip-delete-this-directory.txt
160 | 
161 | # Unit test / coverage reports
162 | htmlcov/
163 | .tox/
164 | .coverage
165 | .coverage.*
166 | .cache
167 | .pytest_cache/
168 | nosetests.xml
169 | coverage.xml
170 | *.cover
171 | .hypothesis/
172 | 
173 | # Translations
174 | *.mo
175 | *.pot
176 | 
177 | # Flask stuff:
178 | instance/
179 | .webassets-cache
180 | 
181 | # Scrapy stuff:
182 | .scrapy
183 | 
184 | # Sphinx documentation
185 | docs/_build/
186 | 
187 | # PyBuilder
188 | target/
189 | 
190 | # Jupyter Notebook
191 | .ipynb_checkpoints
192 | 
193 | # pyenv
194 | .python-version
195 | 
196 | # celery beat schedule file
197 | celerybeat-schedule.*
198 | 
199 | # SageMath parsed files
200 | *.sage.py
201 | 
202 | # Environments
203 | .env
204 | .venv
205 | env/
206 | venv/
207 | ENV/
208 | env.bak/
209 | venv.bak/
210 | 
211 | # Spyder project settings
212 | .spyderproject
213 | .spyproject
214 | 
215 | # Rope project settings
216 | .ropeproject
217 | 
218 | # mkdocs documentation
219 | /site
220 | 
221 | # mypy
222 | .mypy_cache/
223 | 
224 | 
225 | # End of https://www.gitignore.io/api/go,macos,python,intellij+all
226 | 


--------------------------------------------------------------------------------
/vggish/vggish_slim.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Defines the 'VGGish' model used to generate AudioSet embedding features.
 17 | 
 18 | The public AudioSet release (https://research.google.com/audioset/download.html)
 19 | includes 128-D features extracted from the embedding layer of a VGG-like model
 20 | that was trained on a large Google-internal YouTube dataset. Here we provide
 21 | a TF-Slim definition of the same model, without any dependences on libraries
 22 | internal to Google. We call it 'VGGish'.
 23 | 
 24 | Note that we only define the model up to the embedding layer, which is the
 25 | penultimate layer before the final classifier layer. We also provide various
 26 | hyperparameter values (in vggish_params.py) that were used to train this model
 27 | internally.
 28 | 
 29 | For comparison, here is TF-Slim's VGG definition:
 30 | https://github.com/tensorflow/models/blob/master/research/slim/nets/vgg.py
 31 | """
 32 | 
 33 | import tensorflow as tf
 34 | import vggish_params as params
 35 | 
 36 | slim = tf.contrib.slim
 37 | 
 38 | 
def define_vggish_slim(training=False):
  """Defines the VGGish TensorFlow model.

  All ops are created in the current default graph, under the scope 'vggish/'.

  The input is a placeholder named 'vggish/input_features' of type float32 and
  shape [batch_size, num_frames, num_bands] where batch_size is variable and
  num_frames and num_bands are constants, and [num_frames, num_bands] represents
  a log-mel-scale spectrogram patch covering num_bands frequency bands and
  num_frames time frames (where each frame step is usually 10ms). This is
  produced by computing the stabilized log(mel-spectrogram + params.LOG_OFFSET).
  The output is an op named 'vggish/embedding' which produces the activations of
  a 128-D embedding layer, which is usually the penultimate layer when used as
  part of a full model with a final classifier layer.

  Args:
    training: If true, all parameters are marked trainable. It is passed as
      the `trainable` argument of every conv2d and fully_connected layer.

  Returns:
    The output tensor of the identity op 'vggish/embedding'.
  """
  # Defaults:
  # - All weights are initialized to N(0, INIT_STDDEV).
  # - All biases are initialized to 0.
  # - All activations are ReLU.
  # - All convolutions are 3x3 with stride 1 and SAME padding.
  # - All max-pools are 2x2 with stride 2 and SAME padding.
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_initializer=tf.truncated_normal_initializer(
                          stddev=params.INIT_STDDEV),
                      biases_initializer=tf.zeros_initializer(),
                      activation_fn=tf.nn.relu,
                      trainable=training), \
       slim.arg_scope([slim.conv2d],
                      kernel_size=[3, 3], stride=1, padding='SAME'), \
       slim.arg_scope([slim.max_pool2d],
                      kernel_size=[2, 2], stride=2, padding='SAME'), \
       tf.variable_scope('vggish'):
    # Input: a batch of 2-D log-mel-spectrogram patches.
    features = tf.placeholder(
        tf.float32, shape=(None, params.NUM_FRAMES, params.NUM_BANDS),
        name='input_features')
    # Reshape to 4-D so that we can convolve a batch with conv2d().
    net = tf.reshape(features, [-1, params.NUM_FRAMES, params.NUM_BANDS, 1])

    # The VGG stack of alternating convolutions and max-pools.
    net = slim.conv2d(net, 64, scope='conv1')
    net = slim.max_pool2d(net, scope='pool1')
    net = slim.conv2d(net, 128, scope='conv2')
    net = slim.max_pool2d(net, scope='pool2')
    net = slim.repeat(net, 2, slim.conv2d, 256, scope='conv3')
    net = slim.max_pool2d(net, scope='pool3')
    net = slim.repeat(net, 2, slim.conv2d, 512, scope='conv4')
    net = slim.max_pool2d(net, scope='pool4')

    # Flatten before entering fully-connected layers
    net = slim.flatten(net)
    net = slim.repeat(net, 2, slim.fully_connected, 4096, scope='fc1')
    # The embedding layer.
    net = slim.fully_connected(net, params.EMBEDDING_SIZE, scope='fc2')
    # Named identity so the output is addressable as 'vggish/embedding'.
    return tf.identity(net, name='embedding')
100 | 
101 | 
def load_vggish_slim_checkpoint(session, checkpoint_path):
  """Restores the VGGish variables of the current graph from a checkpoint.

  Suitable as an initialization function (an init_fn in TensorFlow
  terminology) that runs in a Session after all variables have been
  initialized. Only variables whose names match those created by
  define_vggish_slim(training=False) are restored; any other variables in the
  current default graph are left untouched.

  Args:
    session: an active TensorFlow session.
    checkpoint_path: path to a file containing a checkpoint that is
      compatible with the VGGish model definition.
  """
  # Build the inference-mode model inside a throwaway graph, purely to learn
  # which variable names VGGish defines (and hence the checkpoint contains).
  scratch_graph = tf.Graph()
  with scratch_graph.as_default():
    define_vggish_slim(training=False)
    checkpoint_var_names = [var.name for var in tf.global_variables()]

  # Back in the caller's default graph: keep only the variables whose names
  # appear in the list computed above.
  restorable_vars = []
  for var in tf.global_variables():
    if var.name in checkpoint_var_names:
      restorable_vars.append(var)

  # A dedicated Saver restores just that subset.
  restorer = tf.train.Saver(
      restorable_vars, name='vggish_load_pretrained', write_version=1)
  restorer.restore(session, checkpoint_path)
130 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # A model for bird sound classification
  2 | 
  3 | The model for training the bird classifier.
  4 | 
  5 | ## Repo layout
  6 | The complete list of JibJib repos is:
  7 | 
  8 | - [jibjib](https://github.com/gojibjib/jibjib): Our Android app. Records sounds and looks fantastic.
  9 | - [deploy](https://github.com/gojibjib/deploy): Instructions to deploy the JibJib stack.
 10 | - [jibjib-model](https://github.com/gojibjib/jibjib-model): Code for training the machine learning model for bird classification
 11 | - [jibjib-api](https://github.com/gojibjib/jibjib-api): Main API to receive database requests & audio files.
 12 | - [jibjib-data](https://github.com/gojibjib/jibjib-data): A MongoDB instance holding information about detectable birds.
 13 | - [jibjib-query](https://github.com/gojibjib/jibjib-query): A thin Python Flask API that handles communication with the [TensorFlow Serving](https://www.tensorflow.org/serving/) instance.
 14 | - [gopeana](https://github.com/gojibjib/gopeana): An API client for [Europeana](https://europeana.eu), written in Go.
 15 | - [voice-grabber](https://github.com/gojibjib/voice-grabber): A collection of scripts to construct the dataset required for model training
 16 | 
 17 | ## Overview
 18 | 
 19 | ### CNN for Spectrogram-wise Classification
 20 | In vggish_train.py we are training a convolutional classifier model for an arbitrary number of birds. We take a pretrained [VGGish/ Audioset](https://github.com/tensorflow/models/tree/master/research/audioset) model by Google and finetune it by letting it iterate during training on more than 80,000 audio samples of 10 second length. Please read the following papers for more information:
 21 | 
 22 | - Hershey, S. et. al., [CNN Architectures for Large-Scale Audio Classification](https://research.google.com/pubs/pub45611.html), ICASSP 2017
 23 | - Gemmeke, J. et. al., [AudioSet: An ontology and human-labelled dataset for audio events](https://research.google.com/pubs/pub45857.html), ICASSP 2017
 24 | 
 25 | Before you can start, you first need to download a VGGish checkpoint file. You can either use a checkpoint provided by [Google](https://storage.googleapis.com/audioset/vggish_model.ckpt) or [our](https://s3-eu-west-1.amazonaws.com/jibjib/model/jibjib_model_raw.tgz) very own model that has been additionally trained for more than 100 hours and 60 epochs on a GPU cluster inside a Docker container.
 26 | 
 27 | The original final layer is cut off and replaced with our own output nodes.
 28 | 
 29 | During the first training step, a directory containing labeled bird songs is iterated over and each .wav file is converted into a spectrogram where the x-axis is the time and the y-axis symbolizes the frequency. For instance, this is the spectrogram of a golden eagle's call:
 30 | 
 31 | ![mel spectrogram](https://raw.githubusercontent.com/gojibjib/jibjib-model/master/assets/steinadler_50_50.png)
 32 | 
 33 | Furthermore, each bird class is one-hot-encoded, and the features are then fed into the model in pairs with their corresponding labels.
 34 | Afterwards, VGGish's convolutional filters run over each spectrogram and extract meaningful features. The following graphic gives a short overview of how, after some convolutions and subsampling, the extracted features are fed into the fully connected layer just like in any other CNN:
 35 | 
 36 | ![mel spectrogram](https://raw.githubusercontent.com/gojibjib/jibjib-model/master/assets/Typical_cnn_spectrogram.png)
 37 | 
 38 | After every epoch a snapshot of the model's weights and biases is saved on disk. In the next step we can restore the model to either do a query or continue with training.
 39 | 
 40 | We are deploying the model by enabling TensorFlow Serving to reduce response time drastically. Check out [jibjib-query](https://github.com/gojibjib/jibjib-query) to learn more about how we implemented TensorFlow Serving for our model.
 41 | 
 42 | ### New: Convolutional LSTM for Sequence Classification
 43 | In train_LSTM.py we provide a Convolutional LSTM for audio event recognition. Similar to vggish_train.py it performs classification tasks on mel spectrograms. In contrast to vggish_train.py, it does not perform a classification for each spectrogram but analyzes an array of matrices and then performs a single classification on the entire sequence. C-LSTMs may outperform traditional CNNs when data only contains sparse specific features or when audio scenes are event-rich with many overlapping signals.
 44 | The script train_LSTM.py uses the same input function as vggish_train.py, converting .wav files into their audio footprint as log mel spectrograms and separating each file into frames of roughly 1 second, where each frame is made up of mel features. Simultaneously, the corresponding labels are extracted, one-hot-encoded and shown to our model further downstream at the fully connected layer. The script uses Keras as a TensorFlow wrapper to build the model and is compatible with Python3.6 or upwards.
 45 | 
 46 | 
 47 | ## Training
 48 | 
 49 | ### Docker
 50 | 
 51 | Get the container:
 52 | 
 53 | ```
 54 | # GPU, needs nvidia-docker installed
 55 | docker pull obitech/jibjib-model:latest-gpu
 56 | 
 57 | # CPU
 58 | docker pull obitech/jibjib-model:latest-cpu
 59 | ```
 60 | 
 61 | Create folders, if necessary:
 62 | ```
 63 | mkdir -p output/logs output/train output/model input/data
 64 | ```
 65 | 
 66 | Get the [audioset](https://github.com/tensorflow/models/tree/master/research/audioset) checkpoint:
 67 | 
 68 | ```
 69 | curl -o input/vggish_model.ckpt https://storage.googleapis.com/audioset/vggish_model.ckpt
 70 | ```
 71 | 
 72 | Copy all training folders / files into `input/data/`
 73 | 
 74 | 
 75 | Get the [`bird_id_map.pickle`](https://github.com/gojibjib/voice-grabber):
 76 | 
 77 | ```
 78 | curl -L -o input/bird_id_map.pickle https://github.com/gojibjib/voice-grabber/raw/master/meta/bird_id_map.pickle
 79 | ```
 80 | 
 81 | Run the container:
 82 | 
 83 | ```
 84 | docker container run --rm -d \
 85 |     --runtime=nvidia \
 86 |     -v $(pwd)/input:/model/input \
 87 |     -v $(pwd)/output:/model/output \
 88 |     obitech/jibjib-model:latest-gpu
 89 | ```
 90 | 
 91 | For quickly starting training run:
 92 | 
 93 | ```
 94 | # GPU
 95 | ./train_docker.sh
 96 | 
 97 | # CPU
 98 | ./train_docker.sh
 99 | ```
100 | 
101 | ### Locally
102 | 
103 | Clone the repo:
104 | 
105 | ```
106 | git clone https://github.com/gojibjib/jibjib-model
107 | ```
108 | 
109 | Install dependencies, **use python3.6 or upwards**:
110 | 
111 | ```
112 | # CPU training
113 | pip install -r requirements.txt
114 | 
115 | # GPU training
116 | pip install -r requirements-gpu.txt
117 | ```
118 | 
119 | Copy all training folders / files into `input/data/`
120 | 
121 | Get the [audioset](https://github.com/tensorflow/models/tree/master/research/audioset) checkpoint:
122 | 
123 | ```
124 | curl -o input/vggish_model.ckpt https://storage.googleapis.com/audioset/vggish_model.ckpt
125 | ```
126 | 
127 | Get the [`bird_id_map.pickle`](https://github.com/gojibjib/voice-grabber):
128 | 
129 | ```
130 | curl -L -o input/bird_id_map.pickle https://github.com/gojibjib/voice-grabber/raw/master/meta/bird_id_map.pickle
131 | ```
132 | 
133 | Start training:
134 | 
135 | ```
136 | # Make sure to start the script from the code/ directory !
137 | cd code
138 | python ./vggish_train.py
139 | ```
140 | 
141 | You can then use `modelbuilder.py` to convert the model to protocol buffer.
142 | 


--------------------------------------------------------------------------------
/vggish/mel_features.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 The TensorFlow Authors All Rights Reserved.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | # ==============================================================================
 15 | 
 16 | """Defines routines to compute mel spectrogram features from audio waveform."""
 17 | 
 18 | import numpy as np
 19 | 
 20 | 
 21 | def frame(data, window_length, hop_length):
 22 |   """Convert array into a sequence of successive possibly overlapping frames.
 23 | 
 24 |   An n-dimensional array of shape (num_samples, ...) is converted into an
 25 |   (n+1)-D array of shape (num_frames, window_length, ...), where each frame
 26 |   starts hop_length points after the preceding one.
 27 | 
 28 |   This is accomplished using stride_tricks, so the original data is not
 29 |   copied.  However, there is no zero-padding, so any incomplete frames at the
 30 |   end are not included.
 31 | 
 32 |   Args:
 33 |     data: np.array of dimension N >= 1.
 34 |     window_length: Number of samples in each frame.
 35 |     hop_length: Advance (in samples) between each window.
 36 | 
 37 |   Returns:
 38 |     (N+1)-D np.array with as many rows as there are complete frames that can be
 39 |     extracted.
 40 |   """
 41 |   num_samples = data.shape[0]
 42 |   num_frames = 1 + int(np.floor((num_samples - window_length) / hop_length))
 43 |   shape = (num_frames, window_length) + data.shape[1:]
 44 |   strides = (data.strides[0] * hop_length,) + data.strides
 45 |   return np.lib.stride_tricks.as_strided(data, shape=shape, strides=strides)
 46 | 
 47 | 
 48 | def periodic_hann(window_length):
 49 |   """Calculate a "periodic" Hann window.
 50 | 
 51 |   The classic Hann window is defined as a raised cosine that starts and
 52 |   ends on zero, and where every value appears twice, except the middle
 53 |   point for an odd-length window.  Matlab calls this a "symmetric" window
 54 |   and np.hanning() returns it.  However, for Fourier analysis, this
 55 |   actually represents just over one cycle of a period N-1 cosine, and
 56 |   thus is not compactly expressed on a length-N Fourier basis.  Instead,
 57 |   it's better to use a raised cosine that ends just before the final
 58 |   zero value - i.e. a complete cycle of a period-N cosine.  Matlab
 59 |   calls this a "periodic" window. This routine calculates it.
 60 | 
 61 |   Args:
 62 |     window_length: The number of points in the returned window.
 63 | 
 64 |   Returns:
 65 |     A 1D np.array containing the periodic hann window.
 66 |   """
 67 |   return 0.5 - (0.5 * np.cos(2 * np.pi / window_length *
 68 |                              np.arange(window_length)))
 69 | 
 70 | 
 71 | def stft_magnitude(signal, fft_length,
 72 |                    hop_length=None,
 73 |                    window_length=None):
 74 |   """Calculate the short-time Fourier transform magnitude.
 75 | 
 76 |   Args:
 77 |     signal: 1D np.array of the input time-domain signal.
 78 |     fft_length: Size of the FFT to apply.
 79 |     hop_length: Advance (in samples) between each frame passed to FFT.
 80 |     window_length: Length of each block of samples to pass to FFT.
 81 | 
 82 |   Returns:
 83 |     2D np.array where each row contains the magnitudes of the fft_length/2+1
 84 |     unique values of the FFT for the corresponding frame of input samples.
 85 |   """
 86 |   frames = frame(signal, window_length, hop_length)
 87 |   # Apply frame window to each frame. We use a periodic Hann (cosine of period
 88 |   # window_length) instead of the symmetric Hann of np.hanning (period
 89 |   # window_length-1).
 90 |   window = periodic_hann(window_length)
 91 |   windowed_frames = frames * window
 92 |   return np.abs(np.fft.rfft(windowed_frames, int(fft_length)))
 93 | 
 94 | 
 95 | # Mel spectrum constants and functions.
 96 | _MEL_BREAK_FREQUENCY_HERTZ = 700.0
 97 | _MEL_HIGH_FREQUENCY_Q = 1127.0
 98 | 
 99 | 
100 | def hertz_to_mel(frequencies_hertz):
101 |   """Convert frequencies to mel scale using HTK formula.
102 | 
103 |   Args:
104 |     frequencies_hertz: Scalar or np.array of frequencies in hertz.
105 | 
106 |   Returns:
107 |     Object of same size as frequencies_hertz containing corresponding values
108 |     on the mel scale.
109 |   """
110 |   return _MEL_HIGH_FREQUENCY_Q * np.log(
111 |       1.0 + (frequencies_hertz / _MEL_BREAK_FREQUENCY_HERTZ))
112 | 
113 | 
def spectrogram_to_mel_matrix(num_mel_bins=20,
                              num_spectrogram_bins=129,
                              audio_sample_rate=8000,
                              lower_edge_hertz=125.0,
                              upper_edge_hertz=12000.0):
  """Return a matrix that can post-multiply spectrogram rows to make mel.

  The returned np.array A maps a matrix S of STFT magnitudes, laid out as
  frames x bins, onto a "mel spectrogram" M of frames x num_mel_bins via
  M = S A.  Each column of A is one triangular mel band whose sides are
  straight lines in the mel domain.

  Expressing the filterbank as a dense matrix costs more multiplies than the
  classic HTK approach (which touches each FFT bin once), but it is a single
  np.dot() call for the caller and is more general and easier to read.

  Note: with the default arguments the Nyquist frequency is 4000 Hz while
  upper_edge_hertz is 12000 Hz, so the range check below raises ValueError.
  Callers therefore need to pass a consistent audio_sample_rate /
  upper_edge_hertz pair.

  Args:
    num_mel_bins: How many bands in the resulting mel spectrum.  This is
      the number of columns in the output matrix.
    num_spectrogram_bins: How many bins there are in the source spectrogram
      data, which is understood to be fft_size/2 + 1, i.e. the spectrogram
      only contains the nonredundant FFT bins.
    audio_sample_rate: Samples per second of the audio at the input to the
      spectrogram.  Determines the frequency of each spectrogram bin, which
      dictates how bins are mapped into mel.
    lower_edge_hertz: Lower bound on the frequencies to be included in the mel
      spectrum.  This corresponds to the lower edge of the lowest triangular
      band.
    upper_edge_hertz: The desired top edge of the highest frequency band.

  Returns:
    An np.array with shape (num_spectrogram_bins, num_mel_bins).

  Raises:
    ValueError: if frequency edges are incorrectly ordered or out of range.
  """
  nyquist_hertz = audio_sample_rate / 2.

  # Validate the requested band edges before doing any work.
  if lower_edge_hertz < 0.0:
    raise ValueError("lower_edge_hertz %.1f must be >= 0" % lower_edge_hertz)
  if lower_edge_hertz >= upper_edge_hertz:
    raise ValueError("lower_edge_hertz %.1f >= upper_edge_hertz %.1f" %
                     (lower_edge_hertz, upper_edge_hertz))
  if upper_edge_hertz > nyquist_hertz:
    raise ValueError("upper_edge_hertz %.1f is greater than Nyquist %.1f" %
                     (upper_edge_hertz, nyquist_hertz))

  # Mel value of every spectrogram bin, from DC up to Nyquist.
  bin_mels = hertz_to_mel(
      np.linspace(0.0, nyquist_hertz, num_spectrogram_bins))

  # Band i is centred on edge_mels[i + 1] and spans edge_mels[i] to
  # edge_mels[i + 2], hence num_mel_bins + 2 equally spaced mel points.
  edge_mels = np.linspace(hertz_to_mel(lower_edge_hertz),
                          hertz_to_mel(upper_edge_hertz), num_mel_bins + 2)

  # Every column is assigned in the loop, so uninitialised storage is fine.
  weights = np.empty((num_spectrogram_bins, num_mel_bins))
  band_triples = zip(edge_mels[:-2], edge_mels[1:-1], edge_mels[2:])
  for band, (left_mel, peak_mel, right_mel) in enumerate(band_triples):
    # Rising and falling sides of the triangle, evaluated at every bin.
    rising = (bin_mels - left_mel) / (peak_mel - left_mel)
    falling = (right_mel - bin_mels) / (right_mel - peak_mel)
    # The triangle is the lower of the two lines, floored at zero.
    weights[:, band] = np.maximum(0.0, np.minimum(rising, falling))

  # HTK excludes the spectrogram DC bin, so force its coefficients to zero.
  weights[0, :] = 0.0
  return weights
190 | 
191 | 
def log_mel_spectrogram(data,
                        audio_sample_rate=8000,
                        log_offset=0.0,
                        window_length_secs=0.025,
                        hop_length_secs=0.010,
                        **kwargs):
  """Convert waveform to a log magnitude mel-frequency spectrogram.

  Pipeline: STFT magnitude -> mel filterbank projection -> natural log.

  Args:
    data: 1D np.array of waveform data.
    audio_sample_rate: The sampling rate of data.
    log_offset: Add this to values when taking log to avoid -Infs.
    window_length_secs: Duration of each window to analyze.
    hop_length_secs: Advance between successive analysis windows.
    **kwargs: Additional arguments to pass to spectrogram_to_mel_matrix.

  Returns:
    2D np.array of (num_frames, num_mel_bins) consisting of log mel filterbank
    magnitudes for successive frames.
  """
  # Convert the window and hop durations from seconds to whole samples.
  samples_per_window = int(round(audio_sample_rate * window_length_secs))
  samples_per_hop = int(round(audio_sample_rate * hop_length_secs))

  # FFT size: the smallest power of two that holds one full window.
  fft_exponent = int(np.ceil(np.log(samples_per_window) / np.log(2.0)))
  magnitudes = stft_magnitude(
      data,
      fft_length=2 ** fft_exponent,
      hop_length=samples_per_hop,
      window_length=samples_per_window)

  mel_matrix = spectrogram_to_mel_matrix(
      num_spectrogram_bins=magnitudes.shape[1],
      audio_sample_rate=audio_sample_rate, **kwargs)
  return np.log(np.dot(magnitudes, mel_matrix) + log_offset)
224 | 


--------------------------------------------------------------------------------
/code/train_LSTM.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # train_LSTM.py - Train a Recurrent Convolutional Network to recognize bird voices 
  3 | 
  4 | import tensorflow as tf
  5 | import keras 
  6 | from keras import Sequential
  7 | from keras.models import Sequential, Model
  8 | from keras.layers import Input, Dense, TimeDistributed, LSTM, Dropout, Activation, Reshape
  9 | from keras.layers import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Flatten, Conv2D, BatchNormalization, Lambda
 10 | from keras.optimizers import Adam, RMSprop
 11 | from keras import regularizers
 12 | import logging
 13 | import keras.backend as K
 14 | import os
 15 | import sys
 16 | import numpy as np 
 17 | import vggish_input
 18 | import vggish_params
 19 | from sklearn.model_selection import train_test_split
 20 | import vggish_params
 21 | import matplotlib.pyplot as plt
 22 | from keras.utils import plot_model
 23 | 
 24 | import scipy
 25 | import vggish_params
 26 | import datetime
 27 | 
# Command-line flags are registered through TensorFlow's flags wrapper; the
# parsed values are read through FLAGS below.
flags = tf.app.flags
slim = tf.contrib.slim
import time

flags.DEFINE_boolean(
    'debug', False,
    'Sets log level to debug. Default: false (defaults to log level info)')

# Passed as `batch_size` to `fit()` in the __main__ block of this script.
flags.DEFINE_integer('minibatch_size', 16, 'Number of Mini batches executed per epoch (batch).')

# Length of the one-hot label vector and size of the softmax output layer.
flags.DEFINE_integer('num_classes', 7, 'Number of classes to train on')

# Number of leading examples kept per audio file; files that yield fewer
# examples are not added to the dataset (see load_spectrogram).
flags.DEFINE_integer('sample_length', 10, 'Length of sample')

# Forwarded as `test_size` to sklearn's train_test_split.
flags.DEFINE_float('test_size', 0.2, 'Size of test set as chunk of batch')

# NOTE(review): 'save_step' is defined here but not read anywhere else in this
# script.
flags.DEFINE_integer('save_step', 4, 'Defines _after_ how many epochs the model should be saved.')

flags.DEFINE_integer('epochs', 150, 'Defines how many times the entire train set is fed into the model')

#batch_size=16,epochs=100,

flags.DEFINE_string('model_version', "1.0", "Defines the model version. Will be used for pickle output file")

FLAGS = flags.FLAGS


# Folders
# All paths are resolved relative to the current working directory, so the
# script is expected to be started from the code/ directory.
input_dir = os.path.abspath("../input")
data_dir = os.path.join(input_dir, "data/")
output_dir = os.path.abspath("../output")
log_dir = os.path.join(output_dir, "log/")
log_dir_test = os.path.join(log_dir, "test/")
log_dir_train = os.path.join(log_dir, "train/")
model_dir = os.path.join(output_dir, "model/")

# Set log level depending on flags
# NOTE(review): log_level is computed here, but no logging configuration call
# in this script consumes it.
log_level = None
if FLAGS.debug:
  log_level = logging.DEBUG
else:
  log_level = logging.INFO


# Save train_id_list as pickle, so we can later translate back train IDs/labels to birds
train_id_list = []
# The pickle file name embeds the model version flag.
train_id_list_path = os.path.join(output_dir, "train_id_list-{}.pickle".format(FLAGS.model_version))
 75 | 
 76 | def create_dir(path):
 77 |   """Checks if a directory exists and creates it, if necessary
 78 |   Args:
 79 |     path (str): The path of the directory to be checked for existence
 80 |   """
 81 |   if not os.path.exists(path):
 82 |     try:
 83 |       print("Creating {}".format(path))
 84 |       os.makedirs(path)
 85 |     except:
 86 |       print("Unable to create {}.".format(path))
 87 |       print_exc()
 88 | 
 89 | import keras
 90 | import pydot as pyd
 91 | from IPython.display import SVG
 92 | from keras.utils.vis_utils import model_to_dot
 93 | 
 94 | keras.utils.vis_utils.pydot = pyd
 95 | 
 96 | def visualize_model(model):
 97 |   return SVG(model_to_dot(model).create(prog='dot', format='svg'))
 98 | 
 99 | def show_summary_stats(history):
100 |     # List all data in history
101 |     print(history.history.keys())
102 | 
103 |     #First image: Acc during training on X_train and X_test
104 |     plt.plot(history.history['acc'])
105 |     plt.plot(history.history['val_acc'])
106 |     plt.title('model accuracy')
107 |     plt.ylabel('accuracy')
108 |     plt.xlabel('epoch')
109 |     plt.legend(['train', 'test'], loc='upper left')
110 |     plt.show()
111 | 
112 |     #Second image: Training and test loss
113 |     plt.plot(history.history['loss'])
114 |     plt.plot(history.history['val_loss'])
115 |     plt.title('model loss')
116 |     plt.ylabel('loss')
117 |     plt.xlabel('epoch')
118 |     plt.legend(['train', 'test'], loc='upper left')
119 |     plt.show()
120 | 
121 | 
122 | 
123 | 
def load_spectrogram(rootDir):
  """Walks a dataset directory and builds examples with one-hot labels.

  The file tree should be of the structure <rootDir>/<class>/{data point n},
  for example:
  input/data/
  + Accipiter_gentilis
  -   + Accipiter_gentilis__1.wav
  -   + Accipiter_gentilis__2.wav
  + Cygnus_olor
  -   + Cygnus_olor_1.wav
  -   + Cygnus_olor_2.wav
  + Regulus_regulus
      + Regulus_regulus_4_1.wav
      + Regulus_regulus_4_2.wav

  Every directory visited by os.walk (except one named "data") is treated as
  a class and receives the next integer class index, in the order os.walk
  yields it.  Each .wav file in it is converted with
  vggish_input.wavfile_to_examples into a signal example: a 3-D np.array of
  shape [num_examples, num_frames, num_bands] holding patches of log mel
  spectrogram.  Only the first FLAGS.sample_length examples of a file are
  kept; files yielding fewer examples, or files that cannot be converted, are
  skipped.

  Each kept file gets a single one-hot label of length FLAGS.num_classes (one
  label per file, not per frame).  For example:
  Accipiter_gentilis  --> [1, 0, 0]
  Cygnus_olor         --> [0, 1, 0]
  Regulus_regulus     --> [0, 0, 1]

  The list of class names, in index order, is pickled to train_id_list_path so
  that predicted indices can be translated back to birds later.  Failure to
  write that file is reported but does not abort loading.

  Args:
    rootDir (str): The root directory where dataset is located.

  Returns:
    (input_examples, input_labels): A tuple of np.arrays containing the
    feature spectrograms and their corresponding one-hot labels.

  Raises:
    IndexError: if a .wav file is found in a class directory whose index is
      not smaller than FLAGS.num_classes.
  """
  # Imported locally because this module does not import pickle at the top.
  import pickle

  counter = 0
  input_examples = []
  input_labels = []
  train_id_list = []
  for dirName, subdirList, fileList in os.walk(rootDir):
    bird = os.path.basename(os.path.normpath(dirName))
    # The dataset root itself is not a class.
    if bird == "data":
      continue

    print("{} -> {}".format(bird, counter))
    train_id_list.append(bird)
    for fname in fileList:
      if not fname.endswith(".wav"):
        continue

      path = os.path.join(dirName, fname)
      # Calling the vggish helper: reads the wav file, returns mel spectrogram
      # patches.
      try:
        signal_example = vggish_input.wavfile_to_examples(path)
      except Exception as e:
        print(e)
        print("Skipping {}, unable to extract clean signal example".format(fname))
        continue

      # One-hot label for the whole file.
      signal_label = np.zeros((FLAGS.num_classes))
      signal_label[counter] = 1

      # Keep only files that provide at least sample_length examples, and
      # truncate them to exactly that many.
      if len(signal_example) >= FLAGS.sample_length:
        input_labels.append(signal_label)
        input_examples.append(signal_example[:FLAGS.sample_length])
    counter += 1

  # Persist the index -> class-name mapping next to the other outputs.
  try:
    with open(train_id_list_path, "wb") as wf:
      pickle.dump(train_id_list, wf)
  except (OSError, pickle.PicklingError) as e:
    print("Unable to dump into {}".format(train_id_list_path))
    print(e)

  print("Input examples created: {}, Labels created: {}".format(len(input_examples), len(input_labels)))
  return np.array(input_examples), np.array(input_labels)
207 | 
def save_model(model):
  """Writes a Keras model to disk as a JSON architecture plus HDF5 weights.

  The architecture goes to ../output/model/model.json and the weights to
  ../output/model/model.h5, both relative to the current working directory.

  Args:
    model: The Keras model to serialize.
  """
  # Architecture as JSON text.
  architecture = model.to_json()
  with open("../output/model/model.json", "w") as architecture_file:
    architecture_file.write(architecture)

  # Weights as HDF5.
  model.save_weights("../output/model/model.h5")
216 | 
def LSTM_model():
  """Builds and compiles the convolutional LSTM classifier.

  Layer order: Conv2D -> BatchNormalization -> ReLU -> MaxPooling2D ->
  Dropout -> Reshape -> LSTM -> Dropout -> three Dense layers -> Dropout ->
  Dense(FLAGS.num_classes) -> softmax.  The model is compiled with Adam
  (lr=0.001), categorical cross-entropy loss and the accuracy metric, and its
  summary is printed.

  Returns:
    The compiled Keras Model.
  """
  
  # Setting input shape dynamically, taking values from vggish_params
  input_shape = (FLAGS.sample_length, vggish_params.NUM_FRAMES, vggish_params.NUM_BANDS)
  model_input = Input(input_shape, name='input')

  layer =  model_input

  # Hyper-parameters of the network.
  FILTER_LENGTH = 2
  CONV_FILTER_COUNT = 56  
  LSTM_COUNT = 96
  NUM_HIDDEN = 64
  L2_regularization = 0.001
  N_DENSE = 3

  # NOTE(review): Conv2D is applied directly to the 3-D per-sample input, so
  # one of its axes is presumably treated as the channel axis (which one
  # depends on the Keras image data format) - confirm this is intended.
  layer = Conv2D(
      filters=CONV_FILTER_COUNT,
      kernel_size=FILTER_LENGTH,
      kernel_regularizer=regularizers.l2(L2_regularization),  
      name='conv_{}'.format(0)
      )(layer)

  layer = BatchNormalization(momentum=0.9)(layer)
  layer = Activation('relu')(layer)
  layer = MaxPooling2D(2)(layer)
  layer = Dropout(0.4)(layer)   
  # Merge the last two axes so the tensor becomes a 2-D sequence per sample,
  # which is the input rank the LSTM layer below is given.
  layer = Reshape(( int(layer.shape[1]), int(layer.shape[2]) * int(layer.shape[3])))(layer)
  # return_sequences=False: only the final LSTM output is passed on.
  layer = LSTM(LSTM_COUNT, return_sequences=False)(layer)
  layer = Dropout(0.4)(layer)

  # Dense Layers
  # No `activation` argument is passed to these Dense layers.
  for i in range(N_DENSE):
    layer = Dense(NUM_HIDDEN, 
      kernel_regularizer=regularizers.l2(L2_regularization), 
      name='dense{}'.format(i))(layer)
  # This Dropout sits outside the loop, so it is applied once, after the last
  # dense layer.
  layer = Dropout(0.1)(layer)

  # FC Output
  layer = Dense(FLAGS.num_classes)(layer)
  layer = Activation('softmax', name='output_realtime')(layer)
  model_output = layer
  model = Model(model_input, model_output)
  
  opt = Adam(lr=0.001)
  model.compile(
          loss='categorical_crossentropy',
          optimizer=opt,
           metrics=['accuracy']
      )
    
  print(model.summary())
  #visualize_model(model)
  return model
270 | 
271 | 
if __name__ == '__main__':
  # Script entry point: load data, split it, build the model, train it,
  # store it and finally show the training statistics.
  print("Start")

  # 1) Read every example together with its label
  print("Loading all examples with corresponding labels...")
  all_examples, all_labels = load_spectrogram(os.path.join(data_dir))

  # 2) Hold out FLAGS.test_size of the data for validation
  print("Splitting dataset into training test...")
  (X_train_entire,
   X_validation_entire,
   y_train_entire,
   y_validation_entire) = train_test_split(
       all_examples,
       all_labels,
       test_size=FLAGS.test_size)

  # 3) Build the network
  print("Creating Recurrent LSTM model...")
  lstm_model = LSTM_model()

  # 4) Train it
  print("Fitting model...")
  # TODO: holding the whole data set in memory is too expensive; feed the
  # model through generators instead.
  print('X_train_entire is shape {}'.format(X_train_entire.shape))
  history = lstm_model.fit(
      X_train_entire,
      y_train_entire,
      validation_data=(X_validation_entire, y_validation_entire),
      batch_size=FLAGS.minibatch_size,
      epochs=FLAGS.epochs,
      verbose=1,
      shuffle=True)

  # 5) Persist architecture and weights
  print("Saving model...")
  save_model(lstm_model)
  print("Model saved.")

  # 6) Report
  print('Displaying training statistics')
  show_summary_stats(history)


--------------------------------------------------------------------------------
/code/vggish_train.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python2
  2 | 
  3 | # vggish_train.py - Train a model to recognize bird voices upon Google's audioset model
  4 | # https://github.com/tensorflow/models/tree/master/research/audioset for more information.
  5 | 
  6 | from __future__ import print_function
  7 | from random import shuffle
  8 | import sys
  9 | import math
 10 | import numpy as np
 11 | import tensorflow as tf
 12 | import os 
 13 | import numpy as np
 14 | from numpy import array
 15 | import sklearn.model_selection as sk
 16 | sys.path.insert(0, os.path.abspath("../vggish"))
 17 | import logging
 18 | import vggish_input
 19 | import vggish_params
 20 | import vggish_slim
 21 | import time 
 22 | from numpy import array
 23 | import pickle
 24 | from traceback import print_exc
 25 | 
 26 | import datetime
 27 | import scipy
 28 | 
 29 | flags = tf.app.flags
 30 | slim = tf.contrib.slim
 31 | import time
 32 | 
 33 | flags.DEFINE_boolean(
 34 |     'debug', False,
 35 |     'Sets log level to debug. Default: false (defaults to log level info)')
 36 | 
 37 | flags.DEFINE_integer(
 38 |     'num_batches', 5,
 39 |     'Number of batches (epochs) of examples to feed into the model. Each batch is of '
 40 |     'variable size and contains shuffled examples of each class of audio.')
 41 | 
 42 | flags.DEFINE_integer('num_mini_batches', 5, 'Number of Mini batches executed per epoch (batch).')
 43 | 
 44 | flags.DEFINE_integer('num_classes', 3, 'Number of classes to train on')
 45 | 
 46 | flags.DEFINE_boolean(
 47 |     'train_vggish', True,
 48 |     'If Frue, allow VGGish parameters to change during training, thus '
 49 |     'fine-tuning VGGish. If False, VGGish parameters are fixed, thus using '
 50 |     'VGGish as a fixed feature extractor.')
 51 | 
 52 | flags.DEFINE_boolean('validation', True, 'If enabled, checks against validation set')
 53 | 
 54 | flags.DEFINE_string(
 55 |     'checkpoint', '../input/vggish_model.ckpt',
 56 |     'Path to the VGGish checkpoint file.')
 57 | 
 58 | flags.DEFINE_float('test_size', 0.2, 'Size of validation set as chunk of batch')
 59 | 
 60 | flags.DEFINE_integer('save_step', 4, 'Defines _after_ how many epochs the model should be saved.')
 61 | 
 62 | flags.DEFINE_string('model_version', "1.0", "Defines the model version. Will be used for output files like model ckpt and pickle")
 63 | 
 64 | FLAGS = flags.FLAGS
 65 | 
 66 | # Folders
 67 | input_dir = os.path.abspath("../input")
 68 | data_dir = os.path.join(input_dir, "data/")
 69 | output_dir = os.path.abspath("../output")
 70 | log_dir = os.path.join(output_dir, "log/")
 71 | log_dir_test = os.path.join(log_dir, "test/")
 72 | log_dir_train = os.path.join(log_dir, "train/")
 73 | model_dir = os.path.join(output_dir, "model/")
 74 | 
 75 | # Set log level depending on flags
 76 | log_level = None
 77 | if FLAGS.debug:
 78 |   log_level = logging.DEBUG
 79 | else:
 80 |   log_level = logging.INFO
 81 | 
 82 | # Save train_id_list as pickle, so we can later translate back train IDs/labels to birds
 83 | train_id_list = []
 84 | train_id_list_path = os.path.join(output_dir, "train_id_list-{}.pickle".format(FLAGS.model_version))
 85 | 
 86 | def create_dir(path):
 87 |   """Checks if a directory exists and creates it, if necessary
 88 |   Args:
 89 |     path (str): The path of the directory to be checked for existence
 90 |   """
 91 |   if not os.path.exists(path):
 92 |     try:
 93 |       print("Creating {}".format(path))
 94 |       os.makedirs(path)
 95 |     except:
 96 |       print("Unable to create {}.".format(path))
 97 |       print_exc()
 98 | 
 99 | def load_spectrogram(rootDir, log):
100 |   """Iterate over a directory and add each file as an input label
101 | 
102 |   The file tree should be of the structure <rootDir>/<class>/{data point n}, for example:
103 |   input/data/
104 |   + Accipiter_gentilis
105 |   -   + Accipiter_gentilis__1.wav
106 |   -   + Accipiter_gentilis__2.wav
107 |   + Cygnus_olor
108 |   -   + Cygnus_olor_1.wav
109 |   -   + Cygnus_olor_2.wav
110 |   + Regulus_regulus
111 |       + Regulus_regulus_4_1.wav
112 |       + Regulus_regulus_4_2.wav
113 | 
114 |   The function iterates over each audio file and extracts both a signal example and a signal label.
115 |   
116 |   A signal example is a 3-D np.array of shape [num_examples, num_frames, num_bands] which represents
117 |   a sequence of examples, each of which contains a patch of log mel
118 |   spectrogram, covering num_frames frames of audio and num_bands mel frequency
119 |   bands, where the frame length is vggish_params.STFT_HOP_LENGTH_SECONDS. The length of num_examples
120 |   corresponds with the duration of the audio file in seconds.
121 | 
122 |   A signal label is a one-hot encoded vector of the input labels. For example:
123 |   Accipiter_gentilis  --> [1, 0, 0]  
124 |   Cygnus_olor         --> [0, 1, 0] 
125 |   Regulus_regulus     --> [0, 0, 1]
126 |   Each audio file will be split into 0.96s frames, where each frame is one-hot encoded.
127 | 
128 |   Args:
129 |     rootDir (str): The root directory where dataset is located.
130 |     log: A Python logging object.
131 |   
132 |   Returns:
133 |     (input_examples, input_labels): A tuple of lists, containing feature spectrograms and with corresponding labels.
134 |   """
135 |   counter = 0
136 |   input_examples =[]
137 |   input_labels = []
138 | 
139 |   for dirName, subdirList, fileList  in os.walk(rootDir):
140 |     bird = os.path.basename(os.path.normpath(dirName))
141 |     if bird == "data":
142 |       continue
143 | 
144 |     log.info("{} -> {}".format(bird, counter))
145 |     train_id_list.append(bird)
146 |     for fname in fileList:
147 |       if fname.endswith(".wav"):
148 |           path = os.path.join(dirName, fname)
149 | 
150 |           #calling vggish function, reads in wav file and returns mel spectrogram
151 |           try:
152 |             signal_example = vggish_input.wavfile_to_examples(path)
153 |           except:
154 |             log.warn("Skipping {}, unable to extract clean signal example".format(fname))
155 |             continue
156 | 
157 |           log.debug("Signal example shape of {}: {}".format(fname, signal_example.shape))
158 | 
159 |           # Build own one-hot encoder 
160 |           encoded = np.zeros((FLAGS.num_classes))
161 |           encoded[counter]=1
162 |           encoded=encoded.tolist()
163 | 
164 |           # Encode each frame of the example, which results in the final label for this file
165 |           signal_label =np.array([encoded]*signal_example.shape[0])          
166 | 
167 |           log.debug("Signal label shape of {}: {}:".format(fname, signal_label.shape))
168 | 
169 |           # Check if a clean label can be extracted
170 |           if signal_label != []:
171 |             input_labels.append(signal_label)
172 |             input_examples.append(signal_example)
173 |           else:
174 |             log.warn("Skipping {}, unable extract clean signal label".format(fname))
175 |             continue
176 | 
177 |     counter +=1
178 | 
179 |   try:
180 |     with open(train_id_list_path, "wb") as wf:
181 |       pickle.dump(train_id_list, wf)
182 |   except:
183 |     log.warn("Unable to dump into {}".format(train_id_list_path))
184 | 
185 |   log.debug("Input examples created: {}, Labels created: {}".format(len(input_examples), len(input_labels)))
186 |   return input_examples, input_labels
187 | 
def get_random_batches(input_examples, input_labels, log):
  """Flatten per-file examples and labels and shuffle them in unison.

  The arrays of all files are concatenated along their first axis, so that
  every entry stands for one audio frame together with its one-hot label.
  The frames are then brought into a random order (random.shuffle) without
  breaking up the frame/label pairing. Shuffling avoids a fixed pattern
  caused by reading the audio files in the same order each time.

  Args:
    input_examples (list): A list of 3-D np.arrays of shape [num_example, num_frames, num_bands]
    input_labels (list): A list of 2-D np.arrays holding one encoded label
      (of length num_classes) per audio frame of the corresponding example.
    log: A Python logging object (not used by this function)

  Returns:
    features (list): A shuffled list of input examples.
    labels (list): A shuffled list of input labels.
  """
  # [sum(num_example), num_frames, num_bands]
  stacked_examples = np.concatenate(list(input_examples))
  # [sum(encoded_labels), num_classes]
  stacked_labels = np.concatenate(list(input_labels))

  # Draw one random order of positions and apply it to both arrays. Only
  # positions that have an example as well as a label take part.
  pair_count = min(len(stacked_examples), len(stacked_labels))
  order = list(range(pair_count))
  shuffle(order)

  features = []
  labels = []
  for position in order:
    features.append(stacked_examples[position])
    labels.append(stacked_labels[position])

  return (features, labels)
222 |   
def _attach_log_handlers(logger, logfile, fmt):
  """Set the level of `logger` and make it log to `logfile` and to stdout."""
  logger.setLevel(log_level)
  for handler in (logging.FileHandler(logfile), logging.StreamHandler(sys.stdout)):
    handler.setLevel(log_level)
    handler.setFormatter(fmt)
    logger.addHandler(handler)

def main(_):
  """Train a small classifier on top of VGGish and checkpoint it.

  Steps:
    1. Create the output folders and set up file/stdout logging.
    2. Define VGGish plus a fully connected layer (100 units) and a logits
       layer with FLAGS.num_classes outputs, trained with sigmoid
       cross-entropy and Adam.
    3. Load the VGGish checkpoint given by FLAGS.checkpoint.
    4. Load the data set, split off FLAGS.test_size of the files for
       validation and train for FLAGS.num_batches epochs, each split into
       FLAGS.num_mini_batches mini batches of freshly shuffled frames.
    5. If FLAGS.validation is set, evaluate the validation set after every
       training mini batch.
    6. Save a checkpoint whenever the epoch index is a multiple of
       FLAGS.save_step.

  Exits with status 1 if there are fewer training frames than mini batches.

  Args:
    _: Unused positional argument supplied by tf.app.run().
  """
  # Create folders, if necessary
  for p in (output_dir, log_dir, log_dir_test, log_dir_train, model_dir):
    create_dir(p)

  # allow_soft_placement gives fallback GPU, log_device_placement=True displays device info
  with tf.Graph().as_default(), tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    now = datetime.datetime.now().isoformat().replace(":", "_")
    fmt = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s:%(message)s',
                            '%Y%m%d-%H%M%S')

    # TF logger
    _attach_log_handlers(
        logging.getLogger('tensorflow'),
        os.path.join(log_dir, "{}-{}-tf.log".format(FLAGS.model_version, now)),
        fmt)

    # Root logger
    log = logging.getLogger()
    _attach_log_handlers(
        log,
        os.path.join(log_dir, "{}-{}-run.log".format(FLAGS.model_version, now)),
        fmt)

    start = time.time()
    log.info("Model version: {}".format(FLAGS.model_version))
    log.info("Number of epochs: {}".format(FLAGS.num_batches))
    log.info("Number of classes: {}".format(FLAGS.num_classes))
    log.info("Number of Mini batches: {}".format(FLAGS.num_mini_batches))
    log.info("Validation enabled: {}".format(FLAGS.validation))
    log.info("Size of Validation set: {}".format(FLAGS.test_size))
    log.info("Saving model after every {}th step".format(FLAGS.save_step))

    run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)

    # Define VGGish as our convolutional blocks
    embeddings = vggish_slim.define_vggish_slim(FLAGS.train_vggish)

    # Define a shallow classification model and associated training ops on top of VGGish.
    with tf.variable_scope('mymodel'):
      # Add a fully connected layer with 100 units.
      num_units = 100
      fc = slim.fully_connected(embeddings, num_units)

      # Add a classifier layer at the end, consisting of parallel logistic
      # classifiers, one per class. This allows for multi-class tasks.
      logits = slim.fully_connected(fc, FLAGS.num_classes, activation_fn=None, scope='logits')

      # Use Sigmoid as our activation function. The op is only created for
      # its name ('prediction'); nothing in this function reads it.
      tf.sigmoid(logits, name='prediction')

      log.debug("Logits: {}".format(logits))

      # Add training ops.
      with tf.variable_scope('train'):

        global_step = tf.Variable(
            0, name='global_step', trainable=False,
            collections=[tf.GraphKeys.GLOBAL_VARIABLES,
                         tf.GraphKeys.GLOBAL_STEP])

        # Labels are assumed to be fed as a batch multi-hot vectors, with
        # a 1 in the position of each positive class label, and 0 elsewhere:
        #   Accipiter_gentilis  --> [1, 0, 0]
        #   Cygnus_olor         --> [0, 1, 0]
        #   Regulus_regulus     --> [0, 0, 1]
        labels = tf.placeholder(
            tf.float32, shape=(None,FLAGS.num_classes), name='labels')

        # Cross-entropy label loss.
        xent = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=logits, labels=labels, name='xent')
        loss = tf.reduce_mean(xent, name='loss_op')
        tf.summary.scalar('loss', loss)

        # We use the same optimizer and hyperparameters as used to train VGGish.
        optimizer = tf.train.AdamOptimizer(
            learning_rate=vggish_params.LEARNING_RATE,
            epsilon=vggish_params.ADAM_EPSILON)
        optimizer.minimize(loss, global_step=global_step, name='train_op')

      # Add evaluation ops.
      with tf.variable_scope("evaluation"):
        prediction = tf.argmax(logits,1)
        correct_prediction = tf.equal(prediction, tf.argmax(labels,1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Create a summarizer that summarizes loss and accuracy
    # TODO: Fix validation loss summary
    tf.summary.scalar("Accuracy", accuracy)
    # Add average loss summary over entire batch
    tf.summary.scalar("Loss", tf.reduce_mean(xent))
    # Merge all the summaries; they are written to log_dir_train and
    # log_dir_test by the two writers below.
    summary_op = tf.summary.merge_all()

    # TensorBoard stuff
    train_writer = tf.summary.FileWriter(log_dir_train, sess.graph)
    validation_writer = tf.summary.FileWriter(log_dir_test, sess.graph)

    # Initialize all variables in the model, and then load the pre-trained
    # VGGish checkpoint.
    sess.run(tf.global_variables_initializer())
    vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)

    # Locate all the tensors and ops we need for the training loop.
    features_tensor = sess.graph.get_tensor_by_name(vggish_params.INPUT_TENSOR_NAME)
    labels_tensor = sess.graph.get_tensor_by_name('mymodel/train/labels:0')
    loss_tensor = sess.graph.get_tensor_by_name('mymodel/train/loss_op:0')
    train_op = sess.graph.get_operation_by_name('mymodel/train/train_op')

    # One saver for the whole run; building a new one per epoch would add
    # another set of save/restore ops to the graph every time.
    saver = tf.train.Saver()

    # Load all input with corresponding labels
    log.info("Loading data set and mapping birds to training IDs...")
    all_examples, all_labels = load_spectrogram(os.path.join(data_dir), log)

    # Create training and test sets
    X_train_entire, X_validation_entire, y_train_entire, y_validation_entire = sk.train_test_split(all_examples, all_labels, test_size=FLAGS.test_size)

    # Test set stays the same throughout all epochs
    (X_validation, y_validation) = get_random_batches(X_validation_entire, y_validation_entire, log)

    # Running index for validation summaries, so that no two of them share a
    # step in TensorBoard.
    validation_summary_step = 0

    try:
      # Start training
      for step in range(FLAGS.num_batches):
        log.info("######## Epoch {}/{} started ########".format(step + 1, FLAGS.num_batches))

        # Shuffle the order of input examples to foster generalization
        (X_train, y_train) = get_random_batches(X_train_entire,y_train_entire, log)

        # Train on n batches per epoch. Floor division keeps the size an
        # integer (as range() needs) on Python 2 and Python 3 alike.
        minibatch_n = FLAGS.num_mini_batches
        minibatch_size = len(X_train) // minibatch_n
        if minibatch_size <= 0:
          log.error("Size of minibatch too small ({}), choose smaller number of minibatches or use more classes!".format(minibatch_size))
          sys.exit(1)

        counter = 1
        for i in range(0, len(X_train), minibatch_size):
          log.info("(Epoch {}/{}) ==> Minibatch {} started ...".format(step+1, FLAGS.num_batches, counter))

          # Get pair of (X, y) of the current minibatch/chunk
          X_train_mini = X_train[i:i + minibatch_size]
          y_train_mini = y_train[i:i + minibatch_size]

          log.info("Size of mini batch (features): {}".format(len(X_train_mini)))
          log.info("Size of mini batch (labels): {}".format(len(y_train_mini)))

          # Actual execution of the graph
          [summary, loss, _, train_acc] = sess.run(
              [summary_op, loss_tensor, train_op, accuracy],
              feed_dict={features_tensor: X_train_mini, labels_tensor: y_train_mini},
              options=run_options)

          # Number of frames seen so far. Offsetting by whole epochs keeps
          # the steps of different epochs from overlapping.
          train_writer.add_summary(summary, step * len(X_train) + i)
          log.info("Loss in minibatch: {} ".format(loss))
          log.info("Training accuracy in minibatch: {}".format(train_acc))

          log.info("(Epoch {}/{}) ==> Minibatch {} finished ...\n".format(step+1, FLAGS.num_batches, counter))
          counter += 1

          # Validation after every training mini batch. Skipped when disabled
          # via --validation or when there is nothing to validate on.
          if FLAGS.validation and len(X_validation) > 0:
            # Test set mini batching
            minibatch_valid_size = 4
            correct_total = 0.
            corr_pred = None
            for j in range(0, len(X_validation), minibatch_valid_size):
              X_validation_mini = X_validation[j:j + minibatch_valid_size]
              y_validation_mini = y_validation[j:j + minibatch_valid_size]

              summary, val_acc, corr_pred = sess.run(
                  [summary_op, accuracy, correct_prediction],
                  feed_dict={features_tensor: X_validation_mini, labels_tensor: y_validation_mini},
                  options=run_options)

              # val_acc is the mean over this mini batch; weighting it with
              # the batch length makes a shorter last batch count
              # proportionally.
              correct_total += val_acc * len(X_validation_mini)

              validation_writer.add_summary(summary, validation_summary_step)
              validation_summary_step += 1

            # Share of correctly classified validation frames. Dividing by
            # the frame count cannot hit zero here and does not drop a batch.
            average_val_acc = correct_total / len(X_validation)
            log.info("Epoch {} -- Validation Accuracy: {}".format(step+1, average_val_acc))
            log.debug("Correct prediction: {}".format(corr_pred))

        # Save model to disk.
        if step % FLAGS.save_step == 0:
          save_path = saver.save(sess, os.path.join(model_dir, "jibjib_model-{}.ckpt".format(FLAGS.model_version)),global_step=step)
          log.info("Model saved to {}".format(save_path))
    finally:
      # Flush pending summaries to disk, also when training is aborted
      train_writer.close()
      validation_writer.close()

    end = time.time()
    out = "Training finished after {}s".format(end - start)
    log.info(out)
421 |   
if __name__ == '__main__':
  # Re-open stdout with a buffer size of 0 (disables the stdout buffer)
  sys.stdout = os.fdopen(
      sys.stdout.fileno(),
      'w',
      0)

  # Hand control over to TensorFlow's application runner
  tf.app.run()
427 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     <one line to give the program's name and a brief idea of what it does.>
635 |     Copyright (C) <year>  <name of author>
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     <program>  Copyright (C) <year>  <name of author>
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.
675 | 


--------------------------------------------------------------------------------