├── LICENSE.md ├── README.md ├── fingerprint.cpp ├── fingerprint.h └── test.mp3 /LICENSE.md: -------------------------------------------------------------------------------- 1 | ## MIT License 2 | 3 | Copyright (c) 2019 Suliman Alsowelim 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE 21 | *** 22 | 23 | ### MIT License 24 | 25 | Copyright (c) 2013 Will Drevo 26 | 27 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 28 | 29 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
30 | 31 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 32 | *** 33 | ### OpenCV License Agreement 34 | For Open Source Computer Vision Library 35 | (3-clause BSD License) 36 | 37 | Copyright (C) 2000-2019, Intel Corporation, all rights reserved. 38 | Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. 39 | Copyright (C) 2009-2016, NVIDIA Corporation, all rights reserved. 40 | Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved. 41 | Copyright (C) 2015-2016, OpenCV Foundation, all rights reserved. 42 | Copyright (C) 2015-2016, Itseez Inc., all rights reserved. 43 | Third party copyrights are property of their respective owners. 44 | 45 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 46 | 47 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 48 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 49 | Neither the names of the copyright holders nor the names of the contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
50 | This software is provided by the copyright holders and contributors “as is” and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall copyright holders or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. 51 | *** 52 | ### Boost Software License - Version 1.0 - August 17th, 2003 53 | 54 | Permission is hereby granted, free of charge, to any person or organization 55 | obtaining a copy of the software and accompanying documentation covered by 56 | this license (the "Software") to use, reproduce, display, distribute, 57 | execute, and transmit the Software, and to prepare derivative works of the 58 | Software, and to permit third-parties to whom the Software is furnished to 59 | do so, all subject to the following: 60 | 61 | The copyright notices in the Software and this entire statement, including 62 | the above license grant, this restriction and the following disclaimer, 63 | must be included in all copies of the Software, in whole or in part, and 64 | all derivative works of the Software, unless such copies or derivative 65 | works are solely in the form of machine-executable object code generated by 66 | a source language processor. 67 | 68 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 69 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 70 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT 71 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 72 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 73 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 74 | DEALINGS IN THE SOFTWARE. 75 | *** 76 | ### MP3 file license 77 | License: The sound effect is permitted for non-commercial use under license "Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)" 78 | 79 | http://www.orangefreesounds.com/ 80 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | Dejavu c++ port (audio fingerprinting) 3 | ========== 4 | 5 | This is a c++ implementation of the fingerprinting algorithm suggested in the dejavu audio fingerprinting project (https://github.com/worldveil/dejavu — specifically, the fingerprint.py file). See [how dejavu works](http://willdrevo.com/fingerprinting-and-audio-recognition-with-python/). 6 | 7 | ## Prerequisites: 8 | - opencv (= 3.4.5) 9 | - boost library (= 1.60) 10 | - c++14 11 | - for demo only: ffmpeg 12 | 13 | The "fingerprint" function accepts an array of floats as input (raw audio PCM data). It returns a list of hashes with offsets (as a JSON string). 14 | 15 | ## Why the c++ port? 16 | 1. You can run c++ code on iOS or Android (tested it myself, works flawlessly). 17 | 2. In theory: performance boost. In practice: didn't see much difference. 18 | 19 | ## Demo 20 | The main function loads a test mp3 file, decodes it using ffmpeg (mono channel, 22050 Hz sample rate), feeds it to the fingerprint function, then prints the list of hash-offset pairs. 21 | 22 | ## Update 2020: Include it in react-native/iOS/Android 23 | Sample code is now provided in a [different repository](https://github.com/salsowelim/mobileAfp) that demonstrates how to include this function in a react-native project. 
[Blog post](http://en.suliman.ws/posts/mafp) is available that explains how to do it. 24 | ## Final notes 25 | - I wrote this code initially for prototype purposes. while I have tested the correctness of the algorithm output, I didn't optimise the code to c++ best standards. If you want to improve the code in this manner I will gladly accept pull requests. 26 | - Performance (speed wise) is really slow when the audio input is large. However on small audio files (<10 seconds) it is similar or better than python implementation. For now, I'm not interested to look after this issue. If you have a fix, PR. 27 | -------------------------------------------------------------------------------- /fingerprint.cpp: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2019, Suliman Alsowelim 2 | All rights reserved. 3 | This source code is licensed under the MIT license found in the 4 | LICENSE file in the root directory of this source tree. 5 | */ 6 | #import 7 | #import "fingerprint.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | using boost::property_tree::ptree; 23 | using namespace std; 24 | 25 | int DEFAULT_FAN_VALUE = 15; 26 | int MIN_HASH_TIME_DELTA = 0; 27 | int MAX_HASH_TIME_DELTA = 200; 28 | int FINGERPRINT_REDUCTION = 20; 29 | int PEAK_NEIGHBORHOOD_SIZE = 20; 30 | float DEFAULT_AMP_MIN = 10; 31 | int DEFAULT_WINDOW_SIZE = 4096; 32 | float DEFAULT_OVERLAP_RATIO = 0.5; 33 | float FS = 44100.0; 34 | 35 | 36 | std::vector> stride_windows(const std::vector& data, size_t blocksize, size_t overlap){ 37 | //https://stackoverflow.com/questions/21344296/striding-windows/21345055 38 | std::vector> res; 39 | size_t minlen = (data.size() - overlap)/(blocksize - overlap); 40 | auto start = data.begin(); 41 | for (size_t i=0; i()); 44 | std::vector& block = res.back(); 45 | auto it = 
start++; 46 | for (size_t j=0; j>& data){ 56 | size_t nocols = data[0].size(); 57 | size_t norows = data.size(); 58 | float mean = 0; 59 | for (size_t i=0; i create_window(int wsize){ 74 | std::vector res; 75 | float multiplier; 76 | for (int i = 0; i < wsize; i++) { 77 | multiplier = 0.5 - 0.5 *(cos(2.0*M_PI*i/(wsize-1))); 78 | res.emplace_back(multiplier); 79 | } 80 | return res; 81 | } 82 | 83 | void apply_window(std::vector &hann_window,std::vector>& data){ 84 | size_t nocols = data[0].size(); 85 | size_t norows = data.size(); 86 | for (size_t i=0; i> &v_in){ 112 | //sorting 113 | //https://stackoverflow.com/questions/279854/how-do-i-sort-a-vector-of-pairs-based-on-the-second-element-of-the-pair 114 | std::sort(v_in.begin(), v_in.end(), [](auto &left, auto &right) { 115 | if (left.second == right.second) 116 | return left.first < right.first; 117 | return left.second < right.second; 118 | }); 119 | std::ostringstream buf; 120 | buf << "["; 121 | for(int i=0; i= MIN_HASH_TIME_DELTA) and (t_delta <= MAX_HASH_TIME_DELTA)){ 130 | char buffer [100]; 131 | snprintf(buffer, sizeof(buffer),"%d|%d|%d", freq1,freq2,t_delta); 132 | std::string to_be_hashed = buffer; 133 | std::string hash_result = get_sha1(to_be_hashed).erase(FINGERPRINT_REDUCTION,40); 134 | ptree pt; 135 | pt.put ("hash", hash_result); 136 | pt.put ("offset", time1); 137 | if(buf.str() != "["){ 138 | buf << ","; 139 | } 140 | write_json(buf, pt, false); 141 | } 142 | } 143 | } 144 | } 145 | buf << "]"; 146 | return buf.str(); 147 | } 148 | 149 | vector> get_2D_peaks (cv::Mat data){ 150 | /* generate binary structure and apply maximum filter*/ 151 | cv::Mat tmpkernel = cv::getStructuringElement(cv::MORPH_CROSS,cv::Size(3,3),cv::Point(-1,-1)); 152 | cv::Mat kernel = cv::Mat(PEAK_NEIGHBORHOOD_SIZE*2+1,PEAK_NEIGHBORHOOD_SIZE*2+1, CV_8U, uint8_t(0)); 153 | kernel.at(PEAK_NEIGHBORHOOD_SIZE,PEAK_NEIGHBORHOOD_SIZE) = uint8_t(1); 154 | cv::dilate(kernel, kernel, tmpkernel,cv::Point(-1, -1), 
PEAK_NEIGHBORHOOD_SIZE,1,1); 155 | cv::Mat d1; 156 | cv::dilate(data, d1, kernel);/* d1 now contain m1 with max filter applied */ 157 | /* generate eroded background */ 158 | cv::Mat background = (data == 0); // 255 if element == 0 , 0 otherwise 159 | cv::Mat local_max = (data == d1); // 255 if true, 0 otherwise 160 | cv::Mat eroded_background; 161 | cv::erode(background, eroded_background, kernel); 162 | cv::Mat detected_peaks = local_max - eroded_background; 163 | /* now detected peaks.size == m1.size .. iterate through m1. get amp where peak == 255 (true), get indices i,j as well.*/ 164 | vector> freq_time_idx_pairs; 165 | for(int i=0; i(i, j) == 255) and (data.at(i,j) > DEFAULT_AMP_MIN)) { 168 | freq_time_idx_pairs.push_back(std::make_pair(i,j)); 169 | } 170 | } 171 | } 172 | 173 | return freq_time_idx_pairs; 174 | 175 | } 176 | 177 | 178 | void max_filter(std::vector>& data){ 179 | //https://gist.github.com/otmb/014107e7b6c6d6a79f0ac1ccc456580a 180 | cv::Mat m1(data.size(), data.at(0).size(), CV_32F); 181 | for(int i=0; i(i, j) = data.at(i).at(j); 184 | 185 | /* generate binary structure and apply maximum filter*/ 186 | cv::Mat tmpkernel = cv::getStructuringElement(cv::MORPH_CROSS,cv::Size(3,3),cv::Point(-1,-1)); 187 | cv::Mat kernel = cv::Mat(PEAK_NEIGHBORHOOD_SIZE*2+1,PEAK_NEIGHBORHOOD_SIZE*2+1, CV_8U, uint8_t(0)); 188 | kernel.at(PEAK_NEIGHBORHOOD_SIZE,PEAK_NEIGHBORHOOD_SIZE) = uint8_t(1); 189 | cv::dilate(kernel, kernel, tmpkernel,cv::Point(-1, -1), PEAK_NEIGHBORHOOD_SIZE,1,1); 190 | cv::Mat d1; 191 | cv::dilate(m1, d1, kernel); 192 | /* d1 now contain m1 with max filter applied */ 193 | /* generate eroded background */ 194 | cv::Mat background = (m1 == 0); 195 | cv::Mat local_max = (m1 == d1); 196 | cv::Mat eroded_background; 197 | cv::erode(background, eroded_background, kernel); 198 | cv::Mat detected_peaks = local_max - eroded_background; 199 | vector> freq_time_idx_pairs; 200 | for(int i=0; i(i, j) == 255) and (m1.at(i,j) > DEFAULT_AMP_MIN)) { 203 | 
freq_time_idx_pairs.push_back(std::make_pair(i,j)); 204 | } 205 | } 206 | } 207 | } 208 | 209 | 210 | 211 | std::string fingerprint (float * data, int data_size){ 212 | std::vector vec(&data[0], data + data_size); 213 | // see mlab.py on how to decide number of frequencies 214 | int max_freq = 0; //onesided 215 | if (DEFAULT_WINDOW_SIZE % 2 == 0){ 216 | max_freq = int(std::floor(DEFAULT_WINDOW_SIZE / 2)) + 1; 217 | }else{ 218 | max_freq = int(std::floor((DEFAULT_WINDOW_SIZE+1) / 2)); 219 | } 220 | 221 | std::vector> blocks = stride_windows(vec, DEFAULT_WINDOW_SIZE, DEFAULT_WINDOW_SIZE*DEFAULT_OVERLAP_RATIO); 222 | std::vector hann_window = create_window(DEFAULT_WINDOW_SIZE); 223 | apply_window(hann_window,blocks); 224 | 225 | cv::Mat dst(blocks[0].size(),blocks.size(), CV_32F); 226 | for(int i=0; i(i, j) = blocks[j][i]; 229 | } 230 | cv::dft(dst,dst,cv::DftFlags::DFT_COMPLEX_OUTPUT+cv::DftFlags::DFT_ROWS,0); 231 | cv::mulSpectrums(dst,dst,dst,0,true); 232 | 233 | cv::Mat dst2(max_freq,blocks.at(0).size(), CV_32F); 234 | for(int i=0; i(i, j) = dst.ptr(j)[2*i]; 237 | } 238 | 239 | for(int i=1; i(i, j) = dst2.at(i, j)*2; 242 | 243 | dst2 = dst2 * (1.0/FS); 244 | float sum = 0.0; 245 | float tmp = 0.0; 246 | for(unsigned int i = 0; i < hann_window.size(); i++){ 247 | if(hann_window[i] < 0) 248 | tmp = hann_window[i]* -1; 249 | else 250 | tmp = hann_window[i]; 251 | sum = sum + (tmp*tmp); 252 | } 253 | dst2 = dst2 * (1.0/sum); 254 | //see https://github.com/worldveil/dejavu/issues/118 255 | float threshold = 0.00000001; 256 | for(int i=0; i(i, j)) < threshold){ 259 | dst2.at(i, j) = threshold; 260 | } 261 | dst2.at(i, j) = 10 * log10(dst2.at(i, j)); 262 | } 263 | } 264 | 265 | vector> v_in = get_2D_peaks(dst2); 266 | std::string json = generate_hashes(v_in); 267 | return json; 268 | } 269 | 270 | int main () { 271 | std::system("ffmpeg -hide_banner -loglevel panic -i test.mp3 -f s16le -acodec pcm_s16le -ss 0 -ac 1 -ar 22050 - > raw_data "); 272 | 
//https://www.daniweb.com/programming/software-development/threads/128352/read-a-raw-pcm-file-and-then-play-it-with-sound-in-c-or-c 273 | //https://stackoverflow.com/questions/49161854/reading-raw-audio-file 274 | std::fstream f_in; 275 | short speech; 276 | float data[200000]; 277 | f_in.open("raw_data", std::ios::in | std::ios::binary); 278 | int i = 0; 279 | while (true) { 280 | f_in.read((char *)&speech, 2); 281 | if (!f_in.good()){ 282 | break; 283 | } 284 | data[i] = speech; 285 | i++; 286 | } 287 | f_in.close(); 288 | std::string json = fingerprint(data,i); 289 | cout << json << std::endl; 290 | return 0; 291 | } 292 | -------------------------------------------------------------------------------- /fingerprint.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | std::string fingerprint (float * data, int data_size); -------------------------------------------------------------------------------- /test.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/salsowelim/dejavu_cpp_port/70b4307111be4a2481e40e7d58b763ac50a2081b/test.mp3 --------------------------------------------------------------------------------