├── .editorconfig ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── collect-fingerprints-of-songs.py ├── config-development.sample.json ├── config.json ├── db └── .keep ├── get-database-stat.py ├── libs ├── __init__.py ├── config.py ├── db.py ├── db_mongo.py ├── db_sqlite.py ├── fingerprint.py ├── reader.py ├── reader_file.py ├── reader_microphone.py ├── visualiser.py ├── visualiser_console.py └── visualiser_plot.py ├── recognize-from-file.py ├── recognize-from-microphone.py ├── requirements.txt ├── reset-database.py ├── sql-execute.py └── tests └── sqlite.py /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | end_of_line = lf 9 | charset = utf-8 10 | 11 | [*.py] 12 | max_line_length = 119 13 | 14 | [Makefile] 15 | indent_style = tab 16 | insert_final_newline = false 17 | trim_trailing_whitespace = false 18 | 19 | [*.md] 20 | insert_final_newline = false 21 | trim_trailing_whitespace = false 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | pip-log.txt 8 | pip-delete-this-directory.txt 9 | 10 | *.wav 11 | *.mp3 12 | 13 | config-development.json 14 | 15 | db/*.db 16 | db/*.db-journal 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Roman Rodomansky 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: tests 2 | 3 | install: 4 | @echo soon 5 | 6 | clean: 7 | @find . 
-name \*.pyc -delete 8 | 9 | reset: 10 | @python reset-database.py 11 | 12 | tests: 13 | @python tests/*.py 14 | 15 | stat: 16 | @python get-database-stat.py 17 | 18 | fingerprint-songs: clean 19 | @python collect-fingerprints-of-songs.py 20 | 21 | recognize-listen: clean 22 | @python recognize-from-microphone.py -s $(seconds) 23 | 24 | recognize-file: clean 25 | @python recognize-from-file.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fingerprint audio files & identify what's playing 2 | 3 | - conference [PaceMaker: BackEnd-2016 conference](http://www.pacemaker.in.ua/BackEnd-2016/about) 4 | - slides are on [slideshare.net/rodomansky/ok-shazam-la-lalalaa](http://www.slideshare.net/rodomansky/ok-shazam-la-lalalaa) 5 | 6 | ![](http://new.tinygrab.com/7020c0e8b010392da4053fa90ab8e0c8419bded864.png) 7 | 8 | ## How to set up 9 | 10 | 1. Run `$ make clean reset` to clean & init database struct 11 | 1. Run `$ make tests` to make sure that everything is properly configurated 12 | 1. Copy some `.mp3` audio files into `mp3/` directory 13 | 1. Run `$ make fingerprint-songs` to analyze audio files & fill your db with hashes 14 | 1. 
if __name__ == '__main__':
  # Fingerprint every .mp3 file found in `mp3/` and store the resulting
  # hashes in the sqlite database. Songs that already have hashes stored
  # are skipped.
  config = get_config()

  db = SqliteDatabase()
  path = "mp3/"

  for filename in os.listdir(path):
    if not filename.endswith(".mp3"):
      continue

    reader = FileReader(os.path.join(path, filename))
    audio = reader.parse_audio()
    channel_amount = len(audio['channels'])

    # look the song up BEFORE add_song(), so a freshly inserted song can be
    # told apart from one that was already known
    song = db.get_song_by_filehash(audio['file_hash'])
    song_id = db.add_song(filename, audio['file_hash'])

    msg = ' * %s %s: %s' % (
      colored('id=%s', 'white', attrs=['dark']),       # id
      colored('channels=%d', 'white', attrs=['dark']), # channels
      colored('%s', 'white', attrs=['bold'])           # filename
    )
    print(msg % (song_id, channel_amount, filename))

    if song:
      hash_count = db.get_song_hashes_count(song_id)

      if hash_count > 0:
        msg = ' already exists (%d hashes), skip' % hash_count
        print(colored(msg, 'red'))
        continue

    print(colored(' new song, going to analyze..', 'green'))

    # union of the hashes of all channels; a set drops duplicates
    hashes = set()

    for channeln, channel in enumerate(audio['channels']):
      msg = ' fingerprinting channel %d/%d'
      print(colored(msg, attrs=['dark']) % (channeln + 1, channel_amount))

      channel_hashes = set(fingerprint.fingerprint(
        channel,
        Fs=audio['Fs'],
        plots=config['fingerprint.show_plots']
      ))

      msg = ' finished channel %d/%d, got %d hashes'
      print(colored(msg, attrs=['dark']) % (
        channeln + 1, channel_amount, len(channel_hashes)
      ))

      hashes |= channel_hashes

    # this summary was built but never printed in the original script
    msg = ' finished fingerprinting, got %d unique hashes'
    print(colored(msg, attrs=['dark']) % len(hashes))

    # rows in the column order expected by store_fingerprints()
    values = [(song_id, song_hash, offset) for song_hash, offset in hashes]

    msg = ' storing %d hashes in db' % len(values)
    print(colored(msg, 'green'))

    db.store_fingerprints(values)

  print('end')
def printSummary():
  """Print the total number of songs and fingerprints.

  Reads both counts with a single query through the module-level `db`
  and returns the song count (first column of the row).
  """
  counts = db.executeOne("""
    SELECT
      (SELECT COUNT(*) FROM songs) as songs_count,
      (SELECT COUNT(*) FROM fingerprints) as fingerprints_count
  """)

  label = colored('total', 'yellow')
  songs_part = colored('%d song(s)', 'yellow')
  fingerprints_part = colored('%d fingerprint(s)', 'yellow')
  template = ' * %s: %s (%s)' % (label, songs_part, fingerprints_part)

  # `counts` is the (songs_count, fingerprints_count) row
  print(template % counts)

  songs_count = counts[0]
  return songs_count
def printColissions():
  """Print the summed per-hash count of distinct songs.

  NOTE(review): the inner query has no `HAVING n > 1`, so hashes that
  belong to a single song are included in the sum -- confirm this is the
  intended definition of a "collision".
  """
  result = db.executeAll("""
    SELECT sum(a.n) FROM (
      SELECT
        hash,
        count(distinct song_fk) AS n
      FROM fingerprints
      GROUP BY `hash`
      ORDER BY n DESC
    ) a
  """)

  template = ' * colissions: %s' % colored('%d hash(es)', 'red')

  # sum() over an empty table yields NULL; report 0 in that case,
  # otherwise format with the one-column row itself
  first_row = result[0]
  shown = 0 if first_row[0] is None else first_row

  print(template % shown)
def parse_config(filename):
  """Parse a JSON config file into a dict.

  Returns an empty dict when `filename` does not exist or is not a
  regular file. A file that exists but cannot be opened or does not hold
  valid JSON raises the underlying error (IOError / ValueError).
  """
  if not os.path.isfile(filename):
    return {}

  # `with` closes the handle even when json.load() raises
  with open(filename, 'r') as f:
    return json.load(f)
class SqliteDatabase(Database):
  """Database implementation backed by a local sqlite3 file.

  The file path is read from the `db.file` config key. Table and column
  names are interpolated into the SQL text (sqlite cannot bind
  identifiers), so they must come from trusted code; row values are
  always bound through `?` placeholders.
  """
  TABLE_SONGS = 'songs'
  TABLE_FINGERPRINTS = 'fingerprints'

  # number of rows handed to sqlite per executemany() call in insertMany()
  INSERT_BATCH_SIZE = 1000

  def __init__(self):
    self.connect()

  def connect(self):
    """Open the sqlite file named by config['db.file'] and create a cursor."""
    config = get_config()

    self.conn = sqlite3.connect(config['db.file'])
    self.conn.text_factory = str

    self.cur = self.conn.cursor()

    print(colored('sqlite - connection opened','white',attrs=['dark']))

  def __del__(self):
    # connect() may have failed before `conn` was assigned
    conn = getattr(self, 'conn', None)
    if conn is None:
      return

    conn.commit()
    conn.close()
    print(colored('sqlite - connection has been closed','white',attrs=['dark']))

  def query(self, query, values=()):
    """Execute a statement without fetching any result."""
    self.cur.execute(query, values)

  def executeOne(self, query, values=()):
    """Execute a statement and return the first row (or None)."""
    self.cur.execute(query, values)
    return self.cur.fetchone()

  def executeAll(self, query, values=()):
    """Execute a statement and return all rows as a list."""
    self.cur.execute(query, values)
    return self.cur.fetchall()

  def buildSelectQuery(self, table, params):
    """Build `SELECT * FROM table WHERE k1 = ? AND k2 = ? ...`.

    `params` maps column names to the values they must equal. Returns a
    dict with the SQL text under "query" and the bound values, in matching
    order, under "values". With empty `params` no WHERE clause is emitted.
    """
    keys = list(params)
    values = [params[key] for key in keys]

    query = "SELECT * FROM %s" % table
    if keys:
      conditions = ' AND '.join("%s = ?" % key for key in keys)
      query = "%s WHERE %s" % (query, conditions)

    return {
      "query": query,
      "values": values
    }

  def findOne(self, table, params):
    """Return the first row of `table` matching `params`, or None."""
    select = self.buildSelectQuery(table, params)
    return self.executeOne(select['query'], select['values'])

  def findAll(self, table, params):
    """Return every row of `table` matching `params`."""
    select = self.buildSelectQuery(table, params)
    return self.executeAll(select['query'], select['values'])

  def insert(self, table, params):
    """Insert one row into `table` and return its rowid.

    `params` maps column names to values. The statement targets the given
    table and emits one placeholder per column (it used to be hard-wired
    to the songs table with exactly two columns).
    """
    keys = list(params)
    values = [params[key] for key in keys]

    query = "INSERT INTO %s (%s) VALUES (%s)" % (
      table, ', '.join(keys), ', '.join('?' * len(keys))
    )

    self.cur.execute(query, values)
    self.conn.commit()

    return self.cur.lastrowid

  def insertMany(self, table, columns, values):
    """Insert many rows with INSERT OR IGNORE, in batches, then commit.

    `values` is an iterable of row tuples ordered like `columns`. Falsy
    rows (None, empty tuples) are skipped.
    """
    query = "INSERT OR IGNORE INTO %s (%s) VALUES (%s)" % (
      table, ", ".join(columns), ", ".join('?' * len(columns))
    )

    batch = []
    for row in values:
      if not row:
        continue

      batch.append(row)
      if len(batch) >= self.INSERT_BATCH_SIZE:
        self.cur.executemany(query, batch)
        batch = []

    if batch:
      self.cur.executemany(query, batch)

    self.conn.commit()

  def get_song_hashes_count(self, song_id):
    """Return the number of fingerprint rows stored for `song_id`."""
    # song_id is bound as a parameter rather than formatted into the SQL
    query = 'SELECT count(*) FROM %s WHERE song_fk = ?' % self.TABLE_FINGERPRINTS
    rows = self.executeOne(query, (song_id,))
    return int(rows[0])
10 | 11 | IDX_FREQ_I = 0 12 | IDX_TIME_J = 1 13 | 14 | # Sampling rate, related to the Nyquist conditions, which affects 15 | # the range frequencies we can detect. 16 | DEFAULT_FS = 44100 17 | 18 | # Size of the FFT window, affects frequency granularity 19 | DEFAULT_WINDOW_SIZE = 4096 20 | 21 | # Ratio by which each sequential window overlaps the last and the 22 | # next window. Higher overlap will allow a higher granularity of offset 23 | # matching, but potentially more fingerprints. 24 | DEFAULT_OVERLAP_RATIO = 0.5 25 | 26 | # Degree to which a fingerprint can be paired with its neighbors -- 27 | # higher will cause more fingerprints, but potentially better accuracy. 28 | DEFAULT_FAN_VALUE = 15 29 | 30 | # Minimum amplitude in spectrogram in order to be considered a peak. 31 | # This can be raised to reduce number of fingerprints, but can negatively 32 | # affect accuracy. 33 | DEFAULT_AMP_MIN = 10 34 | 35 | # Number of cells around an amplitude peak in the spectrogram in order 36 | # for Dejavu to consider it a spectral peak. Higher values mean less 37 | # fingerprints and faster matching, but can potentially affect accuracy. 38 | PEAK_NEIGHBORHOOD_SIZE = 20 39 | 40 | # Thresholds on how close or far fingerprints can be in time in order 41 | # to be paired as a fingerprint. If your max is too low, higher values of 42 | # DEFAULT_FAN_VALUE may not perform as expected. 43 | MIN_HASH_TIME_DELTA = 0 44 | MAX_HASH_TIME_DELTA = 200 45 | 46 | # If True, will sort peaks temporally for fingerprinting; 47 | # not sorting will cut down number of fingerprints, but potentially 48 | # affect performance. 49 | PEAK_SORT = True 50 | 51 | # Number of bits to throw away from the front of the SHA1 hash in the 52 | # fingerprint calculation. The more you throw away, the less storage, but 53 | # potentially higher collisions and misclassifications when identifying songs. 
def fingerprint(channel_samples, Fs=DEFAULT_FS,
                wsize=DEFAULT_WINDOW_SIZE,
                wratio=DEFAULT_OVERLAP_RATIO,
                fan_value=DEFAULT_FAN_VALUE,
                amp_min=DEFAULT_AMP_MIN,
                plots=False):
  """Fingerprint the samples of one audio channel.

  Computes a spectrogram of `channel_samples`, converts it to a log
  scale, finds its local maxima with get_2D_peaks() and returns the
  result of generate_hashes() for those peaks.

  channel_samples -- 1-D sequence of samples of a single channel
  Fs              -- sampling rate passed to the spectrogram
  wsize           -- FFT window size (NFFT)
  wratio          -- fraction of `wsize` by which windows overlap
  fan_value       -- forwarded to generate_hashes()
  amp_min         -- forwarded to get_2D_peaks()
  plots           -- when True, show the samples and the spectrogram
                     with matplotlib and forward plot=True to
                     get_2D_peaks()
  """
  # show samples plot
  if plots:
    plt.plot(channel_samples)
    plt.title('%d samples' % len(channel_samples))
    plt.xlabel('time (s)')
    plt.ylabel('amplitude (A)')
    plt.show()
    # NOTE(review): this runs after show(), so it acts on whatever axes
    # pyplot considers current next -- kept as in the original; confirm
    # it is intended
    plt.gca().invert_yaxis()

  # specgram() returns (spectrum, freqs, t); only the 2-D spectrum is used
  arr2D = mlab.specgram(
    channel_samples,
    NFFT=wsize,
    Fs=Fs,
    window=mlab.window_hanning,
    noverlap=int(wsize * wratio))[0]

  # show spectrogram plot
  if plots:
    plt.plot(arr2D)
    plt.title('FFT')
    plt.show()

  # apply log transform since specgram() returns a linear array; cells
  # that are exactly 0 become -inf, which is expected here and replaced
  # right below, so the divide-by-zero warning is silenced
  with np.errstate(divide='ignore'):
    arr2D = 10 * np.log10(arr2D)
  arr2D[arr2D == -np.inf] = 0  # replace infs with zeros

  # find local maxima
  local_maxima = get_2D_peaks(arr2D, plot=plots, amp_min=amp_min)

  msg = ' local_maxima: %d of frequency & time pairs'
  print(colored(msg, attrs=['dark']) % len(local_maxima))

  # return hashes
  return generate_hashes(local_maxima, fan_value=fan_value)
background = (arr2D == 0) 111 | eroded_background = binary_erosion(background, structure=neighborhood, 112 | border_value=1) 113 | 114 | # Boolean mask of arr2D with True at peaks 115 | detected_peaks = local_max ^ eroded_background 116 | 117 | # extract peaks 118 | amps = arr2D[detected_peaks] 119 | j, i = np.where(detected_peaks) 120 | 121 | # filter peaks 122 | amps = amps.flatten() 123 | peaks = zip(i, j, amps) 124 | peaks_filtered = [x for x in peaks if x[2] > amp_min] # freq, time, amp 125 | 126 | # get indices for frequency and time 127 | frequency_idx = [x[1] for x in peaks_filtered] 128 | time_idx = [x[0] for x in peaks_filtered] 129 | 130 | # scatter of the peaks 131 | if plot: 132 | fig, ax = plt.subplots() 133 | ax.imshow(arr2D) 134 | ax.scatter(time_idx, frequency_idx) 135 | ax.set_xlabel('Time') 136 | ax.set_ylabel('Frequency') 137 | ax.set_title("Spectrogram") 138 | plt.gca().invert_yaxis() 139 | plt.show() 140 | 141 | return zip(frequency_idx, time_idx) 142 | 143 | # Hash list structure: sha1_hash[0:20] time_offset 144 | # example: [(e05b341a9b77a51fd26, 32), ... 
def generate_hashes(peaks, fan_value=DEFAULT_FAN_VALUE):
  """Yield (hash, time_offset) fingerprints built from pairs of peaks.

  peaks     -- iterable of (frequency_idx, time_idx) pairs
  fan_value -- each peak is paired with the next `fan_value - 1` peaks

  Every peak is combined with the peaks that follow it. For each pair
  whose time difference lies within
  [MIN_HASH_TIME_DELTA, MAX_HASH_TIME_DELTA] a SHA1 of
  "freq1|freq2|t_delta" is computed and its first FINGERPRINT_REDUCTION
  hex characters are yielded together with the time index of the first
  peak.

  The input is copied, so the caller's sequence is not reordered and any
  iterable (not only a list) is accepted.
  """
  if PEAK_SORT:
    # sort peaks by time; sorted() is stable like list.sort()
    peaks = sorted(peaks, key=itemgetter(1))
  else:
    peaks = list(peaks)

  peak_count = len(peaks)

  for i, anchor in enumerate(peaks):
    freq1 = anchor[IDX_FREQ_I]
    t1 = anchor[IDX_TIME_J]

    for j in range(1, fan_value):
      if i + j >= peak_count:
        break  # no more following peaks for this anchor

      target = peaks[i + j]
      freq2 = target[IDX_FREQ_I]
      t2 = target[IDX_TIME_J]

      # get diff of time offsets
      t_delta = t2 - t1

      if MIN_HASH_TIME_DELTA <= t_delta <= MAX_HASH_TIME_DELTA:
        key = "%s|%s|%s" % (str(freq1), str(freq2), str(t_delta))
        # sha1() needs bytes on Python 3; the key is plain ASCII, so the
        # digest is the same on both major versions
        h = hashlib.sha1(key.encode('utf-8'))
        yield (h.hexdigest()[0:FINGERPRINT_REDUCTION], t1)
def parse_audio(self):
  """Decode `self.filename` with pydub and split it into channels.

  The raw sample data is read as 16-bit integers and de-interleaved into
  one array per channel.

  Returns a dict with the keys:
    "songname"  -- base name of the file without its extension
    "extension" -- the file extension, including the dot
    "channels"  -- list with one int16 array per audio channel
    "Fs"        -- frame rate reported by pydub
    "file_hash" -- result of self.parse_file_hash()

  If pydub raises audioop.error the message 'audioop.error' is printed
  and the error is re-raised; there is no fallback decoder.
  """
  # seconds to keep from the start of the file; None keeps everything
  limit = None

  songname, extension = os.path.splitext(os.path.basename(self.filename))

  try:
    audiofile = AudioSegment.from_file(self.filename)

    if limit:
      audiofile = audiofile[:limit * 1000]  # pydub slices in milliseconds

    # frombuffer replaces the deprecated binary mode of np.fromstring
    data = np.frombuffer(audiofile._data, np.int16)

    # samples are interleaved: channel c owns every n-th sample from c
    channels = [
      data[chn::audiofile.channels] for chn in range(audiofile.channels)
    ]

    fs = audiofile.frame_rate
  except audioop.error:
    print('audioop.error')
    # nothing was decoded, so there is nothing valid to return; this used
    # to fall through and fail with an UnboundLocalError instead
    raise

  return {
    "songname": songname,
    "extension": extension,
    "channels": channels,
    "Fs": fs,
    "file_hash": self.parse_file_hash()
  }
class MicrophoneReader(BaseReader):
  """Records 16-bit audio from the default input device via PyAudio.

  Recorded samples are kept per channel in `self.data`, a list holding
  one list of samples for each channel.
  """
  default_chunksize = 8192
  default_format = pyaudio.paInt16
  default_channels = 2
  default_rate = 44100
  default_seconds = 0

  # set default
  def __init__(self, a):
    super(MicrophoneReader, self).__init__(a)
    self.audio = pyaudio.PyAudio()
    self.stream = None
    self.data = []
    self.channels = MicrophoneReader.default_channels
    self.chunksize = MicrophoneReader.default_chunksize
    self.rate = MicrophoneReader.default_rate
    self.recorded = False

  def start_recording(self, channels=default_channels,
                      rate=default_rate,
                      chunksize=default_chunksize,
                      seconds=default_seconds):
    """Open a fresh input stream and reset the recorded data.

    A stream that is still open is stopped and closed first. `seconds`
    is accepted but not used here; the caller decides how many chunks to
    read through process_recording().
    """
    self.chunksize = chunksize
    self.channels = channels
    self.recorded = False
    self.rate = rate

    if self.stream:
      self.stream.stop_stream()
      self.stream.close()

    self.stream = self.audio.open(
      format=self.default_format,
      channels=channels,
      rate=rate,
      input=True,
      frames_per_buffer=chunksize,
    )

    # one sample list per channel
    self.data = [[] for i in range(channels)]

  def process_recording(self):
    """Read one chunk from the stream and append it to `self.data`.

    Returns the chunk as a 1-D int16 array with the channels still
    interleaved.
    """
    data = self.stream.read(self.chunksize)

    # frombuffer replaces the deprecated binary mode of numpy.fromstring
    nums = numpy.frombuffer(data, numpy.int16)

    # de-interleave: channel c owns every n-th sample starting at c
    for c in range(self.channels):
      self.data[c].extend(nums[c::self.channels])

    return nums

  def stop_recording(self):
    """Stop and close the input stream and mark the recording as done."""
    self.stream.stop_stream()
    self.stream.close()
    self.stream = None
    self.recorded = True

  def get_recorded_data(self):
    """Return the recorded samples as a list of per-channel lists."""
    return self.data

  def save_recorded(self, output_filename):
    """Write everything recorded so far to a WAV file.

    All channels are interleaved back into frames. Previously only the
    samples of channel 0 were written, reshaped as if they were already
    interleaved, which produced a file of the wrong length and content.
    """
    wf = wave.open(output_filename, 'wb')
    try:
      wf.setnchannels(self.channels)
      wf.setsampwidth(self.audio.get_sample_size(self.default_format))
      wf.setframerate(self.rate)

      # (channels, samples) -> (samples, channels); the row-major bytes
      # of the transposed array are the interleaved frames
      frames = numpy.array(self.data, dtype=numpy.int16).T
      wf.writeframes(frames.tobytes())
    finally:
      wf.close()

  def play(self):
    pass

  def get_recorded_time(self):
    """Return the recorded length: samples of channel 0 divided by rate."""
    return len(self.data[0]) / self.rate
-------------------------------------------------------------------------------- 1 | from matplotlib import pyplot 2 | 3 | class VisualiserPlot(): 4 | def __init__(self): 5 | pass 6 | 7 | @staticmethod 8 | def show(data): 9 | pyplot.plot(data) 10 | pyplot.show() 11 | -------------------------------------------------------------------------------- /recognize-from-file.py: -------------------------------------------------------------------------------- 1 | from libs.reader_microphone import FileReader 2 | 3 | song = None 4 | seconds = 5 5 | 6 | r = FileReader(123) 7 | r.recognize(seconds=seconds) 8 | 9 | print(song) 10 | -------------------------------------------------------------------------------- /recognize-from-microphone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | import sys 4 | import libs 5 | import libs.fingerprint as fingerprint 6 | import argparse 7 | 8 | from argparse import RawTextHelpFormatter 9 | from itertools import izip_longest 10 | from termcolor import colored 11 | from libs.config import get_config 12 | from libs.reader_microphone import MicrophoneReader 13 | from libs.visualiser_console import VisualiserConsole as visual_peak 14 | from libs.visualiser_plot import VisualiserPlot as visual_plot 15 | from libs.db_sqlite import SqliteDatabase 16 | # from libs.db_mongo import MongoDatabase 17 | 18 | if __name__ == '__main__': 19 | config = get_config() 20 | 21 | db = SqliteDatabase() 22 | 23 | parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter) 24 | parser.add_argument('-s', '--seconds', nargs='?') 25 | args = parser.parse_args() 26 | 27 | if not args.seconds: 28 | parser.print_help() 29 | sys.exit(0) 30 | 31 | seconds = int(args.seconds) 32 | 33 | chunksize = 2**12 # 4096 34 | channels = 2#int(config['channels']) # 1=mono, 2=stereo 35 | 36 | record_forever = False 37 | visualise_console = bool(config['mic.visualise_console']) 38 | visualise_plot = 
def grouper(iterable, n, fillvalue=None):
  """Split ``iterable`` into consecutive tuples of at most ``n`` items.

  The last chunk is padded with ``fillvalue`` and then every falsy item is
  removed from each chunk, so with the default ``fillvalue=None`` the padding
  disappears again. NOTE: falsy items of ``iterable`` itself (``0``, ``''``,
  ``None``) are dropped as well.

  :param iterable: the items to split
  :param n: maximum chunk length
  :param fillvalue: value used to pad the final chunk before filtering
  :returns: a generator of tuples
  """
  # itertools.izip_longest was renamed to zip_longest in Python 3.
  try:
    from itertools import izip_longest as zip_longest
  except ImportError:
    from itertools import zip_longest

  # n references to the same iterator: each output tuple consumes n items.
  args = [iter(iterable)] * n

  # Build real tuples instead of calling filter(): on Python 3 filter() is
  # lazy, which would break callers that take len() of a chunk.
  return (tuple(value for value in values if value)
          for values in zip_longest(fillvalue=fillvalue, *args))
def align_matches(matches):
  """Pick the best (song, offset difference) pair from raw hash matches.

  Counts how often every ``(offset difference, song id)`` combination occurs
  and selects the first one that reaches the highest count.

  :param matches: iterable of ``(song_id, offset_difference)`` tuples, as
    yielded by ``return_matches``
  :returns: dict with the keys ``SONG_ID``, ``SONG_NAME``, ``CONFIDENCE``
    (the winning count), ``OFFSET`` and ``OFFSET_SECS``; ``SONG_NAME`` is
    ``None`` when the song row cannot be loaded
  """
  diff_counter = {}
  largest = 0
  largest_count = 0
  song_id = -1

  for sid, diff in matches:
    per_song = diff_counter.setdefault(diff, {})
    per_song[sid] = per_song.get(sid, 0) + 1

    # strict '>' keeps the first pair that reached the maximum
    if per_song[sid] > largest_count:
      largest = diff
      largest_count = per_song[sid]
      song_id = sid

  songM = db.get_song_by_id(song_id)

  # NOTE(review): presumably get_song_by_id returns None for an unknown id
  # (e.g. the initial -1 when `matches` is empty) - confirm in libs/db.
  # Indexing None used to raise TypeError here.
  song_name = songM[1] if songM else None

  nseconds = round(float(largest) / fingerprint.DEFAULT_FS *
                   fingerprint.DEFAULT_WINDOW_SIZE *
                   fingerprint.DEFAULT_OVERLAP_RATIO, 5)

  return {
    "SONG_ID" : song_id,
    "SONG_NAME" : song_name,
    "CONFIDENCE" : largest_count,
    "OFFSET" : int(largest),
    "OFFSET_SECS" : nseconds
  }
#!/usr/bin/python
from libs.db_sqlite import SqliteDatabase

if __name__ == '__main__':
  db = SqliteDatabase()

  # (table name, CREATE statement) pairs, recreated in this order
  tables = [
    ('songs', """
      CREATE TABLE songs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT,
        filehash TEXT
      );
    """),
    ('fingerprints', """
      CREATE TABLE `fingerprints` (
        `id` INTEGER PRIMARY KEY AUTOINCREMENT,
        `song_fk` INTEGER,
        `hash` TEXT,
        `offset` INTEGER
      );
    """),
  ]

  for table_name, create_statement in tables:
    # drop the old table first so the script can be re-run at any time
    db.query("DROP TABLE IF EXISTS %s;" % table_name)
    print('removed db.%s' % table_name)

    db.query(create_statement)
    print('created db.%s' % table_name)

  print('done')
#!/usr/bin/python
"""Smoke test: run a trivial query through SqliteDatabase and check the result."""
import os
import sys

# make the repository root importable when this file is run from tests/
sys.path.append(os.path.join(sys.path[0], '..'))

from libs.db_sqlite import SqliteDatabase
from termcolor import colored

if __name__ == '__main__':
  db = SqliteDatabase()

  row = db.executeOne("SELECT 2+3 as x;")

  assert row[0] == 5, "failed simple sql execution"

  # print() call form: gives the same output on Python 2 for a single
  # argument and is valid syntax on Python 3
  print(' * %s' % colored('ok', 'green'))